'final': {
        'func_name': 'fully_connected',
        'input_arg': 'inputs',
        'layer_para': {
            'inputs': None,
            'num_outputs': n_action,
            'biases_initializer': None,
            'activation_fn': tf.nn.relu,
            'weights_initializer': tf.ones_initializer()
        }
    }
}

# Graph input: a single int32 state index per step.
# NOTE(review): tf.placeholder is TF1.x graph-mode API — this script will not
# run under TF2 eager execution without tf.compat.v1 shims.
state_in = tf.placeholder(shape=[1], dtype=tf.int32)

# Assemble the network around the state placeholder.
# NOTE(review): Network / build_layers / add_layer_duplicates are project-local
# (basics module); `one_hot` and `output_layer` are layer specs defined earlier
# in this file. Presumably this one-hot encodes the state and appends the
# output layer exactly once — confirm against basics.Network.
N = basics.Network(state_in)
N.build_layers(one_hot)
N.add_layer_duplicates(output_layer, 1)

# Create the contextual-bandit learner and run training.
# NOTE(review): config_train and EG_Train (an epsilon-greedy exploration
# schedule, judging by the name — confirm) are defined elsewhere in the file.
RL_Train = RL.ContextualBandit(N, config_train, EG_Train)

# TF1.x session drives the graph; process() presumably runs the full training
# loop. save/restore are disabled, so no checkpointing happens in this run.
sess = tf.Session()
RL_Train.process(sess, save=False, restore=False)

# Extract training results from the learner's recorder history.
# NOTE(review): record keys and the per-step structure of these lists come
# from the project RL module — assumed one entry per training step; confirm.
action = RL_Train.recorder.record['NETWORK_ACTION']  # actions taken
reward = RL_Train.recorder.record['ENGINE_REWARD']   # rewards observed
print(np.mean(reward))  # mean reward over the whole run

# Empty frame, presumably populated with results below this chunk — confirm.
df1 = pd.DataFrame()