# NOTE(review): this chunk begins mid-expression — the entry below is the tail of a
# layer-specification dict whose opening (and assignment target) precede this chunk.
# Final layer spec: presumably one output unit per action — TODO confirm against
# basics.Network's expected layer-spec schema.
'final': { 'func_name': 'fully_connected',
           'input_arg': 'inputs',
           'layer_para': { 'inputs': None,  # filled in by the network builder, presumably
                           'num_outputs': n_action,
                           'biases_initializer': None,  # no bias term in the final layer
                           'activation_fn': tf.nn.relu,
                           'weights_initializer': tf.ones_initializer() } } }

# Scalar (shape [1]) int32 placeholder — presumably the state/context index fed to
# the one-hot encoding; verify against basics.Network.
state_in = tf.placeholder(shape=[1], dtype=tf.int32)

# Build the network graph from the spec above; `one_hot` and `output_layer` are
# defined earlier in the file (outside this chunk).
N = basics.Network(state_in)
N.build_layers(one_hot)
N.add_layer_duplicates(output_layer, 1)

# Create learning object and perform training
RL_Train = RL.ContextualBandit(N, config_train, EG_Train)
# NOTE(review): TF1-style session; it is never closed in this chunk — confirm whether
# later (unseen) code still uses `sess` before wrapping in a `with` block.
sess = tf.Session()
RL_Train.process(sess, save=False, restore=False)

# Extract training results
# The recorder keys below are string constants — looks like per-step chosen actions
# and the engine's rewards; confirm against RL.ContextualBandit's recorder.
action = RL_Train.recorder.record['NETWORK_ACTION']
reward = RL_Train.recorder.record['ENGINE_REWARD']
print(np.mean(reward))  # mean reward over the whole training run
df1 = pd.DataFrame()