# --- One supervised training step over a batch of trajectories. ---

act_index = 0

# Gather, for every processing step, the stored action vector of each
# sequence in the batch. Outer axis = processing step, inner = batch
# element; element shape depends on action_storage entries (defined
# elsewhere — confirm against caller).
act_fills = np.array([
    [action_storage[acts[batch_seq][num_step]]
     for batch_seq in range(batch_size_tr)]
    for num_step in range(num_processing_steps)
])

# Supervision target extracted from the end of each state trajectory.
state_losses_supervise = getFinalState(state)

# Encode the initial state (step 0) of every batch element as an input
# graph dict for the graphs_tuple placeholder.
graph_dicts = [
    base_graph(state[j][0], grounding_size, first_action, dimension,
               proposition_nodes)
    for j in range(batch_size_tr)
]

# Assemble the feed: input graphs, supervision targets, and action fills.
feed_dicts = utils_tf.get_feed_dict(
    graphs_tuple_ph, utils_np.data_dicts_to_graphs_tuple(graph_dicts))
feed_dicts[realstate] = state_losses_supervise
feed_dicts[action_filled] = act_fills

# Run one optimizer step and fetch the tensors we want to inspect.
train_values = sess.run({
    "step": step_op,
    "loss": loss_op_tr,
    "outputs": output_ops_tr,
    "output_graphs": sta_vecs,
    "aaa": graphs_tuple_ph,
    "test": action_filled,
}, feed_dict=feed_dicts)

# update action storage
act_index = 0
# --- Training step for the heuristic/goal-graph variant. ---
# NOTE(review): this chunk starts mid-sequence — act_fills and
# act_fills_index were accumulated before this view begins.
act_fills = np.array(act_fills)
act_fills_index = np.array(act_fills_index)

# Supervision target extracted from the end of each state trajectory.
state_losses_supervise = getFinalState(state)

# Build, per batch element, an input graph from the initial state
# (step 0) and a goal graph from the state at the final processing step.
graph_dicts = []
goal_graph_dicts = []
for j in range(batch_size_tr):
    input_g = base_graph(state[j][0], grounding_size, first_action,
                         dimension, proposition_nodes)
    input_goal_g = base_graph(state[j][num_processing_steps], grounding_size,
                              first_action, dimension, proposition_nodes)
    graph_dicts.append(input_g)
    goal_graph_dicts.append(input_goal_g)

# Feed both graph tuples (input + goal), then add the supervision targets
# and the action fill tensors.
feed_dicts = utils_tf.get_feed_dict(
    graphs_tuple_ph, utils_np.data_dicts_to_graphs_tuple(graph_dicts))
goal_feed_dicts = utils_tf.get_feed_dict(
    graphs_tuple_goal, utils_np.data_dicts_to_graphs_tuple(goal_graph_dicts))
feed_dicts.update(goal_feed_dicts)
feed_dicts[realstate] = state_losses_supervise
feed_dicts[action_filled] = act_fills
feed_dicts[action_filled_index] = act_fills_index

# NOTE(review): the sess.run call below is truncated in this chunk — the
# feed_dict argument and closing parenthesis lie past the end of the
# visible source. Kept exactly as found.
train_values = sess.run(
    {
        "update_para": update_para,
        "loss": loss_heu,
        "outputs": output_ops_tr,
        "heuristic": heuristic_output,
        "training_output": training_output,
    },