def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0
    best_efficiency = -1000
    best_epoch = 0

    # build an initial graph, retrying until it is connected
    while (1):
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, \
            new_output_vectors, new_frozen_nodes, new_edges_indices, \
            new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices,
                origin_input_nodes, origin_input_vectors,
                origin_output_nodes, origin_output_vectors,
                origin_frozen_nodes, EDGE_THICKNESS)
        env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                        new_output_nodes, new_output_vectors, new_frozen_nodes,
                        new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run train_num episodes
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):
        # reset environment and episode reward
        while (1):
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, \
                new_output_vectors, new_frozen_nodes, new_edges_indices, \
                new_edges_thickness = make_continuous_init_graph(
                    origin_nodes_positions, origin_edges_indices,
                    origin_input_nodes, origin_input_vectors,
                    origin_output_nodes, origin_output_vectors,
                    origin_frozen_nodes, EDGE_THICKNESS)
            env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                            new_output_nodes, new_output_vectors,
                            new_frozen_nodes, new_edges_indices,
                            new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # for each episode, only run max_action steps so that we don't
        # infinite loop while learning
        for t in range(max_action):
            # select action from policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = \
                env.extract_node_edge_info()

            # take the action
            state, _, done, info = env.step(action)
            if (t == (max_action - 1)) and (done is not True):
                # the episode did not terminate within max_action steps
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    # reward the improvement over the previous efficiency
                    reward = efficiency - prior_efficiency
                else:
                    # first time the graph becomes connected: add a bonus
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency
            elif continuous_trigger == 1:
                # the graph became disconnected again
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)
            ep_reward += reward
            steps = t  # record the last step index so 'steps' is always defined below
            if done:
                break

        # update cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency for this episode
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1
        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
            # env.render(os.path.join(
            #     log_dir, 'render_image/{}.png'.format(epoch+1)))

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines(
                'epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n'
                % (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d, best_efficiency: %.4f\n'
                         % (best_epoch + 1, best_efficiency))

    save_model(save_name="Last")
    plot_loss_history(history, os.path.join(log_dir,
                                            'learning_loss_curve.png'))
    plot_reward_history(history, os.path.join(log_dir,
                                              'learning_reward_curve.png'))
    plot_efficiency_history(history, os.path.join(log_dir,
                                                  'learning_effi_curve.png'))
    plot_steps_history(history, os.path.join(log_dir,
                                             'learning_steps_curve.png'))
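
# ---------------------------------------------------------------------------
# NOTE: finish_episode() is called in main() above but is not part of this
# excerpt. The function below is only a hedged sketch of how a REINFORCE-style
# update could consume the per-step rewards stored in GCN.rewards together
# with saved log-probabilities (assumed here to live in GCN.saved_actions as
# Saved_prob_Action entries). The attribute names, the discount factor
# `gamma`, the single-optimizer update, and the assumption that `torch` is
# imported at the top of the file are illustrative choices, not the
# repository's actual implementation.
def finish_episode_sketch(gamma=0.99, eps=1e-8):
    # compute discounted returns, processing the newest reward first
    returns = []
    R = 0
    for r in reversed(GCN.rewards):
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    if returns.numel() > 1:
        # normalize returns to stabilize the gradient scale
        returns = (returns - returns.mean()) / (returns.std() + eps)

    # policy-gradient loss: -log_prob * return for every saved action
    policy_losses = [-saved.log_prob * R
                     for saved, R in zip(GCN.saved_actions, returns)]
    loss = torch.stack(policy_losses).sum()

    GCN_optimizer.zero_grad()
    loss.backward()
    GCN_optimizer.step()

    # clear the per-episode buffers before the next episode starts
    del GCN.rewards[:]
    del GCN.saved_actions[:]
    return loss.item()
# ---------------------------------------------------------------------------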
Select_node2 = model.Select_node2_model(
    node_features + node_out_features, 2).double()
Edge_thickness = model.Edge_thickness_model(
    node_features + node_out_features, 2).double()

# create the continuous initial state, retrying until the graph is connected
while (1):
    new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, \
        new_output_vectors, new_frozen_nodes, new_edges_indices, \
        new_edges_thickness = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices, origin_input_nodes,
            origin_input_vectors, origin_output_nodes, origin_output_vectors,
            origin_frozen_nodes, EDGE_THICKNESS)
    env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                    new_output_nodes, new_output_vectors, new_frozen_nodes,
                    new_edges_indices, new_edges_thickness)
    env.reset()
    if env.confirm_graph_is_connected():
        # env.render('render_image/yatta.png')
        break

Saved_Action = namedtuple('SavedAction', ['action', 'value'])
Saved_prob_Action = namedtuple('SavedAction', ['log_prob'])
Saved_mean_std_Action = namedtuple('SavedAction', ['mean', 'variance'])

GCN_optimizer = optim.Adam(GCN.parameters(), lr=lr)
X_Y_optimizer = optim.Adam(X_Y.parameters(), lr=lr)
Stop_optimizer = optim.Adam(Stop.parameters(), lr=lr)
Select_node1_optimizer = optim.Adam(Select_node1.parameters(), lr=lr)
Select_node2_optimizer = optim.Adam(Select_node2.parameters(), lr=lr)
Edge_thickness_optimizer = optim.Adam(Edge_thickness.parameters(), lr=lr)


def select_action(first_node_num):