def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0
    best_efficiency = -1000
    best_epoch = 0

    # generate an initial graph, retrying until it is connected
    while(1):
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors,
            origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
        env = FEMGym(new_node_pos, new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run train_num training episodes
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):

        # reset the environment and the episode reward
        while(1):
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors,
                origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
            env = FEMGym(new_node_pos, new_edges_indices, new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # for each episode, run at most max_action steps so that we don't
        # loop forever while learning
        for t in range(max_action):
            # select an action from the policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = env.extract_node_edge_info()

            # take the action
            state, _, done, info = env.step(action)

            if (t == (max_action - 1)) and (done is not True):
                # the episode did not finish within max_action steps
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    reward = efficiency - prior_efficiency
                else:
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency
            elif continuous_trigger == 1:
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)
            ep_reward += reward
            steps = t  # keep steps defined even if done never becomes True
            if done:
                break

        # update the cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency of this episode
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1

        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
            env.render(os.path.join(
                log_dir, 'render_image/{}.png'.format(epoch + 1)))

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines('epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n' %
                         (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d, best_efficiency: %.4f\n' %
                         (best_epoch + 1, best_efficiency))

    save_model(save_name="Last")

    plot_loss_history(history, os.path.join(
        log_dir, 'learning_loss_curve.png'))
    plot_reward_history(history, os.path.join(
        log_dir, 'learning_reward_curve.png'))
    plot_efficiency_history(history, os.path.join(
        log_dir, 'learning_effi_curve.png'))
    plot_steps_history(history, os.path.join(
        log_dir, 'learning_steps_curve.png'))
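# select_action() and finish_episode() are called in main() but defined
# elsewhere in this repository. The sketch below is only a minimal,
# hypothetical REINFORCE-style version of what they might look like, assuming
# the policy object (GCN above) exposes the saved_actions / rewards lists used
# in main(); the *_sketch names, the policy/optimizer arguments, and the call
# signature of the policy are assumptions, not the project's actual code.

import torch


def select_action_sketch(policy, node_num):
    # sample a node index from the categorical distribution produced by the policy
    probs = policy(node_num)
    dist = torch.distributions.Categorical(probs)
    action = dist.sample()
    policy.saved_actions.append(dist.log_prob(action))
    return action.item()


def finish_episode_sketch(policy, optimizer, gamma=0.99):
    # compute discounted returns, then apply a vanilla policy-gradient update
    R, returns = 0.0, []
    for r in reversed(policy.rewards):
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    loss = torch.stack([-log_p * ret for log_p, ret
                        in zip(policy.saved_actions, returns)]).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    del policy.rewards[:]
    del policy.saved_actions[:]
    return loss.item()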
# loop for one episode
state = env.reset()
env.confirm_graph_is_connected()
env.render("fem_images/image_first.png")

total_time = 0
total_calc_time = 0
for i in range(500):
    # get a random action
    action = env.random_action()
    # execute one step
    state, reward, done, info = env.step(action)
    if env.confirm_graph_is_connected():
        reward = 0
        start = time.time()
        efficiency = env.calculate_simulation()
        elapsed_time = time.time() - start
        print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
        reward = efficiency
        total_time += elapsed_time
        total_calc_time += 1
    else:
        reward = -1

    # episode finished
    # if done:
    #     print('done')
    #     break

print("average computation time per simulation:", total_time / total_calc_time)
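# The benchmark above times calculate_simulation() with time.time(). As an
# aside, time.perf_counter() is the standard-library clock intended for short
# timing measurements; the helper below is a small, self-contained sketch
# (time_call_sketch is hypothetical, not part of the original code), usable
# e.g. as time_call_sketch(env.calculate_simulation).

import time


def time_call_sketch(func, repeat=10):
    # return the average wall-clock time of func() over `repeat` calls
    start = time.perf_counter()
    for _ in range(repeat):
        func()
    return (time.perf_counter() - start) / repeat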
])
origin_frozen_nodes = [1, 3, 5, 7, 9, 11, 13, 15]

# extract the elements passed to the gym environment
new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_main_node_edge_info(
    origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors,
    origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)

env = FEMGym(new_node_pos, new_edges_indices, new_edges_thickness)

# loop for one episode
state = env.reset()
env.confirm_graph_is_connected()
env.render("fem_images/image_first.png")

for i in range(100):
    # get a random action
    action = env.random_action()
    # execute one step
    state, reward, done, info = env.step(action)
    if env.confirm_graph_is_connected():
        reward = env.calculate_simulation()
        # env.render("fem_images/graph_connected{}.png".format(i))
    print('{}steps reward:{}'.format(i, reward))

    # episode finished
    # if done:
    #     print('done')
    #     break
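# The random-action loop above interleaves stepping, connectivity checks, and
# logging. A reusable helper along these lines can make such rollout
# experiments easier to repeat; random_rollout_sketch is hypothetical and only
# uses FEMGym methods already seen in this file.


def random_rollout_sketch(env, max_steps=100):
    # apply random actions, collecting the efficiency whenever the graph is connected
    env.reset()
    efficiencies = []
    for _ in range(max_steps):
        state, reward, done, info = env.step(env.random_action())
        if env.confirm_graph_is_connected():
            efficiencies.append(env.calculate_simulation())
        if done:
            break
    return efficiencies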