def actor_gcn_critic_gcn_mean(test_num=5, max_episodes=5000, test_name="test",
                              log_file=None):
    """Create the averaged learning curve over test_num Actor-Critic runs."""
    log_dir = "confirm/step3/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir, exist_ok=True)

    history = {}
    for i in range(test_num):
        history["{}".format(i)] = actor_gcn_critic_gcn(
            max_episodes=max_episodes,
            test_name=os.path.join(test_name, str(i)),
            log_file=log_file)

    results = np.stack([
        history["{}".format(i)]['result_efficiency'] for i in range(test_num)
    ])
    std = np.std(results[:, -1])
    print('standard deviation of the final results:', std)
    mean = np.mean(results, axis=0)
    meanhistory = {}
    meanhistory['epoch'] = history['0']['epoch']
    meanhistory['result_efficiency'] = mean

    # Save the training history
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    plot_efficiency_history(
        meanhistory, os.path.join(log_dir, 'mean_learning_effi_curve.png'))

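# Usage sketch (hypothetical call; "mean_run" is an illustrative name, and the
# actual entry point of this script is not shown here):
# actor_gcn_critic_gcn_mean(test_num=5, max_episodes=5000,
#                           test_name="mean_run", log_file=True)
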
def actor_gcn_critic_gcn(max_episodes=5000, test_name="test", log_file=False,
                         save_pth=False):
    """Run Actor-Critic training; both the Actor and the Critic are GCNs.

    The Actor chooses node 1 and node 2, i.e. it can select only a single edge.

    max_episodes: number of training episodes
    test_name: name of the directory the results are saved under
    log_file: if True, losses and related information are logged to progress.txt
    """
    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['mean_efficiency'] = []  # stores the efficiency (eta) obtained when acting with a_mean
    history['a'] = []
    history['a_mean'] = []
    history['a_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step3/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')
    actorNet = Select_node1_model(2, 1, 400, 400).to(device).double()
    actorNet2 = Select_node2_model(400 + 2, 400).to(device).double()
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(400).to(device).double()
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_actor2 = optim.Adam(actorNet2.parameters(), lr=lr_actor)
    optimizer_edgethick = optim.Adam(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(), lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = \
            env.extract_node_edge_info()
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env, actorNet, actorNet2,
                                                  criticNet, edgethickNet,
                                                  device, log_dir=log_file)
            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation()
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet, actorNet, actorNet2, edgethickNet,
                              optimizer_critic, optimizer_actor,
                              optimizer_actor2, optimizer_edgethick, gamma,
                              log_dir=log_file)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        if episode % 100 == 0:
            if save_pth:
                save_model(criticNet, edgethickNet,
                           os.path.join(log_dir, "pth"),
                           save_name=str(episode))

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))
    return history

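# Hedged sketch of the discounted-return computation that finish_episode is
# assumed to perform over criticNet.rewards with gamma=0.99; the real
# finish_episode is imported from elsewhere and may differ (it also applies
# the actor and critic updates).
def _discounted_returns(rewards, gamma):
    """Return [G_0, G_1, ...] with G_t = r_t + gamma * G_{t+1}."""
    returns, running = [], 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.insert(0, running)
    return returns
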
                       origin_output_vectors, origin_frozen_nodes,
                       current_edges_indices, current_edges_thickness)
    if gif:
        os.makedirs(os.path.join(log_dir, 'epochs/epoch{}'.format(epoch)),
                    exist_ok=True)
        save_graph_info_npy(
            os.path.join(log_dir, 'epochs/epoch{}'.format(epoch)),
            origin_nodes_positions, barfem_input_nodes, origin_input_vectors,
            barfem_output_nodes, origin_output_vectors, origin_frozen_nodes,
            current_edges_indices, current_edges_thickness)
    env = BarFemOutputGym(origin_nodes_positions, barfem_input_nodes,
                          origin_input_vectors, barfem_output_nodes,
                          origin_output_vectors, origin_frozen_nodes,
                          current_edges_indices, current_edges_thickness,
                          origin_frozen_nodes)
    env.reset()
    env.render(os.path.join(log_dir, 'render_image/{}.png'.format(epoch)),
               edge_size=100)
    # render_graph(origin_nodes_positions, current_edges_indices,
    #              current_edges_thickness,
    #              os.path.join(log_dir, 'render_image/{}.png'.format(epoch)),
    #              display_number=False)
    history['epoch'].append(epoch + 1)
    history['result_efficiency'].append(current_efficiency)

    # Save the training history
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    plot_efficiency_history(
        history, os.path.join(log_dir, 'learning_effi_curve.png'))

def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0
    best_efficiency = -1000
    best_epoch = 0

    # keep sampling an initial graph until a connected one is found
    while True:
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, \
            new_output_vectors, new_frozen_nodes, new_edges_indices, \
            new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices,
                origin_input_nodes, origin_input_vectors, origin_output_nodes,
                origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
        env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                        new_output_nodes, new_output_vectors, new_frozen_nodes,
                        new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run train_num episodes
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):

        # reset environment and episode reward
        while True:
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, \
                new_output_vectors, new_frozen_nodes, new_edges_indices, \
                new_edges_thickness = make_continuous_init_graph(
                    origin_nodes_positions, origin_edges_indices,
                    origin_input_nodes, origin_input_vectors,
                    origin_output_nodes, origin_output_vectors,
                    origin_frozen_nodes, EDGE_THICKNESS)
            env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                            new_output_nodes, new_output_vectors,
                            new_frozen_nodes, new_edges_indices,
                            new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # for each episode, only run max_action steps so that we don't
        # infinite loop while learning
        for t in range(max_action):
            # select action from policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = \
                env.extract_node_edge_info()
            # take the action
            state, _, done, info = env.step(action)

            if (t == (max_action - 1)) and (done is not True):
                # the episode did not finish within max_action steps
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    reward = efficiency - prior_efficiency
                else:
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency
            elif continuous_trigger == 1:
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)
            ep_reward += reward
            steps = t  # record the last executed step (also set when the loop times out)
            if done:
                break

        # update cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1
        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
        # env.render(os.path.join(
        #     log_dir, 'render_image/{}.png'.format(epoch+1)))

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # Save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines(
                'epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n'
                % (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d, best_efficiency: %.4f\n' %
                         (best_epoch + 1, best_efficiency))

    save_model(save_name="Last")
    plot_loss_history(history,
                      os.path.join(log_dir, 'learning_loss_curve.png'))
    plot_reward_history(history,
                        os.path.join(log_dir, 'learning_reward_curve.png'))
    plot_efficiency_history(
        history, os.path.join(log_dir, 'learning_effi_curve.png'))
    plot_steps_history(history,
                       os.path.join(log_dir, 'learning_steps_curve.png'))

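# Hedged distillation of the reward shaping used inside main() above, pulled
# out as a pure function for readability. It mirrors the inline logic
# (timeout penalty, efficiency-difference reward once connected, disconnect
# penalty) but is illustrative only and is not called anywhere.
def _shaped_reward(connected, efficiency, prior_efficiency,
                   continuous_trigger, timed_out,
                   final_penalty, continuous_reward, penalty):
    """Return (reward, updated continuous_trigger)."""
    if timed_out:
        return -final_penalty, continuous_trigger
    if connected:
        if continuous_trigger == 1:
            # already connected before: reward the improvement
            return efficiency - prior_efficiency, 1
        # first time the graph becomes connected: bonus on top of efficiency
        return efficiency + continuous_reward, 1
    if continuous_trigger == 1:
        # was connected earlier but broke connectivity
        return -penalty, continuous_trigger
    return 0, continuous_trigger
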
def load_actor_gcn_critic_gcn(load_dir, load_epoch, max_episodes=5000,
                              test_name="test", history=None, log_file=False):
    """Load saved pth checkpoints for the Actor-Critic and resume training.

    Args:
        load_dir ([type]): path to the directory containing the pth files to load.
        load_epoch ([type]): epoch from which training is resumed.
        max_episodes (int, optional): number of training episodes. Defaults to 5000.
        test_name (str, optional): name of the directory the results are saved under. Defaults to "test".
        history ([type], optional): a previously saved history. When given, the loaded results are also reflected in the plots. Defaults to None.
        log_file (bool, optional): if True, losses and related information are logged to progress.txt. Defaults to False.
    """
    if history is None:
        history = {}
        history['epoch'] = []
        history['result_efficiency'] = []
        history['mean_efficiency'] = []  # stores the efficiency (eta) obtained when acting with a_mean
        history['a'] = []
        history['a_mean'] = []
        history['a_sigma'] = []
        history['advantage'] = []
        history['critic_value'] = []
    else:
        for key in history.keys():
            history[key] = history[key][:load_epoch]

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(2, 1, 400, 400).to(device).double()
    criticNet.load_state_dict(
        torch.load(
            os.path.join(load_dir, "pth/{}_criticNet.pth".format(load_epoch))))
    edgethickNet.load_state_dict(
        torch.load(
            os.path.join(load_dir,
                         "pth/{}_edgethickNet.pth".format(load_epoch))))
    optimizer_edgethick = optim.SGD(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(), lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(load_epoch, max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = \
            env.extract_node_edge_info()
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env, criticNet,
                                                  edgethickNet, device,
                                                  log_dir=log_file,
                                                  history=history)
            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet, edgethickNet, optimizer_critic,
                              optimizer_edgethick, gamma, log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))
    return history

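# Hypothetical resume-from-checkpoint call, assuming a previous run saved
# "<epoch>_criticNet.pth" / "<epoch>_edgethickNet.pth" under <load_dir>/pth
# and pickled its history; the directory names below are illustrative only.
# with open("confirm/step5_entropy/a_gcn_c_gcn_results/test/history.pkl",
#           "rb") as f:
#     prev_history = pickle.load(f)
# load_actor_gcn_critic_gcn(
#     "confirm/step5_entropy/a_gcn_c_gcn_results/test", load_epoch=400,
#     max_episodes=5000, test_name="test_resume", history=prev_history,
#     log_file=True)
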
def actor_gcn_critic_gcn(max_episodes=5000, test_name="test", log_file=False,
                         save_pth=False):
    """Run Actor-Critic training; both the Actor and the Critic are GCNs.

    The Actor can select only the width of a single edge.

    max_episodes: number of training episodes
    test_name: name of the directory the results are saved under
    log_file: if True, losses and related information are logged to progress.txt
    """
    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['x'] = []
    history['x_mean'] = []
    history['x_sigma'] = []
    history['y'] = []
    history['y_mean'] = []
    history['y_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    x_y_Net = X_Y_Actor(2, 1, 400, 400).to(device).double()
    node1Net = Select_node1_model(2, 1, 400, 400).to(device).double()
    node2Net = Select_node2_model(400 + 2, 400).to(device).double()
    # the 400 in "400 + 2" corresponds to the input3 part of Select_node1_model
    optimizer_node1 = optim.Adam(node1Net.parameters(), lr=lr_actor)
    optimizer_node2 = optim.Adam(node2Net.parameters(), lr=lr_actor)
    optimizer_xy = optim.Adam(x_y_Net.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(), lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env = BarFemGym(node_pos, input_nodes, input_vectors,
                        output_nodes, output_vectors, frozen_nodes,
                        edges_indices, edges_thickness, frozen_nodes)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = \
            env.extract_node_edge_info()
        action = select_action_gcn_critic_gcn(env, criticNet, node1Net,
                                              node2Net, x_y_Net, device,
                                              log_dir=log_file,
                                              history=history)
        next_nodes_pos, _, done, _ = env.step(action)
        if 4 in action['which_node']:
            env.input_nodes = [2, 4]
            env.input_vectors = np.array([[1, 0], [0, 1]])
        if 2 in action['which_node'] and 4 in action['which_node']:
            # TODO: train the policy so that it does not select [2, 4]
            reward = np.array([0])
        else:
            reward = env.calculate_simulation()
        criticNet.rewards.append(reward)

        loss = finish_episode(criticNet, x_y_Net, node1Net, node2Net,
                              optimizer_critic, optimizer_xy, optimizer_node1,
                              optimizer_node2, gamma, log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        if episode % 100 == 0:
            if save_pth:
                save_model(criticNet, x_y_Net, os.path.join(log_dir, "pth"),
                           save_name=str(episode))

    env.close()
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)
    return history

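# Illustrative sketch of how a continuous action is assumed to be sampled from
# the Gaussian policy heads whose statistics are logged above (x_mean/x_sigma,
# y_mean/y_sigma). The real sampling lives in select_action_gcn_critic_gcn,
# which is imported from elsewhere and may differ.
def _sample_gaussian_action(mean, sigma):
    """Sample an action and its log-probability from N(mean, sigma**2)."""
    dist = torch.distributions.Normal(mean, sigma)
    action = dist.sample()
    return action, dist.log_prob(action)
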
def ddpg():
    """Run reinforcement learning with DDPG.

    The edge width the Actor can specify ranges from 0.1 to 1.
    """
    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []

    max_episodes = 500
    memory_capacity = 1e6  # replay buffer capacity
    gamma = 0.99  # discount factor
    tau = 1e-3  # target network update rate
    epsilon = 1.0  # for tuning the amount of exploration noise; probably unnecessary
    batch_size = 64
    lr_actor = 1e-4
    lr_critic = 1e-3
    logger_interval = 10
    weight_decay = 1e-2

    test_name = "ddpg_500_v2"  # experiment name
    log_dir = "confirm/step1/ddpg_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = Step1Gym(node_pos, input_nodes, input_vectors,
                   output_nodes, output_vectors, frozen_nodes,
                   edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    num_state = 2
    num_action = 1
    max_steps = 1

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    actorNet = ActorNetwork(num_state, num_action).to(device)
    criticNet = CriticNetwork(num_state, num_action).to(device)
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(), lr=lr_critic,
                                  weight_decay=weight_decay)
    replay_buffer = ReplayBuffer(capacity=memory_capacity)
    agent = DDPG(actorNet, criticNet, optimizer_actor, optimizer_critic,
                 replay_buffer, device, gamma, tau, epsilon, batch_size)

    for episode in range(max_episodes):
        observation = env.reset()
        observation = np.array([0, 1])
        total_reward = 0

        for step in range(max_steps):
            edges_thickness = agent.get_action(observation)
            action = {}
            action['which_node'] = np.array([0, 1])
            action['end'] = 0
            action['edge_thickness'] = edges_thickness
            action['new_node'] = np.array([[0, 2]])
            next_observation, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            next_observation = np.array([0, 1])
            total_reward += reward
            agent.add_memory(observation, edges_thickness, next_observation,
                             reward, done)
            agent.train()
            observation = next_observation
            if done:
                break

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        if reward < 0:
            print(edges_thickness)
        if episode % logger_interval == 0:
            print("episode:{} total reward:{}".format(episode, total_reward))

    # greedy evaluation episodes
    for episode in range(3):
        observation = env.reset()
        observation = np.array([0, 1])
        for step in range(max_steps):
            edges_thickness = agent.get_action(observation, greedy=True)
            action = {}
            action['which_node'] = np.array([0, 1])
            action['end'] = 0
            action['edge_thickness'] = edges_thickness
            action['new_node'] = np.array([[0, 2]])
            next_observation, reward, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            observation = np.array([0, 1])
            if done:
                break

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

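# Minimal sketch of the soft target-network update DDPG relies on (tau=1e-3
# above): theta_target <- tau * theta + (1 - tau) * theta_target. The actual
# update is assumed to live inside the imported DDPG class and may differ.
def _soft_update(target_net, source_net, tau):
    for target_param, param in zip(target_net.parameters(),
                                   source_net.parameters()):
        target_param.data.copy_(tau * param.data +
                                (1.0 - tau) * target_param.data)
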
def actor_critic_mean(max_episodes, test_name):
    """Create the averaged learning curve over test_num Actor-Critic runs."""
    test_num = 5
    log_dir = "confirm/step1/ac_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir, exist_ok=True)

    history = {}
    for i in range(test_num):
        history["{}".format(i)] = {}
        history["{}".format(i)]['epoch'] = []
        history["{}".format(i)]['result_efficiency'] = []

        node_pos, input_nodes, input_vectors,\
            output_nodes, output_vectors, frozen_nodes,\
            edges_indices, edges_thickness, frozen_nodes = easy_dev()

        max_steps = 1
        lr_actor = 1e-4
        lr_critic = 1e-3
        weight_decay = 1e-2
        gamma = 0.99

        # each trial gets freshly initialized networks
        device = torch.device('cpu')
        actorNet = Edgethick_Actor().to(device)
        criticNet = Edgethick_Critic().to(device)
        optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
        optimizer_critic = optim.Adam(criticNet.parameters(), lr=lr_critic,
                                      weight_decay=weight_decay)

        for episode in tqdm(range(max_episodes)):
            observation = np.array([0, 1])
            for step in range(max_steps):
                action = select_action(observation, actorNet, criticNet,
                                       device)
                edges_thickness = action['edge_thickness']
                displacement = barfem(node_pos, edges_indices,
                                      edges_thickness, input_nodes,
                                      input_vectors, frozen_nodes,
                                      mode="force")
                reward = displacement[1 * 3]
                criticNet.rewards.append(reward)

            loss = finish_episode(criticNet, actorNet, optimizer_critic,
                                  optimizer_actor, gamma)

            history["{}".format(i)]['epoch'].append(episode + 1)
            history["{}".format(i)]['result_efficiency'].append(reward)
            if episode % 1000 == 0:
                print("episode:{} total reward:{}".format(episode, reward))

        plot_efficiency_history(
            history["{}".format(i)],
            os.path.join(log_dir, 'learning_effi_curve{}.png'.format(i)))

    results = np.stack([
        history["{}".format(i)]['result_efficiency'] for i in range(test_num)
    ])
    std = np.std(results[:, -1])
    print('standard deviation of the final results:', std)
    mean = np.mean(results, axis=0)
    meanhistory = {}
    meanhistory['epoch'] = history['0']['epoch']
    meanhistory['result_efficiency'] = mean

    # Save the training history
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    plot_efficiency_history(
        meanhistory, os.path.join(log_dir, 'mean_learning_effi_curve.png'))

def actor_critic():
    """Run Actor-Critic training with plain (non-GCN) deep networks.

    The edge width the Actor can specify ranges from 0.1 to 1.
    """
    max_episodes = 500
    test_name = "500_moto"  # experiment name

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []

    log_dir = "confirm/step1/ac_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = Step1Gym(node_pos, input_nodes, input_vectors,
                   output_nodes, output_vectors, frozen_nodes,
                   edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')
    actorNet = Edgethick_Actor().to(device)
    criticNet = Edgethick_Critic().to(device)
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(), lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in range(max_episodes):
        observation = env.reset()
        observation = np.array([0, 1])
        for step in range(max_steps):
            action = select_action(observation, actorNet, criticNet, device)
            next_observation, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet, actorNet, optimizer_critic,
                              optimizer_actor, gamma)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        if episode % 10 == 0:
            print("episode:{} total reward:{}".format(episode, reward))

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

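# Hedged sketch of the one-step actor-critic update that finish_episode is
# assumed to apply here (max_steps=1, so the return reduces to the single
# reward). Illustrative only; the imported finish_episode may differ.
import torch.nn.functional as F


def _one_step_ac_update(log_prob, value, reward,
                        optimizer_actor, optimizer_critic):
    target = torch.tensor([reward], dtype=value.dtype)
    advantage = (target - value).detach()           # TD(0) advantage, no bootstrap
    actor_loss = (-log_prob * advantage).sum()      # policy-gradient term
    critic_loss = F.smooth_l1_loss(value, target)   # value regression
    loss = actor_loss + critic_loss
    optimizer_actor.zero_grad()
    optimizer_critic.zero_grad()
    loss.backward()
    optimizer_actor.step()
    optimizer_critic.step()
    return loss.item()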