def run(self, max_episodes=5000, test_name="test", log_file=False, save_pth=False, history=None, device=torch.device('cpu')):
    """Play single-step episodes until the shared episode counter reaches ``total_episodes``.

    Every pass rebuilds the environment from ``easy_dev()``, selects one
    action with the local networks, computes the reward, records it through
    ``record`` and synchronises with the global networks via
    ``finish_episode``.  ``None`` is put on the result queue when the worker
    stops.

    Args:
        max_episodes: Upper bound of the inner loop (the loop is left after
            its first iteration because episodes are one step long).
        test_name: Not used in this method.
        log_file: Forwarded to the action selector as ``log_dir``.
        save_pth: Not used in this method.
        history: Forwarded to the action selector.
        device: Forwarded to the action selector.
    """
    while self.g_ep.value < self.total_episodes:
        # The environment is rebuilt here because the input nodes may be
        # overwritten further down.
        (node_pos, input_nodes, input_vectors,
         output_nodes, output_vectors, frozen_nodes,
         edges_indices, edges_thickness, frozen_nodes) = easy_dev()
        self.env = BarFemGym(node_pos, input_nodes, input_vectors,
                             output_nodes, output_vectors, frozen_nodes,
                             edges_indices, edges_thickness, frozen_nodes)
        self.env.reset()

        for episode in range(max_episodes):
            action = select_action_gcn_critic_gcn(
                self.env,
                self.local_criticNet,
                self.local_node1Net,
                self.local_node2Net,
                self.local_x_y_Net,
                device,
                log_dir=log_file,
                history=history)
            next_nodes_pos, _, done, _ = self.env.step(action)

            chosen_nodes = action['which_node']
            uses_node4 = 4 in chosen_nodes
            if uses_node4:
                self.env.input_nodes = [2, 4]
                self.env.input_vectors = np.array([[1, 0], [0, 1]])

            # TODO: train the policy so that it does not select [2, 4]
            if uses_node4 and 2 in chosen_nodes:
                reward = np.array([0])
            else:
                reward = self.env.calculate_simulation()
            self.local_criticNet.rewards.append(reward)

            # This setup is one-step, so the episode always ends here:
            # update global and assign to local net.
            record(self.g_ep, self.g_ep_r, reward, self.res_queue,
                   self.name)
            # sync: push this process's weight update to the global nets,
            # then copy the updated global weights back to the local nets.
            self.finish_episode()
            break
    self.res_queue.put(None)
def check_maximum_node_pos():
    """Grid-search the position of a new node and report where the reward peaks.

    For each (x, y) on a ``resolution`` x ``resolution`` grid over the unit
    square the environment is reset, three actions are applied (the first
    one places the new node at (x, y)), and ``env.calculate_simulation()``
    is evaluated.  The reward map is saved to ``distribution.png`` and the
    best position and reward are printed.

    If no reward exceeds 0 the printed position is ``None None`` and the
    printed reward is 0.
    """
    resolution = 100
    (node_pos, input_nodes, input_vectors,
     output_nodes, output_vectors, frozen_nodes,
     edges_indices, edges_thickness, frozen_nodes) = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)

    def make_action(which_node, new_node):
        # All actions in this scan share 'end' = 0 and thickness 1.
        return {
            'which_node': np.array(which_node),
            'end': 0,
            'edge_thickness': np.array([1]),
            'new_node': np.array([new_node]),
        }

    rewards = np.zeros((resolution, resolution))
    # Initialised up front so the final prints cannot hit unbound names.
    best_reward = 0
    best_x = None
    best_y = None

    x_values = np.linspace(0, 1, resolution)
    y_values = np.linspace(1, 0, resolution)
    # tqdm wraps the sequence (not enumerate) so the progress bar knows its total.
    for ix, x in enumerate(tqdm(x_values)):
        for iy, y in enumerate(y_values):
            env.reset()
            env.step(make_action([0, 4], [x, y]))
            env.step(make_action([2, 4], [0, 1]))
            env.step(make_action([3, 4], [0, 1]))
            reward = env.calculate_simulation()
            if best_reward < reward:
                best_reward = reward
                best_x = x
                best_y = y
            rewards[iy, ix] = reward

    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(111)
    im = plt.imshow(rewards, extent=(0, 1, 0, 1))
    plt.colorbar(im)
    ax.set_xlabel(r"x", fontsize=20)
    ax.set_ylabel(r"y", fontsize=20)
    ax.tick_params(axis='x', labelsize=20)
    ax.tick_params(axis='y', labelsize=20)
    plt.savefig("distribution.png")
    print(best_x, best_y)
    print(best_reward)
def confirm_max_status():
    """Find the edge thickness that gives the maximum reward.

    Sweeps the thickness over ``np.arange(0.1, 1.001, 0.001)``; for each
    value a fresh environment is built, one action connecting nodes 0 and 3
    with that thickness is applied, and the simulation reward is compared
    with the best seen so far.  The best thickness and reward are printed.
    """
    best_reward = 0
    best_thickness = 1000  # sentinel printed when no reward exceeds 0
    for thickness in np.arange(0.1, 1.001, 0.001):
        (node_pos, input_nodes, input_vectors,
         output_nodes, output_vectors, frozen_nodes,
         edges_indices, edges_thickness, frozen_nodes) = easy_dev()
        env = BarFemGym(node_pos, input_nodes, input_vectors,
                        output_nodes, output_vectors, frozen_nodes,
                        edges_indices, edges_thickness, frozen_nodes)
        env.reset()

        action = {
            'which_node': np.array([0, 3]),
            'end': 0,
            'edge_thickness': np.array([thickness]),
            'new_node': np.array([[0, 2]]),
        }
        env.step(action)
        reward = env.calculate_simulation()
        if best_reward < reward:
            best_reward = reward
            best_thickness = thickness

    # The value reported is the maximum, so the label says "最大" (maximum).
    print("最大は", best_thickness, best_reward)
def __init__(self, global_criticNet, global_x_y_Net, global_node1Net, global_node2Net, Critic_opt, x_y_opt, Node1_opt, Node2_opt, global_ep, global_ep_r, res_queue, name, gamma=0.99, total_episodes=5000):
    """Set up one worker: shared handles, local network copies and its own environment.

    Args:
        global_criticNet, global_x_y_Net, global_node1Net, global_node2Net:
            Global networks stored on the worker.
        Critic_opt, x_y_opt, Node1_opt, Node2_opt: Optimizers stored on the
            worker.
        global_ep, global_ep_r, res_queue: Shared episode counter, shared
            reward value and result queue.
        name: Integer used to build the process name ``'w<name>'``.
        gamma: Reward discount rate.
        total_episodes: Total number of epochs across all processes.
    """
    super().__init__()
    self.name = 'w%i' % name

    # Shared bookkeeping objects.
    self.g_ep = global_ep
    self.g_ep_r = global_ep_r
    self.res_queue = res_queue

    # Optimizers.
    self.Critic_opt = Critic_opt
    self.x_y_opt = x_y_opt
    self.Node1_opt = Node1_opt
    self.Node2_opt = Node2_opt

    # Global networks.
    self.global_criticNet = global_criticNet
    self.global_x_y_Net = global_x_y_Net
    self.global_node1Net = global_node1Net
    self.global_node2Net = global_node2Net

    # Local networks, created in double precision on the CPU.
    device = torch.device('cpu')
    self.local_criticNet = CriticNetwork_GCN(2, 1, 400, 400).double().to(device)
    self.local_x_y_Net = X_Y_Actor(2, 1, 400, 400).double().to(device)
    self.local_node1Net = Select_node1_model(2, 1, 400, 400).double().to(device)
    self.local_node2Net = Select_node2_model(400 + 2, 400).double().to(device)

    # Worker-private environment built from the easy_dev() setup.
    (node_pos, input_nodes, input_vectors,
     output_nodes, output_vectors, frozen_nodes,
     edges_indices, edges_thickness, frozen_nodes) = easy_dev()
    self.env = BarFemGym(node_pos, input_nodes, input_vectors,
                         output_nodes, output_vectors, frozen_nodes,
                         edges_indices, edges_thickness, frozen_nodes)
    self.env.reset()

    self.gamma = gamma
    self.total_episodes = total_episodes
def calculate_efficiency(gene_nodes_pos, gene_edges_thickness, gene_adj_element, np_save_path=False):
    """Evaluate one gene and return its simulated efficiency.

    The gene's nodes are appended to the condition nodes, the edge list is
    built from ``gene_adj_element``, and the connected component that
    contains every input, output and frozen node is kept.  If no such
    component exists the penalty ``-10.0`` is returned.

    Args:
        gene_nodes_pos: Node positions contributed by the gene.
        gene_edges_thickness: Edge-thickness elements of the gene.
        gene_adj_element: Adjacency elements of the gene.
        np_save_path: When truthy, directory in which the rendered image and
            the ``.npy`` arrays are stored.

    Returns:
        ``float(efficiency)`` from the simulation, or ``-10.0``.
    """
    (condition_nodes_pos, input_nodes, input_vectors, output_nodes,
     output_vectors, frozen_nodes, condition_edges_indices,
     condition_edges_thickness) = make_main_node_edge_info(
        *condition(), condition_edge_thickness=0.2)

    # Edge list and node positions of the full candidate graph.
    edges_indices = make_adj_triu_matrix(gene_adj_element, node_num,
                                         condition_edges_indices)
    nodes_pos = np.concatenate([condition_nodes_pos, gene_nodes_pos])

    graph = nx.Graph()
    graph.add_nodes_from(np.arange(len(nodes_pos)))
    graph.add_edges_from(edges_indices)

    # Look for a connected component holding all condition nodes.
    required_nodes = set(input_nodes + output_nodes + frozen_nodes)
    for component in nx.connected_components(graph):
        subgraph = graph.subgraph(component)
        if required_nodes <= set(subgraph.nodes):
            edges_indices = np.array(subgraph.edges)
            break
    else:
        # No component connects every condition node: apply the penalty.
        return -10.0

    edges_thickness = make_edge_thick_triu_matrix(
        gene_edges_thickness, node_num, condition_edges_indices,
        condition_edges_thickness, edges_indices)

    env = BarFemGym(nodes_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()
    efficiency = env.calculate_simulation()

    if np_save_path:
        env.render(save_path=os.path.join(np_save_path, "image.png"))
        for file_name, array in (("nodes_pos.npy", nodes_pos),
                                 ("edges_indices.npy", edges_indices),
                                 ("edges_thickness.npy", edges_thickness)):
            np.save(os.path.join(np_save_path, file_name), array)

    return float(efficiency)
def select_action_gcn_critic_gcn(env, criticNet, edgethickNet, device, log_dir=None, history=None):
    """Sample an edge-thickness action for the fixed node pair (0, 3).

    The critic estimates the state value, the actor outputs a (mean, std)
    pair, and the thickness is sampled from the resulting normal
    distribution and clamped to [0, 1].  The action/value and the mean/std
    are appended to the networks' ``saved_actions`` buffers.

    Args:
        env: Environment providing ``extract_node_edge_info()``.
        criticNet: Critic network.
        edgethickNet: Actor network producing the thickness mean and std.
        device: Not used in this function.
        log_dir: When not ``None``, directory whose ``progress.txt`` receives
            the mean, std and sampled thickness.
        history: When not ``None``, dict whose lists are extended with the
            efficiency at the mean action and the sampled values.

    Returns:
        The action dict.
    """
    nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info()
    node, edge, node_adj, edge_adj, D_v, D_e, T = make_torch_type_for_GCN(
        nodes_pos, edges_indices, edges_thickness, node_adj)
    gcn_inputs = (node, edge, node_adj, edge_adj, D_v, D_e, T)

    state_value = criticNet(*gcn_inputs)

    node1, node2 = 0, 3
    edge_thickness = edgethickNet(*gcn_inputs, node1, node2)
    thick_mean = edge_thickness[0][0]
    thick_std = edge_thickness[0][1]

    sampler = tdist.Normal(thick_mean.item(), thick_std.item())
    edge_thickness_action = torch.clamp(sampler.sample(), min=0, max=1)
    sampled_thickness = edge_thickness_action.item()

    action = {
        'which_node': np.array([node1, node2]),
        'end': 0,
        'edge_thickness': np.array([sampled_thickness]),
        'new_node': np.array([[0, 2]]),
    }

    # save to action buffer
    criticNet.saved_actions.append(Saved_Action(action, state_value))
    edgethickNet.saved_actions.append(
        Saved_mean_std_Action(thick_mean, thick_std))

    if log_dir is not None:
        # Values useful when inspecting the loss.
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            print('edge_thick_mean:', thick_mean.item(), file=f)
            print('edge_thick_std:', thick_std.item(), file=f)
            print('edge_thickness:', sampled_thickness, file=f)

    if history is not None:
        # Evaluate the efficiency obtained with the mean thickness on a
        # separate, freshly built environment.
        (node_pos, input_nodes, input_vectors,
         output_nodes, output_vectors, frozen_nodes,
         edges_indices, edges_thickness, frozen_nodes) = easy_dev()
        calc_effi_env = BarFemGym(node_pos, input_nodes, input_vectors,
                                  output_nodes, output_vectors, frozen_nodes,
                                  edges_indices, edges_thickness, frozen_nodes)
        calc_effi_env.reset()
        mean_action = {
            'which_node': np.array([node1, node2]),
            'end': 0,
            'edge_thickness': np.array([thick_mean.item()]),
            'new_node': np.array([[0, 2]]),
        }
        calc_effi_env.step(mean_action)
        mean_efficiency = calc_effi_env.calculate_simulation(mode='force')

        # Log to history.
        history['mean_efficiency'].append(mean_efficiency)
        history['a'].append(sampled_thickness)
        history['a_mean'].append(thick_mean.item())
        history['a_sigma'].append(thick_std.item())
        history['critic_value'].append(state_value.item())

    return action
def select_action_gcn_critic_gcn(env, node1Net, node2Net, criticNet, edgethickNet, device, log_dir=None, history=None):
    """Sample a node pair and an edge thickness, and record them for training.

    Node 1 is sampled from ``node1Net``'s categorical output.  Node 2 is
    sampled from ``node2Net`` over the remaining nodes (node 1 removed), and
    its index is shifted back into the full node numbering.  The thickness
    is sampled from a normal distribution given by ``edgethickNet`` and
    clamped to [0, 1].  State value, log-probabilities and mean/std are
    appended to the networks' ``saved_actions`` buffers.

    Args:
        env: Environment providing ``extract_node_edge_info()``.
        node1Net: Network returning node embeddings and node-1 probabilities.
        node2Net: Network returning edge embeddings and node-2 probabilities.
        criticNet: Critic network.
        edgethickNet: Network producing the thickness mean and std.
        device: Not used in this function.
        log_dir: When not ``None``, directory whose ``progress.txt`` receives
            the sampled values.
        history: When not ``None``, dict whose lists are extended with the
            efficiency at the mean action and the sampled values.

    Returns:
        The action dict.
    """
    nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info()
    node_num = nodes_pos.shape[0]
    node, edge, node_adj, edge_adj, D_v, D_e, T = make_torch_type_for_GCN(
        nodes_pos, edges_indices, edges_thickness, node_adj)
    state_value = criticNet(node, edge, node_adj, edge_adj, D_v, D_e, T)

    # Select node 1.
    emb_node, node1_prob = node1Net(node, edge, node_adj, edge_adj, D_v, D_e,
                                    T)
    node1_categ = Categorical(node1_prob)
    node1 = node1_categ.sample()

    # Node features with node 1 removed.
    non_node1_node = torch.cat(
        [node[:, 0:node1, :], node[:, node1 + 1:, :]], 1)

    # Embedding of node 1, repeated once per remaining node.
    H1 = emb_node[0][node1]
    H1_cat = H1.repeat(node_num - 1, 1)
    H1_cat = H1_cat.unsqueeze(0)
    # Concatenate the remaining nodes' features with node 1's embedding.
    emb_graph_cat = torch.cat([non_node1_node, H1_cat], 2)

    # Select node 2 among the remaining nodes.
    emb_edge, node2_prob = node2Net(emb_graph_cat)
    node2_categ = Categorical(node2_prob)
    node2_temp = node2_categ.sample()
    if node2_temp >= node1:
        node2 = node2_temp + 1  # shift past the removed node 1
    else:
        node2 = node2_temp

    # Plain Python ints, used by every action dict built below so that the
    # sampled action and the mean action have the same 'which_node' format.
    node1_index = node1.item()
    node2_index = node2.item()

    edge_thickness = edgethickNet(emb_edge)
    thick_mean = edge_thickness[0][0]
    thick_std = edge_thickness[0][1]
    edge_thickness_tdist = tdist.Normal(thick_mean.item(), thick_std.item())
    edge_thickness_action = edge_thickness_tdist.sample()
    edge_thickness_action = torch.clamp(edge_thickness_action, min=0, max=1)
    sampled_thickness = edge_thickness_action.item()

    action = {
        'which_node': np.array([node1_index, node2_index]),
        'end': 0,
        'edge_thickness': np.array([sampled_thickness]),
        'new_node': np.array([[0, 2]]),
    }

    # save to action buffer
    criticNet.saved_actions.append(Saved_Action(action, state_value))
    node1Net.saved_actions.append(
        Saved_prob_Action(node1_categ.log_prob(node1)))
    # The log-probability is taken at the index that was actually sampled
    # (before the shift).
    node2Net.saved_actions.append(
        Saved_prob_Action(node2_categ.log_prob(node2_temp)))
    edgethickNet.saved_actions.append(
        Saved_mean_std_Action(thick_mean, thick_std))

    if log_dir is not None:
        # Values useful when inspecting the loss.
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            print('node1_prob:', node1_prob, file=f)
            print('node1:', node1_index, file=f)
            print('node2_prob:', node2_prob, file=f)
            print('node2:', node2_index, file=f)
            print('edge_thick_mean:', thick_mean.item(), file=f)
            print('edge_thick_std:', thick_std.item(), file=f)
            print('edge_thickness:', sampled_thickness, file=f)

    if history is not None:
        # Evaluate the efficiency obtained with the mean thickness on a
        # separate, freshly built environment.
        (node_pos, input_nodes, input_vectors,
         output_nodes, output_vectors, frozen_nodes,
         edges_indices, edges_thickness, frozen_nodes) = easy_dev()
        calc_effi_env = BarFemGym(node_pos, input_nodes, input_vectors,
                                  output_nodes, output_vectors, frozen_nodes,
                                  edges_indices, edges_thickness, frozen_nodes)
        calc_effi_env.reset()
        mean_action = {
            'which_node': np.array([node1_index, node2_index]),
            'end': 0,
            'edge_thickness': np.array([thick_mean.item()]),
            'new_node': np.array([[0, 2]]),
        }
        calc_effi_env.step(mean_action)
        mean_efficiency = calc_effi_env.calculate_simulation()

        # Log to history.
        history['mean_efficiency'].append(mean_efficiency)
        history['a'].append(sampled_thickness)
        history['a_mean'].append(thick_mean.item())
        history['a_sigma'].append(thick_std.item())
        history['critic_value'].append(state_value.item())

    return action
def load_actor_gcn_critic_gcn(load_dir, load_epoch, max_episodes=5000, test_name="test", history=None, log_file=False):
    """Load saved Actor-Critic weights and resume training from ``load_epoch``.

    Args:
        load_dir: Directory that contains the ``pth`` folder with the saved
            weights.
        load_epoch: Epoch to resume from; also selects which weight files
            are loaded.
        max_episodes (int, optional): Episode index at which training stops.
            Defaults to 5000.
        test_name (str, optional): Name of the result folder. Defaults to
            "test".
        history (optional): Previously saved history. When given, every list
            is truncated to ``load_epoch`` entries so the plot continues
            from the loaded state. Defaults to None.
        log_file (bool, optional): When True, loss-related information is
            logged to progress.txt. Defaults to False.

    Returns:
        The history dict.
    """
    if history is not None:
        for key in history.keys():
            history[key] = history[key][:load_epoch]
    else:
        history = {
            'epoch': [],
            'result_efficiency': [],
            'mean_efficiency': [],  # efficiency obtained at a_mean
            'a': [],
            'a_mean': [],
            'a_sigma': [],
            'advantage': [],
            'critic_value': [],
        }

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "already folder exists"
    log_file = log_dir if log_file else None
    os.makedirs(log_dir, exist_ok=True)

    (node_pos, input_nodes, input_vectors,
     output_nodes, output_vectors, frozen_nodes,
     edges_indices, edges_thickness, frozen_nodes) = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    # Hyper-parameters.
    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    # Critic: build, then restore the stored weights.
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    critic_path = os.path.join(
        load_dir, "pth/{}_criticNet.pth".format(load_epoch))
    # Actor: build, then restore the stored weights.
    edgethickNet = Edgethick_Actor(2, 1, 400, 400).to(device).double()
    actor_path = os.path.join(
        load_dir, "pth/{}_edgethickNet.pth".format(load_epoch))
    criticNet.load_state_dict(torch.load(critic_path))
    edgethickNet.load_state_dict(torch.load(actor_path))

    optimizer_edgethick = optim.SGD(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(load_epoch, max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)

        env.reset()
        # The returned values are not used afterwards; the call is kept as is.
        env.extract_node_edge_info()

        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env,
                                                  criticNet,
                                                  edgethickNet,
                                                  device,
                                                  log_dir=log_file,
                                                  history=history)
            env.step(action)
            reward = env.calculate_simulation(mode='force')
            criticNet.rewards.append(reward)

        finish_episode(criticNet,
                       edgethickNet,
                       optimizer_critic,
                       optimizer_edgethick,
                       gamma,
                       log_dir=log_file,
                       history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))
    return history
def actor_gcn_critic_gcn(max_episodes=5000, test_name="test", log_file=False, save_pth=False):
    """Train with Actor-Critic where both Actor and Critic are GCN based.

    Each episode rebuilds the environment from the initial graph returned by
    ``easy_dev()``, selects one action, computes the reward and updates the
    networks through ``finish_episode``.

    Args:
        max_episodes: Number of training episodes.
        test_name: Name of the result folder.
        log_file: When True, loss-related information is logged to
            progress.txt.
        save_pth: When True, ``save_model`` is called every 100 episodes.

    Returns:
        The history dict (also pickled to ``history.pkl`` in the log folder).
    """
    history = {
        'epoch': [],
        'result_efficiency': [],
        'x': [],
        'x_mean': [],
        'x_sigma': [],
        'y': [],
        'y_mean': [],
        'y_sigma': [],
        'advantage': [],
        'critic_value': [],
    }

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "already folder exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    # Initial graph; these names must stay untouched because the
    # environment is rebuilt from them at the start of every episode.
    (node_pos, input_nodes, input_vectors,
     output_nodes, output_vectors, frozen_nodes,
     edges_indices, edges_thickness, frozen_nodes) = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    x_y_Net = X_Y_Actor(2, 1, 400, 400).to(device).double()
    node1Net = Select_node1_model(2, 1, 400, 400).to(device).double()
    # The 400 in 400+2 corresponds to the input3 part of Select_node1_model.
    node2Net = Select_node2_model(400 + 2, 400).to(device).double()
    optimizer_node1 = optim.Adam(node1Net.parameters(), lr=lr_actor)
    optimizer_node2 = optim.Adam(node2Net.parameters(), lr=lr_actor)
    optimizer_xy = optim.Adam(x_y_Net.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)

        env = BarFemGym(node_pos, input_nodes, input_vectors,
                        output_nodes, output_vectors, frozen_nodes,
                        edges_indices, edges_thickness, frozen_nodes)
        env.reset()
        # The extracted values are not used below.  They are deliberately
        # NOT unpacked into edges_indices / edges_thickness: doing so would
        # overwrite the initial graph used to rebuild the env next episode.
        env.extract_node_edge_info()

        action = select_action_gcn_critic_gcn(env,
                                              criticNet,
                                              node1Net,
                                              node2Net,
                                              x_y_Net,
                                              device,
                                              log_dir=log_file,
                                              history=history)
        env.step(action)

        selected_nodes = action['which_node']
        if 4 in selected_nodes:
            env.input_nodes = [2, 4]
            env.input_vectors = np.array([[1, 0], [0, 1]])
        # TODO: train the policy so that it does not select [2, 4]
        if 2 in selected_nodes and 4 in selected_nodes:
            reward = np.array([0])
        else:
            reward = env.calculate_simulation()
        criticNet.rewards.append(reward)

        finish_episode(criticNet,
                       x_y_Net,
                       node1Net,
                       node2Net,
                       optimizer_critic,
                       optimizer_xy,
                       optimizer_node1,
                       optimizer_node2,
                       gamma,
                       log_dir=log_file,
                       history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))

        if episode % 100 == 0 and save_pth:
            save_model(criticNet,
                       x_y_Net,
                       os.path.join(log_dir, "pth"),
                       save_name=str(episode))

    env.close()
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    return history
# Boundary-condition data handed to BarFemGym below.
# NOTE(review): origin_input_nodes, origin_output_nodes,
# origin_nodes_positions, origin_edges_indices and origin_edges_thickness
# are defined outside this excerpt.
origin_frozen_nodes = [1, 3, 5, 7, 9, 11, 13, 15]
origin_input_vectors = np.array([
    [0., -0.1],
])
origin_output_vectors = np.array([
    [-1, 0],
])

# Input/output node indices passed to the environment.
barfem_input_nodes = [84]
barfem_output_nodes = [68]

# All condition nodes: input + output + frozen.
condition_nodes = origin_input_nodes + origin_output_nodes + origin_frozen_nodes

env = BarFemGym(origin_nodes_positions, barfem_input_nodes,
                origin_input_vectors, barfem_output_nodes,
                origin_output_vectors, origin_frozen_nodes,
                origin_edges_indices, origin_edges_thickness,
                origin_frozen_nodes)
env.reset()
# env.render(os.path.join(log_dir, 'render_image/first.png'))

# Create the initial state
best_efficiency = -1000

# A disconnected graph is not simulated; it gets the efficiency -1.
if env.confirm_graph_is_connected():
    current_efficiency = env.calculate_simulation()
else:
    current_efficiency = -1
current_edges_indices = origin_edges_indices.copy()
# Record types stored in the networks' action buffers.  All three share the
# typename 'SavedAction'; only their fields differ.
Saved_Action = namedtuple('SavedAction', ['action', 'value'])
Saved_prob_Action = namedtuple('SavedAction', ['log_prob'])
Saved_mean_std_Action = namedtuple('SavedAction', ['mean', 'variance'])

# One Adam optimizer per network, all with the same learning rate.
GCN_optimizer = optim.Adam(GCN.parameters(), lr=lr)
X_Y_optimizer = optim.Adam(X_Y.parameters(), lr=lr)
Stop_optimizer = optim.Adam(Stop.parameters(), lr=lr)
Select_node1_optimizer = optim.Adam(Select_node1.parameters(), lr=lr)
Select_node2_optimizer = optim.Adam(Select_node2.parameters(), lr=lr)
Edge_thickness_optimizer = optim.Adam(Edge_thickness.parameters(), lr=lr)

# Carry over the information produced by the annealing run.
node_pos, input_nodes, input_vectors, output_nodes,\
    output_vectors, frozen_nodes, edges_indices, edges_thickness = load_graph_info(
        os.path.join(annealing_dir, 'graph_info'))

env = BarFemGym(node_pos, input_nodes, input_vectors,
                output_nodes, output_vectors, frozen_nodes,
                edges_indices, edges_thickness, frozen_nodes)
env.reset()


# NOTE(review): this definition appears to continue beyond the visible
# excerpt; only its first statements are shown here.
def select_action(condition_nodes):
    # Reads the module-level env.
    nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
    )
    node_num = nodes_pos.shape[0]

    # Create the labels: 1 for the rows indexed by condition_nodes, 0
    # elsewhere, appended to the node positions as an extra column.
    label = np.zeros((node_num, 1))
    label[condition_nodes] = 1
    nodes_pos = np.concatenate([nodes_pos, label], 1)
# NOTE(review): this excerpt starts and ends mid-script;
# origin_nodes_positions, origin_edges_indices, origin_input_nodes and
# origin_input_vectors are defined outside it.
origin_output_nodes = [68]
origin_output_vectors = np.array([
    [-1, 0],
])
origin_frozen_nodes = [1, 3, 5, 7, 9, 11, 13, 15]

# Extract the elements to feed into the gym
new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_main_node_edge_info(
    origin_nodes_positions, origin_edges_indices, origin_input_nodes,
    origin_input_vectors, origin_output_nodes, origin_output_vectors,
    origin_frozen_nodes)

env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                new_output_nodes, new_output_vectors, new_frozen_nodes,
                new_edges_indices, new_edges_thickness)

# Loop for one episode
state = env.reset()
total_time = 0
total_calc_time = 0
for i in range(500):
    # Get a random action
    action = env.random_action()

    # Execute one step
    state, reward, done, info = env.step(action)
    if env.confirm_graph_is_connected():
        reward = 0
        # Timestamp taken right before the simulation call.
        # NOTE(review): the rest of this branch lies outside the excerpt.
        start = time.time()
        efficiency = env.calculate_simulation()
class Worker(mp.Process):
    """Worker process that trains local network copies and pushes gradients to global ones.

    Each worker owns local critic / x-y / node1 / node2 networks and its own
    environment.  After every episode ``finish_episode`` copies the local
    gradients onto the global networks, steps the optimizers and reloads the
    global weights into the local networks.
    """

    def __init__(self, global_criticNet, global_x_y_Net, global_node1Net, global_node2Net, Critic_opt, x_y_opt, Node1_opt, Node2_opt, global_ep, global_ep_r, res_queue, name, gamma=0.99, total_episodes=5000):
        """Store the shared handles, build the local networks and the environment.

        ``name`` is an integer used to build the process name ``'w<name>'``.
        """
        super(Worker, self).__init__()
        self.name = 'w%i' % name
        self.g_ep, self.g_ep_r, self.res_queue = global_ep, global_ep_r, res_queue
        self.Critic_opt, self.x_y_opt, self.Node1_opt, self.Node2_opt = Critic_opt, x_y_opt, Node1_opt, Node2_opt
        self.global_criticNet, self.global_x_y_Net,\
            self.global_node1Net, self.global_node2Net = global_criticNet, global_x_y_Net, global_node1Net, global_node2Net

        # Local networks, created in double precision on the CPU.
        device = torch.device('cpu')
        self.local_criticNet = CriticNetwork_GCN(2, 1, 400, 400).double().to(device)
        self.local_x_y_Net = X_Y_Actor(2, 1, 400, 400).double().to(device)
        self.local_node1Net = Select_node1_model(2, 1, 400, 400).double().to(device)
        self.local_node2Net = Select_node2_model(400 + 2, 400).double().to(device)

        # Worker-private environment built from the easy_dev() setup.
        node_pos, input_nodes, input_vectors,\
            output_nodes, output_vectors, frozen_nodes,\
            edges_indices, edges_thickness, frozen_nodes = easy_dev()
        self.env = BarFemGym(node_pos, input_nodes, input_vectors,
                             output_nodes, output_vectors, frozen_nodes,
                             edges_indices, edges_thickness, frozen_nodes)
        self.env.reset()

        self.gamma = gamma  # reward discount rate
        self.total_episodes = total_episodes  # total number of epochs across all processes

    def finish_episode(self, log_dir=None, history=None):
        """Compute the losses for the buffered episode and synchronise with the global nets.

        Returns the summed loss as a Python float.  ``log_dir`` is not used
        in this method; when ``history`` is given, the last advantage is
        appended to ``history['advantage']``.
        """
        R = 0
        GCN_saved_actions = self.local_criticNet.saved_actions
        x_y_saved_actions = self.local_x_y_Net.saved_actions
        node1Net_saved_actions = self.local_node1Net.saved_actions
        node2Net_saved_actions = self.local_node2Net.saved_actions

        policy_losses = []  # list to save actor (policy) loss
        value_losses = []  # list to save critic (value) loss
        returns = []  # list to save the true values

        # calculate the true value using rewards returned from the environment
        for r in self.local_criticNet.rewards[::-1]:
            # calculate the discounted value
            R = r + self.gamma * R
            returns.insert(0, R)

        returns = torch.tensor(returns)

        # Trigger that lets x_y_opt run only when some advantage was > 0.
        x_y_opt_trigger = False
        for (action, value), (x_y_mean, x_y_std, x_dist, y_dist), (node1_prob, node1_dist), (node2_prob, node2_dist), R in zip(
                GCN_saved_actions, x_y_saved_actions, node1Net_saved_actions,
                node2Net_saved_actions, returns):

            advantage = R - value.item()

            # calculate actor (policy) loss
            if action["end"]:
                # Not expected in this setup; only a marker is printed.
                print("okasii")
            else:
                log_probs = torch.cat([node1_prob, node2_prob])
                policy_loss = -torch.mean(log_probs) * advantage
                policy_losses.append(policy_loss)

                if advantage > 0:
                    # L1 distance between the taken new-node position and
                    # the predicted mean, plus L1 distance between the
                    # absolute deviation and the predicted std.
                    x_y_mean_loss = F.l1_loss(
                        torch.from_numpy(action["new_node"][0]).double(),
                        x_y_mean.double())
                    x_y_var_loss = F.l1_loss(
                        torch.from_numpy(
                            np.abs(
                                action["new_node"][0] -
                                x_y_mean.to('cpu').detach().numpy().copy())),
                        x_y_std.double())
                    policy_losses.append(
                        (x_y_mean_loss + x_y_var_loss) * advantage)

                    x_y_opt_trigger = True  # activate the x_y_opt trigger
                else:
                    x_y_mean_loss = torch.zeros(1)
                    x_y_var_loss = torch.zeros(1)

            # calculate critic (value) loss using L1 loss
            value_losses.append(
                F.l1_loss(value.double(),
                          torch.tensor([[R]]).double()))

        # reset gradients
        self.Critic_opt.zero_grad()
        self.Node1_opt.zero_grad()
        self.Node2_opt.zero_grad()
        if x_y_opt_trigger:
            self.x_y_opt.zero_grad()

        # sum up all the values of policy_losses and value_losses
        if len(policy_losses) == 0:
            loss = torch.stack(value_losses).sum()
        else:
            loss = torch.stack(policy_losses).sum() + \
                torch.stack(value_losses).sum()

        # perform backprop
        loss.backward()

        # Hand the local gradients to the corresponding global parameters.
        for lp, gp in zip(self.local_criticNet.parameters(), self.global_criticNet.parameters()):
            gp._grad = lp.grad
        for lp, gp in zip(self.local_node1Net.parameters(), self.global_node1Net.parameters()):
            gp._grad = lp.grad
        for lp, gp in zip(self.local_node2Net.parameters(), self.global_node2Net.parameters()):
            gp._grad = lp.grad
        if x_y_opt_trigger:
            for lp, gp in zip(self.local_x_y_Net.parameters(), self.global_x_y_Net.parameters()):
                gp._grad = lp.grad

        self.Critic_opt.step()
        self.Node1_opt.step()
        self.Node2_opt.step()
        if x_y_opt_trigger:
            self.x_y_opt.step()

        # pull global parameters
        self.local_criticNet.load_state_dict(
            self.global_criticNet.state_dict())
        self.local_x_y_Net.load_state_dict(self.global_x_y_Net.state_dict())
        self.local_node1Net.load_state_dict(self.global_node1Net.state_dict())
        self.local_node2Net.load_state_dict(self.global_node2Net.state_dict())

        # reset rewards and action buffer
        del self.local_criticNet.rewards[:]
        del self.local_criticNet.saved_actions[:]
        del self.local_x_y_Net.saved_actions[:]
        del self.local_node1Net.saved_actions[:]
        del self.local_node2Net.saved_actions[:]

        if history is not None:
            # NOTE(review): `advantage` is only bound if the loop above ran
            # at least once.
            history['advantage'].append(advantage.item())

        return loss.item()

    def run(self, max_episodes=5000, test_name="test", log_file=False, save_pth=False, history=None, device=torch.device('cpu')):
        """Play single-step episodes until the shared counter reaches ``total_episodes``.

        ``test_name`` and ``save_pth`` are not used in this method.  ``None``
        is put on the result queue when the loop ends.
        """
        while self.g_ep.value < self.total_episodes:
            # Added here because the input nodes are reconfigured below.
            node_pos, input_nodes, input_vectors,\
                output_nodes, output_vectors, frozen_nodes,\
                edges_indices, edges_thickness, frozen_nodes = easy_dev()
            self.env = BarFemGym(node_pos, input_nodes, input_vectors,
                                 output_nodes, output_vectors, frozen_nodes,
                                 edges_indices, edges_thickness, frozen_nodes)
            self.env.reset()
            for episode in range(max_episodes):
                action = select_action_gcn_critic_gcn(self.env,
                                                      self.local_criticNet,
                                                      self.local_node1Net,
                                                      self.local_node2Net,
                                                      self.local_x_y_Net,
                                                      device,
                                                      log_dir=log_file,
                                                      history=history)

                next_nodes_pos, _, done, _ = self.env.step(action)
                if 4 in action['which_node']:
                    self.env.input_nodes = [2, 4]
                    self.env.input_vectors = np.array([[1, 0], [0, 1]])
                if 2 in action['which_node'] and 4 in action[
                        'which_node']:  # TODO: train the policy so that it does not select [2, 4]
                    reward = np.array([0])
                else:
                    reward = self.env.calculate_simulation()
                self.local_criticNet.rewards.append(reward)

                done = True  # because this setup is one-step

                if done:  # update global and assign to local net
                    record(self.g_ep, self.g_ep_r, reward, self.res_queue,
                           self.name)
                    # sync
                    # Push each process's weight update to the global nets,
                    # then copy the updated weights back to the local nets.
                    self.finish_episode()
                    break
        self.res_queue.put(None)
from FEM.bar_fem import barfem
import numpy as np
from env.gym_barfem import BarFemGym

# Four corner nodes of a 0.5 x 0.5 square, shifted by 0.25 along both axes.
nodes_pos = np.array([[0, 0], [0.5, 0], [0.5, 0.5], [0, 0.5]])
nodes_pos += 0.25

# Four sides plus one diagonal, every edge with thickness 1.
edges_indices = np.array([[0, 1], [1, 2], [0, 3], [2, 3], [1, 3]])
edges_thickness = np.ones(5)

# Boundary conditions: node 1 is frozen, node 2 is the input node and
# node 0 is the output node.
frozen_nodes = [1]
input_nodes = [2]
input_vectors = np.array([[1, 0]])
output_nodes = [0]
output_vectors = np.array([[0.5, 0.5]])

env = BarFemGym(
    nodes_pos,
    input_nodes,
    input_vectors,
    output_nodes,
    output_vectors,
    frozen_nodes,
    edges_indices,
    edges_thickness,
    frozen_nodes,
)
env.reset()

# Print the result of the simulation for this frame.
print(env.calculate_simulation())
search_edge_indice_1 = [1, 11] # 探したいエッジの幅のindice search_edge_indice_2 = [0, 2] # 探したいエッジの幅のindice np_save_path = "GA/confirm_data_for_cross_process/50" nodes_pos = np.load(os.path.join(np_save_path, "nodes_pos.npy")) #nodes_pos[10, :] = np.array([0.5, 0.5]) edges_indices = np.load(os.path.join(np_save_path, "edges_indices.npy")) edges_thickness = np.load(os.path.join(np_save_path, "edges_thickness.npy")) condition_nodes_pos, input_nodes, input_vectors, output_nodes, \ output_vectors, frozen_nodes, condition_edges_indices, condition_edges_thickness\ = make_main_node_edge_info(*condition(), condition_edge_thickness=0.05) # スレンダー比を考慮し,長さ方向に対して1/20の値の幅にした env = BarFemGym(nodes_pos, input_nodes, input_vectors, output_nodes, output_vectors, frozen_nodes, edges_indices, edges_thickness, frozen_nodes) env.reset() efficiency = env.calculate_simulation() #env.render(save_path="image/image_preprocess.png", display_number=True) # 同じノード位置にあるものを排除する. processed_nodes_pos, processed_edges_indices, processed_edges_thickness = preprocess_graph_info( nodes_pos, edges_indices, edges_thickness) # 傾きが一致するものをグループ分けし,エッジ分割を行う. processed_edges_indices, processed_edges_thickness = separate_same_line_procedure( processed_nodes_pos, processed_edges_indices, processed_edges_thickness) processed_nodes_pos, processed_edges_indices, processed_edges_thickness =\ seperate_cross_line_procedure(processed_nodes_pos, processed_edges_indices, processed_edges_thickness)
X_Y = model.X_Y_model(node_out_features, 2).double() # なぜかdoubleが必要だった Stop = model.Stop_model(node_out_features, 2).double() Select_node1 = model.Select_node1_model(node_out_features, 2).double() Select_node2 = model.Select_node2_model(node_features + node_out_features, 2).double() Edge_thickness = model.Edge_thickness_model(node_features + node_out_features, 2).double() # 連続状態を作成 while (1): new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph( origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors, origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS) env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness) env.reset() if env.confirm_graph_is_connected(): # env.render('render_image/yatta.png') break Saved_Action = namedtuple('SavedAction', ['action', 'value']) Saved_prob_Action = namedtuple('SavedAction', ['log_prob']) Saved_mean_std_Action = namedtuple('SavedAction', ['mean', 'variance']) GCN_optimizer = optim.Adam(GCN.parameters(), lr=lr) X_Y_optimizer = optim.Adam(X_Y.parameters(), lr=lr) Stop_optimizer = optim.Adam(Stop.parameters(), lr=lr) Select_node1_optimizer = optim.Adam(Select_node1.parameters(), lr=lr) Select_node2_optimizer = optim.Adam(Select_node2.parameters(), lr=lr) Edge_thickness_optimizer = optim.Adam(Edge_thickness.parameters(), lr=lr)
def _build_connected_env():
    """Return a freshly reset BarFemGym whose initial graph is connected.

    Initial graphs are generated with make_continuous_init_graph from the
    module-level ``origin_*`` data and ``EDGE_THICKNESS``; generation is
    repeated until ``env.confirm_graph_is_connected()`` is truthy.
    """
    while True:
        (new_node_pos, new_input_nodes, new_input_vectors,
         new_output_nodes, new_output_vectors, new_frozen_nodes,
         new_edges_indices, new_edges_thickness) = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices,
            origin_input_nodes, origin_input_vectors,
            origin_output_nodes, origin_output_vectors,
            origin_frozen_nodes, EDGE_THICKNESS)
        env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                        new_output_nodes, new_output_vectors,
                        new_frozen_nodes, new_edges_indices,
                        new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            return env


def main():
    """Train for ``train_num`` epochs and log the results under ``log_dir``.

    Each epoch builds a new connected environment, runs at most
    ``max_action`` steps, appends the per-step reward to ``GCN.rewards``,
    calls ``finish_episode()`` and records the outcome in the module-level
    ``history`` dict. The model is saved as "Good" whenever the final
    efficiency improves on the best seen so far, and as "Last" at the end of
    the epoch.

    Reads the module-level names ``train_num``, ``max_action``,
    ``final_penalty``, ``penalty``, ``continuous_reward``, ``GCN``,
    ``history`` and ``log_dir``.
    """
    # NOTE(review): `env` is local to this function and is not passed to
    # select_action(). If select_action reads a module-level `env`, it will
    # not see the per-epoch environment built here -- confirm.
    prior_efficiency = 0

    best_efficiency = -1000
    best_epoch = 0

    # The node count of one initial graph is handed to select_action at
    # every step.
    env = _build_connected_env()
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    for epoch in tqdm(range(train_num)):
        # Reset the environment and the per-episode accumulators.
        env = _build_connected_env()
        env.reset()  # the original resets once more after building the env
        ep_reward = 0
        continuous_trigger = 0
        # Index of the last executed step. It is updated on every iteration so
        # it is defined (and not stale from an earlier epoch) even when the
        # episode reaches max_action without `done`.
        steps = -1

        # Bound the episode length so learning cannot loop forever.
        for t in range(max_action):
            steps = t

            # Select an action from the policy.
            action = select_action(first_node_num)
            # NOTE(review): the return value is unused here; the call is kept
            # in case extract_node_edge_info has side effects -- confirm and
            # remove if it is a pure getter.
            env.extract_node_edge_info()

            # Take the action.
            _, _, done, _ = env.step(action)

            if t == (max_action - 1) and not done:
                # The episode did not finish within max_action steps.
                # Truthiness is used (not `is not True`) so the check agrees
                # with `if done:` below for non-bool values.
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    # Already connected earlier: reward the improvement.
                    reward = efficiency - prior_efficiency
                else:
                    # First time the graph is connected in this episode.
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency
            elif continuous_trigger == 1:
                # The graph was connected before but no longer is.
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)
            ep_reward += reward
            if done:
                break

        # Perform backprop.
        loss = finish_episode()

        # Final efficiency of this episode (-1 when the graph is disconnected).
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1

        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # Save the learning history.
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.write(
                'epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n' %
                (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.write('epoch %d, best_efficiency: %.4f\n' %
                    (best_epoch + 1, best_efficiency))

        # NOTE(review): the nesting level of the calls below was reconstructed;
        # confirm they are meant to run every epoch rather than once after
        # training.
        save_model(save_name="Last")

        plot_loss_history(history,
                          os.path.join(log_dir, 'learning_loss_curve.png'))
        plot_reward_history(history,
                            os.path.join(log_dir, 'learning_reward_curve.png'))
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        plot_steps_history(history,
                           os.path.join(log_dir, 'learning_steps_curve.png'))
# Compare the bar-FEM solver with APDL on 1000 random conditions and dump
# every condition for which the comparison fails.
error_num = 0
tmax = 10000  # maximum number of conjugate-gradient iterations
eps = 1e-8  # tolerance threshold of the conjugate-gradient method
for i in tqdm(range(1000)):
    (nodes_pos, edges_indices, edges_thickness,
     input_nodes, input_vectors, frozen_nodes) = \
        make_random_fem_condition_with_ER(max_node_num, max_edge_pos)
    output_nodes = [0]
    output_vectors = np.array([[1, 1]])

    result = compare_apdl_barfem(nodes_pos, edges_indices, edges_thickness,
                                 input_nodes, input_vectors, frozen_nodes,
                                 tmax, eps)
    if result:
        continue

    # Falsy result: store the offending condition in its own numbered folder.
    error_num += 1
    log_dir = "check_fem_by_ansys/conditions/error{}".format(error_num)
    os.makedirs(log_dir, exist_ok=True)
    for _file_name, _array in (
            ('nodes_pos.npy', nodes_pos),
            ('edges_indices.npy', edges_indices),
            ('edges_thickness.npy', edges_thickness),
            ('input_nodes.npy', input_nodes),
            ('input_vectors.npy', input_vectors),
            ('frozen_nodes.npy', frozen_nodes)):
        np.save(os.path.join(log_dir, _file_name), _array)

    # Render the condition next to the saved arrays for visual inspection.
    env = BarFemGym(
        nodes_pos,
        input_nodes,
        input_vectors,
        output_nodes,
        output_vectors,
        frozen_nodes,
        edges_indices,
        edges_thickness,
        frozen_nodes,
    )
    env.reset()
    env.render(save_path=os.path.join(log_dir, "image.png"),
               display_number=True)
def actor_gcn_critic_gcn(max_episodes=5000,
                         test_name="test",
                         log_file=False,
                         save_pth=False):
    """Run Actor-Critic training in which both actor and critic are GCNs.

    The actor chooses node 1 and node 2, so it can select only a single edge.

    Args:
        max_episodes: Number of training episodes.
        test_name: Name of the result folder created under
            "confirm/step3/a_gcn_c_gcn_results/".
        log_file: When truthy, progress information (loss etc.) is logged to
            progress.txt in the result folder.
        save_pth: When truthy, save_model is called on every episode whose
            index is a multiple of 100.

    Returns:
        The history dict; its 'epoch' and 'result_efficiency' lists are
        filled in by this function.

    Raises:
        AssertionError: If the result folder already exists.
    """
    # 'mean_efficiency' is meant to hold the efficiency obtained at a_mean.
    history = {
        key: []
        for key in ('epoch', 'result_efficiency', 'mean_efficiency', 'a',
                    'a_mean', 'a_sigma', 'advantage', 'critic_value')
    }

    log_dir = "confirm/step3/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "already folder exists"
    # From here on `log_file` is either the log directory or None.
    log_file = log_dir if log_file else None
    os.makedirs(log_dir, exist_ok=True)

    (node_pos, input_nodes, input_vectors,
     output_nodes, output_vectors, frozen_nodes,
     edges_indices, edges_thickness, frozen_nodes) = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()

    # Hyperparameters.
    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    # Networks (double precision) and their optimizers.
    actorNet = Select_node1_model(2, 1, 400, 400).to(device).double()
    actorNet2 = Select_node2_model(400 + 2, 400).to(device).double()
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(400).to(device).double()
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_actor2 = optim.Adam(actorNet2.parameters(), lr=lr_actor)
    optimizer_edgethick = optim.Adam(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            progress_path = os.path.join(log_dir, "progress.txt")
            with open(progress_path, mode='a') as f:
                print('\nepoch:', episode, file=f)

        env.reset()
        # The extracted graph information is not used further in this function.
        nodes_pos, edges_indices, edges_thickness, node_adj = \
            env.extract_node_edge_info()

        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(
                env, actorNet, actorNet2, criticNet, edgethickNet, device,
                log_dir=log_file)
            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation()
            criticNet.rewards.append(reward)

        loss = finish_episode(
            criticNet, actorNet, actorNet2, edgethickNet,
            optimizer_critic, optimizer_actor, optimizer_actor2,
            optimizer_edgethick, gamma, log_dir=log_file)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)

        # Periodic checkpoint.
        if save_pth and episode % 100 == 0:
            save_model(criticNet, edgethickNet,
                       os.path.join(log_dir, "pth"),
                       save_name=str(episode))

    env.close()
    plot_efficiency_history(
        history, os.path.join(log_dir, 'learning_effi_curve.png'))

    return history
GCN = model.GCN_fund_model(node_features, 1, node_out_features, 3).double() X_Y = model.X_Y_model(node_out_features, 2).double() # なぜかdoubleが必要だった Stop = model.Stop_model(node_out_features, 2).double() Select_node1 = model.Select_node1_model(node_out_features, 2).double() Select_node2 = model.Select_node2_model(node_features + node_out_features, 2).double() Edge_thickness = model.Edge_thickness_model(node_features + node_out_features, 2).double() # gymに入力する要素を抽出 new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_main_node_edge_info( origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors, origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS) env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness, new_frozen_nodes) Saved_Action = namedtuple('SavedAction', ['action', 'value']) Saved_prob_Action = namedtuple('SavedAction', ['log_prob']) Saved_mean_std_Action = namedtuple('SavedAction', ['mean', 'variance']) GCN_optimizer = optim.Adam(GCN.parameters(), lr=lr) X_Y_optimizer = optim.Adam(X_Y.parameters(), lr=lr) Stop_optimizer = optim.Adam(Stop.parameters(), lr=lr) Select_node1_optimizer = optim.Adam(Select_node1.parameters(), lr=lr) Select_node2_optimizer = optim.Adam(Select_node2.parameters(), lr=lr) Edge_thickness_optimizer = optim.Adam(Edge_thickness.parameters(), lr=lr) def select_action(first_node_num): nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(