def actor_gcn_critic_gcn(max_episodes=5000,
                         test_name="test",
                         log_file=False,
                         save_pth=False):
    """Actor-Criticを行う.Actor,CriticはGCN
    Actorの指定できるものは,ノード1とノード2であり,一つのエッジのみを選択できる.
    max_episodes:学習回数
    test_name:保存ファイルの名前
    log_file: Trueにすると,progress.txtに損失関数などの情報のログをとる."""

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['mean_efficiency'] = []  # stores the efficiency obtained when the action equals a_mean
    history['a'] = []
    history['a_mean'] = []
    history['a_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step3/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1          # actions taken per episode
    lr_actor = 1e-4        # learning rate for the actor networks
    lr_critic = 1e-3       # learning rate for the critic network
    weight_decay = 1e-2    # L2 regularization on the critic optimizer
    gamma = 0.99           # discount factor

    device = torch.device('cpu')

    actorNet = Select_node1_model(2, 1, 400, 400).to(device).double()
    actorNet2 = Select_node2_model(400 + 2, 400).to(device).double()
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(400).to(device).double()
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_actor2 = optim.Adam(actorNet2.parameters(), lr=lr_actor)
    optimizer_edgethick = optim.Adam(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

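    # Training loop: each episode resets the environment, samples an action with the
    # actor GCNs, scores it with one FEM simulation, and then finish_episode updates
    # both actors and the critic.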
    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env,
                                                  actorNet,
                                                  actorNet2,
                                                  criticNet,
                                                  edgethickNet,
                                                  device,
                                                  log_dir=log_file)

            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation()
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              actorNet,
                              actorNet2,
                              edgethickNet,
                              optimizer_critic,
                              optimizer_actor,
                              optimizer_actor2,
                              optimizer_edgethick,
                              gamma,
                              log_dir=log_file)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        if save_pth and episode % 100 == 0:
            save_model(criticNet,
                       edgethickNet,
                       os.path.join(log_dir, "pth"),
                       save_name=str(episode))

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

    return history
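

# Usage sketch (illustrative values, not from the original source):
# history = actor_gcn_critic_gcn(max_episodes=1000,
#                                test_name="gcn_ac_run",
#                                log_file=True,
#                                save_pth=True)
# Results (progress.txt, pth checkpoints, learning_effi_curve.png) are written under
# confirm/step3/a_gcn_c_gcn_results/gcn_ac_run.

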
def load_actor_gcn_critic_gcn(load_dir,
                              load_epoch,
                              max_episodes=5000,
                              test_name="test",
                              history=None,
                              log_file=False):
    """ActorCriticにおいて保存されpthをロードし,そこから学習を開始する.

    Args:
        load_dir ([type]): ロードする対象のpthが複数存在するディレクトリのパスを指定する.
        load_epoch ([type]): いつのepochから学習を開始するかを決める.
        max_episodes (int, optional): 学習回数. Defaults to 5000.
        test_name (str, optional): 保存ファイルの名前. Defaults to "test".
        history ([type], optional): 保存したhistory.これを指定した時,グラフにもロード結果が適用される. Defaults to None.
        log_file (bool, optional): Trueにすると,progress.txtに損失関数などの情報のログをとる. Defaults to False.
    """

    if history is None:
        history = {}
        history['epoch'] = []
        history['result_efficiency'] = []
        history['mean_efficiency'] = []  # stores the efficiency obtained when the action equals a_mean
        history['a'] = []
        history['a_mean'] = []
        history['a_sigma'] = []
        history['advantage'] = []
        history['critic_value'] = []
    else:
        for key in history.keys():
            history[key] = history[key][:load_epoch]

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(2, 1, 400, 400).to(device).double()

    criticNet.load_state_dict(
        torch.load(
            os.path.join(load_dir, "pth/{}_criticNet.pth".format(load_epoch))))
    edgethickNet.load_state_dict(
        torch.load(
            os.path.join(load_dir,
                         "pth/{}_edgethickNet.pth".format(load_epoch))))

    optimizer_edgethick = optim.SGD(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

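    # Resume the training loop at load_epoch so that episode numbering stays
    # consistent with the loaded checkpoint and the truncated history.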
    for episode in tqdm(range(load_epoch, max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env,
                                                  criticNet,
                                                  edgethickNet,
                                                  device,
                                                  log_dir=log_file,
                                                  history=history)

            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              edgethickNet,
                              optimizer_critic,
                              optimizer_edgethick,
                              gamma,
                              log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

    return history
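

# Usage sketch (illustrative paths and values, not from the original source):
# history = load_actor_gcn_critic_gcn(
#     "confirm/step5_entropy/a_gcn_c_gcn_results/old_run",  # hypothetical previous run
#     load_epoch=400,
#     max_episodes=5000,
#     test_name="resumed_run",
#     log_file=True)

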
def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0

    best_efficiency = -1000
    best_epoch = 0

    # generate an initial graph, retrying until a connected one is produced
    while True:
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices, origin_input_nodes,
            origin_input_vectors, origin_output_nodes, origin_output_vectors,
            origin_frozen_nodes, EDGE_THICKNESS)
        env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                        new_output_nodes, new_output_vectors, new_frozen_nodes,
                        new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run train_num training episodes
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):

        # regenerate a connected initial graph for this episode
        while True:
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices,
                origin_input_nodes, origin_input_vectors, origin_output_nodes,
                origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
            env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                            new_output_nodes, new_output_vectors,
                            new_frozen_nodes, new_edges_indices,
                            new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # for each episode, run at most max_action steps so that we don't
        # loop forever while learning
        for t in range(max_action):
            # select action from policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = env.extract_node_edge_info(
            )

            # take the action
            state, _, done, info = env.step(action)
            if (t == (max_action - 1)) and (done is not True):
                # the episode did not terminate within max_action steps
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    reward = efficiency - prior_efficiency
                else:
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency

            elif continuous_trigger == 1:
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)

            ep_reward += reward
            steps = t  # record the step index unconditionally so it is defined after the loop
            if done:
                break

        # update cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency of the resulting structure
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1

        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
            # env.render(os.path.join(
            #    log_dir, 'render_image/{}.png'.format(epoch+1)))

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # save the training history and progress logs
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines(
                'epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n'
                % (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d,  best_efficiency: %.4f\n' %
                         (best_epoch + 1, best_efficiency))
        save_model(save_name="Last")

        plot_loss_history(history,
                          os.path.join(log_dir, 'learning_loss_curve.png'))
        plot_reward_history(history,
                            os.path.join(log_dir, 'learning_reward_curve.png'))
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        plot_steps_history(history,
                           os.path.join(log_dir, 'learning_steps_curve.png'))
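

# The finish_episode functions called above are not shown in these examples. As a rough
# sketch only (an assumption, not the original code), an advantage actor-critic update
# discounts the collected rewards and compares them with the critic's value estimate;
# the hypothetical helper below illustrates just the discounted-return computation.
def _discounted_returns_sketch(rewards, gamma=0.99):
    """Compute discounted returns G_t = r_t + gamma * G_{t+1} for a list of rewards."""
    returns = []
    running = 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.insert(0, running)
    return returns

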
def actor_gcn_critic_gcn(max_episodes=5000,
                         test_name="test",
                         log_file=False,
                         save_pth=False):
    """Actor-Criticを行う.Actor,CriticはGCN
    Actorの指定できるものは,一つのエッジのみの幅を選択できる.
    max_episodes:学習回数
    test_name:保存ファイルの名前
    log_file: Trueにすると,progress.txtに損失関数などの情報のログをとる."""

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['x'] = []
    history['x_mean'] = []
    history['x_sigma'] = []
    history['y'] = []
    history['y_mean'] = []
    history['y_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    x_y_Net = X_Y_Actor(2, 1, 400, 400).to(device).double()
    node1Net = Select_node1_model(2, 1, 400, 400).to(device).double()
    node2Net = Select_node2_model(400 + 2, 400).to(
        device).double()  # the 400 in "400 + 2" corresponds to the input3 part of Select_node1_model
    optimizer_node1 = optim.Adam(node1Net.parameters(), lr=lr_actor)
    optimizer_node2 = optim.Adam(node2Net.parameters(), lr=lr_actor)
    optimizer_xy = optim.Adam(x_y_Net.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

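    # Training loop: the environment is rebuilt from the initial graph every episode,
    # a single action is sampled and simulated, and finish_episode updates the node
    # selectors, the coordinate actor and the critic.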
    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                        output_vectors, frozen_nodes, edges_indices,
                        edges_thickness, frozen_nodes)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        action = select_action_gcn_critic_gcn(env,
                                              criticNet,
                                              node1Net,
                                              node2Net,
                                              x_y_Net,
                                              device,
                                              log_dir=log_file,
                                              history=history)
        next_nodes_pos, _, done, _ = env.step(action)
        if 4 in action['which_node']:
            env.input_nodes = [2, 4]
            env.input_vectors = np.array([[1, 0], [0, 1]])
        if 2 in action['which_node'] and 4 in action['which_node']:
            # TODO: train the policy so that it does not select the edge [2, 4]
            reward = np.array([0])
        else:
            reward = env.calculate_simulation()
        criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              x_y_Net,
                              node1Net,
                              node2Net,
                              optimizer_critic,
                              optimizer_xy,
                              optimizer_node1,
                              optimizer_node2,
                              gamma,
                              log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        if save_pth and episode % 100 == 0:
            save_model(criticNet,
                       x_y_Net,
                       os.path.join(log_dir, "pth"),
                       save_name=str(episode))

    env.close()
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    return history
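

# Usage sketch (illustrative values, not from the original source):
# history = actor_gcn_critic_gcn(max_episodes=2000,
#                                test_name="xy_actor_run",
#                                log_file=True,
#                                save_pth=True)
# history['result_efficiency'] holds one efficiency value per episode, and history.pkl
# plus learning_effi_curve.png are written under
# confirm/step5_entropy/a_gcn_c_gcn_results/xy_actor_run.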