def actor_gcn_critic_gcn_mean(test_num=5,
                              max_episodes=5000,
                              test_name="test",
                              log_file=None):
    """Actor-Criticの5回実験したときの平均グラフを作成する関数"""

    log_dir = "confirm/step3/a_gcn_c_gcn_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir, exist_ok=True)

    history = {}
    for i in range(test_num):
        history["{}".format(i)] = actor_gcn_critic_gcn(
            max_episodes=max_episodes,
            test_name=os.path.join(test_name, str(i)),
            log_file=log_file)

    mean = np.stack([
        history["{}".format(i)]['result_efficiency'] for i in range(test_num)
    ])
    std = np.std(mean[:, -1])
    print('Std of the final-episode results:', std)
    mean = np.mean(mean, axis=0)

    meanhistory = {}
    meanhistory['epoch'] = history['0']['epoch']
    meanhistory['result_efficiency'] = mean

    # Save the training history
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    plot_efficiency_history(
        meanhistory, os.path.join(log_dir, 'mean_learning_effi_curve.png'))
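
The aggregation above stacks the per-run efficiency curves into a (runs, epochs) array, reports the standard deviation of the final-episode values, and averages along the run axis. A minimal, self-contained sketch of the same np.stack / np.std / np.mean pattern, using made-up run data in place of the histories returned by actor_gcn_critic_gcn:

import numpy as np

# Made-up stand-in for the per-run histories: three runs, four logged epochs each.
runs = {
    "0": {"epoch": [1, 2, 3, 4], "result_efficiency": [0.10, 0.20, 0.25, 0.30]},
    "1": {"epoch": [1, 2, 3, 4], "result_efficiency": [0.12, 0.18, 0.28, 0.33]},
    "2": {"epoch": [1, 2, 3, 4], "result_efficiency": [0.08, 0.22, 0.24, 0.29]},
}

stacked = np.stack([runs[str(i)]["result_efficiency"] for i in range(3)])  # shape (runs, epochs)
print("std of the final-epoch results:", np.std(stacked[:, -1]))
mean_history = {"epoch": runs["0"]["epoch"],
                "result_efficiency": np.mean(stacked, axis=0)}  # per-epoch mean curve
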
def actor_gcn_critic_gcn(max_episodes=5000,
                         test_name="test",
                         log_file=False,
                         save_pth=False):
    """Actor-Criticを行う.Actor,CriticはGCN
    Actorの指定できるものは,ノード1とノード2であり,一つのエッジのみを選択できる.
    max_episodes:学習回数
    test_name:保存ファイルの名前
    log_file: Trueにすると,progress.txtに損失関数などの情報のログをとる."""

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['mean_efficiency'] = []  # stores the value of η obtained when the action equals a_mean
    history['a'] = []
    history['a_mean'] = []
    history['a_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step3/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    actorNet = Select_node1_model(2, 1, 400, 400).to(device).double()
    actorNet2 = Select_node2_model(400 + 2, 400).to(device).double()
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(400).to(device).double()
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_actor2 = optim.Adam(actorNet2.parameters(), lr=lr_actor)
    optimizer_edgethick = optim.Adam(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env,
                                                  actorNet,
                                                  actorNet2,
                                                  criticNet,
                                                  edgethickNet,
                                                  device,
                                                  log_dir=log_file)

            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation()
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              actorNet,
                              actorNet2,
                              edgethickNet,
                              optimizer_critic,
                              optimizer_actor,
                              optimizer_actor2,
                              optimizer_edgethick,
                              gamma,
                              log_dir=log_file)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        if episode % 100 == 0:
            if save_pth:
                save_model(criticNet,
                           edgethickNet,
                           os.path.join(log_dir, "pth"),
                           save_name=str(episode))

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

    return history
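
A call to the function above might look like the following sketch; the test name and episode count are illustrative, and the function aborts if the target folder already exists:

# Illustrative usage, assuming the module defining actor_gcn_critic_gcn is importable.
history = actor_gcn_critic_gcn(max_episodes=2000,
                               test_name="edge_selection_run0",  # hypothetical run name
                               log_file=True,   # log losses etc. to progress.txt
                               save_pth=True)   # checkpoint the networks every 100 episodes
print("best efficiency:", max(history['result_efficiency']))
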
Example #3
                                origin_output_vectors, origin_frozen_nodes,
                                current_edges_indices, current_edges_thickness)

        if gif:
            os.makedirs(os.path.join(log_dir, 'epochs/epoch{}'.format(epoch)),
                        exist_ok=True)
            save_graph_info_npy(
                os.path.join(log_dir, 'epochs/epoch{}'.format(epoch)),
                origin_nodes_positions, barfem_input_nodes,
                origin_input_vectors, barfem_output_nodes,
                origin_output_vectors, origin_frozen_nodes,
                current_edges_indices, current_edges_thickness)
            env = BarFemOutputGym(origin_nodes_positions, barfem_input_nodes,
                                  origin_input_vectors, barfem_output_nodes,
                                  origin_output_vectors, origin_frozen_nodes,
                                  current_edges_indices,
                                  current_edges_thickness, origin_frozen_nodes)
            env.reset()
            env.render(os.path.join(log_dir,
                                    'render_image/{}.png'.format(epoch)),
                       edge_size=100)
            #render_graph(origin_nodes_positions, current_edges_indices, current_edges_thickness, os.path.join(log_dir, 'render_image/{}.png'.format(epoch)), display_number=False)

        history['epoch'].append(epoch + 1)
        history['result_efficiency'].append(current_efficiency)
        # Save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
Example #4
def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0

    best_efficiency = -1000
    best_epoch = 0

    # build an initial graph, retrying until it is connected
    while True:
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices, origin_input_nodes,
            origin_input_vectors, origin_output_nodes, origin_output_vectors,
            origin_frozen_nodes, EDGE_THICKNESS)
        env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                        new_output_nodes, new_output_vectors, new_frozen_nodes,
                        new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run train_num training episodes
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):

        # reset environment and episode reward
        while True:
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices,
                origin_input_nodes, origin_input_vectors, origin_output_nodes,
                origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
            env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                            new_output_nodes, new_output_vectors,
                            new_frozen_nodes, new_edges_indices,
                            new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # for each episode, only run max_action steps so that we don't
        # loop forever while learning
        for t in range(max_action):
            # select action from policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = env.extract_node_edge_info(
            )

            # take the action
            state, _, done, info = env.step(action)
            if t == max_action - 1 and not done:  # the episode did not finish within max_action steps
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    reward = efficiency - prior_efficiency
                else:
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency

            elif continuous_trigger == 1:
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)

            ep_reward += reward
            steps = t  # remember the last step index (also needed if the episode never terminates)
            if done:
                break

        # update cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1

        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
            # env.render(os.path.join(
            #    log_dir, 'render_image/{}.png'.format(epoch+1)))

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # Save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines(
                'epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n'
                % (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d,  best_efficiency: %.4f\n' %
                         (best_epoch + 1, best_efficiency))
        save_model(save_name="Last")

        plot_loss_history(history,
                          os.path.join(log_dir, 'learning_loss_curve.png'))
        plot_reward_history(history,
                            os.path.join(log_dir, 'learning_reward_curve.png'))
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        plot_steps_history(history,
                           os.path.join(log_dir, 'learning_steps_curve.png'))
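
The reward shaping inside the step loop above is the core of this example: a terminal penalty when the episode does not finish within max_action steps, an efficiency-plus-bonus reward the first time the graph becomes connected, a difference reward on subsequent connected steps, and a penalty when connectivity is lost again. Restated as a pure function for clarity (the standalone form and names are mine; final_penalty, continuous_reward and penalty are module-level constants in the original):

def shaped_reward(t, max_action, done, connected, continuous_trigger,
                  efficiency, prior_efficiency,
                  final_penalty, continuous_reward, penalty):
    # Mirrors the branch structure used in main() above; returns the reward plus updated state.
    if t == max_action - 1 and not done:
        return -final_penalty, continuous_trigger, prior_efficiency   # ran out of steps
    if connected:
        if continuous_trigger == 1:
            reward = efficiency - prior_efficiency       # improvement over the previous connected step
        else:
            reward = efficiency + continuous_reward      # bonus for first reaching a connected graph
            continuous_trigger = 1
        return reward, continuous_trigger, efficiency
    if continuous_trigger == 1:
        return -penalty, continuous_trigger, prior_efficiency         # connectivity was lost again
    return 0, continuous_trigger, prior_efficiency
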
def load_actor_gcn_critic_gcn(load_dir,
                              load_epoch,
                              max_episodes=5000,
                              test_name="test",
                              history=None,
                              log_file=False):
    """ActorCriticにおいて保存されpthをロードし,そこから学習を開始する.

    Args:
        load_dir ([type]): ロードする対象のpthが複数存在するディレクトリのパスを指定する.
        load_epoch ([type]): いつのepochから学習を開始するかを決める.
        max_episodes (int, optional): 学習回数. Defaults to 5000.
        test_name (str, optional): 保存ファイルの名前. Defaults to "test".
        history ([type], optional): 保存したhistory.これを指定した時,グラフにもロード結果が適用される. Defaults to None.
        log_file (bool, optional): Trueにすると,progress.txtに損失関数などの情報のログをとる. Defaults to False.
    """

    if history is None:
        history = {}
        history['epoch'] = []
        history['result_efficiency'] = []
        history['mean_efficiency'] = []  # stores the value of η obtained when the action equals a_mean
        history['a'] = []
        history['a_mean'] = []
        history['a_sigma'] = []
        history['advantage'] = []
        history['critic_value'] = []
    else:
        for key in history.keys():
            history[key] = history[key][:load_epoch]

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(2, 1, 400, 400).to(device).double()

    criticNet.load_state_dict(
        torch.load(
            os.path.join(load_dir, "pth/{}_criticNet.pth".format(load_epoch))))
    edgethickNet.load_state_dict(
        torch.load(
            os.path.join(load_dir,
                         "pth/{}_edgethickNet.pth".format(load_epoch))))

    optimizer_edgethick = optim.SGD(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(load_epoch, max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env,
                                                  criticNet,
                                                  edgethickNet,
                                                  device,
                                                  log_dir=log_file,
                                                  history=history)

            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              edgethickNet,
                              optimizer_critic,
                              optimizer_edgethick,
                              gamma,
                              log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

    return history
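
The loading side above expects checkpoints named "<epoch>_criticNet.pth" and "<epoch>_edgethickNet.pth" inside a pth/ subfolder. The save_model used elsewhere in these examples is not shown, so the following is only an assumed, compatible counterpart based on those file names:

import os
import torch

def save_model(criticNet, edgethickNet, pth_dir, save_name="0"):
    # Hypothetical counterpart to the torch.load calls above: writes
    # "<save_name>_criticNet.pth" and "<save_name>_edgethickNet.pth" under pth_dir.
    os.makedirs(pth_dir, exist_ok=True)
    torch.save(criticNet.state_dict(),
               os.path.join(pth_dir, "{}_criticNet.pth".format(save_name)))
    torch.save(edgethickNet.state_dict(),
               os.path.join(pth_dir, "{}_edgethickNet.pth".format(save_name)))
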
def actor_gcn_critic_gcn(max_episodes=5000,
                         test_name="test",
                         log_file=False,
                         save_pth=False):
    """Actor-Criticを行う.Actor,CriticはGCN
    Actorの指定できるものは,一つのエッジのみの幅を選択できる.
    max_episodes:学習回数
    test_name:保存ファイルの名前
    log_file: Trueにすると,progress.txtに損失関数などの情報のログをとる."""

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['x'] = []
    history['x_mean'] = []
    history['x_sigma'] = []
    history['y'] = []
    history['y_mean'] = []
    history['y_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    x_y_Net = X_Y_Actor(2, 1, 400, 400).to(device).double()
    node1Net = Select_node1_model(2, 1, 400, 400).to(device).double()
    node2Net = Select_node2_model(400 + 2, 400).to(
        device).double()  # the 400 in "400 + 2" corresponds to the input3 part of Select_node1_model
    optimizer_node1 = optim.Adam(node1Net.parameters(), lr=lr_actor)
    optimizer_node2 = optim.Adam(node2Net.parameters(), lr=lr_actor)
    optimizer_xy = optim.Adam(x_y_Net.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                        output_vectors, frozen_nodes, edges_indices,
                        edges_thickness, frozen_nodes)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        action = select_action_gcn_critic_gcn(env,
                                              criticNet,
                                              node1Net,
                                              node2Net,
                                              x_y_Net,
                                              device,
                                              log_dir=log_file,
                                              history=history)
        next_nodes_pos, _, done, _ = env.step(action)
        if 4 in action['which_node']:
            env.input_nodes = [2, 4]
            env.input_vectors = np.array([[1, 0], [0, 1]])
        if 2 in action['which_node'] and 4 in action[
                'which_node']:  # TODO: train the model not to select [2, 4]
            reward = np.array([0])
        else:
            reward = env.calculate_simulation()
        criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              x_y_Net,
                              node1Net,
                              node2Net,
                              optimizer_critic,
                              optimizer_xy,
                              optimizer_node1,
                              optimizer_node2,
                              gamma,
                              log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        if episode % 100 == 0:
            if save_pth:
                save_model(criticNet,
                           x_y_Net,
                           os.path.join(log_dir, "pth"),
                           save_name=str(episode))

    env.close()
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    return history
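
finish_episode itself is not part of these excerpts. As a point of reference only, a one-step actor-critic update of the kind described here (discounted returns with gamma, advantage = return minus critic value, summed policy and value losses) typically reduces to something like the generic sketch below; it is not the project's implementation:

import torch

def generic_finish_episode(rewards, log_probs, values, gamma=0.99):
    # Generic illustration: rewards is a list of floats, log_probs and values
    # are lists of scalar tensors collected during the episode.
    R, returns = 0.0, []
    for r in reversed(rewards):                     # discounted return G_t = r_t + gamma * G_{t+1}
        R = r + gamma * R
        returns.insert(0, R)
    values = torch.stack([v.reshape(()) for v in values])          # critic estimates, shape (T,)
    log_probs = torch.stack([lp.reshape(()) for lp in log_probs])
    returns = torch.tensor(returns, dtype=values.dtype)
    advantages = returns - values.detach()          # advantage scales the policy gradient
    actor_loss = -(log_probs * advantages).sum()
    critic_loss = torch.nn.functional.mse_loss(values, returns)
    return actor_loss + critic_loss
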
Example #7
def ddpg():
    """DDPGを利用して強化学習を行う.
    Actorの指定できる幅は0.1-1となっている"""

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []

    max_episodes = 500
    memory_capacity = 1e6  # replay buffer capacity
    gamma = 0.99  # discount factor
    tau = 1e-3  # target network update rate
    epsilon = 1.0  # for adjusting the amount of exploration noise; probably unnecessary
    batch_size = 64
    lr_actor = 1e-4
    lr_critic = 1e-3
    logger_interval = 10
    weight_decay = 1e-2

    test_name = "ddpg_500_v2"  # 実験名
    log_dir = "confirm/step1/ddpg_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = Step1Gym(node_pos, input_nodes, input_vectors, output_nodes,
                   output_vectors, frozen_nodes, edges_indices,
                   edges_thickness, frozen_nodes)
    env.reset()
    num_state = 2
    num_action = 1
    max_steps = 1

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    actorNet = ActorNetwork(num_state, num_action).to(device)
    criticNet = CriticNetwork(num_state, num_action).to(device)
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)
    replay_buffer = ReplayBuffer(capacity=memory_capacity)
    agent = DDPG(actorNet, criticNet, optimizer_actor, optimizer_critic,
                 replay_buffer, device, gamma, tau, epsilon, batch_size)

    for episode in range(max_episodes):
        observation = env.reset()
        observation = np.array([0, 1])
        total_reward = 0

        for step in range(max_steps):
            edges_thickness = agent.get_action(observation)
            action = {}
            action['which_node'] = np.array([0, 1])
            action['end'] = 0
            action['edge_thickness'] = edges_thickness
            action['new_node'] = np.array([[0, 2]])

            next_observation, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            next_observation = np.array([0, 1])
            total_reward += reward
            agent.add_memory(observation, edges_thickness, next_observation,
                             reward, done)
            agent.train()
            observation = next_observation
            if done:
                break

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        if reward < 0:
            print(edges_thickness)

        if episode % logger_interval == 0:
            print("episode:{} total reward:{}".format(episode, total_reward))

    for episode in range(3):
        observation = env.reset()
        observation = np.array([0, 1])
        for step in range(max_steps):
            edges_thickness = agent.get_action(observation, greedy=True)
            action = {}
            action['which_node'] = np.array([0, 1])
            action['end'] = 0
            action['edge_thickness'] = edges_thickness
            action['new_node'] = np.array([[0, 2]])

            next_observation, reward, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            observation = np.array([0, 1])

            if done:
                break

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))
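
In the DDPG setup above, tau = 1e-3 is the rate of the soft (Polyak) update that the agent presumably applies to its target networks after each training step. A generic sketch of that update, not the project's DDPG class:

import torch

def soft_update(target_net, source_net, tau):
    # Polyak averaging used by DDPG target networks:
    # theta_target <- tau * theta_source + (1 - tau) * theta_target
    with torch.no_grad():
        for target_param, source_param in zip(target_net.parameters(),
                                              source_net.parameters()):
            target_param.mul_(1.0 - tau)
            target_param.add_(tau * source_param)
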
Example #8
def actor_critic_mean(max_episodes, test_name):
    """Actor-Criticの5回実験したときの平均グラフを作成する関数"""

    test_num = 5

    log_dir = "confirm/step1/ac_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir, exist_ok=True)

    history = {}
    for i in range(test_num):
        history["{}".format(i)] = {}
        history["{}".format(i)]['epoch'] = []
        history["{}".format(i)]['result_efficiency'] = []

        node_pos, input_nodes, input_vectors,\
            output_nodes, output_vectors, frozen_nodes,\
            edges_indices, edges_thickness, frozen_nodes = easy_dev()

        max_steps = 1
        lr_actor = 1e-4
        lr_critic = 1e-3
        weight_decay = 1e-2
        gamma = 0.99

        device = torch.device('cpu')

        actorNet = Edgethick_Actor().to(device)
        criticNet = Edgethick_Critic().to(device)
        optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
        optimizer_critic = optim.Adam(criticNet.parameters(),
                                      lr=lr_critic,
                                      weight_decay=weight_decay)

        for episode in tqdm(range(max_episodes)):
            observation = np.array([0, 1])

            for step in range(max_steps):
                action = select_action(observation, actorNet, criticNet,
                                       device)
                edges_thickness = action['edge_thickness']
                displacement = barfem(node_pos,
                                      edges_indices,
                                      edges_thickness,
                                      input_nodes,
                                      input_vectors,
                                      frozen_nodes,
                                      mode="force")
                reward = displacement[1 * 3]
                criticNet.rewards.append(reward)

            loss = finish_episode(criticNet, actorNet, optimizer_critic,
                                  optimizer_actor, gamma)

            history["{}".format(i)]['epoch'].append(episode + 1)
            history["{}".format(i)]['result_efficiency'].append(reward)

            if episode % 1000 == 0:
                print("episode:{} total reward:{}".format(episode, reward))

        plot_efficiency_history(
            history["{}".format(i)],
            os.path.join(log_dir, 'learning_effi_curve{}.png'.format(i)))

    mean = np.stack([
        history["{}".format(i)]['result_efficiency'] for i in range(test_num)
    ])
    std = np.std(mean[:, -1])
    print('Std of the final-episode results:', std)
    mean = np.mean(mean, axis=0)

    meanhistory = {}
    meanhistory['epoch'] = history['0']['epoch']
    meanhistory['result_efficiency'] = mean

    # Save the training history
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    plot_efficiency_history(
        meanhistory, os.path.join(log_dir, 'mean_learning_effi_curve.png'))
Example #9
def actor_critic():
    """Actor-Criticを行う.学習はDL(GCN以外)で
    Actorの指定できる幅は0.1-1となっている"""

    max_episodes = 500
    test_name = "500_moto"  # 実験名

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []

    log_dir = "confirm/step1/ac_results/{}".format(test_name)
    assert not os.path.exists(log_dir), "folder already exists"
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = Step1Gym(node_pos, input_nodes, input_vectors, output_nodes,
                   output_vectors, frozen_nodes, edges_indices,
                   edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    actorNet = Edgethick_Actor().to(device)
    criticNet = Edgethick_Critic().to(device)
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in range(max_episodes):
        observation = env.reset()
        observation = np.array([0, 1])

        for step in range(max_steps):
            action = select_action(observation, actorNet, criticNet, device)

            next_observation, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet, actorNet, optimizer_critic,
                              optimizer_actor, gamma)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)

        if episode % 10 == 0:
            print("episode:{} total reward:{}".format(episode, reward))

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))