Code example #1
def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0

    best_efficiency = -1000
    best_epoch = 0

    # generate an initial graph, retrying until a connected one is obtained
    while True:
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices, origin_input_nodes,
            origin_input_vectors, origin_output_nodes, origin_output_vectors,
            origin_frozen_nodes, EDGE_THICKNESS)
        env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                        new_output_nodes, new_output_vectors, new_frozen_nodes,
                        new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run training for train_num epochs
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):

        # reset environment and episode reward
        while True:
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices,
                origin_input_nodes, origin_input_vectors, origin_output_nodes,
                origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
            env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                            new_output_nodes, new_output_vectors,
                            new_frozen_nodes, new_edges_indices,
                            new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # limit each episode to max_action steps so learning cannot loop forever
        for t in range(max_action):
            # select action from policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = env.extract_node_edge_info()

            # take the action
            state, _, done, info = env.step(action)
            # the episode did not finish within max_action steps
            if t == (max_action - 1) and not done:
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    reward = efficiency - prior_efficiency
                else:
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency

            elif continuous_trigger == 1:
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)

            ep_reward += reward
            steps = t
            if done:
                break

        # update cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency of the resulting structure
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1

        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
            # env.render(os.path.join(
            #    log_dir, 'render_image/{}.png'.format(epoch+1)))

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines(
                'epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n'
                % (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d,  best_efficiency: %.4f\n' %
                         (best_epoch + 1, best_efficiency))
        save_model(save_name="Last")

        plot_loss_history(history,
                          os.path.join(log_dir, 'learning_loss_curve.png'))
        plot_reward_history(history,
                            os.path.join(log_dir, 'learning_reward_curve.png'))
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        plot_steps_history(history,
                           os.path.join(log_dir, 'learning_steps_curve.png'))
Code example #2
Select_node2 = model.Select_node2_model(node_features + node_out_features,
                                        2).double()
Edge_thickness = model.Edge_thickness_model(node_features + node_out_features,
                                            2).double()

# create a continuous initial graph, retrying until it is connected
while True:
    new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
        origin_nodes_positions, origin_edges_indices, origin_input_nodes,
        origin_input_vectors, origin_output_nodes, origin_output_vectors,
        origin_frozen_nodes, EDGE_THICKNESS)
    env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                    new_output_nodes, new_output_vectors, new_frozen_nodes,
                    new_edges_indices, new_edges_thickness)
    env.reset()
    if env.confirm_graph_is_connected():
        # env.render('render_image/yatta.png')
        break

Saved_Action = namedtuple('SavedAction', ['action', 'value'])
Saved_prob_Action = namedtuple('SavedAction', ['log_prob'])
Saved_mean_std_Action = namedtuple('SavedAction', ['mean', 'variance'])
GCN_optimizer = optim.Adam(GCN.parameters(), lr=lr)
X_Y_optimizer = optim.Adam(X_Y.parameters(), lr=lr)
Stop_optimizer = optim.Adam(Stop.parameters(), lr=lr)
Select_node1_optimizer = optim.Adam(Select_node1.parameters(), lr=lr)
Select_node2_optimizer = optim.Adam(Select_node2.parameters(), lr=lr)
Edge_thickness_optimizer = optim.Adam(Edge_thickness.parameters(), lr=lr)


def select_action(first_node_num):