Example #1
def train(episodes, agent, env, size_board, ep_update_target, interval_mean,
          dueling, batch_size, hidden_dim_1, hidden_dim_2, hidden_dim_3,
          screen):
    # Data visualization (TensorBoard)
    writer = SummaryWriter(log_dir="data/test2")

    rewards_per_episode = []
    loss_per_episode = []
    steps_per_episode = []
    scores_per_episode = []
    threshold = []
    decay_step = 0
    best_score = 0
    best_reward = 0
    best_ep = -1
    best_board = None
    best_steps = 0

    for ep in range(episodes):
        print(ep)

        done = 0
        state, valid_movements = env.reset()
        loss_ep = []
        episode_rewards = []
        steps = 0

        while True:

            draw(state.flatten(), screen)
            steps += 1

            action = agent.selection_action(valid_movements, state.flatten())

            eps_threshold = agent.get_threshold()
            threshold.append(eps_threshold)

            next_state, reward, done, info = env.step(action)

            episode_rewards.append(reward)

            if done == 1:

                steps_per_episode.append(steps)

                rewards_per_episode.append(np.sum(episode_rewards))
                loss_per_episode.append(np.sum(loss_ep) / steps)
                scores_per_episode.append(info["total_score"])

                # Visualize the episode loss in TensorBoard
                writer.add_scalar("data/test2/loss_groups", np.sum(loss_ep),
                                  ep)

                writer.add_scalar("data/test2/score_groups", reward, ep)

                if info["total_score"] > best_score:
                    best_score = info["total_score"]
                    best_reward = np.sum(episode_rewards)
                    best_ep = ep
                    best_board = deepcopy(next_state)
                    print(best_board)
                    best_steps = steps
                    agent.save_model("./test.pth")

                # Store the terminal transition with a zeroed next state
                next_state = np.zeros((1, size_board * size_board))
                agent.store_memory(state.flatten(), next_state.flatten(),
                                   action, reward, done)
            else:
                # print(next_state)
                agent.store_memory(state.flatten(), next_state.flatten(),
                                   action, reward, done)

                state = deepcopy(next_state)

                valid_movements = info["valid_movements"]

            loss = agent.train_model()

            if loss != -1:

                loss_ep.append(loss)

            if done == 1:
                break

        if ep % ep_update_target == 0:
            print("Update")
            agent.update_target_net()

    # Export the network graph to TensorBoard using the last observed state
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_state = torch.tensor(state.flatten(),
                               dtype=torch.float32).view(1, 1, -1)

    with SummaryWriter(log_dir="data/test2", comment='DQN_NET') as w:

        w.add_graph(
            Net1(size_board * size_board, hidden_dim_1, hidden_dim_2,
                 hidden_dim_3, 4), (batch_state, ))
    writer.export_scalars_to_json("data/all_scalars.json")
    writer.close()

    print("***********************")
    print("Best ep", best_ep)
    print("Best Board:")
    print(best_board)
    print("Best step", best_steps)
    print("Best score", best_score)
    if dueling:
        print("Dueling type")
    else:
        print("No-dueling type")
    print("Update Target_Net period", ep_update_target)
    print("Batch size", batch_size)
    print("***********************")
    agent.save_model("./test.pth")

    plot_info(
        steps_per_episode,
        rewards_per_episode,
        loss_per_episode,
        scores_per_episode,
        interval_mean,
        episodes,
        threshold,
    )
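
Example #1 delegates exploration to `agent.selection_action` and `agent.get_threshold`, which are not shown. Below is a minimal, self-contained sketch of what an epsilon-greedy choice restricted to the environment's valid movements could look like; the function name `epsilon_greedy_action` and its default parameters are assumptions for illustration, not the repository's actual API.

import numpy as np
import torch


def epsilon_greedy_action(q_network, state, valid_movements, decay_step,
                          eps_start=1.0, eps_stop=0.01, decay_rate=1e-4):
    # Exponentially decayed exploration threshold (same form as in Example #3).
    eps_threshold = eps_stop + (eps_start - eps_stop) * np.exp(
        -decay_rate * decay_step)

    if np.random.rand() < eps_threshold:
        # Explore: uniform choice among the moves the environment allows.
        return int(np.random.choice(valid_movements))

    # Exploit: mask invalid moves before taking the argmax over the Q-values.
    with torch.no_grad():
        q_values = q_network(
            torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)).squeeze(0)
    mask = torch.full_like(q_values, float("-inf"))
    mask[list(valid_movements)] = 0.0
    return int(torch.argmax(q_values + mask).item())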
Example #2
def play(env, agent, episodes, interval_mean, screen):

    rewards_per_episode = []
    loss_per_episode = []
    steps_per_episode = []
    scores_per_episode = []
    threshold = []
    decay_step = 0
    best_score = 0
    best_ep = 0
    best_board = 0
    best_steps = 0
    best_reward = 0
    ep = 0
    for ep in range(episodes):
        # while True:
        print(ep)

        done = 0
        state, valid_movements = env.reset()
        loss_ep = []
        episode_rewards = []
        steps = 0

        while True:

            done = 0
            draw(state.flatten(), screen)
            steps += 1

            action = agent.selection_action(valid_movements, state.flatten())
            next_state, reward, done, info = env.step(action)

            if done == 1:

                steps_per_episode.append(steps)

                rewards_per_episode.append(np.sum(episode_rewards))
                loss_per_episode.append(np.sum(loss_ep) / steps)
                scores_per_episode.append(info["total_score"])

                if info["total_score"] > best_score:
                    best_score = info["total_score"]
                    best_reward = np.sum(episode_rewards)
                    best_ep = ep
                    best_board = deepcopy(next_state)
                    best_steps = steps
                    print(best_board)

            else:

                state = deepcopy(next_state)

                valid_movements = info["valid_movements"]

            if done == 1:
                break

    print("***********************")
    print("Best ep{}".format(best_ep))
    print("Best reward", best_reward)
    print("Best Board:")
    print(best_board)
    print("Best step", best_steps)
    print("Best score", best_score)

    plot_info(
        steps_per_episode,
        rewards_per_episode,
        loss_per_episode,
        scores_per_episode,
        interval_mean,
        episodes,
        threshold,
    )
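
`plot_info` is not reproduced in these examples; judging by its `interval_mean` argument, it presumably smooths the per-episode curves before plotting. A minimal sketch of such a rolling mean, under that assumption (the helper name `rolling_mean` is hypothetical):

import numpy as np


def rolling_mean(values, interval_mean):
    # Average `values` over a sliding window of length `interval_mean`;
    # windows not fully covered by data are dropped.
    values = np.asarray(values, dtype=np.float64)
    if len(values) < interval_mean:
        return values
    kernel = np.ones(interval_mean) / interval_mean
    return np.convolve(values, kernel, mode="valid")


# e.g. rolling_mean(rewards_per_episode, interval_mean) before plotting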
Example #3
def train(
    dqn_net,
    target_net,
    env,
    memory,
    batch_size,
    size_board,
    episodes,
    ep_update_target,
    decay_rate,
    explore_start,
    explore_stop,
    learning_rate,
    gamma,
    interval_mean,
):

    # Using GPU or CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dqn_net.to(device)
    target_net.to(device)

    print("Starting training...")
    decay_step = 0

    total_steps_per_episode = []
    total_rewards_per_episode = []
    total_loss_per_episode = []
    total_score_per_episode = []

    best_board = None
    best_reward = 0
    best_score = 0
    best_steps = 0
    best_ep = -1

    # Optimizer
    optimizer = optim.RMSprop(dqn_net.parameters(), lr=learning_rate)

    for ep in range(episodes):
        # Set step to 0
        step = 0

        # Rewards of the episode
        episode_rewards = []
        board, valid_movements = env.reset()
        state = to_power_two_matrix(board)
        done = False
        loss_ep = []

        while True:
            step += 1

            # Increase the decay step so the network output is chosen over a
            # random action more and more often
            decay_step += 1

            # Choose an action (epsilon-greedy with exponentially decaying threshold)
            eps_threshold = explore_stop + (
                explore_start - explore_stop) * np.exp(-decay_rate * decay_step)
            action = selection_action(eps_threshold, valid_movements, dqn_net,
                                      state, size_board, device)
            new_board, reward, done, info = env.step(action)

            # Add episode reward inside list
            episode_rewards.append(reward)

            if done:
                total_steps_per_episode.append(step)

                next_state = np.zeros((1, size_board, size_board, 16))

                total_reward = np.sum(episode_rewards)

                total_rewards_per_episode.append(total_reward)

                memory.store(state, action, reward, next_state, done)

                loss_total_ep = np.sum(loss_ep) / step
                total_loss_per_episode.append(loss_total_ep)

                total_score_per_episode.append(info["total_score"])

                print("Episode:", ep)
                print("Total Reward:", total_reward)
                print("Total episodes", step)
                print("Eps_threshold:", eps_threshold)
                print("Loss ep:", loss_total_ep)
                env.render()
                print("---------------------------")

                if info["total_score"] > best_score:
                    best_score = info["total_score"]
                    best_reward = total_reward
                    best_ep = ep
                    best_board = deepcopy(new_board)
                    best_steps = step

            else:
                next_state = to_power_two_matrix(new_board)

                memory.store(state, action, reward, next_state, done)

                state = deepcopy(next_state)

                # Valid movements
                valid_movements = info["valid_movements"]

                # Change board
                board = deepcopy(new_board)

            # Learning part
            loss = optimize_model(
                dqn_net,
                target_net,
                memory,
                learning_rate,
                batch_size,
                size_board,
                gamma,
                optimizer,
                device,
            )

            loss_ep.append(loss)

            if done:
                break

        # Update target net
        if ep % ep_update_target == 0:
            print("Update target_net")
            target_net = deepcopy(dqn_net)

    print("***********************")
    print("Best ep", best_ep)
    print("Best Board:")
    print(best_board)
    print("Best step", best_steps)
    print("Best score", best_score)
    print("***********************")

    plot_info(
        total_steps_per_episode,
        total_rewards_per_episode,
        total_loss_per_episode,
        total_score_per_episode,
        interval_mean,
        episodes,
    )
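
Example #3 calls `optimize_model(...)` for the learning step, but its body is not shown. The sketch below illustrates the standard one-step DQN update such a function typically performs: compute Q(s, a) with the online network, build the Bellman target r + gamma * max_a' Q_target(s', a') with the frozen target network, and take one gradient step. The batch layout (five tensors already on the device) is an assumption for illustration, not the repository's memory API.

import torch
import torch.nn.functional as F


def dqn_update(dqn_net, target_net, batch, gamma, optimizer):
    # `batch` is assumed to be (states, actions, rewards, next_states, dones),
    # each a tensor with the batch dimension first and already on the device.
    states, actions, rewards, next_states, dones = batch

    # Q(s, a) for the actions that were actually taken.
    q_values = dqn_net(states).gather(1, actions.long().unsqueeze(1)).squeeze(1)

    # Bellman target, zeroed at terminal transitions.
    with torch.no_grad():
        next_q = target_net(next_states).max(dim=1).values
        targets = rewards + gamma * next_q * (1.0 - dones.float())

    loss = F.smooth_l1_loss(q_values, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()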
Example #4
    # CLI for a CLARANS clustering run (only the tail of the function is shown)
    p.add_argument('--polygons',
                   action='store_true')
    p.add_argument('--number_of_medoids',
                   help='Number of medoids to find. Default = 10',
                   default=10,
                   type=int)
    p.add_argument('--numlocal',
                   help='Number of local minima to obtain. Default = 20',
                   default=20,
                   type=int)
    p.add_argument('--maxneighbor',
                   help='Maximum number of neighbors to examine. Default = 80',
                   default=80,
                   type=int)
    p.add_argument('--output',
                   help='Output file name. Default = output.txt',
                   default='output.txt')
    p.add_argument('--input',
                   help='Input file name. Default = data.txt',
                   default='data.txt')
    args = p.parse_args()

    objects = read_from_file(args.input, polygons=args.polygons)

    clarans_model = Clarans(objects, args.numlocal, args.maxneighbor,
                            args.number_of_medoids, args.polygons)
    medoids, objects = clarans_model.run()

    write_to_file(args.output, objects, polygons=args.polygons)
    plot_info(medoids, objects)
    plot_data(objects, medoids=medoids, clusters=True, polygons=args.polygons)
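
The `Clarans(objects, numlocal, maxneighbor, number_of_medoids, ...)` class used above is not part of these examples. For reference, here is a compact, generic sketch of the CLARANS search (Ng & Han): run `numlocal` randomized local searches, and in each one keep swapping a random medoid for a random non-medoid until `maxneighbor` consecutive swaps fail to lower the total distance cost. The distance measure and data format in the repository (especially for polygons) may differ.

import random

import numpy as np


def clarans(points, k, numlocal, maxneighbor):
    # Generic CLARANS sketch over plain coordinate vectors; returns the indices
    # of k medoids. Illustrative only, not the repository's implementation.
    points = np.asarray(points, dtype=float)
    n = len(points)

    def cost(medoid_idx):
        # Total Euclidean distance of every point to its nearest medoid.
        d = np.linalg.norm(points[:, None, :] - points[medoid_idx][None, :, :],
                           axis=2)
        return d.min(axis=1).sum()

    best_medoids, best_cost = None, float("inf")
    for _ in range(numlocal):
        current = random.sample(range(n), k)
        current_cost = cost(current)
        fails = 0
        while fails < maxneighbor:
            # Neighbor solution: swap one medoid for one random non-medoid.
            candidate = current.copy()
            candidate[random.randrange(k)] = random.choice(
                [i for i in range(n) if i not in current])
            candidate_cost = cost(candidate)
            if candidate_cost < current_cost:
                current, current_cost, fails = candidate, candidate_cost, 0
            else:
                fails += 1
        if current_cost < best_cost:
            best_medoids, best_cost = current, current_cost
    return best_medoids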