def train(episodes, agent, env, size_board, ep_update_target, interval_mean,
          dueling, batch_size, hidden_dim_1, hidden_dim_2, hidden_dim_3, screen):
    # Data visualization (TensorBoard)
    writer = SummaryWriter(log_dir="data/test2")

    rewards_per_episode = []
    loss_per_episode = []
    steps_per_episode = []
    scores_per_episode = []
    threshold = []
    decay_step = 0

    # Track the best episode seen so far
    best_score = 0
    best_reward = 0
    best_ep = -1
    best_board = None
    best_steps = 0

    for ep in range(episodes):
        print(ep)
        done = 0
        state, valid_movements = env.reset()
        loss_ep = []
        episode_rewards = []
        steps = 0

        while True:
            draw(state.flatten(), screen)
            steps += 1

            # Choose an action among the valid movements
            action = agent.selection_action(valid_movements, state.flatten())
            eps_threshold = agent.get_threshold()
            threshold.append(eps_threshold)

            next_state, reward, done, info = env.step(action)
            episode_rewards.append(reward)

            if done == 1:
                steps_per_episode.append(steps)
                rewards_per_episode.append(np.sum(episode_rewards))
                loss_per_episode.append(np.sum(loss_ep) / steps)
                scores_per_episode.append(info["total_score"])

                # Log loss and score for visualization
                writer.add_scalar("data/test2/loss_groups", np.sum(loss_ep), ep)
                writer.add_scalar("data/test2/score_groups", reward, ep)

                if info["total_score"] > best_score:
                    best_score = info["total_score"]
                    best_reward = np.sum(episode_rewards)
                    best_ep = ep
                    best_board = deepcopy(next_state)
                    print(best_board)
                    best_steps = steps
                    agent.save_model("./test.pth")

                # Store the terminal transition
                agent.store_memory(state.flatten(), next_state.flatten(),
                                   action, reward, done)
                next_state = np.zeros((1, size_board * size_board))
            else:
                # print(next_state)
                agent.store_memory(state.flatten(), next_state.flatten(),
                                   action, reward, done)
                state = deepcopy(next_state)
                valid_movements = info["valid_movements"]

            # Learning step; train_model returns -1 while the replay memory
            # is not yet filled
            loss = agent.train_model()
            if loss != -1:
                loss_ep.append(loss)

            if done == 1:
                break

        # Periodically sync the target network
        if ep % ep_update_target == 0:
            print("Update")
            agent.update_target_net()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_state = torch.tensor(state.flatten(), dtype=torch.float32).view(1, 1, -1)
    with SummaryWriter(log_dir="data/test2", comment='DQN_NET') as w:
        w.add_graph(
            Net1(size_board * size_board, hidden_dim_1, hidden_dim_2, hidden_dim_3, 4),
            (batch_state, ))
    writer.export_scalars_to_json("data/all_scalars.json")
    writer.close()

    print("***********************")
    print("Best ep", best_ep)
    print("Best Board:")
    print(best_board)
    print("Best step", best_steps)
    print("Best score", best_score)
    if dueling is True:
        print("Dueling type")
    else:
        print("No-dueling type")
    print("Update Target_Net period", ep_update_target)
    print("Batch size", batch_size)
    print("***********************")

    agent.save_model("./test.pth")

    plot_info(
        steps_per_episode,
        rewards_per_episode,
        loss_per_episode,
        scores_per_episode,
        interval_mean,
        episodes,
        threshold,
    )
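# Usage note (an assumption, not part of the training code): the scalars logged
# above are written to ./data/test2 and can be inspected with the standard
# TensorBoard CLI, assuming TensorBoard is installed, e.g.:
#
#     tensorboard --logdir data/test2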
def play(env, agent, episodes, interval_mean, screen):
    rewards_per_episode = []
    loss_per_episode = []
    steps_per_episode = []
    scores_per_episode = []
    threshold = []
    decay_step = 0

    # Track the best episode seen so far
    best_score = 0
    best_ep = 0
    best_board = 0
    best_steps = 0
    best_reward = 0

    ep = 0
    for ep in range(episodes):
        # while True:
        print(ep)
        done = 0
        state, valid_movements = env.reset()
        loss_ep = []
        episode_rewards = []
        steps = 0

        while True:
            done = 0
            draw(state.flatten(), screen)
            steps += 1

            action = agent.selection_action(valid_movements, state.flatten())
            next_state, reward, done, info = env.step(action)
            # Accumulate the step reward for the episode statistics
            episode_rewards.append(reward)

            if done == 1:
                steps_per_episode.append(steps)
                rewards_per_episode.append(np.sum(episode_rewards))
                loss_per_episode.append(np.sum(loss_ep) / steps)
                scores_per_episode.append(info["total_score"])

                if info["total_score"] > best_score:
                    best_score = info["total_score"]
                    best_reward = np.sum(episode_rewards)
                    best_ep = ep
                    best_board = deepcopy(next_state)
                    best_steps = steps
                    print(best_board)
            else:
                state = deepcopy(next_state)
                valid_movements = info["valid_movements"]

            if done == 1:
                break

    print("***********************")
    print("Best ep {}".format(best_ep))
    print("Best reward", best_reward)
    print("Best Board:")
    print(best_board)
    print("Best step", best_steps)
    print("Best score", best_score)

    plot_info(
        steps_per_episode,
        rewards_per_episode,
        loss_per_episode,
        scores_per_episode,
        interval_mean,
        episodes,
        threshold,
    )
def train(
    dqn_net,
    target_net,
    env,
    memory,
    batch_size,
    size_board,
    episodes,
    ep_update_target,
    decay_rate,
    explore_start,
    explore_stop,
    learning_rate,
    gamma,
    interval_mean,
):
    # Using GPU or CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dqn_net.to(device)
    target_net.to(device)

    print("Starting training...")

    decay_step = 0
    total_steps_per_episode = []
    total_rewards_per_episode = []
    total_loss_per_episode = []
    total_score_per_episode = []
    best_board = None
    best_reward = 0
    best_score = 0
    best_steps = 0
    best_ep = -1

    # Optimizer
    optimizer = optim.RMSprop(dqn_net.parameters(), lr=learning_rate)

    for ep in range(episodes):
        # Set step to 0
        step = 0
        # Rewards of the episode
        episode_rewards = []
        board, valid_movements = env.reset()
        state = to_power_two_matrix(board)
        done = False
        loss_ep = []

        while True:
            step += 1
            # Increase decay step so the policy gradually prefers the network
            # output over a random action
            decay_step += 1

            # Epsilon-greedy action with exponentially decaying threshold
            eps_threshold = explore_stop + (explore_start - explore_stop) * np.exp(
                -decay_rate * decay_step)
            action = selection_action(eps_threshold, valid_movements, dqn_net,
                                      state, size_board, device)
            new_board, reward, done, info = env.step(action)

            # Add episode reward inside list
            episode_rewards.append(reward)

            if done:
                total_steps_per_episode.append(step)
                # Terminal transitions store a zeroed next state
                next_state = np.zeros((1, size_board, size_board, 16))
                total_reward = np.sum(episode_rewards)
                total_rewards_per_episode.append(total_reward)
                memory.store(state, action, reward, next_state, done)

                loss_total_ep = np.sum(loss_ep) / step
                total_loss_per_episode.append(loss_total_ep)
                total_score_per_episode.append(info["total_score"])

                print("Episode:", ep)
                print("Total reward:", total_reward)
                print("Total steps:", step)
                print("Eps_threshold:", eps_threshold)
                print("Loss ep:", loss_total_ep)
                env.render()
                print("---------------------------")

                if info["total_score"] > best_score:
                    best_score = info["total_score"]
                    best_reward = total_reward
                    best_ep = ep
                    best_board = deepcopy(new_board)
                    best_steps = step
            else:
                next_state = to_power_two_matrix(new_board)
                memory.store(state, action, reward, next_state, done)
                state = deepcopy(next_state)
                # Valid movements
                valid_movements = info["valid_movements"]
                # Change board
                board = deepcopy(new_board)

            # Learning part
            loss = optimize_model(
                dqn_net,
                target_net,
                memory,
                learning_rate,
                batch_size,
                size_board,
                gamma,
                optimizer,
                device,
            )
            loss_ep.append(loss)

            if done:
                break

        # Update target net
        if ep % ep_update_target == 0:
            print("Update target_net")
            target_net = deepcopy(dqn_net)

    print("***********************")
    print("Best ep", best_ep)
    print("Best Board:")
    print(best_board)
    print("Best step", best_steps)
    print("Best score", best_score)
    print("***********************")

    plot_info(
        total_steps_per_episode,
        total_rewards_per_episode,
        total_loss_per_episode,
        total_score_per_episode,
        interval_mean,
        episodes,
    )
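# Illustrative sketch, not called by the training loop: shows how the
# exploration threshold used above decays exponentially with the global decay
# step. The helper name and the default values for explore_start, explore_stop
# and decay_rate are hypothetical and used only for this demonstration.
def _show_epsilon_schedule(explore_start=1.0, explore_stop=0.01, decay_rate=1e-4):
    for decay_step in (0, 1_000, 10_000, 100_000):
        # Same formula as in train(): starts near explore_start and
        # asymptotically approaches explore_stop
        eps = explore_stop + (explore_start - explore_stop) * np.exp(
            -decay_rate * decay_step)
        print("decay_step={:>7d}  eps_threshold={:.4f}".format(decay_step, eps))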
               action='store_true')
p.add_argument('--number_of_medoids',
               help='Number of medoids to find. Default = 10',
               default=10,
               type=int)
p.add_argument('--numlocal',
               help='Number of local minima to obtain. Default = 20',
               default=20,
               type=int)
p.add_argument('--maxneighbor',
               help='Maximal number of neighbors in a cluster. Default = 80',
               default=80,
               type=int)
p.add_argument('--output',
               help='Output file name. Default = output.txt',
               default='output.txt')
p.add_argument('--input',
               help='Input file name. Default = data.txt',
               default='data.txt')

args = p.parse_args()

# Load the objects, run CLARANS, then persist and plot the clustering result
objects = read_from_file(args.input, polygons=args.polygons)

clarans_model = Clarans(objects, args.numlocal, args.maxneighbor,
                        args.number_of_medoids, args.polygons)
medoids, objects = clarans_model.run()

write_to_file(args.output, objects, polygons=args.polygons)
plot_info(medoids, objects)
plot_data(objects, medoids=medoids, clusters=True, polygons=args.polygons)
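# Example invocation (the script name `clarans.py` is hypothetical; the flags
# match the argparse definitions above, and the values shown are the defaults):
#
#     python clarans.py --input data.txt --output output.txt \
#         --number_of_medoids 10 --numlocal 20 --maxneighbor 80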