def main():
    """Train a CartPole-v0 policy network with a perturb-evaluate-update loop.

    Each outer iteration:

    * draws up to ``MAX_BATCH_EPISODES`` pairs of perturbations from
      ``sample_noise`` and scores the network under each one with
      ``eval_with_noise``; collection stops early once more than
      ``MAX_BATCH_STEPS`` environment steps have been consumed;
    * stops the whole run when the mean reward of the batch exceeds 199;
    * otherwise hands the collected noise and rewards to ``train`` and
      prints the mean reward and throughput in steps per second.
    """
    env = gym.make('CartPole-v0')
    net = model.Net(env.observation_space.shape[0], env.action_space.n)

    step_idx = 0
    while True:
        started_at = time.time()
        batch_noise, batch_reward = [], []
        batch_steps = 0

        for _ in range(MAX_BATCH_EPISODES):
            noise, neg_noise = sample_noise(net)
            batch_noise.extend((noise, neg_noise))
            # Score both members of the pair, in the same order in which
            # they were stored, so rewards line up with batch_noise.
            for perturbation in (noise, neg_noise):
                reward, steps = eval_with_noise(env, net, perturbation)
                batch_reward.append(reward)
                batch_steps += steps
            if batch_steps > MAX_BATCH_STEPS:
                break

        step_idx += 1
        m_reward = np.mean(batch_reward)
        if m_reward > 199:
            print('Solved in {:d} steps'.format(step_idx))
            return

        train(net, batch_noise, batch_reward)
        # The throughput figure deliberately includes the time spent in train().
        speed = batch_steps / (time.time() - started_at)
        print("%d: reward=%.2f, speed=%.2f f/s" % (step_idx, m_reward, speed))
def main():
    """Evolve a population of CartPole-v0 networks by mutation and selection.

    ``POPULATION_SIZE`` networks are created and scored with ``evaluate``.
    Every generation the population is ranked by fitness (best first), the
    statistics of the top ``PARENTS_COUNT`` entries are printed, and the run
    stops once their mean reward exceeds 199.  Otherwise the next generation
    consists of the current best entry (kept together with its recorded
    fitness) plus ``POPULATION_SIZE - 1`` children, each produced by
    ``mutate_parent`` from a uniformly chosen top parent and then evaluated.
    """
    env = gym.make('CartPole-v0')
    gen_idx = 0

    # Build every network first, then evaluate them, as two separate passes.
    nets = [
        model.Net(env.observation_space.shape[0], env.action_space.n)
        for _ in range(POPULATION_SIZE)
    ]
    population = [(candidate, evaluate(env, candidate)) for candidate in nets]

    def breed(parents):
        # Pick one of the top parents at random, mutate it and score the child.
        chosen = np.random.randint(0, PARENTS_COUNT)
        child = mutate_parent(parents[chosen][0])
        return child, evaluate(env, child)

    while True:
        population.sort(key=lambda entry: entry[1], reverse=True)
        top_rewards = [fitness for _, fitness in population[:PARENTS_COUNT]]
        reward_mean = np.mean(top_rewards)
        reward_max = np.max(top_rewards)
        reward_std = np.std(top_rewards)
        print("%d: reward_mean=%.2f, reward_max=%.2f, reward_std=%.2f" % (
            gen_idx, reward_mean, reward_max, reward_std))

        if reward_mean > 199:
            print("Solved in %d steps" % gen_idx)
            break

        ranked = population
        # The best entry survives unchanged; the rest are fresh children.
        population = [ranked[0]]
        population.extend(breed(ranked) for _ in range(POPULATION_SIZE - 1))
        gen_idx += 1
def __init__(self, model_file, player_moves_first, player_id):
    """Set up a game session backed by the network stored in ``model_file``.

    Args:
        model_file: path of the saved state dict to load into the network.
        player_moves_first: stored as-is on the instance.
        player_id: stored as-is on the instance.
    """
    # Plain bookkeeping supplied by the caller.
    self.model_file = model_file
    self.player_moves_first = player_moves_first
    self.player_id = player_id

    # Build the network and restore its weights.  The map_location callable
    # returns every storage unchanged instead of moving it to the device it
    # was saved from.
    network = model.Net(input_shape=model.OBS_SHAPE, actions_n=game.GAME_COLS)
    weights = torch.load(model_file, map_location=lambda storage, loc: storage)
    network.load_state_dict(weights)
    self.model = network

    # Fresh game: initial state, no value estimate, no moves, empty MCTS.
    self.state = game.INITIAL_STATE
    self.value = None
    self.moves = []
    self.mcts_store = mcts.MCTS()
def main():
    """Command-line entry point for the CartPole-v0 perturbation trainer.

    Options:
        --lr         value forwarded to ``train`` (default ``LEARNING_RATE``).
        --noise-std  value forwarded to ``utils.eval_with_noise``
                     (default ``NOISE_STD``).

    Each iteration gathers up to ``MAX_BATCH_EPISODES`` perturbation pairs
    (stopping early once more than ``MAX_BATCH_STEPS`` environment steps were
    used), evaluates the network under each perturbation and, unless the mean
    batch reward already exceeds 199, calls ``train`` with the batch.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--lr', type=float, default=LEARNING_RATE)
    cli.add_argument('--noise-std', type=float, default=NOISE_STD)
    args = cli.parse_args()

    env = gym.make('CartPole-v0')
    net = model.Net(env.observation_space.shape[0], env.action_space.n)

    step_idx = 0
    solved = False
    while not solved:
        t_start = time.time()
        batch_noise = []
        batch_reward = []
        batch_steps = 0

        for _ in range(MAX_BATCH_EPISODES):
            pos_noise, neg_noise = utils.sample_noise(net)
            batch_noise += [pos_noise, neg_noise]

            # Positive perturbation first, then its counterpart, so the
            # reward list stays aligned with batch_noise.
            pos_reward, pos_steps = utils.eval_with_noise(
                env, net, pos_noise, args.noise_std)
            batch_reward.append(pos_reward)
            batch_steps += pos_steps

            neg_reward, neg_steps = utils.eval_with_noise(
                env, net, neg_noise, args.noise_std)
            batch_reward.append(neg_reward)
            batch_steps += neg_steps

            if batch_steps > MAX_BATCH_STEPS:
                break

        step_idx += 1
        m_reward = np.mean(batch_reward)
        solved = m_reward > 199
        if solved:
            print('Solved in {:d} steps'.format(step_idx))
        else:
            train(net, batch_noise, batch_reward, args.lr)
            # Speed is measured over evaluation and the training update.
            speed = batch_steps / (time.time() - t_start)
            print("%d: reward=%.2f, speed=%.2f f/s" % (step_idx, m_reward, speed))
n1_win += 1 return n1_win / (n1_win + n2_win) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-n", "--name", required=True, help="Name of the run") parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") saves_path = os.path.join("saves", args.name) os.makedirs(saves_path, exist_ok=True) writer = SummaryWriter(comment="-" + args.name) net = model.Net(input_shape=model.OBS_SHAPE, actions_n=game.GAME_COLS).to(device) best_net = ptan.agent.TargetNet(net) print(net) optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9) replay_buffer = collections.deque(maxlen=REPLAY_BUFFER) mcts_store = mcts.MCTS() step_idx = 0 best_idx = 0 with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker: while True: t = time.time() prev_nodes = len(mcts_store) game_steps = 0
if __name__ == "__main__":
    # Script entry point: restore the current best network and, optionally,
    # an in-progress candidate network together with its optimizer state,
    # then leave `best_net` in eval mode and `net` in train mode.
    #
    # Checkpoints are dicts; the keys read here are 'model' (state dict),
    # 'best_idx' and, for the temp model only, 'opt' (optimizer state).
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA")
    parser.add_argument("-m", "--model", help="Model to load")
    parser.add_argument("-tm", "--tmodel", help="Temp model")
    args = parser.parse_args()
    if not args.model:
        # The best-model checkpoint is loaded unconditionally below; fail
        # here with a usage message instead of an opaque error inside torch.
        parser.error("-m/--model is required: path to the best-model checkpoint")
    # NOTE(review): CUDA runs are pinned to device index 1 — confirm intended.
    device = torch.device("cuda:1" if args.cuda else "cpu")

    saves_path = "saves"
    os.makedirs(saves_path, exist_ok=True)

    step_idx = 0

    # map_location returns each storage unchanged; the networks are moved to
    # `device` explicitly with .to(device).
    checkpoint = torch.load(args.model, map_location=lambda storage, loc: storage)
    best_net = model.Net(input_shape=model.OBS_SHAPE,
                         actions_n=actionTable.AllMoveLength).to(device)
    # strict=False: key mismatches between checkpoint and network are tolerated.
    best_net.load_state_dict(checkpoint['model'], strict=False)
    best_idx = checkpoint['best_idx']

    if args.tmodel:
        checkpoint = torch.load(args.tmodel, map_location=lambda storage, loc: storage)
        if best_idx != checkpoint['best_idx']:
            # The temp model was derived from a different best model; abort
            # with a non-zero status so callers can detect the failure.
            print('invalid tmodel')
            sys.exit(1)
        net = model.Net(input_shape=model.OBS_SHAPE,
                        actions_n=actionTable.AllMoveLength).to(device)
        net.load_state_dict(checkpoint['model'])
    else:
        # No temp model: start the candidate as a copy of the best network.
        net = copy.deepcopy(best_net)
    best_net.eval()

    optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9)
    if args.tmodel:
        # `checkpoint` still refers to the temp-model checkpoint here.
        optimizer.load_state_dict(checkpoint['opt'])
    print('best_idx: '+str(best_idx))
    net.train()
help="The list of models (at least 2) to play against each other") parser.add_argument("-r", "--rounds", type=int, default=2, help="Count of rounds to perform for every pair") parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") nets = [] for fname in args.models: net = model.Net(model.OBS_SHAPE, game.GAME_COLS) net.load_state_dict( torch.load(fname, map_location=lambda storage, loc: storage)) net = net.to(device) nets.append((fname, net)) total_agent = {} total_pairs = {} for idx1, n1 in enumerate(nets): for idx2, n2 in enumerate(nets): if idx1 == idx2: continue wins, losses, draws = 0, 0, 0 ts = time.time() for _ in range(args.rounds):
pan, won = game.move(pan, action, step) historystr.append(pan) if won>0: render(pan, player_human) print(('초' if won==1 else '한')+' 승') break cur_player = 1-cur_player step += 1 if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-m", "--model", help="The model to play") parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") print(device) modelfile = args.model if args.model else "./best_model.pth" if os.path.isfile(modelfile): checkpoint = torch.load(modelfile, map_location=lambda storage, loc: storage) if 'resBlockNum' in checkpoint: model.resBlockNum = checkpoint['resBlockNum'] net = model.Net(model.OBS_SHAPE, actionTable.AllMoveLength).to(device) net.load_state_dict(checkpoint['model'], strict=False) net.eval() while True: play_game(net, 7, 80, device) else: print(modelfile+" 파일이 존재하지 않습니다")
mp.set_start_method("spawn") parser = argparse.ArgumentParser() parser.add_argument("-n", "--name", required=True, help="Name of the run") parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA") parser.add_argument("-m", "--model", help="The model to start from") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") saves_path = os.path.join("saves", args.name) os.makedirs(saves_path, exist_ok=True) writer = SummaryWriter(comment="-" + args.name) net = model.Net(model.OBS_SHAPE, game.BOARD_SIZE**2 + 1) if args.model is None: step_idx = 0 start = 0 else: # fname = os.path.join(saves_path, args.model) fname = args.model if not os.path.exists(fname): print("{} does not exists!".format(fname)) raise RuntimeError step_idx = int(os.path.basename(args.model)[:6]) dir_name = os.path.dirname(args.model) start = step_idx / TRAIN_STEPS print("step_idx={}".format(step_idx)) step_idx = step_idx - 6 * TRAIN_STEPS net.share_memory()
default=10, help="Count of rounds to perform for every pair") parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") nets = [] for fname in args.models: checkpoint = torch.load(fname, map_location=lambda storage, loc: storage) if 'resBlockNum' in checkpoint: model.resBlockNum = checkpoint['resBlockNum'] net = model.Net(model.OBS_SHAPE, actions_n=actionTable.AllMoveLength) net.load_state_dict(checkpoint['model'], strict=False) net = net.to(device) nets.append((fname, net)) total_agent = {} total_pairs = {} for idx1, n1 in enumerate(nets): for idx2, n2 in enumerate(nets): if idx1 == idx2: continue wins, losses, draws = 0, 0, 0 ts = time.time() for _ in range(args.rounds): r, _ = model.play_game(None,