Example #1
0
def main():
    """Run the noise-based training loop on CartPole-v0.

    Every iteration draws up to ``MAX_BATCH_EPISODES`` noise pairs from
    ``sample_noise``, scores the network under each member of the pair with
    ``eval_with_noise``, and passes the collected noise and rewards to
    ``train``.  The loop stops as soon as the mean reward of a batch
    exceeds 199.
    """
    env = gym.make('CartPole-v0')

    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n
    net = model.Net(obs_size, n_actions)

    step_idx = 0
    while True:
        t_start = time.time()
        batch_noise, batch_reward = [], []
        batch_steps = 0
        for _ in range(MAX_BATCH_EPISODES):
            pos_sample, neg_sample = sample_noise(net)
            batch_noise.extend((pos_sample, neg_sample))
            # Evaluate both members of the pair, in the order they were stored.
            for sample in (pos_sample, neg_sample):
                reward, steps = eval_with_noise(env, net, sample)
                batch_reward.append(reward)
                batch_steps += steps
            # Cap the amount of environment interaction per batch.
            if batch_steps > MAX_BATCH_STEPS:
                break

        step_idx += 1
        m_reward = np.mean(batch_reward)
        if m_reward > 199:
            print('Solved in {:d} steps'.format(step_idx))
            break

        train(net, batch_noise, batch_reward)

        elapsed = time.time() - t_start
        speed = batch_steps / elapsed
        print("%d: reward=%.2f, speed=%.2f f/s" % (step_idx, m_reward, speed))
Example #2
0
def main():
    """Evolve a population of networks on CartPole-v0.

    A population of ``POPULATION_SIZE`` networks is created and scored with
    ``evaluate``.  Each generation the population is ranked by fitness, the
    statistics of the top ``PARENTS_COUNT`` entries are printed, and a new
    population is built from the best entry plus mutated copies of randomly
    chosen parents.  The loop ends when the mean reward of the parents
    exceeds 199.
    """
    env = gym.make('CartPole-v0')

    gen_idx = 0
    # Build every network first, then score them, as two separate passes.
    nets = []
    for _ in range(POPULATION_SIZE):
        nets.append(model.Net(env.observation_space.shape[0], env.action_space.n))
    population = []
    for candidate in nets:
        population.append((candidate, evaluate(env, candidate)))

    def fitness_of(entry):
        # Entries are (net, fitness) tuples.
        return entry[1]

    while True:
        population.sort(key=fitness_of, reverse=True)
        elite = population[:PARENTS_COUNT]
        rewards = [fitness_of(entry) for entry in elite]
        reward_mean = np.mean(rewards)
        reward_max = np.max(rewards)
        reward_std = np.std(rewards)

        print("%d: reward_mean=%.2f, reward_max=%.2f, reward_std=%.2f" % (gen_idx, reward_mean, reward_max, reward_std))
        if reward_mean > 199:
            print("Solved in %d steps" % gen_idx)
            break

        # The current best entry is carried over unchanged (with its stored
        # fitness); the remaining slots are filled with mutated parents.
        prev_population = population
        population = [prev_population[0]]
        for _ in range(POPULATION_SIZE - 1):
            chosen = np.random.randint(0, PARENTS_COUNT)
            child = mutate_parent(prev_population[chosen][0])
            population.append((child, evaluate(env, child)))
        gen_idx += 1
 def __init__(self, model_file, player_moves_first, player_id):
     """Load the network stored in *model_file* and reset the per-game state.

     The remaining arguments are stored on the instance unchanged.
     """
     # Constructor arguments, kept as-is.
     self.model_file = model_file
     self.player_moves_first = player_moves_first
     self.player_id = player_id

     # Build the network and restore its weights.  The identity
     # ``map_location`` hands each storage back untouched instead of
     # moving it to the device recorded in the file.
     net = model.Net(input_shape=model.OBS_SHAPE, actions_n=game.GAME_COLS)
     weights = torch.load(model_file, map_location=lambda storage, loc: storage)
     net.load_state_dict(weights)
     self.model = net

     # Fresh game bookkeeping.
     self.state = game.INITIAL_STATE
     self.value = None
     self.moves = []
     self.mcts_store = mcts.MCTS()
def main():
    """Run the noise-based CartPole-v0 training loop with CLI overrides.

    Command-line options:
        --lr         learning rate handed to ``train`` (default ``LEARNING_RATE``)
        --noise-std  value handed to ``utils.eval_with_noise``
                     (default ``NOISE_STD``)

    Each iteration collects up to ``MAX_BATCH_EPISODES`` noise pairs, scores
    the network under both members of every pair, and calls ``train`` on the
    result.  Training stops once a batch's mean reward exceeds 199.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--lr', type=float, default=LEARNING_RATE)
    cli.add_argument('--noise-std', type=float, default=NOISE_STD)
    args = cli.parse_args()

    env = gym.make('CartPole-v0')
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n
    net = model.Net(obs_size, n_actions)

    step_idx = 0
    while True:
        t_start = time.time()
        batch_noise, batch_reward = [], []
        batch_steps = 0
        for _ in range(MAX_BATCH_EPISODES):
            pos_noise, neg_noise = utils.sample_noise(net)
            batch_noise.extend((pos_noise, neg_noise))
            # Score both members of the pair, in the order they were stored.
            for sample in (pos_noise, neg_noise):
                reward, steps = utils.eval_with_noise(env, net, sample, args.noise_std)
                batch_reward.append(reward)
                batch_steps += steps
            # Cap the amount of environment interaction per batch.
            if batch_steps > MAX_BATCH_STEPS:
                break

        step_idx += 1
        m_reward = np.mean(batch_reward)
        if m_reward > 199:
            print('Solved in {:d} steps'.format(step_idx))
            break

        train(net, batch_noise, batch_reward, args.lr)

        elapsed = time.time() - t_start
        speed = batch_steps / elapsed
        print("%d: reward=%.2f, speed=%.2f f/s" % (step_idx, m_reward, speed))
Example #5
0
            n1_win += 1
    return n1_win / (n1_win + n2_win)


# Script entry point: parse options, create the output directory and the
# TensorBoard writer, build the network and optimizer, then enter the
# training loop.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    # The run name selects the sub-directory under "saves" and is appended
    # to the SummaryWriter comment.
    saves_path = os.path.join("saves", args.name)
    os.makedirs(saves_path, exist_ok=True)
    writer = SummaryWriter(comment="-" + args.name)

    net = model.Net(input_shape=model.OBS_SHAPE, actions_n=game.GAME_COLS).to(device)
    # NOTE(review): best_net wraps `net` in ptan's TargetNet; presumably it
    # holds the reference copy of the best weights so far -- confirm against
    # the ptan documentation.
    best_net = ptan.agent.TargetNet(net)
    print(net)

    optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9)

    # Bounded buffer: once REPLAY_BUFFER items are stored, appending drops
    # the oldest entry.
    replay_buffer = collections.deque(maxlen=REPLAY_BUFFER)
    mcts_store = mcts.MCTS()
    step_idx = 0
    best_idx = 0

    with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
        while True:
            # Per-iteration bookkeeping: start time, size of the MCTS store
            # before this iteration, and a step counter reset to zero.
            t = time.time()
            prev_nodes = len(mcts_store)
            game_steps = 0
Example #6
0
# Script entry point: restore the best network (and optionally an
# in-progress "temp" network plus its optimizer state) from checkpoints and
# put the two networks into eval / train mode respectively.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA")
    # The best-model checkpoint is loaded unconditionally below, so the
    # option is mandatory; without it torch.load(None) fails with an opaque
    # error instead of a usage message.
    parser.add_argument("-m", "--model", required=True, help="Model to load")
    parser.add_argument("-tm", "--tmodel", help="Temp model")
    args = parser.parse_args()
    # NOTE(review): the CUDA device index is hard-coded to 1 -- confirm this
    # matches the target machine.
    device = torch.device("cuda:1" if args.cuda else "cpu")

    saves_path = "saves"
    os.makedirs(saves_path, exist_ok=True)

    step_idx = 0

    # The identity map_location hands each storage back untouched; the
    # network is moved to `device` explicitly via .to(device).
    checkpoint = torch.load(args.model, map_location=lambda storage, loc: storage)
    best_net = model.Net(input_shape=model.OBS_SHAPE, actions_n=actionTable.AllMoveLength).to(device)
    best_net.load_state_dict(checkpoint['model'], strict=False)
    best_idx = checkpoint['best_idx']
    if args.tmodel:
        # The temp checkpoint must have been produced against the same best
        # model; otherwise abort with a non-zero exit status.
        checkpoint = torch.load(args.tmodel, map_location=lambda storage, loc: storage)
        if best_idx != checkpoint['best_idx']:
            sys.exit('invalid tmodel')
        net = model.Net(input_shape=model.OBS_SHAPE, actions_n=actionTable.AllMoveLength).to(device)
        net.load_state_dict(checkpoint['model'])
    else:
        # No temp checkpoint: start training from a copy of the best network.
        net = copy.deepcopy(best_net)
    best_net.eval()
    optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9)
    if args.tmodel:
        # `checkpoint` still refers to the temp checkpoint loaded above.
        optimizer.load_state_dict(checkpoint['opt'])
    print('best_idx: '+str(best_idx))

    net.train()
        help="The list of models (at least 2) to play against each other")
    parser.add_argument("-r",
                        "--rounds",
                        type=int,
                        default=2,
                        help="Count of rounds to perform for every pair")
    parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable CUDA")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    nets = []
    for fname in args.models:
        net = model.Net(model.OBS_SHAPE, game.GAME_COLS)
        net.load_state_dict(
            torch.load(fname, map_location=lambda storage, loc: storage))
        net = net.to(device)
        nets.append((fname, net))

    total_agent = {}
    total_pairs = {}

    for idx1, n1 in enumerate(nets):
        for idx2, n2 in enumerate(nets):
            if idx1 == idx2:
                continue
            wins, losses, draws = 0, 0, 0
            ts = time.time()
            for _ in range(args.rounds):
Example #8
0
        pan, won = game.move(pan, action, step)
        historystr.append(pan)
        if won>0:
            render(pan, player_human)
            print(('초' if won==1 else '한')+' 승')
            break
        cur_player = 1-cur_player
        step += 1

# Script entry point: load a checkpoint and play games against it in an
# endless loop; if the checkpoint file is missing, print a message instead.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", help="The model to play")
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")
    print(device)

    # Fall back to the default checkpoint path when no model is given.
    modelfile = args.model or "./best_model.pth"
    if not os.path.isfile(modelfile):
        print(modelfile+" 파일이 존재하지 않습니다")
    else:
        checkpoint = torch.load(modelfile, map_location=lambda storage, loc: storage)
        # A checkpoint may carry its own residual-block count; apply it
        # before the network is constructed.
        if 'resBlockNum' in checkpoint:
            model.resBlockNum = checkpoint['resBlockNum']
        net = model.Net(model.OBS_SHAPE, actionTable.AllMoveLength).to(device)
        net.load_state_dict(checkpoint['model'], strict=False)
        net.eval()

        # Keep starting new games until the process is interrupted.
        while True:
            play_game(net, 7, 80, device)
Example #9
0
    mp.set_start_method("spawn")
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable CUDA")
    parser.add_argument("-m", "--model", help="The model to start from")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    saves_path = os.path.join("saves", args.name)
    os.makedirs(saves_path, exist_ok=True)
    writer = SummaryWriter(comment="-" + args.name)

    net = model.Net(model.OBS_SHAPE, game.BOARD_SIZE**2 + 1)
    if args.model is None:
        step_idx = 0
        start = 0
    else:
        #        fname = os.path.join(saves_path, args.model)
        fname = args.model
        if not os.path.exists(fname):
            print("{} does not exists!".format(fname))
            raise RuntimeError
        step_idx = int(os.path.basename(args.model)[:6])
        dir_name = os.path.dirname(args.model)
        start = step_idx / TRAIN_STEPS
        print("step_idx={}".format(step_idx))
        step_idx = step_idx - 6 * TRAIN_STEPS
    net.share_memory()
Example #10
0
                        default=10,
                        help="Count of rounds to perform for every pair")
    parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable CUDA")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    nets = []
    for fname in args.models:
        checkpoint = torch.load(fname,
                                map_location=lambda storage, loc: storage)
        if 'resBlockNum' in checkpoint:
            model.resBlockNum = checkpoint['resBlockNum']
        net = model.Net(model.OBS_SHAPE, actions_n=actionTable.AllMoveLength)
        net.load_state_dict(checkpoint['model'], strict=False)
        net = net.to(device)
        nets.append((fname, net))

    total_agent = {}
    total_pairs = {}

    for idx1, n1 in enumerate(nets):
        for idx2, n2 in enumerate(nets):
            if idx1 == idx2:
                continue
            wins, losses, draws = 0, 0, 0
            ts = time.time()
            for _ in range(args.rounds):
                r, _ = model.play_game(None,