Example #1
def feed_winner():
    # Build a synthetic one-card game state and return a single transition
    # (old_state, new_state, action, reward, done) for the replay memory.
    game = GameCards98()
    game.piles = np.random.randint(2, 100, 4)
    game.deck = []
    card = np.random.randint(2, 100)
    while card in game.piles:  # redraw until the card differs from every pile top
        card = np.random.randint(2, 100)
    game.hand = [card]
    action = np.random.randint(0, card_settings.ACTION_SPACE)
    tra = MapIndexesToNum(4, 8)
    old_state = game.observation()
    move = tra.get_map(action)
    reward, new_state, done, info = game.step(move)
    return old_state, new_state, action, reward, done
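feed_winner() is consumed by train_model() further down, which mixes these synthetic transitions into the agent's replay memory. A minimal usage sketch mirroring that call site, assuming the surrounding module already provides agent, card_settings, and np:

# Occasionally seed the replay memory with synthetic transitions,
# exactly as train_model() does below.
if card_settings.FEED_WINNER_CHANCE > np.random.random():
    for _ in range(card_settings.FEED_AMOUNT):
        agent.add_memmory(*feed_winner())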
def test_bigger_num():
    t1 = MapIndexesToNum(2, 2, 2)
    assert 0 == t1.get_num(0, 0, 0)
    assert 1 == t1.get_num(1, 0, 0)
    assert 2 == t1.get_num(0, 1, 0)
    assert 3 == t1.get_num(1, 1, 0)
    assert 4 == t1.get_num(0, 0, 1)
    assert 5 == t1.get_num(1, 0, 1)
    assert 6 == t1.get_num(0, 1, 1)
    assert 7 == t1.get_num(1, 1, 1)
    with pytest.raises(Exception):
        t1.get_num(3, 3)
    with pytest.raises(Exception):
        t1.get_num(2, 2, 3)
def test_s3_map():
    t1 = MapIndexesToNum(3, 3)
    assert (0, 0) == t1.get_map(0)
    assert (1, 0) == t1.get_map(1)
    assert (2, 0) == t1.get_map(2)
    assert (0, 1) == t1.get_map(3)
    assert (1, 1) == t1.get_map(4)
    assert (2, 1) == t1.get_map(5)
    assert (0, 2) == t1.get_map(6)
    assert (1, 2) == t1.get_map(7)
    assert (2, 2) == t1.get_map(8)
    with pytest.raises(Exception):
        t1.get_map(9)
Example #4
def show_game():
    # Limit TensorFlow's GPU memory usage before the agent builds its model.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.compat.v1.Session(config=config)

    agent = Agent(layers=card_settings.LAYERS)
    trans = MapIndexesToNum(4, 8)
    game = GameCards98(timeout_turn=card_settings.GAME_TIMEOUT)
    new_state = game.reset()
    done = False
    info = None

    # Play one game greedily: the agent's action index is decoded into a
    # (pile, hand) move and applied until the game ends.
    while not done:
        states = [new_state]
        game.display_table()
        action = agent.predict(states)[0]
        move = trans.get_map(action)
        pile, hand = move
        print(f"Move: {hand + 1} -> {pile + 1}")
        rew, new_state, done, info = game.step(move)
    print(info)
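As a quick sanity check of the action decoding used above, based on the MapIndexesToNum behaviour established by the tests in this listing (the 4 pile slots come first in the decoded tuple, the 8 hand slots second):

trans = MapIndexesToNum(4, 8)
assert trans.get_map(0) == (0, 0)    # action 0  -> pile 1, hand card 1
assert trans.get_map(5) == (1, 1)    # action 5  -> pile 2, hand card 2
assert trans.get_map(31) == (3, 7)   # action 31 -> pile 4, hand card 8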
def test_s2_num():
    t1 = MapIndexesToNum(2, 2)
    assert 0 == t1.get_num(0, 0)
    assert 1 == t1.get_num(1, 0)
    assert 2 == t1.get_num(0, 1)
    assert 3 == t1.get_num(1, 1)
    with pytest.raises(Exception):
        t1.get_num(2, 2)
def test_s2_map():
    t1 = MapIndexesToNum(2, 2)
    assert (0, 0) == t1.get_map(0)
    assert (1, 0) == t1.get_map(1)
    assert (0, 1) == t1.get_map(2)
    assert (1, 1) == t1.get_map(3)
    with pytest.raises(Exception):
        t1.get_map(4)
def test_universal_1():
    t1 = MapIndexesToNum(135, 30, 525)
    # Round trip: mapping a number to indexes and back must return the same number.
    for num in (40, 140, 1240, 5440, 4150, 4430, 123, 4340, 4370):
        indx = t1.get_map(num)
        assert num == t1.get_num(indx)
def test_size_10_map():
    t1 = MapIndexesToNum(10, 10)
    assert (0, 0) == t1.get_map(0)
    assert (0, 1) == t1.get_map(10)
    assert (1, 1) == t1.get_map(11)
def test_size_10_num():
    t1 = MapIndexesToNum(10, 10)
    assert 0 == t1.get_num(0, 0)
    assert 10 == t1.get_num(0, 1)
    assert 11 == t1.get_num(1, 1)
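Taken together, the tests above pin down the index layout: the first index varies fastest, so for sizes (s0, s1, ...) the number is i0 + i1*s0 + i2*s0*s1 + ... . Below is a minimal re-implementation consistent with those tests; it is a sketch for illustration only, not the repository's actual MapIndexesToNum code, and the class name is hypothetical.

import math


class MapIndexesToNumSketch:
    """Hypothetical stand-in that satisfies the tests above."""

    def __init__(self, *sizes):
        self.sizes = sizes
        self.max_num = math.prod(sizes)

    def get_num(self, *indexes):
        # Accept both get_num(1, 0) and get_num((1, 0)), as test_universal_1 does.
        if len(indexes) == 1 and isinstance(indexes[0], (tuple, list)):
            indexes = tuple(indexes[0])
        if len(indexes) != len(self.sizes):
            raise ValueError("wrong number of indexes")
        num, stride = 0, 1
        for ind, size in zip(indexes, self.sizes):
            if not 0 <= ind < size:
                raise ValueError("index out of range")
            num += ind * stride
            stride *= size
        return num

    def get_map(self, num):
        if not 0 <= num < self.max_num:
            raise ValueError("number out of range")
        indexes = []
        for size in self.sizes:
            indexes.append(num % size)
            num //= size
        return tuple(indexes)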
Example #10
def train_model():
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.compat.v1.Session(config=config)
    # Resume the episode counter from a previous run, if a saved checkpoint exists.
    try:
        episode_offset = np.load(
            f"models/{card_settings.MODEL_NAME}/last-episode-num.npy",
            allow_pickle=True)
    except FileNotFoundError:
        episode_offset = 0
    stats = {"episode": [], "eps": [], "score": [], "good_moves": []}

    agent = Agent(layers=card_settings.LAYERS)
    trans = MapIndexesToNum(4, 8)
    time_start = time.time()
    time_save = time.time()
    EPS = iter(np.linspace(card_settings.EPS, 0, card_settings.EPS_INTERVAL))
    try:
        for episode in range(episode_offset,
                             card_settings.GAME_NUMBER + episode_offset):
            if (time.time() - time_start) > card_settings.TRAIN_TIMEOUT:
                print("Train timeout")
                break
            try:
                eps = next(EPS)
            except StopIteration:
                EPS = iter(
                    np.linspace(card_settings.EPS, 0,
                                card_settings.EPS_INTERVAL))
                eps = 0

            Games = []  # active game instances for this episode
            States = []
            for loop_ind in range(card_settings.SIM_COUNT):
                game = GameCards98(timeout_turn=card_settings.GAME_TIMEOUT)
                state = game.reset()
                Games.append(game)
                States.append(state)

            Scores = [0] * len(Games)
            step = 0
            All_score = []
            All_steps = []
            while len(Games):
                step += 1
                Old_states = np.array(States)
                if card_settings.EPS_PROGRESIVE:
                    this_step_eps = eps * step
                elif step <= card_settings.EPS_BIAS:
                    this_step_eps = eps / card_settings.EPS_DIVIDE
                else:
                    this_step_eps = eps

                if this_step_eps > np.random.random():
                    Actions = np.random.randint(0,
                                                card_settings.ACTION_SPACE,
                                                size=(len(Old_states)))
                    was_random_move = True
                else:
                    Actions = agent.predict(Old_states)
                    was_random_move = False
                Dones = []
                Rewards = []
                States = []

                for g_index, game in enumerate(Games):
                    move = trans.get_map(Actions[g_index])
                    reward, state, done, info = game.step(action=move)
                    if not reward:
                        print(f"WINNDER!!!! {reward}")
                    Rewards.append(reward)
                    Scores[g_index] += reward
                    Dones.append(done)
                    States.append(state)

                if card_settings.ALLOW_TRAIN:
                    for old_s, act, rew, n_st, dn in zip(
                            Old_states, Actions, Rewards, States, Dones):
                        agent.add_memmory(old_s, n_st, act, rew, dn)
                    if card_settings.STEP_TRAIN:
                        for x in range(card_settings.TRAIN_AMOUNT):
                            agent.train_model()

                for ind_d in range(len(Games) - 1, -1, -1):
                    if Dones[ind_d]:

                        All_score.append(Scores[ind_d])
                        All_steps.append(Games[ind_d].move_count)

                        if not was_random_move:
                            stats['episode'].append(episode)  # episode already includes episode_offset
                            stats['eps'].append(eps)
                            stats['score'].append(Scores[ind_d])
                            stats['good_moves'].append(step)

                        Scores.pop(ind_d)
                        Games.pop(ind_d)
                        States.pop(ind_d)

            if (card_settings.ALLOW_TRAIN
                    and card_settings.FEED_WINNER_CHANCE > np.random.random()):
                for x in range(card_settings.FEED_AMOUNT):
                    agent.add_memmory(*feed_winner())

            if card_settings.ALLOW_TRAIN and not episode % card_settings.TRAIN_EVERY:
                agent.train_model()

            if eps < 0.01:
                print(f"'{card_settings.MODEL_NAME}-{agent.plot_num}' "
                      f"best-score: {np.max(All_score):>6.1f}, "
                      f"avg-score: {np.mean(All_score):>6.2f}, "
                      f"worst-score: {np.min(All_score):>6.1f}, "
                      f"best-moves: {np.max(All_steps):>3}, "
                      f"avg-moves: {np.round(np.mean(All_steps)):>3.0f}, "
                      f"worst-moves: {np.min(All_steps):>2}, "
                      f"eps: {eps:<5.2f}")
            if time.time() - card_settings.SAVE_INTERVAL > time_save:
                time_save = time.time()
                agent.save_all()

    except KeyboardInterrupt:
        if card_settings.ALLOW_TRAIN:
            agent.save_all()
        print("Keyboard STOP!")

    duration = (time.time() - time_start) / 60
    print(
        f"Train durations: {duration:<6.2f}m, per 1k games: {duration * 1000 / (episode - episode_offset):<6.2f}m"
    )

    if card_settings.ALLOW_TRAIN:
        agent.save_all()
        np.save(f"models/{card_settings.MODEL_NAME}/last-episode-num.npy",
                episode)

        print(f"Training end: {card_settings.MODEL_NAME}")
        print("\nPARAMS:")
        print(f"Learning rate: {card_settings.ALPHA}")
        print(f"BATCH_SIZE: {card_settings.BATCH_SIZE}")
        print(f"MIN_BATCH_SIZE: {card_settings.MIN_BATCH_SIZE}")
        print(f"MAX_BATCH_SIZE: {card_settings.MAX_BATCH_SIZE}")
        print(f"MEMOR_MAX_SIZE: {card_settings.MEMOR_MAX_SIZE}")
        print("")
        # print(f"EPS_BIAS: {card_settings.EPS_BIAS}")
        # print(f"EPS_DIVIDE: {card_settings.EPS_DIVIDE}")
        # print(f"SIM_COUNT: {card_settings.SIM_COUNT}")
        # print(f"EPS_DIVIDE: {card_settings.EPS_DIVIDE}")
        # print(f"EPS_DIVIDE: {card_settings.EPS_DIVIDE}")

        print(f"Layers: {agent.layers}")
        if card_settings.PLOT_AFTER:
            plot_stats(stats)