import time

import numpy as np
import pytest
import tensorflow as tf

# Project-local imports; the exact module paths are assumed here.
import card_settings
from agent import Agent
from game_cards import GameCards98
from index_map import MapIndexesToNum
from plotting import plot_stats


def feed_winner():
    """Set up a one-card endgame and return one transition from a random
    action, used to seed the agent's replay memory."""
    game = GameCards98()
    game.piles = np.random.randint(2, 100, 4)
    game.deck = []
    card = np.random.randint(2, 100)
    while card in game.piles:
        card = np.random.randint(2, 100)
    game.hand = [card]
    action = np.random.randint(0, card_settings.ACTION_SPACE)
    tra = MapIndexesToNum(4, 8)
    old_state = game.observation()
    move = tra.get_map(action)
    reward, new_state, done, info = game.step(move)
    return old_state, new_state, action, reward, done

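# Hypothetical smoke test for feed_winner, not part of the original suite: it
# only checks the transition's shape and action range, since the reward and
# terminal flag depend on the random action drawn inside feed_winner.
def test_feed_winner_returns_transition():
    transition = feed_winner()
    assert len(transition) == 5
    old_state, new_state, action, reward, done = transition
    assert 0 <= action < card_settings.ACTION_SPACE
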
def test_bigger_num():
    t1 = MapIndexesToNum(2, 2, 2)
    assert 0 == t1.get_num(0, 0, 0)
    assert 1 == t1.get_num(1, 0, 0)
    assert 2 == t1.get_num(0, 1, 0)
    assert 3 == t1.get_num(1, 1, 0)
    assert 4 == t1.get_num(0, 0, 1)
    assert 5 == t1.get_num(1, 0, 1)
    assert 6 == t1.get_num(0, 1, 1)
    assert 7 == t1.get_num(1, 1, 1)
    with pytest.raises(Exception):
        t1.get_num(3, 3)
    with pytest.raises(Exception):
        t1.get_num(2, 2, 3)

def test_s3_map():
    t1 = MapIndexesToNum(3, 3)
    assert (0, 0) == t1.get_map(0)
    assert (1, 0) == t1.get_map(1)
    assert (2, 0) == t1.get_map(2)
    assert (0, 1) == t1.get_map(3)
    assert (1, 1) == t1.get_map(4)
    assert (2, 1) == t1.get_map(5)
    assert (0, 2) == t1.get_map(6)
    assert (1, 2) == t1.get_map(7)
    assert (2, 2) == t1.get_map(8)
    with pytest.raises(Exception):
        t1.get_map(9)

def show_game():
    # TF1-style session, created only to apply the GPU memory options below.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.compat.v1.Session(config=config)

    agent = Agent(layers=card_settings.LAYERS)
    trans = MapIndexesToNum(4, 8)
    game = GameCards98(timeout_turn=card_settings.GAME_TIMEOUT)
    new_state = game.reset()
    done = False
    info = None
    while not done:
        states = [new_state]
        game.display_table()
        action = agent.predict(states)[0]
        move = trans.get_map(action)
        pile, hand = move
        print(f"Move: {hand + 1} -> {pile + 1}")
        rew, new_state, done, info = game.step(move)
    print(info)

def test_s2_num():
    t1 = MapIndexesToNum(2, 2)
    assert 0 == t1.get_num(0, 0)
    assert 1 == t1.get_num(1, 0)
    assert 2 == t1.get_num(0, 1)
    assert 3 == t1.get_num(1, 1)
    with pytest.raises(Exception):
        t1.get_num(2, 2)

def test_s2_map():
    t1 = MapIndexesToNum(2, 2)
    assert (0, 0) == t1.get_map(0)
    assert (1, 0) == t1.get_map(1)
    assert (0, 1) == t1.get_map(2)
    assert (1, 1) == t1.get_map(3)
    with pytest.raises(Exception):
        t1.get_map(4)

def test_universal_1():
    # Round-trip check: get_map and get_num must be inverses for assorted nums.
    t1 = MapIndexesToNum(135, 30, 525)
    for num in (40, 140, 1240, 5440, 4150, 4430, 123, 4340, 4370):
        indx = t1.get_map(num)
        assert num == t1.get_num(indx)

def test_size_10_map():
    t1 = MapIndexesToNum(10, 10)
    assert (0, 0) == t1.get_map(0)
    assert (0, 1) == t1.get_map(10)
    assert (1, 1) == t1.get_map(11)

def test_size_10_num():
    t1 = MapIndexesToNum(10, 10)
    assert 0 == t1.get_num(0, 0)
    assert 10 == t1.get_num(0, 1)
    assert 11 == t1.get_num(1, 1)

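# MapIndexesToNum itself is imported above, not defined in this file. The
# class below is a minimal reference sketch consistent with the tests above
# (the first index varies fastest, i.e. a little-endian mixed-radix code).
# It is named differently to avoid shadowing the real class, whose details
# (e.g. exact exception types) may differ.
class MapIndexesToNumSketch:
    def __init__(self, *dims):
        self.dims = dims
        self.size = 1
        for dim in dims:
            self.size *= dim

    def get_num(self, *indexes):
        # Accept both get_num(1, 0) and get_num((1, 0)); the tests use both.
        if len(indexes) == 1 and isinstance(indexes[0], (tuple, list)):
            indexes = tuple(indexes[0])
        if len(indexes) != len(self.dims):
            raise ValueError("wrong number of indexes")
        num, base = 0, 1
        for ind, dim in zip(indexes, self.dims):
            if not 0 <= ind < dim:
                raise ValueError("index out of range")
            num += ind * base
            base *= dim
        return num

    def get_map(self, num):
        if not 0 <= num < self.size:
            raise ValueError("number out of range")
        indexes = []
        for dim in self.dims:
            num, rem = divmod(num, dim)
            indexes.append(rem)
        return tuple(indexes)
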
def train_model():
    # TF1-style session, created only to apply the GPU memory options below.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.compat.v1.Session(config=config)

    try:
        episode_offset = np.load(
            f"models/{card_settings.MODEL_NAME}/last-episode-num.npy",
            allow_pickle=True)
    except FileNotFoundError:
        episode_offset = 0

    stats = {"episode": [], "eps": [], "score": [], "good_moves": []}
    agent = Agent(layers=card_settings.LAYERS)
    trans = MapIndexesToNum(4, 8)
    time_start = time.time()
    time_save = time.time()
    EPS = iter(np.linspace(card_settings.EPS, 0, card_settings.EPS_INTERVAL))
    try:
        for episode in range(episode_offset,
                             card_settings.GAME_NUMBER + episode_offset):
            if (time.time() - time_start) > card_settings.TRAIN_TIMEOUT:
                print("Train timeout")
                break
            try:
                eps = next(EPS)
            except StopIteration:
                # Restart the epsilon schedule once it is exhausted.
                EPS = iter(np.linspace(card_settings.EPS, 0,
                                       card_settings.EPS_INTERVAL))
                eps = 0

            # Fresh batch of parallel games for this episode.
            Games = []
            States = []
            for loop_ind in range(card_settings.SIM_COUNT):
                game = GameCards98(timeout_turn=card_settings.GAME_TIMEOUT)
                state = game.reset()
                Games.append(game)
                States.append(state)
            Scores = [0] * len(Games)
            step = 0
            All_score = []
            All_steps = []
            while len(Games):
                step += 1
                Old_states = np.array(States)
                if card_settings.EPS_PROGRESIVE:
                    this_step_eps = eps * step
                elif step <= card_settings.EPS_BIAS:
                    this_step_eps = eps / card_settings.EPS_DIVIDE
                else:
                    this_step_eps = eps

                if this_step_eps > np.random.random():
                    Actions = np.random.randint(0, card_settings.ACTION_SPACE,
                                                size=(len(Old_states)))
                    was_random_move = True
                else:
                    Actions = agent.predict(Old_states)
                    was_random_move = False

                Dones = []
                Rewards = []
                States = []
                for g_index, game in enumerate(Games):
                    move = trans.get_map(Actions[g_index])
                    reward, state, done, info = game.step(action=move)
                    if not reward:
                        print(f"WINNER!!!! \n{reward}")
                    Rewards.append(reward)
                    Scores[g_index] += reward
                    Dones.append(done)
                    States.append(state)

                if card_settings.ALLOW_TRAIN:
                    for old_s, act, rew, n_st, dn in zip(
                            Old_states, Actions, Rewards, States, Dones):
                        agent.add_memmory(old_s, n_st, act, rew, dn)
                    if card_settings.STEP_TRAIN:
                        for x in range(card_settings.TRAIN_AMOUNT):
                            agent.train_model()

                # Walk backwards so pops do not shift the remaining indexes.
                for ind_d in range(len(Games) - 1, -1, -1):
                    if Dones[ind_d]:
                        All_score.append(Scores[ind_d])
                        All_steps.append(Games[ind_d].move_count)
                        if not was_random_move:
                            # episode already includes episode_offset.
                            stats['episode'].append(episode)
                            stats['eps'].append(eps)
                            stats['score'].append(Scores[ind_d])
                            stats['good_moves'].append(step)
                        Scores.pop(ind_d)
                        Games.pop(ind_d)
                        States.pop(ind_d)

            if (card_settings.ALLOW_TRAIN
                    and card_settings.FEED_WINNER_CHANCE > np.random.random()):
                for x in range(card_settings.FEED_AMOUNT):
                    agent.add_memmory(*feed_winner())
            if card_settings.ALLOW_TRAIN and not episode % card_settings.TRAIN_EVERY:
                agent.train_model()

            if eps < 0.01:
                print(f"'{card_settings.MODEL_NAME}-{agent.plot_num}' "
                      f"best-score: {np.max(All_score):>6.1f}, "
                      f"avg-score: {np.mean(All_score):>6.2f}, "
                      f"worst-score: {np.min(All_score):>6.1f}, "
                      f"best-moves: {np.max(All_steps):>3}, "
                      f"avg-moves: {np.round(np.mean(All_steps)):>3.0f}, "
                      f"worst-moves: {np.min(All_steps):>2}, "
                      f"eps: {eps:<5.2f}")
            if time.time() - time_save > card_settings.SAVE_INTERVAL:
                time_save = time.time()
                agent.save_all()
    except KeyboardInterrupt:
        if card_settings.ALLOW_TRAIN:
            agent.save_all()
        print("Keyboard STOP!")

    duration = (time.time() - time_start) / 60
    # Guard against division by zero when training stops on the first episode.
    games_played = max(1, episode - episode_offset)
    print(f"Train duration: {duration:<6.2f}m, "
          f"per 1k games: {duration * 1000 / games_played:<6.2f}m")

    if card_settings.ALLOW_TRAIN:
        agent.save_all()
        np.save(f"models/{card_settings.MODEL_NAME}/last-episode-num.npy",
                episode)
    print(f"Training end: {card_settings.MODEL_NAME}")
    print("\nPARAMS:")
    print(f"Learning rate: {card_settings.ALPHA}")
    print(f"BATCH_SIZE: {card_settings.BATCH_SIZE}")
    print(f"MIN_BATCH_SIZE: {card_settings.MIN_BATCH_SIZE}")
    print(f"MAX_BATCH_SIZE: {card_settings.MAX_BATCH_SIZE}")
    print(f"MEMOR_MAX_SIZE: {card_settings.MEMOR_MAX_SIZE}")
    print()
    # print(f"EPS_BIAS: {card_settings.EPS_BIAS}")
    # print(f"EPS_DIVIDE: {card_settings.EPS_DIVIDE}")
    # print(f"SIM_COUNT: {card_settings.SIM_COUNT}")
    print(f"Layers: {agent.layers}")
    if card_settings.PLOT_AFTER:
        plot_stats(stats)