def main(): """Train LSTM model.""" # Import data os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1" mirrored_strategy = tf.distribute.MirroredStrategy(["/gpu:0", "/gpu:1"]) mirrored_strategy = tf.distribute.MirroredStrategy() with mirrored_strategy.scope(): X, Y = import_dataset("gender") X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, shuffle=True, random_state=RAND_STATE) X_train, y_train = utils.augment_dataset(X_train, y_train) data = { 'X_train': X_train, 'X_test': X_test, 'y_train': y_train, 'y_test': y_test, } # Define model parameters model_type = "ResNet50" input_shape = (None, X_train.shape[2], X_train.shape[3], X_train.shape[4]) filename = f"{model_type}_complete_gender_vFinal2" batch = 128 hyperparameters = { "epochs": 1000, "batch_size": batch, "loss": "categorical_crossentropy", "optimizer": keras.optimizers.Adam(), "metrics": ["accuracy"], "save_file": f"models/{filename}_b{batch}.h5" } # try: classes = y_train.shape[1] except: classes = 1 # Choose and train model model = choose_model(input_shape, classes, model_type) train_model(model, data, hyperparameters) return None
def load_dataset(self):
    train_df = import_file("train", path=self.dataset_path)
    dev_df = import_file("dev", path=self.dataset_path)
    test_df = import_file("test", path=self.dataset_path)

    train_df_aug = augment_dataset(
        train_df,
        lambda score: score < -1,
        lambda score: score < -0.3,
        lambda score: score > 0.55,
        lambda score: score > 1,
        lambda score: score > 1.3,
    )

    self.dataLoader_train = get_data_loader(train_df, batch_size=32)
    self.dataLoader_train_aug = get_data_loader(train_df_aug, batch_size=32)
    self.dataLoader_dev = get_data_loader(dev_df, batch_size=32)
    self.dataLoader_test = get_data_loader(test_df, batch_size=32, test=True)

    # Masked versions of the dataset (load both into memory to avoid recalculation)
    self.dataLoader_train_masked = get_data_loader_masked(train_df, batch_size=32)
    self.dataLoader_train_aug_masked = get_data_loader_masked(
        train_df_aug, batch_size=32
    )
    self.dataLoader_dev_masked = get_data_loader_masked(dev_df, batch_size=32)
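# Hypothetical sketch of the kind of helper load_dataset() assumes behind
# get_data_loader, assuming a pandas DataFrame and a torch DataLoader backend.
# The column names ("text", "score") are placeholders, not the project's schema,
# and this is not the project's actual implementation.
import torch
from torch.utils.data import DataLoader, Dataset


class ScoredTextDataset(Dataset):
    """Wraps a DataFrame of examples that carry a real-valued score column."""

    def __init__(self, df):
        self.texts = df["text"].tolist()
        self.scores = torch.tensor(df["score"].values, dtype=torch.float)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return self.texts[idx], self.scores[idx]


def get_data_loader(df, batch_size=32, test=False):
    # Shuffle the train/dev splits; keep the test split in its original order.
    return DataLoader(ScoredTextDataset(df), batch_size=batch_size, shuffle=not test)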
def self_play(n_selfplay):
    global cur_memory, rep_memory
    global Agent

    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()

    if RESIGN_MODE:
        resign_val_black = []
        resign_val_white = []
        resign_val = []
        resign_v = -1.0
        n_resign_thres = N_SELFPLAY // 4

    for episode in range(n_selfplay):
        if (episode + 1) % 10 == 0:
            logging.warning('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0,)
        win_index = 0
        time_steps = 0
        action_index = None

        if RESIGN_MODE:
            resign_index = 0

        while win_index == 0:
            if PRINT_SELFPLAY:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #
            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0
            pi = Agent.get_pi(root_id, tau)

            # ===================== collect samples ======================== #
            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)
            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #
            action, action_index = utils.get_action(pi)
            root_id += (action_index,)

            # ====================== print evaluation ====================== #
            if PRINT_SELFPLAY or RESIGN_MODE:
                # The value estimate is needed both for display and for the
                # resignation check.
                Agent.model.eval()
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = Agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                if PRINT_SELFPLAY:
                    print('\nPi:\n{}'.format(
                        pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                    print('\nPolicy:\n{}'.format(
                        p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                if turn == 0:
                    if PRINT_SELFPLAY:
                        print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            # Calibration phase: record values, never resign.
                            resign_val_black.append(v)
                        elif v < resign_v:
                            resign_index = 2
                            if PRINT_SELFPLAY:
                                print('"Black Resign!"')
                else:
                    if PRINT_SELFPLAY:
                        print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            resign_val_white.append(v)
                        elif v < resign_v:
                            resign_index = 1
                            if PRINT_SELFPLAY:
                                print('"White Resign!"')

            # =========================== step ============================= #
            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #
            if RESIGN_MODE:
                if resign_index != 0:
                    win_index = resign_index
                    result['Resign'] += 1

            if win_index != 0:
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            # Only the winner's value trace feeds the calibration.
                            for val in resign_val_black:
                                resign_val.append(val)
                            resign_val_black.clear()
                            resign_val_white.clear()
                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_white.clear()
                            resign_val_black.clear()
                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            for val in resign_val_black:
                                resign_val.append(val)
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_black.clear()
                            resign_val_white.clear()

                if RESIGN_MODE:
                    if episode + 1 == n_resign_thres:
                        # Calibration done: resign below the lowest value ever
                        # reported by a game's eventual winner.
                        resign_v = min(resign_val)
                        resign_val.clear()
                        if PRINT_SELFPLAY:
                            print('Resign win%: {:.2f}%'.format(
                                (resign_v + 1) / 2 * 100))

                # ====================== store in memory ======================= #
                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

                # ========================= result =========================== #
                if PRINT_SELFPLAY:
                    utils.render_str(board, BOARD_SIZE, action_index)
                    bw, ww, dr, rs = result['Black'], result['White'], \
                        result['Draw'], result['Resign']
                    print('')
                    print('=' * 20, " {:3} Game End ".format(episode + 1), '=' * 20)
                    print('Black Win: {:3} '
                          'White Win: {:3} '
                          'Draw: {:2} '
                          'Win%: {:.2f}%'
                          '\nResign: {:2}'.format(
                              bw, ww, dr,
                              (bw + 0.5 * dr) / (bw + ww + dr) * 100, rs))
                    print('current memory size:', len(cur_memory))

        Agent.reset()

    rep_memory.extend(utils.augment_dataset(cur_memory, BOARD_SIZE))
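# Standalone sketch of the resign calibration used in self_play() above: for the
# first quarter of games nobody resigns and the values seen by the eventual
# winner are recorded; afterwards a player resigns when its value estimate falls
# below the lowest value a winning side ever reported. ResignCalibrator is an
# illustrative helper, not part of the original code.
class ResignCalibrator:

    def __init__(self, n_calibration):
        self.n_calibration = n_calibration
        self.winner_values = []
        self.threshold = -1.0  # -1.0 effectively disables resignation

    def record_game(self, episode, values_black, values_white, win_index):
        """Collect the winner's value trace during the calibration phase."""
        if episode >= self.n_calibration:
            return
        if win_index == 1:            # Black won: keep Black's values
            self.winner_values.extend(values_black)
        elif win_index == 2:          # White won: keep White's values
            self.winner_values.extend(values_white)
        else:                         # Draw: keep both traces
            self.winner_values.extend(values_black)
            self.winner_values.extend(values_white)
        if episode + 1 == self.n_calibration and self.winner_values:
            self.threshold = min(self.winner_values)

    def should_resign(self, episode, value):
        """Resign only after calibration, when the value drops below the threshold."""
        return episode >= self.n_calibration and value < self.threshold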
def self_play(agent, cur_memory, rank=0):
    agent.model.eval()

    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()

    episode = 0
    while True:
        if (episode + 1) % 10 == 0:
            logging.info('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0,)
        win_index = 0
        time_steps = 0
        action_index = None

        while win_index == 0:
            if PRINT_SELFPLAY and rank == 0:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #
            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0
            pi = agent.get_pi(root_id, tau, rank)

            # ===================== collect samples ======================== #
            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)
            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #
            action, action_index = utils.get_action(pi)
            root_id += (action_index,)

            # ====================== print evaluation ====================== #
            if PRINT_SELFPLAY and rank == 0:
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                print('\nPi:\n{}'.format(
                    pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                print('\nPolicy:\n{}'.format(
                    p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                if turn == 0:
                    print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                else:
                    print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))

            # =========================== step ============================= #
            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #
            if win_index != 0:
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1
                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1
                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1

                # ====================== store in memory ======================= #
                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

                # ========================= result =========================== #
                if PRINT_SELFPLAY and rank == 0:
                    utils.render_str(board, BOARD_SIZE, action_index)
                    bw, ww, dr = result['Black'], result['White'], result['Draw']
                    print('')
                    print('=' * 20, " {:3} Game End ".format(episode + 1), '=' * 20)
                    print('Black Win: {:3} '
                          'White Win: {:3} '
                          'Draw: {:2} '
                          'Win%: {:.2f}%'.format(
                              bw, ww, dr,
                              (bw + 0.5 * dr) / (bw + ww + dr) * 100))
                    print('current memory size:', len(cur_memory))

        episode += 1
        agent.reset()

        # Stop once enough samples have been collected and return the
        # augmented memory.
        if len(cur_memory) >= MEMORY_SIZE:
            return utils.augment_dataset(cur_memory, BOARD_SIZE)
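# A guess at what utils.augment_dataset does with the self-play memory: the
# standard AlphaZero-style 8-fold board-symmetry augmentation. It assumes each
# state is a (planes, board_size, board_size) array and each pi is a flat
# length board_size**2 policy vector; the project's real helper may differ.
import numpy as np


def augment_dataset_sketch(memory, board_size):
    augmented = []
    for state, pi, reward in memory:
        state = np.asarray(state)
        pi_board = np.asarray(pi).reshape(board_size, board_size)
        for k in range(4):
            # Rotate the spatial axes of the state planes and the policy together.
            rot_state = np.rot90(state, k, axes=(1, 2))
            rot_pi = np.rot90(pi_board, k)
            augmented.append((rot_state, rot_pi.flatten(), reward))
            # Mirror each rotation to cover all eight board symmetries.
            augmented.append((rot_state[:, :, ::-1].copy(),
                              np.fliplr(rot_pi).flatten(), reward))
    return augmented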