def test_self_play(self):
    """Verify the number of ``predict_on_batch`` calls self-play makes per move.

    ``predict_on_batch`` on a ``DummyModel`` instance is wrapped so every call
    bumps ``self.count``; the tally is then compared with the number of moves
    recorded in the returned games.
    """
    model = DummyModel()
    self.count = 0
    original_predict = model.predict_on_batch

    def counting_predict(first_arg, *args, **kwargs):
        # Record the call, then hand everything to the unpatched method.
        self.count += 1
        return original_predict(first_arg, *args, **kwargs)

    model.predict_on_batch = counting_predict

    # The mcts batch size is 8 and at least one batch is needed, so:
    #   8 simulations  -> 1 prediction for mcts simulation + 1 to get policy
    #   32 simulations -> 4 predictions for mcts simulation + 1 to get policy
    scenarios = ((8, 2), (32, 5))
    for simulations, predictions_per_move in scenarios:
        self.count = 0
        games_data = self_play(model, n_games=2, mcts_simulations=simulations)
        self.assertEqual(len(games_data), 2)
        moves = sum(len(game['moves']) for game in games_data)
        self.assertEqual(self.count, predictions_per_move * moves)
def create_initial_model(name):
    """Return the model stored under ``name``, creating it first if needed.

    If ``<MODEL_DIR>/<name>.h5`` already exists it is loaded and returned
    as-is.  Otherwise a new model is built, its graph is written to the
    TensorBoard log directory, self-play is run with it, and it is saved both
    under its own name and as ``best_model.h5``.

    Args:
        name: Model name; used for the ``.h5`` file name and the log directory.

    Returns:
        The loaded or freshly built model.
    """
    model_path = os.path.join(conf['MODEL_DIR'], name) + ".h5"
    if os.path.isfile(model_path):
        return load_model(model_path, custom_objects={'loss': loss})

    model = build_model(name)

    # Save graph in tensorboard. This graph has the name scopes making it look
    # good in tensorboard, the loaded models will not have the scopes.
    graph_log_dir = os.path.join(conf['LOG_DIR'], name)
    graph_writer = TensorBoard(
        log_dir=graph_log_dir,
        histogram_freq=0,
        batch_size=1,
        write_graph=True,
        write_grads=False,
    )
    graph_writer.set_model(model)
    graph_writer.on_epoch_end(0)
    graph_writer.on_train_end(0)

    # Imported at call time rather than at module level
    # (presumably to avoid a circular import - confirm).
    from self_play import self_play
    self_play(
        model,
        n_games=conf['N_GAMES'],
        mcts_simulations=conf['MCTS_SIMULATIONS'],
    )

    # The new model is stored twice: under its own name and as the best model.
    model.save(model_path)
    model.save(os.path.join(conf['MODEL_DIR'], 'best_model.h5'))
    return model
def test_model_saving_after_training(self):
    """Check that training changes the live model but not a saved copy.

    The model's predictions are recorded and the model is saved to
    ``test.h5``.  After self-play and training, the in-memory model must
    carry the name ``model_2`` and predict differently, while the copy
    reloaded from ``test.h5`` must still reproduce the recorded predictions.
    """
    checkpoint = 'test.h5'

    init_directories()
    model_name = "model_1"
    model = build_model(model_name)
    self.assertEqual(model.name, 'model_1')

    board, _player = game_init()
    policies, values = model.predict(board)

    # Remove any leftover from an earlier run.  Only filesystem errors
    # (typically "file not found") are expected and ignored here.
    try:
        os.remove(checkpoint)
    except OSError:
        pass

    try:
        model.save(checkpoint)

        self_play(model, n_games=2, mcts_simulations=32)
        train(model, game_model_name=model.name, epochs=2)
        self.assertEqual(model.name, 'model_2')

        # The trained model must no longer match the recorded predictions.
        policies2, values2 = model.predict(board)
        self.assertFalse(np.array_equal(values, values2))
        self.assertFalse(np.array_equal(policies, policies2))

        # The saved copy must still match them exactly.
        model3 = load_model(checkpoint, custom_objects={'loss': loss})
        policies3, values3 = model3.predict(board)
        self.assertTrue(np.array_equal(values, values3))
        self.assertTrue(np.array_equal(policies, policies3))
    finally:
        # Clean up even when an assertion above fails, so a failing run
        # does not leave the checkpoint behind.
        if os.path.isfile(checkpoint):
            os.remove(checkpoint)
def main():
    """Configure the TensorFlow session, then run self-play forever.

    The session is created with GPU memory growth enabled.  After the
    directories and the initial model are set up, each loop iteration loads
    the best model, plays 10 self-play games with it and clears the Keras
    session.  The loop has no exit condition.
    """
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=session_config))

    init_directories()
    # The returned model is not used below; the loop always reloads the
    # best model from load_best_model().
    initial_model = create_initial_model(name="model_1")

    while True:
        current_best = load_best_model()
        self_play(
            current_best,
            n_games=10,
            mcts_simulations=conf['MCTS_SIMULATIONS'],
        )
        K.clear_session()
def test_self_play_resign(self):
    """Check how many of 50 self-play games record a resignation value.

    A game counts as a "resign" game when both ``resign_model1`` and
    ``resign_model2`` are set (not ``None``); every other game counts as a
    "no resign" game.
    """
    model = DummyModel()
    games_data = self_play(model, n_games=50, mcts_simulations=8)
    self.assertEqual(len(games_data), 50)

    # Identity checks against None: ``!= None`` / ``== None`` would go
    # through the values' own __eq__/__ne__ instead of testing for None.
    resign_games = sum(
        1 for game in games_data
        if game['resign_model1'] is not None
        and game['resign_model2'] is not None
    )
    no_resign_games = sum(
        1 for game in games_data
        if game['resign_model1'] is None or game['resign_model2'] is None
    )

    # NOTE(review): the exact split presumably relies on self_play being
    # deterministic with DummyModel (fixed seed?) - confirm.
    self.assertEqual(resign_games, 28)
    self.assertEqual(no_resign_games, 22)
# ====================
# Run the training cycle
# ====================

# Package imports
from dual_network import dual_network
from self_play import self_play
from train_network import train_network
from evaluate_network import evaluate_network
from evaluate_best_player import evaluate_best_player

# Create the dual network
dual_network()

for cycle in range(10):
    print('Train', cycle, '====================')

    # Self-play part
    self_play()

    # Parameter update part
    train_network()

    # New-parameter evaluation part; the best player is evaluated only when
    # evaluate_network() returns a truthy result.
    if evaluate_network():
        evaluate_best_player()
def run_self_play(net_path):
    """Run self-play using the configured settings and an optional network.

    Args:
        net_path: Path handed to ``load_net_eval``.  When falsy, no network
            is loaded and ``None`` is passed to ``self_play`` instead.

    Returns:
        Whatever ``self_play`` returns.
    """
    if net_path:
        net = load_net_eval(net_path)
    else:
        net = None

    return self_play(
        config['board_size'],
        config['mcts_iterations'],
        config['temperature'],
        net,
    )
def elect_model_as_best_model(model):
    """Run self-play with ``model`` and then save it as the best model.

    Args:
        model: Model to play with; it is saved afterwards to
            ``<MODEL_DIR>/<BEST_MODEL>`` as given by ``conf``.
    """
    self_play_options = {
        'n_games': conf['N_GAMES'],
        'mcts_simulations': conf['MCTS_SIMULATIONS'],
    }
    self_play(model, **self_play_options)

    best_model_path = os.path.join(conf['MODEL_DIR'], conf['BEST_MODEL'])
    model.save(best_model_path)
features[key] = np.append(prev_array, [int(element)]) labels = np.append( labels, [convert_move_to_index(m_h.move, num_cols=settings.NUM_COLS)]) dataset = tf.data.Dataset.from_tensor_slices( (features, labels.astype(int))) dataset = dataset.shuffle(len(labels)).repeat().batch(len(labels)) return dataset return fn if __name__ == '__main__': from self_play import self_play, print_self_play_results estimator = create_estimator() self_play_result = self_play(dnn=estimator, num_games=1, num_cols=settings.NUM_COLS, num_rows=settings.NUM_ROWS, vs_random=False) print_self_play_results(self_play_result) estimator.train( input_fn=get_train_fn(move_histories=self_play_result.move_histories), steps=1)
# Training-cycle script: runs 30 rounds of self-play, training and network
# evaluation, tracking how often evaluate_network() reports success.
from dual_network import dual_network
from self_play import self_play
from train_network import train_network
from evaluate_network import evaluate_network, update_best_player
from evaluate_best_player import evaluate_best_player

dual_network()

# Number of cycles in which evaluate_network() returned True.
count = 0
# Number of consecutive cycles since the last True result; reset on success.
fail_count = 0

for i in range(30):
    print('Train', i, '======================')

    # fail_count is passed to both stages; how they use it is defined in
    # their own modules and is not visible here.
    self_play(fail_count)
    train_network(fail_count)

    # A disabled experiment ran evaluate_network() only when
    # `i % 10 == 0 and i != 0` and called update_best_player() directly on
    # all other cycles, tracking this with a `skip` flag.
    updated = evaluate_network()

    if updated == True:  # disabled extra condition: "and skip == False"
        count += 1
        fail_count = 0
    else:
        fail_count += 1

    # NOTE(review): the nesting of this check was reconstructed from a
    # whitespace-collapsed source - confirm it belongs inside the loop.
    if count > 4:
        evaluate_best_player()
# Training cycle: each round runs self-play, then training, then evaluation.
from evaluate_network import evaluate_network
from train_network import train_network
from self_play import self_play

for cycle in range(10):
    print('Train', cycle, '====================')

    # Self-play part
    self_play()

    # Parameter update part
    train_network()

    # New-parameter evaluation part
    evaluate_network()