コード例 #1
0
ファイル: slow_tests.py プロジェクト: drsagitn/sejonggo
    def test_self_play(self):
        """Check how many network predictions self-play makes per move.

        ``predict_on_batch`` of a ``DummyModel`` is wrapped so that every
        call increments ``self.count``.  Two games are then played at two
        different simulation budgets and the call count is compared with
        the number of moves recorded for those games.
        """
        model = DummyModel()
        self.count = 0
        original_predict = model.predict_on_batch

        def counting_predict(first_arg, *rest, **options):
            # Record the call, then defer to the untouched prediction method.
            self.count += 1
            return original_predict(first_arg, *rest, **options)

        model.predict_on_batch = counting_predict

        # Each entry is (mcts_simulations, expected predictions per move).
        # The mcts batch size is 8 and at least one batch is needed, so
        # 8 simulations cost 1 prediction and 32 simulations cost 4; one
        # further prediction per move is used to get the policy.
        scenarios = ((8, 2), (32, 5))
        for simulations, predictions_per_move in scenarios:
            self.count = 0
            games_data = self_play(model, n_games=2,
                                   mcts_simulations=simulations)

            self.assertEqual(len(games_data), 2)
            moves = (len(games_data[0]['moves'])
                     + len(games_data[1]['moves']))

            self.assertEqual(self.count, predictions_per_move * moves)
コード例 #2
0
ファイル: model.py プロジェクト: hyperchi/alphagozero
def create_initial_model(name):
    """Return the model called ``name``, loading it or building it fresh.

    If ``<MODEL_DIR>/<name>.h5`` already exists it is loaded and returned
    immediately.  Otherwise a new model is built, its graph is written for
    TensorBoard, self-play is run with it, and the model is saved both under
    its own name and as ``best_model.h5`` in ``MODEL_DIR``.

    Args:
        name: Model name; used for the file name and the log sub-directory.

    Returns:
        The loaded or newly built model.
    """
    model_path = os.path.join(conf['MODEL_DIR'], name) + ".h5"
    if os.path.isfile(model_path):
        # A saved copy exists on disk; reuse it instead of rebuilding.
        return load_model(model_path, custom_objects={'loss': loss})

    model = build_model(name)

    # Save graph in tensorboard. This graph has the name scopes making it look
    # good in tensorboard, the loaded models will not have the scopes.
    graph_logger = TensorBoard(
        log_dir=os.path.join(conf['LOG_DIR'], name),
        histogram_freq=0,
        batch_size=1,
        write_graph=True,
        write_grads=False,
    )
    graph_logger.set_model(model)
    graph_logger.on_epoch_end(0)
    graph_logger.on_train_end(0)

    # Imported inside the function, as in the original implementation.
    from self_play import self_play
    self_play(
        model,
        n_games=conf['N_GAMES'],
        mcts_simulations=conf['MCTS_SIMULATIONS'],
    )

    # Persist the new model under its own name, then as the current best.
    model.save(model_path)
    best_path = os.path.join(conf['MODEL_DIR'], 'best_model.h5')
    model.save(best_path)
    return model
コード例 #3
0
ファイル: slow_tests.py プロジェクト: drsagitn/sejonggo
    def test_model_saving_after_training(self):
        """Verify that a saved model is unaffected by later training.

        A freshly built model is saved to ``test.h5``, then self-play and
        training are run on it.  The test checks that the in-memory model's
        name and predictions change, while the copy reloaded from
        ``test.h5`` still reproduces the pre-training predictions exactly.
        """
        init_directories()
        model_name = "model_1"
        model = build_model(model_name)
        self.assertEqual(model.name, 'model_1')
        # Only the board is needed; the second returned value is unused.
        board, _ = game_init()
        policies, values = model.predict(board)
        try:
            os.remove('test.h5')
        except OSError:
            # Best-effort removal of a leftover file from an earlier run.
            # Only filesystem errors (e.g. the file not existing) are
            # ignored; KeyboardInterrupt and the like now propagate.
            pass
        model.save('test.h5')
        self_play(model, n_games=2, mcts_simulations=32)
        train(model, game_model_name=model.name, epochs=2)
        # The test expects the model to be named 'model_2' after training.
        self.assertEqual(model.name, 'model_2')
        policies2, values2 = model.predict(board)
        self.assertFalse(np.array_equal(values, values2))
        self.assertFalse(np.array_equal(policies, policies2))

        # The snapshot written before training must still give the
        # original predictions.
        model3 = load_model('test.h5', custom_objects={'loss': loss})
        policies3, values3 = model3.predict(board)

        self.assertTrue(np.array_equal(values, values3))
        self.assertTrue(np.array_equal(policies, policies3))
        os.remove('test.h5')
コード例 #4
0
def main():
    """Set up the TensorFlow session, then run self-play in an endless loop.

    The session is created with ``gpu_options.allow_growth`` enabled and
    registered with the Keras backend.  After the initial model has been
    created, each loop iteration loads the best model, plays 10 self-play
    games with it and clears the Keras session.  The loop never terminates
    on its own.
    """
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session = tf.Session(config=session_config)
    K.set_session(session)
    init_directories()

    # The returned model is not used below.
    # NOTE(review): presumably called for its on-disk side effects -- confirm.
    model = create_initial_model(name="model_1")

    while True:
        current_best = load_best_model()
        self_play(
            current_best,
            n_games=10,
            mcts_simulations=conf['MCTS_SIMULATIONS'],
        )
        K.clear_session()
コード例 #5
0
ファイル: slow_tests.py プロジェクト: drsagitn/sejonggo
    def test_self_play_resign(self):
        """Check the split of games by their recorded resign values.

        Fifty self-play games are played with a ``DummyModel``.  Games are
        counted by whether both ``'resign_model1'`` and ``'resign_model2'``
        are set (not ``None``) or at least one of them is ``None``, and the
        two counts are compared against fixed expected numbers.
        """
        model = DummyModel()
        games_data = self_play(model, n_games=50, mcts_simulations=8)

        self.assertEqual(len(games_data), 50)

        # Identity checks against None: unlike ``!= None`` / ``== None``
        # they cannot be overridden by the value's own comparison operators.
        resign_games = sum(
            1 for g in games_data
            if g['resign_model1'] is not None
            and g['resign_model2'] is not None
        )
        no_resign_games = sum(
            1 for g in games_data
            if g['resign_model1'] is None or g['resign_model2'] is None
        )
        # NOTE(review): the exact 28/22 split presumably relies on
        # deterministic behaviour of DummyModel and self_play -- confirm.
        self.assertEqual(resign_games, 28)
        self.assertEqual(no_resign_games, 22)
コード例 #6
0
# ====================
# Run the training cycle
# ====================

# Package imports
from dual_network import dual_network
from self_play import self_play
from train_network import train_network
from evaluate_network import evaluate_network
from evaluate_best_player import evaluate_best_player

# Create the dual network
dual_network()

# Repeat the self-play / train / evaluate cycle 10 times.
for i in range(10):
    print('Train', i, '====================')
    # Self-play part
    self_play()

    # Parameter update part
    train_network()

    # New-parameter evaluation part; the return value decides whether the
    # best player gets evaluated below.
    update_best_player = evaluate_network()

    # Evaluate the best player
    if update_best_player:
        evaluate_best_player()
コード例 #7
0
ファイル: main.py プロジェクト: EvanSamaa/ECE413_Hex_agent
def run_self_play(net_path):
    """Run self-play using the configured settings and an optional network.

    Args:
        net_path: Path passed to ``load_net_eval``; when falsy, no network
            is loaded and ``None`` is handed to ``self_play`` instead.

    Returns:
        Whatever ``self_play`` returns.
    """
    if net_path:
        net = load_net_eval(net_path)
    else:
        net = None

    board_size = config['board_size']
    iterations = config['mcts_iterations']
    temperature = config['temperature']
    return self_play(board_size, iterations, temperature, net)
コード例 #8
0
def elect_model_as_best_model(model):
    """Run self-play with ``model`` and save it as the best model.

    Self-play uses the ``N_GAMES`` and ``MCTS_SIMULATIONS`` settings from
    ``conf``; afterwards the model is saved to ``MODEL_DIR/BEST_MODEL``.

    Args:
        model: The model to play with and then save.
    """
    play_settings = {
        'n_games': conf['N_GAMES'],
        'mcts_simulations': conf['MCTS_SIMULATIONS'],
    }
    self_play(model, **play_settings)

    best_model_path = os.path.join(conf['MODEL_DIR'], conf['BEST_MODEL'])
    model.save(best_model_path)
コード例 #9
0
ファイル: learn.py プロジェクト: lowaa/ml-tic-tac-toe
                features[key] = np.append(prev_array, [int(element)])

            labels = np.append(
                labels,
                [convert_move_to_index(m_h.move, num_cols=settings.NUM_COLS)])

        dataset = tf.data.Dataset.from_tensor_slices(
            (features, labels.astype(int)))
        dataset = dataset.shuffle(len(labels)).repeat().batch(len(labels))
        return dataset

    return fn


# Script entry point: play one self-play game with a freshly created
# estimator, print the result, then run a single training step on the move
# histories recorded during that game.
if __name__ == '__main__':
    # Imported here rather than at module top level.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from self_play import self_play, print_self_play_results

    estimator = create_estimator()

    # One game on a NUM_COLS x NUM_ROWS board with vs_random disabled.
    self_play_result = self_play(dnn=estimator,
                                 num_games=1,
                                 num_cols=settings.NUM_COLS,
                                 num_rows=settings.NUM_ROWS,
                                 vs_random=False)

    print_self_play_results(self_play_result)

    # Train for exactly one step on the moves recorded above.
    estimator.train(
        input_fn=get_train_fn(move_histories=self_play_result.move_histories),
        steps=1)
コード例 #10
0
from dual_network import dual_network
from self_play import self_play
from train_network import train_network
from evaluate_network import evaluate_network, update_best_player
from evaluate_best_player import evaluate_best_player

# NOTE(review): presumably builds/initialises the network before the loop
# starts -- confirm against dual_network.
dual_network()
count = 0  # iterations in which evaluate_network() returned True
fail_count = 0  # consecutive iterations without that; reset on success
for i in range(30):
    print('Train', i, '======================')
    # fail_count is passed to both the self-play and the training step.
    self_play(fail_count)

    train_network(fail_count)

    # NOTE(review): the commented-out lines below look like a disabled
    # "evaluate only every 10th iteration" variant -- confirm before deleting.
    #skip = True
    #updated = True
    #if i%10 == 0 and i != 0:
    updated = evaluate_network()
    #    skip = False
    #else:
    #    update_best_player()

    # Only a result equal to True counts as a success here.
    if updated == True:  # and skip == False:
        count += 1
        fail_count = 0
    else:
        fail_count += 1

    # count never decreases, so once it exceeds 4 the best player is
    # evaluated on every remaining iteration.
    if count > 4:
        evaluate_best_player()
コード例 #11
0
from evaluate_network import evaluate_network
from train_network import train_network

from self_play import self_play

# Run the self-play / train / evaluate cycle 10 times.
for i in range(10):
    print('Train', i, '====================')
    self_play()  # self-play part
    train_network()  # parameter update part
    evaluate_network()  # new-parameter evaluation part