Example #1
def main():
    sys.setrecursionlimit(2000)
    init_directories()
    clean_up_empty()
    GPUs = conf['GPUs']
    START_PHASE = "EVALUATING"
    while True:
        if START_PHASE != "EVALUATING":
            # SELF-PLAY
            init_predicting_workers(GPUs)
            workers = [
                NoModelSelfPlayWorker(i) for i in range(conf['N_GAME_PROCESS'])
            ]
            for p in workers:
                p.start()
            for p in workers:
                p.join()
            destroy_predicting_workers(GPUs)

        # EVALUATE
        init_predicting_workers(
            GPUs
        )  # re-init predicting workers to run with the latest trained model (sent from the train server)
        workers = [
            NoModelEvaluateWorker(i) for i in range(conf['N_GAME_PROCESS'])
        ]
        for p in workers:
            p.start()
        for p in workers:
            p.join()
        workers.clear()
        destroy_predicting_workers(GPUs)

        if promote_best_model():
            START_PHASE = ""  # there are new best model so we doing self-play in next loop
Example #2
def main():
    sys.setrecursionlimit(10000)
    init_directories()
    clean_up_empty()
    GPUs = conf['GPUs']
    finished_best_model_name = None
    while True:
        init_predicting_workers(GPUs)
        # Check whether we have already self-played on this best model
        curr_best_model_name = put_name_request("BEST")
        if curr_best_model_name != finished_best_model_name:
            finished_best_model_name = curr_best_model_name
        else:
            print("No new best model for self-playing. Stopping..")
            destroy_predicting_workers(GPUs)
            break
        print("SELF-PLAYING BEST MODEL ", curr_best_model_name)
        workers = [
            NoModelSelfPlayWorker(i) for i in range(conf['N_GAME_PROCESS'])
        ]
        for p in workers:
            p.start()
        for p in workers:
            p.join()
        destroy_predicting_workers(GPUs)
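
This loop stops as soon as the name server reports the same best model twice in a row. A minimal sketch of that check, with a stand-in for put_name_request("BEST") since that helper is not shown here:

def poll_best_model_name():
    # Placeholder for put_name_request("BEST"); the real call asks the name server.
    return "model_7"

finished_best_model_name = None
while True:
    curr = poll_best_model_name()
    if curr == finished_best_model_name:
        break  # no new best model, nothing left to self-play
    finished_best_model_name = curr
    # ... launch the self-play workers against `curr` here ...
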
Example #3
def create_initial_model(name, self_play=True):
    from utils import init_directories
    init_directories()
    full_filename = os.path.join(conf['MODEL_DIR'], name) + ".h5"
    if os.path.isfile(full_filename):
        model = load_model(full_filename, custom_objects={'loss': loss})
        return model

    model = build_model(name)

    # Save the graph in TensorBoard. This graph has the name scopes that make it
    # look good in TensorBoard; the loaded models will not have the scopes.
    tf_callback = TensorBoard(log_dir=os.path.join(conf['LOG_DIR'], name),
                              histogram_freq=0,
                              batch_size=1,
                              write_graph=True,
                              write_grads=False)
    tf_callback.set_model(model)
    tf_callback.on_epoch_end(0)
    tf_callback.on_train_end(0)

    if self_play:
        from self_play import self_play
        self_play(model,
                  n_games=conf['N_GAMES'],
                  mcts_simulations=conf['MCTS_SIMULATIONS'])
    model.save(full_filename)
    best_filename = os.path.join(conf['MODEL_DIR'], 'best_model.h5')
    model.save(best_filename)
    return model
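
create_initial_model follows a common "reuse the checkpoint if it exists, otherwise build and save" pattern. A minimal, generic sketch of that pattern using tensorflow.keras (the snippet's own imports are not shown, so the Keras flavour and the toy model are assumptions):

import os
from tensorflow.keras import layers, models

def get_or_create_model(path="model_1.h5"):
    if os.path.isfile(path):
        return models.load_model(path)  # reuse the saved checkpoint
    model = models.Sequential([layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer="sgd", loss="mse")
    model.save(path)  # persist so the next call takes the load branch
    return model
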
Example #4
def main():
    init_directories()
    GPUs = conf['GPUs']
    # workers = list()
    # workers.append(TrainWorker([i for i in range(n_gpu)]))
    # for p in workers: p.start()
    # for p in workers: p.join()
    # workers.clear()

    train_multi_gpus(n_gpu=len(GPUs))
Example #5
def main():
    print("Starting run (v{})".format(__version__))
    init_directories()
    if conf['THREAD_SIMULATION']:
        init_simulation_workers()
    model_name = "model_1"
    model = create_initial_model(name=model_name)

    while True:
        model = load_latest_model()
        best_model = load_best_model()
        train(model, game_model_name=best_model.name)
        evaluate(best_model, model)
        K.clear_session()
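
K.clear_session() at the bottom of the loop resets the Keras backend graph so that models loaded in earlier iterations do not keep accumulating memory. A trimmed sketch of that pattern:

from tensorflow.keras import backend as K

for iteration in range(3):
    # ... load the latest/best models, train, evaluate ...
    K.clear_session()  # drop the old graph state before the next iteration
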
Example #6
def main():
    init_directories()
    clean_up_empty()
    resource.setrlimit(resource.RLIMIT_STACK, (2**29, -1))
    sys.setrecursionlimit(10**6)
    GPUs = conf['GPUs']

    mgr = registerRemoteFunc()

    while True:
        jobs = mgr.get_job(concurency=len(GPUs))._getvalue()
        logger.info("GOT JOBS %s", jobs)
        out_dirs = jobs['out_dirs']
        assert len(out_dirs) <= len(GPUs)
        state = jobs['state']
        model_check_update(jobs['latest_model_name'], jobs['best_model_name'],
                           mgr)
        if state == ASYNC_PIPELINE_STATE.SELF_PLAYING.name:
            logger.info("STARTING REMOTE SELF_PLAY PHASE WITH %s GPUs",
                        len(GPUs))
            workers = [
                SelfPlayWorker(i, one_game_only=extract_game_number(dir))
                for i, dir in enumerate(out_dirs)
            ]
            for p in workers:
                p.start()
            for p in workers:
                p.join()
            workers.clear()
            send_finish_jobs(jobs, mgr)
            logger.info("FINISHED SELF_PLAY JOBS %", jobs['id'])
        elif state == ASYNC_PIPELINE_STATE.EVALUATING.name:
            logger.info("STARTING REMOTE EVALUATION PHASE WITH %s GPUs",
                        len(GPUs))
            workers = [
                EvaluateWorker(i, one_game_only=extract_game_number(dir))
                for i, dir in enumerate(out_dirs)
            ]
            for p in workers:
                p.start()
            for p in workers:
                p.join()
            workers.clear()
            send_finish_jobs(jobs, mgr)
            logger.info("FINISHED EVALUATION JOBS %", jobs["id"])
        else:
            print("Unhandled state %s. Sleep 5 to wait for new state" % state)
            time.sleep(5)
            continue
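
The state strings compared above (SELF_PLAYING, EVALUATING) suggest that ASYNC_PIPELINE_STATE is an Enum whose member names are passed around as plain strings. A small sketch of that comparison, with hypothetical member values:

from enum import Enum

class ASYNC_PIPELINE_STATE(Enum):
    # Values are illustrative; only the member names matter for the comparison.
    SELF_PLAYING = 1
    TRAINING = 2
    EVALUATING = 3

state = "SELF_PLAYING"  # e.g. received from the job manager as a string
if state == ASYNC_PIPELINE_STATE.SELF_PLAYING.name:
    print("dispatch self-play jobs")
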
Example #7
def main():
    init_directories()
    clean_up_empty()
    resource.setrlimit(resource.RLIMIT_STACK, (2**29, -1))
    sys.setrecursionlimit(10**6)
    GPUs = conf['GPUs']
    START_PHASE = "SELF-PLAY"
    STARTED = False

    while True:
        if STARTED or START_PHASE == "SELF-PLAY":
            STARTED = True
            logger.info("STARTING SELF_PLAY PHASE WITH %s GPUs", len(GPUs))
            turn_on_event(ASYNC_PIPELINE_STATE.SELF_PLAYING)
            init_predicting_workers(GPUs)
            workers = [NoModelSelfPlayWorker(i) for i in GPUs]
            for p in workers:
                p.start()
            for p in workers:
                p.join()
            while is_slave_working():
                time.sleep(2)
            destroy_predicting_workers(GPUs)
            workers.clear()
        if STARTED or START_PHASE == "TRAINING":
            STARTED = True
            logger.info("STARTING TRAINING PHASE with %s GPUs", len(GPUs))
            turn_on_event(ASYNC_PIPELINE_STATE.TRAINING)
            trainer = TrainWorker(list(GPUs))
            trainer.start()
            trainer.join()
        if STARTED or START_PHASE == "EVALUATION":
            STARTED = True
            logger.info("STARTING EVALUATION PHASE WITH %s GPUs", len(GPUs))
            turn_on_event(ASYNC_PIPELINE_STATE.EVALUATING)
            init_predicting_workers(GPUs)
            workers = [NoModelEvaluateWorker(i) for i in GPUs]
            for p in workers:
                p.start()
            for p in workers:
                p.join()
            while is_slave_working():
                time.sleep(2)
            workers.clear()
            destroy_predicting_workers(GPUs)

            promote_best_model()
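
Both the self-play and evaluation phases above wait for the remote slaves to drain before tearing down the predicting workers. A minimal sketch of that polling loop, with a stand-in predicate since is_slave_working() is not shown here:

import time

def slaves_still_working():
    # Placeholder for is_slave_working(); the real call queries the job manager.
    return False

while slaves_still_working():
    time.sleep(2)  # poll every two seconds until all remote games finish
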
Example #8
def main():
    init_directories()
    clean_up_empty()
    GPUs = conf['GPUs']
    EPOCHS_PER_SAVE = conf['EPOCHS_PER_SAVE']
    BATCH_SIZE = conf['TRAIN_BATCH_SIZE']
    NUM_WORKERS = conf['NUM_WORKERS']
    SIZE = conf['SIZE']
    n_gpu = len(GPUs)
    if n_gpu <= 1:
        raise EnvironmentError(
            "Number of GPU need > 1 for multi-gpus training")

    logger.info("STARTING TRAINING PHASE with %s GPUs", len(GPUs))
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(GPUs).strip('[').strip(']').strip(
        ' ')

    global model
    model = load_latest_model()

    base_name, index = model.name.split('_')
    smallest_loss = Inf

    # try:
    #     model = multi_gpu_model(model, cpu_relocation=True)
    #     print("Training using multiple GPUs..")
    # except:
    #     print("Training using single GPU or CPU..")
    opt = SGD(lr=1e-2, momentum=0.9, clipnorm=0.9)
    model.compile(loss=loss, optimizer=opt, metrics=["accuracy"])

    params = {
        'dim': (SIZE, SIZE, 17),
        'batch_size': BATCH_SIZE * n_gpu,
        'shuffle': True
    }
    while True:
        new_name = "_".join([base_name, str(int(index) + 1)]) + ".h5"
        # partition = get_KGS_training_desc()  # get_training_desc()
        training_generator = KGSDataGenerator([], None, **params)
        # validation_generator = KGSDataGenerator(partition['validation'], None, **params)
        reduce_lr = ReduceLROnPlateau(monitor='policy_out_acc',
                                      factor=0.1,
                                      patience=3,
                                      verbose=1,
                                      mode='auto',
                                      min_lr=0)

        callbacks_list = [reduce_lr]

        EPOCHS_PER_BACKUP = conf['EPOCHS_PER_BACKUP']
        cycle = EPOCHS_PER_SAVE // EPOCHS_PER_BACKUP
        for i in range(cycle):
            logger.info("CYCLE {}/{}".format(i + 1, cycle))
            model.fit_generator(
                generator=training_generator,
                # validation_data=validation_generator,
                use_multiprocessing=True,
                workers=NUM_WORKERS,
                epochs=EPOCHS_PER_BACKUP,
                verbose=1,
                callbacks=callbacks_list)
            model.save(os.path.join(conf['MODEL_DIR'], "backup.h5"))
            logger.info('Auto save model backup.h5')
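
Restricting the training process to the configured GPUs only takes effect if CUDA_VISIBLE_DEVICES is set before TensorFlow initializes its CUDA context. A minimal sketch of that setup (the GPU ids are illustrative; the real list comes from conf['GPUs']):

import os

GPUs = [0, 1]  # illustrative ids
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(gpu) for gpu in GPUs)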