예제 #1
0
def train(good_luck=False):
    """
    Run the training loop, picking up from the last recorded iteration.

    Each iteration performs, in order:
        1) self play to produce examples, expanded via
           ``GameState.all_symmetries`` and dumped with
           ``globals.dump_selfplay_data``
        2) training of the net on those examples
        3) evaluation of the trained net against the 'best' net (skipped
           when ``train_args.always_accept_model`` is set, in which case the
           score is ``np.inf``)
        4) acceptance of the model when the score is >= 0
        5) an update of the iteration file so a later run can resume

    :param good_luck: must be truthy, otherwise the function refuses to run.
    :raises Exception: when ``good_luck`` is falsy.
    """
    if not good_luck:
        raise Exception('You will need it')

    # Prepare the files and folders the training relies on
    globals.init_training_stuff()

    # Iteration to resume from
    first_iteration = globals.get_last_iteration()

    for current in range(first_iteration, train_args.iters):
        print('Starting iteration ', current)

        # Self play, then expand the examples with their symmetries
        played = selfplay()
        augmented = GameState.all_symmetries(played)
        globals.dump_selfplay_data(augmented)

        # Train on the augmented examples
        train_net(augmented)

        # Evaluate, unless every model is accepted unconditionally
        if train_args.always_accept_model:
            score = np.inf
        else:
            candidate = str(globals.get_last_iteration() + 1)
            score = evaluate_net(net=candidate, net_opponent='best')

        # A non-negative score means the new net is accepted
        accepted = bool(score >= 0)
        verdict = 'Accepting model, score' if accepted else 'Rejecting model, score'
        print(verdict, score)
        if accepted:
            globals.accept_model()

        # Record the completed iteration so training can be resumed later
        globals.update_iteration_file(current + 1)