Example #1
def play_games(args):
    agent1_fname, agent2_fname, num_games, board_size, gpu_frac, simulations = args

    set_gpu_memory_target(gpu_frac)

    # Seed each worker from time plus PID so that parallel game processes
    # do not all play out identical games.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_model_from_disk(agent1_fname)
    agent2 = load_model_from_disk(agent2_fname)

    wins, losses = 0, 0
    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
            print("Agent 1 playing as black and Agent 2 as white")
        else:
            white_player, black_player = agent1, agent2
            print("Agent 1 playing as white and Agent 2 as black")
        game = simulate_game(black_player, white_player, board_size,
                             simulations)
        if game.winner() == color1.value:
            print('Agent 1 wins')
            wins += 1
        elif game.winner() == color1.opp.value:
            print('Agent 2 wins')
            losses += 1
        else:
            print('Game is a draw', game.winner())

        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        color1 = color1.opp
    return wins, losses
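
Every example in this listing first calls set_gpu_memory_target(gpu_frac) before loading a model. The helper itself is not shown anywhere here; the following is a minimal sketch of what it might do, assuming the TF 2.x compat.v1 session API that the other examples use:

def set_gpu_memory_target(frac):
    """Cap this process's GPU memory usage at roughly `frac` of the card.

    Sketch only: the real helper is defined elsewhere (e.g. in algos.utils).
    """
    import tensorflow as tf
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=frac)
    config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))

Capping the per-process fraction matters here because several self-play workers share one GPU; without a cap, the first worker's TensorFlow runtime would claim all device memory.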
Example #2
def do_self_play(board_size, agent1_filename, agent2_filename, num_games,
                 simulations, temperature, experience_filename, gpu_frac):
    # Imported inside the function so each self-play worker process
    # initializes its own TensorFlow/Keras runtime.
    import tensorflow as tf
    import keras

    set_gpu_memory_target(gpu_frac)

    print("PID: ", os.getpid())
    # Seed each worker from time plus PID so parallel games do not repeat.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    print("learning agent : {} \nreference_agent : {}".format(
        agent1_filename, agent2_filename))

    agent1 = load_model_from_disk(agent1_filename)
    agent2 = load_model_from_disk(agent2_filename)

    mctsSP = MCTSSelfPlay(7, 5)
    print(
        f"{bcolors.OKBLUE} [PID : {os.getpid()}] self-play game is triggered, Get A Cup Of Coffee And Relax !!!{bcolors.ENDC}"
    )
    logging.debug(
        "[PID : {}] self-play game is triggered, Get A Cup Of Coffee And Relax !!!"
        .format(os.getpid()))
    # agent1 and agent2 are Keras models.
    mctsSP.play(agent1,
                agent2,
                experience_filename,
                num_games=num_games,
                simulations=simulations)
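
load_model_from_disk and its counterpart save_model_to_disk (used in Example #4) are also not shown in this listing. Judging from the call sites -- Example #3 passes a (json, h5) tuple, and Example #4 saves in "(.json, .h5) format" -- a plausible sketch, with the tuple handling treated as an assumption, is:

from keras.models import model_from_json

def save_model_to_disk(model, output_file):
    # Sketch: architecture goes to <output_file>.json, weights to <output_file>.h5.
    with open(output_file + '.json', 'w') as f:
        f.write(model.to_json())
    model.save_weights(output_file + '.h5')

def load_model_from_disk(model_file):
    # Sketch: model_file is a (json_path, h5_path) pair, as in Example #3.
    json_path, h5_path = model_file
    with open(json_path) as f:
        model = model_from_json(f.read())
    model.load_weights(h5_path)
    return model

Examples #1 and #2 pass a single filename argument, so the real helper may also accept a bare path prefix; the split into architecture and weights files is what the checkpoint paths in Example #3 suggest.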
Example #3
def main():
    gpu_frac = 0.20
    set_gpu_memory_target(gpu_frac)
    agent_model = ("./checkpoints/iteration_Savedmodel/initial.json",
                   "./checkpoints/iteration_Savedmodel/initial.h5")
    model = load_model_from_disk(agent_model)
    model.summary()

    # Build a batch of 100 random board tensors shaped like the network input.
    model_input = []
    for _ in range(100):
        board_tensor = np.random.randint(0, 3, size=(7, 5, 5))
        model_input.append(board_tensor)
    model_input = np.array(model_input)

    # Dummy targets for the policy and value heads. This snippet only runs a
    # forward pass below, but the shapes match what training would expect.
    action_target = []
    for _ in range(100):
        search_prob = np.random.randn(26)
        action_target.append(search_prob)
    action_target = np.array(action_target)

    value_target = np.random.rand(100)  # already an ndarray

    # Run a single prediction on the first board tensor.
    X = model_input[0]
    X = np.expand_dims(X, axis=0)
    prediction = model.predict(X)
    print(prediction)
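
The network is two-headed, so model.predict returns a list: a policy vector and a value estimate. Given the length-26 action targets above (presumably the 25 points of the 5x5 board plus a pass move) and the ['categorical_crossentropy', 'mse'] loss order in Example #4, the output can be unpacked like this (the shapes are an assumption, not confirmed by the listing):

policy, value = prediction
print(policy.shape)  # expected (1, 26): move probabilities for one board
print(value.shape)   # expected (1, 1): scalar position evaluation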
Example #4
    def train(self,
              exp_filename,
              output_file,
              learning_rate=0.01,
              batch_size=128,
              epochs=100):
        from keras.optimizers import SGD
        import tensorflow as tf
        from algos.utils import load_model_from_disk

        # Query the available GPUs once rather than three separate times.
        gpus = tf.config.experimental.list_physical_devices('GPU')
        is_gpu = len(gpus)
        print("Num GPUs Available : {}, is_gpu: {} gpus: {}".format(
            len(gpus), is_gpu, gpus))
        logging.debug("Num GPUs Available : {}, is_gpu: {} gpus: {}".format(
            len(gpus), is_gpu, gpus))

        with h5py.File(exp_filename, 'r') as exp_input:
            experience_buffer = load_experience(exp_input)

        num_examples = experience_buffer.model_input.shape[0]
        print("num_examples : ", num_examples)

        model_input = experience_buffer.model_input
        action_target = experience_buffer.action_target
        value_target = experience_buffer.value_target

        # parallel training code
        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        logging.debug('Number of devices: {}'.format(
            strategy.num_replicas_in_sync))


        # TODO: Looks like an issue here. [David to Resolve]
        nn_model = load_model_from_disk(self.model_file)
        with strategy.scope():
            if is_gpu:
                """[GPU-ERROR] We are seeing this issue on AWS with 4 GPUs. Unable to load handle under this scope"""
                print(
                    "[Training] Load model on each GPUs as it is inside tf.distribute.Strategy scope"
                )
                logging.debug(
                    "[Training] Load model on each GPUs as it is inside tf.distribute.Strategy scope"
                )
                self.model = load_model_from_disk(self.model_file)
            else:
                print(
                    "[Training] Load model on CPUs as it is inside tf.distribute.Strategy scope but don't have GPUs"
                )
                logging.debug(
                    "[Training] Load model on CPUs as it is inside tf.distribute.Strategy scope but don't have GPUs"
                )

                self.model = nn_model

            self.model.compile(SGD(learning_rate=learning_rate),
                               loss=['categorical_crossentropy', 'mse'])
        """ logic code for checkpointing.
             This is to understand how many epochs is best for training
             as loss may start increasing after a certain eochs.
             TARGET : WE NEED TO FIND THE BEST CHECKPOINTED MODELS. 
        """
        """
        # Include the epoch in the file name (uses `str.format`)
        checkpoint_path = "checkpoints/epochs_chkpts/cp-{epoch:04d}.ckpt"
        checkpoint_dir = os.path.dirname(checkpoint_path)

        # Create a callback that saves the model's weights every 5 epochs.
        # last checkpoint will be equivalent to the entire model saving after training gets over.
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
                                             filepath=checkpoint_path, 
                                             verbose=1, 
                                             save_weights_only=True,
                                             period=5)
        
        # Save the weights using the `checkpoint_path` format
        self.model.save_weights(checkpoint_path.format(epoch=0))
        """
        """
        @param monitor - the quantity monitored to determine where to stop
        @param mode - whether we want max or min of monitored quantity
        @param patience - how many extra epochs do we try after finding a new best before stopping
        @param restore_best_weights - Whether to restore the best weights found during training or stick with the current weights
        """
        early_stopping_callback = tf.keras.callbacks.EarlyStopping(
            monitor='loss', mode='min', patience=5, restore_best_weights=True)

        # Train the model, stopping early once the loss plateaus.
        self.model.fit(model_input, [action_target, value_target],
                       epochs=epochs,
                       batch_size=batch_size,
                       callbacks=[early_stopping_callback])
        """
        # This may generate warnings related to saving the state of the optimizer.
        # These warnings (and similar warnings throughout this notebook)
        # are in place to discourage outdated usage, and can be ignored.

        #latest = tf.train.latest_checkpoint(checkpoint_dir)
        
        # Loads the weights (syntx)
        # model.load_weights(checkpoint_path)
        """
        """
        # After training, save the entire model in 'checkpoints/iteration_Savedmodel/' dir.
        my_model = filename.split("/")[-1].split(".")[0]
        model_name = 'checkpoints/iteration_Savedmodel/' + my_model + '_model'
        model.save(model_name)
        """

        # Save the entire model to a HDF5 file.
        # The '.h5' extension indicates that the model should be saved to HDF5.
        #model_name = model_name + ".h5"
        # model.save(model_name)
        # self.model.save(output_file)

        # Reload a fresh Keras model from the saved model.
        # Recreate the exact same model, including its weights and the optimizer
        #new_model = tf.keras.models.load_model(model_name)
        """ Save the tained model in (.json, .h5) format """
        save_model_to_disk(self.model, output_file)
        print("trained model saved to disk : ", output_file)
        logging.debug("trained model saved to disk : {}".format(output_file))
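
A hypothetical invocation of this method, assuming the enclosing trainer class takes the current model file in its constructor (neither the class name nor its __init__ appears in this listing):

trainer = ZeroTrainer(  # class name is assumed; only self.model_file is visible above
    model_file=("./checkpoints/iteration_Savedmodel/initial.json",
                "./checkpoints/iteration_Savedmodel/initial.h5"))
trainer.train("experience_0.h5",        # hypothetical self-play records, as written by Example #2
              "./checkpoints/trained",  # output prefix for the (.json, .h5) pair
              learning_rate=0.01,
              batch_size=128,
              epochs=100)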