    def test_load_weights(self):
        player = NNPolicy(model_save_path='/home/mike/tmp/test_nn_model_with_weights.json')
        player.save_model(weights_file='/home/mike/tmp/test_weight.hdf5')
        new_player = NNPolicy(model_load_path='/home/mike/tmp/test_nn_model_with_weights.json')

        self.assertTrue(len(player.model.get_weights()) == len(new_player.model.get_weights()))
        for i in range(len(player.model.get_weights())):
            self.assertTrue(np.array_equal(player.model.get_weights()[i], new_player.model.get_weights()[i]))
def evaluate_nn_policy():
    """
    Evaluate the results of the training process. It can be run in parallel while training is in progress.
    The output directory from the training process must contain a metadata.json file with the training
    metadata, a model.json file for the trained model, and at least one weights.%05d.hdf5 weight file.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description=
        'Compare the trained NN policy to our manually crafted baseline policy'
    )
    parser.add_argument(
        "directory",
        help=
        "Path to folder where the model params and metadata was saved from training."
    )
    parser.add_argument("--metadata-file",
                        help="The meta data file to be loaded",
                        default="su_metadata.json")
    parser.add_argument("--weight-file",
                        help="The weight file to be loaded to the model",
                        default=ZEROTH_FILE)
    parser.add_argument("--plot",
                        help="Plot the evaluation results",
                        default=False,
                        action="store_true")
    parser.add_argument("--num-games",
                        help="Number of games to play for evaluation",
                        type=int,
                        default=1000)
    parser.add_argument(
        "--card-path",
        help="The directory with the card set file (Default: {})".format(
            gm.DEFAULT_PATH),
        default=gm.DEFAULT_PATH)
    parser.add_argument(
        "--card-file",
        help="The file containing the cards to play with (Default: {})".format(
            gm.DEFAULT_CARDS_FILE),
        default=gm.DEFAULT_CARDS_FILE)
    parser.add_argument("--verbose",
                        "-v",
                        help="Turn on verbose mode",
                        default=False,
                        action="store_true")
    args = parser.parse_args()

    with open(os.path.join(args.directory, args.metadata_file), "r") as f:
        metadata = json.load(f)

    model_path = os.path.join(args.directory, metadata["model_file"])
    player = NNPolicy(model_load_path=model_path)

    player.load_weights(os.path.join(args.directory, args.weight_file))
    opponent = BaselinePolicy()
    compare_policy(player, opponent, args.num_games, args.card_path,
                   args.card_file)
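
# Illustrative usage only (not part of the original source): evaluate_nn_policy() reads its
# configuration from the command line via argparse, so a minimal entry point is enough.
# The script name in the sample invocation is hypothetical; the flags match the argparse
# definitions above.
#
#   python evaluate_nn_policy.py /path/to/training/output --weight-file weights.00100.hdf5 --num-games 500
if __name__ == "__main__":
    evaluate_nn_policy()
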
    def test_get_action(self):
        player = NNPolicy()

        game = gm.GameState()
        while not game.is_end_of_game():
            (card, move) = player.get_action(game)
            self.assertTrue(card.position == (-1, -1) and card.owner == game.current_player)
            self.assertTrue(game.board[Helper.tuple2idx(game.board_size, *move)] is None)
            game.play_round(card, *move)
    def test_train_single_game(self):
        single_meta = {
            "game_batch": 1,
            "num_wins": {},
            "out_directory": "test_cards",
            "card_path": "test_cards",
            "card_file": "cards.csv",
            "learning_rate": 0.001
        }
        player = NNPolicy()
        opponent = player.clone()
        optimizer = SGD(lr=single_meta["learning_rate"])
        player.model.compile(loss=mc.log_loss, optimizer=optimizer)
        (states, actions, rewards) = mc.simulate_games(player, opponent,
                                                       single_meta)
        mc.train_on_results(player, states, actions, rewards)
    def test_train_multi_games(self):
        num_games_batch = 20
        multi_meta = {
            "game_batch": num_games_batch,
            "num_wins": {},
            "out_directory": "test_cards",
            "card_path": "test_cards",
            "card_file": "cards.csv",
            "learning_rate": 0.001
        }
        player = NNPolicy()
        opponent = player.clone()

        (states, actions, rewards) = mc.simulate_games(player, opponent,
                                                       multi_meta)

        self.assertTrue(
            len(states) == num_games_batch and len(actions) == num_games_batch
            and len(rewards) == num_games_batch)
        # Ensure both players get a roughly equal chance of playing first
        games_first = sum(len(state) == 5 for state in states)
        games_second = sum(len(state) == 4 for state in states)
        self.assertTrue(games_first + games_second == num_games_batch)
        if games_first == 0 or games_second == 0:
            warnings.warn(
                'Abnormal results: {} games first, {} games second'.format(
                    games_first, games_second))

        self.assertTrue(
            len(actions[num_games_batch - 1]) == len(
                states[num_games_batch - 1])
            and len(rewards[num_games_batch - 1]) == len(
                states[num_games_batch - 1]))
        games_won = sum(reward[0] == 1 for reward in rewards)
        games_tie = sum(reward[0] == 0 for reward in rewards)
        games_lost = sum(reward[0] == -1 for reward in rewards)
        self.assertTrue(games_won + games_tie + games_lost == num_games_batch)
        if games_won == 0 or games_tie == 0 or games_lost == 0:
            warnings.warn(
                'Abnormal results: {} games won, {} games tied, {} games lost'.format(
                    games_won, games_tie, games_lost))

        optimizer = SGD(lr=multi_meta["learning_rate"])
        player.model.compile(loss=mc.log_loss, optimizer=optimizer)
        mc.train_on_results(player, states, actions, rewards)
    def test_run_single_game(self):
        single_meta = {
            "game_batch": 1,
            "num_wins": {},
            "out_directory": "test_cards",
            "card_path": "test_cards",
            "card_file": "cards.csv"
        }
        player = NNPolicy()
        opponent = player.clone()

        (states, actions, rewards) = mc.simulate_games(player, opponent,
                                                       single_meta)
        self.assertTrue(
            len(states) == 1 and len(actions) == 1 and len(rewards) == 1)
        self.assertTrue(len(states[0]) == 4 or len(states[0]) == 5)
        self.assertTrue(
            len(actions[0]) == len(states[0])
            and len(rewards[0]) == len(states[0]))
def run_training():
    import argparse
    parser = argparse.ArgumentParser(
        description='Train the policy network to simulate the baseline policy')
    parser.add_argument(
        "out_directory",
        help=
        "Path to folder where the model params and metadata will be saved after each epoch."
    )
    parser.add_argument("--initial-weights",
                        help="Path to HDF5 file with inital weights.",
                        default=ZEROTH_FILE)
    parser.add_argument(
        "--model-json",
        help="JSON file for policy model in the output directory.",
        default="model.json")
    parser.add_argument("--learning-rate",
                        help="Keras learning rate (Default: 0.01)",
                        type=float,
                        default=0.01)
    parser.add_argument(
        "--epoch",
        help="Number of epoches for training process (Default: 50)",
        type=int,
        default=50)
    parser.add_argument("--step-epoch",
                        help="Number of step per epoch(Default: 1000)",
                        type=int,
                        default=1000)
    parser.add_argument(
        "--batch-size",
        help="Number of games to simulate for each batch (Default: 50)",
        type=int,
        default=50)
    parser.add_argument("--val-steps",
                        help="Number of steps for validation (Default: 1000)",
                        type=int,
                        default=1000)
    parser.add_argument("--result-file",
                        help="The file to save results as csv )",
                        default="result.csv")
    parser.add_argument(
        "--card-path",
        help="The directory with the card set file (Default: {})".format(
            gm.DEFAULT_PATH),
        default=gm.DEFAULT_PATH)
    parser.add_argument(
        "--card-file",
        help="The file containing the cards to play with (Default: {})".format(
            gm.DEFAULT_CARDS_FILE),
        default=gm.DEFAULT_CARDS_FILE)
    parser.add_argument("--verbose",
                        "-v",
                        help="Turn on verbose mode",
                        default=False,
                        action="store_true")

    args = parser.parse_args()

    if not os.path.exists(args.out_directory):
        if args.verbose:
            print("creating output directory {}".format(args.out_directory))
        os.makedirs(args.out_directory)

    if not os.path.exists(os.path.join(args.card_path, args.card_file)):
        raise ValueError(
            "Cannot play the game without card file {} in the directory {}".
            format(args.card_file, args.card_path))

    metadata = {
        "out_directory": args.out_directory,
        "model_file": args.model_json,
        "init_weights": args.initial_weights,
        "learning_rate": args.learning_rate,
        "epoch": args.epoch,
        "step_epoch": args.step_epoch,
        "batch_size": args.batch_size,
        "val_steps": args.val_steps,
        "result_file": args.result_file,
        "card_path": args.card_path,
        "card_file": args.card_file
    }

    player = NNPolicy(
        model_save_path=os.path.join(args.out_directory, args.model_json))
    Helper.save_metadata(metadata, args.out_directory, "su_metadata.json")
    player.save_model()

    target = BaselinePolicy()

    optimizer = SGD(lr=metadata["learning_rate"])
    player.model.compile(loss="categorical_crossentropy",
                         optimizer=optimizer,
                         metrics=["accuracy"])

    train_generator = state_action_generator(target, metadata)
    validation_generator = state_action_generator(target, metadata)

    csv_logger = CSVLogger(os.path.join(args.out_directory, args.result_file),
                           append=True)
    stopper = EarlyStopping(monitor='loss', patience=3)

    player.model.fit_generator(generator=train_generator,
                               steps_per_epoch=metadata["step_epoch"],
                               epochs=metadata["epoch"],
                               callbacks=[csv_logger, stopper],
                               validation_data=validation_generator,
                               validation_steps=metadata["val_steps"])

    player.model.save_weights(os.path.join(args.out_directory, ZEROTH_FILE))
def run_training(cmd_line_args=None):
    import argparse
    parser = argparse.ArgumentParser(description='Train the policy network with Monte Carlo approach and exploring start')
    parser.add_argument("out_directory", help="Path to folder where the model params and metadata will be saved after each epoch.")
    parser.add_argument("--resume", help="Load latest weights in out_directory and resume", default=False, action="store_true")
    parser.add_argument("--model-json", help="JSON file for policy model in the output directory.", default = "model.json")
    parser.add_argument("--initial-weights", help="Path to HDF5 file with inital weights (i.e. result of supervised training).", default = ZEROTH_FILE)
    parser.add_argument("--learning-rate", help="Keras learning rate (Default: 0.01)", type=float, default=0.01)
    parser.add_argument("--save-every", help="Save policy as a new opponent every n batches (Default: 200)", type=int, default=200)
    parser.add_argument("--record-every", help="Save learner's weights every n batches (Default: 100)", type=int, default=100)
    parser.add_argument("--game-batch", help="Number of games per mini-batch (Default: 50)", type=int, default=50)
    parser.add_argument("--iterations", help="Number of training batches/iterations (Default: 50000)", type=int, default=5000)
    parser.add_argument("--pool-size", help="Size of the games pool (Default: 5000)", type=int, default=5000)
    parser.add_argument("--card-path", help="The directory with the card set file (Default: {})".format(gm.DEFAULT_PATH), default=gm.DEFAULT_PATH)
    parser.add_argument("--card-file", help="The file containing the cards to play with (Default: {})".format(gm.DEFAULT_CARDS_FILE), default=gm.DEFAULT_CARDS_FILE)
    parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=True, action="store_true")
    
    # Baseline function (TODO) default lambda state: 0  (receives either file
    # paths to JSON and weights or None, in which case it uses default baseline 0)
    if cmd_line_args is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(cmd_line_args)

    if not os.path.exists(args.out_directory):
        if args.verbose:
            print("creating output directory {}".format(args.out_directory))
        os.makedirs(args.out_directory)

    if not os.path.exists(os.path.join(args.card_path, args.card_file)):
        raise ValueError("Cannot play the game without card file {} in the directory {}".format(args.card_file, args.card_path))

    metadata = {
        "out_directory": args.out_directory,
        "model_file": args.model_json,
        "init_weights": args.initial_weights,
        "learning_rate": args.learning_rate,
        "game_batch": args.game_batch,
        "save_every": args.save_every,
        "pool_size": args.pool_size,
        "card_path": args.card_path,
        "card_file": args.card_file,
        "opponents": [ZEROTH_FILE],  # weight files from which an opponent is sampled each batch
        "num_wins": {},  # number of wins for the player in each batch
        "wins_per_opponent": {}
    }
                 
    if not args.resume:
        # starting the game from scratch
        player_weights = ZEROTH_FILE
        iter_start = 1
        player = NNPolicy(model_save_path=os.path.join(args.out_directory, args.model_json))
        Helper.save_metadata(metadata, args.out_directory, "metadata.json")
        player.save_model()
        # Create the Zeroth weight file
        player.model.save_weights(os.path.join(args.out_directory, player_weights))
    else:
        # Load the metadata
        if not os.path.exists(os.path.join(args.out_directory, "metadata.json")):
            raise ValueError("Cannot resume without metadata.json file in the output directory")
        with open(os.path.join(args.out_directory, "metadata.json"), "r") as f:
            old_metadata = json.load(f)
        
        # Merge the metadata in case any parameter changed
        metadata = {**old_metadata, **metadata}    
        
        # Load the model    
        if not os.path.exists(os.path.join(args.out_directory, args.model_json)):
            raise ValueError("Cannot resume without model json file in the output directory")
        args.model_json = os.path.join(args.out_directory, os.path.basename(args.model_json))
        
        if args.verbose:
            print("Resuming with model {}".format(args.model_json))
        player = NNPolicy(model_load_path=args.model_json)
        
        # Load the initial weights
        if not re.match(r"weights\.\d{5}\.hdf5", args.initial_weights):
            raise ValueError("Expected to resume from weights file with name 'weights.#####.hdf5'")
        player_weights = args.initial_weights
        args.initial_weights = os.path.join(args.out_directory, os.path.basename(args.initial_weights))
        if not os.path.exists(args.initial_weights):
            raise ValueError("Cannot resume without weight file {} in the output directory".format(args.initial_weights))

        if args.verbose:
            print("Resuming with weights {}".format(args.initial_weights))
        player.model.load_weights(args.initial_weights)
        # The iteration number is the 5-digit field in the weights file name "weights.#####.hdf5"
        iter_start = 1 + int(player_weights[8:13])

    opponent = player.clone()

    if args.verbose:
        print("created player and opponent")


    # Append args of current run to history of full command args.
    metadata["cmd_line_args"] = metadata.get("cmd_line_args", [])
    metadata["cmd_line_args"].append(vars(args))

    optimizer = SGD(lr=args.learning_rate)
    player.model.compile(loss=log_loss, optimizer=optimizer)
    
    # game_pool = []
    
    for i_iter in range(iter_start, args.iterations + 1):
        # Note that player_weights will only be saved as a file every args.record_every iterations.
        # Regardless, player_weights enters into the metadata to keep track of the win ratio over
        # time.
        player_weights = "weights.%05d.hdf5" % i_iter

        # Randomly choose an opponent from the pool (possibly the player itself) and play
        # game_batch games against it.
        opp_weights = np.random.choice(metadata["opponents"])
        opp_path = os.path.join(args.out_directory, opp_weights)

        # Load new weights into opponent's network, but keep the same opponent object.
        opponent.model.load_weights(opp_path)

        # Run games (and learn from results)
        (states, card_actions, move_actions, rewards) = simulate_games(player, opponent, metadata)
        
        '''
        game_pool = game_pool + list(zip((states, card_actions, move_actions, rewards)))
        
        if len(game_pool) > metadata["pool_size"]:
            random.shuffle(game_pool)
            game_pool = game_pool[metadata["game_batch"]:]
            train_on_batch()
        elif args.verbose:
            print("Skipped Training Due to insufficient training samples")
        ''' 
            
        train_on_batch(player, states, card_actions, move_actions, rewards)
        games_won = sum(reward == 1 for reward in rewards)
        games_lost = sum(reward == -1 for reward in rewards)
        if args.verbose:
            print("In iteration {} winrate is {}, loserate is {} against opponent {}".format(
                i_iter,
                round(games_won / metadata["game_batch"], 2),
                round(games_lost / metadata["game_batch"], 2),
                opp_weights))

        metadata["num_wins"][player_weights] = games_won
        if opp_weights in metadata["wins_per_opponent"]:
            metadata["wins_per_opponent"][opp_weights].append(games_won)
        else:
            metadata["wins_per_opponent"][opp_weights] = [games_won]

        # Save intermediate models.
        if i_iter % args.record_every == 0:
            player.model.save_weights(os.path.join(args.out_directory, player_weights))

        # Add the player to the pool of opponents once in a while.
        if i_iter % args.save_every == 0:
            metadata["opponents"].append(player_weights)
        Helper.save_metadata(metadata, args.out_directory, "metadata.json")
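
# Illustrative usage only (not part of the original source). run_training() accepts an
# optional cmd_line_args list, so it can be driven programmatically as well as from the
# command line. The flags below match the argparse definitions above; the paths and values
# are hypothetical, and the weights file name follows the weights.#####.hdf5 pattern that
# the --resume path expects.
if __name__ == "__main__":
    run_training()
    # Resuming a previous run could look like:
    # run_training(["my_output_dir", "--resume",
    #               "--initial-weights", "weights.00200.hdf5",
    #               "--iterations", "10000"])
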
    def test_load_model(self):
        player = NNPolicy()
        player.load_model('/home/mike/tmp/test_nn_model.json')

    def test_save_model(self):
        player = NNPolicy(model_save_path='/home/mike/tmp/test_nn_model.json')
        player.save_model()

    def test_nn_weights(self):
        player = NNPolicy()
        #self.assertTrue(player.model.get_weights()[0].shape == tuple(reversed((player.params["units"], 2 * gm.START_HANDS))))
        player.print_network()