def test_load_weights(self):
    """Saving a model with weights and reloading it must reproduce identical weights."""
    model_path = '/home/mike/tmp/test_nn_model_with_weights.json'
    player = NNPolicy(model_save_path=model_path)
    player.save_model(weights_file='/home/mike/tmp/test_weight.hdf5')
    new_player = NNPolicy(model_load_path=model_path)
    saved_weights = player.model.get_weights()
    loaded_weights = new_player.model.get_weights()
    self.assertTrue(len(saved_weights) == len(loaded_weights))
    # Compare every layer's weight tensor element-wise.
    for saved, loaded in zip(saved_weights, loaded_weights):
        self.assertTrue(np.array_equal(saved, loaded))
def evaluate_nn_policy():
    """Evaluate a trained NN policy against the manually crafted baseline policy.

    Can be run in parallel while training is happening. The training output
    directory must contain a metadata JSON file, a model JSON file and at
    least one weights.%05d.hdf5 weight file.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description=
        'Compare the trained NN policy to our manually crafted baseline policy'
    )
    parser.add_argument(
        "directory",
        help=
        "Path to folder where the model params and metadata was saved from training."
    )
    parser.add_argument("--metadata-file",
                        help="The meta data file to be loaded",
                        default="su_metadata.json")
    parser.add_argument("--weight-file",
                        help="The weight file to be loaded to the model",
                        default=ZEROTH_FILE)
    # BUG FIX: default was True, which made the store_true flag a no-op
    # (the option could never be turned off).
    parser.add_argument("--plot",
                        help="Plot the evaluation results",
                        default=False,
                        action="store_true")
    parser.add_argument("--num-games",
                        help="Number of games to play for evaluation",
                        type=int,
                        default=1000)
    parser.add_argument(
        "--card-path",
        help="The directory with the card set file (Default: {})".format(
            gm.DEFAULT_PATH),
        default=gm.DEFAULT_PATH)
    parser.add_argument(
        "--card-file",
        help="The file containing the cards to play with (Default: {})".format(
            gm.DEFAULT_CARDS_FILE),
        default=gm.DEFAULT_CARDS_FILE)
    # BUG FIX: same no-op default as --plot.
    parser.add_argument("--verbose", "-v",
                        help="Turn on verbose mode",
                        default=False,
                        action="store_true")
    args = parser.parse_args()

    with open(os.path.join(args.directory, args.metadata_file), "r") as f:
        metadata = json.load(f)
    # BUG FIX: the model file was previously opened with `with open(...) as f`
    # but the handle was never read; NNPolicy loads the file itself from the path.
    player = NNPolicy(model_load_path=os.path.join(args.directory,
                                                   metadata["model_file"]))
    player.load_weights(os.path.join(args.directory, args.weight_file))
    opponent = BaselinePolicy()
    compare_policy(player, opponent, args.num_games, args.card_path,
                   args.card_file)
def test_get_action(self):
    """get_action must always pick an in-hand card of the current player and an empty cell.

    Plays a full game, checking every chosen (card, move) pair.
    """
    # BUG FIX: removed unused local `input` (it shadowed the builtin and the
    # np.zeros feature tensor it held was never used).
    player = NNPolicy()
    game = gm.GameState()
    while not game.is_end_of_game():
        (card, move) = player.get_action(game)
        # Card must still be unplaced (position (-1, -1)) and owned by the mover.
        self.assertTrue(card.position == (-1, -1)
                        and card.owner == game.current_player)
        # The target board cell must be empty before the move.
        self.assertTrue(
            game.board[Helper.tuple2idx(game.board_size, *move)] is None)
        game.play_round(card, *move)
def test_train_single_game(self):
    """Results from a single simulated game can drive one training update."""
    meta = {
        "game_batch": 1,
        "num_wins": {},
        "out_directory": "test_cards",
        "card_path": "test_cards",
        "card_file": "cards.csv",
        "learning_rate": 0.001,
    }
    player = NNPolicy()
    opponent = player.clone()
    # Compile before simulating so train_on_results can run immediately after.
    player.model.compile(loss=mc.log_loss,
                         optimizer=SGD(lr=meta["learning_rate"]))
    states, actions, rewards = mc.simulate_games(player, opponent, meta)
    mc.train_on_results(player, states, actions, rewards)
def test_train_multi_games(self):
    """Simulating a batch of games yields consistent shapes, and training runs."""
    batch = 20
    meta = {
        "game_batch": batch,
        "num_wins": {},
        "out_directory": "test_cards",
        "card_path": "test_cards",
        "card_file": "cards.csv",
        "learning_rate": 0.001,
    }
    player = NNPolicy()
    opponent = player.clone()
    states, actions, rewards = mc.simulate_games(player, opponent, meta)
    self.assertTrue(
        len(states) == batch and len(actions) == batch
        and len(rewards) == batch)
    # Ensure both players got an almost equal chance of playing first:
    # the first player records 5 states per game, the second records 4.
    first = sum(len(game_states) == 5 for game_states in states)
    second = sum(len(game_states) == 4 for game_states in states)
    self.assertTrue(first + second == batch)
    if first == 0 or second == 0:
        warnings.warn('Abnormal results: {} games first, {} games second'.format(
            first, second))
    # Per-game trajectories must have matching lengths (check the last one).
    last = batch - 1
    self.assertTrue(
        len(actions[last]) == len(states[last])
        and len(rewards[last]) == len(states[last]))
    won = sum(reward[0] == 1 for reward in rewards)
    tie = sum(reward[0] == 0 for reward in rewards)
    lost = sum(reward[0] == -1 for reward in rewards)
    self.assertTrue(won + tie + lost == batch)
    if won == 0 or tie == 0 or lost == 0:
        warnings.warn('Abnormal results: {} games won, {} games tie, {} lost'.format(
            won, tie, lost))
    player.model.compile(loss=mc.log_loss,
                         optimizer=SGD(lr=meta["learning_rate"]))
    mc.train_on_results(player, states, actions, rewards)
def test_run_single_game(self):
    """A single simulated game returns exactly one trajectory of matching lengths."""
    meta = {
        "game_batch": 1,
        "num_wins": {},
        "out_directory": "test_cards",
        "card_path": "test_cards",
        "card_file": "cards.csv",
    }
    player = NNPolicy()
    opponent = player.clone()
    states, actions, rewards = mc.simulate_games(player, opponent, meta)
    self.assertTrue(
        len(states) == 1 and len(actions) == 1 and len(rewards) == 1)
    # First player records 5 states, second player 4.
    self.assertTrue(len(states[0]) in (4, 5))
    self.assertTrue(
        len(actions[0]) == len(states[0])
        and len(rewards[0]) == len(states[0]))
def run_training():
    """Supervised training: fit the policy network to imitate the baseline policy.

    Parses command line arguments, builds (or creates) the model, trains it
    with Keras generators fed by the baseline policy, and saves the metadata,
    model JSON, CSV result log and final weights to the output directory.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Train the policy network to simulate the baseline policy')
    parser.add_argument(
        "out_directory",
        help=
        "Path to folder where the model params and metadata will be saved after each epoch."
    )
    parser.add_argument("--initial-weights",
                        help="Path to HDF5 file with initial weights.",
                        default=ZEROTH_FILE)
    parser.add_argument(
        "--model-json",
        help="JSON file for policy model in the output directory.",
        default="model.json")
    parser.add_argument("--learning-rate",
                        help="Keras learning rate (Default: 0.01)",
                        type=float,
                        default=0.01)
    parser.add_argument(
        "--epoch",
        help="Number of epoches for training process (Default: 50)",
        type=int,
        default=50)
    parser.add_argument("--step-epoch",
                        help="Number of step per epoch (Default: 1000)",
                        type=int,
                        default=1000)
    parser.add_argument(
        "--batch-size",
        help="Number of games to simulate for each batch (Default: 50)",
        type=int,
        default=50)
    parser.add_argument("--val-steps",
                        help="Number of steps for validation (Default: 1000)",
                        type=int,
                        default=1000)
    # BUG FIX: removed stray ')' from the help string.
    parser.add_argument("--result-file",
                        help="The file to save results as csv",
                        default="result.csv")
    parser.add_argument(
        "--card-path",
        help="The directory with the card set file (Default: {})".format(
            gm.DEFAULT_PATH),
        default=gm.DEFAULT_PATH)
    parser.add_argument(
        "--card-file",
        help="The file containing the cards to play with (Default: {})".format(
            gm.DEFAULT_CARDS_FILE),
        default=gm.DEFAULT_CARDS_FILE)
    # BUG FIX: default was True, which made the store_true flag a no-op.
    parser.add_argument("--verbose", "-v",
                        help="Turn on verbose mode",
                        default=False,
                        action="store_true")
    args = parser.parse_args()

    if not os.path.exists(args.out_directory):
        if args.verbose:
            print("creating output directory {}".format(args.out_directory))
        os.makedirs(args.out_directory)
    if not os.path.exists(os.path.join(args.card_path, args.card_file)):
        raise ValueError(
            "Cannot play the game without card file {} in the directory {}".
            format(args.card_file, args.card_path))

    metadata = {
        "out_directory": args.out_directory,
        "model_file": args.model_json,
        "init_weights": args.initial_weights,
        "learning_rate": args.learning_rate,
        "epoch": args.epoch,
        "step_epoch": args.step_epoch,
        "batch_size": args.batch_size,
        "val_steps": args.val_steps,
        "result_file": args.result_file,
        "card_path": args.card_path,
        "card_file": args.card_file
    }
    # BUG FIX: removed unused local `iter_start` (only the RL trainer resumes).
    player = NNPolicy(
        model_save_path=os.path.join(args.out_directory, args.model_json))
    Helper.save_metadata(metadata, args.out_directory, "su_metadata.json")
    player.save_model()

    target = BaselinePolicy()
    optimizer = SGD(lr=metadata["learning_rate"])
    player.model.compile(loss="categorical_crossentropy",
                         optimizer=optimizer,
                         metrics=["accuracy"])
    # Separate generator instances for training and validation.
    train_generator = state_action_generator(target, metadata)
    validation_generator = state_action_generator(target, metadata)
    csv_logger = CSVLogger(os.path.join(args.out_directory, args.result_file),
                           append=True)
    stopper = EarlyStopping(monitor='loss', patience=3)
    player.model.fit_generator(generator=train_generator,
                               steps_per_epoch=metadata["step_epoch"],
                               epochs=metadata["epoch"],
                               callbacks=[csv_logger, stopper],
                               validation_data=validation_generator,
                               validation_steps=metadata["val_steps"])
    # Save the final weights as the zeroth file for RL training to start from.
    player.model.save_weights(os.path.join(args.out_directory, ZEROTH_FILE))
def run_training(cmd_line_args=None):
    """Train the policy network with a Monte Carlo approach and exploring starts.

    Each iteration samples an opponent from a pool of earlier checkpoints,
    plays a batch of games against it, trains on the results, and periodically
    saves weights and adds the learner to the opponent pool. Supports resuming
    from a previous run via --resume.

    Args:
        cmd_line_args: Optional list of command line arguments; when None,
            sys.argv is used (passing a list is useful for testing).
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Train the policy network with Monte Carlo approach and exploring start')
    parser.add_argument("out_directory",
                        help="Path to folder where the model params and metadata will be saved after each epoch.")
    parser.add_argument("--resume",
                        help="Load latest weights in out_directory and resume",
                        default=False, action="store_true")
    parser.add_argument("--model-json",
                        help="JSON file for policy model in the output directory.",
                        default="model.json")
    parser.add_argument("--initial-weights",
                        help="Path to HDF5 file with inital weights (i.e. result of supervised training).",
                        default=ZEROTH_FILE)
    parser.add_argument("--learning-rate",
                        help="Keras learning rate (Default: 0.01)",
                        type=float, default=0.01)
    parser.add_argument("--save-every",
                        help="Save policy as a new opponent every n batches (Default: 200)",
                        type=int, default=200)
    parser.add_argument("--record-every",
                        help="Save learner's weights every n batches (Default: 100)",
                        type=int, default=100)
    parser.add_argument("--game-batch",
                        help="Number of games per mini-batch (Default: 50)",
                        type=int, default=50)
    # BUG FIX: the help text said 50000 but the actual default is 5000.
    parser.add_argument("--iterations",
                        help="Number of training batches/iterations (Default: 5000)",
                        type=int, default=5000)
    parser.add_argument("--pool-size",
                        help="Size of the games pool (Default: 5000)",
                        type=int, default=5000)
    parser.add_argument("--card-path",
                        help="The directory with the card set file (Default: {})".format(gm.DEFAULT_PATH),
                        default=gm.DEFAULT_PATH)
    parser.add_argument("--card-file",
                        help="The file containing the cards to play with (Default: {})".format(gm.DEFAULT_CARDS_FILE),
                        default=gm.DEFAULT_CARDS_FILE)
    # BUG FIX: default was True, which made the store_true flag a no-op.
    parser.add_argument("--verbose", "-v",
                        help="Turn on verbose mode",
                        default=False, action="store_true")
    # Baseline function (TODO) default lambda state: 0 (receives either file
    # paths to JSON and weights or None, in which case it uses default baseline 0)
    if cmd_line_args is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(cmd_line_args)

    if not os.path.exists(args.out_directory):
        if args.verbose:
            print("creating output directory {}".format(args.out_directory))
        os.makedirs(args.out_directory)
    if not os.path.exists(os.path.join(args.card_path, args.card_file)):
        raise ValueError("Cannot resume without card file {} in the directory {}".format(args.card_file, args.card_path))

    metadata = {
        "out_directory": args.out_directory,
        "model_file": args.model_json,
        "init_weights": args.initial_weights,
        "learning_rate": args.learning_rate,
        "game_batch": args.game_batch,
        "save_every": args.save_every,
        # BUG FIX: key was misspelled "poo_size"; the pooling logic reads
        # metadata["pool_size"].
        "pool_size": args.pool_size,
        "card_path": args.card_path,
        "card_file": args.card_file,
        "opponents": [ZEROTH_FILE],  # which weights from which to sample an opponent each batch
        "num_wins": {},  # number of wins for player in each batch
        "wins_per_opponent": {}
    }

    if not args.resume:
        # Starting the training from scratch.
        player_weights = ZEROTH_FILE
        iter_start = 1
        player = NNPolicy(model_save_path=os.path.join(args.out_directory, args.model_json))
        Helper.save_metadata(metadata, args.out_directory, "metadata.json")
        player.save_model()
        # Create the zeroth weight file.
        player.model.save_weights(os.path.join(args.out_directory, player_weights))
    else:
        # Load the metadata.
        if not os.path.exists(os.path.join(args.out_directory, "metadata.json")):
            raise ValueError("Cannot resume without metadata.json file in the output directory")
        with open(os.path.join(args.out_directory, "metadata.json"), "r") as f:
            old_metadata = json.load(f)
        # Merge the metadata in case any parameter changed.
        metadata = {**old_metadata, **metadata}
        # Load the model.
        if not os.path.exists(os.path.join(args.out_directory, args.model_json)):
            raise ValueError("Cannot resume without model json file in the output directory")
        args.model_json = os.path.join(args.out_directory, os.path.basename(args.model_json))
        if args.verbose:
            print("Resuming with model {}".format(args.model_json))
        player = NNPolicy(model_load_path=args.model_json)
        # Load the initial weights.
        if not re.match(r"weights\.\d{5}\.hdf5", args.initial_weights):
            raise ValueError("Expected to resume from weights file with name 'weights.#####.hdf5'")
        player_weights = args.initial_weights
        args.initial_weights = os.path.join(args.out_directory, os.path.basename(args.initial_weights))
        if not os.path.exists(args.initial_weights):
            raise ValueError("Cannot resume without weight file {} in the output directory".format(args.initial_weights))
        if args.verbose:
            print("Resuming with weights {}".format(args.initial_weights))
        player.model.load_weights(args.initial_weights)
        # Resume the iteration count from the weight file's index,
        # e.g. "weights.00042.hdf5"[8:13] == "00042" -> start at 43.
        iter_start = 1 + int(player_weights[8:13])

    opponent = player.clone()
    if args.verbose:
        print("created player and opponent")

    # Append args of current run to history of full command args.
    metadata["cmd_line_args"] = metadata.get("cmd_line_args", [])
    metadata["cmd_line_args"].append(vars(args))

    optimizer = SGD(lr=args.learning_rate)
    player.model.compile(loss=log_loss, optimizer=optimizer)
    # game_pool = []
    for i_iter in range(iter_start, args.iterations + 1):
        # Note that player_weights will only be saved as a file every
        # args.record_every iterations. Regardless, player_weights enters into
        # the metadata to keep track of the win ratio over time.
        player_weights = "weights.%05d.hdf5" % i_iter

        # Randomly choose opponent from pool (possibly self), and play
        # game_batch games against them.
        opp_weights = np.random.choice(metadata["opponents"])
        opp_path = os.path.join(args.out_directory, opp_weights)

        # Load new weights into opponent's network, but keep the same opponent object.
        opponent.model.load_weights(opp_path)

        # Run games (and learn from results).
        (states, card_actions, move_actions, rewards) = simulate_games(player, opponent, metadata)
        '''
        game_pool = game_pool + list(zip((states, card_actions, move_actions, rewards)))
        if len(game_pool) > metadata["pool_size"]:
            random.shuffle(game_pool)
            game_pool = game_pool[metadata["game_batch"]:]
            train_on_batch()
        elif args.verbose:
            print("Skipped Training Due to insufficient training samples")
        '''
        train_on_batch(player, states, card_actions, move_actions, rewards)

        games_won = sum(reward == 1 for reward in rewards)
        games_lost = sum(reward == -1 for reward in rewards)
        if args.verbose:
            print("In iteration {} winrate is {}, loserate is {} against opponent {}".format(
                i_iter,
                round(games_won / metadata["game_batch"], 2),
                round(games_lost / metadata["game_batch"], 2),
                opp_weights))
        metadata["num_wins"][player_weights] = games_won
        if opp_weights in metadata["wins_per_opponent"]:
            metadata["wins_per_opponent"][opp_weights].append(games_won)
        else:
            metadata["wins_per_opponent"][opp_weights] = [games_won]

        # Save intermediate models.
        if i_iter % args.record_every == 0:
            player.model.save_weights(os.path.join(args.out_directory, player_weights))
        # Add player to batch of opponents once in a while.
        if i_iter % args.save_every == 0:
            metadata["opponents"].append(player_weights)
        Helper.save_metadata(metadata, args.out_directory, "metadata.json")
def test_load_model(self):
    """A previously saved model JSON file can be loaded into a fresh policy."""
    model_path = '/home/mike/tmp/test_nn_model.json'
    player = NNPolicy()
    player.load_model(model_path)
def test_save_model(self):
    """The policy can serialize its model to the configured JSON save path."""
    save_path = '/home/mike/tmp/test_nn_model.json'
    player = NNPolicy(model_save_path=save_path)
    player.save_model()
def test_nn_weights(self):
    """Smoke test: a freshly constructed network can print its layer structure."""
    player = NNPolicy()
    player.print_network()