def main():
    """
    creates the fen position database from the pgn files and derives the averaged
    data set from it. the duration of both steps is written to the log.
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log")
    logger = logging.getLogger('Chess_SL')

    min_elo = 2400  # the minimal elo of the weakest player
    pgn_dir = "pgns"  # dir containing all the pgn files
    db_file = 'fen_positions.db'  # name of the sql data set file
    data_set_file_avg = 'king-base-light-avg.h5'  # name of the output file

    # put all games in a database in order to average the positions
    start = time.time()
    data_processing.create_fen_db(db_file, pgn_dir, min_elo)
    elapsed_time = time.time() - start
    logger.info("time to create the db file: {}".format(elapsed_time))

    # create the averaged data set
    start = time.time()
    data_processing.create_averaged_data_set(db_file, data_set_file_avg)
    elapsed_time = time.time() - start
    logger.info("time to create the averaged data set: {}".format(elapsed_time))

    # Disabled step: create the raw data set directly from the pgn files.
    # Its log statement is disabled together with it; left active it would report
    # the duration of the averaging step above under the wrong label.
    # data_set_file = 'king-base-light-raw.h5'  # name of the raw dataset file
    # start = time.time()
    # data_processing.create_data_set(data_set_file)
    # elapsed_time = time.time() - start
    # logger.info("time to create the data set from the pgn files: {}".format(elapsed_time))
def main():
    """
    builds the fen position database for the three-check variant and writes the
    averaged data set, logging how long each of the two steps took.
    """
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log")
    log = logging.getLogger('Chess_SL')

    variant = "threeCheck"
    elo_threshold = 2000
    pgn_dir = "pgn/" + variant
    db_file = "fen_positions_" + variant + ".db"  # name of the sql data set file
    data_set_file = "positions-raw" + variant + ".h5"  # name of the raw dataset file (not used below)
    data_set_file_avg = "positions-avg" + variant + ".h5"  # name of the output file

    # every step is a (log message, function, arguments) triple, executed in order
    steps = (
        ("time to create the db file: {}",
         data_processing.create_fen_db,
         (db_file, pgn_dir, elo_threshold)),
        ("time to create the averaged data set: {}",
         data_processing.create_averaged_data_set,
         (db_file, data_set_file_avg)),
    )
    for message, step, step_args in steps:
        began = time.time()
        step(*step_args)
        log.info(message.format(time.time() - began))
def mainTrain():
    """
    scratch test program: prints the fen string of a sample position, reads a few
    entries of the hdf5 data set and scans a pgn file for games without a usable
    result and for null moves ("0000").
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="../log/chess_sl.log")
    logger = logging.getLogger('Chess_SL')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    logger.debug("start the main test program")

    # get the fen string of a board
    board = chess.Board()
    for san in ("e4", "e5", "Nf3", "Nc6", "Bc4", "Bc5",
                "Qe2", "d6", "Nc3", "Bd7", "b3", "Qe7"):
        board.push_san(san)

    fen_string = board.fen()
    print(fen_string)
    print("n-bytes: ", len(fen_string.encode('utf-8')))

    # read one sample from the data set; the file is closed again even if reading fails
    compression_filter = data_processing.get_compression_filter()
    with tables.open_file("king-base-light.h5", mode='r', filters=compression_filter) as data_file:
        print(data_file.root.data.shape[0])

        state = data_file.root.data[2, 0:CONST.STATE_SIZE]
        policy_idx = int(data_file.root.data[2, -2])
        # NOTE(review): the value is read from row 100 while state and policy come from row 2 -- confirm
        value = data_file.root.data[100, -1]

    state = state.reshape(CONST.INPUT_CHANNELS, CONST.BOARD_HEIGHT, CONST.BOARD_WIDTH)
    policy = np.zeros(board_representation.LABEL_COUNT)
    policy[policy_idx] = 1

    with open("../pgns/KingBaseLite2019-B00-B19.pgn") as pgn_file:
        game = chess.pgn.read_game(pgn_file)  # read out the first game from the pgn
        while game is not None:
            result = data_processing.value_from_result(game.headers["Result"])
            if result is None:
                print(game)

            # check the current game before fetching the next one, so the first game is
            # scanned as well and the None returned at the end of the file is never used
            for move in game.mainline_moves():
                if move.uci() == "0000":
                    print(game)
                    print(move)

            game = chess.pgn.read_game(pgn_file)  # read out the next game from the pgn
def main():
    """Evaluate a fine-tuned MRC model on ``./data/test1.json``.

    Parses the shared command line options plus the required
    ``--fine_tunning_model`` checkpoint path, builds the test data loader,
    restores the model weights and runs ``evaluate`` with prefix ``"test"``.
    """
    parser = get_argparse()
    parser.add_argument("--fine_tunning_model",
                        type=str,
                        required=True,
                        help="fine_tuning model path")
    args = parser.parse_args()
    print(
        json.dumps(vars(args),
                   sort_keys=True,
                   indent=4,
                   separators=(', ', ': '),
                   ensure_ascii=False))
    init_logger(log_file="./log/{}.log".format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    seed_everything(args.seed)

    # save path; makedirs avoids the exists/mkdir race and creates missing parents
    os.makedirs(args.output_dir, exist_ok=True)

    # device
    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    # tokenizer
    tokenizer = BertTokenizerFast.from_pretrained(args.model_name_or_path)

    # Dataset & Dataloader
    test_dataset = MrcDataset(args,
                              json_path="./data/test1.json",
                              tokenizer=tokenizer)

    test_iter = DataLoader(test_dataset,
                           shuffle=False,
                           batch_size=args.per_gpu_eval_batch_size,
                           collate_fn=collate_fn,
                           num_workers=24)

    logger.info("The nums of the test_dataset examples is {}".format(
        len(test_dataset.examples)))
    logger.info("The nums of the test_dataset features is {}".format(
        len(test_dataset)))

    # model
    model = MRC_model(args.model_name_or_path)
    model.to(args.device)
    # map_location lets a checkpoint written on a GPU be restored on a CPU-only host
    model.load_state_dict(
        torch.load(args.fine_tunning_model, map_location=args.device))

    # predict test
    model.eval()
    evaluate(args, test_iter, model, prefix="test")
def __init__(self, args) -> None:
    """Create the networks, optimizers, losses and output folders of a training run.

    :param args: parsed options; ``learning_rate``, ``LAMBDA``, ``save``,
        ``batch_size``, ``path`` and ``epoch_num`` are read from it
    """
    # options copied from the arguments
    self.lr = args.learning_rate
    self.LAMBDA = args.LAMBDA
    self.save = args.save
    self.batch_size = args.batch_size
    self.path = args.path
    self.n_epochs = args.epoch_num
    self.eval_interval = 10

    # loss histories, one independent empty list each
    self.G_image_loss, self.G_GAN_loss, self.G_total_loss, self.D_loss = [], [], [], []

    # both networks are placed on the gpu
    device = "cuda"
    self.netG = Generator().to(device)
    self.netD = Discriminator().to(device)

    # generator and discriminator share the same Adam settings
    adam_options = {"lr": self.lr, "betas": (0.5, 0.999)}
    self.optimizerG = flow.optim.Adam(self.netG.parameters(), **adam_options)
    self.optimizerD = flow.optim.Adam(self.netD.parameters(), **adam_options)

    self.criterionGAN = flow.nn.BCEWithLogitsLoss()
    self.criterionL1 = flow.nn.L1Loss()

    # output folders below the run directory
    output_dirs = [os.path.join(self.path, name) for name in ("checkpoint", "test_images")]
    self.checkpoint_path, self.test_images_path = output_dirs
    mkdirs(*output_dirs)

    self.logger = init_logger(os.path.join(self.path, "log.txt"))
def main():
    """
    saves a freshly initialised (untrained) network, lets it play self-play games
    and pickles the collected training examples to "initial_training_data.pkl".
    """
    utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
    log = logging.getLogger('Connect4')

    # seed the python random generator
    random.seed(a=None, version=2)

    # configuration of the random network and of the self-play run
    network_path = "self-play-net.pt"
    Config.n_blocks = 1
    Config.n_filters = 1
    Config.mcts_sim_count = 200
    loops = 10
    games_per_loop = 1000
    total_games = games_per_loop * loops

    # the untrained network is stored so the self-play worker can load it from disk
    log.info("create a new random network for the self play")
    random_net = networks.ResNet(Config.learning_rate, Config.n_blocks,
                                 Config.n_filters, Config.weight_decay)
    torch.save({'state_dict': random_net.state_dict()}, network_path)

    # collect the examples of all loops in one list
    log.info("start to create self-play games")
    started_at = time.time()
    training_examples = []
    for loop_idx in range(loops):
        training_examples += alpha_zero_learning.__self_play_worker__(
            network_path, games_per_loop)
        log.debug("finished creating games in loop {}".format(loop_idx))

    # persist the training examples
    with open("initial_training_data.pkl", 'wb') as out_file:
        pickle.dump(training_examples, out_file, pickle.HIGHEST_PROTOCOL)

    example_count = len(training_examples)
    log.info(
        "finished creating the initial training examples, length: {}".format(
            example_count))

    # 0.5 as symmetric positions are included as well
    average_length = 0.5 * example_count / total_games
    log.debug("average moves per game: {}".format(average_length))
    log.debug("elapsed time: {}".format(time.time() - started_at))
def mainGui():
    """
    loads the network "network_gen_148.pt" on the cpu and starts the connect4 gui with it.
    """
    utils.init_logger(logging.DEBUG, file_name="log/gui.log")
    log = logging.getLogger('Gui')
    np.set_printoptions(suppress=True, precision=2)

    # architecture of the stored network
    net_path = "network_gen_148.pt"
    n_blocks = 10
    n_filters = 128

    # restore the weights into a network that is evaluated on the cpu
    Config.evaluation_device = torch.device('cpu')
    cpu_net = networks.ResNet(1e-4, n_blocks, n_filters, 1e-4)
    stored = torch.load(net_path, map_location='cpu')
    cpu_net.load_state_dict(stored['state_dict'])
    log.debug("network loaded")

    # hand the network to the gui and run the game
    connect4_gui.GUI(cpu_net).execute_game()
def mainTrain():
    """
    loads a stored three-check network and prints the legal moves of a sample
    three-check position.
    """
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log.log")
    log = logging.getLogger('Sup Learning')
    np.set_printoptions(suppress=True, precision=6)

    # seed both random generators
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # location of the stored network
    variant = "threeCheck"
    network_dir = "networks/" + variant
    network_file = network_dir + "/" + "network_batch_13071.pt"

    log.info("load the neural network: " + network_file)
    net = torch.load(network_file, map_location='cpu')

    # play the opening moves on a three-check board
    opening = ("e4", "e5", "Bc4", "Bc5", "Bxf7+", "Kxf7", "Qf3+", "Kg6", "Qg3")
    board = chess.variant.ThreeCheckBoard()
    for san in opening:
        board.push_san(san)

    print(board.legal_moves)
from utils import utils
from game import tournament
from game.globals import CONST
import logging

# Test script: lets two random players and two minimax players play against each
# other and writes the resulting scores to the log.

# initialize the logger
utils.init_logger(logging.DEBUG, file_name="log/app.log")
logger = logging.getLogger('Tests')


# play random vs random, player1 plays once as white and once as black
game_count = 1000
player1 = tournament.RandomPlayer()
player2 = tournament.RandomPlayer()

white_score = tournament.play_one_color(game_count, player1, CONST.WHITE, player2)
logger.info("white score for random vs random: {}".format(white_score))

black_score = tournament.play_one_color(game_count, player1, CONST.BLACK, player2)
logger.info("black score for random vs random: {}".format(black_score))


# play minimax vs minimax to check if the score is 0.5
game_count = 100
player1 = tournament.MinimaxPlayer()
player2 = tournament.MinimaxPlayer()
player1_score = tournament.play_match(game_count, player1, player2)
logger.info("minimax vs minimax score: {}".format(player1_score))
import pandas as pd
from utils import utils

import globals
from games.connect4 import configuration
# the connect4 configuration is registered before the remaining project modules are imported
# NOTE(review): presumably those modules read the registered configuration at import time -- confirm
globals.init_config(configuration)

from games.connect4.configuration import Config
import data_storage
from games.connect4 import connect4, evaluation
import mcts


# The logger
utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
logger = logging.getLogger('evaluation')

# print numpy arrays without scientific notation and with 6 decimals
np.set_printoptions(suppress=True, precision=6)


# set the random seed
random.seed(a=None, version=2)
np.random.seed(seed=None)

test_set_path = "data_sets/test_set.csv"    # csv file with the test set
network_dir = "../../training_data/connect4/networks/"    # directory in which the networks are saved
print("pytorch version: ", torch.__version__)
def main_lr():
    """
    finds the min and the max learning rate. train the network for a few epochs and plot the
    prediction accuracy vs the learning rate. min learning rate is the rate where the prediction
    accuracy starts to increase and the max learning rate is the lr where the prediction accuracy
    slows down or even deteriorates. The batch size and all other hyperparameters should be the
    same for this test and the actual training. This test can be done with a smaller subset of
    the data set if the full data set is too large.

    the tested learning rates are 10**p for p from -6 to 0 in steps of 0.25. the learning rates
    and both error curves are saved as .npy files in the working directory and plotted.
    :return:
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
    logger = logging.getLogger('Sup Learning')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    Config.batch_size = 256
    Config.weight_decay = 1e-4
    Config.n_blocks = 10
    Config.n_filters = 128
    epochs = 8
    csv_training_set_path = "../data_sets/training_set.csv"
    # NOTE(review): the test set path points to the training set file -- confirm this is intended
    csv_test_set_path = "../data_sets/training_set.csv"

    # load the test set
    csv_test_set = pd.read_csv(csv_test_set_path, sep=",")

    # load the training set
    csv_training_set = pd.read_csv(csv_training_set_path, sep=",")
    sample_count = supervised.weak_sample_size(csv_training_set)
    logger.info("the training set contains {} weak training examples".format(
        sample_count))

    # create the training data
    training_set = supervised.create_training_set(csv_training_set)
    logger.info("finished parsing the training set, size: {}".format(
        len(training_set)))

    # define the parameters for the training
    params = {
        'batch_size': Config.batch_size,
        'shuffle': True,
        'num_workers': 2,
        'pin_memory': True,
        'drop_last': True,
    }

    # generators
    training_generator = data.DataLoader(training_set, **params)

    # train the neural network once for every learning rate
    learning_rates = []
    prediction_errors = []
    value_errors = []
    for power in np.arange(-6, 0.1, 0.25):
        Config.learning_rate = 10**power
        prediction_error, value_error = train_net(epochs, training_generator,
                                                  csv_test_set)

        learning_rates.append(Config.learning_rate)
        prediction_errors.append(prediction_error)
        value_errors.append(value_error)

    # save the results
    np.save("learning_rates.npy", np.array(learning_rates))
    np.save("lr_policy_error.npy", np.array(prediction_errors))
    np.save("lr_value_error.npy", np.array(value_errors))

    # set the style of the plot
    try:
        plt.style.use('seaborn-dark-palette')
    except OSError:
        # newer matplotlib releases only ship this style under its renamed form
        plt.style.use('seaborn-v0_8-dark-palette')

    # policy prediction error
    fig1 = plt.figure(1)
    plt.semilogx(learning_rates, prediction_errors)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Move Prediction Error")
    plt.xlabel("Learning Rate")
    plt.ylabel("Prediction Error")
    fig1.show()

    # value prediction error
    fig2 = plt.figure(2)
    plt.semilogx(learning_rates, value_errors)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Position Value Error")
    plt.xlabel("Learning Rate")
    plt.ylabel("Value Error")
    fig2.show()

    plt.show()
def main():
    """Fine-tune the MRC model and keep the weights with the best dev-set F1.

    Trains on ``./data/train.json`` and scores on ``./data/dev.json`` after
    every epoch. Whenever the F1 score improves, the state dict is written to
    ``<output_dir>/best_model.pkl``. Training stops early once
    ``args.early_stop`` consecutive epochs bring no improvement.
    """
    args = get_argparse().parse_args()
    print(
        json.dumps(vars(args),
                   sort_keys=True,
                   indent=4,
                   separators=(', ', ': '),
                   ensure_ascii=False))
    init_logger(log_file="./log/{}.log".format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    seed_everything(args.seed)

    # create the output directory (including missing parents)
    os.makedirs(args.output_dir, exist_ok=True)

    # device
    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    # tokenizer
    tokenizer = BertTokenizerFast.from_pretrained(args.model_name_or_path)

    # Dataset & Dataloader
    train_dataset = MrcDataset(args,
                               json_path="./data/train.json",
                               tokenizer=tokenizer)
    eval_dataset = MrcDataset(args,
                              json_path="./data/dev.json",
                              tokenizer=tokenizer)

    train_iter = DataLoader(train_dataset,
                            shuffle=True,
                            batch_size=args.per_gpu_train_batch_size,
                            collate_fn=collate_fn,
                            num_workers=10)
    eval_iter = DataLoader(eval_dataset,
                           shuffle=False,
                           batch_size=args.per_gpu_eval_batch_size,
                           collate_fn=collate_fn,
                           num_workers=10)

    logger.info("The nums of the train_dataset examples is {}".format(
        len(train_dataset.examples)))
    logger.info("The nums of the train_dataset features is {}".format(
        len(train_dataset)))
    logger.info("The nums of the eval_dataset examples is {}".format(
        len(eval_dataset.examples)))
    logger.info("The nums of the eval_dataset features is {}".format(
        len(eval_dataset)))

    # model
    model = MRC_model(args.model_name_or_path)
    model.to(args.device)

    # training
    best_f1 = 0
    early_stop = 0
    for epoch in range(int(args.num_train_epochs)):
        model.train()
        train(args, train_iter, model)

        # score the model on the dev set after every epoch
        eval_f1, eval_EM = evaluate(args, eval_iter, model, prefix="eval")
        logger.info("The F1-score is {}, The EM-score is {}".format(
            eval_f1, eval_EM))

        if eval_f1 > best_f1:
            early_stop = 0
            best_f1 = eval_f1
            logger.info(
                "the best eval f1 is {:.4f}, saving model !!".format(best_f1))
            # unwrap a (Distributed)DataParallel wrapper; the state dict is written
            # directly, so no deep copy of the whole model is needed
            model_to_save = model.module if hasattr(model, "module") else model
            torch.save(model_to_save.state_dict(),
                       os.path.join(args.output_dir, "best_model.pkl"))
        else:
            early_stop += 1
            if early_stop == args.early_stop:
                logger.info("Early stop in {} epoch!".format(epoch))
                break
def mainTrain():
    """
    scratch test program: runs a forward pass through a new RiseNet, prints a few
    sanity checks, reads some entries of the averaged hdf5 data set and scans a pgn
    file for games without a usable result and for null moves ("0000").
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="../log/chess_sl.log")
    logger = logging.getLogger('Chess_SL')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    logger.debug("start the main test program")

    # test the rise network
    network = networks.RiseNet(Config.learning_rate, Config.n_blocks, Config.n_se_blocks,
                               Config.n_filters, Config.se_ratio, Config.n_mobile_filters,
                               Config.n_filter_inc, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    board = chess.Board()
    net_input = board_representation.board_to_matrix(board)
    net_input = torch.tensor(net_input)
    net_input = net_input.to(Config.training_device, dtype=torch.float)
    net_input = net_input.unsqueeze(0)  # add the batch dimension
    res = network(net_input)

    board = chess.Board()
    board.push_san("g4")
    board.push_san("e5")
    board.push_san("f4")
    board.push_uci("d8h4")  # same move as board.push_san("Qh4")
    print(board.turn == chess.WHITE)

    numbers = [1, 2, 3, 4]
    numbers.remove(2)
    print(numbers)

    test_str = "_5_6"
    print(test_str.split("_"))

    # get the fen string of a board
    board = chess.Board()
    for san in ("e4", "e5", "Nf3", "Nc6", "Bc4", "Bc5",
                "Qe2", "d6", "Nc3", "Bd7", "b3", "Qe7"):
        board.push_san(san)

    fen_string = board.fen()
    print(fen_string)
    print("n-bytes: ", len(fen_string.encode('utf-8')))

    # read one sample from the data set; the file is closed again even if reading fails
    compression_filter = data_processing.get_compression_filter()
    with tables.open_file("../king-base-light-avg.h5", mode='r',
                          filters=compression_filter) as data_file:
        print(data_file.root.data.shape[0])

        state = data_file.root.data[2, 0:CONST.STATE_SIZE]
        policy_idx = int(data_file.root.data[2, -2])
        # NOTE(review): the value is read from row 100 while state and policy come from row 2 -- confirm
        value = data_file.root.data[100, -1]

    state = state.reshape(CONST.INPUT_CHANNELS, CONST.BOARD_HEIGHT, CONST.BOARD_WIDTH)
    policy = np.zeros(board_representation.LABEL_COUNT)
    policy[policy_idx] = 1

    with open("../pgns/KingBaseLite2019-B00-B19.pgn") as pgn_file:
        game = chess.pgn.read_game(pgn_file)  # read out the first game from the pgn
        while game is not None:
            result = data_processing.value_from_result(game.headers["Result"])
            if result is None:
                print(game)

            # check the current game before fetching the next one, so the first game is
            # scanned as well and the None returned at the end of the file is never used
            for move in game.mainline_moves():
                if move.uci() == "0000":
                    print(game)
                    print(move)

            game = chess.pgn.read_game(pgn_file)  # read out the next game from the pgn
def mainTrain():
    """
    plots statistics of a supervised training run: the cumulated number of games per
    elo (cached in "elo_dict.pkl"), a sample prediction of the stored network and the
    value / policy loss curves saved in the training progress directory.
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log.log")
    logger = logging.getLogger('Sup Learning')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    network_dir = "networks"
    network_file = network_dir + "/" + "network_batch_158436.pt"
    training_progress_dir = "training_progress"

    # count the games in the database, the result is cached in a pickle file
    dict_file = "elo_dict.pkl"
    if not Path(dict_file).is_file():
        elo_dict = data_processing.create_elo_dict("pgns")
        with open(dict_file, 'wb') as f:
            pickle.dump(elo_dict, f, pickle.HIGHEST_PROTOCOL)

    with open(dict_file, 'rb') as f:
        elo_dict = pickle.load(f)

    # cumulated game count over ascending elo; the keys are ordered by their numeric
    # value so that keys stored as strings are not sorted lexicographically
    elo = []
    count = []
    tot_count = 0
    for key in sorted(elo_dict, key=int):
        elo.append(int(key))
        tot_count += elo_dict[key]
        count.append(tot_count)

    # plot the data set statistics
    fig1 = plt.figure(1)
    plt.plot(elo, count)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Dataset Statistics")
    plt.xlabel("Minimal ELO")
    plt.ylabel("Total Number of Games")
    fig1.show()
    plt.show()

    # load the network
    logger.info("load the neural network: " + network_file)
    net = torch.load(network_file, map_location='cpu')
    # the network object is pickled by the training script while it is in training mode;
    # switch to evaluation mode and skip gradient tracking for the sample prediction
    net.eval()

    board = chess.Board()
    board.push_san("e4")
    board.push_san("e5")
    board.push_san("Nf3")

    bit_board = board_representation.board_to_matrix(board)
    with torch.no_grad():
        policy, value = net(torch.Tensor(bit_board).unsqueeze(0))
    print(policy)
    print(value)
    print("move: ", board_representation.policy_to_move(policy.detach().numpy(), board.turn))
    print(board.legal_moves)

    # load the learning progress
    value_loss = np.load(training_progress_dir + "/value_loss.npy")
    policy_loss = np.load(training_progress_dir + "/policy_loss.npy")
    batches = np.load(training_progress_dir + "/batches.npy")

    # plot the loss versus the number of seen batches
    # plot the value training loss
    fig2 = plt.figure(2)
    plt.plot(batches, value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Value Loss")
    fig2.show()

    # plot the training policy loss
    fig3 = plt.figure(3)
    plt.plot(batches, policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Policy Loss")
    fig3.show()

    plt.show()
import matplotlib.pyplot as plt
import torch
import random
import time
import logging

from utils import utils
from game.globals import CONST
from game.globals import Globals
import td0_learning


# The logger
utils.init_logger(logging.DEBUG, file_name="log/tic_tac_toe.log")
logger = logging.getLogger('TD0_Learning')
livePlots = True


# set the random seed
random.seed(a=None, version=2)


# hyperparameters for the training of the neural network
epoch_count = 1000                  # the number of epochs to train the neural network 100'000 episodes ~ 1h
episode_count = 100                 # the number of games that are self-played in one epoch
update_count = 9 * episode_count    # the number of times the neural net is updated in one epoch with the experience data
test_interval = 10                  # epoch intervals at which the network plays against a random player
test_game_count = 1000              # the number of games that are played in the test against the random opponent
epsilon = 0.1                       # the exploration constant
disc = 0.99                         # the discount factor
learning_rate = 0.005               # the learning rate of the neural network
batch_size = 32                     # the batch size of the experience buffer for the neural network training
exp_buffer_size = 10000             # the size of the experience replay buffer
def mainTrain():
    """
    trains a new ResNet on the averaged position data set of the three-check variant.
    the policy and value loss are averaged over 100 batches and collected for the plots,
    the network object and the loss curves are saved every 1000 batches and once more
    at the end. both loss curves are plotted when the training is finished.
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log.log")
    logger = logging.getLogger('Sup Learning')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    variant = "threeCheck"
    Config.learning_rate = 0.0001
    Config.weight_decay = 1e-4
    Config.n_blocks = 10
    Config.n_filters = 128
    epochs = 3
    training_set_path = "positions-avg" + variant + ".h5"
    network_dir = "networks/" + variant
    training_progress_dir = "training_progress/" + variant

    # define the parameters for the training
    params = {'batch_size': 512, 'shuffle': True, 'num_workers': 2}

    # create the data set class
    training_set = data_processing.Dataset(training_set_path)
    training_generator = data.DataLoader(training_set, **params)
    logger.info("training set created, length: {}".format(
        training_set.__len__()))

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks,
                              Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # create all needed folders
    Path(network_dir).mkdir(parents=True, exist_ok=True)
    Path(training_progress_dir).mkdir(parents=True, exist_ok=True)

    # list for the plots
    batches = []
    policy_loss = []
    value_loss = []

    tot_batch_count = 0  # batches trained since the start of the training
    current_batch_count = 0  # batches since the last 100-batch average was taken
    current_value_loss = 0  # value loss summed up since the last average
    current_policy_loss = 0  # policy loss summed up since the last average

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epochs):
        # training
        for state_batch, policy_batch, value_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device,
                                         dtype=torch.float)
            # unsqueeze(1) adds a second dimension to the value batch
            value_batch = value_batch.unsqueeze(1).to(Config.training_device,
                                                      dtype=torch.float)
            policy_batch = policy_batch.to(Config.training_device,
                                           dtype=torch.float)

            # execute one training step
            loss_p, loss_v = network.train_step(state_batch, policy_batch,
                                                value_batch)
            current_policy_loss += loss_p
            current_value_loss += loss_v
            current_batch_count += 1
            tot_batch_count += 1

            # every 100 batches: store the average losses and reset the accumulators
            if tot_batch_count % 100 == 0:
                logger.debug("epoch {}: trained {} batches so far".format(
                    epoch, tot_batch_count))
                batches.append(tot_batch_count)
                policy_loss.append(current_policy_loss / current_batch_count)
                value_loss.append(current_value_loss / current_batch_count)

                current_policy_loss = 0
                current_value_loss = 0
                current_batch_count = 0

                # every 1000 batches: save the network and the loss curves
                if tot_batch_count % 1000 == 0:
                    network_path = "{}/network_batch_{}.pt".format(
                        network_dir, tot_batch_count)
                    torch.save(network, network_path)

                    np.save(training_progress_dir + "/value_loss.npy",
                            np.array(value_loss))
                    np.save(training_progress_dir + "/policy_loss.npy",
                            np.array(policy_loss))
                    np.save(training_progress_dir + "/batches.npy",
                            np.array(batches))

        # NOTE(review): the nesting of this save section relative to the epoch loop was
        # reconstructed from a whitespace-collapsed source -- confirm against the original file
        # save the last network
        network_path = "{}/network_batch_{}.pt".format(network_dir,
                                                       tot_batch_count)
        torch.save(network, network_path)

        np.save(training_progress_dir + "/value_loss.npy", np.array(value_loss))
        np.save(training_progress_dir + "/policy_loss.npy",
                np.array(policy_loss))
        np.save(training_progress_dir + "/batches.npy", np.array(batches))

        logger.debug("epoch {}: finished training".format(epoch))

    # plot the loss versus the number of seen batches
    # plot the value training loss
    fig1 = plt.figure(1)
    plt.plot(batches, value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Value Loss")
    fig1.show()

    # plot the training policy loss
    fig2 = plt.figure(2)
    plt.plot(batches, policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Policy Loss")
    fig2.show()

    plt.show()
feat = net.inference(img, roi) all_features.append(feat.cpu().numpy()) return all_features if __name__ == "__main__": args = parse_args() if args.cfg: cfg_from_file(args.cfg) if args.checkpoint is None: raise KeyError("--checkpoint option can not be empty.") if args.data_dir: cfg.DATA_DIR = osp.abspath(args.data_dir) init_logger("test.log") logging.info("Called with args:\n" + str(args)) dataset = PSDB(args.dataset) logging.info("Loaded dataset: %s" % args.dataset) net = Network() checkpoint = torch.load(osp.abspath(args.checkpoint)) net.load_state_dict(checkpoint["model"]) logging.info("Loaded checkpoint from: %s" % args.checkpoint) net.eval() device = torch.device("cuda:%s" % args.gpu if args.gpu != -1 else "cpu") net.to(device) save_path = osp.join(cfg.DATA_DIR, "cache") if args.eval_only:
def main_evaluation(game_class, result_folder):
    """
    lets the network of every generation play against the network of the last generation,
    saves the scores as .npy files in the result folder and plots the prediction score.
    the match with mcts is currently disabled, its score list is therefore saved empty.
    :param game_class:      the game class, passed on to the net vs net matches
    :param result_folder:   folder in which the .npy result files are saved
    :return:
    """
    # configuration values
    game_count = 200        # the number of test games to play
    mcts_sim_count = 200    # the number of mcts simulations to perform
    temp = 0.3              # the temperature used to get the policy for the move selection, gives some randomness

    # the logger
    utils.init_logger(logging.DEBUG, file_name="log/app.log")
    logger = logging.getLogger('evaluation')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # load the network
    network_dir = config.save_dir + "/networks/"
    path_list = os.listdir(network_dir)
    path_list.sort(key=utils.natural_keys)

    # let all network play against the last generation without any mcts
    best_net_path = network_dir + path_list[-1]
    best_net = data_storage.load_net(best_net_path, torch_device)

    generation = []
    prediction_score = []
    for i, file_name in enumerate(path_list):
        generation.append(i)
        net_path = network_dir + file_name
        net = data_storage.load_net(net_path, torch_device)
        score = net_vs_net_prediction(net, best_net, game_count, game_class)
        prediction_score.append(score)

        logger.debug("prediction score: {}, network: {}".format(score, net_path))

    # let all network play against the last generation with mcts
    # the list of networks is emptied on purpose, which disables the loop below
    mcts_score = []
    path_list = []      # [path_list[0], path_list[-2]]
    for file_name in path_list:
        net_path = network_dir + file_name
        net = data_storage.load_net(net_path, torch_device)
        score = net_vs_net_mcts(net, best_net, mcts_sim_count, temp, game_count, game_class)
        mcts_score.append(score)

        logger.debug("mcts_score score: {}, network: {}".format(score, net_path))

    # save the results
    np.save(result_folder + "/net_vs_net_pred.npy", np.array(prediction_score))
    np.save(result_folder + "/net_vs_net_mcts.npy", np.array(mcts_score))
    np.save(result_folder + "/net_vs_net_gen.npy", np.array(generation))

    # set the style of the plot
    try:
        plt.style.use('seaborn-dark-palette')
    except OSError:
        # newer matplotlib releases only ship this style under its renamed form
        plt.style.use('seaborn-v0_8-dark-palette')

    # plot the prediction score
    fig1 = plt.figure(1)
    plt.plot(generation, prediction_score)
    axes = plt.gca()
    axes.set_ylim([0, 0.55])
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Prediction Score vs Best Network")
    plt.xlabel("Generation")
    plt.ylabel("Prediction Score")
    fig1.show()

    # # plot the mcts score
    # fig2 = plt.figure(2)
    # plt.plot(generation, mcts_score)
    # axes = plt.gca()
    # axes.set_ylim([0, 0.55])
    # axes.grid(True, color=(0.9, 0.9, 0.9))
    # plt.title("MCTS Prediction Score vs Best Network")
    # plt.xlabel("Generation")
    # plt.ylabel("MCTS Score")
    # fig2.show()

    plt.show()
def main():
    """Command line entry point for data preparation, training and testing.

    Parses the options, copies the pipeline settings from ``pipeline.yml`` into
    the global ``config``, stores the parsed arguments next to the checkpoints
    and then runs the stages selected with ``--do_data``, ``--do_train`` and
    ``--do_test``.

    Raises:
        PipelineReadError: if ``pipeline.yml`` cannot be read or lacks one of
            the expected entries; the original error is attached as the cause.
    """
    parser = ArgumentParser()
    parser.add_argument("--pretrain", default="bert", type=str)
    parser.add_argument("--do_data", action="store_true")
    parser.add_argument("--do_train", action="store_true")
    parser.add_argument("--do_test", action="store_true")
    parser.add_argument("--save_best", action="store_true")
    parser.add_argument("--do_lower_case", action='store_true')
    parser.add_argument("--data_name", default="law", type=str)
    parser.add_argument("--train_data_num", default=0, type=int)
    parser.add_argument("--test_data_num", default=0, type=int)
    parser.add_argument("--epochs", default=5, type=int)
    parser.add_argument("--resume_path", default="", type=str)
    parser.add_argument("--mode", default="min", type=str)
    parser.add_argument("--monitor", default="valid_loss", type=str)
    parser.add_argument("--valid_size", default=0.2, type=float)
    parser.add_argument("--local_rank", type=int, default=-1)
    parser.add_argument("--sorted",
                        default=1,
                        type=int,
                        help="1 : True  0:False")
    parser.add_argument("--n_gpu",
                        type=str,
                        default="0",
                        help='"0,1,.." or "0" or "" ')
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
    parser.add_argument("--train_batch_size", default=8, type=int)
    parser.add_argument("--eval_batch_size", default=8, type=int)
    parser.add_argument("--train_max_seq_len", default=256, type=int)
    parser.add_argument("--eval_max_seq_len", default=256, type=int)
    parser.add_argument("--loss_scale", type=float, default=0)
    # a proportion is fractional: with type=int a value such as 0.2 was rejected on the
    # command line although the default itself is 0.1
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
    )
    parser.add_argument("--weight_decay", default=0.01, type=float)
    parser.add_argument("--adam_epsilon", default=1e-8, type=float)
    parser.add_argument("--grad_clip", default=1.0, type=float)
    parser.add_argument("--learning_rate", default=2e-5, type=float)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--fp16", action="store_true")
    parser.add_argument("--fp16_opt_level", type=str, default="O1")
    args = parser.parse_args()

    try:
        pipeline = piop.read_yml("pipeline.yml")
        pl = AttrDict(pipeline["pipeline"])
        config["preprocessor"] = pl.preprocessor
        config["pretrain"] = pl.pretrain
        config["postprocessor"] = pl.postprocessor
        config["classifier"] = pl.classifier
    except Exception as e:
        # keep the original error as the cause so the real reason stays visible
        raise PipelineReadError from e

    # one checkpoint folder per classifier
    config["checkpoint_dir"] = config["checkpoint_dir"] / config["classifier"]
    config["checkpoint_dir"].mkdir(parents=True, exist_ok=True)
    torch.save(args, config["checkpoint_dir"] / "training_args.bin")
    seed_everything(args.seed)
    init_logger(log_file=config["log_dir"] /
                "{}.log".format(config["classifier"]))
    logger.info("Training/evaluation parameters %s", args)

    if args.do_data:
        from dataio.task_data import TaskData
        data = TaskData(args.train_data_num)
        labels, sents = data.read_data(
            raw_data_path=config["raw_data_path"],
            data_dir=config["data_dir"],
            preprocessor=Preprocessor(config["preprocessor"])(
                stopwords_path=config["stopwords_path"],
                userdict_path=config["userdict_path"]),
            is_train=True)
        data.train_val_split(X=sents,
                             y=labels,
                             valid_size=args.valid_size,
                             data_dir=config["data_dir"],
                             data_name=args.data_name)
        # without a pretrained model the vocabulary is built from the training sentences
        if config["pretrain"] == "Nopretrain":
            data.build_vocab(config["nopretrain_vocab_path"],
                             sents,
                             min_count=5)

    if args.do_train:
        train(args)

    if args.do_test:
        test(args)
import logging
import sqlite3
import time
from sqlite3 import Error

from utils import utils

# The logger
utils.init_logger(logging.DEBUG, file_name="../log/chess_sl.log")
logger = logging.getLogger('Chess_SL')

db_file = "test.db"


def create_connection(db_file):
    """
    create a database connection to the SQLite database specified by db_file

    A failed connection attempt does not raise: the sqlite3 error is written
    to the module logger and None is returned, so callers have to check the
    result before using it.

    :param db_file: database file
    :return: Connection object or None
    """
    try:
        return sqlite3.connect(db_file)
    except Error as e:
        # report through the configured logger so the failure ends up in the
        # log file instead of only on stdout
        logger.error("could not connect to the database %s: %s", db_file, e)
        return None
help="Enable tensorboardX. Default: False") return parser.parse_args() if __name__ == "__main__": args = parse_args() if args.cfg: cfg_from_file(args.cfg) if args.data_dir: cfg.DATA_DIR = osp.abspath(args.data_dir) if args.weights is None and args.checkpoint is None: args.weights = osp.join(cfg.DATA_DIR, "pretrained_model", "resnet50_caffe.pth") init_logger("train.log") logging.info("Called with args:\n" + str(args)) if not args.rand: # Fix the random seeds (numpy and pytorch) for reproducibility logging.info("Set to none random mode.") torch.manual_seed(cfg.RNG_SEED) torch.cuda.manual_seed(cfg.RNG_SEED) torch.cuda.manual_seed_all(cfg.RNG_SEED) torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True random.seed(cfg.RNG_SEED) np.random.seed(cfg.RNG_SEED) os.environ["PYTHONHASHSEED"] = str(cfg.RNG_SEED) output_dir = osp.join(cfg.DATA_DIR, "trained_model")
def main_az():
    """
    Run the AlphaZero training loop for Connect4 and plot the results.

    The stored training data decides whether a new agent is created (cycle 0)
    or the current network and experience buffer are restored. Every cycle
    plays self-play games, updates the network and saves a check point. When
    all cycles are done the loss / move-count histories are written to .npy
    files in the working directory and shown as three matplotlib figures.
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
    logger = logging.getLogger('Connect4')

    # set the random seed (None: no fixed seed, runs are not reproducible)
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # create the storage object
    training_data = data_storage.load_data()

    # create the agent
    network = networks.ResNet(
        Config.learning_rate,
        Config.n_blocks,
        Config.n_filters,
        Config.weight_decay,
    )
    agent = alpha_zero_learning.Agent(network)

    if training_data.cycle != 0:
        # resume: load the current network and the stored experience buffer
        logger.debug("load an old network")
        agent.network = training_data.load_current_net()
        agent.experience_buffer = training_data.experience_buffer
    else:
        logger.debug("create a new agent")
        training_data.save_data(agent.network)  # save the generation 0 network

        if Config.use_initial_data:
            # add training examples of the untrained network
            logger.debug("fill the experience buffer with some initial data")
            agent.experience_buffer.fill_with_initial_data()

    start_training = time.time()
    for cycle in range(training_data.cycle, Config.cycle_count):
        # self play: create some game data through self play
        logger.info("start playing games in cycle {}".format(cycle))
        moves_per_game = agent.play_self_play_games(training_data.network_path)
        training_data.avg_moves_played.append(moves_per_game)
        logger.debug("average moves played: {}".format(moves_per_game))

        # training: update the network with the collected experience
        logger.info("start updates in cycle {}".format(cycle))
        p_loss, v_loss = agent.nn_update(cycle)
        training_data.policy_loss.append(p_loss)
        training_data.value_loss.append(v_loss)
        logger.debug("policy loss: {}".format(p_loss))
        logger.debug("value loss: {}".format(v_loss))

        # save the new network together with the training progress
        logger.info("save check point to file in cycle {}".format(cycle))
        training_data.cycle += 1
        training_data.experience_buffer = agent.experience_buffer
        training_data.save_data(agent.network)

    training_time = time.time() - start_training
    logger.info("elapsed time whole training process {}".format(training_time))

    # save the results
    for file_name, history in (
        ("value_loss.npy", training_data.value_loss),
        ("policy_loss.npy", training_data.policy_loss),
        ("avg_moves.npy", training_data.avg_moves_played),
    ):
        np.save(file_name, np.array(history))

    # set the style of the plot
    plt.style.use('seaborn-dark-palette')

    # one figure per history: value loss, policy loss, moves per game
    plot_specs = (
        (training_data.value_loss, "Average Value Training Loss", "Value Loss"),
        (training_data.policy_loss, "Average Policy Training Loss", "Policy Loss"),
        (training_data.avg_moves_played, "Average Moves in Self-Play Games", "Move Count"),
    )
    for fig_number, (history, title, y_label) in enumerate(plot_specs, start=1):
        figure = plt.figure(fig_number)
        plt.plot(history)
        axes = plt.gca()
        axes.grid(True, color=(0.9, 0.9, 0.9))
        plt.title(title)
        plt.xlabel("Generation")
        plt.ylabel(y_label)
        figure.show()

    plt.show()
def mainTrain():
    """
    Train the tic-tac-toe AlphaZero agent and plot the training progress.

    Each epoch first evaluates the current network against the minimax player
    (once as white, once as black), then generates self-play games, updates
    the network and stores it as a new generation in the network directory.
    After the last epoch the loss curves and the minimax scores are plotted.
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/tic_tac_toe.log")
    logger = logging.getLogger('Alpha Tic')

    # set the random seed
    random.seed(a=None, version=2)

    # initialize the pool
    Globals.n_pool_processes = 5  # mp.cpu_count()
    Globals.pool = mp.Pool(processes=Globals.n_pool_processes)

    # define the parameters
    epoch_count = 30             # the number of epochs to train the neural network
    episode_count = 2000         # the number of games that are self-played in one epoch
    update_count = 200           # the number the neural net is updated in one epoch with the experience data
    evaluation_game_count = 300  # the number of games to play against the minimax player
    mcts_sim_count = 25          # the number of simulations for the monte-carlo tree search
    c_puct = 4                   # the higher this constant the more the mcts explores
    temp = 1                     # the temperature, controls the policy value distribution
    alpha_dirich = 1             # alpha parameter for the dirichlet noise (0.03 - 0.3 az paper, 10/ avg n_moves)
    temp_threshold = 9           # up to this move the temp will be temp, otherwise 0 (deterministic play)
    learning_rate = 0.001        # the learning rate of the neural network
    batch_size = 128             # the batch size of the experience buffer for the neural network training
    exp_buffer_size = 10000      # the size of the experience replay buffer
    network_dir = "networks"     # directory in which the networks are saved

    # define the devices for the training and the target networks cpu or cuda,
    # here cpu is way faster for small nets
    Globals.device = torch.device('cpu')

    # start from an empty network directory: make sure it exists, wipe it,
    # then create it again
    if not os.path.exists(network_dir):
        os.makedirs(network_dir)
    shutil.rmtree(network_dir)
    os.makedirs(network_dir)

    # create the agent and store the untrained network as generation 0
    agent = alpha_zero_learning.Agent(
        learning_rate, mcts_sim_count, c_puct, temp, batch_size, exp_buffer_size)
    torch.save(agent.network, "{}/network_gen_{}.pt".format(network_dir, 0))

    # histories used for the plots
    policy_losses = []
    value_losses = []
    scores_as_white = []
    scores_as_black = []

    start_training = time.time()
    for epoch in range(epoch_count):
        # play against a minimax player to see how good the network is
        logger.info("start match against minimax in epoch {}".format(epoch))
        white_score = alpha_zero_learning.net_vs_minimax(
            agent.network, evaluation_game_count, mcts_sim_count, c_puct, 0,
            CONST.WHITE)
        logger.info("white score vs minimax: {}".format(white_score))

        black_score = alpha_zero_learning.net_vs_minimax(
            agent.network, evaluation_game_count, mcts_sim_count, c_puct, 0,
            CONST.BLACK)
        logger.info("black score vs minimax: {}".format(black_score))

        scores_as_white.append(white_score)
        scores_as_black.append(black_score)

        # self play: create some game data through self play
        logger.info("start playing games in epoch {}".format(epoch))
        agent.play_self_play_games(episode_count, temp_threshold, alpha_dirich)

        # training: update the network with the experience data
        logger.info("start updates in epoch {}".format(epoch))
        p_loss, v_loss = agent.nn_update(update_count)
        policy_losses.append(p_loss)
        value_losses.append(v_loss)
        print("policy loss: ", p_loss)
        print("value loss: ", v_loss)
        # agent.clear_exp_buffer()            # clear the experience buffer

        # save the current network as the next generation
        torch.save(agent.network,
                   "{}/network_gen_{}.pt".format(network_dir, epoch + 1))

    training_time = time.time() - start_training
    logger.info(
        "elapsed time whole training process {} for {} episodes".format(
            training_time, epoch_count * episode_count))

    # plot the value loss (figure 1) and the policy loss (figure 2)
    loss_plots = (
        (1, value_losses, "Average Value Training Loss", "Value Loss"),
        (2, policy_losses, "Average Policy Training Loss", "Policy Loss"),
    )
    for fig_number, losses, title, y_label in loss_plots:
        loss_figure = plt.figure(fig_number)
        plt.plot(losses)
        plt.title(title)
        plt.xlabel("Episode")
        plt.ylabel(y_label)
        loss_figure.show()

    # plot the score against the minimax player
    score_figure = plt.figure(3)
    plt.plot(scores_as_white, label="white")
    plt.plot(scores_as_black, label="black")
    plt.legend(loc='best')
    plt.gca().set_ylim([0, 0.5])
    plt.title("Average Score Against Minimax")
    plt.xlabel("Episode")
    plt.ylabel("Average Score")
    score_figure.show()

    plt.show()
def mainTrain():
    """
    Load a stored chess network and print its move suggestions.

    Three checks are printed to stdout:
      1. the raw network policy / value after 1. e4 e5 2. Nf3,
      2. the MCTS policy after 1. g4 e5 2. f4 (printed as "fastest mate move"),
      3. the MCTS policy after 1. e4 e5 2. Nf3 together with the search time.

    The network is loaded with map_location='cuda' and the input tensor is
    moved with .cuda(), so a CUDA device is required.
    """
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log")
    logger = logging.getLogger('MCTS')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    network_dir = "networks"
    network_file = network_dir + "/" + "network_batch_158436.pt"

    # load the network
    logger.info("load the neural network: " + network_file)
    net = torch.load(network_file, map_location='cuda')

    # raw network prediction after 1. e4 e5 2. Nf3
    board = chess.Board()
    board.push_san("e4")
    board.push_san("e5")
    board.push_san("Nf3")

    bit_board = board_representation.board_to_matrix(board)
    # unsqueeze(0) adds a leading dimension of size one in front of the board
    # matrix before it is passed to the network
    policy, value = net(torch.Tensor(bit_board).unsqueeze(0).cuda())
    print(policy)
    print(value)
    print(
        "move: ",
        board_representation.policy_to_move(policy.detach().cpu().numpy(),
                                            board.turn))
    print(board.legal_moves)

    # find the policy with the help of mcts
    # after 1. g4 e5 2. f4 black has a mate in one (Qh4#)
    board = chess.Board()
    board.push_san("g4")
    board.push_san("e5")
    board.push_san("f4")
    chess_board = game.ChessBoard()
    chess_board.chess_board = board

    # NOTE(review): 200 is presumably the simulation count - confirm against
    # mcts.mcts_policy
    policy = mcts.mcts_policy(chess_board, 200, net, 1, 0)
    print(
        "fastest mate move: ",
        board_representation.policy_to_move(policy,
                                            chess_board.chess_board.turn))

    # timed mcts search after 1. e4 e5 2. Nf3
    board = chess.Board()
    board.push_san("e4")
    board.push_san("e5")
    board.push_san("Nf3")
    chess_board = game.ChessBoard()
    chess_board.chess_board = board

    start_time = time.time()
    policy = mcts.mcts_policy(chess_board, 800, net, 1, 0)
    elapsed_time = time.time() - start_time
    print("time needed for mtcs: ", elapsed_time)

    print(
        "suggested move after e4, e5, Nf3: ",
        board_representation.policy_to_move(policy,
                                            chess_board.chess_board.turn))
import os, sys
import logging
import argparse
from pprint import pformat
from heapq import nlargest
from gensim.matutils import MmWriter
from gensim.corpora import MmCorpus
from utils.utils import init_logger

logger = init_logger()


def prune_contribs_of_authors(contribs, N):
    """
    Lazily yield, for every author, only the N largest contributions.

    Each entry of an author's contribution sequence is indexed as
    entry[0] (used as the document id for the final ordering) and entry[1]
    (the contribution value used for the ranking).

    :param contribs: iterable with one contribution sequence per author
    :param N: maximal number of contributions to keep per author
    :return: generator of lists, one per author, sorted ascending by entry[0]
    """
    for author_id, author_contribs in enumerate(contribs):
        logger.debug('author {}'.format(author_id))
        logger.debug('from {}'.format(author_contribs))

        # determine the N most important contributions (largest value)
        kept = nlargest(N, author_contribs, key=lambda entry: entry[1])
        # restore the ascending document-id order
        kept.sort(key=lambda entry: entry[0])

        logger.debug('from {}'.format(kept))
        yield kept


def main():
    """
    Parse the command line options of the pruning script.

    Reads the input / output MatrixMarket paths and the number of
    contributions to keep per author.
    """
    parser = argparse.ArgumentParser(
        description='prunes contribs of a given author-document-contribs file, storing only top N max. contributions per authot')

    # every option is mandatory
    option_specs = (
        ('--author-doc-contribs', argparse.FileType('r'),
         'path to input contribution MatrixMarket file (.mm/.mm.bz2)'),
        ('--pruned-contribs', argparse.FileType('w'),
         'path to output MatrixMarket .mm file'),
        ('--top-n-contribs', int,
         'keep only N contribs with highes values per author'),
    )
    for flag, value_type, help_text in option_specs:
        parser.add_argument(flag, type=value_type, help=help_text, required=True)

    args = parser.parse_args()

    # argparse.FileType already opened both files while parsing; only their
    # names are read here
    input_author_doc_contribs_path = args.author_doc_contribs.name
    output_pruned_contribs_path = args.pruned_contribs.name
    top_n_contribs = args.top_n_contribs
# max_steps = 1 # parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.") max_steps = 2001 # parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.") warmup_steps = 0 # parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.") logging_steps = 2000 # parser.add_argument('--logging_steps', type=int, default=2000, help="Log every X updates steps.") save_steps = 2000 # parser.add_argument('--save_steps', type=int, default=2000, help="Save checkpoint every X updates steps.") do_train = True # parser.add_argument("--do_train", action="store_true", help="Whether to run training.") do_eval = False # parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the test set.") no_cuda = False # parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available") # args = parser.parse_args() # args.model_name_or_path = MODEL_PATH_MAP[args.model_type] # main(args) init_logger() set_seed(seed, no_cuda) # 토큰나이져 설정 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) # tokenizer = load_tokenizer(model_name_or_path) # 데이터 불러 오기 mode = "train" train_dataset = load_and_cache_examples(task, tokenizer, mode, model_name_or_path, max_seq_len, data_dir, train_file, dev_file, test_file) dev_dataset = None mode = "test" test_dataset = load_and_cache_examples(task, tokenizer, mode,