def train_net(epoch_count, training_generator, csv_test_set):
    """
    trains the neural network a few times and returns the value and prediction error
    :param epoch_count:         the number of epochs to train the neural network
    :param training_generator:  the torch training generator
    :param csv_test_set:        the test set on which the network is tested
    :return:                    prediction error, value error
    """
    logger = logging.getLogger('Sup Learning')

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks, Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epoch_count):
        # training
        for state_batch, value_batch, policy_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device)
            value_batch = value_batch.to(Config.training_device)
            policy_batch = policy_batch.to(Config.training_device)

            # execute one training step
            _, _ = network.train_step(state_batch, policy_batch, value_batch)

    # evaluation
    pred_error, val_error = evaluation.net_prediction_error(network, csv_test_set)
    logger.debug("learning rate {}, prediction error: {}, value-error: {}".format(
        Config.learning_rate, pred_error, val_error))

    return pred_error, val_error
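
# --- usage sketch (illustrative, not part of the original code) --------------
# A minimal example of how train_net could be called: build a DataLoader over
# the supervised data set and evaluate on a held-out test set. The file names,
# the batch size and the pandas-based loading of the CSV test set are
# assumptions; evaluation.net_prediction_error defines what csv_test_set
# really has to look like.
import pandas as pd
from torch.utils import data

training_set = data_processing.Dataset("positions-avgthreeCheck.h5")   # path pattern as in mainTrain below
training_generator = data.DataLoader(training_set, batch_size=512, shuffle=True, num_workers=2)
csv_test_set = pd.read_csv("test_positions.csv")                       # hypothetical test-set file
pred_error, val_error = train_net(3, training_generator, csv_test_set)
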
def __init__(self, *args, **kwargs):
    super(FastResNet, self).__init__(*args, **kwargs)
    self.encoder = networks.ResNet(layers=50, pretrained=False).to(self.device)
    self.decoder = networks.choose_decoder(decoder=self.name).to(self.device)

    # def infer(self, input):
    #     pass

    self.encoder.eval()
    self.decoder.eval()
    print('==> model name:{}\nfeed_height:{}\nfeed_width:{}\n'.format(self.name, self.feed_height, self.feed_width))
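
# --- sketch (not original code): one way the commented-out infer stub above could look --
# A plausible FastResNet method: forward the input through encoder and decoder
# under torch.no_grad(). Whether choose_decoder returns a module that accepts
# the raw encoder output (without skip connections) is an assumption.
def infer(self, input):
    with torch.no_grad():
        features = self.encoder(input.to(self.device))
        return self.decoder(features)
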
def main():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
    logger = logging.getLogger('Connect4')

    # set the random seed
    random.seed(a=None, version=2)

    # create the configuration values for a random network
    network_path = "self-play-net.pt"
    Config.n_blocks = 1
    Config.n_filters = 1
    Config.mcts_sim_count = 200
    loops = 10
    games_per_loop = 1000

    # create the agent
    logger.info("create a new random network for the self play")
    network = networks.ResNet(Config.learning_rate, Config.n_blocks, Config.n_filters, Config.weight_decay)
    torch.save({'state_dict': network.state_dict()}, network_path)

    # play self-play games
    logger.info("start to create self-play games")
    start = time.time()
    training_examples = []
    for i in range(loops):
        new_examples = alpha_zero_learning.__self_play_worker__(network_path, games_per_loop)
        training_examples.extend(new_examples)
        logger.debug("finished creating games in loop {}".format(i))

    # save the training examples
    with open("initial_training_data.pkl", 'wb') as output:
        pickle.dump(training_examples, output, pickle.HIGHEST_PROTOCOL)

    logger.info("finished creating the initial training examples, length: {}".format(len(training_examples)))
    average_length = 0.5 * len(training_examples) / (games_per_loop * loops)   # 0.5 as symmetric positions are included as well
    logger.debug("average moves per game: {}".format(average_length))
    logger.debug("elapsed time: {}".format(time.time() - start))
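
# --- usage sketch (not part of the original code) -----------------------------
# The pickled examples written by main() can be read back later, e.g. to seed
# an experience buffer or for supervised pre-training. Only the file name is
# taken from main(); nothing about the structure of each example is assumed.
import pickle

with open("initial_training_data.pkl", 'rb') as input_file:
    training_examples = pickle.load(input_file)
print("loaded {} initial training examples".format(len(training_examples)))
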
def mainGui():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/gui.log")
    logger = logging.getLogger('Gui')

    net_path = "network_gen_148.pt"
    n_blocks = 10
    n_filters = 128
    np.set_printoptions(suppress=True, precision=2)

    # load the network
    Config.evaluation_device = torch.device('cpu')
    cpu_net = networks.ResNet(1e-4, n_blocks, n_filters, 1e-4)
    checkpoint = torch.load(net_path, map_location='cpu')
    cpu_net.load_state_dict(checkpoint['state_dict'])
    logger.debug("network loaded")

    # execute the game
    gui = connect4_gui.GUI(cpu_net)
    gui.execute_game()
def mainTrain():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/chess_sl.log")
    logger = logging.getLogger('Sup Learning')

    np.set_printoptions(suppress=True, precision=6)

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # parameters
    variant = "threeCheck"
    Config.learning_rate = 0.0001
    Config.weight_decay = 1e-4
    Config.n_blocks = 10
    Config.n_filters = 128
    epochs = 3
    training_set_path = "positions-avg" + variant + ".h5"
    network_dir = "networks/" + variant
    training_progress_dir = "training_progress/" + variant

    # define the parameters for the training
    params = {'batch_size': 512, 'shuffle': True, 'num_workers': 2}

    # create the data set class
    training_set = data_processing.Dataset(training_set_path)
    training_generator = data.DataLoader(training_set, **params)
    logger.info("training set created, length: {}".format(training_set.__len__()))

    # create a new network to train
    network = networks.ResNet(Config.learning_rate, Config.n_blocks, Config.n_filters, Config.weight_decay)
    network = data_storage.net_to_device(network, Config.training_device)

    # create all needed folders
    Path(network_dir).mkdir(parents=True, exist_ok=True)
    Path(training_progress_dir).mkdir(parents=True, exist_ok=True)

    # lists for the plots
    batches = []
    policy_loss = []
    value_loss = []
    tot_batch_count = 0
    current_batch_count = 0
    current_value_loss = 0
    current_policy_loss = 0

    # execute the training by looping over all epochs
    network.train()
    for epoch in range(epochs):
        # training
        for state_batch, policy_batch, value_batch in training_generator:
            # send the data to the gpu
            state_batch = state_batch.to(Config.training_device, dtype=torch.float)
            value_batch = value_batch.unsqueeze(1).to(Config.training_device, dtype=torch.float)
            policy_batch = policy_batch.to(Config.training_device, dtype=torch.float)

            # execute one training step
            loss_p, loss_v = network.train_step(state_batch, policy_batch, value_batch)
            current_policy_loss += loss_p
            current_value_loss += loss_v
            current_batch_count += 1
            tot_batch_count += 1

            if tot_batch_count % 100 == 0:
                logger.debug("epoch {}: trained {} batches so far".format(epoch, tot_batch_count))
                batches.append(tot_batch_count)
                policy_loss.append(current_policy_loss / current_batch_count)
                value_loss.append(current_value_loss / current_batch_count)
                current_policy_loss = 0
                current_value_loss = 0
                current_batch_count = 0

            if tot_batch_count % 1000 == 0:
                network_path = "{}/network_batch_{}.pt".format(network_dir, tot_batch_count)
                torch.save(network, network_path)
                np.save(training_progress_dir + "/value_loss.npy", np.array(value_loss))
                np.save(training_progress_dir + "/policy_loss.npy", np.array(policy_loss))
                np.save(training_progress_dir + "/batches.npy", np.array(batches))

        # save the last network
        network_path = "{}/network_batch_{}.pt".format(network_dir, tot_batch_count)
        torch.save(network, network_path)
        np.save(training_progress_dir + "/value_loss.npy", np.array(value_loss))
        np.save(training_progress_dir + "/policy_loss.npy", np.array(policy_loss))
        np.save(training_progress_dir + "/batches.npy", np.array(batches))

        logger.debug("epoch {}: finished training".format(epoch))

    # plot the loss versus the number of seen batches
    # plot the value training loss
    fig1 = plt.figure(1)
    plt.plot(batches, value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Value Loss")
    fig1.show()

    # plot the training policy loss
    fig2 = plt.figure(2)
    plt.plot(batches, policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Training Samples")
    plt.ylabel("Policy Loss")
    fig2.show()

    plt.show()
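
# --- usage sketch (not part of the original code) -----------------------------
# mainTrain saves the whole module with torch.save(network, path), so a
# checkpoint can be restored later with torch.load as long as the networks
# module is importable. The batch number and variant in the path are
# placeholders.
network = torch.load("networks/threeCheck/network_batch_1000.pt", map_location='cpu')
network.eval()
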
def main_az():
    # The logger
    utils.init_logger(logging.DEBUG, file_name="log/connect4.log")
    logger = logging.getLogger('Connect4')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)

    # create the storage object
    training_data = data_storage.load_data()

    # create the agent
    network = networks.ResNet(Config.learning_rate, Config.n_blocks, Config.n_filters, Config.weight_decay)
    agent = alpha_zero_learning.Agent(network)

    if training_data.cycle == 0:
        logger.debug("create a new agent")
        training_data.save_data(agent.network)      # save the generation 0 network

        if Config.use_initial_data:
            logger.debug("fill the experience buffer with some initial data")
            agent.experience_buffer.fill_with_initial_data()    # add training examples of untrained network
    else:
        # load the current network
        logger.debug("load an old network")
        agent.network = training_data.load_current_net()
        agent.experience_buffer = training_data.experience_buffer

    start_training = time.time()
    for i in range(training_data.cycle, Config.cycle_count, 1):
        ###### self play and update: create some game data through self play
        logger.info("start playing games in cycle {}".format(i))
        avg_moves_played = agent.play_self_play_games(training_data.network_path)
        training_data.avg_moves_played.append(avg_moves_played)
        logger.debug("average moves played: {}".format(avg_moves_played))

        ###### training, train the training network and use the target network for predictions
        logger.info("start updates in cycle {}".format(i))
        loss_p, loss_v = agent.nn_update(i)
        training_data.policy_loss.append(loss_p)
        training_data.value_loss.append(loss_v)
        logger.debug("policy loss: {}".format(loss_p))
        logger.debug("value loss: {}".format(loss_v))

        ###### save the new network
        logger.info("save check point to file in cycle {}".format(i))
        training_data.cycle += 1
        training_data.experience_buffer = agent.experience_buffer
        training_data.save_data(agent.network)

    end_training = time.time()
    training_time = end_training - start_training
    logger.info("elapsed time whole training process {}".format(training_time))

    # save the results
    np.save("value_loss.npy", np.array(training_data.value_loss))
    np.save("policy_loss.npy", np.array(training_data.policy_loss))
    np.save("avg_moves.npy", np.array(training_data.avg_moves_played))

    # set the style of the plot
    plt.style.use('seaborn-dark-palette')

    # plot the value training loss
    fig1 = plt.figure(1)
    plt.plot(training_data.value_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Value Training Loss")
    plt.xlabel("Generation")
    plt.ylabel("Value Loss")
    fig1.show()

    # plot the training policy loss
    fig2 = plt.figure(2)
    plt.plot(training_data.policy_loss)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Policy Training Loss")
    plt.xlabel("Generation")
    plt.ylabel("Policy Loss")
    fig2.show()

    # plot the average number of moves played in the self-play games
    fig3 = plt.figure(3)
    plt.plot(training_data.avg_moves_played)
    axes = plt.gca()
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Average Moves in Self-Play Games")
    plt.xlabel("Generation")
    plt.ylabel("Move Count")
    fig3.show()

    plt.show()
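
# --- usage sketch (not part of the original code) -----------------------------
# The .npy files written at the end of main_az can be reloaded afterwards, e.g.
# to compare training runs without repeating the self-play loop.
value_loss = np.load("value_loss.npy")
policy_loss = np.load("policy_loss.npy")
avg_moves = np.load("avg_moves.npy")
plt.plot(value_loss, label="value loss")
plt.plot(policy_loss, label="policy loss")
plt.legend()
plt.show()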