class TrainingClient:
    """Holds class-level path settings and one network wrapper instance."""

    # Settings shared by all instances; only the checkpoint folder is active.
    args = dotdict({
        # 'load_folder_file': ('../alphabot/temp/','checkpoint_0.pth.tar'),
        'checkpoint': '../alphabot/temp/'
    })

    def __init__(self):
        """Create the network wrapper by calling ``nn()``."""
        # self.trainExamplesHistory = []    # history of examples from args.numItersForTrainExamplesHistory latest iterations
        self.nnet = nn()
class Client:
    """Loads stored training examples and a model file as raw bytes.

    Paths come from the class-level ``args`` mapping. Files are read
    verbatim (no unpickling); a missing file triggers an interactive
    prompt asking whether to continue.
    """

    args = dotdict({
        'load_folder_file': ('../alphabot/temp.old/','checkpoint_1.pth.tar'),
        'checkpoint': '../alphabot/temp/'
    })

    def __init__(self):
        """Start with an empty example history and a fresh ``nn()`` wrapper."""
        self.trainExamplesHistory = []    # history of examples from args.numItersForTrainExamplesHistory latest iterations
        self.nnet = nn()

    # def getCheckpointFile(self, iteration):
    #     return 'checkpoint_' + str(iteration) + '.pth.tar'

    # def saveTrainExamples(self, iteration):
    #     folder = self.args.checkpoint
    #     if not os.path.exists(folder):
    #         os.makedirs(folder)
    #     filename = os.path.join(folder, self.getCheckpointFile(iteration)+".examples")
    #     with open(filename, "wb+") as f:
    #         Pickler(f, protocol=pickle.HIGHEST_PROTOCOL).dump(self.trainExamplesHistory)
    #     f.closed

    def _readFileOrConfirm(self, path, missingPrompt, foundMessage):
        """Return the bytes of ``path``, or ``None`` if it does not exist.

        When the file is missing, the path is printed and the user is asked
        ``missingPrompt``; any answer other than ``"y"`` exits the process
        via ``sys.exit()``.
        """
        if not os.path.isfile(path):
            print(path)
            if input(missingPrompt) != "y":
                sys.exit()
            return None

        print(foundMessage)
        # The ``with`` block closes the file; no further cleanup is needed.
        with open(path, "rb") as f:
            return f.read()

    def loadTrainExamples(self):
        """Read the ``.examples`` file that sits next to the configured model.

        The path is ``args.load_folder_file`` joined, plus ``".examples"``.
        On success the raw bytes (not unpickled) replace
        ``self.trainExamplesHistory`` and ``self.skipFirstSelfPlay`` is set
        to ``True``. If the file is missing and the user continues, nothing
        is modified.
        """
        modelFile = os.path.join(self.args.load_folder_file[0], self.args.load_folder_file[1])
        examplesFile = modelFile+".examples"
        data = self._readFileOrConfirm(
            examplesFile,
            "File with trainExamples not found. Continue? [y|n]",
            "File with trainExamples found. Read it.",
        )
        if data is None:
            return
        #self.trainExamplesHistory = Unpickler(f).load()
        self.trainExamplesHistory = data
        # examples based on the model were already collected (loaded)
        self.skipFirstSelfPlay = True

    def loadModelFile(self):
        """Return the bytes of ``temp.pth.tar`` in the checkpoint folder.

        Returns ``None`` when the file is missing and the user chooses to
        continue; exits the process if the user declines.
        """
        modelFile = os.path.join(self.args.checkpoint, 'temp.pth.tar')
        return self._readFileOrConfirm(
            modelFile,
            "Model file not found. Continue? [y|n]",
            "File with modelFile found. Read it.",
        )
import Arena from MCTS import MCTS from Game import YEET as Game from NNet import NNetWrapper as NNet from dotted_dict import DottedDict as dotdict import numpy as np import logging, psutil, os, random, functools, sys #from multiprocessing import freeze_support import multiprocessing as mp args = dotdict({ 'numGames': 16, # 48 'numThreads': psutil.cpu_count() # 8 }) """ use this script to play any two agents against each other, or play manually with any agent. """ class RandomPlayer(): def __init__(self, game): self.game = game def play(self, game_instance): # agent = game_instance.current_player choices = np.argwhere(self.game.getValidMoves(game_instance) == 1) return random.choice(choices) class HumanPlayer():
import multiprocessing as mp
import random
import string
#from utils import dotdict
from dotted_dict import DottedDict as dotdict
import time

dd = dotdict({
    'numIters': 100,
    'numThreads': 4,
})


# define an example function
def rand_string(length, dd, output):
    """Put a random string of digits, lower- and uppercase chars on ``output``.

    Args:
        length: Number of characters to generate.
        dd: Not used by the function body; it is only passed through from
            the caller.
        output: Any object with a ``put`` method (here an ``mp.Queue``) that
            receives the generated string.
    """
    # Build the alphabet once instead of re-concatenating it per character.
    alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits
    rand_str = ''.join(random.choice(alphabet) for _ in range(length))
    #time.sleep(10)
    output.put(rand_str)


if __name__ == '__main__':
    random.seed(123)

    # Define an output queue
    output = mp.Queue()

    # Setup a list of processes that we want to run; the worker count comes
    # from the shared settings rather than a duplicated literal.
    processes = [mp.Process(target=rand_string, args=(5, dd, output))
                 for _ in range(dd['numThreads'])]
from NNet import NNetWrapper as nn #from utils import dotdict from dotted_dict import DottedDict as dotdict #from multiprocessing import freeze_support import multiprocessing as mp import logging, psutil, os, sys args = dotdict({ 'numIters': 100, 'numEps': 100, 'tempThreshold': 15, # degree of exploration in MCTS.getActionProb(). switch from temperature=1 to temperature=0 after this episode step 'updateThreshold': 0.55, 'maxlenOfQueue': 200000, 'numMCTSSims': 25, # 25 # TODO: much more sims needed? 'arenaCompare': 48, 'cpuct': 2, # degree of exploration for upper confidence bound in MCTS.search() => TODO: try 2? 'checkpoint': './temp/', 'load_model': False, 'load_folder_file': ('./temp/', 'best.pth.tar'), 'numItersForTrainExamplesHistory': 50, #20 'numThreads': psutil.cpu_count(), 'remoteTraining': True }) if __name__ == "__main__": #freeze_support() # Start processes with lower priority to prevent system overload/hangs/freezes. Also set multiprocessing start method to spawn for Linux, since forking makes trouble p = psutil.Process(os.getpid()) if sys.platform.startswith('win32'): p.nice(psutil.BELOW_NORMAL_PRIORITY_CLASS)
import pickle from random import shuffle import multiprocessing as mp from multiprocessing import current_process, Pool from concurrent.futures import ProcessPoolExecutor, as_completed from tqdm import tqdm import functools from utils.helper import * args = dotdict({ 'numIters': 100, 'numEps': 2, 'tempThreshold': 15, # degree of exploration in MCTS.getActionProb(). switch from temperature=1 to temperature=0 after this episode step 'maxlenOfQueue': 200000, 'numMCTSSims': 25, # 25 # TODO: much more sims needed? 'cpuct': 2, # degree of exploration for upper confidence bound in MCTS.search() => TODO: try 2? 'modelspath': './models/', 'examplespath': './examples/', 'numThreads': 1 #psutil.cpu_count(), }) class Coach: """ This class executes the self-play + learning. It uses the functions defined in Game and NeuralNet. args are specified in main.py. """ def __init__(self, game, nnet, args): self.game = game
import pickle import torch import torch.optim as optim # from torch.autograd import Variable # from alphanet import DQN as nnet from alphanet12 import DQN as nnet # from alphanet6 import DQN as nnet # from alphanet18 import DQN as nnet # from alphanet26 import DQN as nnet args = dotdict({ 'lr': 0.0001, # 0.001 - 0.0001 # 'dropout': 0.3, # 0.3 # 'epochs': 2, # best 25 'batch_size': 64, # best 128 - 4 'cuda': True, # 'num_channels': 512, }) class NNetWrapper(): def __init__(self): self.nnet = nnet(args) if args.cuda: self.nnet.cuda() def train(self, examples): """ examples: list of examples, each example is of form (state, pi, v)
import pickle import torch import torch.optim as optim # from torch.autograd import Variable # from alphanet import DQN as nnet from alphanet12 import DQN as nnet # from alphanet6 import DQN as nnet # from alphanet18 import DQN as nnet # from alphanet26 import DQN as nnet args = dotdict({ 'lr': 0.001, # 'dropout': 0.3, 'epochs': 10, # best 25 'batch_size': 128, # best 128 'cuda': True, # 'num_channels': 512, }) class NNetWrapper(): def __init__(self): self.nnet = nnet(args) if args.cuda: self.nnet.cuda() def train(self, examples): """ examples: list of examples, each example is of form (state, pi, v)
from pickle import Pickler, Unpickler, loads from NNet import NNetWrapper as nn from dotted_dict import DottedDict as dotdict from random import shuffle import time, os, sys import torch from sklearn.model_selection import train_test_split #from tensorboard_logger import configure, log_value from tensorboardX import SummaryWriter args = dotdict({ 'modelspath': './models/', 'examplespath': './examples/', 'epochs': 100, # best 25, 'validation': True, 'early_stopping': False, 'save_model': True }) class TrainingClient: def __init__(self): # self.trainExamplesHistory = [] # history of examples from args.numItersForTrainExamplesHistory latest iterations self.nnet = nn() def loadTrainExamples(self): # modelFile = os.path.join(args.examplespath, 'best.pth.tar') # examplesFile = modelFile+".examples" examplesFile = os.path.join(args.examplespath, '0.pth.tar_6.examples') if not os.path.isfile(examplesFile): print(examplesFile)