コード例 #1
0
def main():
    """Parse the command line, build game and network, then start training.

    Both ``args.checkpoint`` and ``args.load_folder`` are extended with a
    ``dim<rows>x<columns>/`` sub-directory. When ``args.load_model`` equals
    the string ``"true"``, the saved network and the stored training
    examples are loaded before learning starts.
    """
    args = parser.parse_args()
    rows, columns = args.rows, args.columns

    print(f"Rows: {rows}")
    print(f"Columns: {columns}")

    game = Game(rows, columns)
    network = nn(game, args)

    # Same board-size specific sub-directory for saving and for loading.
    board_dir = f"dim{rows}x{columns}/"
    args.checkpoint += board_dir
    args.load_folder += board_dir

    resume = args.load_model == "true"
    if resume:
        network.load_checkpoint(args.load_folder, args.load_file)

    coach = Coach(game, network, args)
    if resume:
        print("Load trainExamples from file")
        coach.loadTrainExamples()
    coach.learn()
コード例 #2
0
    def __init__(self, gateway):
        """Initialise the history buffers and bind ``act`` / ``memorize``.

        Args:
            gateway: stored unchanged on the instance as ``self.gateway``.

        ``frames`` and ``resolution`` are read from the enclosing scope.
        """
        self.gateway = gateway
        self.frames = frames
        self.frame_count = 0

        # Short rolling history, kept to assemble 4-channel samples.
        self.capacity = resolution[2] * 5
        self.screens = []
        self.actions = []
        self.hps = []
        self.energy = []
        self.controllable = []
        self.rewards = []

        def _discard(s1, a, s2, done, r, energy):
            """No-op replacement for ``memorize`` outside of training."""
            return None

        # ``_on_train`` is set from outside (by the FTGEnv object) so that the
        # bot's constructor signature does not have to change.
        if not getattr(self, '_on_train', False):
            self.act = act_with_np  # always do best action, epsilon == -1
            self.memorize = _discard
        else:
            # Training: register with the Coach and route acting and
            # memorising through it.
            Coach().trainees.append(self)
            self.act = functools.partial(Coach().act, eps=1.0)
            self.memorize = Coach().memorize
コード例 #3
0
 def selfPlayOnly(args):
     """Run ``args.numIters`` self-play rounds on a nim game.

     The checkpoint named by ``args.load_folder_file`` is reloaded into the
     network before every round.
     """
     game = nimGame(config)
     network = nn(game)
     coach = Coach(game, network, args)
     for iteration in range(args.numIters):
         print(f"Self-play iteration: {iteration}")
         folder = args.load_folder_file[0]
         filename = args.load_folder_file[1]
         network.load_checkpoint(folder, filename)
         coach.selfPlay()
コード例 #4
0
def main():
    """Train a network on ``GobangGame(n=9, nir=9)``.

    If ``args.load_model`` is truthy, the checkpoint at ``args.checkpoint``
    is loaded into the network before the coach starts learning.
    """
    board = GobangGame(n=9, nir=9)
    net = NNetWrapper(board)

    if args.load_model:
        net.load_checkpoint(args.checkpoint)

    trainer = Coach(board, net, args)
    trainer.learn()
コード例 #5
0
def add_team():
    """Create a coach and the team that coach leads from a JSON POST body.

    The body must be a JSON object with the keys ``"coach"`` (coach name)
    and ``"name"`` (team name). These key names must match the keys sent by
    the React client.

    Returns:
        ``{"success": True}`` as JSON once both rows are inserted, or a
        ``400`` response carrying ``{"success": False, "error": ...}`` when
        the body is missing, is not a JSON object, or lacks a required key.
    """
    # silent=True returns None for a missing/invalid JSON body instead of
    # raising, so the problem can be reported in this endpoint's own format.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"success": False,
                        "error": "Request body must be a JSON object."}), 400

    coach = data.get("coach")
    name = data.get("name")
    if not coach or not name:
        return jsonify({"success": False,
                        "error": "Both 'coach' and 'name' are required."}), 400

    # The coach is inserted first because the team row needs its primary key.
    new_coach = Coach(name=coach)
    new_coach.insert()

    new_team = Team(name=name, coach_id=new_coach.pk)
    new_team.insert()
    return jsonify({"success": True})
コード例 #6
0
def main():
    """Train a network on ``DotsAndBoxesGame(n=3)``.

    The checkpoint named by ``args.load_folder_file`` is loaded first when
    ``args.load_model`` is truthy; otherwise a warning is logged.
    """
    log.info('Loading %s...', DotsAndBoxesGame.__name__)
    game = DotsAndBoxesGame(n=3)
    log.info('Loading %s...', nn.__name__)
    network = nn(game)
    if not args.load_model:
        log.warning('Not loading a checkpoint!')
    else:
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        network.load_checkpoint(folder, filename)
    log.info('Loading the Coach...')
    coach = Coach(game, network, args)
    log.info('Starting the learning process 🎉')
    coach.learn()
コード例 #7
0
ファイル: AutoRun.py プロジェクト: asmaaelkeurti/alpha_zero
    def generate_data_debug(self, model_file):
        """Play one episode and print two outcome tallies for inspection.

        Args:
            model_file: model file name; it is loaded only when it is longer
                than one character, otherwise the network stays as created.
        """
        network = nn(self.game)
        if len(model_file) <= 1:
            print('random state')
        else:
            network.load_model(filename=model_file)

        episode = Coach(self.game, network, self.args).execute_episode()

        # One (sum of element 0, element 2) pair per recorded example.
        summary = [(np.sum(example[0]), example[2]) for example in episode]

        def tally(pattern_a, pattern_b):
            """Count the pairs in ``summary`` equal to either pattern."""
            return sum(pair == pattern_a or pair == pattern_b
                       for pair in summary)

        print(tally((0, -1), (-1, 1)))  # second hand win
        print(tally((0, 1), (-1, -1)))  # first hand win
コード例 #8
0
def main(config):
    """Build game, network and coach from ``config`` and start learning.

    Args:
        config: object providing ``game``, ``tensorboardX``, ``load_model``,
            ``load_model_file``, ``use_multiprocessing`` and
            ``load_train_examples``.
    """
    game = config.game()

    # Model setup, optionally restored from a checkpoint.
    nnet = NNetWrapper(game, config, tensorboard=config.tensorboardX)
    if config.load_model:
        ckpt_folder = config.load_model_file[0]
        ckpt_file = config.load_model_file[1]
        nnet.load_checkpoint(folder=ckpt_folder, filename=ckpt_file)

    if not config.use_multiprocessing:
        coach = Coach(game, nnet, config)
    else:
        # Required for multiprocessing; a RuntimeError from setting the
        # start method is deliberately ignored.
        try:
            mp.set_start_method('spawn', force=True)
        except RuntimeError:
            pass

        import warnings
        # Disables semaphore warning (bug in pytorch)
        warnings.filterwarnings("ignore",
                                message="semaphore_tracker",
                                category=UserWarning)
        coach = CoachMP(game, nnet, config)

    # Optionally resume from stored training examples.
    if config.load_train_examples:
        print("Load trainExamples from file")
        coach.loadTrainExamples()

    coach.learn()
コード例 #9
0
 def parse_coaches(self, path=None):
     """Read the coaches CSV and attach each coach to its team and events.

     Row layout as consumed here: column 0 is the team key (lower-cased and
     looked up in ``self.teams``), column 1 is passed to ``Coach`` as its
     second argument, and the remaining columns come in pairs of
     (event key, flag). A flag of ``"no"`` (case-insensitive) stores the
     pair's 1-based position as the first element of the tuple handed to
     ``event.add_potential_coach``; any other flag stores ``0``.

     Args:
         path: CSV file to read. Defaults to the original hard-coded
             location so existing callers keep working.

     Raises:
         KeyError: if a team or event key from the file is unknown.
         IndexError: if an event column has no matching flag column.
     """
     if path is None:
         path = ("C:/Users/milop/SciOlyScheduler/SciOlyScheduler/"
                 "utility/coaches.csv")

     with open(path, 'r', newline='') as file:
         reader = csv.reader(file)
         next(reader, None)  # skip the header; tolerate an empty file
         coach_index = 1
         for row in reader:
             # csv.reader yields [] for blank lines; those and rows without
             # a team key carry no coach.
             if not row or row[0] == '':
                 continue
             team = self.teams[row[0].lower()]
             coach = Coach(team.get_number(), row[1],
                           coach_index + self.total_events)
             team.add_coach(coach)
             coach_index += 1
             self.coaches.append(coach)
             # Each event key maps to a list of event objects; the same
             # (value, coach) pair is added to every one of them.
             for j in range(2, len(row), 2):
                 event_list = self.events[row[j].lower()]
                 if row[j + 1].lower() == "no":
                     pair = (j // 2, coach)
                 else:
                     pair = (0, coach)
                 for event in event_list:
                     event.add_potential_coach(pair)
         for event_list in self.events.values():
             for event in event_list:
                 event.sort_coaches()
コード例 #10
0
def generate_data(l):
    """Play one episode and append its examples to the shared pickle file.

    Args:
        l: lock guarding the output file; it is held while the folder is
            created and the examples are appended.
    """
    g = Game(args.goBang_n)
    nnet = nn(g)

    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components. The previous
        # string concatenation only produced a path inside ``folder`` when
        # ``folder`` already ended with a separator.
        filename = os.path.join(folder, "train_examples_4")
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
コード例 #11
0
    def save_transaction(self, actions):
        """Turn the buffered frames into one transition and hand it on.

        Once the history holds exactly ``self.capacity`` entries, frames
        0/4/8/12 form the state and frames 4/8/12/16 the next state. The
        reward is computed from the HP pairs at positions 0 and 4, and the
        sample is memorised only if ``self.controllable[0]`` is truthy.
        Any exception is logged with its traceback and swallowed.

        Args:
            actions: indexable by the most recent stored action; used only
                to label the debug message.
        """
        try:
            # Drop the oldest entry of each history once over capacity.
            # NOTE(review): self.controllable is not trimmed here - confirm
            # that this is handled wherever it is appended.
            if len(self.screens) > self.capacity:
                for history in (self.screens, self.actions, self.hps,
                                self.energy, self.rewards):
                    del history[0]

            if len(self.screens) != self.capacity:
                return

            assert(len(self.screens) == len(self.actions)
                   == len(self.hps) == len(self.energy))

            # Frames 1-4 make up the state, frames 2-5 the next state.
            state = np.stack([self.screens[k] for k in (0, 4, 8, 12)], axis=2)
            next_state = np.stack(
                [self.screens[k] for k in (4, 8, 12, 16)], axis=2)
            action = self.actions[0]

            # Reward: opponent HP drop minus own HP drop between the frames.
            my_hp_before, opp_hp_before = self.hps[0]
            my_hp_after, opp_hp_after = self.hps[4]
            stored_energy = self.energy[0]
            reward = ((opp_hp_before - opp_hp_after)
                      - (my_hp_before - my_hp_after))
            if opp_hp_before >= my_hp_before:
                reward -= 0.1
            self.rewards.append(reward)

            if self.controllable[0]:
                # Memorise only while the character is controllable. Game
                # end is not used on this platform, so done is always False.
                self.memorize(state, action, next_state, False, reward,
                              stored_energy)

            # Used for score calculation.
            Coach().add_reward(reward)

            # With a debug port (process queue for monitoring) attached,
            # push the current state and action on every fourth call.
            if hasattr(self, 'debug_port'):
                tick = getattr(self, '_debug_port_cnt', 0)
                if tick == 0:
                    recent_screens, recent_energy = self._get_recent_state(
                        axis=0)
                    info = {
                        'action': str(actions[self.actions[-1]]),
                        'energy': int(recent_energy * energy_scale),
                        'reward': reward,
                    }
                    self.debug_port.put((recent_screens, info))
                self._debug_port_cnt = (tick + 1) % 4

        except Exception:
            logger.error(traceback.format_exc())
コード例 #12
0
def generate_data(l, model_iter):
    """Play one episode with a saved model and append its examples to disk.

    Args:
        l: lock guarding the output file; it is held while the folder is
            created and the examples are appended.
        model_iter: zero-based iteration index; model and example files use
            the suffix ``model_iter + 1``.
    """
    suffix = str(model_iter + 1)

    g = Game(8)
    nnet = nn(g)
    nnet.load_model(filename="model_auto_" + suffix)

    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components. The previous
        # string concatenation only produced a path inside ``folder`` when
        # ``folder`` already ended with a separator.
        filename = os.path.join(folder, "train_examples_auto_" + suffix)
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
コード例 #13
0
def main():
    """Train on ``Connect4Game``, optionally resuming from saved state.

    When ``args.load_model`` is truthy, the coach loads both its model and
    its training examples before learning starts.
    """
    coach = Coach(Connect4Game(), args)

    if args.load_model:
        logger.info(f'Loading checkpoint {args.load_folder_file}...')
        coach.loadModel()
        logger.info(
            f"Loading 'trainExamples' from file {args.load_folder_file}...")
        coach.loadTrainExamples()

    coach.learn()
コード例 #14
0
ファイル: AutoRun.py プロジェクト: asmaaelkeurti/alpha_zero
    def generate_data(self, l, model_file, train_example_filename):
        """Play one episode and append its examples to a pickle file.

        Args:
            l: lock guarding the output file; it is held while the folder
                is created and the examples are appended.
            model_file: model file name; it is loaded only when it is longer
                than two characters, otherwise the network stays as created.
            train_example_filename: file name inside
                ``self.args['checkpoint']`` that the examples are appended
                to.
        """
        nnet = nn(self.game)
        if len(model_file) > 2:
            nnet.load_model(filename=model_file)
        else:
            print('random nn model')

        c = Coach(self.game, nnet, self.args)
        train_example = c.execute_episode()

        with l:
            folder = self.args['checkpoint']
            os.makedirs(folder, exist_ok=True)
            # Join folder and file name as separate components. The previous
            # string concatenation only produced a path inside ``folder``
            # when ``folder`` already ended with a separator.
            filename = os.path.join(folder, train_example_filename)
            with open(filename, "ab+") as f:
                pickle.dump(train_example, f)
コード例 #15
0
ファイル: main.py プロジェクト: NetasDev/NetworkTraining
def main():
    """Train a network on ``Game(8)`` inside a Weights & Biases run.

    When ``args.load_model`` is truthy, the checkpoint named by
    ``args.load_folder_file`` and the stored training examples are loaded
    first. The wandb run is finished even if learning raises.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game(8)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # Two placeholders need two arguments. Passing the tuple as a single
        # argument makes logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    run = wandb.init(project=args.wandb_project, config=args, reinit=True)

    log.info('Starting the learning process 🎉')
    try:
        c.learn()
    finally:
        # Close the run even when training is interrupted by an exception.
        run.finish()
コード例 #16
0
def main():
    """Start the network worker processes and run the training coach.

    If ``check_platform()`` reports the platform as unsupported, both
    worker counts in ``args`` are forced to 1 so no parallelisation is used.
    """
    # Python 3 multiprocessing Queues need OS-level synchronisation
    # primitives; without them, fall back to a single worker of each kind.
    parallel_ok = check_platform()
    if parallel_ok == False:  # kept as an equality test on purpose
        args.mcts_workers = 1
        args.nnet_workers = 1

    game = Game()

    manager = NNetManager(args.nnet_workers, parallel_ok)
    for worker_id in range(args.nnet_workers):
        worker = mp.Process(target=NNetWorker,
                            args=(game, manager.nsync(worker_id), worker_id))
        worker.start()

    if args.load_model:
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        manager.load_checkpoint(folder, filename)

    coach = Coach(game, manager, args)

    if args.load_model:
        print("Load trainExamples from file")
        coach.loadTrainExamples()

    coach.learn()
コード例 #17
0
ファイル: main.py プロジェクト: FFKcoke/alpha-zero-general
def main():
    """Train a network on ``Game()``, optionally resuming from a checkpoint.

    When ``args.load_model`` is truthy, the checkpoint named by
    ``args.load_folder_file`` and the stored training examples are loaded
    before learning starts.
    """
    # Disabled snippet for playing the trained network against a human:
    # nnet = nn(Game())
    # nnet.load_checkpoint(folder='ashogickpt', filename='best.pth.tar')
    # nmcts = MCTS(Game(), nnet, args)
    # arena = Arena(HumanAnimalShogiPlayer(Game()).play, lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), Game(), Game().display)
    # rwins, nwins, draws = arena.playGames(50)
    # print('%d : %d (%d Draws)' % (nwins, rwins, draws))

    log.info('Loading %s...', Game.__name__)
    g = Game()

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # Two placeholders need two arguments. Passing the tuple as a single
        # argument makes logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
コード例 #18
0
def main():
    """Train a network on ``Game(7)`` with a fixed NumPy random seed.

    When ``args.load_model`` is truthy, the checkpoint named by
    ``args.load_folder_file`` and the stored training examples are loaded
    before learning starts.
    """
    np.random.seed(123)

    log.info('Loading %s...', Game.__name__)
    g = Game(7)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # Two placeholders need two arguments. Passing the tuple as a single
        # argument makes logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
コード例 #19
0
def main():
    """Train the Inception TicTacToe network, reporting progress via print.

    ``args`` is read as a mapping. When ``args['load_model']`` is truthy,
    the checkpoint named by ``args['load_folder_file']`` and the stored
    training examples are loaded before learning starts.
    """
    print('Loading Inception TicTacToe')

    network = nn()

    if not args['load_model']:
        print('Not loading a checkpoint')
    else:
        print('Loading checkpoint: ', args['load_folder_file'])
        folder = args['load_folder_file'][0]
        filename = args['load_folder_file'][1]
        network.load_checkpoint(folder, filename)

    print('Loading the Coach')
    # NOTE(review): ``g`` is not assigned in this function (the
    # ``g = Game(6)`` line was commented out) - confirm that it exists at
    # module level.
    coach = Coach(g, network, args)

    if args['load_model']:
        print("Loading 'trainExamples' from file...")
        coach.loadTrainExamples()

    print('Starting the learning process 🎉')
    coach.learn()
コード例 #20
0
def TrainNewNet():
    """Train a network on a ``Game(6)`` that is backed by an Othello predictor.

    When ``args.load_model`` is truthy, the checkpoint named by
    ``args.load_folder_file`` and the stored training examples are loaded
    before learning starts.
    """
    predictor = OthelloPredictor(
        6, 'trainedModels/othello/pred_othello_073.pth', 100000)
    game = Game(6, predictor=predictor)
    network = nn(game)

    resume = args.load_model
    if resume:
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        network.load_checkpoint(folder, filename)

    coach = Coach(game, network, args)
    if resume:
        print("Load trainExamples from file")
        coach.loadTrainExamples()
    coach.learn()
コード例 #21
0
def main():
    """Train a network on ``HexGame(7)``, optionally resuming from a checkpoint.

    When ``args.load_model`` is truthy, the checkpoint named by
    ``args.load_folder_file`` and the stored training examples are loaded
    before learning starts.
    """
    log.info('Loading %s...', HexGame.__name__)
    game = HexGame(7)

    log.info('Loading %s...', NNetWrapper.__name__)
    nnet = NNetWrapper(game)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # Two placeholders need two arguments. Passing the tuple as a single
        # argument makes logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(game, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
コード例 #22
0
def main():
    """Optionally create the run's log file, then train on ``Game(5, 4)``.

    When ``args.create_log_file`` is truthy, a log file is created at
    ``args.log_file_location/args.log_file_name`` unless it already exists;
    the directory is created if needed. When ``args.load_model`` is truthy,
    the checkpoint named by ``args.load_folder_file`` and the stored
    training examples are loaded before learning starts.
    """
    if args.create_log_file:
        full_path = os.path.join(args.log_file_location, args.log_file_name)
        if os.path.exists(full_path):
            log.info('Log file %s already exists', full_path)
        else:
            # One code path for both cases: create the directory if it is
            # missing (including parents), then write the header line. The
            # ``with`` block closes the file on exit.
            os.makedirs(args.log_file_location, exist_ok=True)
            with open(full_path, 'w') as fp:
                fp.write(f"Log file initiated for {args.log_run_name} \n")

    log.info('Loading %s...', Game.__name__)
    g = Game(5, 4)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g, args=nn_args)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # Two placeholders need two arguments. Passing the tuple as a single
        # argument makes logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args, nn_args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
コード例 #23
0
def main():
    """Train a network on ``TicTacToeGame``, optionally resuming.

    When ``args.load_model`` is truthy, the checkpoint named by
    ``args.load_folder_file`` and the stored training examples are loaded
    before learning starts.
    """
    log.info('Loading %s...', TicTacToeGame.__name__)
    game = TicTacToeGame()

    log.info('Loading %s...', nn.__name__)
    network = nn(game)

    resume = args.load_model
    if not resume:
        log.warning('Not loading a checkpoint!')
    else:
        log.info(f'Loading checkpoint "{args.load_folder_file}" ...')
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        network.load_checkpoint(folder, filename)

    log.info('Loading the Coach...')
    coach = Coach(game, network, args)

    if resume:
        log.info("Loading 'trainExamples' from file...")
        coach.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    coach.learn()
コード例 #24
0
def main():
    """Train a network on ``CurlingGame``, optionally resuming.

    CUDA availability is logged first. When ``args.load_model`` is truthy,
    the checkpoint named by ``args.load_folder_file`` and the stored
    training examples are loaded before learning starts.
    """
    log.info('Cuda enabled: %s', torch.cuda.is_available())
    log.info('Loading Curling...')
    game = CurlingGame()

    log.info('Loading nn...')
    network = nn(game)

    resume = args.load_model
    if not resume:
        log.warning('Not loading a checkpoint!')
    else:
        log.info('Loading checkpoint...')
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        network.load_checkpoint(folder, filename)

    log.info('Loading Coach...')
    coach = Coach(game, network, args)

    if resume:
        log.info("Load trainExamples from file")
        coach.loadTrainExamples()

    log.info('Learning...')
    coach.learn()
コード例 #25
0
def main():
    """Train a network on ``Game(7)`` (Hex on a 7x7 grid).

    When ``args.load_model`` is truthy, the checkpoint named by
    ``args.load_folder_file`` and the stored training examples are loaded
    before learning starts.
    """
    log.info('Loading %s...', Game.__name__)
    # Changed from 6 to 7 because Hex is played on a 7x7 grid here.
    g = Game(7)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # Two placeholders need two arguments. Passing the tuple as a single
        # argument makes logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
コード例 #26
0
def main():
    """Train a network on ``Game(6)``, optionally resuming from a checkpoint.

    GPU availability is logged first. When ``args.load_model`` is truthy,
    the checkpoint named by ``args.load_folder_file`` and the stored
    training examples are loaded before learning starts.
    """
    log.info('GPU availability: %s', torch.cuda.is_available())
    log.info('Loading %s...', Game.__name__)
    g = Game(6)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g, None)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # Two placeholders need two arguments. Passing the tuple as a single
        # argument makes logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
コード例 #27
0
ファイル: main.py プロジェクト: szsb26/Alphazero-CS
    #SET beta = 1 DURING TESTING SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold':
    10,  #dictates when the MCTS starts returning deterministic polices (vector of 0 and 1's). See Coach.py for more details.
    'alpha':
    1e-5,  #note that reward for a terminal state is -alpha||x||_0 - gamma*||A_S*x-y||_2^2. The smaller alpha is, the more weight the algorithm gives in selecting a sparse solution.
    'gamma':
    1,  #note that reward for a terminal state is -alpha||x||_0 - gamma*||A_S*x-y||_2^2. The smaller gamma is, the more likely algorithm is going to choose stopping action earlier(when ||x||_0 is small). gamma enforces how much we want to enforce Ax is close to y. 
    #choice of gamma is heavily dependent on the distribution of our signal and the distribution of entries of A. gamma should be apx. bigger than m/||A_Sx^* - y||_2^2, where y = Ax, and x^* is the solution to the l2 regression problem.
    'epsilon':
    1e-5,  #If x is the optimal solution to l2, and the residual of l2 regression ||A_Sx-y||_2^2 is less than epsilon, then the state corresponding to indices S is a terminal state in MCTS. 
}

# Start AlphaZero training.
# Create the game rules, the game object, the network wrapper and the coach.
Game_rules = Game_args()
Game = CSGame()

nnet = NNetWrapper(args)

# Optionally restore the network from the checkpoint named 'best' inside
# args['network_checkpoint'].
if args['load_nn_model'] == True:
    filename = 'best'
    nnet.load_checkpoint(args['network_checkpoint'], filename)

Alphazero_train = Coach(Game, nnet, args, Game_rules)

# Optionally resume from previously stored training examples.
if args['load_training'] == True:
    print('Load trainExamples from file')
    Alphazero_train.loadTrainExamples()

# Run the training loop.
Alphazero_train.learn()
コード例 #28
0
    500,
    'arenaTemp':
    0.1,
    'arenaMCTS':
    False,
    'randomCompareFreq':
    1,
    'compareWithPast':
    True,
    'pastCompareFreq':
    3,
    'expertValueWeight':
    dotdict({
        'start': 0,
        'end': 0,
        'iterations': 35
    }),
    'cpuct':
    3,
    'checkpoint':
    'checkpoint',
    'data':
    'data',
})

# Script entry point: build the game, the network and the coach from the
# module-level ``args``, then start learning. No checkpoint is loaded here.
if __name__ == "__main__":
    g = Game()
    nnet = nn(g)
    c = Coach(g, nnet, args)
    c.learn()
コード例 #29
0
    if serialFlag:
        if gameChoice == 0:
            g = Game(6)
        elif gameChoice == 1:
            g = TicTacToeGame()
        elif gameChoice == 2:

            g = nimGame(nimConfig)

        nnet = nn(g)

        if args.load_model:
            nnet.load_checkpoint(args.load_folder_file[0],
                                 args.load_folder_file[1])

        c = Coach(g, nnet, args)
        if args.load_model:
            print("Load trainExamples from file")
            c.loadTrainExamples()

        c.learn()

    else:

        def selfPlayOnly(args):
            g = nimGame(config)
            nnet = nn(g)
            coach_0 = Coach(g, nnet, args)
            for i in range(args.numIters):
                print("Self-play iteration: " + str(i))
                nnet.load_checkpoint(args.load_folder_file[0],
コード例 #30
0
# Training configuration read by the entry point below and passed to Coach.
args = dotdict({
    'numIters': 1000,  # number of training rounds
    'numEps': 100,  # number of self-play games in each round
    'tempThreshold': 15,
    'updateThreshold':
    0.6,  # the new nnet replaces the old one if its win ratio against it exceeds this
    'maxlenOfQueue': 200000,  # TODO: maximum length of the history kept in memory? (unconfirmed)
    'numMCTSSims': 25,  # number of MCTS simulation rounds
    'arenaCompare': 40,  # number of games played between old and new nnet
    'cpuct': 1,
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

# Script entry point: train a network on Game(6) using the module-level
# ``args``.
if __name__ == "__main__":
    g = Game(6)
    nnet = nn(g)

    # Restore the network from (folder, filename) when resuming.
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    c = Coach(g, nnet, args)
    # When resuming, also reload the stored training examples.
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
コード例 #31
0
        for n in range(args.n_cpu):
            disable_window = not (args.render == 'on'
                                  or args.render == 'single' and n == 0)
            env = FTGEnv(n,
                         BasicBot,
                         opponent,
                         port=args.port + n,
                         inverted_player=1,
                         disable_window=disable_window,
                         starts_with_energy=args.starts_with_energy,
                         train='train' in args.train_or_test,
                         verbose=False)
            env.run(block=False, ai_monitor=ai_monitor)
            envs.append(env)

        while Coach().memory.size < batch_size:
            time.sleep(5)
            pass

        epoch = 0
        start_time = timer()
        highest_score = -1e-10

        while True:
            Coach().training(epsilon)
            q_values = np.zeros(step_per_training)
            losses = np.zeros(step_per_training)
            for step in trange(step_per_training,
                               desc='Training {}'.format(epoch)):
                max_q, loss = Coach().learn()
                q_values[step] = max_q.mean()