Example #1
    def __init__(self, screen, currentLevel):
        Arena.__init__(self)
        
        currentLevel = 2  # NOTE: hard-codes the level, overriding the value passed in
        self.entities = []
        self.keyListeners = []
        self.accelerators = []
        self.npcs = []
        
        # on a timer, we reset which NPCs emit attractive and repulsive forces
        self.foodieAcceleratorReset = 5000 # ms
        # this counter is incremented and reset on each repopulation
        self.foodieAcceleratorResetTimer = 0
        # we roll an n-sided die for each Foodie to decide whether it will
        # become an accelerator on this reset (randint(0, dice) == 0, i.e. a
        # 1-in-6 chance when the die value is 5)
        self.foodieAcceleratorResetDice = 5
    
        # init npcs
        
        maxSize = 50.0
        minSize = 6.0
        
        screenW = float(screen.get_width())
        screenH = float(screen.get_height())
        
        # rotation accel rates
        maxRotVel = math.pi / 20.0
        maxRotAcc = math.pi / 40.0

        for i in range(Variables.lvlFoodCount[currentLevel]):
            size = minSize + random.random() * (maxSize - minSize)
            pos = [0.0,0.0]
            pos[0] += int(random.random() * screenW)
            pos[1] += int(random.random() * screenH)            
            prey = Prey(size, pos, maxRotVel, maxRotAcc, self)

            self.entities.append(prey)

        for i in range(Variables.lvlEaterCount[currentLevel]):
            size = minSize + random.random() * (maxSize - minSize)
            pos = [0.0,0.0]
            pos[0] += int(random.random() * screenW)
            pos[1] += int(random.random() * screenH)
            predator = Predator(size, pos, maxRotVel, maxRotAcc, self)
            
            self.entities.append(predator)
        
        # init players
        c = CircleGuy(self)
        m = MouseEntity(self)
        self.accelerators += [m]
        self.entities += [c, m]

        
        self.keyListeners += [c, m]
Example #2
	def open(self):
		self.isopen = True
		self.moving = False
		self.arena = Arena.getInstance()
		self.sched = Scheduler()
		self.sched.start()
		self.sched.add_interval_job(self.calc_pos, seconds=0.05)
		self.sched.add_interval_job(self.send_pos, seconds=0.05)
		self.keys = Keys()	
		self.movement_enabled = True
		self.dirty_coll = True
		self.gravity = 0
		self.jumping = False
		
		self.pos_transaction = None
		self.id = self.arena.addPlayer(self)
		self.active = False
		self.name = None
		
		self.arena.Join(self.id)

		self.cur_weapon = None
		self.weapons = []
		self.add_weapon(Weapon("Mr. Default Gun"))
		
		self.maxhealth = 100
		self.health = self.maxhealth
		
		self.pos = Vec3()
		self.rot = Vec2()
		self.spawn()
		
		print 'new connection'
Example #3
 def update(self, dT):
     Arena.update(self, dT)
     
     '''
     self.foodieAcceleratorResetTimer += dT
     if self.foodieAcceleratorResetTimer > self.foodieAcceleratorReset:
         print "RESETING ACCELERATIN FOODIES"
         # remove all foodies from the accelerators list
         for a in self.accelerators:
             if isinstance(a, Foodie):
                 self.accelerators.remove(a)
         # randomly add some foodies back in
         for f in self.foodies:
             if random.randint(0, self.foodieAcceleratorResetDice) == 0:
                 self.accelerators.append(f)
         self.foodieAcceleratorResetTimer = 0
     '''
     pass                        
Example #4
def play_test_case(radius, cat_angle, mouse_angle):
    m, c = make_test_case(radius, cat_angle, mouse_angle)
    s = m.statue
    tk = Tk()
    arena = Arena(tk, meter*10, meter*10)
    arena.add(s)
    arena.add(c)
    arena.add(m)
    arena.pack()
    tk.mainloop()
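
# Hypothetical invocation of the helper above (make_test_case and the meter
# scaling constant are assumed to be defined elsewhere in the original module):
#     play_test_case(radius=2 * meter, cat_angle=120, mouse_angle=30)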
Example #5
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # if 8 < get_hour() < 23:
            #    log.warning('Sleeping to save CPU...')
            #    while 8 < get_hour() < 23:
            #        time.sleep(60)
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                for _ in tqdm(range(self.args.numEps),
                              desc="Self Play",
                              ncols=100):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            while len(self.trainExamplesHistory
                      ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NOTE! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            random.seed(time.time())
            random.shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins = arena.playGames(self.args.arenaCompare)

            print()
            print('Results')
            print(f'Won: {nwins}')
            print(f'Lost: {pwins}')
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='checkpoint_best.pth.tar')
                self.saveTrainExamples('best')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
Example #6
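            # Fragment of the snake's key-handling loop: each branch only turns
            # the snake when it is not already moving in the opposite direction,
            # so an instant 180-degree reversal is ignored.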
            if keys[pygame.K_UP]:
                if direction[1] != 1:
                    direction = (0, -1)
                    break
            elif keys[pygame.K_DOWN]:
                if direction[1] != -1:
                    direction = (0, 1)
                    break
            if keys[pygame.K_LEFT]:
                if direction[0] != 1:
                    direction = (-1, 0)
                    break
            elif keys[pygame.K_RIGHT]:
                if direction[0] != -1:
                    direction = (1, 0)
                    break

        newArena.movesnake(direction, snake)

        timer += clock.tick(100)


width, height = 320, 320

newArena = Arena(30, 30)
win = pygame.display.set_mode((width, height))
# pygame.display.flip()

menu_screen()
Example #7
 def sleep(self):
     Magni.health = 1000
     Arena.sleep(Arena)  # passes the Arena class itself as the argument to the unbound sleep method
Example #8
 def learn(self):
     #Generate or load a matrix if fixed_matrix is set to True. We save a Game_args object in Coach in case A is fixed, so that
     #when we initialize multiple MCTS objects below we do not have to store multiple copies of A.
     if self.args['fixed_matrix'] == True:
         if self.args['load_existing_matrix'] == True:
             self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
         else:
             self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
             self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])
     
     #keep track of learning time
     learning_start = time.time()
     
     #start training iterations
     for i in range(1, self.args['numIters']+1):
         print('------ITER ' + str(i) + '------')
         #If we are not loading a set of training data.... then:
         if not self.skipFirstSelfPlay or i>1:
             #1)Initialize empty deque for storing training data after every eps in the iteration has been processed
             iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])
             
             #3)Start search. A single search consists of a synchronous search over ALL eps in the current batch.
             #Essentially, the number of MCTS trees that must be maintained at once equals the number of eps in the current batch.
             for j in range(self.args['num_batches']):
                 #INITIALIZATION STEP---------------------------------------
                 #Each element in MCTS_States_list is in the form of (MCTS object, [list of States root traversed])
                 MCTS_States_list = []
                 batchTrainExamples = []
                 
                 #Initialize bookkeeping
                 print('Generating Self-Play Batch ' + str(j) + ':')
                 
                 bar = Bar('Self Play', max = self.args['eps_per_batch'])
                 
                 #Initialize MCTS_States_list. Number of pairs in MCTS_States_list should equal eps_per_batch
                 for ep in range(self.args['eps_per_batch']):
                     #Initialize Game_args() for MCTS
                     temp_game_args = Game_args()
                     if self.args['fixed_matrix'] == False:
                         temp_game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
                     else:
                         temp_game_args.sensing_matrix = self.game_args.sensing_matrix
                     temp_game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                     #Initialize MCTS and the first state for each MCTS
                     temp_MCTS = MCTS(self.game, self.nnet, self.args, temp_game_args, identifier = int(str(j) + str(ep))) #NOTE: concatenating digits can collide, e.g. j=1,ep=11 and j=11,ep=1 both give 111
                     temp_init_state = self.game.getInitBoard(self.args, temp_game_args, identifier = int(str(j) + str(ep)))
                     #Append to MCTS_States_list
                     MCTS_States_list.append([temp_MCTS, [temp_init_state]])
                 
                 #initialize some variables for bookkeeping bar in terminal
                 current_MCTSStateslist_size = len(MCTS_States_list)
                 completed_episodes = 0
                 total_completed_eps = 0
             
                 #Initialize Threading Class. Needed to call threaded_mcts below. 
                 threaded_mcts = Threading_MCTS(self.args, self.nnet)
                 #----------------------------------------------------------
                     
                 #While MCTS_States_list is nonempty, advance each episode in MCTS_States_list by one move.
                 #continue advancing by one move until MCTS_States_list is empty, meaning that all games are completed.
                 #When a game is completed, its corresponding pair should be removed from MCTS_States_list
                 
                 #----------------------------------------------------------
                 self_play_batchstart = time.time()
                 
                 while MCTS_States_list:
                     #advanceEpisodes returns new MCTS_States_list with all elements having advanced one move, and removes all completed games
                     #advanceEpisodes also returns a set of new trainExamples for games which have been completed after calling advanceEpisodes
                     
                     MCTS_States_list, trainExamples = self.advanceEpisodes(MCTS_States_list, threaded_mcts)
                     #keep the states from episodes whose latest node is terminal; these will be used as new training samples
                     batchTrainExamples += trainExamples
                     
                     #for bookkeeping bar in the output of algorithm
                     if len(MCTS_States_list) < current_MCTSStateslist_size:
                         completed_episodes = current_MCTSStateslist_size - len(MCTS_States_list)
                         current_MCTSStateslist_size = len(MCTS_States_list)
                         total_completed_eps += completed_episodes
                         #advance bookkeeping bar if size of MCTS_States_list becomes smaller. 
                         #bar.next() only advances and outputs the progress bar
                         #bar.suffix only outputs the suffix text after "|"
                         bar.suffix  = '({eps_completed}/{maxeps})'.format(eps_completed = total_completed_eps, maxeps=self.args['eps_per_batch'])
                         
                         #advance the progress bar completed_episodes times
                         for k in range(completed_episodes):
                             bar.next()       
                 #----------------------------------------------------------    
                 #end the tracking of the bookkeeping bar
                 bar.finish()
                 self_play_batchend = time.time()
                 print('All Self-Play Games in batch have been played to completion.')
                 print('Total time taken for batch: ', self_play_batchend - self_play_batchstart)
                 
                 iterationTrainExamples += batchTrainExamples
             
             #Add the training samples generated in a single training iteration to self.trainExamplesHistory
             #This step is the last line included in "if not self.skipFirstSelfPlay or i>1:" block
             self.trainExamplesHistory.append(iterationTrainExamples)
         
         #Jump to here directly only when self.skipFirstSelfPlay is True and i <= 1 (self-play was skipped on the first iteration)
         #Once iterationTrainExamples has been completed, we will use these iterationTrainExamples to retrain the Neural Network. 
         if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
             print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
             self.trainExamplesHistory.pop(0)
         
         #save trainExamplesHistory list of Coach
         self.saveTrainExamples(i-1)
         
         #move all training samples from trainExamplesHistory to trainExamples for shuffling
         #shuffle trainExamples
         trainExamples = []
         for e in self.trainExamplesHistory: 
             trainExamples.extend(e)
         shuffle(trainExamples)
         
         #The Arena--------------------------------------------------------
         if self.args['Arena'] == True:
             self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp') #snapshot the current network; pnet loads this copy as the "previous" network on the next line
             self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
         
             #convert trainExamples into a format recognizable by Neural Network and train
             trainExamples = self.nnet.constructTraining(trainExamples)
             self.nnet.train(trainExamples[0], trainExamples[1])#Train the new neural network self.nnet. The weights are now updated
         
             #Pit the two neural networks self.pnet and self.nnet in the arena            
             print('PITTING AGAINST PREVIOUS VERSION')
         
             arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args) #note that Arena will pit pnet with nnet, and Game_args A and y will change constantly. Note that next iteration, arena is a reference to a different object, so old object is deleted when there are no other references to it. 
             pwins, nwins, draws = arena.playGames()
         
             print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
             if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args['updateThreshold']:
                 print('REJECTING NEW MODEL')
                 self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
             else:#saves the weights(.h5) and model(.json) twice. Creates nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5, and rewrites best_model.json and best_weights.h5
                 print('ACCEPTING NEW MODEL')
                 self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
                 self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
         #-----------------------------------------------------------------
         
         else: #If we do not activate Arena, then all we do is just train the network, rewrite best, and write a new file 'nnet_checkpoint' + str(i-1).  
             print('TRAINING NEW NEURAL NETWORK...')
             trainExamples = self.nnet.constructTraining(trainExamples)
             
             #FOR TESTING-----------------------------------------------------
             #print('')
             #print('feature arrays shape: ', trainExamples[0][0].shape, trainExamples[0][1].shape)
             #print('trainExamples feature arrays: ', trainExamples[0])
             #print('')
             #print('label arrays shape: ', trainExamples[1][0].shape, trainExamples[1][1].shape)
             #print('trainExamples label arrays: ', trainExamples[1])
             #END TESTING-----------------------------------------------------
                 
             self.nnet.train(trainExamples[0], trainExamples[1], folder = self.args['network_checkpoint'], filename = 'trainHistDict' + str(i-1))    
             
             #FOR TESTING-----------------------------------------------------
             #weights = self.nnet.nnet.model.get_weights()
             #min_max = []
             #for layer_weights in weights:
                 #print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
                 #layer_weights_min = np.amin(layer_weights)
                 #layer_weights_max = np.amax(layer_weights)
                 #min_max.append([layer_weights_min, layer_weights_max])
             #print('')
             #print('The smallest and largest weights of each layer are: ')
             #for pair in min_max:
                 #print(pair)
             #print('')
             #END TESTING-----------------------------------------------------
                   
             self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
             self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename = 'best')
     
     #Compute total time to run alphazero
     learning_end = time.time()
     print('----------TRAINING COMPLETE----------')
     print('Total training time: ', learning_end - learning_start)
Example #9
 def _out_of_arena(self):
     super()._out_of_arena()
      if uniform(0,1) < ASTEROIDS_DESPAWN_PROB: # sometimes they just leave (despawn)
         self.Dispose()
     else:
         Arena.wrap_to_arena(self)
Example #11
from RockBot import RockBot
from ScissorsBot import ScissorsBot
from PaperBot import PaperBot
from BeatLastBot import BeatLastBot
from RockThenPaperThenScissorsBot import RockThenPaperThenScissorsBot
from ScissorsThenPaperThenRockBot import ScissorsThenPaperThenRockBot
from Arena import Arena

if __name__ == '__main__':
    rockBot = RockBot()
    scissorsBot = ScissorsBot()
    paperBot = PaperBot()
    beatLastBot = BeatLastBot()
    rockThenPaperThenScissorsBot = RockThenPaperThenScissorsBot()
    scissorsThenPaperThenRockBot = ScissorsThenPaperThenRockBot()
    arena = Arena()
    arena.battle(beatLastBot, rockThenPaperThenScissorsBot, 20)
    arena.__init__()        # re-initialize the arena's state before the next match
    beatLastBot.__init__()  # reset the bot's internal state as well
    arena.battle(beatLastBot, scissorsThenPaperThenRockBot, 20)
Example #12
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
        for i in range(self.args.numIters):
            # bookkeeping
            print('------ITER ' + str(i + 1) + '------')
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                trainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pnet = self.nnet.__class__(self.game)
            pnet.load_checkpoint(folder=self.args.checkpoint,
                                 filename='temp.pth.tar')
            pmcts = MCTS(self.game, pnet, self.args)
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins))
            if float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet = pnet

            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='checkpoint_' + str(i) +
                                          '.pth.tar')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
                self.mcts = MCTS(self.game, self.nnet,
                                 self.args)  # reset search tree
Example #13
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):  #for number of rounds
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque(
                    [], maxlen=self.args.maxlenOfQueue
                )  #remove the previous training example

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(
                        self.args.numEps):  #for each self-play of this rounds
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree

                    #returns [(canonicalBoard,pi,v), (canonicalBoard,pi,v)]
                    # v is the result
                    selfPlayResult = self.executeEpisode()
                    #play one game, adding the gaming history
                    iterationTrainExamples += selfPlayResult

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            #self-play finished, updating the move history
            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(
                    0)  #remove the oldest gaming history
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)  #adding new move record
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(
                folder=self.args.checkpoint,
                filename='temp.pth.tar')  #save the previous net
            self.pnet.load_checkpoint(
                folder=self.args.checkpoint,
                filename='temp.pth.tar')  #read the previous net
            pmcts = MCTS(self.game, self.pnet,
                         self.args)  #reset previous models' mcts

            #using new data to train the new model
            self.nnet.train(
                trainExamples)  #train the network with the new move records
            nmcts = MCTS(self.game, self.nnet,
                         self.args)  #reset the new model's MCTS

            #OLD VS NEW
            print('PITTING AGAINST PREVIOUS VERSION')
            # rp = RandomPlayer(self.game).play
            # abp2 = AbpPlayer(self.game, 1, abpDepth=2).play
            arena = Arena(
                lambda board, turn: np.argmax(
                    pmcts.getActionProb(board, turn, temp=0)),
                lambda board, turn: np.argmax(
                    nmcts.getActionProb(board, turn, temp=0)), self.game)
            # arena = Arena(abp2,
            #               lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(
                self.args.arenaCompare)  #play the new model against the old model

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                #OLD WIN!
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(
                    folder=self.args.checkpoint, filename='temp.pth.tar'
                )  #keep the previous model, as it beat the new model
            else:
                #NEW WIN!
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(
                    folder=self.args.checkpoint, filename='best.pth.tar'
                )  #save the new model, as this is the best
Example #14
from Tkinter import *  # Import everything from Tkinter
from Arena import Arena  # Import our Arena
from Turtle import Turtle  # Import our Turtle
from Vector import *  # Import everything from our Vector

Turtle.m = 50.0  # Scaling factor
Turtle.origin = Vector(400, 300)
from Statue import *
from Mouse import *
from Cat import *

statue = Statue(Turtle.origin + Vector(0, 0), 0)
mouse = Mouse(Turtle.origin + Vector(0, -Turtle.m).rotate(40), 0)
cat = Cat(Turtle.origin + Vector(0, -4 * Turtle.m).rotate(200), 0, mouse)

tk = Tk()  # Create a Tk top-level widget
arena = Arena(tk, 800, 600)  # Create an Arena widget, arena
arena.pack()  # Tell arena to pack itself on screen
arena.add(statue)  # Add a very simple statue
arena.add(mouse)  # Add a green mouse centered at the base of the statue
arena.add(cat)  # Add a red cat
tk.mainloop()  # Enter the Tkinter event loop
Example #15
			string = "Oh no! You did not attain War Hammer.\nYou can continue to your next trial, but you might want to try again.\nYou may need it later!"
		elif riddle.getHammer() == 1:
			player.setHammer()
			string = "Congratulations! You have attained War Hammer!\nYou may choose your next trial.\nIf you have completed all the trials, you are ready to face the dragon!"
		home = Home(string)
	elif home.getTrial() == 3:
		string = ""
		card = CardChallenge()
		if card.getLuckStatus() == 0:
			string = "Oh no! You did not attain Gambler's Luck.\nYou can continue to your next trial, but you might want to try again.\nYou may need it later!"
		elif card.getLuckStatus() == 1:
			player.setLuck()
			string = "Congratulations! You have attained Gambler's Luck!\nYou may choose your next trial.\nIf you have completed all the trials, you are ready to face the dragon!"
		home = Home(string)
	elif home.getTrial() == 4:
		string = ""
		arena = Arena(player, dragon)
		if arena.getWinner() == 0:
			string = "Only a coward flees from battle!"
			break
		elif arena.getWinner() == 1:
			string = "You have slain the dragon! We are forever in your debt."
			break
		elif arena.getWinner() == 2:
			string = "Uh oh. Looks like you died. Well, better luck in the afterlife"
			break
			
home = Home(string)
		
	
Example #16
class GameEngine:
    def __init__(self, arena_width=501, arena_height=501, max_num_ghost=35000):

        self.arena_width = arena_width
        self.arena_height = arena_height
        self.max_num_ghost = max_num_ghost
        self.arena = Arena(self, arena_width, arena_height)
        self.players = {}
        self.ghosts = {}

        self.__sec_per_tick = .5

    def update(self):

        for player in self.players.values():
            player.early_update()

        for ghost in self.ghosts.values():
            ghost.early_update()

        for player in self.players.values():
            player.update()

        for ghost in self.ghosts.values():
            ghost.update()

        players_to_delete = []
        for player in self.players.values():
            if player.is_dead:
                players_to_delete.append(player.id)

        for pid in players_to_delete:
            self.delete_player(pid)

        ghosts_to_delete = []
        for ghost in self.ghosts.values():
            if ghost.is_dead:
                ghosts_to_delete.append(ghost.id)

        for gid in ghosts_to_delete:
            self.delete_ghost(gid)

        self.arena.late_update()

    def get_arena(self):
        return self.arena

    def get_players(self):
        return self.players.values()

    def get_player(self, pid):
        return self.players[pid]

    def get_ghosts(self):
        return self.ghosts.values()

    def get_sec_per_tick(self):
        return self.__sec_per_tick

    def add_player(self, counter, name, x, y):
        player = Player(self, counter, name, x, y)
        self.players[counter] = player
        self.arena[x, y].insert_object_on_top(player)

    def add_ghost(self, counter, ghost_type, x, y):
        ghost = Ghost(self, counter, ghost_type, x, y)

        self.ghosts[counter] = ghost
        self.arena[x, y].insert_object_on_top(ghost)

    def new_player(self, player):
        self.players[player.get_id()] = player

    def new_ghost(self):
        raise Exception("Not implemented!")

    def delete_player(self, pid):
        p = self.players[pid]
        self.arena.lift(p)
        del self.players[pid]

    def delete_ghost(self, gid):
        g = self.ghosts[gid]
        self.arena.lift(g)
        del self.ghosts[gid]
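
# Minimal usage sketch (hypothetical; Player and Ghost are the classes
# referenced by add_player/add_ghost and are assumed to be importable):
#     engine = GameEngine()
#     engine.add_player(counter=1, name="alice", x=10, y=10)
#     engine.add_ghost(counter=1, ghost_type="basic", x=20, y=20)
#     engine.update()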
Example #17
from Arena import Arena
from Stack import Stack
from Tile import Tile
from Wall import Wall
import random

# set gameover to False
gameover = False

# create initial Arena
arena1 = Arena(15)

# populate the Arena's tile dictionary with Tile objects
for i in range(arena1.numtiles):
    arena1.tiles[i] = Tile(i)

# populate the Arena's wall dictionary with Wall objects
for w in range(arena1.numwalls):
    arena1.walls[w] = Wall(w)

# populate the Arena's stack dictionary with Stack objects
for k in range(arena1.numstacks):
    randsize = random.randint(10, 20)
    arena1.stacks[k] = Stack(k, randsize)

# get list of unique tile positions for stacks
alltiles = [x for x in arena1.tiles]
alltiles.remove(112)  # reserve tile 112 (presumably the centre of the 15x15 grid) so no stack spawns there
stackpositions = random.sample(alltiles, arena1.numstacks)

# remove stack positions from alltiles
Example #18
 def _out_of_arena(self):
     super()._out_of_arena()
     Arena.wrap_to_arena(self)
Example #19
__author__ = 'zhengxiaoyu'
from Tkinter import *                  # Import everything from Tkinter
from Arena   import Arena              # Import our Arena
from Turtle  import Turtle             # Import our Turtle
from Vector  import *                  # Import everything from our Vector
from Mouse import *
from WalkingTurtle import *
from Status import *
from Cat import *
little_mouse = Mouse(Vector(345,350), 1, 1)
little_cat = Cat(Vector(800,350), 1, little_mouse)
little_status = Status(Vector(450,350), 1)
tk = Tk()                              # Create a Tk top-level widget
arena = Arena(tk, little_cat)                      # Create an Arena widget, arena
arena.pack()
arena.add(little_mouse)
arena.add(little_cat)
arena.add(little_status)
tk.mainloop()                          # Enter the Tkinter event loop
Example #20
-- -- turtle.setstate(new_state) *simulates parallel behavior
run -  loops over step over and over again
stop - stop running
quit - quit the program 
"""

from Tkinter import *                  # Import everything from Tkinter
from Arena   import Arena              # Import our Arena
from Circle  import Circle             # Import our Circle
from Mouse import Mouse
from Cat import Cat
from Vector  import *                  # Import everything from our Vector
from random import randrange, uniform

tk = Tk()                              # Create a Tk top-level widget
arena = Arena(tk, width = 1000, height = 700)                      # Create an Arena widget, arena
arena.pack()                           # Tell arena to pack itself on screen
'''
Turtle(position, heading, outline, fill, width)
position - vector telling where turtle to be placed
heading - degrees, north = 0, east 90
outline - color, default to black
fill - color of turtle, default white
width - width of outline
'''
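# Hypothetical construction based on the parameter list documented above:
#     t = Turtle(Vector(500, 350), 90, outline='black', fill='white', width=2)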
def initializeStatue(center_x, center_y, radius):
	"""
	Creates a circular statue centered at CENTER_X and CENTER_Y 
	with a radius of RADIUS.
	Returns the statue
	>>> statue = initializeStatue(200, 200, 2)
Example #21
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        #Generate a fixed sensing matrix if option is toggled to True.
        #1)A is fixed. Also set arena_game_args.sensing_matrix to be equal to that of coach.game_args so the arena uses the same sensing matrix.
        #2)the folder which saves the fixed sensing matrix is empty
        if self.args['fixed_matrix'] == True:
            if self.args['load_existing_matrix'] == True:
                self.game_args.sensing_matrix = np.load(
                    self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
                self.arena_game_args.sensing_matrix = np.load(
                    self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')

                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------

            else:  #if not loading an existing matrix in self.args['fixed_matrix_filepath'], then generate a new sensing matrix of given type self.args['matrix_type']
                self.game_args.generateSensingMatrix(self.args['m'],
                                                     self.args['n'],
                                                     self.args['matrix_type'])
                self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
                #Save the fixed matrix
                self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])

                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------

        for i in range(1, self.args['numIters'] + 1):
            print('------ITER ' + str(i) + '------')
            if not self.skipFirstSelfPlay or i > 1:  #default of self.skipFirstSelfPlay is False. If loading training from file then skipFirstSelfPlay is set to True. skipFirstSelfPlay allows us to load the latest nn_model with latest set of TrainingExamples
                iterationTrainExamples = deque(
                    [], maxlen=self.args['maxlenOfQueue'])
                #bookkeeping objects contained in pytorch_classification.utils
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args['numEps'])
                end = time.time()
                #IMPORTANT PART OF THE CODE. GENERATE NEW A AND NEW y HERE. EACH SELF-PLAY GAME HAS DIFFERENT A AND y.
                #-----------------------------------------------------
                for eps in range(self.args['numEps']):
                    #Initialize a new game by setting A, x, y, and then execute a single game of self play with self.executeEpisode()
                    if self.args[
                            'fixed_matrix'] == False:  #repeatedly generate sensing matrices if we are not fixing the sensing matrix.
                        self.game_args.generateSensingMatrix(
                            self.args['m'], self.args['n'],
                            self.args['matrix_type']
                        )  #generate a new sensing matrix
                    self.game_args.generateNewObsVec(
                        self.args['x_type'], self.args['sparsity']
                    )  #generate a new observed vector y. This assumes a matrix has been loaded in self.game_args!!!
                    self.mcts = MCTS(
                        self.game, self.nnet, self.args, self.game_args
                    )  #create new search tree for each game we play

                    #TESTING-------------------------
                    #print('The generated sparse vector x has sparsity: ' + str(self.game_args.game_iter))
                    #--------------------------------

                    #TESTING--------------------------
                    #print('Starting self-play game iteration: ' + str(eps))
                    #start_game = time.time()
                    #--------------------------------

                    iterationTrainExamples += self.executeEpisode(
                    )  #Play a new game with newly generated y. iterationTrainExamples is a deque containing states each generated self play game

                    #TESTING--------------------------
                    #end_game = time.time()
                    #print('Total time to play game ' + str(eps) + ' is: ' + str(end_game-start_game))
                    #-----------------------------------------------------
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args['numEps'],
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                #self.trainExamplesHistory is a list of deques, where each deque contains all the states from numEps number of self-play games
                self.trainExamplesHistory.append(iterationTrainExamples)

            #Jump to here on the first iteration if we loaded an existing file into self.trainExamplesHistory from method loadTrainExamples below.
            if len(self.trainExamplesHistory
                   ) > self.args['numItersForTrainExamplesHistory']:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file by calling saveTrainExamples method
            # The examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(
                i - 1
            )  #save examples to self.args['checkpoint'] folder with given iteration name of i-1

            # shuffle examples before training
            #trainExamples is the list form of trainExamplesHistory. Note that trainExamplesHistory is a list of deques,
            #where each deque contains training examples. trainExamples gets rid of the deque, and instead puts all training
            #samples in a single list, shuffled
            trainExamples = []
            for e in self.trainExamplesHistory:  #Each e is a deque
                trainExamples.extend(e)
            shuffle(trainExamples)

            #The Arena--------------------------------------------------------
            if self.args['Arena'] == True:
                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'],
                    filename='temp')  #snapshot the current network; pnet loads this copy as the "previous" network below
                self.pnet.load_checkpoint(
                    folder=self.args['network_checkpoint'], filename='temp')

                #convert trainExamples into a format recognizable by Neural Network and train
                trainExamples = self.nnet.constructTraining(trainExamples)
                self.nnet.train(
                    trainExamples[0], trainExamples[1]
                )  #Train the new neural network self.nnet. The weights are now updated

                #Pit the two neural networks self.pnet and self.nnet in the arena
                print('PITTING AGAINST PREVIOUS VERSION')

                arena = Arena(
                    self.pnet, self.nnet, self.game, self.args,
                    self.arena_game_args
                )  #note that Arena will pit pnet with nnet, and Game_args A and y will change constantly. Note that next iteration, arena is a reference to a different object, so old object is deleted when there are no other references to it.
                pwins, nwins, draws = arena.playGames()

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                      (nwins, pwins, draws))
                if pwins + nwins > 0 and float(nwins) / (
                        pwins + nwins) < self.args['updateThreshold']:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='temp')
                else:  #saves the weights(.h5) and model(.json) twice. Creates nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5, and rewrites best_model.json and best_weights.h5
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='nnet_checkpoint' + str(i - 1))
                    self.nnet.save_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='best')
            #-----------------------------------------------------------------

            else:  #If we do not activate Arena, then all we do is just train the network, rewrite best, and write a new file 'nnet_checkpoint' + str(i-1).
                print('TRAINING NEW NEURAL NETWORK...')
                trainExamples = self.nnet.constructTraining(trainExamples)

                #FOR TESTING-----------------------------------------------------
                #print('trainExamples feature arrays: ' + str(trainExamples[0]))
                #print('trainExamples label arrays: ' + str(trainExamples[1]))
                #END TESTING-----------------------------------------------------

                self.nnet.train(trainExamples[0],
                                trainExamples[1],
                                folder=self.args['network_checkpoint'],
                                filename='trainHistDict' + str(i - 1))

                #FOR TESTING-----------------------------------------------------
                #weights = self.nnet.nnet.model.get_weights()
                #min_max = []
                #for layer_weights in weights:
                #print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
                #layer_weights_min = np.amin(layer_weights)
                #layer_weights_max = np.amax(layer_weights)
                #min_max.append([layer_weights_min, layer_weights_max])
                #print('')
                #print('The smallest and largest weights of each layer are: ')
                #for pair in min_max:
                #print(pair)
                #print('')
                #END TESTING-----------------------------------------------------

                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'],
                    filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'], filename='best')
Example #22

# def setUpTest(cat_angle, mouse_angle, cat_radius):
#     """ Sets up testing environment for program, using cat_angle, mouse_angle, and cat_radius."""
#     tk = Tk()                              # Create a Tk top-level widget
#     arena = Arena(tk, width = 1000, height = 700)                      # Create an Arena widget, arena
#     arena.pack()
#     statue = Circle(Vector(200, 200), 0, radius = 1)
#     mouse = Mouse(statue.position + unit(statue.heading + mouse_angle) * statue.radius * statue.scale, speed = 1, orbit = statue, debug_flag = True, degree = mouse_angle)
#     cat = Cat(statue.position + unit(statue.heading + cat_angle) * (statue.radius + cat_radius) * statue.scale, speed = 1, orbit = statue, mouse = mouse, arena = arena, radius = statue.radius + cat_radius, debug_flag = True, degree = cat_angle)
#     doctest.testmod(extraglobs={'test_statue': statue, 'test_mouse': mouse, 'test_cat': cat})


if __name__ == "__main__":
    tk = Tk()  # Create a Tk top-level widget
    arena = Arena(tk, width=1000, height=700)  # Create an Arena widget, arena
    arena.pack()
    statue = Circle(Vector(200, 200), 0, radius=1)
    mouse = Mouse(
        statue.position + mouse_start * statue.radius * statue.scale, speed=1, orbit=statue, debug_flag=True, degree=0
    )
    cat = Cat(
        statue.position + unit(statue.heading + 270) * (statue.radius + 1) * statue.scale,
        speed=1,
        orbit=statue,
        mouse=mouse,
        arena=arena,
        radius=statue.radius + 1,
        debug_flag=True,
        degree=270,
    )
Example #23
sim = 100

#n2.load_checkpoint('/dev/8x50x25/','best.pth.tar')

for i in range(24,101,25):
    # NOTE: these counters are re-zeroed on every iteration, so the "totals"
    # printed at the end of the loop body just repeat the per-checkpoint numbers;
    # move these initialisations above the for loop to accumulate across checkpoints
    lastwins = 0
    prewins = 0
    draw = 0

    n2.load_checkpoint('./temp/Implement/deep3_feature',str(i+1)+'best.pth.tar')#last
    n3.load_checkpoint('./temp/Implement/origin',str(i+1)+'best.pth.tar')#pre
    args2 = dotdict({'numMCTSSims': sim, 'cpuct':1.0})
    args3 = dotdict({'numMCTSSims': sim, 'cpuct':1.0})
    #mcts2 = vmcts(g, n2, args2, visual())
    mcts2 = lastmcts(g, n2, args2)
    mcts3 = mcts(g, n3, args3)
    n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
    n3p = lambda x: np.argmax(mcts3.getActionProb(x, temp=0))

    arena = Arena(n3p, n2p, g)
    #arena = Arena(n3p, n2p, g, mcts2, visual())
    pwins, nwins, draws = arena.playGames(100)

    print(i+1)
    print('lastmcts/MCTS WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))#nwins == n2pwin == oneloss
    lastwins += nwins
    prewins += pwins
    draw += draws

    print('lastmcts/MCTS WINS : %d / %d ; DRAWS : %d' % (lastwins, prewins, draw))
Example #24
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        epochswin = []  # count the number of wins at every epoch of the network against the preceding version
        epochdraw = []  # count the number of draws at every epoch of the network against the preceding version
        epochswingreedy = []  # count the number of wins against greedy at every epoch
        epochswinrandom = []  # count the number of wins against random at every epoch
        epochsdrawgreedy = []  # count the number of draws against greedy at every epoch
        epochsdrawrandom = []  # count the number of draws against random at every epoch
        epochswinminmax = []  # count the number of wins against minmax at every epoch
        epochsdrawminmax = []  # count the number of draws against minmax at every epoch

        begining=1
        if self.args.load_model == True:
            file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) + ":eps" + str(
                self.args.numEps) + ":dim" + str(self.game.n) + ".txt", "r+")
            lines = file.readlines()
            for index, line in enumerate(lines):
                for word in line.split():
                    if index == 0:
                        epochswin.append(word)
                    elif index == 1:
                        epochdraw.append(word)
            file.close()

            file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) + ":eps" + str(
                self.args.numEps) + ":dim" + str(self.game.n) + ":greedyrandom.txt", "r+")
            lines = file.readlines()
            for index, line in enumerate(lines):
                for word in line.split():
                    if index == 0:
                        epochswingreedy.append(word)
                    elif index == 1:
                        epochsdrawgreedy.append(word)
                    elif index == 2:
                        epochswinrandom.append(word)
                    elif index == 3:
                        epochsdrawrandom.append(word)
                    elif index == 4:
                        epochswinminmax.append(word)
                    elif index == 5:
                        epochsdrawminmax.append(word)
            file.close()
            self.loadTrainExamples()

            file=open(self.args.trainExampleCheckpoint+"loopinformation","r+")
            lines=file.readlines()
            begining=lines[0]
            file.close()


        for i in range(int(begining), self.args.numIters + 1):

            fileLoopInformation = open(self.args.trainExampleCheckpoint + "loopinformation", "w+")
            fileLoopInformation.write(str(i))
            fileLoopInformation.close()

            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps + 1,
                                                                                                           maxeps=self.args.numEps,
                                                                                                           et=eps_time.avg,
                                                                                                           total=bar.elapsed_td,
                                                                                                           eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one

            filename = "curent"+str(i)+"temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                       ":dim" + str(self.game.n) + ".pth.tar"
            filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                self.game.n) + ".pth.tar"
            print("path with filename "+filename)
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)
            exists = os.path.isfile(filenameBest)
            if exists:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
            else:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            filenameCurrent="currentforprocess:temp:iter" + str(self.args.numIters) + \
                            ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ".pth.tar"
            self.nnet.save_checkpoint(folder=self.args.checkpoint,filename=filenameCurrent)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game,nmcts,pmcts,evaluate=True,
                          name=self.args.name)

            pwins, nwins, draws = arena.playGames(self.args.arenaCompare, False)

            pmcts.clear()
            nmcts.clear()
            del pmcts
            del nmcts

            print(' ')
            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if i == 1:
                epochswin.append(pwins)
                epochdraw.append(0)

            epochswin.append(nwins)
            epochdraw.append(draws)
            self.writeLogsToFile(epochswin, epochdraw)

            ''' Get all the players and then pit them against the network. Modify this section if you
                implement more players.
            '''
            (gp, rp, mp) = self.decidePlayers()

            if self.args.parallel == 0:


                nmcts1 = MCTS(self.game, self.nnet, self.args)
                nmcts2 = MCTS(self.game, self.nnet, self.args)
                nmcts3 = MCTS(self.game, self.nnet, self.args)

                arenagreedy = Arena(lambda x: np.argmax(nmcts1.getActionProb(x, temp=0)), gp, self.game,nmcts1
                                    ,name=self.args.name)
                arenarandom = Arena(lambda x: np.argmax(nmcts2.getActionProb(x, temp=0)), rp, self.game,nmcts2
                                    ,name=self.args.name)
                arenaminmax = Arena(lambda x: np.argmax(nmcts3.getActionProb(x, temp=0)), mp, self.game,nmcts3,
                                    evaluate=True,name=self.args.name)

                pwinsminmax, nwinsminmax, drawsminmax = arenaminmax.playGames(self.args.arenaCompare)
                print("minmax - "+str(pwinsminmax)+" "+str(nwinsminmax)+" "+str(drawsminmax))
                pwinsgreedy, nwinsgreedy, drawsgreedy = arenagreedy.playGames(self.args.arenaCompare)
                print("greedy - "+str(pwinsgreedy)+" "+str(nwinsgreedy)+" "+str(drawsgreedy))
                pwinsrandom, nwinsrandom, drawsrandom = arenarandom.playGames(self.args.arenaCompare)
                print("random - "+str(pwinsrandom)+" "+str(nwinsrandom)+" "+str(drawsrandom))

                nmcts1.clear()
                nmcts2.clear()
                nmcts3.clear()
                del nmcts1
                del nmcts2
                del nmcts3

            else:
                '''
                This will be used if you want to evaluate the network against the benchmarks in a parallel way
                '''

                self.args.update({'index': str(i)})

                p = self.parallel(self.args.arenaCompare)
                (pwinsminmax, nwinsminmax, drawsminmax) = p[0]  # self.parallel("minmax", self.args.arenaCompare)
                (pwinsgreedy, nwinsgreedy, drawsgreedy) = p[1]  # self.parallel("greedy",self.args.arenaCompare)
                (pwinsrandom, nwinsrandom, drawsrandom) = p[2]  # self.parallel("random",self.args.arenaCompare)

            epochsdrawgreedy.append(drawsgreedy)
            epochsdrawrandom.append(drawsrandom)
            epochswinrandom.append(pwinsrandom)
            epochswingreedy.append(pwinsgreedy)
            epochswinminmax.append(pwinsminmax)
            epochsdrawminmax.append(drawsminmax)

            self.writeLogsToFile(epochswingreedy, epochsdrawgreedy, epochswinrandom, epochsdrawrandom, epochswinminmax,
                                 epochsdrawminmax, training=False)

            if pwins + nwins == 0 or float(nwins) / (pwins + nwins) <= self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                filename = "curent"+str(i)+"temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                    self.game.n) + ".pth.tar"
                filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                    self.game.n) + ".pth.tar"
                exists = os.path.isfile(filenameBest)
                if exists:
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
                else:
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)

            else:
                print('ACCEPTING NEW MODEL')
                filename = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                    self.game.n) + ".pth.tar"
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)
            self.mcts.clear()
            del self.mcts
            self.mcts = MCTS(self.game, self.nnet, self.args, mcts=True)  # reset search tree
        self.writeLogsToFile(epochswin, epochdraw, training=True)
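# A minimal sketch of the model-gating rule applied above: the new network is kept
# only if it wins strictly more than updateThreshold of the decisive games; the
# 0.55 default here is an assumed stand-in for self.args.updateThreshold.
def accept_new_model(new_wins, prev_wins, update_threshold=0.55):
    decisive = new_wins + prev_wins
    if decisive == 0:
        return False  # all games drawn: keep the previous model
    return float(new_wins) / decisive > update_threshold

# e.g. 33 new wins vs 25 previous wins -> 33/58 ~ 0.57 -> accepted at threshold 0.55
print(accept_new_model(33, 25))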
Example #25
0
def runGame(catRadius, catAngle, mouseAngle):
    """
	Main function that organizes and runs the game. Creates statue, mouse, and cat and adds them to the area.
	"""
    tk = Tk()  # Create a Tk top-level widget
    arena = Arena(tk)  # Create an Arena widget, arena
    arena.pack()  # Tell arena to pack itself on screen

    statueObj = Statue(Vector(200, 200))  # Create statue at 200,200
    arena.add(statueObj)  # Add statue to arena

    mouseObj = Mouse(mouseAngle,
                     statueObj)  # Create mouse with statue passed as argument
    arena.add(mouseObj)  # Add mouse to arena

    cat_radius_scaled = catRadius * scale_factor  # Scale the radius of the cat to pixels
    catObj = Cat(catAngle, cat_radius_scaled,
                 mouseObj)  # Create cat with mouse passed as argument
    arena.add(catObj)  # Add cat to arena

    arena.setLabels()  # added (set labels to starting position)
    tk.mainloop()  # Enter the Tkinter event loop
Example #26
0
class Room(object):
    def __init__(self,
                 rid,
                 host,
                 max_user_num=2,
                 arena_conf_filename='Configuration.ArenaConf',
                 player_conf_filename='Configuration.PlayerConf'):
        super(Room, self).__init__()
        self.rid = rid
        self.host = host
        self.arena = None

        self.max_user_num = max_user_num
        self.username_to_user_map = {}

        # Generate dispatcher
        self.dispatcher = Dispatcher()

        # Configuration file
        self.arena_conf_filename = arena_conf_filename
        self.player_conf_filename = player_conf_filename

        # received message
        self.msg_dict = None

    def generate_msg_dict(self):
        from common.events import MsgCSPlayerMove
        self.msg_dict = {conf.MSG_CS_PLAYER_MOVE: MsgCSPlayerMove()}

    def register_dispatcher_services(self):
        self.dispatcher.register(conf.ARENA_SERVICES,
                                 ArenaServices(self.host, self.arena))

    def dispatch(self, msg, client_hid):
        self.dispatcher.dispatch(msg, client_hid)

    def handle_received_msg(self, msg_type, data, client_hid):
        if msg_type in self.msg_dict:
            msg = self.msg_dict[msg_type]
            msg.unmarshal(data)
            self.dispatcher.dispatch(msg, client_hid)
        else:
            print "Can't handle received message in room"

    def tick(self):
        if self.arena:
            self.arena.tick()

    def start_game(self):
        # Can't start game when game is running
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            return False

        self.arena = Arena(self.host, self.arena_conf_filename,
                           self.player_conf_filename)

        self.register_dispatcher_services()

        self.generate_msg_dict()

        # Send start game message to all roommates
        data = MsgSCStartGame().marshal()
        for k, v in self.username_to_user_map.items():
            self.host.sendClient(v.client_hid, data)

        self.arena.start_game(self.username_to_user_map)

    def add_user(self, user):
        if user.username not in self.username_to_user_map and \
                        len(self.username_to_user_map) >= self.max_user_num:
            return False  # room is full

        # user back again
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            self.arena.player_enter_again(user)
            return True

        self.username_to_user_map[user.username] = user
        self.broadcast_roommate_add(user.username)

        if len(self.username_to_user_map) >= self.max_user_num:
            self.start_game()

        return True

    def remove_user(self, user):
        if user.username not in self.username_to_user_map:
            return False  # user not found

        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            self.arena.player_leave(user.client_hid)
        else:
            del self.username_to_user_map[user.username]
            self.broadcast_roommate_del(user.username)
            if len(self.username_to_user_map) <= 0:
                return True

        return False

    def broadcast_roommate_add(self, username):
        msg = MsgSCRoommateAdd(username)
        data = msg.marshal()

        for username, user in self.username_to_user_map.items():
            self.host.sendClient(user.client_hid, data)

    def broadcast_roommate_del(self, username):
        msg = MsgSCRoommateDel(username)
        data = msg.marshal()

        for username, user in self.username_to_user_map.items():
            self.host.sendClient(user.client_hid, data)

    # returns True while the room is still valid (game not over), False once the game has stopped
    def is_valid(self):
        if self.arena and self.arena.is_game_stop:
            return False
        else:
            return True
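# A minimal sketch of the register/dispatch pattern the Room relies on; the real
# Dispatcher and its routing key live elsewhere, so the service_id attribute used
# here is an assumption for illustration only.
class SketchDispatcher(object):
    def __init__(self):
        self.services = {}

    def register(self, service_id, service):
        # one handler object per service id, mirroring register_dispatcher_services()
        self.services[service_id] = service

    def dispatch(self, msg, client_hid):
        # route an unmarshalled message to the service that owns it
        service = self.services.get(getattr(msg, 'service_id', None))
        if service is not None:
            service.handle(msg, client_hid)
        else:
            print("Can't handle received message in room")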
Example #27
0
    # Setup NN
    net = JasonNet()
    current_NN = net
    best_NN = net

    if not os.path.isdir("model_data"):
        os.mkdir("model_data")

    logger.info("Starting to train...")
    for i in range(args.iteration, args.total_iterations):
        logger.info(F"Iteration {i}")

        # Play a number of Episodes (games) of self play to generate data
        generate_data(current_NN, episodes, search_depth, i)

        # original monte carlo
        #run_monte_carlo(current_NN, 0, i, episodes, search_depth)

        # Train NN from dataset of monte carlo tree search above
        train_net(current_NN, i, args.lr, args.bs, args.epochs)

        # Fight new version against reigning champion in the Arena
        # Even with first iteration just battle against yourself
        arena = Arena(best_NN, current_NN)
        best_NN = arena.battle(episodes // 2, search_depth)
        # Save the winning net as a Pickle for battle later
        save_as_pickle(i, best_NN)

    print("End of the main driver program. Training has completed!")
Example #28
0
        def p1(x, turn):
            if turn <= 2:
                mcts1.reset()
            temp = args.temp if turn <= args.tempThreshold else args.arenaTemp
            policy = mcts1.getActionProb(x, temp=temp)
            return np.random.choice(len(policy), p=policy)

    for i in range(model_count):
        file = Path(networks[i])
        print(f'{short_name} vs {file.stem}')

        nnet2.load_checkpoint(folder='checkpoint', filename=file.name)
        if args.numMCTSSims <= 0:
            p2 = NNPlayer(g, nnet2, args.arenaTemp).play
        else:
            mcts2 = MCTS(g, nnet2, args)

            def p2(x, turn):
                if turn <= 2:
                    mcts2.reset()
                temp = args.temp if turn <= args.tempThreshold else args.arenaTemp
                policy = mcts2.getActionProb(x, temp=temp)
                return np.random.choice(len(policy), p=policy)

        arena = Arena(p1, p2, g)
        p1wins, p2wins, draws = arena.playGames(args.arenaCompare)
        writer.add_scalar(f'Win Rate vs {short_name}',
                          (p2wins + 0.5 * draws) / args.arenaCompare,
                          i * args.x)
        print(f'wins: {p1wins}, ties: {draws}, losses: {p2wins}\n')
    writer.close()
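# A minimal sketch of the scalar logged to TensorBoard above: each draw counts as
# half a win, so the value is the loaded network's score rate over the games played.
def win_rate(wins, draws, games_played):
    return (wins + 0.5 * draws) / games_played

# e.g. 12 wins and 6 draws out of 30 arena games -> 0.5
print(win_rate(12, 6, 30))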
Example #29
0
from Arena import Arena
from IPython.display import clear_output
arena = Arena()
while True:
    print('''
        1. Create pokemon
        2. Show Existing pokemon
        3. Battle
        4. Quit
    ''')
    choice = int(input('Enter your choice:'))
    if choice == 1:
        arena.create_pokemon()
    elif choice == 2:
        arena.show_all()
    elif choice == 3:
        clear_output()
        arena.battle()
    else:
        break
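# A minimal sketch of a safer menu prompt than int(input(...)) above: non-numeric
# or out-of-range input re-prompts instead of raising ValueError.
def read_choice(prompt='Enter your choice:', valid=(1, 2, 3, 4)):
    while True:
        raw = input(prompt)
        if raw.strip().isdigit() and int(raw) in valid:
            return int(raw)
        print('Please enter a number between 1 and 4.')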
Example #30
0
            # Bookkeeping to prepare for the next step
            self.qOld = qPrime

            np.save("./pegWeights.npy", self.peggingWeights)


if __name__ == '__main__':
    # Initialize variables
    player1 = LinearB(1, 0.5, 0.9, False)
    player2 = Myrmidon(2, 5, False)
    numHands = 5000
    repeatFlag = False
    windowSize = 100

    # Create and run arena
    arena = Arena([player1, player2], repeatFlag)
    results = arena.playHands(numHands)

    # Plot results from arena
    x = np.arange(1, numHands + 1 - windowSize, 1)
    y0 = np.zeros(len(results[0]) - windowSize)
    avgResult0 = np.average(results[0])
    mu0 = np.zeros(len(y0))
    y1 = np.zeros(len(results[1]) - windowSize)
    avgResult1 = np.average(results[1])
    mu1 = np.zeros(len(y1))
    y2 = np.zeros(len(results[2]) - windowSize)
    avgResult2 = np.average(results[2])
    mu2 = np.zeros(len(y2))

    for i in range(len(x)):
Example #31
0
            twoWon += i[1]
            draws += i[2]
        print("Model 1 Win:", oneWon, " Model 2 Win:", twoWon, " Draw:", draws)

    g = OthelloGame(6)

    # parallel version
    #ParallelPlay(g)

    # single process version
    # all players
    rp = RandomPlayer(g).play
    gp = GreedyOthelloPlayer(g).play
    hp = HumanOthelloPlayer(g).play

    # nnet players
    n1 = NNet(g)
    n1.load_checkpoint('./pretrained_models/othello/pytorch/',
                       '6x100x25_best.pth.tar')
    args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
    mcts1 = MCTS(g, n1, args1)
    n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

    #n2 = NNet(g)
    #n2.load_checkpoint('/dev/8x50x25/','best.pth.tar')
    #args2 = dotdict({'numMCTSSims': 25, 'cpuct':1.0})
    #mcts2 = MCTS(g, n2, args2)
    #n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

    arena = Arena.Arena(n1p, hp, g, display=display)
    print(arena.playGames(2, verbose=True))
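# A minimal sketch contrasting the two action-selection styles seen in these
# examples: temp=0 with argmax plays deterministically, while sampling from the
# policy keeps some variety between arena games. `mcts` stands for any object
# exposing getActionProb(board, temp=...) as above.
import numpy as np

def greedy_player(mcts):
    return lambda board: int(np.argmax(mcts.getActionProb(board, temp=0)))

def sampling_player(mcts, temp=1):
    def play(board):
        policy = mcts.getActionProb(board, temp=temp)
        return int(np.random.choice(len(policy), p=policy))
    return play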
Example #32
0
import random
from Tkinter import *  # Import everything from Tkinter

from Mouse2 import Mouse2

from Arena import Arena  # Import our Arena
from Statue import Statue  # Import our Turtle
from Vector import *  # Import everything from our Vector
from Cat import Cat

# random vector for center of statue
random.seed()
tk = Tk()  # Create a Tk top-level widget
arena = Arena(tk)  # Create an Arena widget, arena
arena.pack()  # Tell arena to pack itself on screen
initialStatPosition = Vector(random.randint(200, 600),
                             random.randint(200, 400))


#initialCatAngle=random.randint(0,359)
#initialCatRadius=random.randint(200,500)
#initialMouseAngle=random.randint(0,359)
def inputdata():
    """
    :return: instead, input the cat angle, mouse angle, and cat radius manually
    """
    initialCatAngle = float(input('Input Cat angle(degrees):'))

    initialMouseAngle = float(input('Input Mouse angle(degrees):'))
    initialCatRadius = float(input('Input Cat radius (m)'))
    if initialCatRadius < 1.:
Example #33
0
#Name,Player Instance, Is Stochastic
players=[('RandomPlayer',rp,1),
         ('GreedyPlayer',gp,0),
         ('AntiGreedyPlayer',agp,0),
         ('CompositeGreedyPlayer',cgp,0),
         ('mini-AlphaZero',azp1,1)]


###############################################################################
num_trial=100

results=np.zeros(shape=(len(players),len(players)))

for i in range(len(players)):
    for j in range(len(players))[0:i+1]:
        arena=Arena(player1=players[i][1],player2=players[j][1],env=env,display=None)
        
        actual_num_trial=2 if (players[i][2]==0 and players[j][2]==0) else num_trial  # if both players are deterministic, two games are enough
        
        one_win,two_win,draw=arena.play_games(actual_num_trial,verbose=False)
        print(players[i][0],' vs ',players[j][0],' : ',one_win/float(actual_num_trial),' wins.')
        results[i,j]=one_win/float(actual_num_trial)
        

###############################################################################
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df=pd.DataFrame(results,index=[x[0] for x in players],columns=[x[0] for x in players])
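# A minimal sketch of the heatmap step the pandas/seaborn imports above prepare for;
# the figure size and annotation settings are assumptions, and df/plt/sns come from
# the snippet above.
plt.figure(figsize=(6, 5))
sns.heatmap(df, annot=True, vmin=0.0, vmax=1.0)
plt.title('Row player win rate (lower triangle filled)')
plt.tight_layout()
plt.show()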
Example #34
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            log.info(f'Starting Iter #{i} ...')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                for _ in tqdm(range(self.args.numEps), desc="Self Play"):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                log.warning(
                    f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}"
                )
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            log.info('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            log.info('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                     (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                log.info('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                log.info('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
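# A minimal sketch of the example-history bookkeeping described in the docstring
# above: a bounded list of per-iteration deques that is flattened and shuffled
# before each training run; the sizes and placeholder samples are assumptions.
from collections import deque
from random import shuffle

history, max_iters_kept = [], 20
for it in range(1, 4):
    iteration_examples = deque([(it, 'board', 'pi', 'v')], maxlen=200000)
    history.append(iteration_examples)
    if len(history) > max_iters_kept:
        history.pop(0)  # drop the oldest iteration's examples

train_examples = [ex for iteration in history for ex in iteration]
shuffle(train_examples)
print(len(train_examples), 'examples ready for training')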
Example #35
0
our_player = MCTS(game, neural_network, args)
# old_nn_player = MCTS(game, old_nn, args)
other_player = NN_player_wrapper()


def player_random(board):
    game_2 = Game(men_count=9)
    valids = game_2.get_valid_moves(board, 1)
    valids_ind = np.where(valids)
    choice = np.random.choice(valids_ind[0])
    return choice

#
print('Let the fight Begin')
arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
              player_random,
              game, lambda x: game.print_board(x),"Alpha_Zero", "Random")
print(arena.playGames(40, verbose=True))
#

# print('Let the fight Begin')
# arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
#               other_player,
#               game, lambda x: Board(x).verbose_game(x),"Alpha_Zero", "Other")

# print(arena.playGames(20, verbose=True))

# print('Let the fight Begin')
# arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
#               player_random, game, lambda x: Board(x).verbose_game(x),"Alpha_Zero", "Random")
#
Example #36
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            #if pwins+nwins == 0 or float(nwins)/(pwins+nwins) < self.args.updateThreshold:
            if False:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
Example #37
0
from Tkinter import *
from Arena import Arena
from WalkingTurtle import WalkingTurtle
from Vector import *

tk = Tk()
arena = Arena(tk)
arena.pack()
arena.add(WalkingTurtle(Vector(200, 300), 0, 1, fill='turquoise'))
arena.add(WalkingTurtle(Vector(600, 300), 0, 1, fill='purple'))
tk.mainloop()
Example #38
0
from Tkinter  import *                  # Import everything from Tkinter
from Arena    import Arena              # Import our Arena
from Cat      import Cat                # Import our Cat
from Mouse    import Mouse              # Import our Statue
from Statue   import Statue             # Import our Statue
from Vector   import *                  # Import everything from our Vector
from globalVars import *                # Import everything from globalVars
from random   import random             # Import random

tk = Tk()                               # Create a Tk top-level widget
arena = Arena(tk, 800, 600, padx=12, pady=6) # Create an Arena widget, arena
arena.pack()                            # Tell arena to pack itself on screen

midX = arena.width/2                    # Horizontal center of window
midY = arena.height/2                   # Vertical center of window
mouseAngle = random()*360*scaleRad      # Random mouse angle to initialize
catAngle = random()*360*scaleRad        # Random cat angle to initialize
catRadius = 5                           # Fixed cat radius to initialize

statue = Statue(Vector(midX,midY), 0)   # Create a statue in center of arena, arbitrary heading
arena.add(statue)                       # Add statue

mouse = Mouse(Vector(midX + statue.radius*scalePixel*cos(mouseAngle), midY - statue.radius*scalePixel*sin(mouseAngle)), 0, arena, statue) # Create a mouse at right edge of statue, arbitrary heading since it will be overwritten in initialization
arena.add(mouse)                        # Add mouse
 
cat = Cat(Vector(midX + catRadius*scalePixel*cos(catAngle), midY - catRadius*scalePixel*sin(catAngle)), 0, arena, statue, mouse) # Create a cat at given angle and radius, arbitrary heading since it will be overwritten in initialization
arena.add(cat, "cat")                   # Add cat and specify that it's a cat as extra argument

tk.mainloop()                           # Enter the Tkinter event loop
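# A minimal sketch of the polar placement used above: a point `radius` metres from
# the centre at `angle` radians, with y negated because canvas y grows downward;
# scale_pixel is an assumed stand-in for the scalePixel constant from globalVars.
from math import cos, sin

def place_on_circle(center_x, center_y, radius, angle, scale_pixel=20.0):
    x = center_x + radius * scale_pixel * cos(angle)
    y = center_y - radius * scale_pixel * sin(angle)
    return x, y

# e.g. an animal 5 m from the statue at angle 0 sits 100 px to the right of centre
print(place_on_circle(400, 300, 5, 0.0))  # -> (500.0, 300.0)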
Example #39
0
testUnit = Hero(name="Ordeus",
                healthRange=(70, 80),
                strengthRange=(70, 80),
                defenceRange=(45, 55),
                speedRange=(40, 50),
                luckRange=(10, 30))
testUnit2 = Beast(name="Beast",
                  healthRange=(60, 90),
                  strengthRange=(60, 90),
                  defenceRange=(40, 60),
                  speedRange=(40, 60),
                  luckRange=(25, 40))

testArena = Arena(name="Emagia",
                  fighter1=testUnit,
                  fighter2=testUnit2,
                  maxTurns=20)


class TestArena(unittest.TestCase):
    def test_type(self):
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena, True,
                          testUnit, testUnit2, 20)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena, 12,
                          testUnit, testUnit2, 20)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
                          "Orderus", testUnit, testUnit2, "20")
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
                          "Orderus", testUnit, testUnit2, 20.0)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
Example #40
0
from WalkingTurtle import *
from Statue import Statue
from Mouse import Mouse
import Cat

meter = 20        # How many pixels is one meter?
statue_radius = 1 # What is the statue's radius?
cat_radius = 1.5
cat_angle = 0
mouse_angle = 45

x = Vector()
print x.length()

tk = Tk()                              # Create a Tk top-level widget
arena = Arena(tk)                      # Create an Arena widget, arena
arena.pack()                           # Tell arena to pack itself on screen
s = Statue(Vector(200,200),statue_radius,meter)
arena.add(s)
m = Mouse(s,mouse_angle)
c = Cat.Cat(cat_angle,cat_radius,m)
arena.add(c)
arena.add(m)
arena.cat_rad.set('CatRadius: '+str(cat_radius))
arena.cat_rad_label = Label(arena,textvariable=arena.cat_rad)
arena.cat_rad_label.pack()
arena.cat_ang.set('CatAngle: '+str(cat_angle))
arena.cat_ang_label = Label(arena,textvariable=arena.cat_ang)
arena.cat_ang_label.pack()
arena.mouse_ang.set('MouseAngle: '+str(mouse_angle))
arena.mouse_ang_label = Label(arena,textvariable=arena.mouse_ang)
Example #41
0
    def learn(self):
        """
        main loop of the training loop

        """
        if self.args.load_model:
            start = self.args.load_example[1] + 1
        else:
            start = 1
        for i in range(start, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            greedy = i == 1 and not self.args.load_model

            if not self.skipFirstSelfPlay or i > 1:
                iteration_train_examples = deque(
                    [], maxlen=self.args.maxlenOfQueue)

                num_eps = self.args.numEps
                if greedy:
                    num_eps = self.args.greedy_eps
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=num_eps)
                end = time.time()

                for eps in range(num_eps):
                    if greedy:
                        iteration_train_examples += self.execute_initialize_episode(
                        )
                    else:
                        iteration_train_examples += self.execute_episodes()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=num_eps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                if not greedy:
                    self.trainExamplesHistory.append(iteration_train_examples)

                    if len(self.trainExamplesHistory
                           ) > self.args.numItersForTrainExamplesHistory:
                        print("len(trainExamplesHistory) =",
                              len(self.trainExamplesHistory),
                              " => remove the oldest trainExamples")
                        self.trainExamplesHistory.pop(0)
                    # backup history to a file
                    # NB! the examples were collected using the model from the previous iteration, so (i-1)
                    self.saveTrainExamples(i)

                    # shuffle examples before training
                    trainExamples = []
                    for e in self.trainExamplesHistory:
                        trainExamples.extend(e)
                    shuffle(trainExamples)

                else:
                    trainExamples = iteration_train_examples

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.h5')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.h5')

            self.nnet.train(trainExamples)

            if not greedy:
                pmcts = MCTSSingle(self.game, self.pnet, self.args)
                nmcts = MCTSSingle(self.game, self.nnet, self.args)
                print('PITTING AGAINST PREVIOUS VERSION')
                arena = Arena(pmcts, nmcts, self.game, self.args)
                scores = arena.playGames(self.args.arenaCompare)

                if scores[1] == 0 or float(
                        scores[1]) / sum(scores) < self.args.updateThreshold:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                              filename='temp.h5')
                else:
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(
                        folder=self.args.checkpoint,
                        filename=self.getCheckpointFile(i))
                    self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                              filename='temp.h5')
            else:
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='checkpoint_1.h5')
Example #42
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters+1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            print(str(self.game.innerN) + "x" + str(self.game.innerM))
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()
    
                for eps in range(self.args.numEps):
                    # self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                    self.mcts = MCTS(self.nnet, self.args)   # reset search tree
                    iterationTrainExamples += self.executeEpisode()

    
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                                                                                                               total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history 
                self.trainExamplesHistory.append(iterationTrainExamples)
                
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
            self.saveTrainExamples(i-1)
            
            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            tempfile =  'temp.pth.tar'
            bestfile =  'best.pth.tar'

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            self.nnet.train(trainExamples)

            if self.arenaEnabled:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)

                pmcts = MCTS(self.pnet, self.args)
                nmcts = MCTS(self.nnet, self.args)

                print('PITTING AGAINST PREVIOUS VERSION')
                # arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                #               lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
                arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                           lambda x, y: nmcts.getActionProb(x, y, temp=0), self.game)
                pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
                if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
                else:
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
Example #43
0
gladius = weapons["Gladius"]




char1 = readCharacterFromXML(os.path.join(basedir, "res/Charactere/Agilitus.xml"))
char1.name = "Hugo"
action1 = Action(char1, hieb0, gladius)
reaction1 = Action(char1, block0, gladius)

char2 = readCharacterFromXML(os.path.join(basedir, "res/Charactere/Agilitus.xml"))
char2.name = "Herbert"
action2 = Action(char2, hieb0, gladius)
reaction2 = Action(char2, block0, gladius)

arena = Arena()
arena.addTeam("blue", [char1])
arena.addTeam("red", [char2])
print (arena.getCharacters())
# Create a Player for each character


player1 = RandomPlayer("Player1", char1, arena, action1, reaction1)
player2 = RandomPlayer("Player2", char2, arena, action2, reaction2)
players = [player1, player2]

points = [0,0]
for round in range(100):
    # reset chars
    char1.reset()
    char2.reset()
Example #44
0
def runGame(catRadius, catAngle, mouseAngle):
	"""
	Main function that organizes and runs the game. Creates statue, mouse, and cat and adds them to the area.
	"""
	tk = Tk()                              # Create a Tk top-level widget
	arena = Arena(tk)                      # Create an Arena widget, arena
	arena.pack()                           # Tell arena to pack itself on screen

	statueObj = Statue(Vector(200,200))	   # Create statue at 200,200
	arena.add(statueObj)				   # Add statue to arena

	mouseObj = Mouse(mouseAngle, statueObj)	# Create mouse with statue passed as argument
	arena.add(mouseObj)						# Add mouse to arena

	cat_radius_scaled = catRadius * scale_factor		# Scale the radius of the cat to pixels
	catObj = Cat(catAngle, cat_radius_scaled, mouseObj)	# Create cat with mouse passed as argument
	arena.add(catObj)									# Add cat to arena

	arena.setLabels() # added (set labels to starting position)
	tk.mainloop()                          # Enter the Tkinter event loop
Example #45
0
from Tkinter import *  # Import everything from Tkinter
from Arena import Arena  # Import our Arena
from Turtle import Turtle  # Import our Turtle
from Vector import *  # Import everything from our Vector

tk = Tk()  # Create a Tk top-level widget
arena = Arena(tk)  # Create an Arena widget, arena
arena.pack()  # Tell arena to pack itself on screen
turtle = Turtle(Vector(200, 200), 0)
arena.add(turtle)  # Add a very simple, basic turtle
tk.mainloop()  # Enter the Tkinter event loop
Example #46
0
from Tkinter import *                  # Import everything from Tkinter
from Arena   import Arena              # Import our Arena
from Turtle  import Turtle             # Import our Turtle
from Vector  import *                  # Import everything from our Vector

tk = Tk()                              # Create a Tk top-level widget
arena = Arena(tk)                      # Create an Arena widget, arena
arena.pack()                           # Tell arena to pack itself on screen
arena.add(Turtle(Vector(200,200), 0))  # Add a very simple, basic turtle
tk.mainloop()                          # Enter the Tkinter event loop
Example #47
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(self.args.startIter,
                       self.args.numIters + self.args.startIter):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.args.skipFirstSelfPlay or i > self.args.startIter:  #or i > 1

                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                n_processes: int = 4
                # or, if you have enough compute (and VRAM):
                # n_processes = mp.cpu_count()

                with mp.Pool(n_processes) as pool:
                    self.nnet.nnet.share_memory()
                    pbar = tqdm.tqdm(range(self.args.numEps), position=0)
                    pbar.set_description(
                        f"Self Play using {n_processes} processes")

                    for eps in pbar:
                        # Arguments for each worker.
                        worker_args = [(i, self.game, self.args, self.nnet)
                                       for i in range(n_processes)]
                        # Apply the executeEpisode method on each argument:
                        for worker_examples in pool.starmap(
                                Coach.executeEpisode, worker_args):
                            iterationTrainExamples.extend(worker_examples)

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            if not self.args.skipFirstTrain or i > self.args.startIter:
                # training new network, keeping a copy of the old one
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='old.pth.tar')
                self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='old.pth.tar')
                #pmcts = MCTS(self.game, self.pnet, self.args)

                self.nnet.train(trainExamples)
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='new.pth.tar')
                #nmcts = MCTS(self.game, self.nnet, self.args)

            pmcts = MCTS(self.game, self.pnet, self.args)
            nmcts = MCTS(self.game, self.nnet, self.args)
            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(pmcts, nmcts, self.game, self.args.resignationOn,
                          self.args.resignationThreshold)
            #arena = Arena(lambda state,player: np.where(pmcts.getActionProb(state,player, temp=0) == 1)[0][0],
            #              lambda state,player: np.where(nmcts.getActionProb(state,player, temp=0) == 1)[0][0], self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)
            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='old.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
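# A self-contained sketch of the pool-based self-play fan-out used above;
# play_one_episode is a stand-in for Coach.executeEpisode and returns dummy samples.
import multiprocessing as mp

def play_one_episode(worker_id, num_moves=10):
    # each worker plays one game and returns its (state, policy, value)-style tuples
    return [(worker_id, move, 0.0) for move in range(num_moves)]

if __name__ == '__main__':
    n_processes = 4
    examples = []
    with mp.Pool(n_processes) as pool:
        worker_args = [(wid, 10) for wid in range(n_processes)]
        for worker_examples in pool.starmap(play_one_episode, worker_args):
            examples.extend(worker_examples)
    print(len(examples), 'training samples collected')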