def __init__(self, screen, currentLevel):
    Arena.__init__(self)
    currentLevel = 2  # NOTE: hard-codes the level, overriding the currentLevel argument
    self.entities = []
    self.keyListeners = []
    self.accelerators = []
    self.npcs = []
    # on a timer, we reset which npcs emit attractive and repulsive forces
    self.foodieAcceleratorReset = 5000  # ms
    # this counter is incremented and reset on each repopulation
    self.foodieAcceleratorResetTimer = 0
    # we roll an n-sided die for each Foodie to decide whether it will
    # become an accelerator on this reset
    self.foodieAcceleratorResetDice = 5

    # init npcs
    maxSize = 50.0
    minSize = 6.0
    screenW = float(screen.get_width())
    screenH = float(screen.get_height())
    # rotation accel rates
    maxRotVel = math.pi / 20.0
    maxRotAcc = math.pi / 40.0
    for i in range(Variables.lvlFoodCount[currentLevel]):
        size = minSize + random.random() * (maxSize - minSize)
        pos = [0.0, 0.0]
        pos[0] += int(random.random() * screenW)
        pos[1] += int(random.random() * screenH)
        prey = Prey(size, pos, maxRotVel, maxRotAcc, self)
        self.entities.append(prey)
    for i in range(Variables.lvlEaterCount[currentLevel]):
        size = minSize + random.random() * (maxSize - minSize)
        pos = [0.0, 0.0]
        pos[0] += int(random.random() * screenW)
        pos[1] += int(random.random() * screenH)
        predator = Predator(size, pos, maxRotVel, maxRotAcc, self)
        self.entities.append(predator)

    # init players
    c = CircleGuy(self)
    m = MouseEntity(self)
    self.accelerators += [m]
    self.entities += [c, m]
    self.keyListeners += [c, m]
def open(self):
    self.isopen = True
    self.moving = False
    self.arena = Arena.getInstance()
    self.sched = Scheduler()
    self.sched.start()
    self.sched.add_interval_job(self.calc_pos, seconds=0.05)
    self.sched.add_interval_job(self.send_pos, seconds=0.05)
    self.keys = Keys()
    self.movement_enabled = True
    self.dirty_coll = True
    self.gravity = 0
    self.jumping = False
    self.pos_transaction = None
    self.id = self.arena.addPlayer(self)
    self.active = False
    self.name = None
    self.arena.Join(self.id)
    self.cur_weapon = None
    self.weapons = []
    self.add_weapon(Weapon("Mr. Default Gun"))
    self.maxhealth = 100
    self.health = self.maxhealth
    self.pos = Vec3()
    self.rot = Vec2()
    self.spawn()
    print 'new connection'
def update(self, dT):
    Arena.update(self, dT)
    # Periodic accelerator reshuffling, currently disabled:
    '''
    self.foodieAcceleratorResetTimer += dT
    if self.foodieAcceleratorResetTimer > self.foodieAcceleratorReset:
        print "RESETTING ACCELERATING FOODIES"
        # remove all foodies from the accelerators list
        # (note: removing from a list while iterating over it skips elements)
        for a in self.accelerators:
            if isinstance(a, Foodie):
                self.accelerators.remove(a)
        # randomly add some foodies back in
        for f in self.foodies:
            if random.randint(0, self.foodieAcceleratorResetDice) == 0:
                self.accelerators.append(f)
        self.foodieAcceleratorResetTimer = 0
    '''
    pass
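# The disabled block above mutates self.accelerators while iterating over it,
# which silently skips elements. A minimal sketch of a safe version, assuming
# the Foodie class and the foodies/accelerators lists used by this arena:
def _reshuffle_foodie_accelerators(self):
    # keep every non-Foodie accelerator; drop all Foodie ones in a single pass
    self.accelerators = [a for a in self.accelerators if not isinstance(a, Foodie)]
    # re-admit each foodie with probability 1 / (dice + 1)
    for f in self.foodies:
        if random.randint(0, self.foodieAcceleratorResetDice) == 0:
            self.accelerators.append(f)
    self.foodieAcceleratorResetTimer = 0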
def play_test_case(radius, cat_angle, mouse_angle):
    m, c = make_test_case(radius, cat_angle, mouse_angle)
    s = m.statue
    tk = Tk()
    arena = Arena(tk, meter * 10, meter * 10)
    arena.add(s)
    arena.add(c)
    arena.add(m)
    arena.pack()
    tk.mainloop()
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):
        # if 8 < get_hour() < 23:
        #     log.warning('Sleeping to save CPU...')
        #     while 8 < get_hour() < 23:
        #         time.sleep(60)

        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            for _ in tqdm(range(self.args.numEps), desc="Self Play", ncols=100):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        while len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NOTE! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        random.seed(time.time())
        random.shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins = arena.playGames(self.args.arenaCompare)

        print()
        print('Results')
        print(f'Won: {nwins}')
        print(f'Lost: {pwins}')
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='checkpoint_best.pth.tar')
            self.saveTrainExamples('best')
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename=self.getCheckpointFile(i))
            if keys[pygame.K_UP]:
                if direction[1] != 1:
                    direction = (0, -1)
                    break
            elif keys[pygame.K_DOWN]:
                if direction[1] != -1:
                    direction = (0, 1)
                    break
            if keys[pygame.K_LEFT]:
                if direction[0] != 1:
                    direction = (-1, 0)
                    break
            elif keys[pygame.K_RIGHT]:
                if direction[0] != -1:
                    direction = (1, 0)
                    break
        newArena.movesnake(direction, snake)
        timer += clock.tick(100)

width, height = 320, 320
newArena = Arena(30, 30)
win = pygame.display.set_mode((width, height))
# pygame.display.flip()
menu_screen()
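# A minimal sketch of the same direction logic as a pure helper, assuming the
# pygame key-state array and the (dx, dy) direction convention used above; the
# guards prevent the snake from reversing onto itself (hypothetical helper,
# not part of the original game loop):
def next_direction(keys, direction):
    if keys[pygame.K_UP] and direction[1] != 1:
        return (0, -1)
    if keys[pygame.K_DOWN] and direction[1] != -1:
        return (0, 1)
    if keys[pygame.K_LEFT] and direction[0] != 1:
        return (-1, 0)
    if keys[pygame.K_RIGHT] and direction[0] != -1:
        return (1, 0)
    return direction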
def sleep(self):
    Magni.health = 1000
    # NOTE: this passes the Arena class itself as `self`; an Arena instance is
    # presumably intended
    Arena.sleep(Arena)
def learn(self):
    # Generate or load a matrix if fixed_matrix is set to True. We save a Game_args
    # object in Coach in case A is fixed, so when we initialize multiple MCTS objects
    # below we do not have to store multiple copies of A.
    if self.args['fixed_matrix'] == True:
        if self.args['load_existing_matrix'] == True:
            self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
        else:
            self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
            self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])

    # keep track of learning time
    learning_start = time.time()

    # start training iterations
    for i in range(1, self.args['numIters'] + 1):
        print('------ITER ' + str(i) + '------')
        # If we are not loading a set of training data, then:
        if not self.skipFirstSelfPlay or i > 1:
            # 1) Initialize an empty deque for storing training data after every ep
            #    in the iteration has been processed
            iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])
            # 2) Start search. A single search consists of a synchronous search over
            #    ALL eps in the current batch, so the number of MCTS trees that must
            #    be maintained at once equals the number of eps in the current batch.
            for j in range(self.args['num_batches']):
                # INITIALIZATION STEP---------------------------------------
                # Each element in MCTS_States_list has the form
                # (MCTS object, [list of States the root traversed])
                MCTS_States_list = []
                batchTrainExamples = []

                # Initialize bookkeeping
                print('Generating Self-Play Batch ' + str(j) + ':')
                bar = Bar('Self Play', max=self.args['eps_per_batch'])

                # Initialize MCTS_States_list. The number of pairs in MCTS_States_list
                # should equal eps_per_batch.
                for ep in range(self.args['eps_per_batch']):
                    # Initialize Game_args() for MCTS
                    temp_game_args = Game_args()
                    if self.args['fixed_matrix'] == False:
                        temp_game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
                    else:
                        temp_game_args.sensing_matrix = self.game_args.sensing_matrix
                    temp_game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                    # Initialize MCTS and the first state for each MCTS
                    temp_MCTS = MCTS(self.game, self.nnet, self.args, temp_game_args, identifier=int(str(j) + str(ep)))
                    temp_init_state = self.game.getInitBoard(self.args, temp_game_args, identifier=int(str(j) + str(ep)))
                    # Append to MCTS_States_list
                    MCTS_States_list.append([temp_MCTS, [temp_init_state]])

                # initialize some variables for the bookkeeping bar in the terminal
                current_MCTSStateslist_size = len(MCTS_States_list)
                completed_episodes = 0
                total_completed_eps = 0

                # Initialize the threading class. Needed to call threaded_mcts below.
                threaded_mcts = Threading_MCTS(self.args, self.nnet)
                # ----------------------------------------------------------
                # While MCTS_States_list is nonempty, advance each episode in
                # MCTS_States_list by one move. Continue advancing one move at a time
                # until MCTS_States_list is empty, meaning all games are completed.
                # When a game is completed, its corresponding pair is removed from
                # MCTS_States_list.
                # ----------------------------------------------------------
                self_play_batchstart = time.time()

                while MCTS_States_list:
                    # advanceEpisodes returns a new MCTS_States_list with every element
                    # advanced by one move, and removes all completed games. It also
                    # returns a set of new trainExamples for games completed during
                    # the call.
                    MCTS_States_list, trainExamples = self.advanceEpisodes(MCTS_States_list, threaded_mcts)

                    # save the States_list states whose last arrived node is a terminal
                    # node; these become new training samples
                    batchTrainExamples += trainExamples

                    # advance the bookkeeping bar whenever MCTS_States_list shrank.
                    # bar.next() only advances and outputs the progress bar;
                    # bar.suffix only outputs the suffix text after "|"
                    if len(MCTS_States_list) < current_MCTSStateslist_size:
                        completed_episodes = current_MCTSStateslist_size - len(MCTS_States_list)
                        current_MCTSStateslist_size = len(MCTS_States_list)
                        total_completed_eps += completed_episodes
                        bar.suffix = '({eps_completed}/{maxeps})'.format(
                            eps_completed=total_completed_eps, maxeps=self.args['eps_per_batch'])
                        # advance the progress bar completed_episodes times
                        for k in range(completed_episodes):
                            bar.next()
                # ----------------------------------------------------------
                # end the tracking of the bookkeeping bar
                bar.finish()

                self_play_batchend = time.time()
                print('All Self-Play Games in batch have been played to completion.')
                print('Total time taken for batch: ', self_play_batchend - self_play_batchstart)
                iterationTrainExamples += batchTrainExamples

            # Add the training samples generated in a single training iteration to
            # self.trainExamplesHistory. This step is the last line included in the
            # "if not self.skipFirstSelfPlay or i > 1:" block.
            self.trainExamplesHistory.append(iterationTrainExamples)

        # Jump to here if self.skipFirstSelfPlay returns True or i <= 1.
        # Once iterationTrainExamples has been completed, we use these examples to
        # retrain the neural network.
        if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # save the trainExamplesHistory list of Coach
        self.saveTrainExamples(i - 1)

        # move all training samples from trainExamplesHistory to trainExamples, then shuffle
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # The Arena--------------------------------------------------------
        if self.args['Arena'] == True:
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp')  # copy old neural network into new one
            self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')

            # convert trainExamples into a format recognizable by the neural network, then train
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1])  # train self.nnet; the weights are now updated

            # Pit the two neural networks self.pnet and self.nnet in the arena
            print('PITTING AGAINST PREVIOUS VERSION')
            # Note that Arena pits pnet against nnet, and Game_args A and y change
            # constantly. Next iteration, arena references a different object, so the
            # old object is deleted once no other references to it remain.
            arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
            pwins, nwins, draws = arena.playGames()

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args['updateThreshold']:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            else:
                # Saves the weights (.h5) and model (.json) twice: creates
                # nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5,
                # and rewrites best_model.json and best_weights.h5.
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
        # -----------------------------------------------------------------
        else:
            # If we do not activate the Arena, we just train the network, rewrite
            # 'best', and write a new file 'nnet_checkpoint' + str(i-1).
            print('TRAINING NEW NEURAL NETWORK...')
            trainExamples = self.nnet.constructTraining(trainExamples)
            # FOR TESTING-----------------------------------------------------
            # print('')
            # print('feature arrays shape: ', trainExamples[0][0].shape, trainExamples[0][1].shape)
            # print('trainExamples feature arrays: ', trainExamples[0])
            # print('')
            # print('label arrays shape: ', trainExamples[1][0].shape, trainExamples[1][1].shape)
            # print('trainExamples label arrays: ', trainExamples[1])
            # END TESTING-----------------------------------------------------
            self.nnet.train(trainExamples[0], trainExamples[1],
                            folder=self.args['network_checkpoint'],
                            filename='trainHistDict' + str(i - 1))
            # FOR TESTING-----------------------------------------------------
            # weights = self.nnet.nnet.model.get_weights()
            # min_max = []
            # for layer_weights in weights:
            #     print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
            #     layer_weights_min = np.amin(layer_weights)
            #     layer_weights_max = np.amax(layer_weights)
            #     min_max.append([layer_weights_min, layer_weights_max])
            # print('')
            # print('The smallest and largest weights of each layer are: ')
            # for pair in min_max:
            #     print(pair)
            # print('')
            # END TESTING-----------------------------------------------------
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i - 1))
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')

    # Compute total time to run AlphaZero
    learning_end = time.time()
    print('----------TRAINING COMPLETE----------')
    print('Total training time: ', learning_end - learning_start)
def _out_of_arena(self):
    super()._out_of_arena()
    if uniform(0, 1) < ASTEROIDS_DESPAWN_PROB:  # sometimes they just leave
        self.Dispose()
    else:
        Arena.wrap_to_arena(self)
from RockBot import RockBot
from ScissorsBot import ScissorsBot
from PaperBot import PaperBot
from BeatLastBot import BeatLastBot
from RockThenPaperThenScissorsBot import RockThenPaperThenScissorsBot
from ScissorsThenPaperThenRockBot import ScissorsThenPaperThenRockBot
from Arena import Arena

if __name__ == '__main__':
    rockBot = RockBot()
    scissorsBot = ScissorsBot()
    paperBot = PaperBot()
    beatLastBot = BeatLastBot()
    rockThenPaperThenScissorsBot = RockThenPaperThenScissorsBot()
    scissorsThenPaperThenRockBot = ScissorsThenPaperThenRockBot()

    arena = Arena()
    arena.battle(beatLastBot, rockThenPaperThenScissorsBot, 20)
    # re-running __init__ resets the arena and the bot's state between battles
    arena.__init__()
    beatLastBot.__init__()
    arena.battle(beatLastBot, scissorsThenPaperThenRockBot, 20)
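# Re-running __init__ by hand works, but hides the intent. A minimal sketch of
# the same flow behind a helper (hypothetical, not part of the original bot API):
def fresh_battle(arena, bot_a, bot_b, rounds):
    # re-initialize the arena and both bots so earlier battles leave no state behind
    arena.__init__()
    bot_a.__init__()
    bot_b.__init__()
    arena.battle(bot_a, bot_b, rounds)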
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
    for i in range(self.args.numIters):
        # bookkeeping
        print('------ITER ' + str(i + 1) + '------')
        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()
        for eps in range(self.args.numEps):
            trainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pnet = self.nnet.__class__(self.game)
        pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, pnet, self.args)
        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins))
        # guard against pwins + nwins == 0 to avoid a ZeroDivisionError
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet = pnet
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='checkpoint_' + str(i) + '.pth.tar')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
        self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):  # for each round
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)  # drop the previous training examples

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):  # for each self-play game of this round
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                # returns [(canonicalBoard, pi, v), ...] where v is the result
                selfPlayResult = self.executeEpisode()  # play one game, recording the game history
                iterationTrainExamples += selfPlayResult

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)  # self-play finished; update the move history

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)  # remove the oldest game history
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)  # add the new move records
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')  # save the previous net
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')  # read the previous net
        pmcts = MCTS(self.game, self.pnet, self.args)  # reset the previous model's MCTS

        # train the new model on the new data
        self.nnet.train(trainExamples)  # train the network with the new move records
        nmcts = MCTS(self.game, self.nnet, self.args)  # reset the new model's MCTS

        # OLD VS NEW
        print('PITTING AGAINST PREVIOUS VERSION')
        # rp = RandomPlayer(self.game).play
        # abp2 = AbpPlayer(self.game, 1, abpDepth=2).play
        arena = Arena(
            lambda board, turn: np.argmax(pmcts.getActionProb(board, turn, temp=0)),
            lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)),
            self.game)
        # arena = Arena(abp2,
        #               lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)), self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)  # play the new model against the old one

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            # OLD WINS!
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')  # keep the previous model, as it beat the new one
        else:
            # NEW WINS!
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')  # save the new model, as this is the best
from Tkinter import *        # Import everything from Tkinter
from Arena import Arena      # Import our Arena
from Turtle import Turtle    # Import our Turtle
from Vector import *         # Import everything from our Vector

Turtle.m = 50.0                    # Scaling factor
Turtle.origin = Vector(400, 300)

from Statue import *
from Mouse import *
from Cat import *

statue = Statue(Turtle.origin + Vector(0, 0), 0)
mouse = Mouse(Turtle.origin + Vector(0, -Turtle.m).rotate(40), 0)
cat = Cat(Turtle.origin + Vector(0, -4 * Turtle.m).rotate(200), 0, mouse)

tk = Tk()                    # Create a Tk top-level widget
arena = Arena(tk, 800, 600)  # Create an Arena widget, arena
arena.pack()                 # Tell arena to pack itself on screen
arena.add(statue)            # Add a very simple statue
arena.add(mouse)             # Add a green mouse centered at the base of the statue
arena.add(cat)               # Add a red cat
tk.mainloop()                # Enter the Tkinter event loop
string = "Oh no! You did not attain War Hammer.\nYou can continue to your next trial, but you might want to try again.\nYou may need it later!" elif riddle.getHammer() == 1: player.setHammer() string = "Congratulations! You have attained War Hammer!\nYou may choose your next trial.\nIf you have completed all the trials, you are ready to face the dragon!" home = Home(string) elif home.getTrial() == 3: string = "" card = CardChallenge() if card.getLuckStatus() == 0: string = "Oh no! You did not attain Gambler's Luck.\nYou can continue to your next trial, but you might want to try again.\nYou may need it later!" elif card.getLuckStatus() == 1: player.setLuck() string = "Congratulations! You have attained Gambler's Luck!\nYou may choose your next trial.\nIf you have completed all the trials, you are ready to face the dragon!" home = Home(string) elif home.getTrial() == 4: string = "" arena = Arena(player, dragon) if arena.getWinner() == 0: string = "Only a coward flees from battle!" break elif arena.getWinner() == 1: string = "You have slain the dragon! We are forever in your debt." break elif arena.getWinner() == 2: string = "Uh oh. Looks like you died. Well, better luck in the afterlife" break home = Home(string)
class GameEngine:
    def __init__(self, arena_width=501, arena_height=501, max_num_ghost=35000):
        self.arena_width = arena_width
        self.arena_height = arena_height
        self.max_num_ghost = max_num_ghost
        self.arena = Arena(self, arena_width, arena_height)
        self.players = {}
        self.ghosts = {}
        self.__sec_per_tick = .5

    def update(self):
        for player in self.players.values():
            player.early_update()
        for ghost in self.ghosts.values():
            ghost.early_update()
        for player in self.players.values():
            player.update()
        for ghost in self.ghosts.values():
            ghost.update()

        players_to_delete = []
        for player in self.players.values():
            if player.is_dead:
                players_to_delete.append(player.id)
        for pid in players_to_delete:
            self.delete_player(pid)

        ghosts_to_delete = []
        for ghost in self.ghosts.values():
            if ghost.is_dead:
                ghosts_to_delete.append(ghost.id)
        for gid in ghosts_to_delete:
            self.delete_ghost(gid)

        self.arena.late_update()

    def get_arena(self):
        return self.arena

    def get_players(self):
        return self.players.values()

    def get_player(self, pid):
        return self.players[pid]

    def get_ghosts(self):
        return self.ghosts.values()

    def get_sec_per_tick(self):
        return self.__sec_per_tick

    def add_player(self, counter, name, x, y):
        player = Player(self, counter, name, x, y)
        self.players[counter] = player
        self.arena[x, y].insert_object_on_top(player)

    def add_ghost(self, counter, ghost_type, x, y):
        ghost = Ghost(self, counter, ghost_type, x, y)
        self.ghosts[counter] = ghost
        self.arena[x, y].insert_object_on_top(ghost)

    def new_player(self, player):
        self.players[player.get_id()] = player

    def new_ghost(self):
        raise Exception("Not implemented!")

    def delete_player(self, pid):
        p = self.players[pid]
        self.arena.lift(p)
        del self.players[pid]

    def delete_ghost(self, gid):
        g = self.ghosts[gid]
        self.arena.lift(g)
        del self.ghosts[gid]
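# GameEngine indexes the arena with a coordinate pair (self.arena[x, y]) and
# lifts objects off it. A minimal, self-contained sketch of the cell-grid
# protocol this assumes (hypothetical classes; the real Arena is not shown here):
class Cell:
    def __init__(self):
        self.objects = []

    def insert_object_on_top(self, obj):
        self.objects.append(obj)

class GridArena:
    def __init__(self, engine, width, height):
        self.engine = engine
        self.cells = [[Cell() for _ in range(height)] for _ in range(width)]

    def __getitem__(self, xy):
        # a tuple index like arena[x, y] arrives as a single (x, y) argument
        x, y = xy
        return self.cells[x][y]

    def lift(self, obj):
        # remove obj from whichever cell currently holds it
        for column in self.cells:
            for cell in column:
                if obj in cell.objects:
                    cell.objects.remove(obj)
                    return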
from Arena import Arena
from Stack import Stack
from Tile import Tile
from Wall import Wall
import random

# set gameover to False
gameover = False

# create initial Arena
arena1 = Arena(15)

# populate the Arena's tile dictionary with Tile objects
for i in range(arena1.numtiles):
    arena1.tiles[i] = Tile(i)

# populate the Arena's wall dictionary with Wall objects
for w in range(arena1.numwalls):
    arena1.walls[w] = Wall(w)

# populate the Arena's stack dictionary with Stack objects
for k in range(arena1.numstacks):
    randsize = random.randint(10, 20)
    arena1.stacks[k] = Stack(k, randsize)

# get list of unique tile positions for stacks
alltiles = [x for x in arena1.tiles]
alltiles.remove(112)
stackpositions = random.sample(alltiles, arena1.numstacks)

# remove stack positions from alltiles
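# The snippet is cut off right after its final comment; a minimal sketch of the
# step that comment announces, assuming alltiles and stackpositions as built above:
alltiles = [t for t in alltiles if t not in stackpositions]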
def _out_of_arena(self):
    super()._out_of_arena()
    Arena.wrap_to_arena(self)
__author__ = 'zhengxiaoyu'

from Tkinter import *        # Import everything from Tkinter
from Arena import Arena      # Import our Arena
from Turtle import Turtle    # Import our Turtle
from Vector import *         # Import everything from our Vector
from Mouse import *
from WalkingTurtle import *
from Status import *
from Cat import *

little_mouse = Mouse(Vector(345, 350), 1, 1)
little_cat = Cat(Vector(800, 350), 1, little_mouse)
little_status = Status(Vector(450, 350), 1)

tk = Tk()                      # Create a Tk top-level widget
arena = Arena(tk, little_cat)  # Create an Arena widget, arena
arena.pack()
arena.add(little_mouse)
arena.add(little_cat)
arena.add(little_status)
tk.mainloop()                  # Enter the Tkinter event loop
--
--
turtle.setstate(new_state)  * simulates parallel behavior

run  - loops over step, over and over again
stop - stop running
quit - quit the program
"""

from Tkinter import *        # Import everything from Tkinter
from Arena import Arena      # Import our Arena
from Circle import Circle    # Import our Circle
from Mouse import Mouse
from Cat import Cat
from Vector import *         # Import everything from our Vector
from random import randrange, uniform

tk = Tk()                                  # Create a Tk top-level widget
arena = Arena(tk, width=1000, height=700)  # Create an Arena widget, arena
arena.pack()                               # Tell arena to pack itself on screen

'''
Turtle(position, heading, outline, fill, width)
position - vector telling where the turtle is placed
heading  - degrees, north = 0, east = 90
outline  - color, defaults to black
fill     - color of the turtle, defaults to white
width    - width of the outline
'''

def initializeStatue(center_x, center_y, radius):
    """ Creates a circular statue centered at CENTER_X and CENTER_Y with a
    radius of RADIUS. Returns the statue.
    >>> statue = initializeStatue(200, 200, 2)
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    # Generate a fixed sensing matrix if the option is toggled to True.
    # 1) A is fixed. Also set arena_game_args.sensing_matrix equal to that of
    #    coach.game_args so the arena uses the same sensing matrix.
    # 2) The folder which saves the fixed sensing matrix is empty.
    if self.args['fixed_matrix'] == True:
        if self.args['load_existing_matrix'] == True:
            self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
            self.arena_game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
            # FOR TESTING-------------------------------------------------------
            # print(self.game_args.sensing_matrix)
            # END TESTING-------------------------------------------------------
        else:
            # if not loading an existing matrix from self.args['fixed_matrix_filepath'],
            # generate a new sensing matrix of the given type self.args['matrix_type']
            self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
            self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
            # Save the fixed matrix
            self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])
            # FOR TESTING-------------------------------------------------------
            # print(self.game_args.sensing_matrix)
            # END TESTING-------------------------------------------------------

    for i in range(1, self.args['numIters'] + 1):
        print('------ITER ' + str(i) + '------')
        # The default of self.skipFirstSelfPlay is False. If loading training examples
        # from file, skipFirstSelfPlay is set to True, which lets us load the latest
        # nn_model with the latest set of TrainingExamples.
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])

            # bookkeeping objects contained in pytorch_classification.utils
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args['numEps'])
            end = time.time()

            # IMPORTANT PART OF THE CODE: GENERATE NEW A AND NEW y HERE.
            # EACH SELF-PLAY GAME HAS A DIFFERENT A AND y.
            # -----------------------------------------------------
            for eps in range(self.args['numEps']):
                # Initialize a new game by setting A, x, y, then execute a single game
                # of self-play with self.executeEpisode()
                if self.args['fixed_matrix'] == False:
                    # repeatedly generate sensing matrices if we are not fixing the sensing matrix
                    self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
                # generate a new observed vector y. This assumes a matrix has been
                # loaded in self.game_args!
                self.game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                # create a new search tree for each game we play
                self.mcts = MCTS(self.game, self.nnet, self.args, self.game_args)
                # TESTING-------------------------
                # print('The generated sparse vector x has sparsity: ' + str(self.game_args.game_iter))
                # print('Starting self-play game iteration: ' + str(eps))
                # start_game = time.time()
                # --------------------------------
                # play a new game with the newly generated y; iterationTrainExamples is
                # a deque containing the states of each generated self-play game
                iterationTrainExamples += self.executeEpisode()
                # TESTING--------------------------
                # end_game = time.time()
                # print('Total time to play game ' + str(eps) + ' is: ' + str(end_game - start_game))
                # -----------------------------------------------------

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args['numEps'], et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history.
            # self.trainExamplesHistory is a list of deques, where each deque contains
            # all the states from numEps self-play games
            self.trainExamplesHistory.append(iterationTrainExamples)

        # Jump to here on the first iteration if we loaded an existing file into
        # self.trainExamplesHistory from the loadTrainExamples method below.
        if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file by calling the saveTrainExamples method.
        # The examples were collected using the model from the previous iteration, so (i-1).
        self.saveTrainExamples(i - 1)  # save examples to self.args['checkpoint'] folder under iteration name i-1

        # shuffle examples before training.
        # trainExamples is the list form of trainExamplesHistory: trainExamplesHistory
        # is a list of deques of training examples; trainExamples drops the deques and
        # puts all training samples into a single shuffled list.
        trainExamples = []
        for e in self.trainExamplesHistory:  # each e is a deque
            trainExamples.extend(e)
        shuffle(trainExamples)

        # The Arena--------------------------------------------------------
        if self.args['Arena'] == True:
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp')  # copy old neural network into new one
            self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')

            # convert trainExamples into a format recognizable by the neural network, then train
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1])  # train self.nnet; the weights are now updated

            # Pit the two neural networks self.pnet and self.nnet in the arena
            print('PITTING AGAINST PREVIOUS VERSION')
            # Arena pits pnet against nnet, and Game_args A and y change constantly.
            # Next iteration, arena references a different object, so the old object
            # is deleted once no other references to it remain.
            arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
            pwins, nwins, draws = arena.playGames()

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args['updateThreshold']:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            else:
                # Saves the weights (.h5) and model (.json) twice: creates
                # nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5,
                # and rewrites best_model.json and best_weights.h5.
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
        # -----------------------------------------------------------------
        else:
            # If we do not activate the Arena, we just train the network, rewrite
            # 'best', and write a new file 'nnet_checkpoint' + str(i-1).
            print('TRAINING NEW NEURAL NETWORK...')
            trainExamples = self.nnet.constructTraining(trainExamples)
            # FOR TESTING-----------------------------------------------------
            # print('trainExamples feature arrays: ' + str(trainExamples[0]))
            # print('trainExamples label arrays: ' + str(trainExamples[1]))
            # END TESTING-----------------------------------------------------
            self.nnet.train(trainExamples[0], trainExamples[1],
                            folder=self.args['network_checkpoint'],
                            filename='trainHistDict' + str(i - 1))
            # FOR TESTING-----------------------------------------------------
            # weights = self.nnet.nnet.model.get_weights()
            # min_max = []
            # for layer_weights in weights:
            #     print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
            #     layer_weights_min = np.amin(layer_weights)
            #     layer_weights_max = np.amax(layer_weights)
            #     min_max.append([layer_weights_min, layer_weights_max])
            # print('')
            # print('The smallest and largest weights of each layer are: ')
            # for pair in min_max:
            #     print(pair)
            # print('')
            # END TESTING-----------------------------------------------------
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i - 1))
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
# def setUpTest(cat_angle, mouse_angle, cat_radius):
#     """ Sets up a testing environment for the program, using cat_angle,
#     mouse_angle, and cat_radius. """
#     tk = Tk()                                  # Create a Tk top-level widget
#     arena = Arena(tk, width=1000, height=700)  # Create an Arena widget, arena
#     arena.pack()
#     statue = Circle(Vector(200, 200), 0, radius=1)
#     mouse = Mouse(statue.position + unit(statue.heading + mouse_angle) * statue.radius * statue.scale,
#                   speed=1, orbit=statue, debug_flag=True, degree=mouse_angle)
#     cat = Cat(statue.position + unit(statue.heading + cat_angle) * (statue.radius + cat_radius) * statue.scale,
#               speed=1, orbit=statue, mouse=mouse, arena=arena,
#               radius=statue.radius + cat_radius, debug_flag=True, degree=cat_angle)
#     doctest.testmod(extraglobs={'test_statue': statue, 'test_mouse': mouse, 'test_cat': cat})

if __name__ == "__main__":
    tk = Tk()                                  # Create a Tk top-level widget
    arena = Arena(tk, width=1000, height=700)  # Create an Arena widget, arena
    arena.pack()
    statue = Circle(Vector(200, 200), 0, radius=1)
    mouse = Mouse(
        statue.position + mouse_start * statue.radius * statue.scale,
        speed=1,
        orbit=statue,
        debug_flag=True,
        degree=0,
    )
    cat = Cat(
        statue.position + unit(statue.heading + 270) * (statue.radius + 1) * statue.scale,
        speed=1,
        orbit=statue,
        mouse=mouse,
        arena=arena,
        radius=statue.radius + 1,
        debug_flag=True,
        degree=270,
    )
sim = 100
# n2.load_checkpoint('/dev/8x50x25/', 'best.pth.tar')

# accumulate results across all evaluated checkpoints
# (resetting these inside the loop would discard earlier totals)
lastwins = 0
prewins = 0
draw = 0
for i in range(24, 101, 25):
    n2.load_checkpoint('./temp/Implement/deep3_feature', str(i + 1) + 'best.pth.tar')  # last
    n3.load_checkpoint('./temp/Implement/origin', str(i + 1) + 'best.pth.tar')         # pre
    args2 = dotdict({'numMCTSSims': sim, 'cpuct': 1.0})
    args3 = dotdict({'numMCTSSims': sim, 'cpuct': 1.0})
    # mcts2 = vmcts(g, n2, args2, visual())
    mcts2 = lastmcts(g, n2, args2)
    mcts3 = mcts(g, n3, args3)
    n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
    n3p = lambda x: np.argmax(mcts3.getActionProb(x, temp=0))

    arena = Arena(n3p, n2p, g)
    # arena = Arena(n3p, n2p, g, mcts2, visual())
    pwins, nwins, draws = arena.playGames(100)

    print(i + 1)
    print('lastmcts/MCTS WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))  # nwins == n2p wins
    lastwins += nwins
    prewins += pwins
    draw += draws

print('lastmcts/MCTS WINS : %d / %d ; DRAWS : %d' % (lastwins, prewins, draw))
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    epochswin = []         # wins of the network against the preceding version, per epoch
    epochdraw = []         # draws of the network against the preceding version, per epoch
    epochswingreedy = []   # wins against greedy, per epoch
    epochswinrandom = []   # wins against random, per epoch
    epochsdrawgreedy = []  # draws against greedy, per epoch
    epochsdrawrandom = []  # draws against random, per epoch
    epochswinminmax = []   # wins against minmax, per epoch
    epochsdrawminmax = []  # draws against minmax, per epoch
    begining = 1

    if self.args.load_model == True:
        file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) +
                    ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ".txt", "r+")
        lines = file.readlines()
        for index, line in enumerate(lines):
            for word in line.split():
                if index == 0:
                    epochswin.append(word)
                elif index == 1:
                    epochdraw.append(word)
        file.close()

        file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) +
                    ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ":greedyrandom.txt", "r+")
        lines = file.readlines()
        for index, line in enumerate(lines):
            for word in line.split():
                if index == 0:
                    epochswingreedy.append(word)
                elif index == 1:
                    epochsdrawgreedy.append(word)
                elif index == 2:
                    epochswinrandom.append(word)
                elif index == 3:
                    epochsdrawrandom.append(word)
                elif index == 4:
                    epochswinminmax.append(word)
                elif index == 5:
                    epochsdrawminmax.append(word)
        file.close()

        self.loadTrainExamples()

        file = open(self.args.trainExampleCheckpoint + "loopinformation", "r+")
        lines = file.readlines()
        begining = lines[0]
        file.close()

    for i in range(int(begining), self.args.numIters + 1):
        fileLoopInformation = open(self.args.trainExampleCheckpoint + "loopinformation", "w+")
        fileLoopInformation.write(str(i))
        fileLoopInformation.close()

        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            iterationTrainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # save the iteration examples to the history
        self.trainExamplesHistory.append(iterationTrainExamples)
        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        filename = "curent" + str(i) + "temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                   ":dim" + str(self.game.n) + ".pth.tar"
        filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
            self.game.n) + ".pth.tar"
        print("path with filename " + filename)
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)
        exists = os.path.isfile(filenameBest)
        if exists:
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
        else:
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)
        pmcts = MCTS(self.game, self.pnet, self.args)
        self.nnet.train(trainExamples)
        filenameCurrent = "currentforprocess:temp:iter" + str(self.args.numIters) + \
                          ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ".pth.tar"
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filenameCurrent)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game, nmcts, pmcts, evaluate=True, name=self.args.name)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare, False)
        pmcts.clear()
        nmcts.clear()
        del pmcts
        del nmcts

        print(' ')
        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if i == 1:
            epochswin.append(pwins)
            epochdraw.append(0)
        epochswin.append(nwins)
        epochdraw.append(draws)
        self.writeLogsToFile(epochswin, epochdraw)

        '''
        Get all the players and then pit them against the network.
        You need to modify here if you implement more players.
        '''
        (gp, rp, mp) = self.decidePlayers()
        if self.args.parallel == 0:
            nmcts1 = MCTS(self.game, self.nnet, self.args)
            nmcts2 = MCTS(self.game, self.nnet, self.args)
            nmcts3 = MCTS(self.game, self.nnet, self.args)
            arenagreedy = Arena(lambda x: np.argmax(nmcts1.getActionProb(x, temp=0)), gp,
                                self.game, nmcts1, name=self.args.name)
            arenarandom = Arena(lambda x: np.argmax(nmcts2.getActionProb(x, temp=0)), rp,
                                self.game, nmcts2, name=self.args.name)
            arenaminmax = Arena(lambda x: np.argmax(nmcts3.getActionProb(x, temp=0)), mp,
                                self.game, nmcts3, evaluate=True, name=self.args.name)

            pwinsminmax, nwinsminmax, drawsminmax = arenaminmax.playGames(self.args.arenaCompare)
            print("minmax - " + str(pwinsminmax) + " " + str(nwinsminmax) + " " + str(drawsminmax))
            pwinsgreedy, nwinsgreedy, drawsgreedy = arenagreedy.playGames(self.args.arenaCompare)
            print("greedy - " + str(pwinsgreedy) + " " + str(nwinsgreedy) + " " + str(drawsgreedy))
            pwinsreandom, nwinsrandom, drawsrandom = arenarandom.playGames(self.args.arenaCompare)
            print("random - " + str(pwinsreandom) + " " + str(nwinsrandom) + " " + str(drawsrandom))

            nmcts1.clear()
            nmcts2.clear()
            nmcts3.clear()
            del nmcts1
            del nmcts2
            del nmcts3
        else:
            '''
            This is used to evaluate the network against the benchmarks in parallel.
            '''
            self.args.update({'index': str(i)})
            p = self.parallel(self.args.arenaCompare)
            (pwinsminmax, nwinsminmax, drawsminmax) = p[0]   # self.parallel("minmax", self.args.arenaCompare)
            (pwinsgreedy, nwinsgreedy, drawsgreedy) = p[1]   # self.parallel("greedy", self.args.arenaCompare)
            (pwinsreandom, nwinsrandom, drawsrandom) = p[2]  # self.parallel("random", self.args.arenaCompare)

        epochsdrawgreedy.append(drawsgreedy)
        epochsdrawrandom.append(drawsrandom)
        epochswinrandom.append(pwinsreandom)
        epochswingreedy.append(pwinsgreedy)
        epochswinminmax.append(pwinsminmax)
        epochsdrawminmax.append(drawsminmax)
        self.writeLogsToFile(epochswingreedy, epochsdrawgreedy, epochswinrandom,
                             epochsdrawrandom, epochswinminmax, epochsdrawminmax,
                             training=False)

        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) <= self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            filename = "curent" + str(i) + "temp:iter" + str(self.args.numIters) + ":eps" + str(
                self.args.numEps) + ":dim" + str(self.game.n) + ".pth.tar"
            filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                self.game.n) + ".pth.tar"
            exists = os.path.isfile(filenameBest)
            if exists:
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
            else:
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)
        else:
            print('ACCEPTING NEW MODEL')
            filename = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                self.game.n) + ".pth.tar"
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)

        self.mcts.clear()
        del self.mcts
        self.mcts = MCTS(self.game, self.nnet, self.args, mcts=True)  # reset search tree
        self.writeLogsToFile(epochswin, epochdraw, training=True)
def runGame(catRadius, catAngle, mouseAngle):
    """ Main function that organizes and runs the game.
    Creates the statue, mouse, and cat and adds them to the arena. """
    tk = Tk()            # Create a Tk top-level widget
    arena = Arena(tk)    # Create an Arena widget, arena
    arena.pack()         # Tell arena to pack itself on screen

    statueObj = Statue(Vector(200, 200))  # Create statue at (200, 200)
    arena.add(statueObj)                  # Add statue to arena

    mouseObj = Mouse(mouseAngle, statueObj)  # Create mouse with statue passed as argument
    arena.add(mouseObj)                      # Add mouse to arena

    cat_radius_scaled = catRadius * scale_factor         # Scale the radius of the cat to pixels
    catObj = Cat(catAngle, cat_radius_scaled, mouseObj)  # Create cat with mouse passed as argument
    arena.add(catObj)                                    # Add cat to arena

    arena.setLabels()    # added (set labels to starting position)
    tk.mainloop()        # Enter the Tkinter event loop
class Room(object):
    def __init__(self, rid, host, max_user_num=2,
                 arena_conf_filename='Configuration.ArenaConf',
                 player_conf_filename='Configuration.PlayerConf'):
        super(Room, self).__init__()
        self.rid = rid
        self.host = host
        self.arena = None
        self.max_user_num = max_user_num
        self.username_to_user_map = {}
        # Generate dispatcher
        self.dispatcher = Dispatcher()
        # Configuration files
        self.arena_conf_filename = arena_conf_filename
        self.player_conf_filename = player_conf_filename
        # received messages
        self.msg_dict = None

    def generate_msg_dict(self):
        from common.events import MsgCSPlayerMove
        self.msg_dict = {conf.MSG_CS_PLAYER_MOVE: MsgCSPlayerMove()}

    def register_dispatcher_services(self):
        self.dispatcher.register(conf.ARENA_SERVICES, ArenaServices(self.host, self.arena))

    def dispatch(self, msg, client_hid):
        self.dispatcher.dispatch(msg, client_hid)

    def handle_received_msg(self, msg_type, data, client_hid):
        if msg_type in self.msg_dict:
            msg = self.msg_dict[msg_type]
            msg.unmarshal(data)
            self.dispatcher.dispatch(msg, client_hid)
        else:
            print "Can't handle received message in room"

    def tick(self):
        if self.arena:
            self.arena.tick()

    def start_game(self):
        # Can't start a game when one is already running
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            return False
        self.arena = Arena(self.host, self.arena_conf_filename, self.player_conf_filename)
        self.register_dispatcher_services()
        self.generate_msg_dict()
        # Send the start-game message to all roommates
        data = MsgSCStartGame().marshal()
        for k, v in self.username_to_user_map.items():
            self.host.sendClient(v.client_hid, data)
        self.arena.start_game(self.username_to_user_map)

    def add_user(self, user):
        if user.username not in self.username_to_user_map and \
                len(self.username_to_user_map) >= self.max_user_num:
            return False  # room is full
        # user is back again
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            self.arena.player_enter_again(user)
            return True
        self.username_to_user_map[user.username] = user
        self.broadcast_roommate_add(user.username)
        if len(self.username_to_user_map) >= self.max_user_num:
            self.start_game()
        return True

    # returns True if the room is now empty (game over), else False
    def remove_user(self, user):
        if user.username not in self.username_to_user_map:
            return False  # user not found
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            self.arena.player_leave(user.client_hid)
        else:
            del self.username_to_user_map[user.username]
            self.broadcast_roommate_del(user.username)
            if len(self.username_to_user_map) <= 0:
                return True
        return False

    def broadcast_roommate_add(self, username):
        msg = MsgSCRoommateAdd(username)
        data = msg.marshal()
        for username, user in self.username_to_user_map.items():
            self.host.sendClient(user.client_hid, data)

    def broadcast_roommate_del(self, username):
        msg = MsgSCRoommateDel(username)
        data = msg.marshal()
        for username, user in self.username_to_user_map.items():
            self.host.sendClient(user.client_hid, data)

    def is_valid(self):
        if self.arena and self.arena.is_game_stop:
            return False
        else:
            return True
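# Room routes raw messages through a msg_type -> prototype map plus a dispatcher.
# A self-contained sketch of that pattern with stand-in names (EchoMsg and
# MiniDispatcher are illustrative only, not part of the original module):
class EchoMsg(object):
    def unmarshal(self, data):
        self.payload = data

class MiniDispatcher(object):
    def __init__(self):
        self.handlers = {}

    def register(self, msg_cls, handler):
        self.handlers[msg_cls] = handler

    def dispatch(self, msg, client_hid):
        self.handlers[type(msg)](msg, client_hid)

dispatcher = MiniDispatcher()
dispatcher.register(EchoMsg, lambda msg, hid: None)  # no-op handler
msg_dict = {1: EchoMsg()}  # msg_type -> reusable prototype, as in Room.generate_msg_dict

def handle_received_msg(msg_type, data, client_hid):
    # unmarshal into the prototype, then hand off to the dispatcher
    if msg_type in msg_dict:
        msg = msg_dict[msg_type]
        msg.unmarshal(data)
        dispatcher.dispatch(msg, client_hid)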
# Setup NN
net = JasonNet()
current_NN = net
best_NN = net

if not os.path.isdir("model_data"):
    os.mkdir("model_data")

logger.info("Starting to train...")
for i in range(args.iteration, args.total_iterations):
    logger.info(f"Iteration {i}")

    # Play a number of episodes (games) of self-play to generate data
    generate_data(current_NN, episodes, search_depth, i)
    # original Monte Carlo
    # run_monte_carlo(current_NN, 0, i, episodes, search_depth)

    # Train the NN on the dataset from the Monte Carlo tree search above
    train_net(current_NN, i, args.lr, args.bs, args.epochs)

    # Fight the new version against the reigning champion in the Arena.
    # Even on the first iteration, the net just battles against itself.
    arena = Arena(best_NN, current_NN)
    best_NN = arena.battle(episodes // 2, search_depth)

    # Save the winning net as a pickle for battles later
    save_as_pickle(i, best_NN)

print("End of the main driver program. Training has completed!")
def p1(x, turn):
    if turn <= 2:
        mcts1.reset()
    temp = args.temp if turn <= args.tempThreshold else args.arenaTemp
    policy = mcts1.getActionProb(x, temp=temp)
    return np.random.choice(len(policy), p=policy)

for i in range(model_count):
    file = Path(networks[i])
    print(f'{short_name} vs {file.stem}')

    nnet2.load_checkpoint(folder='checkpoint', filename=file.name)
    if args.numMCTSSims <= 0:
        p2 = NNPlayer(g, nnet2, args.arenaTemp).play
    else:
        mcts2 = MCTS(g, nnet2, args)

        def p2(x, turn):
            if turn <= 2:
                mcts2.reset()
            temp = args.temp if turn <= args.tempThreshold else args.arenaTemp
            policy = mcts2.getActionProb(x, temp=temp)
            return np.random.choice(len(policy), p=policy)

    arena = Arena(p1, p2, g)
    p1wins, p2wins, draws = arena.playGames(args.arenaCompare)
    writer.add_scalar(f'Win Rate vs {short_name}',
                      (p2wins + 0.5 * draws) / args.arenaCompare, i * args.x)
    print(f'wins: {p1wins}, ties: {draws}, losses: {p2wins}\n')
writer.close()
from Arena import Arena
from IPython.display import clear_output

arena = Arena()
while True:
    print('''
    1. Create pokemon
    2. Show Existing pokemon
    3. Battle
    4. Quit
    ''')
    choice = int(input('Enter your choice:'))
    if choice == 1:
        arena.create_pokemon()
    elif choice == 2:
        arena.show_all()
    elif choice == 3:
        clear_output()
        arena.battle()
    else:
        break
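# int(input(...)) raises ValueError on non-numeric input, which would crash the
# menu loop above. A minimal sketch of a safer prompt (hypothetical helper, not
# part of the original program):
def ask_choice(prompt='Enter your choice:'):
    while True:
        raw = input(prompt)
        try:
            return int(raw)
        except ValueError:
            print('Please enter a number between 1 and 4.')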
        # Bookkeeping to prepare for the next step
        self.qOld = qPrime
        np.save("./pegWeights.npy", self.peggingWeights)


if __name__ == '__main__':
    # Initialize variables
    player1 = LinearB(1, 0.5, 0.9, False)
    player2 = Myrmidon(2, 5, False)
    numHands = 5000
    repeatFlag = False
    windowSize = 100

    # Create and run arena
    arena = Arena([player1, player2], repeatFlag)
    results = arena.playHands(numHands)

    # Plot results from arena
    x = np.arange(1, numHands + 1 - windowSize, 1)
    y0 = np.zeros(len(results[0]) - windowSize)
    avgResult0 = np.average(results[0])
    mu0 = np.zeros(len(y0))
    y1 = np.zeros(len(results[1]) - windowSize)
    avgResult1 = np.average(results[1])
    mu1 = np.zeros(len(y1))
    y2 = np.zeros(len(results[2]) - windowSize)
    avgResult2 = np.average(results[2])
    mu2 = np.zeros(len(y2))
    for i in range(len(x)):
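        # --- hypothetical completion: the original loop body is truncated here.
        # A plausible sketch, assuming y* hold windowSize-point moving averages
        # and mu* hold flat reference lines at each player's overall mean:
        y0[i] = np.average(results[0][i:i + windowSize])
        y1[i] = np.average(results[1][i:i + windowSize])
        y2[i] = np.average(results[2][i:i + windowSize])
        mu0[i], mu1[i], mu2[i] = avgResult0, avgResult1, avgResult2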
    twoWon += i[1]
    draws += i[2]
    print("Model 1 Win:", oneWon, " Model 2 Win:", twoWon, " Draw:", draws)


g = OthelloGame(6)

# parallel version
# ParallelPlay(g)

# single-process version

# all players
rp = RandomPlayer(g).play
gp = GreedyOthelloPlayer(g).play
hp = HumanOthelloPlayer(g).play

# nnet players
n1 = NNet(g)
n1.load_checkpoint('./pretrained_models/othello/pytorch/', '6x100x25_best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

# n2 = NNet(g)
# n2.load_checkpoint('/dev/8x50x25/', 'best.pth.tar')
# args2 = dotdict({'numMCTSSims': 25, 'cpuct': 1.0})
# mcts2 = MCTS(g, n2, args2)
# n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

arena = Arena.Arena(n1p, hp, g, display=display)
print(arena.playGames(2, verbose=True))
import random
from Tkinter import *        # Import everything from Tkinter
from Mouse2 import Mouse2
from Arena import Arena      # Import our Arena
from Statue import Statue    # Import our Statue
from Vector import *         # Import everything from our Vector
from Cat import Cat

# random vector for the center of the statue
random.seed()
tk = Tk()          # Create a Tk top-level widget
arena = Arena(tk)  # Create an Arena widget, arena
arena.pack()       # Tell arena to pack itself on screen
initialStatPosition = Vector(random.randint(200, 600), random.randint(200, 400))
# initialCatAngle = random.randint(0, 359)
# initialCatRadius = random.randint(200, 500)
# initialMouseAngle = random.randint(0, 359)

def inputdata():
    """
    :return: instead, input the cat angle, mouse angle, and cat radius manually
    """
    initialCatAngle = float(input('Input Cat angle (degrees):'))
    initialMouseAngle = float(input('Input Mouse angle (degrees):'))
    initialCatRadius = float(input('Input Cat radius (m):'))
    if initialCatRadius < 1.:
# Name, Player Instance, Is Stochastic
players = [('RandomPlayer', rp, 1),
           ('GreedyPlayer', gp, 0),
           ('AntiGreedyPlayer', agp, 0),
           ('CompositeGreedyPlayer', cgp, 0),
           ('mini-AlphaZero', azp1, 1)]

###############################################################################
num_trial = 100
results = np.zeros(shape=(len(players), len(players)))

for i in range(len(players)):
    for j in range(i + 1):
        arena = Arena(player1=players[i][1], player2=players[j][1], env=env, display=None)
        # if both players are deterministic, two games are enough
        actual_num_trial = 2 if (players[i][2] == 0 and players[j][2] == 0) else num_trial
        one_win, two_win, draw = arena.play_games(actual_num_trial, verbose=False)
        print(players[i][0], ' vs ', players[j][0], ' : ', one_win / float(actual_num_trial), ' wins.')
        results[i, j] = one_win / float(actual_num_trial)

###############################################################################
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.DataFrame(results, index=[x[0] for x in players], columns=[x[0] for x in players])
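# The snippet stops after building df; a minimal sketch of how the win-rate
# matrix might be rendered (the heatmap call is an assumption, not shown in
# the original):
ax = sns.heatmap(df, annot=True, vmin=0.0, vmax=1.0, cmap='viridis')
ax.set_title('Row player win rate')
plt.show()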
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        log.info(f'Starting Iter #{i} ...')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            for _ in tqdm(range(self.args.numEps), desc="Self Play"):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            log.warning(
                f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        log.info('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        log.info('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            log.info('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            log.info('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
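# A sketch of the hyperparameter bundle learn() reads from self.args. Every
# key below is referenced in the loop above, but the concrete values are
# illustrative assumptions, not settings taken from the source.
args = dotdict({
    'numIters': 100,                        # outer training iterations
    'numEps': 25,                           # self-play episodes per iteration
    'maxlenOfQueue': 200000,                # cap on examples kept per iteration
    'numItersForTrainExamplesHistory': 20,  # iterations of history to retain
    'arenaCompare': 40,                     # games played when pitting new vs old
    'updateThreshold': 0.6,                 # win fraction needed to accept the new net
    'checkpoint': './temp/',                # folder for model checkpoints
})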
our_player = MCTS(game, neural_network, args)
# old_nn_player = MCTS(game, old_nn, args)
other_player = NN_player_wrapper()


def player_random(board):
    """Pick a uniformly random valid move for the given board."""
    game_2 = Game(men_count=9)
    valids = game_2.get_valid_moves(board, 1)
    valids_ind = np.where(valids)
    choice = np.random.choice(valids_ind[0])
    return choice


# print('Let the fight Begin')
arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
              player_random,
              game, lambda x: game.print_board(x), "Alpha_Zero", "Random")
print(arena.playGames(40, verbose=True))

# print('Let the fight Begin')
# arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
#               other_player,
#               game, lambda x: Board(x).verbose_game(x), "Alpha_Zero", "Other")
# print(arena.playGames(20, verbose=True))

# print('Let the fight Begin')
# arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
#               player_random, game, lambda x: Board(x).verbose_game(x), "Alpha_Zero", "Random")
#
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        # NOTE: the acceptance gate below is deliberately disabled (`if False`),
        # so the new model is always accepted; the original threshold check is
        # preserved in the comment.
        # if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
        if False:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
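# AverageMeter (used above for eps_time) is not defined in this snippet. A
# minimal sketch of the running-average helper such progress code typically
# pairs with the progress-bar class -- the exact class used here is an
# assumption, not code from the source.
class AverageMeter(object):
    """Tracks the latest value and running average of a series of updates."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count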
from Tkinter import *
from Arena import Arena
from WalkingTurtle import WalkingTurtle
from Vector import *

tk = Tk()
arena = Arena(tk)
arena.pack()
arena.add(WalkingTurtle(Vector(200, 300), 0, 1, fill='turquoise'))
arena.add(WalkingTurtle(Vector(600, 300), 0, 1, fill='purple'))
tk.mainloop()
from Tkinter import *       # Import everything from Tkinter
from Arena import Arena     # Import our Arena
from Cat import Cat         # Import our Cat
from Mouse import Mouse     # Import our Mouse
from Statue import Statue   # Import our Statue
from Vector import *        # Import everything from our Vector
from globalVars import *    # Import everything from globalVars
from random import random   # Import random

tk = Tk()                                     # Create a Tk top-level widget
arena = Arena(tk, 800, 600, padx=12, pady=6)  # Create an Arena widget, arena
arena.pack()                                  # Tell arena to pack itself on screen

midX = arena.width / 2                   # Horizontal center of window
midY = arena.height / 2                  # Vertical center of window
mouseAngle = random() * 360 * scaleRad   # Random mouse angle to initialize
catAngle = random() * 360 * scaleRad     # Random cat angle to initialize
catRadius = 5                            # Fixed cat radius to initialize

# Create a statue in the center of the arena; heading is arbitrary
statue = Statue(Vector(midX, midY), 0)
arena.add(statue)

# Create a mouse at the right edge of the statue; heading is arbitrary since
# it will be overwritten in initialization
mouse = Mouse(Vector(midX + statue.radius * scalePixel * cos(mouseAngle),
                     midY - statue.radius * scalePixel * sin(mouseAngle)),
              0, arena, statue)
arena.add(mouse)

# Create a cat at the given angle and radius; heading is arbitrary since it
# will be overwritten in initialization
cat = Cat(Vector(midX + catRadius * scalePixel * cos(catAngle),
                 midY - catRadius * scalePixel * sin(catAngle)),
          0, arena, statue, mouse)
arena.add(cat, "cat")  # Specify that it's a cat via the extra argument

tk.mainloop()  # Enter the Tkinter event loop
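# The two Vector expressions above repeat the same polar-to-canvas conversion
# (the y term is negated because Tk's y axis points down). A small helper that
# could factor this out -- an editor's sketch, not part of the source file:
def polar_to_canvas(center_x, center_y, radius, angle, scale):
    """Convert a (radius, angle) position around a center point to canvas coordinates."""
    return Vector(center_x + radius * scale * cos(angle),
                  center_y - radius * scale * sin(angle))

# e.g. the mouse position above would become:
# polar_to_canvas(midX, midY, statue.radius, mouseAngle, scalePixel)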
testUnit = Hero(name="Ordeus",
                healthRange=(70, 80),
                strengthRange=(70, 80),
                defenceRange=(45, 55),
                speedRange=(40, 50),
                luckRange=(10, 30))
testUnit2 = Beast(name="Beast",
                  healthRange=(60, 90),
                  strengthRange=(60, 90),
                  defenceRange=(40, 60),
                  speedRange=(40, 60),
                  luckRange=(25, 40))
testArena = Arena(name="Emagia", fighter1=testUnit, fighter2=testUnit2, maxTurns=20)


class TestArena(unittest.TestCase):

    def test_type(self):
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena, True, testUnit, testUnit2, 20)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena, 12, testUnit, testUnit2, 20)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena, "Orderus", testUnit, testUnit2, "20")
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena, "Orderus", testUnit, testUnit2, 20.0)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
from WalkingTurtle import *
from Statue import Statue
from Mouse import Mouse
import Cat

meter = 20         # How many pixels is one meter?
statue_radius = 1  # The statue's radius in meters
cat_radius = 1.5
cat_angle = 0
mouse_angle = 45

x = Vector()
print x.length()

tk = Tk()          # Create a Tk top-level widget
arena = Arena(tk)  # Create an Arena widget, arena
arena.pack()       # Tell arena to pack itself on screen

s = Statue(Vector(200, 200), statue_radius, meter)
arena.add(s)
m = Mouse(s, mouse_angle)
c = Cat.Cat(cat_angle, cat_radius, m)
arena.add(c)
arena.add(m)

arena.cat_rad.set('CatRadius: ' + str(cat_radius))
arena.cat_rad_label = Label(arena, textvariable=arena.cat_rad)
arena.cat_rad_label.pack()
arena.cat_ang.set('CatAngle: ' + str(cat_angle))
arena.cat_ang_label = Label(arena, textvariable=arena.cat_ang)
arena.cat_ang_label.pack()
arena.mouse_ang.set('MouseAngle: ' + str(mouse_angle))
arena.mouse_ang_label = Label(arena, textvariable=arena.mouse_ang)
def learn(self):
    """Main loop of the training process."""
    if self.args.load_model:
        start = self.args.load_example[1] + 1
    else:
        start = 1

    for i in range(start, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        greedy = i == 1 and not self.args.load_model
        if not self.skipFirstSelfPlay or i > 1:
            iteration_train_examples = deque([], maxlen=self.args.maxlenOfQueue)
            num_eps = self.args.numEps
            if greedy:
                num_eps = self.args.greedy_eps

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=num_eps)
            end = time.time()

            for eps in range(num_eps):
                if greedy:
                    iteration_train_examples += self.execute_initialize_episode()
                else:
                    iteration_train_examples += self.execute_episodes()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=num_eps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

        # save the iteration examples to the history
        if not greedy:
            self.trainExamplesHistory.append(iteration_train_examples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file (note: unlike the other variants, this
            # one saves under the current iteration index i, not i-1)
            self.saveTrainExamples(i)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)
        else:
            trainExamples = iteration_train_examples

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
        self.nnet.train(trainExamples)

        if not greedy:
            pmcts = MCTSSingle(self.game, self.pnet, self.args)
            nmcts = MCTSSingle(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(pmcts, nmcts, self.game, self.args)
            scores = arena.playGames(self.args.arenaCompare)
            if scores[1] == 0 or float(scores[1]) / sum(scores) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
        else:
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='checkpoint_1.h5')
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        print(str(self.game.innerN) + "x" + str(self.game.innerM))
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                # self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                self.mcts = MCTS(self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        tempfile = 'temp.pth.tar'
        bestfile = 'best.pth.tar'

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
        self.nnet.train(trainExamples)

        if self.arenaEnabled:
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            pmcts = MCTS(self.pnet, self.args)
            nmcts = MCTS(self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            # arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
            #               lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
            arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                          lambda x, y: nmcts.getActionProb(x, y, temp=0), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
gladius = weapons["Gladius"]

char1 = readCharacterFromXML(os.path.join(basedir, "res/Charactere/Agilitus.xml"))
char1.name = "Hugo"
action1 = Action(char1, hieb0, gladius)
reaction1 = Action(char1, block0, gladius)

char2 = readCharacterFromXML(os.path.join(basedir, "res/Charactere/Agilitus.xml"))
char2.name = "Herbert"
action2 = Action(char2, hieb0, gladius)
reaction2 = Action(char2, block0, gladius)

arena = Arena()
arena.addTeam("blue", [char1])
arena.addTeam("red", [char2])
print(arena.getCharacters())

# Create a Player for each character
player1 = RandomPlayer("Player1", char1, arena, action1, reaction1)
player2 = RandomPlayer("Player2", char2, arena, action2, reaction2)
players = [player1, player2]
points = [0, 0]

for round in range(100):
    # reset chars
    char1.reset()
    char2.reset()
def runGame(catRadius, catAngle, mouseAngle):
    """
    Main function that organizes and runs the game.
    Creates the statue, mouse, and cat and adds them to the arena.
    """
    tk = Tk()          # Create a Tk top-level widget
    arena = Arena(tk)  # Create an Arena widget, arena
    arena.pack()       # Tell arena to pack itself on screen

    statueObj = Statue(Vector(200, 200))  # Create statue at (200, 200)
    arena.add(statueObj)                  # Add statue to arena

    mouseObj = Mouse(mouseAngle, statueObj)  # Create mouse with statue passed as argument
    arena.add(mouseObj)                      # Add mouse to arena

    cat_radius_scaled = catRadius * scale_factor         # Scale the cat's radius to pixels
    catObj = Cat(catAngle, cat_radius_scaled, mouseObj)  # Create cat with mouse passed as argument
    arena.add(catObj)                                    # Add cat to arena

    arena.setLabels()  # Set labels to starting position
    tk.mainloop()      # Enter the Tkinter event loop
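# A possible invocation of runGame; the concrete values are illustrative
# assumptions (radius in meters, angles in degrees), not taken from the source.
if __name__ == '__main__':
    runGame(catRadius=1.5, catAngle=0.0, mouseAngle=45.0)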
from Tkinter import *       # Import everything from Tkinter
from Arena import Arena     # Import our Arena
from Turtle import Turtle   # Import our Turtle
from Vector import *        # Import everything from our Vector

tk = Tk()                   # Create a Tk top-level widget
arena = Arena(tk)           # Create an Arena widget, arena
arena.pack()                # Tell arena to pack itself on screen
turtle = Turtle(Vector(200, 200), 0)
arena.add(turtle)           # Add a very simple, basic turtle
tk.mainloop()               # Enter the Tkinter event loop
from Tkinter import *       # Import everything from Tkinter
from Arena import Arena     # Import our Arena
from Turtle import Turtle   # Import our Turtle
from Vector import *        # Import everything from our Vector

tk = Tk()                   # Create a Tk top-level widget
arena = Arena(tk)           # Create an Arena widget, arena
arena.pack()                # Tell arena to pack itself on screen
arena.add(Turtle(Vector(200, 200), 0))  # Add a very simple, basic turtle
tk.mainloop()               # Enter the Tkinter event loop
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """

    for i in range(self.args.startIter, self.args.numIters + self.args.startIter):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.args.skipFirstSelfPlay or i > self.args.startIter:  # or i > 1
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            n_processes: int = 4
            # or, if you have enough compute (and VRAM):
            # n_processes = mp.cpu_count()

            with mp.Pool(n_processes) as pool:
                self.nnet.nnet.share_memory()
                pbar = tqdm.tqdm(range(self.args.numEps), position=0)
                pbar.set_description(f"Self Play using {n_processes} processes")
                for eps in pbar:
                    # Arguments for each worker.
                    worker_args = [(i, self.game, self.args, self.nnet)
                                   for i in range(n_processes)]
                    # Apply the executeEpisode method on each argument:
                    for worker_examples in pool.starmap(Coach.executeEpisode, worker_args):
                        iterationTrainExamples.extend(worker_examples)

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        if not self.args.skipFirstTrain or i > self.args.startIter:
            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='old.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='old.pth.tar')
            self.nnet.train(trainExamples)
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='new.pth.tar')

            pmcts = MCTS(self.game, self.pnet, self.args)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(pmcts, nmcts, self.game,
                          self.args.resignationOn, self.args.resignationThreshold)
            # arena = Arena(lambda state, player: np.where(pmcts.getActionProb(state, player, temp=0) == 1)[0][0],
            #               lambda state, player: np.where(nmcts.getActionProb(state, player, temp=0) == 1)[0][0],
            #               self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='old.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
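# Worker processes inherit the network shared via share_memory() above. A
# minimal sketch of the entry-point guard such a multiprocessing script needs;
# the 'spawn' start method is an editor's assumption (it is the start method
# that works with CUDA tensors, while 'fork' generally does not), and the
# driver call is hypothetical, not shown in the source.
if __name__ == '__main__':
    import multiprocessing
    multiprocessing.set_start_method('spawn')  # must be set once, before any Pool is created
    # Coach(game, nnet, args).learn()  # hypothetical driver call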