def __init__(self, screen, currentLevel):
    Arena.__init__(self)
    currentLevel = 2  # NOTE: overrides the caller's argument; looks like leftover debugging
    self.entities = []
    self.keyListeners = []
    self.accelerators = []
    self.npcs = []
    # on a timer, we reset which NPCs emit attractive and repulsive forces
    self.foodieAcceleratorReset = 5000  # ms
    # this counter is incremented and reset on each repopulation
    self.foodieAcceleratorResetTimer = 0
    # we roll an n-sided die for each Foodie to decide whether it will
    # become an accelerator on this reset
    self.foodieAcceleratorResetDice = 5

    # init npcs
    maxSize = 50.0
    minSize = 6.0
    screenW = float(screen.get_width())
    screenH = float(screen.get_height())
    # rotation accel rates
    maxRotVel = math.pi / 20.0
    maxRotAcc = math.pi / 40.0
    for i in range(Variables.lvlFoodCount[currentLevel]):
        size = minSize + random.random() * (maxSize - minSize)
        pos = [0.0, 0.0]
        pos[0] += int(random.random() * screenW)
        pos[1] += int(random.random() * screenH)
        prey = Prey(size, pos, maxRotVel, maxRotAcc, self)
        self.entities.append(prey)
    for i in range(Variables.lvlEaterCount[currentLevel]):
        size = minSize + random.random() * (maxSize - minSize)
        pos = [0.0, 0.0]
        pos[0] += int(random.random() * screenW)
        pos[1] += int(random.random() * screenH)
        predator = Predator(size, pos, maxRotVel, maxRotAcc, self)
        self.entities.append(predator)

    # init players
    c = CircleGuy(self)
    m = MouseEntity(self)
    self.accelerators += [m]
    self.entities += [c, m]
    self.keyListeners += [c, m]
def open(self):
    self.isopen = True
    self.moving = False
    self.arena = Arena.getInstance()
    self.sched = Scheduler()
    self.sched.start()
    self.sched.add_interval_job(self.calc_pos, seconds=0.05)
    self.sched.add_interval_job(self.send_pos, seconds=0.05)
    self.keys = Keys()
    self.movement_enabled = True
    self.dirty_coll = True
    self.gravity = 0
    self.jumping = False
    self.pos_transaction = None
    self.id = self.arena.addPlayer(self)
    self.active = False
    self.name = None
    self.arena.Join(self.id)
    self.cur_weapon = None
    self.weapons = []
    self.add_weapon(Weapon("Mr. Default Gun"))
    self.maxhealth = 100
    self.health = self.maxhealth
    self.pos = Vec3()
    self.rot = Vec2()
    self.spawn()
    print 'new connection'
def update(self, dT):
    Arena.update(self, dT)
    # Disabled: periodically re-roll which Foodies act as accelerators.
    '''
    self.foodieAcceleratorResetTimer += dT
    if self.foodieAcceleratorResetTimer > self.foodieAcceleratorReset:
        print "RESETTING ACCELERATING FOODIES"
        # remove all foodies from the accelerators list
        # (NOTE: removing from a list while iterating over it skips elements;
        # iterate over a copy or rebuild the list instead)
        for a in self.accelerators:
            if isinstance(a, Foodie):
                self.accelerators.remove(a)
        # randomly add some foodies back in
        for f in self.foodies:
            if random.randint(0, self.foodieAcceleratorResetDice) == 0:
                self.accelerators.append(f)
        self.foodieAcceleratorResetTimer = 0
    '''
    pass
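# A minimal sketch (not from the source) of the disabled reset above, written
# without the remove-while-iterating pitfall: rebuild the list instead of
# mutating it in place. Attribute names (accelerators, foodies,
# foodieAcceleratorResetDice) are assumed from the surrounding class.
def _reset_accelerating_foodies(self):
    # keep every non-Foodie accelerator
    self.accelerators = [a for a in self.accelerators if not isinstance(a, Foodie)]
    # re-roll each Foodie with probability 1 / (dice + 1)
    for f in self.foodies:
        if random.randint(0, self.foodieAcceleratorResetDice) == 0:
            self.accelerators.append(f)
    self.foodieAcceleratorResetTimer = 0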
def play_test_case(radius, cat_angle, mouse_angle):
    m, c = make_test_case(radius, cat_angle, mouse_angle)
    s = m.statue
    tk = Tk()
    arena = Arena(tk, meter * 10, meter * 10)
    arena.add(s)
    arena.add(c)
    arena.add(m)
    arena.pack()
    tk.mainloop()
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with the
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):
        # if 8 < get_hour() < 23:
        #     log.warning('Sleeping to save CPU...')
        #     while 8 < get_hour() < 23:
        #         time.sleep(60)

        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
            for _ in tqdm(range(self.args.numEps), desc="Self Play", ncols=100):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()
            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        while len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NOTE! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        random.seed(time.time())
        random.shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins = arena.playGames(self.args.arenaCompare)

        print()
        print('Results')
        print(f'Won: {nwins}')
        print(f'Lost: {pwins}')
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='checkpoint_best.pth.tar')
            self.saveTrainExamples('best')
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename=self.getCheckpointFile(i))
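# Illustrative sketch (not from the source): the acceptance rule used above,
# extracted into a standalone predicate. A candidate replaces the champion only
# when its win fraction over decisive games reaches updateThreshold; an arena
# with no decisive games rejects.
def accept_new_model(nwins, pwins, update_threshold):
    decisive = nwins + pwins
    if decisive == 0:
        return False  # no decisive games: keep the old model
    return nwins / decisive >= update_threshold

# e.g. accept_new_model(11, 9, 0.55) -> True; accept_new_model(0, 0, 0.55) -> False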
        if keys[pygame.K_UP]:
            if direction[1] != 1:
                direction = (0, -1)
                break
        elif keys[pygame.K_DOWN]:
            if direction[1] != -1:
                direction = (0, 1)
                break
        if keys[pygame.K_LEFT]:
            if direction[0] != 1:
                direction = (-1, 0)
                break
        elif keys[pygame.K_RIGHT]:
            if direction[0] != -1:
                direction = (1, 0)
                break

    newArena.movesnake(direction, snake)
    timer += clock.tick(100)


width, height = 320, 320
newArena = Arena(30, 30)
win = pygame.display.set_mode((width, height))
# pygame.display.flip()
menu_screen()
def sleep(self):
    Magni.health = 1000
    # NOTE: passes the class itself as `self`; Arena.sleep(self) is the usual form
    Arena.sleep(Arena)
def learn(self):
    # Generate or load a matrix if fixed_matrix is set to True. We save a Game_args
    # object in Coach in case A is fixed, so that when we initialize multiple MCTS
    # objects below we do not have to store multiple copies of A.
    if self.args['fixed_matrix'] == True:
        if self.args['load_existing_matrix'] == True:
            self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
        else:
            self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
            self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])

    # keep track of learning time
    learning_start = time.time()

    # start training iterations
    for i in range(1, self.args['numIters'] + 1):
        print('------ITER ' + str(i) + '------')
        # If we are not loading a set of training data, then:
        if not self.skipFirstSelfPlay or i > 1:
            # 1) Initialize an empty deque for storing training data once every eps
            #    in the iteration has been processed
            iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])
            # 3) Start search. A single search consists of a synchronous search over ALL
            #    eps in the current batch, so the number of MCTS trees maintained at once
            #    equals the number of eps in the current batch.
            for j in range(self.args['num_batches']):
                # INITIALIZATION STEP---------------------------------------
                # Each element in MCTS_States_list has the form
                # (MCTS object, [list of States the root traversed])
                MCTS_States_list = []
                batchTrainExamples = []

                # Initialize bookkeeping
                print('Generating Self-Play Batch ' + str(j) + ':')
                bar = Bar('Self Play', max=self.args['eps_per_batch'])

                # Initialize MCTS_States_list. The number of pairs in MCTS_States_list
                # should equal eps_per_batch.
                for ep in range(self.args['eps_per_batch']):
                    # Initialize Game_args() for MCTS
                    temp_game_args = Game_args()
                    if self.args['fixed_matrix'] == False:
                        temp_game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
                    else:
                        temp_game_args.sensing_matrix = self.game_args.sensing_matrix
                    temp_game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                    # Initialize MCTS and the first state for each MCTS
                    temp_MCTS = MCTS(self.game, self.nnet, self.args, temp_game_args,
                                     identifier=int(str(j) + str(ep)))
                    temp_init_state = self.game.getInitBoard(self.args, temp_game_args,
                                                             identifier=int(str(j) + str(ep)))
                    # Append to MCTS_States_list
                    MCTS_States_list.append([temp_MCTS, [temp_init_state]])

                # initialize some variables for the bookkeeping bar in the terminal
                current_MCTSStateslist_size = len(MCTS_States_list)
                completed_episodes = 0
                total_completed_eps = 0

                # Initialize the threading class. Needed to call threaded_mcts below.
                threaded_mcts = Threading_MCTS(self.args, self.nnet)
                # ----------------------------------------------------------
                # While MCTS_States_list is nonempty, advance each episode in it by one
                # move. Continue until MCTS_States_list is empty, meaning all games are
                # completed. When a game completes, its pair is removed from the list.
                # ----------------------------------------------------------
                self_play_batchstart = time.time()
                while MCTS_States_list:
                    # advanceEpisodes returns a new MCTS_States_list with every element
                    # advanced one move and all completed games removed. It also returns
                    # new trainExamples for games completed during the call.
                    MCTS_States_list, trainExamples = self.advanceEpisodes(MCTS_States_list, threaded_mcts)
                    # save the States_list states whose last node is terminal;
                    # these become new training samples
                    batchTrainExamples += trainExamples

                    # bookkeeping bar in the output of the algorithm
                    if len(MCTS_States_list) < current_MCTSStateslist_size:
                        completed_episodes = current_MCTSStateslist_size - len(MCTS_States_list)
                        current_MCTSStateslist_size = len(MCTS_States_list)
                        total_completed_eps += completed_episodes
                        # advance the bookkeeping bar whenever MCTS_States_list shrinks.
                        # bar.next() advances and outputs the progress bar;
                        # bar.suffix outputs the suffix text after "|"
                        bar.suffix = '({eps_completed}/{maxeps})'.format(
                            eps_completed=total_completed_eps,
                            maxeps=self.args['eps_per_batch'])
                        # advance the progress bar completed_episodes times
                        for k in range(completed_episodes):
                            bar.next()
                # ----------------------------------------------------------
                # end the bookkeeping bar
                bar.finish()
                self_play_batchend = time.time()
                print('All Self-Play Games in batch have been played to completion.')
                print('Total time taken for batch: ', self_play_batchend - self_play_batchstart)
                iterationTrainExamples += batchTrainExamples

            # Add the training samples generated in a single training iteration to
            # self.trainExamplesHistory. This is the last statement inside the
            # "if not self.skipFirstSelfPlay or i > 1:" block.
            self.trainExamplesHistory.append(iterationTrainExamples)

        # Jump to here if self.skipFirstSelfPlay is True or i <= 1.
        # Once iterationTrainExamples is complete, we use it to retrain the network.
        if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # save the trainExamplesHistory list of Coach
        self.saveTrainExamples(i - 1)

        # move all training samples from trainExamplesHistory to trainExamples and shuffle
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # The Arena--------------------------------------------------------
        if self.args['Arena'] == True:
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            # copy the old neural network into the new one
            self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')

            # convert trainExamples into a format recognizable by the network and train
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1])  # train self.nnet; its weights are now updated

            # Pit the two neural networks self.pnet and self.nnet in the arena
            print('PITTING AGAINST PREVIOUS VERSION')
            # Arena pits pnet against nnet; Game_args A and y change constantly.
            # Next iteration, `arena` references a different object, so the old one
            # is deleted once no other references remain.
            arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
            pwins, nwins, draws = arena.playGames()

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args['updateThreshold']:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            else:
                # saves the weights (.h5) and model (.json) twice: creates
                # nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5,
                # and rewrites best_model.json and best_weights.h5
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                          filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
        # -----------------------------------------------------------------
        else:
            # Without the Arena we just train the network, rewrite 'best',
            # and write a new file 'nnet_checkpoint' + str(i-1).
            print('TRAINING NEW NEURAL NETWORK...')
            trainExamples = self.nnet.constructTraining(trainExamples)
            # FOR TESTING-----------------------------------------------------
            # print('')
            # print('feature arrays shape: ', trainExamples[0][0].shape, trainExamples[0][1].shape)
            # print('trainExamples feature arrays: ', trainExamples[0])
            # print('')
            # print('label arrays shape: ', trainExamples[1][0].shape, trainExamples[1][1].shape)
            # print('trainExamples label arrays: ', trainExamples[1])
            # END TESTING-----------------------------------------------------
            self.nnet.train(trainExamples[0], trainExamples[1],
                            folder=self.args['network_checkpoint'],
                            filename='trainHistDict' + str(i - 1))
            # FOR TESTING-----------------------------------------------------
            # weights = self.nnet.nnet.model.get_weights()
            # min_max = []
            # for layer_weights in weights:
            #     print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
            #     layer_weights_min = np.amin(layer_weights)
            #     layer_weights_max = np.amax(layer_weights)
            #     min_max.append([layer_weights_min, layer_weights_max])
            # print('')
            # print('The smallest and largest weights of each layer are: ')
            # for pair in min_max:
            #     print(pair)
            # print('')
            # END TESTING-----------------------------------------------------
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                      filename='nnet_checkpoint' + str(i - 1))
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')

    # Compute total time to run AlphaZero
    learning_end = time.time()
    print('----------TRAINING COMPLETE----------')
    print('Total training time: ', learning_end - learning_start)
def _out_of_arena(self):
    super()._out_of_arena()
    if uniform(0, 1) < ASTEROIDS_DESPAWN_PROB:  # sometimes they just leave
        self.Dispose()
    else:
        Arena.wrap_to_arena(self)
from RockBot import RockBot
from ScissorsBot import ScissorsBot
from PaperBot import PaperBot
from BeatLastBot import BeatLastBot
from RockThenPaperThenScissorsBot import RockThenPaperThenScissorsBot
from ScissorsThenPaperThenRockBot import ScissorsThenPaperThenRockBot
from Arena import Arena

if __name__ == '__main__':
    rockBot = RockBot()
    scissorsBot = ScissorsBot()
    paperBot = PaperBot()
    beatLastBot = BeatLastBot()
    rockThenPaperThenScissorsBot = RockThenPaperThenScissorsBot()
    scissorsThenPaperThenRockBot = ScissorsThenPaperThenRockBot()

    arena = Arena()
    arena.battle(beatLastBot, rockThenPaperThenScissorsBot, 20)

    # calling __init__ by hand re-runs the constructors to reset state between battles
    arena.__init__()
    beatLastBot.__init__()
    arena.battle(beatLastBot, scissorsThenPaperThenRockBot, 20)
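# A small alternative sketch (an assumption, not from the source): constructing
# fresh objects is the more idiomatic way to reset state than calling __init__
# directly, and it behaves identically here.
# arena = Arena()
# beatLastBot = BeatLastBot()
# arena.battle(beatLastBot, scissorsThenPaperThenRockBot, 20)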
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with the
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
    for i in range(self.args.numIters):
        # bookkeeping
        print('------ITER ' + str(i + 1) + '------')
        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            trainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pnet = self.nnet.__class__(self.game)
        pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins))
        if float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet = pnet
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='checkpoint_' + str(i) + '.pth.tar')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
        self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with the
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):  # for each round
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            # drop the previous round's training examples
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):  # for each self-play game of this round
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                # returns [(canonicalBoard, pi, v), ...] where v is the result
                selfPlayResult = self.executeEpisode()  # play one game, recording the game history
                iterationTrainExamples += selfPlayResult

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)  # self-play finished; update the move history

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)  # remove the oldest game history
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)  # add the new move records
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')  # save the previous net
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')  # load the previous net
        pmcts = MCTS(self.game, self.pnet, self.args)  # reset the previous model's MCTS

        # use the new data to train the new model
        self.nnet.train(trainExamples)  # train the network with the new move records
        nmcts = MCTS(self.game, self.nnet, self.args)  # reset the new model's MCTS

        # OLD VS NEW
        print('PITTING AGAINST PREVIOUS VERSION')
        # rp = RandomPlayer(self.game).play
        # abp2 = AbpPlayer(self.game, 1, abpDepth=2).play
        arena = Arena(
            lambda board, turn: np.argmax(pmcts.getActionProb(board, turn, temp=0)),
            lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)),
            self.game)
        # arena = Arena(abp2,
        #               lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)),
        #               self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)  # play the new model against the old one

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            # OLD WINS!
            print('REJECTING NEW MODEL')
            # keep the previous model, as it beat the new one
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            # NEW WINS!
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            # save the new model, as this is the best
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
from Tkinter import *        # Import everything from Tkinter
from Arena import Arena      # Import our Arena
from Turtle import Turtle    # Import our Turtle
from Vector import *         # Import everything from our Vector

Turtle.m = 50.0                    # Scaling factor
Turtle.origin = Vector(400, 300)

from Statue import *
from Mouse import *
from Cat import *

statue = Statue(Turtle.origin + Vector(0, 0), 0)
mouse = Mouse(Turtle.origin + Vector(0, -Turtle.m).rotate(40), 0)
cat = Cat(Turtle.origin + Vector(0, -4 * Turtle.m).rotate(200), 0, mouse)

tk = Tk()                    # Create a Tk top-level widget
arena = Arena(tk, 800, 600)  # Create an Arena widget, arena
arena.pack()                 # Tell arena to pack itself on screen
arena.add(statue)            # Add a very simple statue
arena.add(mouse)             # Add a green mouse centered at the base of the statue
arena.add(cat)               # Add a red cat
tk.mainloop()                # Enter the Tkinter event loop
string = "Oh no! You did not attain War Hammer.\nYou can continue to your next trial, but you might want to try again.\nYou may need it later!" elif riddle.getHammer() == 1: player.setHammer() string = "Congratulations! You have attained War Hammer!\nYou may choose your next trial.\nIf you have completed all the trials, you are ready to face the dragon!" home = Home(string) elif home.getTrial() == 3: string = "" card = CardChallenge() if card.getLuckStatus() == 0: string = "Oh no! You did not attain Gambler's Luck.\nYou can continue to your next trial, but you might want to try again.\nYou may need it later!" elif card.getLuckStatus() == 1: player.setLuck() string = "Congratulations! You have attained Gambler's Luck!\nYou may choose your next trial.\nIf you have completed all the trials, you are ready to face the dragon!" home = Home(string) elif home.getTrial() == 4: string = "" arena = Arena(player, dragon) if arena.getWinner() == 0: string = "Only a coward flees from battle!" break elif arena.getWinner() == 1: string = "You have slain the dragon! We are forever in your debt." break elif arena.getWinner() == 2: string = "Uh oh. Looks like you died. Well, better luck in the afterlife" break home = Home(string)
class GameEngine:
    def __init__(self, arena_width=501, arena_height=501, max_num_ghost=35000):
        self.arena_width = arena_width
        self.arena_height = arena_height
        self.max_num_ghost = max_num_ghost
        self.arena = Arena(self, arena_width, arena_height)
        self.players = {}
        self.ghosts = {}
        self.__sec_per_tick = .5

    def update(self):
        for player in self.players.values():
            player.early_update()
        for ghost in self.ghosts.values():
            ghost.early_update()
        for player in self.players.values():
            player.update()
        for ghost in self.ghosts.values():
            ghost.update()
        players_to_delete = []
        for player in self.players.values():
            if player.is_dead:
                players_to_delete.append(player.id)
        for pid in players_to_delete:
            self.delete_player(pid)
        ghosts_to_delete = []
        for ghost in self.ghosts.values():
            if ghost.is_dead:
                ghosts_to_delete.append(ghost.id)
        for gid in ghosts_to_delete:
            self.delete_ghost(gid)
        self.arena.late_update()

    def get_arena(self):
        return self.arena

    def get_players(self):
        return self.players.values()

    def get_player(self, pid):
        return self.players[pid]

    def get_ghosts(self):
        return self.ghosts.values()

    def get_sec_per_tick(self):
        return self.__sec_per_tick

    def add_player(self, counter, name, x, y):
        player = Player(self, counter, name, x, y)
        self.players[counter] = player
        self.arena[x, y].insert_object_on_top(player)

    def add_ghost(self, counter, ghost_type, x, y):
        ghost = Ghost(self, counter, ghost_type, x, y)
        self.ghosts[counter] = ghost
        self.arena[x, y].insert_object_on_top(ghost)

    def new_player(self, player):
        self.players[player.get_id()] = player

    def new_ghost(self):
        raise Exception("Not implemented!")

    def delete_player(self, pid):
        p = self.players[pid]
        self.arena.lift(p)
        del self.players[pid]

    def delete_ghost(self, gid):
        g = self.ghosts[gid]
        self.arena.lift(g)
        del self.ghosts[gid]
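# Minimal driver sketch (an assumption, not from the source): tick the engine at
# its configured rate. Only GameEngine(), update(), and get_sec_per_tick() come
# from the class above; the loop itself is illustrative.
import time

engine = GameEngine()
for _ in range(10):                        # run ten ticks as a demo
    engine.update()                        # early_update, update, reap dead objects, late_update
    time.sleep(engine.get_sec_per_tick())  # 0.5 s per tick by default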
from Arena import Arena
from Stack import Stack
from Tile import Tile
from Wall import Wall
import random

# set gameover to False
gameover = False

# create initial Arena
arena1 = Arena(15)

# populate the Arena's tile dictionary with Tile objects
for i in range(arena1.numtiles):
    arena1.tiles[i] = Tile(i)

# populate the Arena's wall dictionary with Wall objects
for w in range(arena1.numwalls):
    arena1.walls[w] = Wall(w)

# populate the Arena's stack dictionary with Stack objects
for k in range(arena1.numstacks):
    randsize = random.randint(10, 20)
    arena1.stacks[k] = Stack(k, randsize)

# get list of unique tile positions for stacks
alltiles = [x for x in arena1.tiles]
alltiles.remove(112)
stackpositions = random.sample(alltiles, arena1.numstacks)
# remove stack positions from alltiles
def _out_of_arena(self):
    super()._out_of_arena()
    Arena.wrap_to_arena(self)
__author__ = 'zhengxiaoyu'

from Tkinter import *          # Import everything from Tkinter
from Arena import Arena        # Import our Arena
from Turtle import Turtle      # Import our Turtle
from Vector import *           # Import everything from our Vector
from Mouse import *
from WalkingTurtle import *
from Status import *
from Cat import *

little_mouse = Mouse(Vector(345, 350), 1, 1)
little_cat = Cat(Vector(800, 350), 1, little_mouse)
little_status = Status(Vector(450, 350), 1)

tk = Tk()                      # Create a Tk top-level widget
arena = Arena(tk, little_cat)  # Create an Arena widget, arena
arena.pack()
arena.add(little_mouse)
arena.add(little_cat)
arena.add(little_status)
tk.mainloop()                  # Enter the Tkinter event loop
--
--
turtle.setstate(new_state)   *simulates parallel behavior

run  - loops over step over and over again
stop - stop running
quit - quit the program
"""

from Tkinter import *       # Import everything from Tkinter
from Arena import Arena     # Import our Arena
from Circle import Circle   # Import our Turtle
from Mouse import Mouse
from Cat import Cat
from Vector import *        # Import everything from our Vector
from random import randrange, uniform

tk = Tk()                                  # Create a Tk top-level widget
arena = Arena(tk, width=1000, height=700)  # Create an Arena widget, arena
arena.pack()                               # Tell arena to pack itself on screen

'''
Turtle(position, heading, outline, fill, width)
position - vector telling where the turtle is to be placed
heading  - degrees, north = 0, east = 90
outline  - color, defaults to black
fill     - color of turtle, defaults to white
width    - width of outline
'''

def initializeStatue(center_x, center_y, radius):
    """ Creates a circular statue centered at CENTER_X and CENTER_Y
    with a radius of RADIUS. Returns the statue.

    >>> statue = initializeStatue(200, 200, 2)
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with the
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    # Generate a fixed sensing matrix if the option is toggled to True.
    # 1) A is fixed. Also set arena_game_args.sensing_matrix equal to that of
    #    coach.game_args so the arena uses the same sensing matrix.
    # 2) the folder which saves the fixed sensing matrix is empty
    if self.args['fixed_matrix'] == True:
        if self.args['load_existing_matrix'] == True:
            self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
            self.arena_game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
            # FOR TESTING-------------------------------------------------------
            # print(self.game_args.sensing_matrix)
            # END TESTING-------------------------------------------------------
        else:
            # not loading an existing matrix from self.args['fixed_matrix_filepath'],
            # so generate a new sensing matrix of the given type self.args['matrix_type']
            self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
            self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
            # Save the fixed matrix
            self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])
            # FOR TESTING-------------------------------------------------------
            # print(self.game_args.sensing_matrix)
            # END TESTING-------------------------------------------------------

    for i in range(1, self.args['numIters'] + 1):
        print('------ITER ' + str(i) + '------')
        # self.skipFirstSelfPlay defaults to False; it is set to True when loading
        # training examples from file, which lets us load the latest nn_model with
        # the latest set of TrainingExamples.
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])

            # bookkeeping objects contained in pytorch_classification.utils
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args['numEps'])
            end = time.time()

            # IMPORTANT PART OF THE CODE: generate new A and new y here.
            # Each self-play game has a different A and y.
            # -----------------------------------------------------
            for eps in range(self.args['numEps']):
                # Initialize a new game by setting A, x, y, then execute a single
                # game of self-play with self.executeEpisode()
                if self.args['fixed_matrix'] == False:
                    # repeatedly generate sensing matrices if we are not fixing the matrix
                    self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
                # generate a new observed vector y
                # (this assumes a matrix has been loaded in self.game_args!)
                self.game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                # create a new search tree for each game we play
                self.mcts = MCTS(self.game, self.nnet, self.args, self.game_args)
                # TESTING-------------------------
                # print('The generated sparse vector x has sparsity: ' + str(self.game_args.game_iter))
                # print('Starting self-play game iteration: ' + str(eps))
                # start_game = time.time()
                # --------------------------------
                # Play a new game with the newly generated y; iterationTrainExamples is
                # a deque containing the states of each generated self-play game.
                iterationTrainExamples += self.executeEpisode()
                # TESTING--------------------------
                # end_game = time.time()
                # print('Total time to play game ' + str(eps) + ' is: ' + str(end_game - start_game))
                # ---------------------------------

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args['numEps'], et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history.
            # self.trainExamplesHistory is a list of deques, where each deque contains
            # all the states from numEps self-play games.
            self.trainExamplesHistory.append(iterationTrainExamples)

        # Jump to here on the first iteration if we loaded an existing file into
        # self.trainExamplesHistory via the loadTrainExamples method below.
        if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file by calling the saveTrainExamples method.
        # The examples were collected using the model from the previous iteration, so (i-1).
        # Examples go to the self.args['checkpoint'] folder under iteration name i-1.
        self.saveTrainExamples(i - 1)

        # shuffle examples before training.
        # trainExamples flattens trainExamplesHistory: it discards the deques and
        # puts all training samples into a single shuffled list.
        trainExamples = []
        for e in self.trainExamplesHistory:  # each e is a deque
            trainExamples.extend(e)
        shuffle(trainExamples)

        # The Arena--------------------------------------------------------
        if self.args['Arena'] == True:
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            # copy the old neural network into the new one
            self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')

            # convert trainExamples into a format recognizable by the network and train
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1])  # train self.nnet; its weights are now updated

            # Pit the two neural networks self.pnet and self.nnet in the arena
            print('PITTING AGAINST PREVIOUS VERSION')
            # Arena pits pnet against nnet; Game_args A and y change constantly.
            # Next iteration, `arena` references a different object, so the old one
            # is deleted once no other references remain.
            arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
            pwins, nwins, draws = arena.playGames()

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args['updateThreshold']:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            else:
                # saves the weights (.h5) and model (.json) twice: creates
                # nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5,
                # and rewrites best_model.json and best_weights.h5
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                          filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
        # -----------------------------------------------------------------
        else:
            # Without the Arena we just train the network, rewrite 'best',
            # and write a new file 'nnet_checkpoint' + str(i-1).
            print('TRAINING NEW NEURAL NETWORK...')
            trainExamples = self.nnet.constructTraining(trainExamples)
            # FOR TESTING-----------------------------------------------------
            # print('trainExamples feature arrays: ' + str(trainExamples[0]))
            # print('trainExamples label arrays: ' + str(trainExamples[1]))
            # END TESTING-----------------------------------------------------
            self.nnet.train(trainExamples[0], trainExamples[1],
                            folder=self.args['network_checkpoint'],
                            filename='trainHistDict' + str(i - 1))
            # FOR TESTING-----------------------------------------------------
            # weights = self.nnet.nnet.model.get_weights()
            # min_max = []
            # for layer_weights in weights:
            #     print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
            #     layer_weights_min = np.amin(layer_weights)
            #     layer_weights_max = np.amax(layer_weights)
            #     min_max.append([layer_weights_min, layer_weights_max])
            # print('')
            # print('The smallest and largest weights of each layer are: ')
            # for pair in min_max:
            #     print(pair)
            # print('')
            # END TESTING-----------------------------------------------------
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                      filename='nnet_checkpoint' + str(i - 1))
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
# def setUpTest(cat_angle, mouse_angle, cat_radius):
#     """ Sets up the testing environment for the program, using cat_angle, mouse_angle, and cat_radius. """
#     tk = Tk()                                  # Create a Tk top-level widget
#     arena = Arena(tk, width=1000, height=700)  # Create an Arena widget, arena
#     arena.pack()
#     statue = Circle(Vector(200, 200), 0, radius=1)
#     mouse = Mouse(statue.position + unit(statue.heading + mouse_angle) * statue.radius * statue.scale,
#                   speed=1, orbit=statue, debug_flag=True, degree=mouse_angle)
#     cat = Cat(statue.position + unit(statue.heading + cat_angle) * (statue.radius + cat_radius) * statue.scale,
#               speed=1, orbit=statue, mouse=mouse, arena=arena,
#               radius=statue.radius + cat_radius, debug_flag=True, degree=cat_angle)
#     doctest.testmod(extraglobs={'test_statue': statue, 'test_mouse': mouse, 'test_cat': cat})

if __name__ == "__main__":
    tk = Tk()                                  # Create a Tk top-level widget
    arena = Arena(tk, width=1000, height=700)  # Create an Arena widget, arena
    arena.pack()
    statue = Circle(Vector(200, 200), 0, radius=1)
    mouse = Mouse(
        statue.position + mouse_start * statue.radius * statue.scale,
        speed=1,
        orbit=statue,
        debug_flag=True,
        degree=0,
    )
    cat = Cat(
        statue.position + unit(statue.heading + 270) * (statue.radius + 1) * statue.scale,
        speed=1,
        orbit=statue,
        mouse=mouse,
        arena=arena,
        radius=statue.radius + 1,
        debug_flag=True,
        degree=270,
    )
sim = 100
lastwins = 0
prewins = 0
draw = 0
# n2.load_checkpoint('/dev/8x50x25/', 'best.pth.tar')
for i in range(24, 101, 25):
    n2.load_checkpoint('./temp/Implement/deep3_feature', str(i + 1) + 'best.pth.tar')  # last
    n3.load_checkpoint('./temp/Implement/origin', str(i + 1) + 'best.pth.tar')         # pre
    args2 = dotdict({'numMCTSSims': sim, 'cpuct': 1.0})
    args3 = dotdict({'numMCTSSims': sim, 'cpuct': 1.0})
    # mcts2 = vmcts(g, n2, args2, visual())
    mcts2 = lastmcts(g, n2, args2)
    mcts3 = mcts(g, n3, args3)
    n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
    n3p = lambda x: np.argmax(mcts3.getActionProb(x, temp=0))
    arena = Arena(n3p, n2p, g)
    # arena = Arena(n3p, n2p, g, mcts2, visual())
    pwins, nwins, draws = arena.playGames(100)
    print(i + 1)
    print('lastmcts/MCTS WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))  # nwins == n2p wins == one loss
    lastwins += nwins
    prewins += pwins
    draw += draws
print('lastmcts/MCTS WINS : %d / %d ; DRAWS : %d' % (lastwins, prewins, draw))
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with the
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    epochswin = []         # wins of the network against the preceding version, per epoch
    epochdraw = []         # draws of the network against the preceding version, per epoch
    epochswingreedy = []   # wins against greedy, per epoch
    epochswinrandom = []   # wins against random, per epoch
    epochsdrawgreedy = []  # draws against greedy, per epoch
    epochsdrawrandom = []  # draws against random, per epoch
    epochswinminmax = []   # wins against minmax, per epoch
    epochsdrawminmax = []  # draws against minmax, per epoch
    begining = 1

    if self.args.load_model == True:
        file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) +
                    ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ".txt", "r+")
        lines = file.readlines()
        for index, line in enumerate(lines):
            for word in line.split():
                if index == 0:
                    epochswin.append(word)
                elif index == 1:
                    epochdraw.append(word)
        file.close()

        file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) +
                    ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ":greedyrandom.txt", "r+")
        lines = file.readlines()
        for index, line in enumerate(lines):
            for word in line.split():
                if index == 0:
                    epochswingreedy.append(word)
                elif index == 1:
                    epochsdrawgreedy.append(word)
                elif index == 2:
                    epochswinrandom.append(word)
                elif index == 3:
                    epochsdrawrandom.append(word)
                elif index == 4:
                    epochswinminmax.append(word)
                elif index == 5:
                    epochsdrawminmax.append(word)
        file.close()

        self.loadTrainExamples()

        file = open(self.args.trainExampleCheckpoint + "loopinformation", "r+")
        lines = file.readlines()
        begining = lines[0]
        file.close()

    for i in range(int(begining), self.args.numIters + 1):
        fileLoopInformation = open(self.args.trainExampleCheckpoint + "loopinformation", "w+")
        fileLoopInformation.write(str(i))
        fileLoopInformation.close()

        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            iterationTrainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # save the iteration examples to the history
        self.trainExamplesHistory.append(iterationTrainExamples)
        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        filename = "curent" + str(i) + "temp:iter" + str(self.args.numIters) + ":eps" + \
                   str(self.args.numEps) + ":dim" + str(self.game.n) + ".pth.tar"
        filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                       ":dim" + str(self.game.n) + ".pth.tar"
        print("path with filename " + filename)
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)
        exists = os.path.isfile(filenameBest)
        if exists:
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
        else:
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        filenameCurrent = "currentforprocess:temp:iter" + str(self.args.numIters) + \
                          ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ".pth.tar"
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filenameCurrent)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game, nmcts, pmcts, evaluate=True, name=self.args.name)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare, False)
        pmcts.clear()
        nmcts.clear()
        del pmcts
        del nmcts

        print(' ')
        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if i == 1:
            epochswin.append(pwins)
            epochdraw.append(0)
        epochswin.append(nwins)
        epochdraw.append(draws)
        self.writeLogsToFile(epochswin, epochdraw)

        '''
        Get all the players and then pit them against the network.
        You need to modify here if you implement more players.
        '''
        (gp, rp, mp) = self.decidePlayers()
        if self.args.parallel == 0:
            nmcts1 = MCTS(self.game, self.nnet, self.args)
            nmcts2 = MCTS(self.game, self.nnet, self.args)
            nmcts3 = MCTS(self.game, self.nnet, self.args)
            arenagreedy = Arena(lambda x: np.argmax(nmcts1.getActionProb(x, temp=0)), gp,
                                self.game, nmcts1, name=self.args.name)
            arenarandom = Arena(lambda x: np.argmax(nmcts2.getActionProb(x, temp=0)), rp,
                                self.game, nmcts2, name=self.args.name)
            arenaminmax = Arena(lambda x: np.argmax(nmcts3.getActionProb(x, temp=0)), mp,
                                self.game, nmcts3, evaluate=True, name=self.args.name)

            pwinsminmax, nwinsminmax, drawsminmax = arenaminmax.playGames(self.args.arenaCompare)
            print("minmax - " + str(pwinsminmax) + " " + str(nwinsminmax) + " " + str(drawsminmax))
            pwinsgreedy, nwinsgreedy, drawsgreedy = arenagreedy.playGames(self.args.arenaCompare)
            print("greedy - " + str(pwinsgreedy) + " " + str(nwinsgreedy) + " " + str(drawsgreedy))
            pwinsreandom, nwinsrandom, drawsrandom = arenarandom.playGames(self.args.arenaCompare)
            print("random - " + str(pwinsreandom) + " " + str(nwinsrandom) + " " + str(drawsrandom))

            nmcts1.clear()
            nmcts2.clear()
            nmcts3.clear()
            del nmcts1
            del nmcts2
            del nmcts3
        else:
            '''
            This will be used if you want to evaluate the network against
            the benchmarks in a parallel way.
            '''
            self.args.update({'index': str(i)})
            p = self.parallel(self.args.arenaCompare)
            (pwinsminmax, nwinsminmax, drawsminmax) = p[0]   # self.parallel("minmax", self.args.arenaCompare)
            (pwinsgreedy, nwinsgreedy, drawsgreedy) = p[1]   # self.parallel("greedy", self.args.arenaCompare)
            (pwinsreandom, nwinsrandom, drawsrandom) = p[2]  # self.parallel("random", self.args.arenaCompare)

        epochsdrawgreedy.append(drawsgreedy)
        epochsdrawrandom.append(drawsrandom)
        epochswinrandom.append(pwinsreandom)
        epochswingreedy.append(pwinsgreedy)
        epochswinminmax.append(pwinsminmax)
        epochsdrawminmax.append(drawsminmax)
        self.writeLogsToFile(epochswingreedy, epochsdrawgreedy, epochswinrandom,
                             epochsdrawrandom, epochswinminmax, epochsdrawminmax, training=False)

        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) <= self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            filename = "curent" + str(i) + "temp:iter" + str(self.args.numIters) + ":eps" + \
                       str(self.args.numEps) + ":dim" + str(self.game.n) + ".pth.tar"
            filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                           ":dim" + str(self.game.n) + ".pth.tar"
            exists = os.path.isfile(filenameBest)
            if exists:
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
            else:
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)
        else:
            print('ACCEPTING NEW MODEL')
            filename = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                       ":dim" + str(self.game.n) + ".pth.tar"
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)

        self.mcts.clear()
        del self.mcts
        self.mcts = MCTS(self.game, self.nnet, self.args, mcts=True)  # reset search tree
        self.writeLogsToFile(epochswin, epochdraw, training=True)
def runGame(catRadius, catAngle, mouseAngle):
    """ Main function that organizes and runs the game.
    Creates the statue, mouse, and cat and adds them to the arena. """
    tk = Tk()          # Create a Tk top-level widget
    arena = Arena(tk)  # Create an Arena widget, arena
    arena.pack()       # Tell arena to pack itself on screen
    statueObj = Statue(Vector(200, 200))                 # Create statue at (200, 200)
    arena.add(statueObj)                                 # Add statue to arena
    mouseObj = Mouse(mouseAngle, statueObj)              # Create mouse with statue passed as argument
    arena.add(mouseObj)                                  # Add mouse to arena
    cat_radius_scaled = catRadius * scale_factor         # Scale the cat's radius to pixels
    catObj = Cat(catAngle, cat_radius_scaled, mouseObj)  # Create cat with mouse passed as argument
    arena.add(catObj)                                    # Add cat to arena
    arena.setLabels()  # added (set labels to starting position)
    tk.mainloop()      # Enter the Tkinter event loop
class Room(object):
    def __init__(self, rid, host, max_user_num=2,
                 arena_conf_filename='Configuration.ArenaConf',
                 player_conf_filename='Configuration.PlayerConf'):
        super(Room, self).__init__()
        self.rid = rid
        self.host = host
        self.arena = None
        self.max_user_num = max_user_num
        self.username_to_user_map = {}
        # Generate dispatcher
        self.dispatcher = Dispatcher()
        # Configuration files
        self.arena_conf_filename = arena_conf_filename
        self.player_conf_filename = player_conf_filename
        # received messages
        self.msg_dict = None

    def generate_msg_dict(self):
        from common.events import MsgCSPlayerMove
        self.msg_dict = {conf.MSG_CS_PLAYER_MOVE: MsgCSPlayerMove()}

    def register_dispatcher_services(self):
        self.dispatcher.register(conf.ARENA_SERVICES, ArenaServices(self.host, self.arena))

    def dispatch(self, msg, client_hid):
        self.dispatcher.dispatch(msg, client_hid)

    def handle_received_msg(self, msg_type, data, client_hid):
        if msg_type in self.msg_dict:
            msg = self.msg_dict[msg_type]
            msg.unmarshal(data)
            self.dispatcher.dispatch(msg, client_hid)
        else:
            print "Can't handle received message in room"

    def tick(self):
        if self.arena:
            self.arena.tick()

    def start_game(self):
        # Can't start a game while one is already running
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            return False
        self.arena = Arena(self.host, self.arena_conf_filename, self.player_conf_filename)
        self.register_dispatcher_services()
        self.generate_msg_dict()
        # Send the start-game message to all roommates
        data = MsgSCStartGame().marshal()
        for k, v in self.username_to_user_map.items():
            self.host.sendClient(v.client_hid, data)
        self.arena.start_game(self.username_to_user_map)

    def add_user(self, user):
        if self.username_to_user_map.has_key(user.username) == False and \
                len(self.username_to_user_map) >= self.max_user_num:
            return False  # room is full
        # user came back again
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            self.arena.player_enter_again(user)
            return True
        self.username_to_user_map[user.username] = user
        self.broadcast_roommate_add(user.username)
        if len(self.username_to_user_map) >= self.max_user_num:
            self.start_game()
        return True

    def remove_user(self, user):
        if self.username_to_user_map.has_key(user.username) is False:
            return False  # user not found
        if self.arena and self.arena.is_game_start and not self.arena.is_game_stop:
            self.arena.player_leave(user.client_hid)
        else:
            del self.username_to_user_map[user.username]
            self.broadcast_roommate_del(user.username)
            if len(self.username_to_user_map) <= 0:
                return True
        return False

    def broadcast_roommate_add(self, username):
        msg = MsgSCRoommateAdd(username)
        data = msg.marshal()
        for username, user in self.username_to_user_map.items():
            self.host.sendClient(user.client_hid, data)

    def broadcast_roommate_del(self, username):
        msg = MsgSCRoommateDel(username)
        data = msg.marshal()
        for username, user in self.username_to_user_map.items():
            self.host.sendClient(user.client_hid, data)

    # game over: return True, else False
    def is_valid(self):
        if self.arena and self.arena.is_game_stop:
            return False
        else:
            return True
# Setup NN
net = JasonNet()
current_NN = net
best_NN = net
if not os.path.isdir("model_data"):
    os.mkdir("model_data")

logger.info("Starting to train...")
for i in range(args.iteration, args.total_iterations):
    logger.info(F"Iteration {i}")
    # Play a number of episodes (games) of self-play to generate data
    generate_data(current_NN, episodes, search_depth, i)
    # original Monte Carlo
    # run_monte_carlo(current_NN, 0, i, episodes, search_depth)

    # Train NN from the dataset produced by the Monte Carlo tree search above
    train_net(current_NN, i, args.lr, args.bs, args.epochs)

    # Fight the new version against the reigning champion in the Arena.
    # Even on the first iteration, just battle against yourself.
    arena = Arena(best_NN, current_NN)
    best_NN = arena.battle(episodes // 2, search_depth)

    # Save the winning net as a pickle for battle later
    save_as_pickle(i, best_NN)

print("End of the main driver program. Training has completed!")
def p1(x, turn):
    if turn <= 2:
        mcts1.reset()
    temp = args.temp if turn <= args.tempThreshold else args.arenaTemp
    policy = mcts1.getActionProb(x, temp=temp)
    return np.random.choice(len(policy), p=policy)

for i in range(model_count):
    file = Path(networks[i])
    print(f'{short_name} vs {file.stem}')
    nnet2.load_checkpoint(folder='checkpoint', filename=file.name)
    if args.numMCTSSims <= 0:
        p2 = NNPlayer(g, nnet2, args.arenaTemp).play
    else:
        mcts2 = MCTS(g, nnet2, args)

        def p2(x, turn):
            if turn <= 2:
                mcts2.reset()
            temp = args.temp if turn <= args.tempThreshold else args.arenaTemp
            policy = mcts2.getActionProb(x, temp=temp)
            return np.random.choice(len(policy), p=policy)

    arena = Arena(p1, p2, g)
    p1wins, p2wins, draws = arena.playGames(args.arenaCompare)
    # a draw counts as half a win for the win-rate scalar
    writer.add_scalar(f'Win Rate vs {short_name}',
                      (p2wins + 0.5 * draws) / args.arenaCompare, i * args.x)
    print(f'wins: {p1wins}, ties: {draws}, losses: {p2wins}\n')
writer.close()
from Arena import Arena
from IPython.display import clear_output

arena = Arena()
while True:
    print('''
    1. Create pokemon
    2. Show existing pokemon
    3. Battle
    4. Quit
    ''')
    choice = int(input('Enter your choice: '))
    if choice == 1:
        arena.create_pokemon()
    elif choice == 2:
        arena.show_all()
    elif choice == 3:
        clear_output()
        arena.battle()
    else:
        break
        # Bookkeeping to prepare for the next step
        self.qOld = qPrime
        np.save("./pegWeights.npy", self.peggingWeights)


if __name__ == '__main__':
    # Initialize variables
    player1 = LinearB(1, 0.5, 0.9, False)
    player2 = Myrmidon(2, 5, False)
    numHands = 5000
    repeatFlag = False
    windowSize = 100

    # Create and run arena
    arena = Arena([player1, player2], repeatFlag)
    results = arena.playHands(numHands)

    # Plot results from arena
    x = np.arange(1, numHands + 1 - windowSize, 1)
    y0 = np.zeros(len(results[0]) - windowSize)
    avgResult0 = np.average(results[0])
    mu0 = np.zeros(len(y0))
    y1 = np.zeros(len(results[1]) - windowSize)
    avgResult1 = np.average(results[1])
    mu1 = np.zeros(len(y1))
    y2 = np.zeros(len(results[2]) - windowSize)
    avgResult2 = np.average(results[2])
    mu2 = np.zeros(len(y2))
    for i in range(len(x)):
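# The truncated loop above appears to fill y0..y2 with sliding-window averages of
# the per-hand results. A hedged sketch of the same smoothing with numpy, assuming
# a simple trailing moving average of width windowSize is what is intended:
import numpy as np

def moving_average(series, window):
    # 'valid' keeps only windows that fit entirely inside the series
    kernel = np.ones(window) / window
    return np.convolve(series, kernel, mode='valid')

# e.g. y0 = moving_average(results[0], windowSize)[:len(y0)]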
    twoWon += i[1]
    draws += i[2]
print("Model 1 Win:", oneWon, " Model 2 Win:", twoWon, " Draw:", draws)

g = OthelloGame(6)

# parallel version
# ParallelPlay(g)

# single-process version

# all players
rp = RandomPlayer(g).play
gp = GreedyOthelloPlayer(g).play
hp = HumanOthelloPlayer(g).play

# nnet players
n1 = NNet(g)
n1.load_checkpoint('./pretrained_models/othello/pytorch/', '6x100x25_best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

# n2 = NNet(g)
# n2.load_checkpoint('/dev/8x50x25/', 'best.pth.tar')
# args2 = dotdict({'numMCTSSims': 25, 'cpuct': 1.0})
# mcts2 = MCTS(g, n2, args2)
# n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

arena = Arena.Arena(n1p, hp, g, display=display)
print(arena.playGames(2, verbose=True))
import random
from Tkinter import *      # Import everything from Tkinter
from Mouse2 import Mouse2
from Arena import Arena    # Import our Arena
from Statue import Statue  # Import our Statue
from Vector import *       # Import everything from our Vector
from Cat import Cat

# random vector for the center of the statue
random.seed()

tk = Tk()          # Create a Tk top-level widget
arena = Arena(tk)  # Create an Arena widget, arena
arena.pack()       # Tell arena to pack itself on screen

initialStatPosition = Vector(random.randint(200, 600), random.randint(200, 400))
# initialCatAngle = random.randint(0, 359)
# initialCatRadius = random.randint(200, 500)
# initialMouseAngle = random.randint(0, 359)

def inputdata():
    """
    Instead of random values, input the cat angle, mouse angle,
    and cat radius manually.
    """
    initialCatAngle = float(input('Input cat angle (degrees): '))
    initialMouseAngle = float(input('Input mouse angle (degrees): '))
    initialCatRadius = float(input('Input cat radius (m): '))
    if initialCatRadius < 1.:
# Name, player instance, is stochastic
players = [('RandomPlayer', rp, 1),
           ('GreedyPlayer', gp, 0),
           ('AntiGreedyPlayer', agp, 0),
           ('CompositeGreedyPlayer', cgp, 0),
           ('mini-AlphaZero', azp1, 1)]
###############################################################################
num_trial = 100
results = np.zeros(shape=(len(players), len(players)))
for i in range(len(players)):
    for j in range(i + 1):
        arena = Arena(player1=players[i][1], player2=players[j][1], env=env, display=None)
        # if both players are deterministic, two games suffice
        actual_num_trial = 2 if (players[i][2] == 0 and players[j][2] == 0) else num_trial
        one_win, two_win, draw = arena.play_games(actual_num_trial, verbose=False)
        print(players[i][0], ' vs ', players[j][0], ' : ',
              one_win / float(actual_num_trial), ' wins.')
        results[i, j] = one_win / float(actual_num_trial)
###############################################################################
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.DataFrame(results, index=[x[0] for x in players], columns=[x[0] for x in players])
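# Presumably the DataFrame is then rendered as a heatmap (seaborn and matplotlib
# are imported above); a minimal sketch of that step, assuming the
# lower-triangular win-rate matrix built in the loop:
plt.figure(figsize=(6, 5))
sns.heatmap(df, annot=True, vmin=0.0, vmax=1.0, cmap='viridis')
plt.title('Row player win rate vs column player')
plt.show()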
def learn(self): """ Performs numIters iterations with numEps episodes of self-play in each iteration. After every iteration, it retrains neural network with examples in trainExamples (which has a maximum length of maxlenofQueue). It then pits the new neural network against the old one and accepts it only if it wins >= updateThreshold fraction of games. """ for i in range(1, self.args.numIters + 1): # bookkeeping log.info(f'Starting Iter #{i} ...') # examples of the iteration if not self.skipFirstSelfPlay or i > 1: iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue) for _ in tqdm(range(self.args.numEps), desc="Self Play"): self.mcts = MCTS(self.game, self.nnet, self.args) # reset search tree iterationTrainExamples += self.executeEpisode() # save the iteration examples to the history self.trainExamplesHistory.append(iterationTrainExamples) if len(self.trainExamplesHistory ) > self.args.numItersForTrainExamplesHistory: log.warning( f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}" ) self.trainExamplesHistory.pop(0) # backup history to a file # NB! the examples were collected using the model from the previous iteration, so (i-1) self.saveTrainExamples(i - 1) # shuffle examples before training trainExamples = [] for e in self.trainExamplesHistory: trainExamples.extend(e) shuffle(trainExamples) # training new network, keeping a copy of the old one self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar') self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar') pmcts = MCTS(self.game, self.pnet, self.args) self.nnet.train(trainExamples) nmcts = MCTS(self.game, self.nnet, self.args) log.info('PITTING AGAINST PREVIOUS VERSION') arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)), lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game) pwins, nwins, draws = arena.playGames(self.args.arenaCompare) log.info('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws)) if pwins + nwins == 0 or float(nwins) / ( pwins + nwins) < self.args.updateThreshold: log.info('REJECTING NEW MODEL') self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar') else: log.info('ACCEPTING NEW MODEL') self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i)) self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
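# The accept/reject gate above is easy to misread because draws are
# excluded from the denominator. A standalone restatement of the same rule
# (names are illustrative, not part of the original code):
def should_accept(nwins, pwins, update_threshold):
    """Accept the new net only if it wins at least update_threshold of
    the decisive games; draws count toward neither side."""
    decisive = nwins + pwins
    return decisive > 0 and nwins / decisive >= update_threshold

# e.g. should_accept(11, 9, 0.55)  -> True  (11/20 = 0.55)
#      should_accept(10, 10, 0.55) -> False (10/20 = 0.50)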
our_player = MCTS(game, neural_network, args)
# old_nn_player = MCTS(game, old_nn, args)
other_player = NN_player_wrapper()


def player_random(board):
    """Baseline opponent: choose a uniformly random valid move."""
    game_2 = Game(men_count=9)
    valids = game_2.get_valid_moves(board, 1)
    valids_ind = np.where(valids)
    choice = np.random.choice(valids_ind[0])
    return choice


# print('Let the fight Begin')
arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
              player_random,
              game, lambda x: game.print_board(x), "Alpha_Zero", "Random")
print(arena.playGames(40, verbose=True))

# # print('Let the fight Begin')
# arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
#               other_player,
#               game, lambda x: Board(x).verbose_game(x), "Alpha_Zero", "Other")
# print(arena.playGames(20, verbose=True))

# print('Let the fight Begin')
# arena = Arena(lambda x: np.argmax(our_player.get_action_prob(x)),
#               player_random, game, lambda x: Board(x).verbose_game(x), "Alpha_Zero", "Random")
#
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =",
                  len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        #if pwins+nwins == 0 or float(nwins)/(pwins+nwins) < self.args.updateThreshold:
        if False:  # NOTE: the rejection gate is disabled here, so the new model is always accepted
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')
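# AverageMeter and Bar above are assumed progress utilities in the
# pytorch-classification style, not stdlib. A minimal sketch of the
# running-average helper, consistent with the .update()/.avg usage:
class AverageMeter(object):
    """Tracks a running average of a scalar (e.g. per-episode time)."""
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count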
from Tkinter import * from Arena import Arena from WalkingTurtle import WalkingTurtle from Vector import * tk = Tk() arena = Arena(tk) arena.pack() arena.add(WalkingTurtle(Vector(200, 300), 0, 1, fill='turquoise')) arena.add(WalkingTurtle(Vector(600, 300), 0, 1, fill='purple')) tk.mainloop()
from Tkinter import *        # Import everything from Tkinter
from Arena import Arena      # Import our Arena
from Cat import Cat          # Import our Cat
from Mouse import Mouse      # Import our Mouse
from Statue import Statue    # Import our Statue
from Vector import *         # Import everything from our Vector
from globalVars import *     # Import everything from globalVars
from random import random    # Import random

tk = Tk()                                      # Create a Tk top-level widget
arena = Arena(tk, 800, 600, padx=12, pady=6)   # Create an Arena widget, arena
arena.pack()                                   # Tell arena to pack itself on screen
midX = arena.width/2                           # Horizontal center of window
midY = arena.height/2                          # Vertical center of window
mouseAngle = random()*360*scaleRad             # Random mouse angle to initialize
catAngle = random()*360*scaleRad               # Random cat angle to initialize
catRadius = 5                                  # Fixed cat radius to initialize
statue = Statue(Vector(midX,midY), 0)          # Create a statue in the center of the arena, arbitrary heading
arena.add(statue)                              # Add statue
mouse = Mouse(Vector(midX + statue.radius*scalePixel*cos(mouseAngle), midY - statue.radius*scalePixel*sin(mouseAngle)), 0, arena, statue)   # Create a mouse at the edge of the statue; heading is arbitrary since it is overwritten in initialization
arena.add(mouse)                               # Add mouse
cat = Cat(Vector(midX + catRadius*scalePixel*cos(catAngle), midY - catRadius*scalePixel*sin(catAngle)), 0, arena, statue, mouse)   # Create a cat at the given angle and radius; heading is arbitrary since it is overwritten in initialization
arena.add(cat, "cat")                          # Add cat, flagging it as a cat via the extra argument
tk.mainloop()                                  # Enter the Tkinter event loop
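# The cat and mouse placement above repeats the same polar-to-screen
# conversion, with a minus sign on the y term because Tk's y axis grows
# downward. A small hypothetical helper that captures it (scalePixel,
# Vector, cos, and sin are assumed from the imports above):
def polarToScreen(centerX, centerY, radiusMeters, angleRad):
    """Convert a (radius, angle) pose around a center into screen coords."""
    return Vector(centerX + radiusMeters*scalePixel*cos(angleRad),
                  centerY - radiusMeters*scalePixel*sin(angleRad))

# e.g. the mouse above could equivalently be created with:
# mouse = Mouse(polarToScreen(midX, midY, statue.radius, mouseAngle), 0, arena, statue)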
testUnit = Hero(name="Orderus",
                healthRange=(70, 80),
                strengthRange=(70, 80),
                defenceRange=(45, 55),
                speedRange=(40, 50),
                luckRange=(10, 30))

testUnit2 = Beast(name="Beast",
                  healthRange=(60, 90),
                  strengthRange=(60, 90),
                  defenceRange=(40, 60),
                  speedRange=(40, 60),
                  luckRange=(25, 40))

testArena = Arena(name="Emagia", fighter1=testUnit, fighter2=testUnit2, maxTurns=20)


class TestArena(unittest.TestCase):

    def test_type(self):
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
                          True, testUnit, testUnit2, 20)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
                          12, testUnit, testUnit2, 20)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
                          "Orderus", testUnit, testUnit2, "20")
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
                          "Orderus", testUnit, testUnit2, 20.0)
        self.assertRaises(TypeError, Arena.ValidateInputs, testArena,
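# The assertions above imply Arena.ValidateInputs type-checks
# (name, fighter1, fighter2, maxTurns), raising TypeError for a non-string
# name or a non-int turn count. A hypothetical sketch consistent with
# those tests; the real method is not shown:
def ValidateInputs(self, name, fighter1, fighter2, maxTurns):
    if not isinstance(name, str):
        raise TypeError("name must be a string")
    if fighter1 is None or fighter2 is None:
        raise TypeError("both fighters must be provided")
    if not isinstance(maxTurns, int):
        raise TypeError("maxTurns must be an integer")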
from Tkinter import *         # Import everything from Tkinter
from Arena import Arena       # Import our Arena
from Vector import *          # Import everything from our Vector
from WalkingTurtle import *
from Statue import Statue
from Mouse import Mouse
import Cat

meter = 20          # How many pixels is one meter?
statue_radius = 1   # What is the statue's radius?
cat_radius = 1.5
cat_angle = 0
mouse_angle = 45

x = Vector()
print x.length()

tk = Tk()            # Create a Tk top-level widget
arena = Arena(tk)    # Create an Arena widget, arena
arena.pack()         # Tell arena to pack itself on screen
s = Statue(Vector(200, 200), statue_radius, meter)
arena.add(s)
m = Mouse(s, mouse_angle)
c = Cat.Cat(cat_angle, cat_radius, m)
arena.add(c)
arena.add(m)

arena.cat_rad.set('CatRadius: ' + str(cat_radius))
arena.cat_rad_label = Label(arena, textvariable=arena.cat_rad)
arena.cat_rad_label.pack()
arena.cat_ang.set('CatAngle: ' + str(cat_angle))
arena.cat_ang_label = Label(arena, textvariable=arena.cat_ang)
arena.cat_ang_label.pack()
arena.mouse_ang.set('MouseAngle: ' + str(mouse_angle))
arena.mouse_ang_label = Label(arena, textvariable=arena.mouse_ang)
arena.mouse_ang_label.pack()
def learn(self):
    """
    Main loop of training.
    """
    if self.args.load_model:
        start = self.args.load_example[1] + 1
    else:
        start = 1
    for i in range(start, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        greedy = i == 1 and not self.args.load_model
        if not self.skipFirstSelfPlay or i > 1:
            iteration_train_examples = deque([], maxlen=self.args.maxlenOfQueue)
            num_eps = self.args.numEps
            if greedy:
                num_eps = self.args.greedy_eps
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=num_eps)
            end = time.time()
            for eps in range(num_eps):
                if greedy:
                    iteration_train_examples += self.execute_initialize_episode()
                else:
                    iteration_train_examples += self.execute_episodes()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=num_eps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

        # save the iteration examples to the history
        if not greedy:
            self.trainExamplesHistory.append(iteration_train_examples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file; unlike the other variants, this
            # one saves under the current iteration index i
            self.saveTrainExamples(i)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)
        else:
            trainExamples = iteration_train_examples

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.h5')
        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.h5')
        self.nnet.train(trainExamples)
        if not greedy:
            pmcts = MCTSSingle(self.game, self.pnet, self.args)
            nmcts = MCTSSingle(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(pmcts, nmcts, self.game, self.args)
            scores = arena.playGames(self.args.arenaCompare)
            if scores[1] == 0 or float(
                    scores[1]) / sum(scores) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.h5')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(
                    folder=self.args.checkpoint,
                    filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.h5')
        else:
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='checkpoint_1.h5')
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains neural network with
    examples in trainExamples (which has a maximum length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """

    for i in range(1, self.args.numIters+1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        print(str(self.game.innerN) + "x" + str(self.game.innerM))
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                # self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                self.mcts = MCTS(self.nnet, self.args)   # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i-1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        tempfile = 'temp.pth.tar'
        bestfile = 'best.pth.tar'

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
        self.nnet.train(trainExamples)

        if self.arenaEnabled:
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)

            pmcts = MCTS(self.pnet, self.args)
            nmcts = MCTS(self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            # arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
            #               lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
            arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                          lambda x, y: nmcts.getActionProb(x, y, temp=0), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
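# getCheckpointFile is assumed rather than shown; in alpha-zero-general
# style code it simply derives a per-iteration filename. A minimal sketch
# consistent with how it is called above:
def getCheckpointFile(self, iteration):
    return 'checkpoint_' + str(iteration) + '.pth.tar'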
gladius = weapons["Gladius"]

char1 = readCharacterFromXML(os.path.join(basedir, "res/Charactere/Agilitus.xml"))
char1.name = "Hugo"
action1 = Action(char1, hieb0, gladius)
reaction1 = Action(char1, block0, gladius)

char2 = readCharacterFromXML(os.path.join(basedir, "res/Charactere/Agilitus.xml"))
char2.name = "Herbert"
action2 = Action(char2, hieb0, gladius)
reaction2 = Action(char2, block0, gladius)

arena = Arena()
arena.addTeam("blue", [char1])
arena.addTeam("red", [char2])
print(arena.getCharacters())

# Create a Player for each character
player1 = RandomPlayer("Player1", char1, arena, action1, reaction1)
player2 = RandomPlayer("Player2", char2, arena, action2, reaction2)
players = [player1, player2]
points = [0, 0]

for round_num in range(100):  # avoid shadowing the round() builtin
    # reset chars
    char1.reset()
    char2.reset()
def runGame(catRadius, catAngle, mouseAngle):
    """
    Main function that organizes and runs the game.
    Creates the statue, mouse, and cat and adds them to the arena.
    """
    tk = Tk()                            # Create a Tk top-level widget
    arena = Arena(tk)                    # Create an Arena widget, arena
    arena.pack()                         # Tell arena to pack itself on screen
    statueObj = Statue(Vector(200, 200))          # Create statue at (200, 200)
    arena.add(statueObj)                 # Add statue to arena
    mouseObj = Mouse(mouseAngle, statueObj)       # Create mouse with statue passed as argument
    arena.add(mouseObj)                  # Add mouse to arena
    cat_radius_scaled = catRadius * scale_factor  # Scale the radius of the cat to pixels
    catObj = Cat(catAngle, cat_radius_scaled, mouseObj)   # Create cat with mouse passed as argument
    arena.add(catObj)                    # Add cat to arena
    arena.setLabels()                    # Set labels to starting position
    tk.mainloop()                        # Enter the Tkinter event loop
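# scale_factor is assumed to be defined elsewhere (e.g. in a shared
# constants module) as pixels per meter. A hypothetical definition
# consistent with how it is used above; the real value is not shown:
scale_factor = 20   # pixels per meter (hypothetical)

# Usage mirrors the call above: a cat radius given in meters becomes a
# pixel radius for drawing, e.g. 1.5 m * 20 px/m = 30 px.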
from Tkinter import * # Import everything from Tkinter from Arena import Arena # Import our Arena from Turtle import Turtle # Import our Turtle from Vector import * # Import everything from our Vector tk = Tk() # Create a Tk top-level widget arena = Arena(tk) # Create an Arena widget, arena arena.pack() # Tell arena to pack itself on screen turtle = Turtle(Vector(200, 200), 0) arena.add(turtle) # Add a very simple, basic turtle tk.mainloop() # Enter the Tkinter event loop
from Tkinter import * # Import everything from Tkinter from Arena import Arena # Import our Arena from Turtle import Turtle # Import our Turtle from Vector import * # Import everything from our Vector tk = Tk() # Create a Tk top-level widget arena = Arena(tk) # Create an Arena widget, arena arena.pack() # Tell arena to pack itself on screen arena.add(Turtle(Vector(200,200), 0)) # Add a very simple, basic turtle tk.mainloop() # Enter the Tkinter event loop
def learn(self): """ Performs numIters iterations with numEps episodes of self-play in each iteration. After every iteration, it retrains neural network with examples in trainExamples (which has a maximum length of maxlenofQueue). It then pits the new neural network against the old one and accepts it only if it wins >= updateThreshold fraction of games. """ for i in range(self.args.startIter, self.args.numIters + self.args.startIter): # bookkeeping print('------ITER ' + str(i) + '------') # examples of the iteration if not self.args.skipFirstSelfPlay or i > self.args.startIter: #or i > 1 iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue) n_processes: int = 4 # or, if you have enough compute (and VRAM): # n_processes = mp.cpu_count() with mp.Pool(n_processes) as pool: self.nnet.nnet.share_memory() pbar = tqdm.tqdm(range(self.args.numEps), position=0) pbar.set_description( f"Self Play using {n_processes} processes") for eps in pbar: # Arguments for each worker. worker_args = [(i, self.game, self.args, self.nnet) for i in range(n_processes)] # Apply the executeEpisode method on each argument: for worker_examples in pool.starmap( Coach.executeEpisode, worker_args): iterationTrainExamples.extend(worker_examples) # save the iteration examples to the history self.trainExamplesHistory.append(iterationTrainExamples) if len(self.trainExamplesHistory ) > self.args.numItersForTrainExamplesHistory: print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples") self.trainExamplesHistory.pop(0) # backup history to a file # NB! the examples were collected using the model from the previous iteration, so (i-1) self.saveTrainExamples(i - 1) # shuffle examples before training trainExamples = [] for e in self.trainExamplesHistory: trainExamples.extend(e) shuffle(trainExamples) if not self.args.skipFirstTrain or i > self.args.startIter: # training new network, keeping a copy of the old one self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='old.pth.tar') self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='old.pth.tar') #pmcts = MCTS(self.game, self.pnet, self.args) self.nnet.train(trainExamples) self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='new.pth.tar') #nmcts = MCTS(self.game, self.nnet, self.args) pmcts = MCTS(self.game, self.pnet, self.args) nmcts = MCTS(self.game, self.nnet, self.args) print('PITTING AGAINST PREVIOUS VERSION') arena = Arena(pmcts, nmcts, self.game, self.args.resignationOn, self.args.resignationThreshold) #arena = Arena(lambda state,player: np.where(pmcts.getActionProb(state,player, temp=0) == 1)[0][0], # lambda state,player: np.where(nmcts.getActionProb(state,player, temp=0) == 1)[0][0], self.game) pwins, nwins, draws = arena.playGames(self.args.arenaCompare) print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws)) if pwins + nwins == 0 or float(nwins) / ( pwins + nwins) < self.args.updateThreshold: print('REJECTING NEW MODEL') self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='old.pth.tar') else: print('ACCEPTING NEW MODEL') self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i)) self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
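# Two things are worth noting about the parallel self-play loop above.
# First, each of the numEps progress-bar steps launches n_processes
# episodes via starmap, so the total number of self-play games per
# iteration is numEps * n_processes. Second, for
# pool.starmap(Coach.executeEpisode, ...) to work, executeEpisode must be
# callable without a bound instance and its arguments must be picklable
# (the share_memory() call lets workers reuse one set of network weights).
# A hypothetical sketch of a compatible signature; the real body is not
# shown:
class Coach:
    @staticmethod
    def executeEpisode(worker_id, game, args, nnet):
        """Play one self-play episode and return its training examples."""
        mcts = MCTS(game, nnet, args)   # fresh search tree per episode
        examples = []                   # (board, pi, v) tuples
        # ... self-play rollout elided ...
        return examples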