def __init__(self, actor_id, args):
    """Create the Tetris game and the buffers used to build observations.

    ``actor_id`` is accepted but not read by this constructor.  ``args``
    must provide ``random_start``, ``single_life_episodes``, ``visualize``
    and ``rgb``.
    """
    self.tetris = TetrisApp(emulator=True)
    self.legal_actions = [0, 1, 2, 3, 4]
    self.screen_width, self.screen_height = 288, 396
    self.lives = 1

    # Run options copied from the argument namespace.
    self.random_start = args.random_start
    self.single_life_episodes = args.single_life_episodes
    self.call_on_new_frame = args.visualize

    # Counters.
    self.global_step = 0
    self.compteur = 0

    # Processed historical frames that will be fed in to the network
    # (i.e., four 84x84 images)
    self.rgb = args.rgb
    self.depth = 3 if self.rgb else 1

    height, width = self.screen_height, self.screen_width
    self.rgb_screen = np.zeros((height, width, 3), dtype=np.uint8)
    self.gray_screen = np.zeros((height, width, 1), dtype=np.uint8)

    pool_buffer = np.empty((2, height, width, self.depth), dtype=np.uint8)
    self.frame_pool = FramePool(pool_buffer, self.__process_frame_pool)

    observation_buffer = np.zeros(
        (IMG_SIZE_X, IMG_SIZE_Y, self.depth, NR_IMAGES), dtype=np.uint8)
    self.observation_pool = ObservationPool(observation_buffer, self.rgb)
def __init__(self):
    """Wire up the game, its AI and a random starting population."""
    # The application receives this object; the AI receives the application.
    self.app = TetrisApp(self)
    agent = AI(self.app)
    self.ai = agent
    self.app.ai = agent

    # Build the initial population one chromosome at a time.
    population = []
    for _ in range(POPULATION_SIZE):
        population.append(self.random_chromosome())
    self.population = population

    self.current_chromosome = 0
    self.current_generation = 1

    # Start by evaluating the first chromosome's weights.
    first = self.population[self.current_chromosome]
    self.ai.heuristics = first.heuristics
def __init__(self, alpha = 0.01, gamma = .5, epsilon = 1):
    """Store the learning parameters and create the Q-table and game.

    alpha   -- stored as ``self.alpha``
    gamma   -- stored as ``self.discount``
    epsilon -- stored as ``self.epsilon``
    """
    self.qval = util.Counter()
    self.alpha, self.epsilon, self.discount = alpha, epsilon, gamma
    self.Tetris = TetrisApp()
    self.boardprev = 0.
def __init__(self):
    """Read the GA settings, build the population and attach the game."""
    # Settings taken from the module-level configuration constants.
    population_size = POPSIZE
    elite_count = ELITE
    self.num_of_organisms = population_size
    self.survivors = elite_count
    self.new_organisms = population_size - elite_count
    self.mutation_rate = MUTRATE
    self.crossover_rate = CROSSRATE

    # Initial population, then the cursor into it and the generation count.
    self.population = self.InitPop(population_size)
    self.current_organism = 0
    self.current_generation = 0

    # Piece-sequence mode and the seed that is passed to start_game.
    self.sequenceType = SEQUENCE
    self.seed = numpy.random.random()

    # The application gets this GA; the agent gets the application so the
    # two can reach each other.
    self.app = TetrisApp(self)
    self.ai = Agent(self.app)
    self.app.ai = self.ai

    # Per-generation timing, fitness cache and last elite scores.
    self.cycleStart = 0
    self.cycleEnd = 0
    self.fitnessDictionary = {}
    self.lastBest = []
def getAction(self, state):
    """
    Returns the result of the next action in the path chosen earlier
    (in registerInitialState).  Quits the game if no more actions to take.

    state = a GameState object (TetrisSearch.py)

    One action index is consumed per call, whether or not a pending pygame
    event caused the action to be executed.  Returns None when no event
    triggered an action.
    """
    key_actions = {
        'ESCAPE': state.quit,
        'LEFT': lambda: state.move(-1),
        'RIGHT': lambda: state.move(+1),
        'DOWN': lambda: state.drop(True),
        'UP': state.rotate_stone,
        'p': state.toggle_pause,
        'SPACE': state.start_game,
        'RETURN': state.insta_drop
    }

    # Fix: the cursor used to be reset to 0 on every call, so only the first
    # action of the path could ever be returned.  Keep it across calls and
    # start at 0 only when it has not been set yet.
    i = getattr(self, 'actionIndex', 0)
    self.actionIndex = i + 1

    # Fix: was ``pygame.eveng.get()`` (AttributeError).
    for event in pygame.event.get():
        if event.type == pygame.USEREVENT + 1:
            # NOTE(review): drop/quit are called on self here while every
            # other game action goes through ``state`` -- confirm intended.
            self.drop(False)
        elif event.type == pygame.QUIT:
            self.quit()
        elif i < len(self.actions):
            for key in key_actions:
                # NOTE(review): eval() resolves the key name ('LEFT', 'p',
                # ...) to a same-named variable that must exist in scope;
                # the strings are fixed literals, not external input.
                if self.actions[i] == eval(key):
                    return key_actions[key]()
            # The repeated USEREVENT/QUIT checks that used to follow were
            # unreachable in this branch (both were ruled out above) and
            # have been dropped.
        else:
            # Path exhausted.
            # NOTE(review): called on the class, not an instance -- this
            # only works if TetrisApp.quit needs no ``self``; verify.
            return TetrisApp.quit()
# Setup to grab statistics # - Max, mean, min, std, variance # Setup containers for highest score and highest scoring individual # Create a Genetic Algorithm loop # - This loop will achieve the following: # a. Select and clone the next generation individuals # b. Apply crossover and mutation on the offspring # c. Replace population with offspring #------------------------------------------------------------ n_gen = 50 # n_gen = 100 prob_xover = 0.3 prob_mut = 0.05 pop = toolbox.population(n=25) # pop = toolbox.population(n=1000) game = TetrisApp(training=True) best_ind = [] best_score = -1 max_out = open("max50.txt", "w") mean_out = open("mean50.txt", "w") min_out = open("min50.txt", "w") std_out = open("std50.txt", "w") var_out = open("var50.txt", "w") for g in range(1, n_gen + 1): print("Current Generation " + str(g)) scores = [] for ind in pop: score = game.run_train(ind) scores.append(score)
def run_game(self, player, seed=10000, debug=False):
    """Play one game with ``player`` and return whatever ``run()`` returns.

    ``seed`` and ``debug`` are forwarded to ``TetrisApp`` unchanged.
    """
    return TetrisApp(player, seed=seed, debug=debug).run()
class GA(object):
    """Genetic algorithm that evolves heuristic weight vectors for an agent.

    The GA owns the ``TetrisApp`` and the ``Agent``.  The application is
    handed this object at construction; ``GameOver`` is the hook that
    receives the lines cleared when a game ends, after which the next
    organism (or the next generation) is loaded and a new game is started.

    Configuration is read from module-level constants: POPSIZE, ELITE,
    MUTRATE, CROSSRATE, SEQUENCE, NUMGAMES, SELECTIONTYPE, CROSSTYPE,
    LOWERBOUND, UPPERBOUND and RESULTS.
    """

    def __init__(self):
        self.num_of_organisms = POPSIZE
        self.survivors = ELITE
        self.new_organisms = self.num_of_organisms - self.survivors
        self.mutation_rate = MUTRATE
        self.crossover_rate = CROSSRATE
        #initialize the population
        self.population = self.InitPop(self.num_of_organisms)
        #keep track of which organism in the population we are working on
        self.current_organism = 0
        #keeps track of what generation we are on
        self.current_generation = 0
        #GA gets the application
        self.sequenceType = SEQUENCE
        self.seed = numpy.random.random()
        self.app = TetrisApp(self)
        #GA gets our agent, which needs the organism
        #so it can access weights of the organism
        self.ai = Agent(self.app)
        self.app.ai = self.ai
        self.cycleStart = 0
        self.cycleEnd = 0
        #maps tuple(weights) -> fitness of organisms already evaluated
        self.fitnessDictionary = {}
        #elite fitness values of the previous generation
        self.lastBest = []

    def RandomOrganism(self):
        """Return a normalized Organism with 12 uniformly random weights."""
        nums = [
            numpy.random.uniform(LOWERBOUND, UPPERBOUND) for _ in range(12)
        ]
        organism = Organism(nums)
        self.normalize(organism)
        return organism

    def InitPop(self, populationSize):
        """Return a list of ``populationSize`` random organisms."""
        #init population with a seed
        #random.seed(7)
        #population.append(Organism([-0.25835108880355967, -0.18873479853738032, -0.6081190254748627, -0.5281331622290867, -0.0936639080926526, -0.10826897335053938, 0.15010957868145391, -0.21161009827721672, -0.04113776799016001, 0.2957493369775496, -0.07093022881256028, -0.2586553756116776]))
        return [self.RandomOrganism() for _ in range(populationSize)]

    #start running the game
    def Run(self):
        """Write the run header to RESULTS and start the application loop."""
        with open(RESULTS, 'w') as f:
            f.write(
                "\n Cross Type: %s, Selection Type: %s, Crossover Rate: %s, Mutation Rate: %s , Replacement Per Cycle: %s\n Theoretical Line limit: %s "
                % (CROSSTYPE, SELECTIONTYPE, self.crossover_rate,
                   self.mutation_rate, self.new_organisms,
                   (NUMGAMES * self.app.limit * 4 / 10)))
            #all heuristics (from a disabled header line):
            #Aggregate Height, Bumpiness, Holes, LinesCleared, Connected
            #Holes, Blockades, Altitude Delta, Weighted Blocks, H-Roughness,
            #V-Roughness, Wells, Biggest Well, Total Height.
        self.cycleStart = time.time()
        self.app.run()

    def NextAI(self):
        """Advance to the next organism, rolling over to a new generation."""
        self.current_organism += 1
        self.app.piecesPlayed = 0
        #if we have worked on every organism in the current population, get
        #the next generation
        if self.current_organism >= self.num_of_organisms:
            self.current_organism = 0
            self.NextGeneration()
        #this is for if we keep the same sequence for every generation and
        #each organism only plays 1 game - we can use this to skip playing
        #the elites
        if SEQUENCE == "fixed" and NUMGAMES == 1:
            while self.current_organism < self.num_of_organisms:
                organism = self.population[self.current_organism]
                key = tuple(organism.heuristics)
                if key not in self.fitnessDictionary:
                    break
                organism.fitness = self.fitnessDictionary[key]
                self.current_organism += 1
            if self.current_organism >= self.num_of_organisms:
                self.current_organism = 0
                self.NextGeneration()
        #update the heuristics for the organism we are working on
        self.ai.heuristics = self.population[self.current_organism].heuristics

    def _restart_game(self):
        """Start a new game using the seed policy selected by SEQUENCE."""
        if SEQUENCE == "fixed":
            self.app.start_game(self.seed)
        elif SEQUENCE == "random":
            self.app.start_game(numpy.random.random())

    #handles when a game we are testing the current organism on ends
    def GameOver(self, lines_cleared):
        """Record ``lines_cleared`` for the current organism and restart."""
        organism = self.population[self.current_organism]
        organism.fitness += lines_cleared
        organism.played += 1
        if organism.played == NUMGAMES:
            #organism is fully evaluated: cache it and load the next one
            self.fitnessDictionary[tuple(
                organism.heuristics)] = organism.fitness
            self.NextAI()
        else:
            #same organism plays another game
            self.app.piecesPlayed = 0
        self._restart_game()

    #add normalization
    def normalize(self, org):
        """Scale ``org.heuristics`` in place to unit Euclidean length.

        An all-zero vector is left untouched instead of being divided by 0.
        """
        norm = numpy.sqrt(sum(h * h for h in org.heuristics))
        if norm == 0:
            return
        for i in range(len(org.heuristics)):
            org.heuristics[i] /= norm

    #check if the population has converged -- TODO
    def tournament(self):
        """Return the two best-ranked organisms among random draws.

        The population is expected to be sorted best-first, so the two
        smallest drawn indices win.  The number of draws is 10% of the
        population but never fewer than 2, so both slots are always filled.
        The same index can be drawn twice; callers re-draw in that case.
        """
        indices = list(range(len(self.population)))
        v1 = None
        v2 = None
        draws = max(2, int(self.num_of_organisms * .1))
        for _ in range(draws):
            selected = numpy.random.choice(indices)
            if v1 is None or selected < v1:
                v2 = v1
                v1 = selected
            elif v2 is None or selected < v2:
                v2 = selected
        return self.population[v1], self.population[v2]

    #roulette selection
    def roulette(self):
        """Return one organism picked with probability ~ its fitness.

        Falls back to a uniform pick when the total fitness is not positive.
        """
        fSum = float(sum(org.fitness for org in self.population))
        if fSum <= 0:
            return random.choice(self.population)
        r = random.random()
        cumulative = 0.0
        #worse organisms are more likely to miss in roulette
        for organism in self.population:
            cumulative += organism.fitness / fSum
            if r <= cumulative:
                return organism
        #floating point rounding left the running sum just below r: pick the
        #last organism that carries any weight
        return next(
            (org for org in reversed(self.population) if org.fitness > 0),
            self.population[-1])

    def _prune_fitness_cache(self):
        """Drop cached fitness values of organisms no longer in the population."""
        live = {tuple(org.heuristics) for org in self.population}
        #iterate over a snapshot of the keys: deleting while iterating the
        #dict itself raises RuntimeError
        for key in list(self.fitnessDictionary):
            if key not in live:
                del self.fitnessDictionary[key]

    def NextGeneration(self):
        """Log the finished generation and breed the next one."""
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        elite = self.population[:self.survivors]
        averageScore = 0
        if elite:
            averageScore = sum(a.fitness for a in elite) / len(elite)
        self.cycleEnd = time.time() - self.cycleStart
        #print the last generation out
        with open(RESULTS, 'a') as f:
            f.write(
                "\nGeneration: %s, Sequence Type: %s, Cycle Time: %s Elite Average Lines Cleared in %s Games: %s\n"
                % (self.current_generation, SEQUENCE,
                   str(datetime.timedelta(seconds=self.cycleEnd)), NUMGAMES,
                   averageScore))
            for a in self.population:
                f.write("%s, Age: %s Weights: %s - Lines Cleared:%s\n" %
                        (a.name, a.age, a.heuristics, a.fitness))
        self._prune_fitness_cache()
        #increment the generation
        self.current_generation += 1
        #create the new population with only the survivors
        self.SelectSurvivors()
        eliteScores = [org.fitness for org in self.population[:self.survivors]]
        #elite did not change since last generation: mutate more often
        if eliteScores == self.lastBest:
            self.mutation_rate += .05
        #create the new organisms to add to the new_pop
        if SELECTIONTYPE == "roulette":
            for _ in range(self.new_organisms):
                #select two distinct parents
                parent1 = self.roulette()
                parent2 = self.roulette()
                while parent1 == parent2:
                    parent2 = self.roulette()
                child = self.Crossover(parent1, parent2)
                #mutate with the adaptive rate (the constant MUTRATE was
                #used before, which made the bump above ineffective)
                if numpy.random.random() < self.mutation_rate:
                    self.mutate(child)
                self.population.append(child)
        elif SELECTIONTYPE == "tournament":
            for _ in range(self.new_organisms):
                p1, p2 = self.tournament()
                #NOTE(review): loops until two distinct parents with
                #non-zero fitness are drawn; never ends if fewer than two
                #such organisms exist
                while p1 == p2 or p1.fitness == 0 or p2.fitness == 0:
                    p1, p2 = self.tournament()
                child = self.Crossover(p1, p2)
                if numpy.random.random() < self.mutation_rate:
                    self.mutate(child)
                self.population.append(child)
        #reset the fitness to 0
        for org in self.population:
            org.played = 0
            org.fitness = 0
        self.lastBest = eliteScores
        self.cycleStart = time.time()
        #check to make sure we have the correct number of organisms in the
        #new population
        assert self.num_of_organisms == len(
            self.population
        ), "ERROR: new population doesnt the correct number of organisms have %s, want %s" % (
            len(self.population), self.num_of_organisms)

    #Will return the survivors of a population, will return self.survivors
    #number of organisms
    def SelectSurvivors(self):
        """Keep only the ``self.survivors`` fittest organisms and age them."""
        #sort the population by Organism.fitness
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        #kill off amount needed to introduce specified amount of new organisms
        self.population = self.population[:self.survivors]
        for organism in self.population:
            organism.age += 1

    #takes two parents and does crossover
    #returns an Organism
    def Crossover(self, parent1, parent2):
        """Return a normalized child of ``parent1`` and ``parent2``.

        CROSSTYPE "uniform" takes each weight from either parent with equal
        probability; "average" sums the weights scaled by each parent's
        fitness.  Any other CROSSTYPE raises ValueError.
        """
        #two point
        # add other crossover methods that can be specified at launch and
        # crossover using the CROSSRATE
        if CROSSTYPE == "uniform":
            child = [
                w1 if numpy.random.random() < .5 else w2
                for w1, w2 in zip(parent1.heuristics, parent2.heuristics)
            ]
        elif CROSSTYPE == "average":
            #weighted average crossover
            a = parent1.fitness
            b = parent2.fitness
            child = [(a * w1) + (b * w2)
                     for w1, w2 in zip(parent1.heuristics, parent2.heuristics)]
        else:
            raise ValueError("CROSSTYPE %s not implemented" % CROSSTYPE)
        offspring = Organism(child)
        self.normalize(offspring)
        return offspring

    #mutates the weights of a chromosome
    def mutate(self, organism):
        """Shift one randomly chosen weight by up to +/-0.2 and renormalize."""
        #mutation range of -.2 to +.2
        mutation = numpy.random.random() * .4 - .2
        #choose a random weight to mutate (any vector length, not just 12)
        x = numpy.random.randint(0, len(organism.heuristics))
        organism.heuristics[x] += mutation
        self.normalize(organism)
class QLearningAgent(TetrisApp):
    """Epsilon-greedy Q-learning agent that plays on its own TetrisApp.

    A state is passed around as a ``(board_state, stone)`` pair and an
    action as a ``(rotation, column)`` pair.  Q-values live in a
    ``util.Counter`` keyed by ``hash(str((state, action)))``.
    """

    def __init__(self, alpha = 0.01, gamma = .5, epsilon = 1):
        self.qval = util.Counter()
        self.alpha = alpha
        self.epsilon = epsilon
        self.discount = gamma
        self.Tetris = TetrisApp()
        self.boardprev = 0.
        # Accumulated by observeTransition(); it was never initialised
        # before, so the first observeTransition() raised AttributeError.
        self.episodeRewards = 0.0

    @staticmethod
    def _key(state, action):
        """Return the Q-table key for ``(state, action)``."""
        # The pair is stringified and hashed so that unhashable members
        # (e.g. list-based boards) can still be used as a key.
        return hash(str((state, action)))

    def observeTransition(self, state, action, nextState, deltaReward):
        """Add ``deltaReward`` to the episode total and learn from the step."""
        self.episodeRewards += deltaReward
        self.update(state, action, nextState, deltaReward)

    def getQValue(self, state, action):
        """Return Q(state, action); unseen pairs are stored as 0.0 first."""
        key = self._key(state, action)
        if key not in self.qval:
            self.qval[key] = 0.0
        return self.qval[key]

    def computeValueFromQValues(self, state):
        """Return the best Q-value available in ``state`` (0.0 if none)."""
        action = self.computeActionFromQValues(state)
        if action is None:
            return 0.0
        return self.getQValue(state, action)

    def computeActionFromQValues(self, state):
        """Return the legal action with the highest Q-value, or None.

        Ties go to the action listed last.
        """
        finalaction = None
        maxval = float('-inf')
        # state[1] is the stone; the legal actions are queried once.
        for action in self.Tetris.get_legal_actions(state[1]):
            Qval = self.getQValue(state, action)
            if Qval >= maxval:
                maxval = Qval
                finalaction = action
        return finalaction

    def helperfunction(self, lst, legalactions):
        """Return ``(value + best follow-up value, action)`` for a candidate.

        ``lst`` is a ``(value, action, new_board)`` triple.
        """
        value, action, new_board = lst
        best_next = max(self.ideal_place_2(new_board, legalactions, True))[0]
        return (value + best_next, action)

    def getAction(self, state):
        """Pick an action for ``state``.

        With probability ``self.epsilon`` the action is chosen by the
        ``ideal_place_2`` look-ahead, otherwise by the learned Q-values.
        Returns None when there is no legal action.
        """
        legalActions = self.Tetris.get_legal_actions(state[1])
        if len(legalActions) == 0:
            return None
        if util.flipCoin(self.epsilon):
            actionlist = self.ideal_place_2(self.Tetris.board, legalActions,
                                            False)
            valuelist = [
                self.helperfunction(x, legalActions) for x in actionlist
            ]
            return max(valuelist)[1]
        return self.computeActionFromQValues(state)

    def update(self, state, action, nextState, reward):
        """Apply the Q-learning update for one observed transition."""
        target = reward + self.discount * self.computeValueFromQValues(
            nextState)
        self.qval[self._key(state, action)] += self.alpha * (
            target - self.getQValue(state, action))

    def getPolicy(self, state):
        """Return the greedy action for ``state``."""
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        """Return the greedy value of ``state``."""
        return self.computeValueFromQValues(state)

    def _draw_frame(self, n):
        """Redraw the screen for the current game state.

        On game over the game is restarted while ``n < 10000`` and quit
        otherwise.
        """
        game = self.Tetris
        game.screen.fill((0, 0, 0))
        if game.gameover:
            game.center_msg(
                "Game Over!\nYour score: %d\nPress space to continue" %
                game.score)
            if n < 10000:
                game.start_game()
            else:
                game.quit()
        elif game.paused:
            game.center_msg("Paused")
        else:
            pygame.draw.line(game.screen, (255, 255, 255),
                             (game.rlim + 1, 0),
                             (game.rlim + 1, game.height - 1))
            game.disp_msg("Next:", (game.rlim + cell_size, 2))
            game.disp_msg(
                "Score: %d\n\nLevel: %d\nLines: %d" %
                (game.score, game.level, game.lines),
                (game.rlim + cell_size, cell_size * 5))
            game.draw_matrix(game.bground_grid, (0, 0))
            game.draw_matrix(game.board, (0, 0))
            game.draw_matrix(game.stone, (game.stone_x, game.stone_y))
            game.draw_matrix(game.next_stone, (cols + 1, 2))
        pygame.display.update()

    def run(self, n):
        """Play one game, learning after every placed stone.

        ``n`` is the round counter: exploration stays at 1 for the first
        ``value_iter_rounds`` rounds and decays as ``1 / (15 * log(n + 1))``
        afterwards.
        """
        game = self.Tetris
        key_actions = {
            'ESCAPE': game.quit,
            'LEFT': lambda: game.move(-1),
            'RIGHT': lambda: game.move(+1),
            'DOWN': lambda: game.drop(True),
            'UP': game.rotate_stone,
            # Was a second 'SPACE' entry, which the start_game entry below
            # silently overwrote; pause is bound to 'p' instead.
            'p': game.toggle_pause,
            'SPACE': game.start_game,
            'RETURN': game.insta_drop
        }
        game.board = tetris.new_board()
        self.boardprev = game.board
        if n < value_iter_rounds:
            self.epsilon = 1
        else:
            self.epsilon = 1 / (15. * math.log(float(n) + 1))
        game.gameover = False
        game.paused = False

        rot, col = self.getAction((game.get_board_state(game.board),
                                   game.stone))
        prevboard = game.board
        n += 1
        while not game.gameover:
            # NOTE(review): the previous state is keyed by the raw board
            # while the next state uses get_board_state(board) -- confirm
            # both are meant to address the same Q-table entries.
            self.update((prevboard, game.stone), (rot, col),
                        (game.get_board_state(game.board), game.stone),
                        game.heuristic(game.board))
            prevboard = tetris.deepishcopy(game.board)
            rot, col = self.getAction((game.get_board_state(game.board),
                                       game.stone))

            # One render / place / event pass per chosen action.
            self._draw_frame(n)
            game.place_brick(rot, col)
            for event in pygame.event.get():
                if event.type == pygame.USEREVENT + 1:
                    pass
                elif event.type == pygame.QUIT:
                    game.quit()
                elif event.type == pygame.KEYDOWN:
                    for key in key_actions:
                        # Attribute lookup instead of eval("pygame.K_" + key).
                        if event.key == getattr(pygame, "K_" + key):
                            key_actions[key]()
class TetrisEmulator(BaseEnvironment):
    """Environment that drives a ``TetrisApp`` and serves stacked frames."""

    def __init__(self, actor_id, args):
        """Create the game and the frame / observation buffers.

        ``actor_id`` is accepted but not read here.  ``args`` must provide
        ``random_start``, ``single_life_episodes``, ``visualize`` and ``rgb``.
        """
        self.tetris = TetrisApp(emulator=True)
        self.legal_actions = [0, 1, 2, 3, 4]
        self.screen_width, self.screen_height = 288, 396
        self.lives = 1

        self.random_start = args.random_start
        self.single_life_episodes = args.single_life_episodes
        self.call_on_new_frame = args.visualize

        self.global_step = 0
        self.compteur = 0

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.rgb = args.rgb
        self.depth = 3 if self.rgb else 1

        height, width = self.screen_height, self.screen_width
        self.rgb_screen = np.zeros((height, width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((height, width, 1), dtype=np.uint8)

        raw_frames = np.empty((2, height, width, self.depth), dtype=np.uint8)
        self.frame_pool = FramePool(raw_frames, self.__process_frame_pool)

        stacked = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, self.depth, NR_IMAGES),
                           dtype=np.uint8)
        self.observation_pool = ObservationPool(stacked, self.rgb)

    def get_legal_actions(self):
        """Return the list of action ids."""
        return self.legal_actions

    def __get_screen_image(self):
        """
        Grab the current frame from the game.
        Return: the RGB frame in rgb mode, the grayscale frame otherwise
        """
        self.gray_screen = self.tetris.getScreen(rgb=False)
        if self.rgb:
            self.rgb_screen = self.tetris.getScreen()
        if self.call_on_new_frame:
            # Visualisation hook always receives a freshly fetched RGB frame.
            self.rgb_screen = self.tetris.getScreen()
            self.on_new_frame(self.rgb_screen)
        self.compteur += 1
        return self.rgb_screen if self.rgb else self.gray_screen

    def on_new_frame(self, frame):
        """Hook called with each RGB frame when visualising; no-op here."""
        pass

    def __new_game(self):
        """ Restart the game, optionally idling for a random number of steps """
        self.tetris.init_game()
        self.lives = 1
        if not self.random_start:
            return
        idle_steps = random.randint(0, MAX_START_WAIT)
        for _ in range(idle_steps):
            self.tetris.act(0)

    def __process_frame_pool(self, frame_pool):
        """ Merge the pooled frames (element-wise max) and resize to 84x84 """
        grayscale = not self.rgb
        merged = np.amax(frame_pool, axis=0)
        if grayscale:
            # Drop the channel axis before resizing.
            merged = np.reshape(merged,
                                (self.screen_height, self.screen_width))
        resized = imresize(merged, (84, 84), interp='nearest')
        resized = resized.astype(np.uint8)
        if grayscale:
            # Put the channel axis back.
            resized = np.reshape(resized, (84, 84, 1))
        return resized

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action ``a`` and push the last frames into the frame pool """
        reward = 0
        skipped = times - FRAMES_IN_POOL
        for _ in range(skipped):
            reward += self.tetris.act(a)
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for _ in range(FRAMES_IN_POOL):
            reward += self.tetris.act(a)
            frame = self.__get_screen_image()
            if not self.rgb:
                frame = np.reshape(
                    frame, (self.screen_height, self.screen_width, 1))
            self.frame_pool.new_frame(frame)
        return reward

    def get_initial_state(self):
        """ Start a new game and return the first pooled observation """
        self.__new_game()
        for _ in range(NR_IMAGES):
            self.__action_repeat(0)
            processed = self.frame_pool.get_processed_frame()
            self.observation_pool.new_observation(processed)
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Apply ``action``; return (observation, reward, terminal) """
        reward = self.__action_repeat(action)
        processed = self.frame_pool.get_processed_frame()
        self.observation_pool.new_observation(processed)
        terminal = self.__is_terminal()
        if terminal:
            self.lives = 0
        else:
            self.lives = 1
        observation = self.observation_pool.get_pooled_observations()
        self.global_step += 1
        return observation, reward, terminal

    def __is_terminal(self):
        """Return the game's ``gameover`` flag."""
        return self.tetris.gameover

    def __is_over(self):
        """Return the game's ``gameover`` flag."""
        return self.tetris.gameover

    def get_noop(self):
        """Return the fixed vector ``[1.0, 0.0]``."""
        return [1.0, 0.0]
def dqn():
    """Train a DQNAgent on TetrisApp and save a moving-average score plot.

    Each episode repeatedly asks the environment for its reachable next
    states, lets the agent pick the best one, plays the matching action and
    stores the transition.  The agent is trained after every episode whose
    index is a multiple of ``train_every``.  The plot is written to
    "nlinker.png".
    """
    env = TetrisApp(8, 16, 750, False, 40, 30 * 100)

    # Run settings.
    episodes = 5000
    max_steps = None
    render_every = 50
    log_every = 50
    render_delay = None
    train_every = 1

    # Agent / training settings.
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    replay_start_size = 2000
    n_neurons = [32, 32]
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)

    # TensorBoard logging (log_dir / CustomTensorBoard) is disabled.
    scores = []
    env.pcrun()

    for episode in tqdm(range(episodes)):
        env.reset()
        current_state = env._get_board_props(env.board)
        done = False
        steps = 0
        render = bool(render_every and episode % render_every == 0)

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # First action whose resulting state is the chosen one.
            best_action = next(
                (action for action, state in next_states.items()
                 if state == best_state),
                None)

            reward, done = env.pcplace(best_action[0], best_action[1])
            chosen_state = next_states[best_action]
            agent.add_to_memory(current_state, chosen_state, reward, done)
            current_state = chosen_state
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs: per-window avg / min / max score logging is disabled.

    plt.xlabel("Episodes")
    plt.ylabel('Average score over 30 episodes')
    plt.grid()
    x_axis = np.linspace(30, episodes, episodes - 29)
    plt.plot(x_axis, moving_average(scores, 30))
    plt.savefig("nlinker.png")
within the `models` directory. An example is given as `ai_rando`.", ) parser.add_argument( "-d", "--debug", action="store_true", help="Enable debug mode: The Tetris Engine will wait until input \ is received from your Model before updating the frame") parser.add_argument( '-s', '--seed', dest='seed', default=69, help= 'The seed for the random number generator. Affects block generation') args = parser.parse_args() args.seed = int(args.seed) if args.model: model = import_player(args.model) # Let the games begin App = TetrisApp(model, debug=args.debug, seed=args.seed) if args.arena: players = args.arena player_models = [import_player(player) for player in players] Arena(player_models, debug=args.debug).run_round_robin(seed=args.seed) print("Nothing else to do.")
[0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,1,0,0,0,0], [1,1,1,1,1,1,1,0,0,0], [1,1,1,1,1,1,1,1,1,1]], ] app = TetrisApp() ai = TetrisAI(app) # test indavidual function used for score moves class TestEval(unittest.TestCase): def test_get_max_height(self): heights = [] for board in test_boards: heights.append(ai.get_max_height(board)) self.assertEqual(heights, [0,5,5,2]) def test_get_roughness(self): roughs = [] for board in test_boards: roughs.append(ai.get_roughness(board))
class GeneticAlgorithms(object):
    """Genetic algorithm that evolves heuristic weights for a Tetris AI.

    The application is handed this object at construction; ``on_game_over``
    is the hook that receives a finished game's score.  Each chromosome
    plays GAMES_TO_AVG games before the next one is loaded, and a full pass
    over the population triggers ``next_generation``.
    """

    def __init__(self):
        self.app = TetrisApp(self)
        self.ai = AI(self.app)
        self.app.ai = self.ai
        self.population = [self.random_chromosome() for _ in range(POPULATION_SIZE)]
        self.current_chromosome = 0
        self.current_generation = 1
        self.ai.heuristics = self.population[self.current_chromosome].heuristics

    def run(self):
        """Start the application loop."""
        self.app.run()

    def next_ai(self):
        """Load the next chromosome, breeding a new generation on wrap-around."""
        self.current_chromosome += 1
        if self.current_chromosome >= POPULATION_SIZE:
            self.current_chromosome = 0
            self.next_generation()
        self.ai.heuristics = self.population[self.current_chromosome].heuristics

    def on_game_over(self, score):
        """Record ``score`` for the current chromosome and start a new game."""
        chromosome = self.population[self.current_chromosome]
        chromosome.games += 1
        chromosome.total_fitness += score
        if chromosome.games % GAMES_TO_AVG == 0:
            self.next_ai()
        self.app.start_game()

    def population_has_converged(self):
        """Return True when every weight of every chromosome lies strictly
        within CONVERGED_THRESHOLD of the first chromosome's weight."""
        t = CONVERGED_THRESHOLD
        reference = self.population[0].heuristics
        return all(
            reference[f] - t < w < reference[f] + t
            for c in self.population
            for f, w in c.heuristics.items())

    def next_generation(self):
        """Report on the finished generation and replace the population.

        Exits the process via ``sys.exit()`` once the population has
        converged.
        """
        print("__________________\n")
        if self.population_has_converged():
            print("Population has converged on generation %s.\n values: %s"
                  % (self.current_generation,
                     [(f.__name__, w) for f, w in self.population[0].heuristics.items()]))
            sys.exit()
        print("GENERATION %s COMPLETE" % self.current_generation)
        print("AVG FITNESS", sum([c.avg_fitness() for c in self.population]) / POPULATION_SIZE)
        self.current_generation += 1
        for c in self.population:
            print("chromosome", c.name, "fitness", c.avg_fitness())
        best_chromosome = max(self.population, key=lambda c: c.avg_fitness())
        print("Fittest chromosome:", best_chromosome.name, "fitness", best_chromosome.avg_fitness(),
              "\n%s" % [(f.__name__, w) for f, w in best_chromosome.heuristics.items()])

        print("\nEVOLUTION")
        new_population = self.selection(SURVIVORS_PER_GENERATION, SELECTION_METHOD)
        for c in new_population:
            print("chromosome", c.name, "fitness", c.avg_fitness(), "SURVIVED")
        # Parents are drawn from the outgoing population.
        for _ in range(NEWBORNS_PER_GENERATION):
            parents = self.selection(2, SELECTION_METHOD)
            new_population.append(self.crossover(parents[0], parents[1], CROSSOVER_METHOD))
            print(parents[0].name, "and", parents[1].name, "PRODUCED", new_population[-1].name)
        # Every pass gives each chromosome one chance to mutate.
        for _ in range(MUTATION_PASSES):
            for chromosome in new_population:
                self.mutation(chromosome, MUTATION_RATE / MUTATION_PASSES)
        print("__________________\n")

        assert len(new_population) == len(self.population), "SURVIVORS_PER_GENERATION + NEWBORNS_PER_GENERATION != POPULATION_SIZE"
        self.population = new_population

    @staticmethod
    def _roulette(population):
        """Return one chromosome drawn with probability ~ its avg_fitness().

        When the truncated total fitness is not positive there is nothing
        to weight by (and ``randrange`` would raise ValueError on the empty
        range), so the pick is uniform instead.
        """
        total_fitness = sum(c.avg_fitness() for c in population)
        if int(total_fitness) <= 0:
            return random.choice(population)
        winner = randrange(int(total_fitness))
        fitness_so_far = 0
        for chromosome in population:
            fitness_so_far += chromosome.avg_fitness()
            if fitness_so_far > winner:
                return chromosome
        # Defensive: only reachable through rounding of the running sum.
        return population[-1]

    def selection(self, num_selected, method):
        """Return ``num_selected`` distinct chromosomes chosen by ``method``.

        Raises ValueError for a selection method that is not implemented.
        """
        if method == SelectionMethod.roulette:
            survivors = []
            for _ in range(num_selected):
                # Draw without replacement.
                candidates = [c for c in self.population if c not in survivors]
                survivors.append(self._roulette(candidates))
            return survivors

        raise ValueError('SelectionMethod %s not implemented' % method)

    def crossover(self, c1, c2, method):
        """Return a new Chromosome combining the weights of ``c1`` and ``c2``.

        Raises ValueError for a crossover method that is not implemented.
        """
        if method == CrossoverMethod.random_attributes:
            # Each weight is copied from a randomly chosen parent.
            return Chromosome({
                fun: random.choice((c1, c2)).heuristics[fun]
                for fun in c1.heuristics
            })
        if method == CrossoverMethod.average_attributes:
            # Each weight is the mean of the parents' weights.
            return Chromosome({
                fun: (c1.heuristics[fun] + c2.heuristics[fun]) / 2
                for fun in c1.heuristics
            })
        raise ValueError('CrossoverMethod %s not implemented' % method)

    def mutation(self, chromosome, mutation_rate):
        """Replace one random weight with probability 1/(int(mutation_rate)+1)."""
        if randint(0, int(mutation_rate)) == 0:
            h = chromosome.heuristics
            h[random.choice(list(h.keys()))] = randrange(-1000, 1000)
            print(chromosome.name, "MUTATED")

    def random_chromosome(self):
        """Return a Chromosome with a random integer weight in [-1000, 1000)
        for every heuristic the AI currently knows."""
        return Chromosome({fun: randrange(-1000, 1000) for fun in self.ai.heuristics})
def fitness(individual, seeds, pieceLimit):
    """Return the truncated mean result of ``individual`` over all ``seeds``.

    One ``TetrisApp(False, seed)`` game is run per seed with the given
    ``pieceLimit``.  ``seeds`` must not be empty (the mean would divide by
    zero).
    """
    # Fix: the body passed the undefined name ``indiv`` to run() instead of
    # the ``individual`` parameter.
    results = [
        TetrisApp(False, seed).run(individual, pieceLimit) for seed in seeds
    ]
    return int(sum(results) / len(results))