class QLearningAgent(TetrisApp):
    """Tabular Q-learning agent that drives a wrapped ``TetrisApp`` game.

    Q-values are stored in ``self.qval`` (a ``util.Counter``) under the key
    ``hash(str((state, action)))``.  Inside this class a state is the pair
    ``(board_or_board_state, stone)`` and an action is the pair
    ``(rot, col)`` that is handed to ``TetrisApp.place_brick``.

    NOTE(review): the class subclasses ``TetrisApp`` but never calls the
    parent ``__init__``; all game state is read from the separate
    ``self.Tetris`` instance.  ``ideal_place_2`` is not defined in this
    class and is presumably inherited from ``TetrisApp`` -- confirm.
    """

    def __init__(self, alpha = 0.01, gamma = .5, epsilon = 1):
        """Create an empty Q-table and a fresh game.

        alpha   -- learning rate used by ``update``.
        gamma   -- discount factor (stored as ``self.discount``).
        epsilon -- probability passed to ``util.flipCoin`` in ``getAction``.
        """
        self.qval = util.Counter()
        self.alpha = alpha
        self.epsilon = epsilon
        self.discount = gamma
        self.Tetris = TetrisApp()
        self.boardprev = 0.
        # observeTransition accumulates into this attribute; without the
        # initial value its first call would raise AttributeError.
        self.episodeRewards = 0.0

    @staticmethod
    def _q_key(state, action):
        """Return the Q-table key for a (state, action) pair.

        The pair is stringified and hashed before being used as a key (the
        original author notes the raw pair could not be used as a dict key).
        """
        return hash(str((state, action)))

    def observeTransition(self, state, action, nextState, deltaReward):
        """Add ``deltaReward`` to the episode total and learn from the step."""
        self.episodeRewards += deltaReward
        self.update(state, action, nextState, deltaReward)

    def getQValue(self, state, action):
        """Return Q(state, action); unseen pairs are inserted as 0.0."""
        key = self._q_key(state, action)
        if key not in self.qval:
            self.qval[key] = 0.0
        return self.qval[key]

    def computeValueFromQValues(self, state):
        """Return the Q-value of the greedy action, or 0.0 if there is none."""
        action = self.computeActionFromQValues(state)
        if action is None:
            return 0.0
        return self.getQValue(state, action)

    def computeActionFromQValues(self, state):
        """Return the legal action with the highest Q-value, or None.

        Legal actions are queried for the stone in ``state[1]``.  Ties go to
        the action that appears last in the legal-action list (``>=``).
        """
        best_action = None
        best_value = float("-inf")
        for action in self.Tetris.get_legal_actions(state[1]):
            q_value = self.getQValue(state, action)
            if q_value >= best_value:
                best_value = q_value
                best_action = action
        return best_action

    def helperfunction(self, lst, legalactions):
        """Score one candidate placement with a one-step look-ahead.

        ``lst`` is unpacked as ``(value, action, new_board)``.  The result is
        ``(value + best_follow_up, action)`` where ``best_follow_up`` is
        element 0 of the maximum entry ``ideal_place_2`` returns for
        ``new_board``.
        """
        value, action, new_board = lst
        follow_up = max(self.ideal_place_2(new_board, legalactions, True))[0]
        return (value + follow_up, action)

    def getAction(self, state):
        """Choose the action to play in ``state``.

        Returns None when the stone in ``state[1]`` has no legal action.
        With probability ``self.epsilon`` the action comes from the
        ``ideal_place_2`` look-ahead on the live board; otherwise it is the
        greedy action from the Q-table.
        """
        legalActions = self.Tetris.get_legal_actions(state[1])
        if len(legalActions) == 0:
            return None
        if util.flipCoin(self.epsilon):
            candidates = self.ideal_place_2(self.Tetris.board, legalActions, False)
            scored = [self.helperfunction(candidate, legalActions)
                      for candidate in candidates]
            return max(scored)[1]
        return self.computeActionFromQValues(state)

    def update(self, state, action, nextState, reward):
        """Apply one Q-learning step:

        Q(s, a) += alpha * (reward + discount * V(nextState) - Q(s, a))
        """
        current = self.getQValue(state, action)
        target = reward + self.discount * self.computeValueFromQValues(nextState)
        self.qval[self._q_key(state, action)] = current + self.alpha * (target - current)

    def getPolicy(self, state):
        """Return the greedy action for ``state``."""
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        """Return the value of ``state`` under the greedy policy."""
        return self.computeValueFromQValues(state)

    def _render_frame(self, n):
        """Draw one frame of the wrapped game and flip the display."""
        game = self.Tetris
        game.screen.fill((0,0,0))
        if game.gameover:
            game.center_msg("""Game Over!\nYour score: %d Press space to continue""" % game.score)
            # Keep playing automatically until the game counter reaches 10000.
            if n < 10000:
                game.start_game()
            else:
                game.quit()
        elif game.paused:
            game.center_msg("Paused")
        else:
            pygame.draw.line(game.screen,
                             (255,255,255),
                             (game.rlim+1, 0),
                             (game.rlim+1, game.height-1))
            game.disp_msg("Next:", (game.rlim+cell_size, 2))
            game.disp_msg("Score: %d\n\nLevel: %d\nLines: %d" % (game.score, game.level, game.lines),
                          (game.rlim+cell_size, cell_size*5))
            game.draw_matrix(game.bground_grid, (0,0))
            game.draw_matrix(game.board, (0,0))
            game.draw_matrix(game.stone, (game.stone_x, game.stone_y))
            game.draw_matrix(game.next_stone, (cols+1,2))
        pygame.display.update()

    def _dispatch_events(self, key_actions):
        """Drain the pygame event queue and run any bound key handler."""
        for event in pygame.event.get():
            if event.type == pygame.USEREVENT+1:
                pass
            elif event.type == pygame.QUIT:
                self.Tetris.quit()
            elif event.type == pygame.KEYDOWN:
                for key, handler in key_actions.items():
                    # Attribute lookup replaces eval("pygame.K_" + key).
                    if event.key == getattr(pygame, "K_" + key):
                        handler()

    def run(self, n):
        """Play one game, learning after every placement.

        ``n`` is the game counter supplied by the caller: while it is below
        ``value_iter_rounds`` epsilon is 1, afterwards epsilon is
        ``1 / (15 * log(n + 1))``.
        """
        key_actions = {
            'ESCAPE': self.Tetris.quit,
            'LEFT': lambda: self.Tetris.move(-1),
            'RIGHT': lambda: self.Tetris.move(+1),
            'DOWN': lambda: self.Tetris.drop(True),
            'UP': self.Tetris.rotate_stone,
            # The original literal listed 'SPACE' twice (toggle_pause, then
            # start_game); only the last entry ever took effect.
            'SPACE': self.Tetris.start_game,
            'RETURN': self.Tetris.insta_drop,
        }

        self.Tetris.board = tetris.new_board()
        self.boardprev = self.Tetris.board
        if n < value_iter_rounds:
            self.epsilon = 1
        else:
            self.epsilon = 1/(15.*math.log(float(n)+1))
        self.Tetris.gameover = False
        self.Tetris.paused = False

        rot, col = self.getAction((self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone))
        prevboard = self.Tetris.board
        n += 1
        while not self.Tetris.gameover:
            # NOTE(review): the state being updated is keyed on the raw
            # previous board and the *current* stone, whereas getAction looks
            # values up under (get_board_state(board), stone).  Unless the two
            # representations coincide, the values written here are never read
            # back by action selection -- confirm against get_board_state.
            self.update((prevboard, self.Tetris.stone),
                        (rot, col),
                        (self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone),
                        self.Tetris.heuristic(self.Tetris.board))
            # Snapshot the board before place_brick changes it.
            prevboard = tetris.deepishcopy(self.Tetris.board)
            rot, col = self.getAction((self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone))

            # Exactly one frame is rendered per placement.
            frame_pending = True
            while frame_pending:
                self._render_frame(n)
                self.Tetris.place_brick(rot, col)
                frame_pending = False
                self._dispatch_events(key_actions)
class GA(object):
    """Genetic algorithm that evolves heuristic weight vectors for a Tetris AI.

    Each organism carries a list of weights (``heuristics``) that is kept at
    unit Euclidean length.  An organism's fitness is the number of lines it
    clears over ``NUMGAMES`` games; after every organism has played, the best
    ``ELITE`` organisms survive and the rest of the population is rebuilt by
    crossover and mutation.

    NOTE(review): a disabled log header in the original listed the weights
    as "Aggregate Height, Bumpiness, Holes, LinesCleared, Connected Holes,
    Blockades, Altitude Delta, Weighted Blocks, H-Roughness, V-Roughness,
    Wells, Biggest Well, Total Height" -- that is 13 names for 12 weights,
    so verify the mapping against the agent before relying on it.
    """

    # Number of weights given to every randomly generated organism.
    NUM_WEIGHTS = 12

    def __init__(self):
        self.num_of_organisms = POPSIZE
        self.survivors = ELITE
        self.new_organisms = self.num_of_organisms - self.survivors
        self.mutation_rate = MUTRATE
        self.crossover_rate = CROSSRATE
        self.population = self.InitPop(self.num_of_organisms)
        # Index of the organism currently being evaluated.
        self.current_organism = 0
        self.current_generation = 0
        self.sequenceType = SEQUENCE
        # Seed reused for every game when SEQUENCE == "fixed".
        self.seed = numpy.random.random()
        self.app = TetrisApp(self)
        # The agent reads the weights of the organism under evaluation.
        self.ai = Agent(self.app)
        self.app.ai = self.ai
        self.cycleStart = 0
        self.cycleEnd = 0
        # Maps tuple(heuristics) -> fitness of organisms already evaluated.
        self.fitnessDictionary = {}
        # Elite fitness values of the previous generation (stagnation check).
        self.lastBest = []

    def RandomOrganism(self):
        """Return a normalized organism with uniformly random weights."""
        weights = [
            numpy.random.uniform(LOWERBOUND, UPPERBOUND)
            for _ in range(self.NUM_WEIGHTS)
        ]
        organism = Organism(weights)
        self.normalize(organism)
        return organism

    def InitPop(self, populationSize):
        """Return a list of ``populationSize`` random organisms."""
        return [self.RandomOrganism() for _ in range(populationSize)]

    def Run(self):
        """Write the run header to ``RESULTS`` and start the game loop."""
        with open(RESULTS, 'w') as f:
            f.write(
                "\n Cross Type: %s, Selection Type: %s, Crossover Rate: %s, Mutation Rate: %s , Replacement Per Cycle: %s\n Theoretical Line limit: %s "
                % (CROSSTYPE, SELECTIONTYPE, self.crossover_rate,
                   self.mutation_rate, self.new_organisms,
                   (NUMGAMES * self.app.limit * 4 / 10)))
        self.cycleStart = time.time()
        self.app.run()

    def _advance_generation_if_exhausted(self):
        """Start the next generation once every organism has been handled."""
        if self.current_organism >= self.num_of_organisms:
            self.current_organism = 0
            self.NextGeneration()

    def NextAI(self):
        """Move on to the next organism and load its weights into the agent."""
        self.current_organism += 1
        self.app.piecesPlayed = 0
        self._advance_generation_if_exhausted()
        # With a fixed piece sequence and a single game per organism a weight
        # vector always scores the same, so organisms that were already
        # evaluated (the elites) reuse their cached fitness and are skipped.
        if SEQUENCE == "fixed" and NUMGAMES == 1:
            while self.current_organism < self.num_of_organisms:
                organism = self.population[self.current_organism]
                key = tuple(organism.heuristics)
                if key not in self.fitnessDictionary:
                    break
                organism.fitness = self.fitnessDictionary[key]
                self.current_organism += 1
                self._advance_generation_if_exhausted()
        self.ai.heuristics = self.population[self.current_organism].heuristics

    def _restart_game(self):
        """Start a new game using the piece sequence selected by SEQUENCE."""
        if SEQUENCE == "fixed":
            self.app.start_game(self.seed)
        elif SEQUENCE == "random":
            self.app.start_game(numpy.random.random())

    def GameOver(self, lines_cleared):
        """Record a finished game for the current organism and start the next.

        After ``NUMGAMES`` games the organism's fitness is cached and the
        next organism is loaded; otherwise the same organism plays again.
        """
        organism = self.population[self.current_organism]
        organism.fitness += lines_cleared
        organism.played += 1
        if organism.played == NUMGAMES:
            self.fitnessDictionary[tuple(organism.heuristics)] = organism.fitness
            self.NextAI()
        else:
            self.app.piecesPlayed = 0
        self._restart_game()

    def normalize(self, org):
        """Scale ``org.heuristics`` in place to unit Euclidean length.

        An all-zero (or empty) weight vector is left unchanged instead of
        being divided by a zero norm.
        """
        norm = numpy.sqrt(sum(h * h for h in org.heuristics))
        if norm == 0:
            return
        for i in range(len(org.heuristics)):
            org.heuristics[i] /= norm

    def tournament(self):
        """Return the two best organisms among a random sample.

        Roughly 10% of ``num_of_organisms`` indices are drawn with
        replacement, and never fewer than two so that both slots get filled.
        The population is assumed to be sorted best-first, so the smallest
        sampled indices win.  Both results may be the same organism; callers
        re-draw in that case.
        """
        indices = list(range(len(self.population)))
        best = None
        second = None
        draws = max(2, int(self.num_of_organisms * .1))
        for _ in range(draws):
            selected = numpy.random.choice(indices)
            if best is None or selected < best:
                second = best
                best = selected
            elif second is None or selected < second:
                second = selected
        return self.population[best], self.population[second]

    def roulette(self):
        """Return one organism chosen with probability proportional to fitness.

        Falls back to a uniform choice when the total fitness is zero, and to
        the last organism if rounding leaves the cumulative sum just below
        the drawn number.  Returns None for an empty population.
        """
        population = self.population
        if not population:
            return None
        r = random.random()
        total = float(sum(org.fitness for org in population))
        if total <= 0:
            # Nobody has scored yet: every organism is equally likely.
            index = min(int(r * len(population)), len(population) - 1)
            return population[index]
        cumulative = 0
        for organism in population:
            cumulative += organism.fitness / total
            if r <= cumulative:
                return organism
        return population[-1]

    def _prune_fitness_cache(self):
        """Drop cached fitness values whose weights left the population."""
        alive = {tuple(org.heuristics) for org in self.population}
        # Iterate over a snapshot: deleting from a dict while iterating its
        # live key view raises RuntimeError on Python 3.
        for key in list(self.fitnessDictionary):
            if key not in alive:
                del self.fitnessDictionary[key]

    def NextGeneration(self):
        """Log the finished generation and breed the next one."""
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        elite = self.population[:self.survivors]
        averageScore = sum(a.fitness for a in elite) / len(elite) if elite else 0
        self.cycleEnd = time.time() - self.cycleStart

        with open(RESULTS, 'a') as f:
            f.write(
                "\nGeneration: %s, Sequence Type: %s, Cycle Time: %s Elite Average Lines Cleared in %s Games: %s\n"
                % (self.current_generation, SEQUENCE,
                   str(datetime.timedelta(seconds=self.cycleEnd)), NUMGAMES,
                   averageScore))
            for a in self.population:
                f.write("%s, Age: %s Weights: %s - Lines Cleared:%s\n" %
                        (a.name, a.age, a.heuristics, a.fitness))

        self._prune_fitness_cache()
        self.current_generation += 1

        # Keep only the elite, then refill the population with children.
        self.SelectSurvivors()
        eliteScores = [org.fitness for org in self.population[:self.survivors]]
        if eliteScores == self.lastBest:
            # No progress since the previous generation: mutate more often.
            self.mutation_rate += .05

        if SELECTIONTYPE == "roulette":
            for _ in range(self.new_organisms):
                parent1 = self.roulette()
                parent2 = self.roulette()
                # NOTE(review): this never terminates when exactly one
                # organism holds all of the fitness.
                while parent1 == parent2:
                    parent2 = self.roulette()
                child = self.Crossover(parent1, parent2)
                # Use the adaptive rate; the constant MUTRATE ignored the
                # stagnation bump applied above.
                if numpy.random.random() < self.mutation_rate:
                    self.mutate(child)
                self.population.append(child)
        elif SELECTIONTYPE == "tournament":
            for _ in range(self.new_organisms):
                p1, p2 = self.tournament()
                # NOTE(review): this never terminates when fewer than two
                # organisms have a non-zero fitness.
                while p1 == p2 or p1.fitness == 0 or p2.fitness == 0:
                    p1, p2 = self.tournament()
                child = self.Crossover(p1, p2)
                if numpy.random.random() < self.mutation_rate:
                    self.mutate(child)
                self.population.append(child)

        # Every organism starts the new generation with a clean score.
        for org in self.population:
            org.played = 0
            org.fitness = 0
        self.lastBest = eliteScores
        self.cycleStart = time.time()
        assert self.num_of_organisms == len(
            self.population
        ), "ERROR: new population doesnt the correct number of organisms have %s, want %s" % (
            len(self.population), self.num_of_organisms)

    def SelectSurvivors(self):
        """Trim the population to the ``self.survivors`` fittest organisms.

        The survivors are kept in best-first order and each one ages by one
        generation.
        """
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        self.population = self.population[:self.survivors]
        for organism in self.population:
            organism.age += 1

    def Crossover(self, parent1, parent2):
        """Return a normalized child organism bred from two parents.

        CROSSTYPE "uniform" copies each weight from either parent with equal
        probability; "average" sums the parents' weights, each scaled by its
        parent's fitness.  Any other CROSSTYPE yields a child without
        weights (unchanged from the original behaviour).
        """
        child = []
        if CROSSTYPE == "uniform":
            for w1, w2 in zip(parent1.heuristics, parent2.heuristics):
                child.append(w1 if numpy.random.random() < .5 else w2)
        elif CROSSTYPE == "average":
            a = parent1.fitness
            b = parent2.fitness
            for w1, w2 in zip(parent1.heuristics, parent2.heuristics):
                child.append((a * w1) + (b * w2))
        offspring = Organism(child)
        self.normalize(offspring)
        return offspring

    def mutate(self, organism):
        """Nudge one randomly chosen weight by -0.2..+0.2 and re-normalize."""
        mutation = numpy.random.random() * .4 - .2
        # Pick the index from the actual vector length rather than assuming
        # twelve weights.
        x = numpy.random.randint(0, len(organism.heuristics))
        organism.heuristics[x] += mutation
        self.normalize(organism)
class GeneticAlgorithms(object):
    """Genetic algorithm that evolves heuristic weights for a Tetris AI.

    Every chromosome maps heuristic functions to weights.  A chromosome is
    scored over ``GAMES_TO_AVG`` games; once the whole population has played,
    ``next_generation`` keeps some survivors, breeds newborns and mutates.
    """

    def __init__(self):
        self.app = TetrisApp(self)
        self.ai = AI(self.app)
        self.app.ai = self.ai
        # random_chromosome reads the heuristic names from self.ai, so the
        # AI has to exist before the population is created.
        self.population = [self.random_chromosome() for _ in range(POPULATION_SIZE)]
        self.current_chromosome = 0
        self.current_generation = 1
        self.ai.heuristics = self.population[self.current_chromosome].heuristics

    def run(self):
        """Start the game loop."""
        self.app.run()

    def next_ai(self):
        """Load the next chromosome, rolling over to a new generation."""
        self.current_chromosome += 1
        if self.current_chromosome >= POPULATION_SIZE:
            self.current_chromosome = 0
            self.next_generation()
        self.ai.heuristics = self.population[self.current_chromosome].heuristics

    def on_game_over(self, score):
        """Record ``score`` for the current chromosome and start a new game."""
        chromosome = self.population[self.current_chromosome]
        chromosome.games += 1
        chromosome.total_fitness += score
        if chromosome.games % GAMES_TO_AVG == 0:
            self.next_ai()
        self.app.start_game()

    def population_has_converged(self):
        """Return True when all weights are near the first chromosome's.

        "Near" means strictly within ``CONVERGED_THRESHOLD`` of the
        corresponding weight of ``self.population[0]``.
        """
        t = CONVERGED_THRESHOLD
        reference = self.population[0].heuristics
        return all(
            all(reference[f] - t < w < reference[f] + t
                for f, w in c.heuristics.items())
            for c in self.population)

    def next_generation(self):
        """Report the finished generation and build the next population.

        Exits the process through ``sys.exit()`` once the population has
        converged.
        """
        print("__________________\n")
        if self.population_has_converged():
            print("Population has converged on generation %s.\n values: %s"
                  % (self.current_generation,
                     [(f.__name__, w) for f, w in self.population[0].heuristics.items()]))
            sys.exit()
        print("GENERATION %s COMPLETE" % self.current_generation)
        print("AVG FITNESS", sum(c.avg_fitness() for c in self.population) / POPULATION_SIZE)
        self.current_generation += 1
        for c in self.population:
            print("chromosome", c.name, "fitness", c.avg_fitness())
        best_chromosome = max(self.population, key=lambda c: c.avg_fitness())
        print("Fittest chromosome:", best_chromosome.name, "fitness", best_chromosome.avg_fitness(),
              "\n%s" % [(f.__name__, w) for f, w in best_chromosome.heuristics.items()])

        print("\nEVOLUTION")
        new_population = self.selection(SURVIVORS_PER_GENERATION, SELECTION_METHOD)
        for c in new_population:
            print("chromosome", c.name, "fitness", c.avg_fitness(), "SURVIVED")
        for _ in range(NEWBORNS_PER_GENERATION):
            parents = self.selection(2, SELECTION_METHOD)
            new_population.append(self.crossover(parents[0], parents[1], CROSSOVER_METHOD))
            print(parents[0].name, "and", parents[1].name, "PRODUCED", new_population[-1].name)
        # Survivors and newborns alike are exposed to every mutation pass.
        for _ in range(MUTATION_PASSES):
            for chromosome in new_population:
                self.mutation(chromosome, MUTATION_RATE / MUTATION_PASSES)
        print("__________________\n")

        assert len(new_population) == len(self.population), "SURVIVORS_PER_GENERATION + NEWBORNS_PER_GENERATION != POPULATION_SIZE"
        self.population = new_population

    @staticmethod
    def _roulette_pick(population):
        """Return one chromosome chosen proportionally to ``avg_fitness()``.

        When the total fitness is zero the choice is uniform.  Raises
        ValueError for an empty population.
        """
        if not population:
            raise ValueError("cannot select from an empty population")
        weights = [c.avg_fitness() for c in population]
        total_fitness = sum(weights)
        if total_fitness <= 0:
            # randrange(int(total)) used to raise here (and for any total
            # below 1); with nothing to weigh by, choose uniformly.
            return random.choice(population)
        # A continuous threshold avoids the bias of truncating the total.
        winner = random.random() * total_fitness
        fitness_so_far = 0
        for chromosome, weight in zip(population, weights):
            fitness_so_far += weight
            if fitness_so_far > winner:
                return chromosome
        # Only reachable through floating-point rounding.
        return population[-1]

    def selection(self, num_selected, method):
        """Return ``num_selected`` distinct chromosomes from the population.

        Raises ValueError for a selection method that is not implemented.
        """
        if method == SelectionMethod.roulette:
            survivors = []
            for _ in range(num_selected):
                candidates = [c for c in self.population if c not in survivors]
                survivors.append(self._roulette_pick(candidates))
            return survivors
        raise ValueError('SelectionMethod %s not implemented' % method)

    def crossover(self, c1, c2, method):
        """Return a new chromosome bred from ``c1`` and ``c2``.

        ``random_attributes`` copies each weight from a randomly chosen
        parent; ``average_attributes`` takes the mean of both parents.
        Raises ValueError for a crossover method that is not implemented.
        """
        if method == CrossoverMethod.random_attributes:
            return Chromosome({
                fun: random.choice((c1, c2)).heuristics[fun]
                for fun in c1.heuristics
            })
        if method == CrossoverMethod.average_attributes:
            return Chromosome({
                fun: (c1.heuristics[fun] + c2.heuristics[fun]) / 2
                for fun in c1.heuristics
            })
        raise ValueError('CrossoverMethod %s not implemented' % method)

    def mutation(self, chromosome, mutation_rate):
        """Possibly replace one random weight of ``chromosome``.

        The mutation fires when ``randint(0, int(mutation_rate))`` draws 0,
        i.e. with probability ``1 / (int(mutation_rate) + 1)``.
        """
        if randint(0, int(mutation_rate)) == 0:
            h = chromosome.heuristics
            h[random.choice(list(h))] = randrange(-1000, 1000)
            print(chromosome.name, "MUTATED")

    def random_chromosome(self):
        """Return a chromosome with a random integer weight per heuristic.

        Weights are drawn from -1000..999 for every heuristic the AI knows.
        """
        return Chromosome({fun: randrange(-1000, 1000) for fun in self.ai.heuristics})