class QLearningAgent(TetrisApp):
    """Tabular Q-learning agent that drives a wrapped ``TetrisApp`` game.

    States are passed around as ``(board_state, stone)`` pairs and actions are
    unpacked as ``(rot, col)`` pairs.  Q-values are stored in ``self.qval``
    under the key ``hash(str((state, action)))``.

    NOTE(review): the class inherits from ``TetrisApp`` *and* wraps a separate
    instance in ``self.Tetris``; ``TetrisApp.__init__`` is never run on
    ``self``.  Inherited helpers such as ``ideal_place_2`` are called on
    ``self`` -- confirm they do not depend on state set by that initialiser.
    """

    def __init__(self, alpha = 0.01, gamma = .5, epsilon = 1):
        """Create the agent and the game instance it controls.

        alpha   -- learning rate used in ``update``.
        gamma   -- discount factor (stored as ``self.discount``).
        epsilon -- probability of taking the exploratory branch in
                   ``getAction``; ``run`` overwrites it every call.
        """
        self.qval = util.Counter()
        self.alpha = alpha
        self.epsilon = epsilon
        self.discount = gamma
        self.Tetris = TetrisApp()
        self.boardprev = 0.
        # observeTransition accumulates into this attribute; it used to be
        # missing, so the very first observeTransition raised AttributeError.
        self.episodeRewards = 0.0

    @staticmethod
    def _q_key(state, action):
        """Return the table key for a (state, action) pair.

        The pair is stringified before hashing, presumably because states
        contain unhashable containers.  Distinct pairs can collide, and on
        Python 3 string hashes are salted per process unless PYTHONHASHSEED
        is fixed, so keys are only meaningful within a single run.
        """
        return hash(str((state, action)))

    def observeTransition(self, state,action,nextState,deltaReward):
        """Record the reward of a transition and learn from it."""
        self.episodeRewards += deltaReward
        self.update(state,action,nextState,deltaReward)

    def getQValue(self, state, action):
        """Return Q(state, action); unseen pairs are inserted as 0.0."""
        key = self._q_key(state, action)
        if key not in self.qval:
            self.qval[key] = 0.0
        return self.qval[key]

    def computeValueFromQValues(self, state):
        """Return the Q-value of the greedy action, or 0.0 if none is legal."""
        action = self.computeActionFromQValues(state)
        if action is None:
            return 0.0
        return self.getQValue(state, action)

    def computeActionFromQValues(self, state):
        """Return the legal action with the highest Q-value, or None.

        Legal actions are looked up from the stone (``state[1]``).  Ties are
        resolved in favour of the action that appears last in the list.
        """
        best_action = None
        best_value = float("-inf")
        # Fetch the legal actions once; the original queried them twice.
        for action in self.Tetris.get_legal_actions(state[1]):
            value = self.getQValue(state, action)
            if value >= best_value:
                best_value = value
                best_action = action
        return best_action

    def helperfunction(self, lst, legalactions):
        """Score one candidate placement with a one-step look-ahead.

        ``lst`` is a ``(value, action, new_board)`` triple.  Returns
        ``(value + best follow-up value, action)`` where the follow-up value
        is the first element of the maximum entry ``ideal_place_2`` yields
        for ``new_board``.
        """
        value, action, new_board = lst
        follow_up = max(self.ideal_place_2(new_board, legalactions, True))[0]
        return (value + follow_up, action)

    def getAction(self, state):
        """Choose an action for ``state``; None when no action is legal.

        With probability ``self.epsilon`` the action comes from the
        ``ideal_place_2`` look-ahead; otherwise the greedy Q-value action is
        returned.
        """
        legalActions = self.Tetris.get_legal_actions(state[1])
        if len(legalActions) == 0:
            return None
        if util.flipCoin(self.epsilon):
            candidates = self.ideal_place_2(self.Tetris.board, legalActions, False)
            scored = (self.helperfunction(c, legalActions) for c in candidates)
            return max(scored)[1]
        return self.computeActionFromQValues(state)

    def update(self, state, action, nextState, reward):
        """Apply one Q-learning step for the observed transition.

        Q(s, a) += alpha * (reward + discount * max_a' Q(s', a') - Q(s, a))
        """
        self.qval[self._q_key(state, action)] += self.alpha * (
            reward
            + self.discount * self.computeValueFromQValues(nextState)
            - self.getQValue(state, action))

    def getPolicy(self, state):
        """Return the greedy action for ``state``."""
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        """Return the greedy state value for ``state``."""
        return self.computeValueFromQValues(state)

    def _current_state(self):
        """Return the (board_state, stone) pair for the live game."""
        return (self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone)

    def _render_frame(self, n):
        """Draw one frame; on game over also restart (n < 10000) or quit."""
        game = self.Tetris
        game.screen.fill((0,0,0))
        if game.gameover:
            game.center_msg(
                "Game Over!\nYour score: %d\n    Press space to continue" % game.score)
            if n < 10000:
                game.start_game()
            else:
                game.quit()
        elif game.paused:
            game.center_msg("Paused")
        else:
            pygame.draw.line(game.screen,
                (255,255,255),
                (game.rlim+1, 0),
                (game.rlim+1, game.height-1))
            game.disp_msg("Next:", (game.rlim+cell_size, 2))
            game.disp_msg(
                "Score: %d\n\nLevel: %d    \nLines: %d" % (game.score, game.level, game.lines),
                (game.rlim+cell_size, cell_size*5))
            game.draw_matrix(game.bground_grid, (0,0))
            game.draw_matrix(game.board, (0,0))
            game.draw_matrix(game.stone, (game.stone_x, game.stone_y))
            game.draw_matrix(game.next_stone, (cols+1,2))
        pygame.display.update()

    def _dispatch_events(self, key_actions):
        """Drain the pygame event queue, handling quit and bound keys."""
        for event in pygame.event.get():
            if event.type == pygame.USEREVENT+1:
                continue
            if event.type == pygame.QUIT:
                self.Tetris.quit()
            elif event.type == pygame.KEYDOWN:
                for key, handler in key_actions.items():
                    # getattr instead of eval: same constant lookup without
                    # executing a constructed string.
                    if event.key == getattr(pygame, "K_" + key):
                        handler()

    def run(self,n):
        """Play one game with the agent, learning after every placement.

        ``n`` is the episode index: it selects the exploration rate (1 while
        ``n < value_iter_rounds``, then ``1/(15*ln(n+1))``) and, after being
        incremented once, decides whether a game over restarts or quits.
        """
        key_actions = {
            'ESCAPE': self.Tetris.quit,
            'LEFT': lambda: self.Tetris.move(-1),
            'RIGHT': lambda: self.Tetris.move(+1),
            'DOWN': lambda: self.Tetris.drop(True),
            'UP': self.Tetris.rotate_stone,
            # The original dict listed 'SPACE' twice, so the pause binding was
            # silently overwritten by start_game.  Pause is bound to 'p' here
            # -- NOTE(review): confirm 'p' is the intended pause key.
            'p': self.Tetris.toggle_pause,
            'SPACE': self.Tetris.start_game,
            'RETURN': self.Tetris.insta_drop,
        }

        self.Tetris.board = tetris.new_board()
        self.boardprev = self.Tetris.board

        # n <= 0 is treated as pure exploration: log(n + 1) would be zero (or
        # undefined) there and the division below would fail.
        if n < value_iter_rounds or n <= 0:
            self.epsilon = 1
        else:
            self.epsilon = 1/(15.*math.log(float(n)+1))

        self.Tetris.gameover = False
        self.Tetris.paused = False

        rot, col = self.getAction(self._current_state())
        prevboard = self.Tetris.board
        n += 1
        while not self.Tetris.gameover:
            # NOTE(review): the "previous" state is keyed on the raw board and
            # the *current* stone, while getAction/computeActionFromQValues
            # key on get_board_state(board).  Unless get_board_state is the
            # identity, values learned here are never read back by the greedy
            # policy.  Left as-is because changing it alters learning.
            self.update(
                (prevboard, self.Tetris.stone),
                (rot, col),
                self._current_state(),
                self.Tetris.heuristic(self.Tetris.board))
            prevboard = tetris.deepishcopy(self.Tetris.board)
            rot, col = self.getAction(self._current_state())

            # One render / placement / event pass per decision.
            self._render_frame(n)
            self.Tetris.place_brick(rot, col)
            self._dispatch_events(key_actions)
# Example no. 2
# 0
class GA(object):
    """Genetic algorithm that evolves heuristic weight vectors for a Tetris AI.

    Each organism carries a list of weights (``heuristics``) plus ``fitness``,
    ``played``, ``age`` and ``name`` attributes.  The application calls
    ``GameOver`` after every game; once an organism has played ``NUMGAMES``
    games the next organism is loaded, and after the whole population has
    played a new generation is bred.
    """

    # Number of weights given to a freshly generated organism.
    NUM_WEIGHTS = 12
    # Upper bound on parent re-draws before falling back to the best survivors.
    MAX_SELECTION_RETRIES = 1000

    def __init__(self):
        """Build the initial population and wire up the application and agent."""
        self.num_of_organisms = POPSIZE
        self.survivors = ELITE
        self.new_organisms = self.num_of_organisms - self.survivors
        self.mutation_rate = MUTRATE
        self.crossover_rate = CROSSRATE
        # initialize the population
        self.population = self.InitPop(self.num_of_organisms)
        # index of the organism currently being evaluated
        self.current_organism = 0
        # index of the generation currently being evaluated
        self.current_generation = 0
        self.sequenceType = SEQUENCE
        # seed handed to start_game when SEQUENCE == "fixed"
        self.seed = numpy.random.random()
        self.app = TetrisApp(self)
        # the agent reads the weights of the organism under evaluation
        self.ai = Agent(self.app)
        self.app.ai = self.ai
        self.cycleStart = 0
        self.cycleEnd = 0
        # maps tuple(weights) -> fitness of organisms already evaluated
        self.fitnessDictionary = {}
        # elite fitness values of the previous generation
        self.lastBest = []

    def RandomOrganism(self):
        """Return a normalized organism with ``NUM_WEIGHTS`` random weights."""
        weights = [
            numpy.random.uniform(LOWERBOUND, UPPERBOUND)
            for _ in range(self.NUM_WEIGHTS)
        ]
        organism = Organism(weights)
        self.normalize(organism)
        return organism

    def InitPop(self, populationSize):
        """Return a list of ``populationSize`` random organisms."""
        # To seed the population with a known organism, append it here, e.g.:
        # Organism([-0.25835108880355967, -0.18873479853738032, -0.6081190254748627, -0.5281331622290867, -0.0936639080926526, -0.10826897335053938, 0.15010957868145391, -0.21161009827721672, -0.04113776799016001, 0.2957493369775496, -0.07093022881256028, -0.2586553756116776])
        return [self.RandomOrganism() for _ in range(populationSize)]

    def Run(self):
        """Write the run header to RESULTS (truncating it) and start the app."""
        with open(RESULTS, 'w') as f:
            f.write(
                "\n Cross Type: %s, Selection Type: %s, Crossover Rate: %s, Mutation Rate: %s , Replacement Per Cycle: %s\n Theoretical Line limit: %s "
                % (CROSSTYPE, SELECTIONTYPE, self.crossover_rate,
                   self.mutation_rate, self.new_organisms,
                   (NUMGAMES * self.app.limit * 4 / 10)))
            # Weight names from an earlier header, kept for reference
            # (presumably the meaning of each weight -- TODO confirm):
            # Aggregate Height, Bumpiness, Holes, LinesCleared, Connected Holes, Blockades, Altitude Delta, Weighted Blocks, H-Roughness, V-Roughness, Wells, Biggest Well, Total Height.
        self.cycleStart = time.time()
        self.app.run()

    def _advance_generation_if_exhausted(self):
        """Breed the next generation once every organism has been visited."""
        if self.current_organism >= self.num_of_organisms:
            self.current_organism = 0
            self.NextGeneration()

    def NextAI(self):
        """Move on to the next organism and load its weights into the agent."""
        self.current_organism += 1
        self.app.piecesPlayed = 0
        self._advance_generation_if_exhausted()

        # With a fixed piece sequence and a single game per organism the
        # result is deterministic, so organisms whose weights were already
        # evaluated (e.g. the elites) get their cached fitness and are skipped.
        if SEQUENCE == "fixed" and NUMGAMES == 1:
            while self.current_organism < self.num_of_organisms:
                organism = self.population[self.current_organism]
                key = tuple(organism.heuristics)
                if key not in self.fitnessDictionary:
                    break
                organism.fitness = self.fitnessDictionary[key]
                self.current_organism += 1

        # Skipping may have run past the end of the population.
        self._advance_generation_if_exhausted()

        # update the heuristics for the organism we are working on
        self.ai.heuristics = self.population[self.current_organism].heuristics

    def _restart_game(self):
        """Start a new game using the seed policy selected by SEQUENCE."""
        if SEQUENCE == "fixed":
            self.app.start_game(self.seed)
        elif SEQUENCE == "random":
            self.app.start_game(numpy.random.random())

    def GameOver(self, lines_cleared):
        """Record a finished game for the current organism and start the next.

        ``lines_cleared`` is added to the organism's fitness.  After
        ``NUMGAMES`` games its fitness is cached and the next organism is
        loaded; otherwise the same organism plays again.
        """
        organism = self.population[self.current_organism]
        organism.fitness += lines_cleared
        organism.played += 1
        if organism.played == NUMGAMES:
            self.fitnessDictionary[tuple(organism.heuristics)] = organism.fitness
            self.NextAI()
        else:
            self.app.piecesPlayed = 0
        self._restart_game()

    def normalize(self, org):
        """Scale ``org.heuristics`` in place to unit Euclidean length.

        An all-zero (or empty) vector is left untouched instead of being
        divided by a zero norm.
        """
        norm = numpy.sqrt(sum(h * h for h in org.heuristics))
        if norm == 0:
            return
        for i in range(len(org.heuristics)):
            org.heuristics[i] /= norm

    def tournament(self):
        """Return two organisms chosen by tournament selection.

        Roughly 10% of ``num_of_organisms`` indices (at least two) are drawn
        with replacement; because the population is sorted best-first, the
        two smallest drawn indices win.  The two winners may be the same
        organism -- callers re-draw in that case.
        """
        # At least two draws are needed to fill both slots; with fewer the
        # original returned population[None].
        draws = max(2, int(self.num_of_organisms * .1))
        best = None
        runner_up = None
        for _ in range(draws):
            selected = numpy.random.choice(len(self.population))
            if best is None or selected < best:
                runner_up = best
                best = selected
            elif runner_up is None or selected < runner_up:
                runner_up = selected
        return self.population[best], self.population[runner_up]

    def roulette(self):
        """Return one organism chosen with probability proportional to fitness.

        When the total fitness is zero the choice is uniform.  The last
        organism is returned if floating-point rounding keeps the cumulative
        probability just below the drawn number.
        """
        total = float(sum(org.fitness for org in self.population))
        if total <= 0:
            return self.population[numpy.random.randint(len(self.population))]
        # numpy.random is used here like everywhere else in the class.
        r = numpy.random.random()
        cumulative = 0.0
        for organism in self.population:
            cumulative += organism.fitness / total
            if r < cumulative:
                return organism
        return self.population[-1]

    def _prune_fitness_cache(self):
        """Drop cached fitness entries whose weights left the population."""
        alive = {tuple(org.heuristics) for org in self.population}
        # Collect first: deleting while iterating the dict raises RuntimeError.
        stale = [key for key in self.fitnessDictionary if key not in alive]
        for key in stale:
            del self.fitnessDictionary[key]

    def _select_parents(self):
        """Return a pair of parents per SELECTIONTYPE, or None if unknown.

        Re-draws are capped at ``MAX_SELECTION_RETRIES``; when no acceptable
        pair turns up (e.g. fewer than two organisms with non-zero fitness)
        the two best organisms are used instead of looping forever.
        """
        if SELECTIONTYPE == "roulette":
            parent1 = self.roulette()
            for _ in range(self.MAX_SELECTION_RETRIES):
                parent2 = self.roulette()
                if not (parent1 == parent2):
                    return parent1, parent2
        elif SELECTIONTYPE == "tournament":
            for _ in range(self.MAX_SELECTION_RETRIES):
                p1, p2 = self.tournament()
                if not (p1 == p2 or p1.fitness == 0 or p2.fitness == 0):
                    return p1, p2
        else:
            return None
        second = min(1, len(self.population) - 1)
        return self.population[0], self.population[second]

    def NextGeneration(self):
        """Log the finished generation and breed the next one.

        Survivors are the ``self.survivors`` fittest organisms; the remaining
        ``self.new_organisms`` slots are filled with crossover children, each
        mutated with probability ``self.mutation_rate``.  Every organism's
        ``fitness`` and ``played`` counters are reset afterwards.
        """
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        elite = self.population[:self.survivors]
        averageScore = sum(a.fitness for a in elite) / len(elite) if elite else 0

        self.cycleEnd = time.time() - self.cycleStart

        # print the last generation out
        with open(RESULTS, 'a') as f:
            f.write(
                "\nGeneration: %s,  Sequence Type: %s, Cycle Time: %s  Elite Average Lines Cleared in %s Games: %s\n"
                % (self.current_generation, SEQUENCE,
                   str(datetime.timedelta(seconds=self.cycleEnd)), NUMGAMES,
                   averageScore))
            for a in self.population:
                f.write("%s, Age: %s Weights: %s - Lines Cleared:%s\n" %
                        (a.name, a.age, a.heuristics, a.fitness))

        self._prune_fitness_cache()

        self.current_generation += 1
        # keep only the survivors
        self.SelectSurvivors()

        # Raise the mutation rate when the elite scores did not change.
        eliteScores = [org.fitness for org in self.population[:self.survivors]]
        if eliteScores == self.lastBest:
            self.mutation_rate += .05

        # Children are collected separately so parents are always drawn from
        # the evaluated survivors, never from unevaluated newborns.
        children = []
        for _ in range(self.new_organisms):
            parents = self._select_parents()
            if parents is None:
                break
            child = self.Crossover(parents[0], parents[1])
            # self.mutation_rate (not the MUTRATE constant) so the adaptive
            # bump above actually takes effect.
            if numpy.random.random() < self.mutation_rate:
                self.mutate(child)
            children.append(child)
        self.population.extend(children)

        # reset the fitness to 0
        for org in self.population:
            org.played = 0
            org.fitness = 0

        self.lastBest = eliteScores
        self.cycleStart = time.time()

        # the new population must have exactly num_of_organisms members
        assert self.num_of_organisms == len(
            self.population
        ), "ERROR: new population doesnt the correct number of organisms have %s, want %s" % (
            len(self.population), self.num_of_organisms)

    def SelectSurvivors(self):
        """Truncate the population to its fittest ``self.survivors`` members.

        The population is sorted best-first and every survivor's ``age`` is
        incremented.
        """
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        self.population = self.population[:self.survivors]
        for organism in self.population:
            organism.age += 1

    def Crossover(self, parent1, parent2):
        """Return a normalized child organism bred from two parents.

        CROSSTYPE "uniform" takes each weight from either parent with equal
        probability; "average" sums the parents' weights scaled by their
        fitness.

        Raises ValueError for any other CROSSTYPE (previously an organism
        with no weights was returned silently).
        """
        pairs = zip(parent1.heuristics, parent2.heuristics)
        if CROSSTYPE == "uniform":
            child = [w1 if numpy.random.random() < .5 else w2 for w1, w2 in pairs]
        elif CROSSTYPE == "average":
            # fitness-weighted sum; normalization below rescales it
            a = parent1.fitness
            b = parent2.fitness
            child = [(a * w1) + (b * w2) for w1, w2 in pairs]
        else:
            raise ValueError("unknown CROSSTYPE: %r" % (CROSSTYPE,))

        offspring = Organism(child)
        self.normalize(offspring)
        return offspring

    def mutate(self, organism):
        """Shift one random weight by a value in [-0.2, 0.2) and renormalize."""
        mutation = numpy.random.random() * .4 - .2
        # choose a random weight to mutate (any length, not just 12)
        x = numpy.random.randint(0, len(organism.heuristics))
        organism.heuristics[x] += mutation
        self.normalize(organism)
class GeneticAlgorithms(object):
    """Genetic algorithm that evolves heuristic weights for a Tetris AI.

    Each chromosome maps heuristic functions to weights.  The application
    reports finished games through ``on_game_over``; after every
    ``GAMES_TO_AVG`` games the next chromosome is loaded, and once the whole
    population has played a new generation is produced.
    """

    def __init__(self):
        """Create the app, the AI and a random initial population."""
        self.app = TetrisApp(self)
        self.ai = AI(self.app)
        self.app.ai = self.ai
        self.population = [self.random_chromosome() for _ in range(POPULATION_SIZE)]
        self.current_chromosome = 0
        self.current_generation = 1
        self.ai.heuristics = self.population[self.current_chromosome].heuristics

    def run(self):
        """Start the application main loop."""
        self.app.run()

    def next_ai(self):
        """Load the next chromosome, breeding a new generation on wrap-around."""
        self.current_chromosome += 1
        if self.current_chromosome >= POPULATION_SIZE:
            self.current_chromosome = 0
            self.next_generation()
        self.ai.heuristics = self.population[self.current_chromosome].heuristics

    def on_game_over(self, score):
        """Add ``score`` to the current chromosome and start another game."""
        chromosome = self.population[self.current_chromosome]
        chromosome.games += 1
        chromosome.total_fitness += score
        if chromosome.games % GAMES_TO_AVG == 0:
            self.next_ai()
        self.app.start_game()

    def population_has_converged(self):
        """Return True when every weight of every chromosome lies strictly
        within ``CONVERGED_THRESHOLD`` of the first chromosome's weight."""
        t = CONVERGED_THRESHOLD
        pop = self.population
        return all(
            all(pop[0].heuristics[f] - t < w < pop[0].heuristics[f] + t
                for f, w in c.heuristics.items())
            for c in pop)

    def next_generation(self):
        """Report the finished generation and replace it with a new one.

        Exits the process via ``sys.exit()`` if the population has converged.
        Otherwise survivors are selected, newborns are produced by crossover,
        and ``MUTATION_PASSES`` mutation passes run over the whole new
        population (survivors included).
        """
        print("__________________\n")
        if self.population_has_converged():
            print("Population has converged on generation %s.\n values: %s"
                % (self.current_generation, [(f.__name__, w) for f, w in self.population[0].heuristics.items()]))
            sys.exit()
        print("GENERATION %s COMPLETE" % self.current_generation)
        print("AVG FITNESS", sum([c.avg_fitness() for c in self.population]) / POPULATION_SIZE)
        self.current_generation += 1
        for c in self.population:
            print("chromosome", c.name, "fitness", c.avg_fitness())
        best_chromosome = max(self.population, key=lambda c: c.avg_fitness())
        print("Fittest chromosome:", best_chromosome.name, "fitness", best_chromosome.avg_fitness(), "\n%s" % [(f.__name__, w) for f, w in best_chromosome.heuristics.items()])

        print("\nEVOLUTION")
        new_population = self.selection(SURVIVORS_PER_GENERATION, SELECTION_METHOD)
        for c in new_population:
            print("chromosome", c.name, "fitness", c.avg_fitness(), "SURVIVED")
        for _ in range(NEWBORNS_PER_GENERATION):
            parents = self.selection(2, SELECTION_METHOD)
            new_population.append(self.crossover(parents[0], parents[1], CROSSOVER_METHOD))
            print(parents[0].name, "and", parents[1].name, "PRODUCED", new_population[-1].name)
        for _ in range(MUTATION_PASSES):
            for chromosome in new_population:
                self.mutation(chromosome, MUTATION_RATE / MUTATION_PASSES)
        print("__________________\n")
        assert len(new_population) == len(self.population), "SURVIVORS_PER_GENERATION + NEWBORNS_PER_GENERATION != POPULATION_SIZE"
        self.population = new_population

    @staticmethod
    def _roulette_pick(population):
        """Return one chromosome with probability weighted by avg_fitness().

        The wheel has ``int(total_fitness)`` integer slots.  When that is
        below 1 (e.g. every chromosome scored 0) ``randrange`` would raise,
        so the pick falls back to a uniform choice.

        Raises ValueError when ``population`` is empty.
        """
        if not population:
            raise ValueError("cannot select from an empty population")
        total_fitness = sum(c.avg_fitness() for c in population)
        if int(total_fitness) < 1:
            return random.choice(population)
        winner = randrange(int(total_fitness))
        fitness_so_far = 0
        for chromosome in population:
            fitness_so_far += chromosome.avg_fitness()
            if fitness_so_far > winner:
                return chromosome

    def selection(self, num_selected, method):
        """Return ``num_selected`` distinct chromosomes chosen by ``method``.

        Only ``SelectionMethod.roulette`` is implemented; each pick is drawn
        from the chromosomes not selected so far.

        Raises ValueError for any other method.
        """
        if method == SelectionMethod.roulette:
            survivors = []
            for _ in range(num_selected):
                remaining = [c for c in self.population if c not in survivors]
                survivors.append(self._roulette_pick(remaining))
            return survivors

        raise ValueError('SelectionMethod %s not implemented' % method)

    def crossover(self, c1, c2, method):
        """Return a new Chromosome combining the weights of ``c1`` and ``c2``.

        ``CrossoverMethod.random_attributes`` copies each weight from a
        randomly chosen parent; ``CrossoverMethod.average_attributes`` takes
        the mean of both parents' weights.

        Raises ValueError for any other method.
        """
        if method == CrossoverMethod.random_attributes:
            return Chromosome({
                fun: random.choice((c1, c2)).heuristics[fun]
                for fun in c1.heuristics
            })
        if method == CrossoverMethod.average_attributes:
            return Chromosome({
                fun: (c1.heuristics[fun] + c2.heuristics[fun]) / 2
                for fun in c1.heuristics
            })
        raise ValueError('CrossoverMethod %s not implemented' % method)

    def mutation(self, chromosome, mutation_rate):
        """Possibly replace one random weight with a value in [-1000, 1000).

        The mutation fires with probability ``1 / (int(mutation_rate) + 1)``.
        NOTE(review): a ``mutation_rate`` below 1 therefore always mutates --
        confirm that the rate is meant as "one in N" rather than a
        probability.
        """
        if randint(0, int(mutation_rate)) == 0:
            h = chromosome.heuristics
            h[random.choice(list(h.keys()))] = randrange(-1000, 1000)
            print(chromosome.name, "MUTATED")

    def random_chromosome(self):
        """Return a Chromosome with a random weight in [-1000, 1000) for every
        heuristic known to the AI."""
        return Chromosome({fun: randrange(-1000, 1000) for fun in self.ai.heuristics})