Example no. 1
0
    def __init__(self, actor_id, args):
        self.tetris = TetrisApp(emulator=True)

        self.legal_actions = [0, 1, 2, 3, 4]
        self.screen_width, self.screen_height = 288, 396
        self.lives = 1

        self.random_start = args.random_start
        self.single_life_episodes = args.single_life_episodes
        self.call_on_new_frame = args.visualize
        self.global_step = 0

        self.compteur = 0

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.rgb = args.rgb
        self.depth = 1
        if self.rgb: self.depth = 3
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width, self.depth),
                     dtype=np.uint8), self.__process_frame_pool)
        self.observation_pool = ObservationPool(
            np.zeros((IMG_SIZE_X, IMG_SIZE_Y, self.depth, NR_IMAGES),
                     dtype=np.uint8), self.rgb)
Example no. 2
	def __init__(self):
		self.app = TetrisApp(self)
		self.ai = AI(self.app)
		self.app.ai = self.ai
		self.population = [self.random_chromosome() for _ in range(POPULATION_SIZE)]
		self.current_chromosome = 0
		self.current_generation = 1
		self.ai.heuristics = self.population[self.current_chromosome].heuristics
Example no. 3
 def __init__(self, alpha = 0.01, gamma = .5, epsilon = 1):
     self.qval=util.Counter()
     self.alpha=alpha
     self.epsilon=epsilon
     self.discount=gamma
     self.Tetris= TetrisApp()
     self.boardprev=0.
Example no. 4
0
 def __init__(self):
     self.num_of_organisms = POPSIZE
     self.survivors = ELITE
     self.new_organisms = self.num_of_organisms - self.survivors
     self.mutation_rate = MUTRATE
     self.crossover_rate = CROSSRATE
     #initialize the population
     self.population = self.InitPop(self.num_of_organisms)
     #keep track of which organism in the population we are working on
     self.current_organism = 0
     #keeps track of what generation we are on
     self.current_generation = 0
     #GA gets the application
     self.sequenceType = SEQUENCE
     self.seed = numpy.random.random()
     self.app = TetrisApp(self)
     #GA gets our agent, which needs the organism
     #so it can access weights of the organism
     self.ai = Agent(self.app)
     self.app.ai = self.ai
     self.cycleStart = 0
     self.cycleEnd = 0
     self.fitnessDictionary = {}
     self.lastBest = []
Example no. 5
    def getAction(self, state):
        """
        Returns the next action in the path chosen earlier (in
        registerInitialState). Returns gameover if no more actions to take.
        state = a GameState object (TetrisSearch.py)

        """
        dont_burn_my_cpu = pygame.time.Clock()
        key_actions = {
            'ESCAPE': state.quit,
            'LEFT': lambda: state.move(-1),
            'RIGHT': lambda: state.move(+1),
            'DOWN': lambda: state.drop(True),
            'UP': state.rotate_stone,
            'p': state.toggle_pause,
            'SPACE': state.start_game,
            'RETURN': state.insta_drop
        }

        # advance through the precomputed path; the index starts at 0 on the first call
        i = getattr(self, 'actionIndex', 0)
        self.actionIndex = i + 1
        for event in pygame.event.get():
            if event.type == pygame.USEREVENT + 1:
                self.drop(False)
            elif event.type == pygame.QUIT:
                self.quit()
            elif i < len(self.actions):
                for key in key_actions:
                    if self.actions[i] == eval(key):
                        return key_actions[key]()
                if event.type == pygame.USEREVENT + 1:
                    self.drop(False)
                elif event.type == pygame.QUIT:
                    self.quit()
        else:
            # no more actions to take: end the game via the state's quit handler
            return state.quit()
Example no. 6
0
    # Setup to grab statistics
    #   - Max, mean, min, std, variance
    # Setup containers for highest score and highest scoring individual
    # Create a Genetic Algorithm loop
    #   - This loop will achieve the following:
    #       a. Select and clone the next generation individuals
    #       b. Apply crossover and mutation on the offspring
    #       c. Replace population with offspring
    #------------------------------------------------------------
    n_gen = 50
    # n_gen = 100
    prob_xover = 0.3
    prob_mut = 0.05
    pop = toolbox.population(n=25)
    # pop = toolbox.population(n=1000)
    game = TetrisApp(training=True)
    best_ind = []
    best_score = -1

    max_out  = open("max50.txt", "w")
    mean_out = open("mean50.txt", "w")
    min_out  = open("min50.txt", "w")
    std_out  = open("std50.txt", "w")
    var_out  = open("var50.txt", "w")

    for g in range(1, n_gen + 1):
        print("Current Generation " + str(g))
        scores = []
        for ind in pop:
            score = game.run_train(ind)
            scores.append(score)
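        # ---- assumed continuation (not part of the original excerpt) ----
        # A minimal sketch of steps (a)-(c) from the comment block above, using
        # standard DEAP operators. It assumes `toolbox.select`, `toolbox.mate`,
        # `toolbox.mutate` and a fitness attribute were registered/created
        # elsewhere, and that `random` is imported at module level.
        for ind, score in zip(pop, scores):
            ind.fitness.values = (score,)
            if score > best_score:
                best_score, best_ind = score, toolbox.clone(ind)

        # (assumed) record per-generation statistics in the files opened above
        max_out.write("%s\n" % max(scores))
        mean_out.write("%s\n" % (sum(scores) / len(scores)))
        min_out.write("%s\n" % min(scores))

        # a. Select and clone the next generation individuals
        offspring = list(map(toolbox.clone, toolbox.select(pop, len(pop))))

        # b. Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < prob_xover:
                toolbox.mate(child1, child2)
        for mutant in offspring:
            if random.random() < prob_mut:
                toolbox.mutate(mutant)

        # c. Replace population with offspring
        pop[:] = offspring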
Example no. 7
0
    def run_game(self, player, seed=10000, debug=False):

        App = TetrisApp(player, seed=seed, debug=debug)
        score = App.run()
        return score
Example no. 8
0
class GA(object):
    def __init__(self):
        self.num_of_organisms = POPSIZE
        self.survivors = ELITE
        self.new_organisms = self.num_of_organisms - self.survivors
        self.mutation_rate = MUTRATE
        self.crossover_rate = CROSSRATE
        #initialize the population
        self.population = self.InitPop(self.num_of_organisms)
        #keep track of which organism in the population we are working on
        self.current_organism = 0
        #keeps track of what generation we are on
        self.current_generation = 0
        #GA gets the application
        self.sequenceType = SEQUENCE
        self.seed = numpy.random.random()
        self.app = TetrisApp(self)
        #GA gets our agent, which needs the organism
        #so it can access weights of the organism
        self.ai = Agent(self.app)
        self.app.ai = self.ai
        self.cycleStart = 0
        self.cycleEnd = 0
        self.fitnessDictionary = {}
        self.lastBest = []

    def RandomOrganism(self):
        nums = []
        for j in range(0, 12):
            a = numpy.random.uniform(LOWERBOUND, UPPERBOUND)
            nums.append(a)
        organism = Organism(nums)
        self.normalize(organism)
        return organism

    def InitPop(self, populationSize):
        #init population with a seed
        #random.seed(7)
        population = []
        #for each organism in the population
        #population.append(Organism([-0.25835108880355967, -0.18873479853738032, -0.6081190254748627, -0.5281331622290867, -0.0936639080926526, -0.10826897335053938, 0.15010957868145391, -0.21161009827721672, -0.04113776799016001, 0.2957493369775496, -0.07093022881256028, -0.2586553756116776]))
        for i in range(0, populationSize):
            organism = self.RandomOrganism()
            population.append(organism)
        #returns a list of randomly initialized Organisms
        return population

    #start running the game
    def Run(self):
        with open(RESULTS, 'w') as f:
            f.write(
                "\n Cross Type: %s, Selection Type: %s, Crossover Rate: %s, Mutation Rate: %s , Replacement Per Cycle: %s\n Theoretical Line limit: %s "
                % (CROSSTYPE, SELECTIONTYPE, self.crossover_rate,
                   self.mutation_rate, self.new_organisms,
                   (NUMGAMES * self.app.limit * 4 / 10)))
            #all heuristics
            #f.write("Weights: Aggregate Height, Bumpiness, Holes, LinesCleared, Connected Holes, Blockades, Altitude Delta, Weighted Blocks, H-Roughness, V-Roughness, Wells, Biggest Well, Total Height.\n Mutation Rate: %s , Replacement Per Cycle: %s\n" % (self.mutation_rate, self.new_organisms))
        self.cycleStart = time.time()
        self.app.run()

    def NextAI(self):
        self.current_organism += 1
        #if we have worked on every organism in the current population, get the next
        #generation
        self.app.piecesPlayed = 0
        if self.current_organism >= self.num_of_organisms:
            self.current_organism = 0
            self.NextGeneration()

        #this is for if we keep the same sequence for every generation and each organism only plays 1 game - we can use this to skip playing the elites
        if SEQUENCE == "fixed" and NUMGAMES == 1:
            while self.current_organism < self.num_of_organisms and tuple(
                    self.population[self.current_organism].heuristics
            ) in self.fitnessDictionary.keys():
                #print("ALREADY SEEN %s" % self.population[self.current_organism].heuristics)
                self.population[
                    self.current_organism].fitness = self.fitnessDictionary[
                        tuple(
                            self.population[self.current_organism].heuristics)]
                self.current_organism += 1

        if self.current_organism >= self.num_of_organisms:
            self.current_organism = 0
            self.NextGeneration()

        #update the heuristics for the organism we are working on
        self.ai.heuristics = self.population[self.current_organism].heuristics

    #handles when a game we are testing the current organism on ends
    def GameOver(self, lines_cleared):
        organism = self.population[self.current_organism]
        organism.fitness += lines_cleared
        #load the next organism into the algo
        organism.played += 1
        if organism.played == NUMGAMES:
            self.fitnessDictionary[tuple(
                organism.heuristics)] = organism.fitness
            self.NextAI()
            if SEQUENCE == "fixed":
                self.app.start_game(self.seed)
            elif SEQUENCE == "random":
                self.app.start_game(numpy.random.random())
        else:
            #restart the game
            self.app.piecesPlayed = 0
            if SEQUENCE == "fixed":
                self.app.start_game(self.seed)
            elif SEQUENCE == "random":
                self.app.start_game(numpy.random.random())

    #add normalization
    def normalize(self, org):
        squared = []
        for h in org.heuristics:
            squared.append(h * h)
        norm = numpy.sqrt(sum(squared))
        for i, weight in enumerate(org.heuristics):
            org.heuristics[i] /= norm

    #check if the population has converged -- TODO

    #tournament selection might be more valuable. add this
    def tournament(self):
        indices = [i for i in range(0, len(self.population))]
        #since the population is sorted, just select the two smallest indices from the pool.
        v1 = None
        v2 = None

        x = int(self.num_of_organisms * .1)

        for a in range(0, x):
            selected = numpy.random.choice(indices)
            if v1 is None or selected < v1:
                v2 = v1
                v1 = selected
            elif v2 is None or selected < v2:
                v2 = selected

        return self.population[v1], self.population[v2]

    #roulette selection
    def roulette(self):
        fSum = float(sum([org.fitness for org in self.population]))
        relativeFitness = []
        for x in range(0, len(self.population)):
            relativeFitness.append(self.population[x].fitness / fSum)
        #worse organisms are more likely to be passed over in roulette
        probs = [
            sum(relativeFitness[:i + 1]) for i in range(len(relativeFitness))
        ]
        r = random.random()
        for i, organism in enumerate(self.population):
            if r <= probs[i]:
                return organism

    def NextGeneration(self):
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        averageScore = 0
        elite = self.population[:self.survivors]
        for a in elite:
            averageScore += a.fitness
        averageScore = averageScore / len(elite)

        self.cycleEnd = time.time() - self.cycleStart

        #print the last generation out
        with open(RESULTS, 'a') as f:
            f.write(
                "\nGeneration: %s,  Sequence Type: %s, Cycle Time: %s  Elite Average Lines Cleared in %s Games: %s\n"
                % (self.current_generation, SEQUENCE,
                   str(datetime.timedelta(seconds=self.cycleEnd)), NUMGAMES,
                   averageScore))

            for a in self.population:
                f.write("%s, Age: %s Weights: %s - Lines Cleared:%s\n" %
                        (a.name, a.age, a.heuristics, a.fitness))

        #drop cached fitness entries for organisms no longer in the population
        #(iterate over a list copy so keys can be deleted while looping)
        for key in list(self.fitnessDictionary.keys()):
            if key not in [tuple(org.heuristics) for org in self.population]:
                del self.fitnessDictionary[key]

        #increment the generation
        self.current_generation += 1
        #create the new population with only the survivors
        self.SelectSurvivors()

        eliteScores = [org.fitness for org in self.population[:self.survivors]]
        if eliteScores == self.lastBest:
            self.mutation_rate += .05
        #create the new organisms to add to the new_pop

        #roulette selection
        if SELECTIONTYPE == "roulette":
            for x in range(0, self.new_organisms):
                #select two parents
                parent1 = self.roulette()
                parent2 = self.roulette()
                while parent1 == parent2:
                    parent2 = self.roulette()
                #print("p1: %s , p2: %s" % (parent1.name, parent2.name))
                #create the new organism
                a = self.Crossover(parent1, parent2)
                #mutate the children
                if numpy.random.random() < MUTRATE:
                    self.mutate(a)
                #add to population
                self.population.append(a)
        elif SELECTIONTYPE == "tournament":
            #tournament selection
            for x in range(0, self.new_organisms):
                p1, p2 = self.tournament()
                while p1 == p2 or p1.fitness == 0 or p2.fitness == 0:
                    p1, p2 = self.tournament()
                new = self.Crossover(p1, p2)
                if numpy.random.random() < MUTRATE:
                    self.mutate(new)
                self.population.append(new)

        #reset the fitness to 0
        for org in self.population:
            org.played = 0
            org.fitness = 0

        self.lastBest = eliteScores
        self.cycleStart = time.time()

        #check to make sure we have the correct number of organisms in the new
        #population
        assert self.num_of_organisms == len(
            self.population
        ), "ERROR: new population does not have the correct number of organisms: have %s, want %s" % (
            len(self.population), self.num_of_organisms)

    #Returns the survivors of the population (the top self.survivors organisms)

    def SelectSurvivors(self):
        #sort the population by Organism.fitness
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        #kill off amount needed to introduce specified amount of new organisms
        self.population = self.population[:self.survivors]
        for organism in self.population:
            organism.age += 1

    #takes two parents and does uniform crossover
    #returns an Organism
    def Crossover(self, parent1, parent2):
        child = []
        #two point
        # add other crossover methods that can be specified at launch and crossover using the CROSSRATE
        #uniform crossover
        if CROSSTYPE == "uniform":
            for x in range(0, len(parent1.heuristics)):
                if numpy.random.random() < .5:
                    child.append(parent1.heuristics[x])
                else:
                    child.append(parent2.heuristics[x])

        elif CROSSTYPE == "average":
            #weighted average crossover
            a = parent1.fitness
            b = parent2.fitness
            for x in range(0, len(parent1.heuristics)):
                child.append((a * parent1.heuristics[x]) +
                             (b * parent2.heuristics[x]))

        offspring = Organism(child)
        #print("CROSSOVER NORMALIZING %s" % offspring.heuristics)
        self.normalize(offspring)
        return offspring

    #mutates the weights of a chromosome
    def mutate(self, organism):
        #mutation range of -.2 to +.2
        mutation = numpy.random.random() * .4 - .2
        #choose a random weight to mutate
        x = numpy.random.randint(0, 12)
        organism.heuristics[x] += mutation
        self.normalize(organism)
Example no. 9
class QLearningAgent(TetrisApp):
    def __init__(self, alpha = 0.01, gamma = .5, epsilon = 1):
        self.qval=util.Counter()
        self.alpha=alpha
        self.epsilon=epsilon
        self.discount=gamma
        self.Tetris= TetrisApp()
        self.boardprev=0.

    def observeTransition(self, state,action,nextState,deltaReward):
        self.episodeRewards += deltaReward
        self.update(state,action,nextState,deltaReward)

    # Returns 0.0 for an unseen (state, action) pair, otherwise the stored Q-value.
    # The state contains nested lists (the board), which are unhashable, so we key
    # the Counter on a hash of the pair's string representation instead.
    def getQValue(self, state, action):
        if hash(str((state, action))) not in self.qval:
          self.qval[hash(str((state,action)))]=0.0
        return self.qval[hash(str((state,action)))]

    def computeValueFromQValues(self, state):
        val = 0.0
        action=self.computeActionFromQValues(state)
        if action != None:
          val= self.getQValue(state,action)
        return val

    def computeActionFromQValues(self, state):
        finalaction=None
        legalActions = self.Tetris.get_legal_actions(state[1])

        if len(legalActions)!=0:
          maxval= -999999
          for action in self.Tetris.get_legal_actions(state[1]):
            Qval=self.getQValue(state,action)
            if Qval>=maxval:
              maxval=Qval
              finalaction=action
        return finalaction

    def helperfunction(self, lst, legalactions):
      value, action, new_board = lst
      val = (value + max(self.ideal_place_2(new_board, legalactions,True))[0], action)
      return val


    def getAction(self, state):
        legalActions = self.Tetris.get_legal_actions(state[1])
        action = None
        if len(legalActions)!=0:
              if util.flipCoin(self.epsilon):
                valuedict = {}
                actionlist= self.ideal_place_2(self.Tetris.board, legalActions, False)
                valuelist = map((lambda x: self.helperfunction(x, legalActions)), actionlist)
                return max(valuelist)[1]
              else:
                action = self.computeActionFromQValues(state)
        return action
        
    # Tabular Q-learning update:
    #   Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
    def update(self, state, action, nextState, reward):
        td_error = (reward
                    + self.discount * self.computeValueFromQValues(nextState)
                    - self.getQValue(state, action))
        self.qval[hash(str((state, action)))] += self.alpha * td_error
  
    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)

    def run(self,n):
      key_actions = {
        'ESCAPE': self.Tetris.quit,
        'LEFT':   lambda:self.Tetris.move(-1),
        'RIGHT':    lambda:self.Tetris.move(+1),
        'DOWN':   lambda:self.Tetris.drop(True),
        'UP':   self.Tetris.rotate_stone,
        'p':      self.Tetris.toggle_pause,
        'SPACE':  self.Tetris.start_game,
        'RETURN': self.Tetris.insta_drop
      }

      self.Tetris.board = tetris.new_board()
      self.boardprev=self.Tetris.board

      if n< value_iter_rounds:
        self.epsilon = 1
      else:
        self.epsilon = 1/(15.*math.log(float(n)+1))
  
      self.Tetris.gameover = False
      self.Tetris.paused = False
      
      dont_burn_my_cpu = pygame.time.Clock()
      rot, col = self.getAction((self.Tetris.get_board_state(self.Tetris.board),self.Tetris.stone))
      prevboard = self.Tetris.board
      n+=1
      while not(self.Tetris.gameover):
        self.update((prevboard,self.Tetris.stone), (rot,col), (self.Tetris.get_board_state(self.Tetris.board),self.Tetris.stone), self.Tetris.heuristic(self.Tetris.board)) 
        piece = self.Tetris.stone
        prevboard = tetris.deepishcopy(self.Tetris.board)
        legalactions = self.Tetris.get_legal_actions(self.Tetris.stone)
        rot, col =self.getAction((self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone))
        i= 1
        while i ==1:
          self.Tetris.screen.fill((0,0,0))
          if self.Tetris.gameover:
            self.Tetris.center_msg("""Game Over!\nYour score: %d
    Press space to continue""" % self.Tetris.score)
            if n< 10000:
              self.Tetris.start_game()
            else: 
              self.Tetris.quit()
          else:
            if self.Tetris.paused:
              self.Tetris.center_msg("Paused")
            else:
              pygame.draw.line(self.Tetris.screen,
                (255,255,255),
                (self.Tetris.rlim+1, 0),
                (self.Tetris.rlim+1, self.Tetris.height-1))
              self.Tetris.disp_msg("Next:", (
                self.Tetris.rlim+cell_size,
                2))
              self.Tetris.disp_msg("Score: %d\n\nLevel: %d\
    \nLines: %d" % (self.Tetris.score, self.Tetris.level, self.Tetris.lines),
                (self.Tetris.rlim+cell_size, cell_size*5))
              self.Tetris.draw_matrix(self.Tetris.bground_grid, (0,0))
              self.Tetris.draw_matrix(self.Tetris.board, (0,0))
              self.Tetris.draw_matrix(self.Tetris.stone,
                (self.Tetris.stone_x, self.Tetris.stone_y))
              self.Tetris.draw_matrix(self.Tetris.next_stone,
                (cols+1,2))
          pygame.display.update()



          self.Tetris.place_brick(rot,col)
          i= 0
          for event in pygame.event.get():
            if event.type == pygame.USEREVENT+1:
              pass
            elif event.type == pygame.QUIT:
              self.Tetris.quit()
            elif event.type == pygame.KEYDOWN:
              for key in key_actions:
                if event.key == eval("pygame.K_"
                +key):
                  key_actions[key]()
Example no. 10
0
class TetrisEmulator(BaseEnvironment):
    def __init__(self, actor_id, args):
        self.tetris = TetrisApp(emulator=True)

        self.legal_actions = [0, 1, 2, 3, 4]
        self.screen_width, self.screen_height = 288, 396
        self.lives = 1

        self.random_start = args.random_start
        self.single_life_episodes = args.single_life_episodes
        self.call_on_new_frame = args.visualize
        self.global_step = 0

        self.compteur = 0

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.rgb = args.rgb
        self.depth = 1
        if self.rgb: self.depth = 3
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width, self.depth),
                     dtype=np.uint8), self.__process_frame_pool)
        self.observation_pool = ObservationPool(
            np.zeros((IMG_SIZE_X, IMG_SIZE_Y, self.depth, NR_IMAGES),
                     dtype=np.uint8), self.rgb)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """ Get the current frame luminance. Return: the current frame """
        self.gray_screen = self.tetris.getScreen(rgb=False)
        if self.rgb:
            self.rgb_screen = self.tetris.getScreen()
        if self.call_on_new_frame:
            self.rgb_screen = self.tetris.getScreen()
            self.on_new_frame(self.rgb_screen)
        self.compteur += 1
        if self.rgb:
            return self.rgb_screen
        return self.gray_screen

    def on_new_frame(self, frame):
        pass

    def __new_game(self):
        """ Restart game """
        self.tetris.init_game()
        self.lives = 1
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.tetris.act(0)

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """
        img = np.amax(frame_pool, axis=0)
        if not self.rgb:
            img = np.reshape(img, (self.screen_height, self.screen_width))
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        if not self.rgb:
            img = np.reshape(img, (84, 84, 1))
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.tetris.act(a)
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.tetris.act(a)
            img = self.__get_screen_image()
            if not self.rgb:
                img = np.reshape(img,
                                 (self.screen_height, self.screen_width, 1))
            self.frame_pool.new_frame(img)
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(NR_IMAGES):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward = self.__action_repeat(action)
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = 0 if terminal else 1
        observation = self.observation_pool.get_pooled_observations()
        self.global_step += 1
        return observation, reward, terminal

    def __is_terminal(self):
        return self.tetris.gameover

    def __is_over(self):
        return self.tetris.gameover

    def get_noop(self):
        return [1.0, 0.0]
Example no. 11
0
def dqn():
    env = TetrisApp(8, 16, 750, False, 40, 30 * 100)
    episodes = 5000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)

    # log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    # log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    env.pcrun()
    for episode in tqdm(range(episodes)):
        env.reset()
        current_state = env._get_board_props(env.board)
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.pcplace(best_action[0], best_action[1])

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        # if log_every and episode and episode % log_every == 0:
        #     avg_score = mean(scores[-log_every:])
        #     min_score = min(scores[-log_every:])
        #     max_score = max(scores[-log_every:])

        #     log.log(episode, avg_score=avg_score, min_score=min_score,
        #             max_score=max_score)
    plt.xlabel("Episodes")
    plt.ylabel('Average score over 30 episodes')
    plt.grid()
    plt.plot(np.linspace(30, episodes, episodes - 29),
             moving_average(scores, 30))
    plt.savefig("nlinker.png")
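# `moving_average` is used by the plotting call above but is not defined in this
# excerpt. The helper below is an assumed, minimal sketch (not necessarily the
# original implementation): a trailing mean via NumPy convolution whose 'valid'
# mode yields len(values) - window + 1 points, matching the
# np.linspace(30, episodes, episodes - 29) x-axis above.
def moving_average(values, window):
    return np.convolve(values, np.ones(window) / window, mode='valid')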
Example no. 12
0
    within the `models` directory. An example is given as `ai_rando`.",
    )
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="Enable debug mode: The Tetris Engine will wait until input \
                is received from your Model before updating the frame")
    parser.add_argument(
        '-s',
        '--seed',
        dest='seed',
        default=69,
        help=
        'The seed for the random number generator. Affects block generation')
    args = parser.parse_args()
    args.seed = int(args.seed)

    if args.model:

        model = import_player(args.model)
        # Let the games begin
        App = TetrisApp(model, debug=args.debug, seed=args.seed)

    if args.arena:
        players = args.arena
        player_models = [import_player(player) for player in players]
        Arena(player_models, debug=args.debug).run_round_robin(seed=args.seed)

    print("Nothing else to do.")
Example no. 13
0
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,1,0,0,0,0],
[1,1,1,1,1,1,1,0,0,0],
[1,1,1,1,1,1,1,1,1,1]],

]

app = TetrisApp()
ai = TetrisAI(app)

# test individual functions used to score moves
class TestEval(unittest.TestCase):

    def test_get_max_height(self):
      heights = []
      for board in test_boards:
        heights.append(ai.get_max_height(board))
      self.assertEqual(heights, [0,5,5,2]) 

    def test_get_roughness(self):
      roughs = []
      for board in test_boards:
        roughs.append(ai.get_roughness(board))
Example no. 14
class GeneticAlgorithms(object):
	def __init__(self):
		self.app = TetrisApp(self)
		self.ai = AI(self.app)
		self.app.ai = self.ai
		self.population = [self.random_chromosome() for _ in range(POPULATION_SIZE)]
		self.current_chromosome = 0
		self.current_generation = 1
		self.ai.heuristics = self.population[self.current_chromosome].heuristics

	def run(self):
		self.app.run()

	def next_ai(self):
		self.current_chromosome += 1
		if self.current_chromosome >= POPULATION_SIZE:
			self.current_chromosome = 0
			self.next_generation()
		self.ai.heuristics = self.population[self.current_chromosome].heuristics
	
	def on_game_over(self, score):
		chromosome = self.population[self.current_chromosome]
		chromosome.games += 1
		chromosome.total_fitness += score
		if chromosome.games % GAMES_TO_AVG == 0:
			self.next_ai()
		self.app.start_game()

	def population_has_converged(self):
		t = CONVERGED_THRESHOLD
		pop = self.population
		return all(all(pop[0].heuristics[f]-t < w < pop[0].heuristics[f]+t for f, w in c.heuristics.items()) for c in pop)

	def next_generation(self):
		print("__________________\n")
		if self.population_has_converged():
			print("Population has converged on generation %s.\n values: %s" 
				% (self.current_generation, [(f.__name__, w) for f, w in self.population[0].heuristics.items()]))
			sys.exit()
		print("GENERATION %s COMPLETE" % self.current_generation)
		print("AVG FITNESS", sum([c.avg_fitness() for c in self.population]) / POPULATION_SIZE)
		self.current_generation += 1
		for c in self.population:
			print("chromosome", c.name, "fitness", c.avg_fitness())
		best_chromosome = max(self.population, key=lambda c: c.avg_fitness())
		print("Fittest chromosome:", best_chromosome.name, "fitness", best_chromosome.avg_fitness(), "\n%s" % [(f.__name__, w) for f, w in best_chromosome.heuristics.items()])

		print("\nEVOLUTION")
		new_population = self.selection(SURVIVORS_PER_GENERATION, SELECTION_METHOD)
		for c in new_population:
			print("chromosome", c.name, "fitness", c.avg_fitness(), "SURVIVED")
		for _ in range(NEWBORNS_PER_GENERATION):
			parents = self.selection(2, SELECTION_METHOD)
			new_population.append(self.crossover(parents[0], parents[1], CROSSOVER_METHOD))
			print(parents[0].name, "and", parents[1].name, "PRODUCED", new_population[-1].name)
		for _ in range(MUTATION_PASSES):
			for chromosome in new_population:
				self.mutation(chromosome, MUTATION_RATE / MUTATION_PASSES)
		print("__________________\n")
		assert len(new_population) == len(self.population), "SURVIVORS_PER_GENERATION + NEWBORNS_PER_GENERATION != POPULATION_SIZE"
		self.population = new_population

	def selection(self, num_selected, method):
		def roulette(population):
			total_fitness = sum([c.avg_fitness() for c in population])
			winner = randrange(int(total_fitness))
			fitness_so_far = 0
			for chromosome in population:
				fitness_so_far += chromosome.avg_fitness()
				if fitness_so_far > winner:
					return chromosome
		
		if method == SelectionMethod.roulette:
			survivors = []
			for _ in range(num_selected):
				survivors.append(roulette([c for c in self.population if c not in survivors]))
			return survivors

		raise ValueError('SelectionMethod %s not implemented' % method)

	def crossover(self, c1, c2, method):
		def random_attributes():
			heuristics = {}
			for fun, _ in c1.heuristics.items():
				heuristics[fun] = random.choice((c1, c2)).heuristics[fun]
			return Chromosome(heuristics)

		def average_attributes():
			heuristics = {}
			for fun, _ in c1.heuristics.items():
				heuristics[fun] = (c1.heuristics[fun] + c2.heuristics[fun]) / 2
			return Chromosome(heuristics)			

		if method == CrossoverMethod.random_attributes:
			return random_attributes()
		if method == CrossoverMethod.average_attributes:
			return average_attributes()
		raise ValueError('CrossoverMethod %s not implemented' % method)

	def mutation(self, chromosome, mutation_rate):
		if randint(0, int(mutation_rate)) == 0:
			h = chromosome.heuristics
			h[random.choice(list(h.keys()))] = randrange(-1000, 1000)
			print(chromosome.name, "MUTATED")

	def random_chromosome(self):
		return Chromosome({fun: randrange(-1000, 1000) for fun, weight in self.ai.heuristics.items()})
Example no. 15
0
def fitness(individual, seeds, pieceLimit):
    results = []
    for seed in seeds:
        results.append(TetrisApp(False, seed).run(individual, pieceLimit))
    return int(sum(results)/len(results))