def getScore(self, weights, actionSequence, bestScore, bestSequence):
    """Simulate actionSequence (followed by a hard drop) on the current
    board and return the evaluation-function score of the result.

    All game state touched by the simulation is restored before
    returning, so the real game is unaffected.  bestScore and
    bestSequence are accepted for signature parity with trySequence but
    are not consulted here.
    """
    # Snapshot every attribute the simulated moves can touch.  The board
    # is deep-copied because the drop mutates it in place; the other
    # attributes are rebound rather than mutated, so references suffice.
    saved_state = (self.score,
                   copy.deepcopy(self.board),
                   self.stone,
                   self.stone_x,
                   self.stone_y,
                   self.lines)
    # Play out the candidate moves, then lock the piece with a hard drop.
    for key in actionSequence:
        self.key_actions[key]()
    self.key_actions["RETURN"]()
    # Score the resulting position, looking ahead to the next stone.
    feats = tetrisai.extractFeatures(self.board, self.next_stone)
    result = self.test_evaluation_function(weights, feats)
    # Roll everything back so the simulation leaves no trace.
    (self.score, self.board, self.stone,
     self.stone_x, self.stone_y, self.lines) = saved_state
    return result
def train_evaluation_function(self): # EDIT THIS LATER!!! lenFeatures = len(tetrisai.extractFeatures(self.board, self.stone)) weights = [0] * lenFeatures numGames = 30 for i in range(numGames): print "GAME", i weights = self.td_learning(weights) #weights = td_learning(self, weights) return weights
def run(self): self.gameover = False self.paused = False dont_burn_my_cpu = pygame.time.Clock() while 1: self.screen.fill((0, 0, 0)) if self.gameover: self.center_msg("""Game Over!\nYour score: %d Press space to continue""" % self.score) else: if self.paused: self.center_msg("Paused") else: pygame.draw.line(self.screen, (255, 255, 255), (self.rlim + 1, 0), (self.rlim + 1, self.height - 1)) self.disp_msg("Next:", (self.rlim + cell_size, 2)) self.disp_msg( "Score: %d\n\nLevel: %d\ \nLines: %d" % (self.score, self.level, self.lines), (self.rlim + cell_size, cell_size * 5)) self.draw_matrix(self.bground_grid, (0, 0)) self.draw_matrix(self.board, (0, 0)) self.draw_matrix(self.stone, (self.stone_x, self.stone_y)) self.draw_matrix(self.next_stone, (cols + 1, 2)) pygame.display.update() for event in pygame.event.get(): print "[numBlocks, totalBlockWeight, bumpiness, maxHeight, minHeight, meanHeight, varianceHeight, maxHoleHeight, numHoles, density, numRowsWithHoles, numColsWithHoles]" print tetrisai.extractFeatures(self.board, self.stone) if event.type == pygame.USEREVENT + 1: self.drop(False) elif event.type == pygame.QUIT: self.quit() elif event.type == pygame.KEYDOWN: for key in self.key_actions: if event.key == eval("pygame.K_" + key): self.key_actions[key]() dont_burn_my_cpu.tick(maxfps)
def trySequence(self, weights, actionSequence, bestScore, bestSequence):
    """Simulate actionSequence (plus a hard drop), score the outcome,
    restore all game state, and return the better candidate.

    Lower scores win: if the attempt scores below bestScore, returns
    (attempt score, copy of actionSequence); otherwise returns the
    incumbent (bestScore, bestSequence) unchanged.
    """
    # Snapshot the state the simulation will disturb.  The board needs a
    # deep copy since the drop edits it in place.
    saved = (self.score,
             copy.deepcopy(self.board),
             self.stone,
             self.stone_x,
             self.stone_y,
             self.lines)
    # Apply the moves, then commit the piece with a hard drop.
    for key in actionSequence:
        self.key_actions[key]()
    self.key_actions["RETURN"]()
    # Evaluate the post-drop board against the upcoming stone.
    feats = tetrisai.extractFeatures(self.board, self.next_stone)
    attempt = self.test_evaluation_function(weights, feats)
    # Undo everything before comparing.
    (self.score, self.board, self.stone,
     self.stone_x, self.stone_y, self.lines) = saved
    if attempt < bestScore:
        # Return a copy so later mutation of actionSequence cannot
        # corrupt the stored best sequence.
        return attempt, list(actionSequence)
    return bestScore, bestSequence
def trySequence2(self, cutoff, weights, depth, actionSequence, bestScore, bestSequence):
    """Simulate actionSequence (plus hard drop), score the result, and --
    when depth > 1 and the score is above cutoff -- recursively search
    rotation/left/right placements for the following piece, adding the
    best child score to this one.  All game state is restored before
    returning.  Returns (score, copy of actionSequence) if score beats
    (is lower than) bestScore, else (bestScore, bestSequence).
    """
    # Cache all old values so the simulation can be fully undone.
    # board/stone/next_stone are deep-copied because moves mutate them
    # in place.
    original_score = self.score
    original_board = copy.deepcopy(self.board)
    original_stone = copy.deepcopy(self.stone)
    original_stone_x = self.stone_x
    original_stone_y = self.stone_y
    original_lines = self.lines
    original_next_stone = copy.deepcopy(self.next_stone)
    # Play the candidate moves and lock the piece.
    for move in actionSequence:
        self.key_actions[move]()
    self.key_actions["RETURN"]()
    features = tetrisai.extractFeatures(self.board, self.next_stone)
    score = self.test_evaluation_function(weights, features)
    # Only expand deeper when the position is bad enough (score above
    # cutoff) and search depth remains.
    if (depth > 1 and score > cutoff):
        # Fixed enumeration bounds: 4 rotations (0..3 extra "UP"s),
        # up to 5 left shifts, up to 6 right shifts.  The commented
        # alternatives below would derive the bounds from the stone's
        # actual position/width instead.
        range_rotations = 3
        range_left = 5
        range_right = 6
        #range_left = self.stone_x
        #range_right = len(self.board[0]) - (self.stone_x + len(self.stone[0])) + 1
        # Find the best action sequence for the next piece and its
        # corresponding evaluation-function score.  Sentinel: any real
        # score is assumed smaller than this.
        nextBestScore = 999999999999999
        nextBestSequence = []
        nextActionSequence = []
        # Try the empty placement first.
        nextBestScore, nextBestSequence = self.trySequence2(
            cutoff, weights, depth - 1, nextActionSequence, nextBestScore,
            nextBestSequence)
        for i in range(range_rotations + 1):
            # Current rotation with no horizontal shift.
            nextBestScore, nextBestSequence = self.trySequence2(
                cutoff, weights, depth - 1, nextActionSequence, nextBestScore,
                nextBestSequence)
            for j in range(range_left):
                nextActionSequence.append("LEFT")
                nextBestScore, nextBestSequence = self.trySequence2(
                    cutoff, weights, depth - 1, nextActionSequence,
                    nextBestScore, nextBestSequence)
            # NOTE(review): this slice uses len(actionSequence), but the
            # intent is presumably to drop the range_left "LEFT" keys just
            # appended to nextActionSequence, i.e.
            # len(nextActionSequence) - range_left -- confirm.
            nextActionSequence = nextActionSequence[0:len(actionSequence) -
                                                    range_left]
            for j in range(range_right):
                nextActionSequence.append("RIGHT")
                nextBestScore, nextBestSequence = self.trySequence2(
                    cutoff, weights, depth - 1, nextActionSequence,
                    nextBestScore, nextBestSequence)
            # NOTE(review): same concern as above, with range_right.
            nextActionSequence = nextActionSequence[0:len(actionSequence) -
                                                    range_right]
            # Rotate once before trying the next rotation's translations.
            nextActionSequence.append("UP")
        # NOTE(review): appended "RETURN" is never used -- only
        # nextBestScore feeds back into this level's score.  Confirm
        # whether nextBestSequence was meant to be returned/combined.
        nextBestSequence.append("RETURN")
        # Fold the best child outcome into this position's score.
        score += nextBestScore
    # After trying the move sequence, we have to revert everything.
    self.score = original_score
    self.board = original_board
    self.stone = original_stone
    self.stone_x = original_stone_x
    self.stone_y = original_stone_y
    self.lines = original_lines
    self.next_stone = original_next_stone
    # Lower is better; keep whichever candidate scores lower.
    if score < bestScore:
        return score, list(actionSequence)
    else:
        return bestScore, bestSequence
def td_learning(self, weights):
    """Play one full game, updating `weights` by a TD(0)-style rule after
    every piece placement, and return the (mutated) weight list when the
    game ends.

    Moves for each piece are chosen by baseline.findBestMove; the reward
    is driven by lines cleared (with a large penalty on game over).
    """
    # Fresh empty board for this training game.
    self.board = new_board()
    print self.board
    '''
    pygame.init()
    self.width = cell_size*(cols+6)
    self.height = cell_size*rows
    self.rlim = cell_size*cols
    self.init_game()
    '''
    self.gameover = False
    self.paused = False
    while not self.gameover:
        # Spawn the queued stone centered at the top of the board.
        self.stone = self.next_stone
        self.stone_x = int(cols / 2 - len(self.stone[0]) / 2)
        self.stone_y = 0
        # If the spawn position already collides, the game is over.
        if (check_collision(self.board, self.stone,
                            (self.stone_x, self.stone_y))):
            self.gameover = True
            return weights
        features = tetrisai.extractFeatures(self.board, self.stone)
        # eta: learning rate; discount: TD discount factor (undiscounted).
        eta = 0.01
        discount = 1
        # Value estimate of the pre-move state: weights . features.
        prev_dot_product = 0
        for i in range(len(weights)):
            prev_dot_product += weights[i] * features[i]
        # NOTE(review): prev_score/new_score are captured but unused --
        # the score-based reward below is commented out; confirm intent.
        prev_score = self.score
        prev_lines = self.lines
        # Let the baseline agent pick and execute this piece's moves.
        moves = baseline.findBestMove(self.board, self.stone, self.stone_x)
        for move in moves:
            self.key_actions[move]()
        '''
        # Random number of rotations
        numRotations = random.randint(0,3)
        for i in range(numRotations):
            key_actions["UP"]()
        # Random left right translation
        if random.randint(0,1) == 0:
            range_left = self.stone_x
            for i in range(random.randint(0,range_left)):
                key_actions['LEFT']()
        else:
            range_right = len(self.board[0]) - (self.stone_x + len(self.stone[0]))
            for i in range(random.randint(0,range_right)):
                key_actions['RIGHT']()
        key_actions['RETURN']()
        '''
        new_score = self.score
        #score_reward = new_score - prev_score
        # Reward grows quadratically with lines cleared by this piece.
        reward = 10000 * (self.lines - prev_lines)**2
        if self.gameover:
            print "GESSSS HERE"
            reward = -1000
        # Draw the next stone uniformly at random.
        self.next_stone = tetris_shapes[rand(len(tetris_shapes))]
        newFeatures = tetrisai.extractFeatures(self.board, self.next_stone)
        # Value estimate of the successor state.
        new_dot_product = 0
        for i in range(len(weights)):
            new_dot_product += weights[i] * newFeatures[i]
        print prev_dot_product, new_dot_product
        # TD(0) update: w <- w - eta * (V(s) - (r + gamma*V(s'))) * phi(s).
        for index, weight in enumerate(weights):
            weights[index] = weight - eta * (
                prev_dot_product - (reward + discount * new_dot_product)
            ) * features[index]
        # NOTE(review): normalizing by sum(weights) + .01 flips every
        # weight's sign whenever the sum is negative; presumably meant to
        # bound the weights' magnitude -- confirm this is intended.
        sumWeights = sum(weights) + .01
        weights = [weight * 1.0 / sumWeights for weight in weights]
        print "Features", features
        print "WEIGHTS", weights
    return weights