def get_move_list(self, current_game, mode='avg'):
    """Rank the legal directions of ``current_game`` by predicted value.

    For every legal direction, ``num_each_dir`` successor positions are
    scored with ``self.model.predict`` and the scores are folded into a
    single number per direction.

    Args:
        current_game: Position to analyse. Must expose ``direction_legal``
            (indexable by direction) and, for ``mode="avg"``,
            ``get_states_each_dir``.
        mode: ``"minmax"`` keeps the smallest sampled prediction per
            direction (starting from 2.0); ``"avg"`` sums the sampled
            predictions. Any other value yields an empty list.

    Returns:
        ``None`` when ``self.model`` is ``None``; otherwise the legal
        directions sorted by ascending aggregated value, so the
        highest-valued direction is last. The list is empty when no
        direction is legal.
    """
    if self.model is None:
        return None

    num_each_dir = 10
    values = {}
    samples = []
    combine = None

    if mode == "minmax":
        for direction in Direction:
            if not current_game.direction_legal[direction]:
                continue
            values[direction] = 2.0
            for _ in range(num_each_dir):
                # Game(current_game) presumably copies the position, so each
                # sample is an independent outcome of the same move.
                game = Game(current_game)
                game.move(direction)
                samples.append(game.to_logarray())
        combine = min
    elif mode == "avg":
        states = current_game.get_states_each_dir(num_each_dir)
        for direction in Direction:
            if not current_game.direction_legal[direction]:
                continue
            values[direction] = 0.0
            # NOTE(review): assumes states[direction] holds exactly
            # num_each_dir entries; the aggregation below relies on it.
            samples.extend(states[direction])
        combine = lambda total, prediction: total + prediction

    # np.vstack raises on an empty sequence, so only predict when there is
    # at least one legal direction to score.
    if samples:
        predictions = self.model.predict(np.vstack(samples))
        index = 0
        # Same iteration order as when the samples were collected, so the
        # running index lines up with each direction's block of rows.
        for direction in Direction:
            if direction not in values:
                continue
            for _ in range(num_each_dir):
                values[direction] = combine(values[direction],
                                            predictions[index][0])
                index += 1

    return sorted(values, key=values.get)
def play_a_game(total_num=100):
    """Play ``total_num`` games with the latest saved learner and show them.

    A ``ValueLearner`` is created and ``load_latest()`` is called on it. For
    each game the number of moves is printed; every position seen (via
    ``game.to_array()``) is collected across all games and handed to
    ``Application2(...).mainloop()``. Finally the average number of moves per
    game is printed (0.0 when ``total_num`` is 0).

    Args:
        total_num: Number of games to play.
    """
    learner = ValueLearner()
    learner.load_latest()

    total_moves = 0
    states = []
    for _ in range(total_num):
        game = Game()
        num_moves = 0
        while not game.end:
            states.append(game.to_array())
            game.move(learner.move(game))
            num_moves += 1
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(num_moves)
        total_moves += num_moves

    # NOTE(review): the viewer is assumed to run once, after all games,
    # on the positions accumulated across every game - confirm.
    Application2(states).mainloop()

    # Guard the average so that total_num == 0 does not divide by zero.
    print(float(total_moves) / total_num if total_num else 0.0)
def play_games(self, total_num):
    """Play ``total_num`` games using ``self.move`` and record each position.

    Every game's history holds ``game.to_logarray()`` taken before each move
    plus once more after the game has ended.

    Args:
        total_num: Number of games to play.

    Returns:
        A tuple ``(histories, min_game)`` where ``histories`` is a list with
        one history per game and ``min_game`` is the smallest final
        ``np.sum(game.board)`` over all games (the sentinel 999999999 when
        no game was played).

    Side effects:
        Calls ``self.log(min_game, average_history_length)``; the average is
        0.0 when ``total_num`` is 0.
    """
    min_game = 999999999
    total_positions = 0
    histories = []

    for _ in range(total_num):
        game = Game()
        history = []
        while not game.end:
            history.append(game.to_logarray())
            game.move(self.move(game))
        # Also record the terminal position.
        history.append(game.to_logarray())

        min_game = min(min_game, np.sum(game.board))
        total_positions += len(history)
        histories.append(history)

    # Avoid ZeroDivisionError when no games were requested.
    average = float(total_positions) / total_num if total_num else 0.0
    self.log(min_game, average)
    return histories, min_game
def play_games(self, total_num):
    """Play ``total_num`` games, switching model as the board sum grows.

    Each game starts with ``model_index`` 0. After every move, if
    ``np.sum(game.board)`` exceeds ``self.ranges[model_index]`` the index is
    advanced by one. The move played is the last entry returned by
    ``self.get_move_list_exhaustive(game, model_index)``.

    Every game's history holds ``game.to_logarray()`` taken before each move
    plus once more after the game has ended.

    Args:
        total_num: Number of games to play.

    Returns:
        A list with one history per game.

    Side effects:
        Calls ``self.log(average_history_length)``; the average is 0.0 when
        ``total_num`` is 0.
    """
    total_positions = 0
    histories = []

    for _ in range(total_num):
        game = Game()
        history = []
        model_index = 0
        while not game.end:
            history.append(game.to_logarray())
            best_direction = self.get_move_list_exhaustive(
                game, model_index)[-1]
            game.move(best_direction)
            # NOTE(review): indexing self.ranges is unguarded; presumably its
            # last threshold is never exceeded - confirm against the caller.
            if np.sum(game.board) > self.ranges[model_index]:
                model_index += 1
        # Also record the terminal position.
        history.append(game.to_logarray())

        total_positions += len(history)
        histories.append(history)

    # Avoid ZeroDivisionError when no games were requested.
    average = float(total_positions) / total_num if total_num else 0.0
    self.log(average)
    return histories
def play_games_2(self, total_num):
    """Play ``total_num`` games and collect the runner-up continuations.

    At every position the ranking from ``self.get_move_list(game)`` is used:
    the last entry is played in the main game, while the second-to-last and
    third-to-last entries (when present) are each applied to a fresh
    ``Game(game)`` object. Those alternative games are kept only if they have
    not ended after the move.

    Args:
        total_num: Number of games to play.

    Returns:
        A tuple ``(histories, min_game, seconds, thirds)``:
        ``histories`` has one list of ``to_logarray()`` snapshots per game
        (before each move plus the terminal position); ``min_game`` is the
        smallest final ``np.sum(game.board)`` (sentinel 999999999 when no
        game was played); ``seconds`` / ``thirds`` hold the unfinished games
        reached through the second- / third-ranked move.

    Side effects:
        Calls ``self.log(min_game, average_history_length)``; the average is
        0.0 when ``total_num`` is 0.
    """
    min_game = 999999999
    total_positions = 0
    histories = []
    seconds = []
    thirds = []

    for _ in range(total_num):
        game = Game()
        history = []
        while not game.end:
            history.append(game.to_logarray())
            moves = self.get_move_list(game)

            # The third-ranked continuation goes into `thirds`; previously it
            # was appended to `seconds`, leaving `thirds` permanently empty.
            for rank, bucket in ((2, seconds), (3, thirds)):
                if len(moves) >= rank:
                    alternative = Game(game)
                    alternative.move(moves[-rank])
                    if not alternative.end:
                        bucket.append(alternative)

            game.move(moves[-1])
        # Also record the terminal position.
        history.append(game.to_logarray())

        min_game = min(min_game, np.sum(game.board))
        total_positions += len(history)
        histories.append(history)

    # Avoid ZeroDivisionError when no games were requested.
    average = float(total_positions) / total_num if total_num else 0.0
    self.log(min_game, average)
    return histories, min_game, seconds, thirds
def train_random_moves(self, min_length=300, eps=0.03, max_active_games=30):
    """Generate branching game histories by occasionally playing 2nd-best.

    Phase 1 (seed): play games with ``self.move`` for at most ``min_length``
    moves each until one is found whose history has at least ``min_length``
    entries and whose ``self.eval(game)`` is at least 0.3. Its snapshots
    become ``histories[0]``.

    Phase 2 (expansion): repeatedly sweep over all active games. Each game
    normally plays the last entry of ``self.get_move_list(game)``. With
    probability ``eps`` (and if more than one move is available and the
    number of live games is below ``max_active_games``) the game is forked:
    the original plays the last-ranked move and the fork, starting from a
    deep copy of the original's history, plays the second-to-last one. The
    board sum at the fork point is recorded in ``branches``. Games that have
    ended are dropped after every sweep.

    Args:
        min_length: Required length of the seed history, and the cap on the
            number of seed moves.
        eps: Per-game, per-sweep probability of forking.
        max_active_games: Upper bound on simultaneously live games; forks
            created earlier in the same sweep count toward it.

    Returns:
        A tuple ``(histories, branches)``: one list of ``to_logarray()``
        snapshots per game (index 0 is the seed), and the list of
        ``np.sum(game.board)`` values at which forks happened.

    Side effects:
        Prints the number of histories, then the longest history length and
        the seed history length. Opens ``Application2`` on the longest
        history when it has more than 1500 entries.
    """
    branches = []
    histories = [[]]
    active_games = []
    num_games = 0
    seed_eval = 0

    # Phase 1: retry until the seed game is long enough and well evaluated.
    while len(histories[0]) < min_length or seed_eval < 0.3:
        histories[0] = []
        game = Game()
        for _ in range(min_length):
            game.move(self.move(game))
            histories[0].append(game.to_logarray())
            if game.end:
                break
        active_games = [(game, 0)]
        seed_eval = self.eval(game)

    # The seed may have ended exactly on its last rollout move; a finished
    # game has no move to play, so it must not enter the expansion loop.
    active_games = [pair for pair in active_games if not pair[0].end]

    # Phase 2: advance every live game one move per sweep, forking at random.
    while active_games:
        new_games = []
        for game, index in active_games:
            move_list = self.get_move_list(game)
            # Draw unconditionally so every game consumes one random number
            # per sweep regardless of the other conditions.
            rand = random.random()
            # Count forks made earlier in this sweep so that the cap cannot
            # be overshot within a single pass.
            live_games = len(active_games) + len(new_games)
            if (rand < eps and len(move_list) > 1
                    and live_games < max_active_games):
                new_node = Game(game)
                num_games += 1
                new_games.append((new_node, num_games))
                # The fork inherits the history up to the branching point.
                histories.append(copy.deepcopy(histories[index]))
                branches.append(np.sum(game.board))

                game.move(move_list[-1])
                histories[index].append(game.to_logarray())

                # The fork plays the second-to-last ranked move.
                new_node.move(move_list[-2])
                histories[num_games].append(new_node.to_logarray())
            else:
                game.move(move_list[-1])
                histories[index].append(game.to_logarray())

        active_games.extend(new_games)
        active_games = [pair for pair in active_games if not pair[0].end]

    # Formatted so the output is identical under Python 2 and Python 3.
    print(len(histories))
    histl = max(histories, key=len)
    print("%d %d" % (len(histl), len(histories[0])))
    if len(histl) > 1500:
        Application2(histl).mainloop()
    return histories, branches