def run(self):
    while True:
        moves = []
        self.enemies_locations = []
        self.player_sites = []
        self.game_map = getFrame()
        start_time = time.time()

        # Split the map into enemy locations and our own sites.
        for y in range(self.game_map.height):
            for x in range(self.game_map.width):
                if self.game_map.contents[y][x].owner != self.my_id:
                    self.enemies_locations.append(Location(x, y))
                else:
                    self.player_sites.append(self.game_map.contents[y][x])

        # Process the weakest sites first (pop() takes from the end of a
        # descending sort).
        self.player_sites = sorted(self.player_sites,
                                   key=lambda s: s.strength, reverse=True)
        while len(self.player_sites):
            current_site = self.player_sites.pop()
            movement = self.get_movement(current_site)
            moves.append(movement)
            # get_movement() returns a Move, so compare its direction to
            # STILL; a move changes neighbouring strengths, so re-sort.
            if movement.direction != STILL:
                self.player_sites = sorted(self.player_sites,
                                           key=lambda s: s.strength, reverse=True)
            if time.time() - start_time > 0.9:
                # Out of time budget: leave every remaining site in place.
                logging.info("AVOIDING TIMEOUT!! %d", len(self.player_sites))
                for site in self.player_sites:
                    moves.append(Move(Location(site.x, site.y), STILL))
                break
        sendFrame(moves)
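# The loop above leans on a get_movement() method that is not shown here.
# A minimal sketch, assuming the standard Halite starter-kit API
# (getSite, CARDINALS, Move, STILL); the heuristic is illustrative, not
# the bot's actual logic:
def get_movement(self, site):
    # Attack the weakest adjacent enemy square we can take outright;
    # otherwise stay put and build strength.
    loc = Location(site.x, site.y)
    best_dir, best_strength = STILL, None
    for d in CARDINALS:
        neighbour = self.game_map.getSite(loc, d)
        if (neighbour.owner != self.my_id
                and neighbour.strength < site.strength
                and (best_strength is None or neighbour.strength < best_strength)):
            best_dir, best_strength = d, neighbour.strength
    return Move(loc, best_dir)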
def main():
    state = {
        'seen_combat': False,
    }
    myID, gameMap = networking.getInit()
    networking.sendInit("Orez[Miner]")
    while True:
        gameMap = networking.getFrame()
        moves = [
            hlt.Move(
                hlt.Location(x, y),
                direction,
            )
            for (x, y), direction in turn(gameMap, myID, state)
        ]
        networking.sendFrame(moves)
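# main() assumes a turn(gameMap, myID, state) generator yielding one
# ((x, y), direction) pair per owned square. A minimal sketch of that
# contract, with an illustrative expansion rule (an assumption, not the
# miner's real strategy):
def turn(gameMap, myID, state):
    for y in range(gameMap.height):
        for x in range(gameMap.width):
            site = gameMap.getSite(hlt.Location(x, y))
            if site.owner != myID:
                continue
            direction = hlt.STILL
            for d in hlt.CARDINALS:
                neighbour = gameMap.getSite(hlt.Location(x, y), d)
                if neighbour.owner != myID and neighbour.strength < site.strength:
                    direction = d
                    break
            yield (x, y), direction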
allQs = []

# Only pick one random position per turn, for easier Q-learning.
while True:
    position = random.choice(np.transpose(np.nonzero(stack[0])))
    area_inputs = stack_to_input(stack, position)
    possible_moves, Qinputs, Qs = predict_for_pos(area_inputs, model)

    # Epsilon-greedy strategy: explore with probability 0.1, otherwise
    # exploit the highest predicted Q value.
    if random.random() < 0.1:
        index = np.random.choice(range(len(possible_moves)))
    else:
        index = np.argmax(Qs)
    Q = Qs[index]
    move = possible_moves[index]
    allQs.append(Q)
    Qinput = Qinputs[index]

    sendFrame([Move(Location(position[1], position[0]), move)])
    turn += 1

    # Observe the next state and compute the reward for the transition.
    old_frame = frame
    old_Qs = Qs
    frame = getFrame()
    stack = frame_to_stack(frame, myID)
    area_inputs = stack_to_input(stack, position)
    possible_moves, Qinputs, Qs = predict_for_pos(area_inputs, model)
    reward = get_reward(old_frame, frame, myID, position)


def handler(sig, frame):
    logging.info("Before exit")
    logging.info("Average chosen Q value: %.2f", np.array(allQs).mean())
    sys.exit(0)
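# get_reward() is the one piece of the learning signal not shown. A
# plausible sketch, assuming the reward is the net territory gained
# between the two frames (an assumption, not the actual implementation;
# position is available for more local reward shaping):
def get_reward(old_frame, new_frame, myID, position):
    def territory(frame):
        return sum(site.owner == myID
                   for row in frame.contents for site in row)
    return territory(new_frame) - territory(old_frame)

# handler() only fires if it is registered with the signal module:
import signal
signal.signal(signal.SIGTERM, handler)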
FILE_WRITER = open('log-analytics.log', 'a')
ANALYTICS = {'s': 0, 'a': 0, 'm': 0}

for y in range(GAME_MAP.height):
    for x in range(GAME_MAP.width):
        curLoc = Location(x, y)
        if GAME_MAP.getSite(curLoc).owner == MY_ID:
            movedPiece = False
            # Attack the first weaker enemy neighbour, if any.
            for d in CARDINALS:
                if (GAME_MAP.getSite(curLoc, d).owner != MY_ID and
                        GAME_MAP.getSite(curLoc, d).strength <
                        GAME_MAP.getSite(curLoc).strength):
                    MOVES.append(Move(curLoc, d))
                    ANALYTICS['a'] += 1
                    movedPiece = True
                    break
            if not movedPiece:
                MOVES.append(Move(curLoc, STILL))
                ANALYTICS['s'] += 1
                movedPiece = True

# Log per-turn counters plus territory/strength/production totals.
FILE_WRITER.write(
    str(ANALYTICS) + str(getTerritory(GAME_MAP, MY_ID)) + 't ' +
    str(getStrength(GAME_MAP, MY_ID)) + 's ' +
    str(getProduction(GAME_MAP, MY_ID)) + 'p ' + '\n')
FILE_WRITER.write(
    str([str(move.loc.x) + str(move.loc.y) + str(move.direction)
         for move in MOVES]) + '\n')
FILE_WRITER.close()
sendFrame(MOVES)
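# The getTerritory/getStrength/getProduction helpers are not defined in
# this snippet. Assuming they simply aggregate over the map, they could
# be as small as (a sketch):
def getTerritory(game_map, owner_id):
    # Number of squares the player owns.
    return sum(site.owner == owner_id
               for row in game_map.contents for site in row)

def getStrength(game_map, owner_id):
    # Total strength on the player's squares.
    return sum(site.strength for row in game_map.contents
               for site in row if site.owner == owner_id)

def getProduction(game_map, owner_id):
    # Total production on the player's squares.
    return sum(site.production for row in game_map.contents
               for site in row if site.owner == owner_id)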
                             for position in positions])
    outputs = np.split(model.predict(inputs), len(positions))
    # outputs /= sum(outputs)
    return (possible_moves,
            np.split(inputs, len(positions)),
            [o.ravel() for o in outputs])


def softmax(x):
    """Turn Q values into probabilities."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


def harden(x, e=2):
    """Sharpen a distribution: raise to a power, then renormalise."""
    exp = x**e
    return exp / exp.sum()


np.set_printoptions(precision=3)
while True:
    frame = getFrame()
    stack = frame_to_stack(frame, myID)
    positions = np.transpose(np.nonzero(stack[0]))
    # position = random.choice(positions)
    moves = []
    possible_moves, allQinputs, allQs = predict_for_game(stack, positions, model)
    for position, Qinputs, Qs in zip(positions, allQinputs, allQs):
        # Pick a move following Pi(s); stochastic sampling is left
        # commented out in favour of the greedy argmax.
        Ps = harden(softmax(Qs.ravel()))
        # index = np.random.choice(range(len(possible_moves)), p=Ps)
        index = np.argmax(Ps)
        logging.info("%d Qs: %s Ps: %s", index, Qs, Ps)
        moves.append((position[1], position[0], possible_moves[index]))
    sendFrame([Move(Location(px, py), move) for (px, py, move) in moves])
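# A quick standalone check of what harden(softmax(...)) does to the move
# distribution, using the two functions defined above (printed values
# are approximate):
import numpy as np

qs = np.array([1.0, 2.0, 3.0])
ps = softmax(qs)
print(ps)          # ~[0.090 0.245 0.665]
print(harden(ps))  # ~[0.016 0.117 0.867]
# Squaring and renormalising pushes probability mass toward the argmax,
# so the greedy np.argmax(Ps) and sampling with p=Ps rarely disagree.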