def manage_events(self):
    """Dispatch end-of-game callbacks to characters based on this step's events."""
    for e in self.events:
        if e.tpe == Event.BOMB_HIT_CHARACTER:
            e.other.done(SensedWorld.from_world(self))
        elif e.tpe == Event.CHARACTER_KILLED_BY_MONSTER:
            self.remove_character(e.character)
            e.character.done(SensedWorld.from_world(self))
        elif e.tpe == Event.CHARACTER_FOUND_EXIT:
            e.character.done(SensedWorld.from_world(self))

def __calc_next_path(self, wrld):
    """Epsilon-greedy move selection: exploit the learned Q function, or
    explore by taking the first step of a BFS path toward the exit."""
    chosen_world = SensedWorld.from_world(wrld)
    me = chosen_world.me(self)
    dx = None
    dy = None
    x = uniform(0, 1)
    if x > self.eps:
        print("EXPLOIT")
        next_moves = self.__list_next_moves(wrld)
        max_q = -inf
        for move in next_moves:
            c_wrld = SensedWorld.from_world(wrld)
            c_wrld.me(self).move(move[0], move[1])
            (c_wrld, ev) = c_wrld.next()
            (cur_q, cur_target, cur_fs) = self.calc_q(c_wrld, ev, move)
            if cur_q > max_q:
                max_q = cur_q
                (dx, dy) = move
                chosen_world = c_wrld
                chosen_target = cur_target
                chosen_ev = ev
                final_state = cur_fs
        self.q = max_q
    else:
        print("EXPLORE")
        # BFS from the character's position, recording each cell's parent
        queue = [(me.x, me.y)]
        visited = {(me.x, me.y): None}
        while queue:
            s = queue.pop(0)
            if s == wrld.exitcell:
                break
            for (ddx, ddy) in self.__list_next_moves(chosen_world, move=s):
                move = (s[0] + ddx, s[1] + ddy)
                if move not in visited:
                    visited[move] = s
                    queue.append(move)
        # Walk the parent chain back from the exit to find the first step
        end = wrld.exitcell
        while True:
            if visited[end] is None or visited[visited[end]] is None:
                dx = end[0] - me.x
                dy = end[1] - me.y
                break
            end = visited[end]
        me.move(dx, dy)
        (chosen_world, chosen_ev) = chosen_world.next()
        (self.q, chosen_target, final_state) = self.calc_q(chosen_world, chosen_ev, (dx, dy))
    if self.training is True:
        self.training_data.append([chosen_world, chosen_ev, chosen_target])
    if final_state is True:
        print("Training...")
        self.__update_nn(self.training_data)
    return (dx, dy)

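# For reference, the EXPLORE branch above is BFS with a parent map followed by
# a walk back along parents to the first step. A minimal self-contained sketch
# of that pattern (a hypothetical helper, not part of the original agent); it
# also guards against an unreachable goal, on which the code above would raise
# a KeyError:
from collections import deque

def bfs_first_step(start, goal, neighbors):
    parent = {start: None}
    queue = deque([start])
    while queue:
        s = queue.popleft()
        if s == goal:
            break
        for n in neighbors(s):
            if n not in parent:
                parent[n] = s
                queue.append(n)
    if goal not in parent:
        return None  # goal unreachable
    node = goal
    while parent[node] is not None and parent[parent[node]] is not None:
        node = parent[node]
    return node  # first cell on the path out of `start`
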
def getNewWorld(self, char, wrld, action):
    """Clone the world and shift our character by the action's offset."""
    newWorld = SensedWorld.from_world(wrld)
    newChar = self.getCharInWorld(newWorld)
    actionVector = self.getActionVector(action)
    newChar.x += actionVector[0]
    newChar.y += actionVector[1]
    return newWorld

def __calc_next_interactive(self, wrld):
    # Commands
    dx, dy = 0, 0
    # Handle input
    for c in input("How would you like to move (w=up,a=left,s=down,d=right)? "):
        if 'w' == c:
            dy -= 1
        if 'a' == c:
            dx -= 1
        if 's' == c:
            dy += 1
        if 'd' == c:
            dx += 1
    chosen_world = SensedWorld.from_world(wrld)
    chosen_world.me(self).move(dx, dy)
    (chosen_world, chosen_ev) = chosen_world.next()
    (self.q, chosen_target, final_state) = self.calc_q(chosen_world, chosen_ev, (dx, dy))
    print(self.q)
    if self.training is True:
        self.training_data.append([chosen_world, chosen_ev, chosen_target])
    if final_state is True:
        print("Training...")
        self.__update_nn(self.training_data)
    return (dx, dy)

def monsters_current_path(wrld, character):
    """Return 1 if the nearest monster's current heading crosses the character
    within three steps; otherwise fall back to the monster-distance feature."""
    my_wrld = SensedWorld.from_world(wrld)
    monsters = findAll(wrld, 2)
    if len(monsters) == 0:
        return 0
    pos = (character.x, character.y)
    original_nearest_monster = findNearestEntity(wrld, pos, monsters)
    next_wrld, next_events = my_wrld.next()
    if next_wrld.me(character) is None:
        return 0
    monsters = findAll(next_wrld, 2)
    if len(monsters) == 0:
        return 0
    next_nearest_monster = findNearestEntity(next_wrld, pos, monsters)
    # The monster's one-step velocity, extrapolated up to three steps ahead
    delta_coords = ((next_nearest_monster[0] - original_nearest_monster[0]),
                    (next_nearest_monster[1] - original_nearest_monster[1]))
    for i in range(1, 4):
        newX = original_nearest_monster[0] + (delta_coords[0] * i)
        newY = original_nearest_monster[1] + (delta_coords[1] * i)
        if character.x == newX and character.y == newY:
            return 1
    return distanceToMonster(wrld, character)

def __max_a(self, world):
    '''
    @dillon
    max a assignment, approximate q-learning
    '''
    self.max_q = -inf
    possible_actions = self.__possible_actions(world)  # list of (dx, dy)
    for action in possible_actions:
        clone = SensedWorld.from_world(world)  # clone the current world
        dx, dy = action  # unpack
        me = clone.me(self)  # find me in cloned world
        if dx == 0 and dy == 0:
            me.place_bomb()
        else:
            me.move(dx, dy)  # make the move in cloned world
        next_clone, ev = clone.next()  # simulate the move and clone the next world
        if next_clone.me(self) is None:  # terminal state, q = r
            q = self.__r(ev, world)
        else:
            q = self.__q(next_clone, (0, 0))  # derive q of new world, don't move though
        if q > self.max_q:
            self.max_q = q  # record q
            self.max_a = action  # record action
            self.events = ev  # record events
    return self.max_a  # return action corresponding to best q

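# The max-Q value recorded by __max_a is what an approximate q-learning weight
# update consumes. A minimal sketch of that update under the usual linear model
# Q(s, a) = w . f(s, a); `weights` and `features_sa` are illustrative names,
# not identifiers from the original code:
def td_update(weights, features_sa, reward, max_q_next, q_sa, alpha=0.1, gamma=0.9):
    # w_i <- w_i + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)) * f_i(s, a)
    delta = reward + gamma * max_q_next - q_sa
    return [w + alpha * delta * f for w, f in zip(weights, features_sa)]
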
def updateQ(self, wrld):
    alpha = 0.3
    moves = get_adjacent((self.x, self.y), wrld)
    for m in moves:
        if not wrld.wall_at(m[0], m[1]):
            sim = SensedWorld.from_world(wrld)  # creates simulated world
            c = sim.me(self)  # finds character from simulated world
            c.move(m[0] - self.x, m[1] - self.y)  # moves character in simulated world
            s = sim.next()  # updates simulated world; returns (world, events)
            # Look ourselves up in the new world (s[0]), not the event list (s[1])
            c = s[0].me(c)
            # Check if game is over
            if c is None:
                print("ENDED!")
                print(s[0])
                print(s[1])
                for event in s[1]:
                    # Terminal updates: "dead" for a kill, the exit-cell state for
                    # an escape (the original had these two states swapped)
                    if event.tpe == Event.CHARACTER_KILLED_BY_MONSTER and event.character.name == self.name:
                        self.qtable[("dead", m)] = -5
                    elif event.tpe == Event.CHARACTER_FOUND_EXIT and event.character.name == self.name:
                        self.qtable[(calculate_state(wrld.exitcell, wrld), m)] = -5
            else:
                print("Xcoord: " + str(c.x) + ", Ycoord: " + str(c.y))
                self.qtable[(calculate_state((c.x, c.y), wrld), m)] = distance_to_exit((c.x, c.y), wrld)

def __find_max_a(self, wrld, action):
    """Return max over moves of the approximate Q of the resulting world.
    (An earlier draft that repeatedly stepped a single clone has been removed.)"""
    max_q = -inf
    for (dx, dy) in self.__list_next_moves(wrld):
        clone_wrld = SensedWorld.from_world(wrld)
        me = clone_wrld.me(self)
        if dx == 0 and dy == 0:
            me.place_bomb()  # drop a bomb if we are not moving
        else:
            me.move(dx, dy)
        (clone_wrld, ev) = clone_wrld.next()
        q = self.__approx_q(clone_wrld, ev).item()
        if q > max_q:
            max_q = q
    return max_q

def __calc_next_move(self, wrld):
    '''
    @ray
    Calculates the next move based on approximate q-learning
    '''
    # take a new move using epsilon-greedy exploration
    new_move = None
    chosen_world = None
    chosen_ev = None
    chosen_target = None
    final_state = False
    next_moves = self.__list_next_moves(wrld)
    x = uniform(0, 1)
    if x < self.eps:
        # exploration
        new_move = next_moves[randrange(0, len(next_moves))]
        chosen_world = SensedWorld.from_world(wrld)
        chosen_world.me(self).move(new_move[0], new_move[1])
        (chosen_world, chosen_ev) = chosen_world.next()
        (self.q, chosen_target, final_state) = self.calc_q(chosen_world, chosen_ev, new_move)
    else:
        # exploitation
        max_q = -inf
        for move in next_moves:
            c_wrld = SensedWorld.from_world(wrld)
            c_wrld.me(self).move(move[0], move[1])
            (c_wrld, ev) = c_wrld.next()
            (cur_q, cur_target, cur_fs) = self.calc_q(c_wrld, ev, move)
            if cur_q > max_q:
                max_q = cur_q
                new_move = move
                chosen_world = c_wrld
                chosen_target = cur_target
                chosen_ev = ev
                final_state = cur_fs
        self.q = max_q
    if self.training is True:
        self.training_data.append([chosen_world, chosen_ev, chosen_target])
    if final_state is True:
        print("Training...")
        self.__update_nn(self.training_data)
    return new_move

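# The branch structure above is standard epsilon-greedy selection. Stripped of
# the world cloning, it reduces to this sketch (a hypothetical helper; `q_of`
# maps an action to its Q value):
from random import random, choice

def epsilon_greedy(actions, q_of, eps):
    # With probability eps take a random action (explore),
    # otherwise take the argmax action (exploit).
    if random() < eps:
        return choice(actions)
    return max(actions, key=q_of)
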
def get_nextworlds(self, wrld):
    # Return accessible next worlds after the character's movement in the given
    # world. Possible moves include moving to empty cells, moving to the exit
    # cell, and placing a bomb.
    nextworlds = []
    # Adapted from example code on GitHub
    # Loop through delta x
    for dx in [-1, 0, 1]:
        # Avoid out-of-bound indexing
        if (self.x + dx >= 0) and (self.x + dx < wrld.width()):
            # Loop through delta y
            for dy in [-1, 0, 1]:
                # Originally this excluded the character's current position, but
                # in some cases not moving at all may be necessary. The move
                # still isn't counted as possible if a character is there.
                # Avoid out-of-bound indexing
                if (self.y + dy >= 0) and (self.y + dy < wrld.height()) and (
                        wrld.empty_at(self.x + dx, self.y + dy) or
                        wrld.exit_at(self.x + dx, self.y + dy)):
                    clonewrld = SensedWorld.from_world(wrld)
                    clonewrld.me(self).move(dx, dy)
                    (newwrld, events) = clonewrld.next()
                    nextworlds.append((newwrld, events, (dx, dy)))
    # Include the world in which the character places a bomb. The final value in
    # the tuple is either the move the character makes or self.BOMB.
    clonewrld = SensedWorld.from_world(wrld)
    clonewrld.me(self).place_bomb()
    clonewrld.me(self).move(0, 0)
    (newwrld, events) = clonewrld.next()
    nextworlds.append((newwrld, events, self.BOMB))
    return nextworlds

def generateCharMoveWorlds(self, char, wrld):
    ret = []
    allActions = self.getAllActions(wrld, char.x, char.y)
    for i in allActions:
        newWorld = SensedWorld.from_world(wrld)
        newChar = self.getCharInWorld(newWorld)
        actionVector = self.getActionVector(i)
        newChar.x += actionVector[0]
        newChar.y += actionVector[1]
        ret.append((newWorld, i))
    return ret

def maxValue(self, wrld, val, level):
    if level >= self.search_level:
        return self.get_score(wrld, val)
    value = -inf
    for loc in self.look_for_cell(wrld):
        newWorld = SensedWorld.from_world(loc[0])
        character = newWorld.me(self)
        character.x = loc[1][0]
        character.y = loc[1][1]
        value = max(value, self.expValue(newWorld, loc[1], level + 1))
    return value

def maxValue(self, wrld, val, level):
    if level >= 1:
        return self.fitness(wrld, val)
    value = -math.inf
    for loc in self.cell(wrld):
        newWorld = SensedWorld.from_world(loc[0])
        character = newWorld.me(self)
        character.x = loc[1][0]
        character.y = loc[1][1]
        value = max(value, self.expValue(newWorld, loc[1], level + 1))
    return value

def get_nextworlds_monster(self, wrld):
    nextworlds = []
    monsterx = -1
    monstery = -1
    # Adapted from example code on GitHub
    # Check that the map contains a monster; if it does, save its position.
    # TODO: In future versions of this (for variants 4 and 5), account for more
    # than one monster.
    for x in range(0, wrld.width()):
        for y in range(0, wrld.height()):
            if wrld.monsters_at(x, y):
                monsterx = x
                monstery = y
    if monsterx == -1 and monstery == -1:
        # No monster found: the only successor is the world after a no-op step
        clonewrld = SensedWorld.from_world(wrld)
        (newwrld, events) = clonewrld.next()
        nextworlds.append((newwrld, events, (0, 0)))
        return nextworlds
    # Loop through delta x
    for dx in [-1, 0, 1]:
        # Avoid out-of-bound indexing
        if (monsterx + dx >= 0) and (monsterx + dx < wrld.width()):
            # Loop through delta y
            for dy in [-1, 0, 1]:
                # Avoid out-of-bound indexing
                if (monstery + dy >= 0) and (monstery + dy < wrld.height()) and (
                        wrld.empty_at(monsterx + dx, monstery + dy) or
                        wrld.characters_at(monsterx + dx, monstery + dy)):
                    clonewrld = SensedWorld.from_world(wrld)
                    monster = clonewrld.monsters_at(monsterx, monstery)[0]
                    monster.move(dx, dy)
                    (newwrld, events) = clonewrld.next()
                    nextworlds.append((newwrld, events, (dx, dy)))
    return nextworlds

def generateMonsterMoveWorlds(self, wrld):
    # The unmodified world counts as one outcome (the monster may stay put)
    ret = [wrld]
    monster = self.getAllMonstersInWorld(wrld)[0]
    monsterMoves = self.getMonsterMoves(monster, wrld)
    for i in monsterMoves:
        newWorld = SensedWorld.from_world(wrld)
        newMonster = self.getAllMonstersInWorld(newWorld)[0]
        newMonster.x += i[0]
        newMonster.y += i[1]
        ret.append(newWorld)
    return ret

def look_for_monster(self, wrld, loc):
    cells = []
    x, y = loc
    eight_moves = [(x, y - 1), (x, y + 1), (x + 1, y - 1), (x - 1, y),
                   (x + 1, y), (x - 1, y + 1), (x + 1, y + 1), (x - 1, y - 1)]
    for action in eight_moves:
        # Only passable cells are legal monster moves (the original tested
        # wall_at directly, which inverted the check)
        if not self.wall_at(action[0], action[1], wrld):
            newWorld = SensedWorld.from_world(wrld)
            # monsters_at returns a list; move() takes a delta, not an
            # absolute position
            monster = newWorld.monsters_at(x, y)[0]
            monster.move(action[0] - x, action[1] - y)
            cells.append((newWorld, action))
    return cells

def do(self, wrld):
    if self.approx_net is None:
        # create the neural network
        self.__init_nn(wrld, filename=self.nn_file)
    s_wrld = SensedWorld.from_world(wrld)
    # (dx, dy) = self.__calc_next_move(s_wrld)
    # (dx, dy) = self.__calc_next_interactive(s_wrld)
    (dx, dy) = self.__calc_next_path(s_wrld)
    self.me_pos = (self.me_pos[0] + dx, self.me_pos[1] + dy)
    if (dx, dy) == (0, 0):
        self.place_bomb()
    else:
        self.move(dx, dy)

def perform_qLearning(self, wrld):
    self.prev_wrld = SensedWorld.from_world(wrld)
    if self.train is True:
        if random.random() < self.epsilon:
            # exploring: choose a random move
            allowed_direction = [-1, 0, 1]
            bomb_actions = [False, True]
            x = random.choice(allowed_direction)
            y = random.choice(allowed_direction)
            place_bomb = random.choice(bomb_actions)
            x, y = bomb_handler(wrld, (self.x, self.y), (x, y))
            # x, y = explosion_handler2(wrld, (self.x, self.y), (x, y))
            self.move(x, y)
            if place_bomb is True:
                self.place_bomb()
        else:
            # exploiting: take the current best move
            maxQ, best_action, best_wrld = self.q_learner.getBestMove(wrld, self)
            x, y, place_bomb = best_action
            self.move(x, y)
            if place_bomb is True:
                self.place_bomb()
    else:
        # use the converged values
        maxQ, best_action, best_wrld = self.q_learner.getBestMove(wrld, self)
        x, y, place_bomb = best_action
        self.move(x, y)
        if place_bomb is True:
            self.place_bomb()

def do(self, wrld):
    # action: up, down, left, right, leftup, leftdown, rightup, rightdown, boom
    # Feature weights. The original guarded this with `if Wvalues[0] != []`,
    # which reads the local before assignment; initialize directly instead.
    Wvalues = [10, -100, -100, 1000, -10, -100]
    sWrld = SensedWorld.from_world(wrld)
    (newWorld, events) = sWrld.next()
    currscore = sWrld.scores["me"]
    nextscore = newWorld.scores["me"]
    currx = self.x
    curry = self.y
    nextx = []
    nexty = []
    direction = ["up", "down", "left", "right",
                 "leftup", "leftdown", "rightup", "rightdown"]
    for i in range(len(direction)):
        nextdirx, nextdiry = self.nextstep(wrld, currx, curry, direction[i])
        nextx.append(nextdirx)
        nexty.append(nextdiry)
    currQval = self.Qvalue(wrld, Wvalues[0], Wvalues[1], Wvalues[2],
                           Wvalues[3], Wvalues[4], Wvalues[5], currx, curry)
    BestQ = -99999
    Bestxy = (-99, -99)
    Bestmove = (-99, -99)
    for j in range(len(nextx)):
        Qdirval = self.Qvalue(wrld, Wvalues[0], Wvalues[1], Wvalues[2],
                              Wvalues[3], Wvalues[4], Wvalues[5], nextx[j], nexty[j])
        if Qdirval > BestQ:
            BestQ = Qdirval
            Bestxy = (nextx[j], nexty[j])
            Bestmove = (Bestxy[0] - self.x, Bestxy[1] - self.y)
    # TD-style weight update from the one-step score difference
    reward = nextscore - currscore
    delta = reward + BestQ - currQval
    for k in range(len(Wvalues)):
        Wvalues[k] = Wvalues[k] + delta * 0.4 * BestQ
    self.move(Bestmove[0], Bestmove[1])

def monAction(self, wrld, loc):
    cells = []
    x, y = loc
    for action in [(x, y - 1), (x, y + 1), (x + 1, y - 1), (x - 1, y),
                   (x + 1, y), (x - 1, y + 1), (x + 1, y + 1), (x - 1, y - 1)]:
        # Only passable cells are legal monster moves (the original tested
        # isWall directly, which inverted the check)
        if not isWall(action[0], action[1], wrld):
            newWorld = SensedWorld.from_world(wrld)
            # monsters_at returns a list; move() takes a delta, not an
            # absolute position
            monster = newWorld.monsters_at(x, y)[0]
            monster.move(action[0] - x, action[1] - y)
            cells.append((newWorld, action))
    return cells

def flook_for_monster(self, wrld, loc):
    """Generate worlds in which the monster at `loc` steps toward the
    character, one per passable cell adjacent to the character."""
    x, y = loc
    cells = []
    for (nx, ny) in [(self.x + 1, self.y), (self.x - 1, self.y),
                     (self.x, self.y + 1), (self.x, self.y - 1),
                     (self.x + 1, self.y + 1), (self.x + 1, self.y - 1),
                     (self.x - 1, self.y + 1), (self.x - 1, self.y - 1)]:
        # Only passable cells qualify (the original tested wall_at directly,
        # which inverted the check); the eight repeated blocks are collapsed
        # into one loop
        if not self.wall_at(nx, ny, wrld):
            newWorld = SensedWorld.from_world(wrld)
            # monsters_at returns a list; the original also passed absolute
            # coordinates to move(), which expects a delta
            mon = newWorld.monsters_at(x, y)[0]
            mon.move(self.x - x, self.y - y)
            cells.append((newWorld, (nx, ny)))
    return cells

def next_world_state(self, action, wrld):
    sensed_world = SensedWorld.from_world(wrld)
    # move character
    if sensed_world.me(self) is not None:
        sensed_world.me(self).move(action[0], action[1])
    # move closest monster
    closest_monster_pos, monster_found = self.find_closest_monster(
        (self.x + action[0], self.y + action[1]), wrld)
    if monster_found:
        monster_move, monster_pos = self.predict_aggressive_monster_move(
            closest_monster_pos, wrld)
        monster = sensed_world.monsters_at(monster_pos[0], monster_pos[1])
        if monster is not None:
            monster[0].move(monster_move[0], monster_move[1])
    next_world, events = sensed_world.next()
    return next_world, events

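# A one-step simulator like next_world_state is typically consumed by scoring
# each successor and keeping the best action. A minimal usage sketch;
# `evaluate` is a hypothetical scoring function over (world, events):
def best_action(agent, wrld, actions, evaluate):
    best, best_val = None, float('-inf')
    for a in actions:
        next_wrld, events = agent.next_world_state(a, wrld)
        val = evaluate(next_wrld, events)
        if val > best_val:
            best, best_val = a, val
    return best
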
def expectimax_search(self, wrld):
    c_level = 0
    search_result = 0
    current_val = -math.inf
    max_val = -math.inf
    for cell_wrld, val in self.cell(wrld):
        newWorld = SensedWorld.from_world(cell_wrld)
        character = newWorld.me(self)
        character.x, character.y = val[0], val[1]
        # Evaluate the world the character was actually placed in (the
        # original passed the un-modified world here)
        current_val = max(current_val, self.expValue(newWorld, val, c_level + 1))
        if current_val > max_val:
            max_val = current_val
            search_result = val
    return search_result

def minimax(self, toClone):
    wrld = SensedWorld.from_world(toClone)
    cMoves = []
    c = next(iter(wrld.characters.values()))[0]
    # Loop through delta x
    for dx in [-1, 0, 1]:
        # Avoid out-of-bound indexing
        if (c.x + dx >= 0) and (c.x + dx < wrld.width()):
            # Loop through delta y
            for dy in [-1, 0, 1]:
                # Avoid out-of-bound indexing
                if (c.y + dy >= 0) and (c.y + dy < wrld.height()):
                    # No need to check impossible moves
                    if wrld.empty_at(c.x + dx, c.y + dy):
                        # Set move in wrld
                        c.move(dx, dy)
                        # Get new world
                        (newwrld, events) = wrld.next()
                        # Skip moves that get the character killed. (The
                        # original tested `e.tpe == A or B`, which is always
                        # true; both event types must be compared to e.tpe.)
                        died = any(e.tpe == Event.CHARACTER_KILLED_BY_MONSTER or
                                   e.tpe == Event.BOMB_HIT_CHARACTER
                                   for e in events)
                        if died:
                            continue
                        value = self.minHelper(newwrld)
                        if dy == 0 and dx == 0:
                            value = value - 1  # slight penalty for standing still
                        cMoves.append((value, dx, dy))
    pMax = -100000
    index = 0
    for i, p in enumerate(cMoves):
        print(str(p[0]) + " " + str(p[1]) + " " + str(p[2]))
        if p[0] >= pMax:
            pMax = p[0]
            index = i
    print(index)
    self.move(cMoves[index][1], cMoves[index][2])

def expectimax_search(self, wrld, search_level):
    c_level = 0
    search_result = 0
    max_val = -inf
    current_val = -inf
    # val is an (x, y) tuple
    for cell_wrld, val in self.look_for_cell(wrld):
        newWorld = SensedWorld.from_world(cell_wrld)
        character = newWorld.me(self)
        character.x = val[0]
        character.y = val[1]
        # Evaluate the world the character was actually placed in (the
        # original passed the un-modified world here)
        current_val = max(current_val, self.expValue(newWorld, val, c_level + 1))
        if current_val > max_val:
            max_val = current_val
            search_result = val
    return search_result

def update_characters(self):
    """Update character state"""
    # Event list
    ev = []
    # Update all the characters
    ncharacters = {}
    for i, clist in self.characters.items():
        for c in clist:
            # Call AI
            c.do(SensedWorld.from_world(self))
            # Attempt to place bomb
            if c.maybe_place_bomb:
                c.maybe_place_bomb = False
                can_bomb = True
                # Make sure this character has not already placed another bomb
                for k, b in self.bombs.items():
                    if b.owner == c:
                        can_bomb = False
                        break
                if can_bomb:
                    self.add_bomb(c.x, c.y, c)
            # Update position and check for events
            ev2 = self.update_character_move(c, False)
            ev = ev + ev2
            # Character gets inserted in next step's list unless hit or escaped
            if not (ev2 and ev2[0].tpe in [Event.BOMB_HIT_CHARACTER,
                                           Event.CHARACTER_FOUND_EXIT]):
                # Update new index
                ni = self.index(c.x, c.y)
                np = ncharacters.get(ni, [])
                np.append(c)
                ncharacters[ni] = np
    # Save new index
    self.characters = ncharacters
    # Return events
    return ev

def expectimax(self, toClone):
    wrld = SensedWorld.from_world(toClone)
    cMoves = []
    c = wrld.me(self)
    # Loop through delta x
    for dx in [-1, 0, 1]:
        # Avoid out-of-bound indexing
        if (c.x + dx >= 0) and (c.x + dx < wrld.width()):
            # Loop through delta y
            for dy in [-1, 0, 1]:
                # Avoid out-of-bound indexing
                if (c.y + dy >= 0) and (c.y + dy < wrld.height()):
                    # No need to check impossible moves
                    if not wrld.wall_at(c.x + dx, c.y + dy):
                        # Set move in wrld
                        c.move(dx, dy)
                        # Get new world
                        (newwrld, events) = wrld.next()
                        # Skip moves that get the character killed
                        died = any(e.tpe == Event.CHARACTER_KILLED_BY_MONSTER
                                   for e in events)
                        if died:
                            continue
                        value = self.expectimaxHelper(newwrld)
                        cMoves.append((value, dx, dy))
    pMax = -math.inf
    index = 0
    for i, p in enumerate(cMoves):
        if p[0] > pMax:
            pMax = p[0]
            index = i
    self.move(cMoves[index][1], cMoves[index][2])

def update_monsters(self):
    """Update monster state"""
    # Event list
    ev = []
    # Update all the monsters
    nmonsters = {}
    for i, mlist in self.monsters.items():
        for m in mlist:
            # Call AI
            m.do(SensedWorld.from_world(self))
            # Update position and check for events
            ev2 = self.update_monster_move(m, False)
            ev = ev + ev2
            # Monster gets inserted in next step's list unless hit
            if not (ev2 and ev2[0].tpe == Event.BOMB_HIT_MONSTER):
                # Update new index
                ni = self.index(m.x, m.y)
                np = nmonsters.get(ni, [])
                np.append(m)
                nmonsters[ni] = np
    # Save new index
    self.monsters = nmonsters
    # Return events
    return ev

def qLearn(self, wrld, state):
    # constants
    alpha = self.alpha
    gamma = self.gamma
    epsilon = self.epsilon
    action = (0, 0, -1)
    # Epsilon-greedy selection: exploit the table with probability 1 - epsilon
    if random.uniform(0, 1) >= epsilon:
        for i in self.q_table:
            if i.state == state:
                if max(i.action_value.values()) == 0:
                    # Break ties among untried (zero-valued) actions at random
                    key_list = []
                    for key, value in i.action_value.items():
                        if value == 0:
                            # if state[(key[0], key[1])] == 'a':
                            #     action = (key[0], key[1], 0)
                            key_list.append(key)
                    action = random.choice(key_list)
                    if self.debug_print:
                        print(action, i.action_value[action])
                else:
                    action = max(i.action_value, key=(lambda x: i.action_value[x]))
                    if self.debug_print:
                        print(action, i.action_value[action])
                break
    if self.train:
        if action == (0, 0, -1):
            # No table entry for this state yet: act at random
            action = random.choice(list(self.q_table[0].action_value.keys()))
            if self.debug_print:
                print("rand", action)
        action_backup = action
        if 'illegal_move' in self.give_rewards:
            # Clamp or re-roll actions that would leave the board or hit a wall
            if ((self.x + action[0]) >= wrld.width() or (self.x + action[0]) < 0) and \
                    ((self.y + action[1]) >= wrld.height() or (self.y + action[1]) < 0):
                if self.debug_print:
                    print('illegal_move')
                action = random.choice(list(self.q_table[0].action_value.keys()))
            elif (self.x + action[0]) >= wrld.width() or (self.x + action[0]) < 0:
                if self.debug_print:
                    print('illegal_move')
                action = (0, action[1], action[2])
            elif (self.y + action[1]) >= wrld.height() or (self.y + action[1]) < 0:
                if self.debug_print:
                    print('illegal_move')
                action = (action[0], 0, action[2])
            elif wrld.wall_at((self.x + action[0]), (self.y + action[1])):
                if self.debug_print:
                    print('wall_move')
                action = random.choice(list(self.q_table[0].action_value.keys()))
            elif wrld.wall_at((self.x + action[0]), (self.y)):
                if self.debug_print:
                    print('wall_move')
                action = (0, action[1], action[2])
            elif wrld.wall_at((self.x), (self.y + action[1])):
                if self.debug_print:
                    print('wall_move')
                action = (action[0], 0, action[2])
        # Simulate the chosen action one step ahead
        sensed_world = SensedWorld.from_world(wrld)
        (sensed_world.me(self)).move(action[0], action[1])
        if action[2]:
            (sensed_world.me(self)).place_bomb()
        next_state, next_events = sensed_world.next()
        reward = (5000 / wrld.scores[self.name] * 0.5) if 'life_penalty' in self.give_rewards else 0
        if len(next_events) > 0:
            for i in next_events:
                if i.tpe == Event.CHARACTER_FOUND_EXIT:
                    print("Found Exit!!!!!!!!!!!!!!!!!")
                if 'wall_blow' in self.give_rewards and i.tpe == 0 and i.character.name == self.name:
                    if self.debug_print:
                        print('wall_blow')
                    reward += 10
                if 'mon_blow' in self.give_rewards and i.tpe == 1 and i.character.name == self.name:
                    if self.debug_print:
                        print('mon_blow')
                    reward += 50
                if i.tpe == 2:
                    if 'char_blow' in self.give_rewards and i.character.name == self.name and i.character.name != i.other.name:
                        if self.debug_print:
                            print('char_blow')
                        reward += 100
                    elif 'self_blow' in self.give_rewards and i.character.name == self.name and i.character.name == i.other.name:
                        if self.debug_print:
                            print('self_blow')
                        reward -= 500
                if 'mon_die' in self.give_rewards and i.tpe == 3 and i.character.name == self.name:
                    if self.debug_print:
                        print('mon_die')
                    reward -= 500
        elif 'explosion_move' in self.give_rewards and (
                next_state.explosion_at((self.x + action[0]), (self.y + action[1])) or
                wrld.explosion_at((self.x + action[0]), (self.y + action[1]))):
            if self.debug_print:
                print('explosion_move')
            reward -= 500
        if 'exit' in self.give_rewards and self.heuristic(
                ((self.x + action[0]), (self.y + action[1])), next_state.exitcell) == 0:
            if self.debug_print:
                print('exit')
            reward += 10000
        elif 'a_star_move' in self.give_rewards:
            temp_for_print = reward
            if 'a_star' in state:
                if 'a_star_bomb' in self.give_rewards and wrld.wall_at(
                        self.x + state['a_star'][0], self.y + state['a_star'][1]) and not state['bomb']:
                    if self.debug_print:
                        print('a_star_bomb')
                    reward += action[2] * 20
                if (action[0], action[1]) != (0, 0):
                    # Reward moves that agree with the A* direction
                    if (state['a_star'][0] == action[0]) and (state['a_star'][1] == action[1]):
                        reward += 3
                    elif (state['a_star'][0] == action[0]) and (abs(state['a_star'][1] - action[1]) < 2):
                        reward += 1
                    elif (state['a_star'][1] == action[1]) and (abs(state['a_star'][0] - action[0]) < 2):
                        reward += 1
                    else:
                        reward -= 1
                if self.debug_print:
                    print('a_star (%d, %d) -> %d' % (state['a_star'][0], state['a_star'][1], reward - temp_for_print))
        # Current state
        curr_state = state
        # Next state is created here
        next_state = self.wrld_to_state(next_state)
        if self.add_monster_move:
            # Compare the two states and track the monster's position
            (mon_curr_x, mon_curr_y) = (0, 0)
            (mon_next_x, mon_next_y) = (0, 0)
            offset_x = 4
            offset_y = 4
            # Current state
            curr_state_list = []
            for y in range(-3, 4):
                row = []
                for x in range(-3, 4):
                    state_item = state[(x, y)]
                    row.append(state[(x, y)])
                    if state_item == 'm':
                        (mon_curr_x, mon_curr_y) = (x + offset_x, y + offset_y)
                curr_state_list.append(row)
            # Next state
            next_state_list = []
            for y in range(-3, 4):
                row = []
                for x in range(-3, 4):
                    state_item = next_state[(x, y)]
                    row.append(next_state[(x, y)])
                    if state_item == 'm':
                        (mon_next_x, mon_next_y) = (x + offset_x, y + offset_y)
                next_state_list.append(row)
            print("current monster:", mon_curr_x, mon_curr_y)
            print("next monster:", mon_next_x, mon_next_y)
            # Check monster movement towards character
            current_monster_to_character = self.heuristic((offset_x, offset_y), (mon_curr_x, mon_curr_y))
            next_monster_to_character = self.heuristic((offset_x, offset_y), (mon_next_x, mon_next_y))
            # Check if monster moved in either axis
            (mon_dx, mon_dy) = (mon_curr_x - mon_next_x, mon_curr_y - mon_next_y)
            if abs(mon_dx) == 1 or abs(mon_dy) == 1:
                if self.debug_print:
                    print("MONSTER MOVED")
                if next_monster_to_character < current_monster_to_character:
                    if self.debug_print:
                        print("MONSTER CLOSER TO ME")
                    reward -= 5
                print("monster move: ", mon_dx, mon_dy)
                print("current mc: ", current_monster_to_character, "next mc: ", next_monster_to_character)
                print("next action: ", action[0], action[1])
            if self.debug_print:
                print("Current State Character View Map")
                for row in curr_state_list:
                    print(row)
                print("Next State Character View Map")
                for row in next_state_list:
                    print(row)
        # Manage rewards when a monster is seen
        if 'mon_move' in self.give_rewards:
            if 'm' in next_state.values():
                for key, value in next_state.items():
                    if value == 'm':
                        mx = 1 if key[0] > 0 else (-1 if key[0] < 0 else 0)
                        my = 1 if key[1] > 0 else (-1 if key[1] < 0 else 0)
                        if (action[0], action[1]) != (0, 0):
                            # Reward moving away from the monster
                            if (mx == -action[0]) and (my == -action[1]):
                                reward += 3
                            elif (mx == -action[0]) and (abs(my - action[1]) < 2):
                                reward += 1
                            elif (my == -action[1]) and (abs(mx - action[0]) < 2):
                                reward += 1
        # Standard tabular q-learning update
        old_value = 0
        old_value_backup = 0
        for i in self.q_table:
            if i.state == state:
                old_value = i.action_value[action]
                old_value_backup = i.action_value[action_backup]
                break
        next_max = 0
        for i in self.q_table:
            if i.state == next_state:
                next_max = max(i.action_value.values())
        new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
        new_value_backup = (1 - alpha) * old_value_backup + alpha * (reward + gamma * next_max)
        state_exists = False
        for i in range(self.q_table.size):
            if self.q_table[i].state == state:
                state_exists = True
                new_dic = self.q_table[i].action_value
                new_dic[action] = new_value
                if action != action_backup:
                    new_dic[action_backup] = new_value_backup
                self.q_table[i] = QEntry(state, new_dic)
                break
        if not state_exists:
            new_entry = QEntry(state)
            new_entry.action_value[action] = new_value
            if action != action_backup:
                new_entry.action_value[action_backup] = new_value_backup
            self.q_table = np.append(self.q_table, [new_entry])
        if self.show_rewards:
            # Use a context manager so the log file is closed after each write
            with open('rewards.csv', 'a') as file:
                file.write('(' + str(self.x) + ' ' + str(self.y) + '),(' +
                           str(action[0]) + ' ' + str(action[1]) + ' ' +
                           str(action[2]) + '),' + str(reward) + ',\n')
    return action

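# The table maintenance above implements the standard tabular q-learning
# update. Isolated, with a plain dict as the table (a hypothetical helper,
# equivalent in spirit to the QEntry bookkeeping above; `next_action_values`
# is the list of Q values available from the successor state):
def q_update(q, state, action, reward, next_action_values, alpha, gamma):
    # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
    next_max = max(next_action_values) if next_action_values else 0
    q[(state, action)] = (1 - alpha) * q.get((state, action), 0) \
        + alpha * (reward + gamma * next_max)
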
def aientity_do(self, entities):
    """Call AI to get actions for next step"""
    for i, elist in entities.items():
        for e in elist:
            # Call AI
            e.do(SensedWorld.from_world(self))