Example #1
 def manage_events(self):
     for e in self.events:
         if e.tpe == Event.BOMB_HIT_CHARACTER:
             e.other.done(SensedWorld.from_world(self))
         elif e.tpe == Event.CHARACTER_KILLED_BY_MONSTER:
             self.remove_character(e.character)
             e.character.done(SensedWorld.from_world(self))
         elif e.tpe == Event.CHARACTER_FOUND_EXIT:
             e.character.done(SensedWorld.from_world(self))
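All of these snippets share one simulate-ahead pattern from the Bomberman framework: clone the current world with SensedWorld.from_world(), act inside the clone, then advance it one tick with next(), which returns the new world plus the events that occurred. A minimal sketch of that pattern (assuming the framework's sensed_world module is importable; evaluate is a hypothetical scoring function, not part of the framework):

    from sensed_world import SensedWorld  # framework import; path may vary by repo

    def best_one_step_move(self, wrld, candidate_moves, evaluate):
        # Pick the move whose one-step simulated outcome scores highest.
        # `evaluate(world, events) -> float` stands in for any of the
        # Q/utility functions used in the examples below.
        best_move, best_score = None, float("-inf")
        for (dx, dy) in candidate_moves:
            clone = SensedWorld.from_world(wrld)  # deep copy; real world untouched
            clone.me(self).move(dx, dy)           # queue my action in the clone
            next_wrld, events = clone.next()      # advance the clone one tick
            score = evaluate(next_wrld, events)
            if score > best_score:
                best_move, best_score = (dx, dy), score
        return best_move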
Example #2
    def __calc_next_path(self, wrld):
        chosen_world = SensedWorld.from_world(wrld)
        me = chosen_world.me(self)
        dx = None
        dy = None
        # initialize so the training block below cannot hit an UnboundLocalError
        # if the exploit loop finds no move (cf. Example #9)
        chosen_ev = None
        chosen_target = None
        final_state = False

        x = uniform(0, 1)
        if x > self.eps:
            print("EXPLOIT")
            next_moves = self.__list_next_moves(wrld)
            max_q = -inf
            for move in next_moves:
                c_wrld = SensedWorld.from_world(wrld)
                c_wrld.me(self).move(move[0], move[1])
                (c_wrld, ev) = c_wrld.next()
                (cur_q, cur_target, cur_fs) = self.calc_q(c_wrld, ev, move)
                if cur_q > max_q:
                    max_q = cur_q
                    (dx, dy) = move
                    chosen_world = c_wrld
                    chosen_target = cur_target
                    chosen_ev = ev
                    final_state = cur_fs
            self.q = max_q
        else:
            print("EXPLORE")
            queue = [(me.x, me.y)]
            visited = {(me.x, me.y): None}
            while queue:
                s = queue.pop(0)
                if s == wrld.exitcell:
                    break
                for (mdx, mdy) in self.__list_next_moves(chosen_world, move=s):
                    move = (s[0] + mdx, s[1] + mdy)
                    if move not in visited:
                        visited[move] = s
                        queue.append(move)

            end = wrld.exitcell
            # walk the BFS parents back from the exit (assumed reachable here)
            # until `end` is the first cell after the character's position
            while True:
                if visited[end] is None or visited[visited[end]] is None:
                    dx = end[0] - me.x
                    dy = end[1] - me.y
                    break
                end = visited[end]

            me.move(dx, dy)
            (chosen_world, chosen_ev) = chosen_world.next()
            (self.q, chosen_target,
             final_state) = self.calc_q(chosen_world, chosen_ev, (dx, dy))

        if self.training is True:
            self.training_data.append([chosen_world, chosen_ev, chosen_target])
            if final_state is True:
                print("Training...")
                self.__update_nn(self.training_data)
        return (dx, dy)
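The EXPLORE branch above is a plain breadth-first search that records each cell's parent, then walks the parent chain back from the exit to recover the first step of the path. The reconstruction in isolation, as a self-contained sketch (the toy grid and diagonal neighbor function are made up for illustration):

    from collections import deque

    def first_step_toward(start, goal, neighbors):
        # BFS from start; return the (dx, dy) of the first move on a shortest path.
        if goal == start:
            return (0, 0)
        visited = {start: None}  # cell -> parent cell
        queue = deque([start])
        while queue:
            s = queue.popleft()
            if s == goal:
                break
            for n in neighbors(s):
                if n not in visited:
                    visited[n] = s
                    queue.append(n)
        if goal not in visited:
            return None  # exit unreachable
        end = goal
        while visited[end] != start:  # walk parents back to the cell after start
            end = visited[end]
        return (end[0] - start[0], end[1] - start[1])

    # toy 3x3 open grid with 8-way movement
    cells = {(x, y) for x in range(3) for y in range(3)}
    neighbors = lambda s: [c for c in cells if c != s
                           and abs(c[0] - s[0]) <= 1 and abs(c[1] - s[1]) <= 1]
    print(first_step_toward((0, 0), (2, 2), neighbors))  # (1, 1)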
Example #3
    def getNewWorld(self, char, wrld, action):
        newWorld = SensedWorld.from_world(wrld)
        newChar = self.getCharInWorld(newWorld)
        actionVector = self.getActionVector(action)
        newChar.x += actionVector[0]
        newChar.y += actionVector[1]
        return newWorld

    def updateQ(self, wrld):
        alpha = 0.3
        moves = get_adjacent((self.x, self.y), wrld)
        for m in moves:
            if not wrld.wall_at(m[0], m[1]):
                sim = SensedWorld.from_world(wrld)  # creates simulated world
                c = sim.me(self)  # finds character from simulated world
                c.move(m[0] - self.x,
                       m[1] - self.y)  # moves character in simulated world
                s = sim.next()  # advances the simulation; returns a (world, events) tuple
                c = s[0].me(c)  # re-find our character: s[0] is the board, s[1] the elapsed events

                # Check if game is over
                if c is None:
                    print("ENDED!")
                    print(s[0])
                    print(s[1])
                    print("EVENT 0: ")
                    print(s[1][0])
                    for event in s[1]:
                        # note: the original had these two branches' q-table keys
                        # swapped (dying was stored under the exit state and vice
                        # versa); the values are kept from the original
                        if event.tpe == Event.CHARACTER_KILLED_BY_MONSTER and event.character.name == self.name:
                            self.qtable[("dead"), m] = -5
                        elif event.tpe == Event.CHARACTER_FOUND_EXIT and event.character.name == self.name:
                            self.qtable[calculate_state(wrld.exitcell, wrld), m] = -5
                else:
                    print("Xcoord: " + str(c.x) + ", Ycoord: " + str(c.y))
                    self.qtable[(calculate_state((c.x, c.y), wrld), m)] = \
                        distance_to_exit((c.x, c.y), wrld)
Example #5
 def __calc_next_interactive(self, wrld):
     # Commands
     dx, dy = 0, 0
     # Handle input
     for c in input(
             "How would you like to move (w=up,a=left,s=down,d=right)? "):
         if 'w' == c:
             dy -= 1
         if 'a' == c:
             dx -= 1
         if 's' == c:
             dy += 1
         if 'd' == c:
             dx += 1
     chosen_world = SensedWorld.from_world(wrld)
     chosen_world.me(self).move(dx, dy)
     (chosen_world, chosen_ev) = chosen_world.next()
     (self.q, chosen_target,
      final_state) = self.calc_q(chosen_world, chosen_ev, (dx, dy))
     print(self.q)
     if self.training is True:
         self.training_data.append([chosen_world, chosen_ev, chosen_target])
         if final_state is True:
             print("Training...")
             self.__update_nn(self.training_data)
     return (dx, dy)
Example #6
 def __max_a(self, world):
      ''' @dillon
      max-a assignment for approximate Q-learning
      '''
     self.max_q = -inf
     possible_actions = self.__possible_actions(world) # list of dx, dy
     for action in possible_actions:
         clone = SensedWorld.from_world(world) # clone the current world
         dx, dy = action # unpack
         me = clone.me(self) # find me in cloned world
         if dx == 0 and dy == 0:
             me.place_bomb()
         else:
             me.move(dx, dy) # make the move in cloned world
         next_clone, ev = clone.next() # simulate the move and clone the next world
         if next_clone.me(self) is None:
             # terminal state, q = r
             q = self.__r(ev, world)
         else:
             q = self.__q(next_clone, (0, 0)) # derive q of new world, don't move though
          if q > self.max_q:
              self.max_q = q # record best q
              self.max_a = action # record corresponding action
              self.events = ev # record events from that simulation
      return self.max_a # return action corresponding to best q
Example #7
def monsters_current_path(wrld, character):
	my_wrld = SensedWorld.from_world(wrld)
	monsters = findAll(wrld, 2)

	if len(monsters) == 0:
		return 0

	pos = (character.x, character.y)
	original_nearest_monster = findNearestEntity(wrld, pos, monsters)

	next_wrld, next_events = my_wrld.next()

	if next_wrld.me(character) is None:
		return 0

	monsters = findAll(next_wrld, 2)

	if len(monsters) == 0:
		return 0

	next_nearest_monster = findNearestEntity(next_wrld, pos, monsters)

	delta_coords = (next_nearest_monster[0] - original_nearest_monster[0], next_nearest_monster[1] - original_nearest_monster[1])

	# does the character sit on the monster's projected path 1-3 steps ahead?
	for i in range(1, 4):
		newX, newY = original_nearest_monster[0] + (delta_coords[0] * i), original_nearest_monster[1] + (delta_coords[1] * i)

		if character.x == newX and character.y == newY:
			return 1

	return distanceToMonster(wrld, character)
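Example #7 estimates the nearest monster's one-step velocity from two simulated frames, then linearly extrapolates that velocity up to three steps to see whether it crosses the character. The projection test in isolation (self-contained; the positions are made up for illustration):

    def on_projected_path(char_pos, monster_pos, delta, steps=3):
        # True if char_pos lies on monster_pos + i * delta for some i in 1..steps.
        for i in range(1, steps + 1):
            if char_pos == (monster_pos[0] + delta[0] * i,
                            monster_pos[1] + delta[1] * i):
                return True
        return False

    print(on_projected_path((3, 3), (0, 0), (1, 1)))  # True: intersects at step 3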
Example #8
    def __find_max_a(self, wrld, action):
        '''Return the maximum approximate Q over the next moves (`action` is
        unused in this version).'''

        max_q = -inf
        for (dx, dy) in self.__list_next_moves(wrld):
            clone_wrld = SensedWorld.from_world(wrld)
            me = clone_wrld.me(self)
            if dx == 0 and dy == 0:
                me.place_bomb()
            else:
                me.move(dx, dy)
            (clone_wrld, ev) = clone_wrld.next()
            """
            print("TEST --", dx, dy)
            clone_wrld.printit()
            print(ev)
            print("----")
            """
            """
            (r, final) = self.__calc_r(clone_wrld,ev)
            if final:
                q = r
            else:
                a = self.__approx_q(clone_wrld,ev).item()
                q = r + self.gamma * a
            """
            q = self.__approx_q(clone_wrld, ev).item()
            #print("CUR A", q)
            if q > max_q:
                max_q = q
        #print("MAX Q", max_q)
        return max_q
Example #9
 def __calc_next_move(self, wrld):
     '''
     @ray
     Calculates the next move based on approximate q learning
     '''
     # take a new move using epsilon greedy exploration
     new_move = None
     chosen_world = None
     chosen_ev = None
     chosen_target = None
     final_state = False
     next_moves = self.__list_next_moves(wrld)
     x = uniform(0, 1)
     if x < self.eps:
         # exploration
         new_move = next_moves[randrange(0, len(next_moves))]
         chosen_world = SensedWorld.from_world(wrld)
         chosen_world.me(self).move(new_move[0], new_move[1])
         (chosen_world, chosen_ev) = chosen_world.next()
         (self.q, chosen_target,
          final_state) = self.calc_q(chosen_world, chosen_ev, new_move)
     else:
         # exploitation
         max_q = -inf
         for move in next_moves:
             c_wrld = SensedWorld.from_world(wrld)
             c_wrld.me(self).move(move[0], move[1])
             (c_wrld, ev) = c_wrld.next()
             (cur_q, cur_target, cur_fs) = self.calc_q(c_wrld, ev, move)
             if cur_q > max_q:
                 max_q = cur_q
                 new_move = move
                 chosen_world = c_wrld
                 chosen_target = cur_target
                 chosen_ev = ev
                 final_state = cur_fs
         self.q = max_q
     if self.training is True:
         self.training_data.append([chosen_world, chosen_ev, chosen_target])
         if final_state is True:
             print("Training...")
             self.__update_nn(self.training_data)
     return new_move
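Examples #2, #9, #18, and #27 all use the same epsilon-greedy rule: with probability eps take a random action, otherwise take the argmax over the simulated outcomes. The selection rule in isolation (self-contained; the fixed Q table here is a stand-in for the simulate-and-score code above):

    from random import random, choice

    def epsilon_greedy(actions, q_of, eps=0.1):
        # Random action with probability eps (explore), else argmax (exploit).
        if random() < eps:
            return choice(actions)
        return max(actions, key=q_of)

    q = {(0, 1): 0.2, (1, 0): 0.7, (0, 0): -0.1}
    print(epsilon_greedy(list(q), q.__getitem__, eps=0.05))  # usually (1, 0)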
Example #10
    def get_nextworlds(self, wrld):
        # Return accessible next worlds after character's movement in given world
        # Possible moves include moving to empty cells, moving to the exit cell, and placing a bomb
        # List of possible worlds
        nextworlds = []
        """
        for i in range (x-1,x+2):
            for j in range (y-1,y+2):
                if wrld.empty_at(i,j) and not (x,y) == (i,j):
                    cells.append((i,j))
        """
        # Adapted from example code on Github
        # Loop through delta x
        for dx in [-1, 0, 1]:
            # Avoid out-of-bound indexing
            if (self.x + dx >= 0) and (self.x + dx < wrld.width()):
                # Loop through delta y
                for dy in [-1, 0, 1]:
                    # A check that (dx, dy) != (0, 0) was originally here, but not
                    # moving can be necessary. In practice "stay" is still never
                    # generated, because empty_at() is False on an occupied cell.
                    # Avoid out-of-bound indexing
                    if (self.y + dy >= 0) and (self.y + dy < wrld.height()) \
                            and (wrld.empty_at(self.x + dx, self.y + dy)
                                 or wrld.exit_at(self.x + dx, self.y + dy)):

                        clonewrld = SensedWorld.from_world(wrld)
                        clonewrld.me(self).move(dx, dy)
                        (newwrld, events) = clonewrld.next()
                        nextworlds.append((newwrld, events, (dx, dy)))
        # Includes world in which character places bomb
        # The final value in the tuple, move, contains either the space the character moves to or self.BOMB if they
        # place a bomb
        clonewrld = SensedWorld.from_world(wrld)
        clonewrld.me(self).place_bomb()
        clonewrld.me(self).move(0, 0)
        (newwrld, events) = clonewrld.next()
        nextworlds.append((newwrld, events, self.BOMB))
        return nextworlds
Example #11
 def maxValue(self, wrld, val, level):
     if level >= self.search_level:
         return self.get_score(wrld, val)
     value = -inf
     for loc in self.look_for_cell(wrld):
         newWorld = SensedWorld.from_world(loc[0])
         character = newWorld.me(self)
         character.x = loc[1][0]
         character.y = loc[1][1]
         value = max(value, self.expValue(newWorld, loc[1], level + 1))
     return value
Example #12
 def generateCharMoveWorlds(self, char, wrld):
     ret = []
     allActions = self.getAllActions(wrld, char.x, char.y)
     for i in allActions:
         newWorld = SensedWorld.from_world(wrld)
         newChar = self.getCharInWorld(newWorld)
         actionVector = self.getActionVector(i)
         newChar.x += actionVector[0]
         newChar.y += actionVector[1]
         ret.append((newWorld, i))
     return ret
Example #13
 def maxValue(self, wrld, val, level):
     if level >= 1:
         return self.fitness(wrld, val)
     value = -math.inf
     for loc in self.cell(wrld):
         newWorld = SensedWorld.from_world(loc[0])
         character = newWorld.me(self)
         character.x = loc[1][0]
         character.y = loc[1][1]
         value = max(value, self.expValue(newWorld, loc[1], level + 1))
     return value
Example #14
    def get_nextworlds_monster(self, wrld):
        nextworlds = []
        monsterx = -1
        monstery = -1
        # Adapted from example code on Github

        # Checks that the map contains a monster. If it does, saves its position.
        # TODO: In future versions of this (for variants 4 and 5), account for more than one monster.
        for x in range(0, wrld.width()):
            for y in range(0, wrld.height()):
                if wrld.monsters_at(x, y):
                    monsterx = x
                    monstery = y
                    # monster = clonewrld.monsters_at(x, y)
                    # monster.move(dx, dy)
        if monsterx == -1 and monstery == -1:
            clonewrld = SensedWorld.from_world(wrld)
            (newwrld, events) = clonewrld.next()
            nextworlds.append((newwrld, events, (0, 0)))
            return nextworlds
        # Loop through delta x
        for dx in [-1, 0, 1]:
            # Avoid out-of-bound indexing
            if (monsterx + dx >= 0) and (monsterx + dx < wrld.width()):
                # Loop through delta y
                for dy in [-1, 0, 1]:
                    # Avoid out-of-bound indexing
                    if (monstery + dy >= 0) and (monstery + dy < wrld.height()) \
                            and (wrld.empty_at(monsterx + dx, monstery + dy)
                                 or wrld.characters_at(monsterx + dx, monstery + dy)):
                        """
                        if clonewrld.monsters_at(self.monster[0],self.monster[1]):
                            monster = clonewrld.monsters_at(self.monster[0],self.monster[1])[0]
                            monster.move(dx, dy)
                        """
                        clonewrld = SensedWorld.from_world(wrld)
                        monster = clonewrld.monsters_at(monsterx, monstery)[0]
                        monster.move(dx, dy)
                        (newwrld, events) = clonewrld.next()
                        nextworlds.append((newwrld, events, (dx, dy)))
        return nextworlds
Example #15
    def look_for_monster(self,wrld,loc):
        cells = []
        x, y = loc
        # the eight cells adjacent to the monster at (x, y)
        eight_moves = [(x, y - 1), (x, y + 1), (x + 1, y - 1), (x - 1, y),
                       (x + 1, y), (x - 1, y + 1), (x + 1, y + 1), (x - 1, y - 1)]
        for action in eight_moves:
            # only non-wall cells (the original tested wall_at() without `not`,
            # which would only generate moves into walls)
            if not self.wall_at(action[0], action[1], wrld):
                newWorld = SensedWorld.from_world(wrld)
                monster = newWorld.monsters_at(x, y)[0]
                # move() takes a delta, so convert the absolute target cell
                monster.move(action[0] - x, action[1] - y)
                cells.append((newWorld, action))

        return cells
Example #16
 def generateMonsterMoveWorlds(self, wrld):
      ret = [wrld]  # include the unchanged world (monster stays put)
     monster = self.getAllMonstersInWorld(wrld)[0]
     monsterMoves = self.getMonsterMoves(monster, wrld)
     for i in monsterMoves:
         newWorld = SensedWorld.from_world(wrld)
         newMonster = self.getAllMonstersInWorld(newWorld)[0]
         newMonster.x += i[0]
         newMonster.y += i[1]
         ret.append(newWorld)
     return ret
Example #17
 def do(self, wrld):
     if self.approx_net is None:
         # create the neural network
         self.__init_nn(wrld, filename=self.nn_file)
     s_wrld = SensedWorld.from_world(wrld)
     #(dx,dy) = self.__calc_next_move(s_wrld)
     #(dx,dy) = self.__calc_next_interactive(s_wrld)
     (dx, dy) = self.__calc_next_path(s_wrld)
     self.me_pos = (self.me_pos[0] + dx, self.me_pos[1] + dy)
     if (dx, dy) == (0, 0):
         self.place_bomb()
     else:
         self.move(dx, dy)
Example #18
	def perform_qLearning(self, wrld):
		self.prev_wrld = SensedWorld.from_world(wrld)

		if self.train is True:
			exploringFlag = False
			best_wrld = None
			if random.random() < self.epsilon:
				exploringFlag = True
				# choose random move
				allowed_direction = [-1, 0, 1]
				bomb_actions = [False, True]

				x = random.choice(allowed_direction)
				y = random.choice(allowed_direction)
				place_bomb = random.choice(bomb_actions)

				x, y = bomb_handler(wrld, (self.x, self.y), (x, y))
				#x, y = explosion_handler2(wrld, (self.x, self.y), (x, y))

				self.move(x, y)

				if place_bomb is True:
					self.place_bomb()

			else:
				maxQ, best_action, best_wrld = self.q_learner.getBestMove(wrld, self)

				x, y, place_bomb = best_action

				self.move(x, y)

				if place_bomb is True:
					self.place_bomb()

		else:
			# use the converged values
			maxQ, best_action, best_wrld = self.q_learner.getBestMove(wrld, self)

			x, y, place_bomb = best_action

			self.move(x, y)

			if place_bomb is True:
				self.place_bomb()
Example #19
 def do(self, wrld):
     # Your code here
     # action: up, down, left, right, leftup, leftdown, rightup, rightdown, boom
      # feature weights; kept on the instance so the update below persists
      # across calls (the original read a local Wvalues before assignment)
      if not getattr(self, "Wvalues", None):
          self.Wvalues = [10, -100, -100, 1000, -10, -100]
      Wvalues = self.Wvalues
     sWrld = SensedWorld.from_world(wrld)
     (newWorld, events) = sWrld.next()
     currscore = sWrld.scores["me"]
     nextscore = newWorld.scores["me"]
     currx = self.x
     curry = self.y
     nextx = []
     nexty = []
     direction = [
         "up", "down", "left", "right", "leftup", "leftdown", "rightup",
         "rightdown"
     ]
     for i in range(len(direction)):
         nextdirx, nextdiry = self.nextstep(wrld, currx, curry,
                                            direction[i])
         nextx.append(nextdirx)
         nexty.append(nextdiry)
     currQval = self.Qvalue(wrld, Wvalues[0], Wvalues[1], Wvalues[2],
                            Wvalues[3], Wvalues[4], Wvalues[5], currx,
                            curry)
     BestQ = -99999
     Bestxy = (-99, -99)
     Bestmove = (-99, -99)
     for j in range(len(nextx)):
         Qdirval = self.Qvalue(wrld, Wvalues[0], Wvalues[1], Wvalues[2],
                               Wvalues[3], Wvalues[4], Wvalues[5], nextx[j],
                               nexty[j])
         if (Qdirval > BestQ):
             BestQ = Qdirval
             Bestxy = (nextx[j], nexty[j])
     Bestmove = (Bestxy[0] - self.x, Bestxy[1] - self.y)
     reward = nextscore - currscore
     delta = reward + BestQ - currQval
      # gradient step with learning rate 0.4 (note: a textbook approximate-Q
      # update would scale delta by feature k's value rather than by BestQ)
      for k in range(len(Wvalues)):
          Wvalues[k] = Wvalues[k] + delta * 0.4 * BestQ
     self.move(Bestmove[0], Bestmove[1])
Example #20
    def monAction(self, wrld, loc):
        cells = []
        x, y = loc

        for action in [(x, y - 1), (x, y + 1), (x + 1, y - 1), (x - 1, y),
                       (x + 1, y), (x - 1, y + 1), (x + 1, y + 1),
                       (x - 1, y - 1)]:

            # only non-wall cells (the original tested isWall() without `not`,
            # which would only generate moves into walls)
            if not isWall(action[0], action[1], wrld):
                newWorld = SensedWorld.from_world(wrld)
                monster = newWorld.monsters_at(x, y)[0]
                # move() takes a delta, so convert the absolute target cell
                monster.move(action[0] - x, action[1] - y)
                cells.append((newWorld, action))

        return cells
Example #21
 def flook_for_monster(self, wrld, loc):
     '''Generate one world per free cell adjacent to the character, with the
     monster at `loc` stepped toward the character in each clone.'''
     x, y = loc
     cells = []
     # the eight cells around the character (the original unrolled these
     # into eight copy-pasted blocks)
     for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1),
                    (1, 1), (1, -1), (-1, 1), (-1, -1)]:
         nx, ny = self.x + dx, self.y + dy
         # only free cells (the original tested wall_at() without `not`)
         if not self.wall_at(nx, ny, wrld):
             newWorld = SensedWorld.from_world(wrld)
             # monsters_at() returns a list; the original called move() on the list
             mon = newWorld.monsters_at(x, y)[0]
             # move() takes a delta; step the monster toward the character
             mon.move(self.x - x, self.y - y)
             cells.append((newWorld, (nx, ny)))
     return cells
Example #22
	def next_world_state(self, action, wrld):
		sensed_world = SensedWorld.from_world(wrld)

		# move character
		if sensed_world.me(self) is not None:
			sensed_world.me(self).move(action[0], action[1])

		# move closest monster
		closest_monster_pos, monster_found = self.find_closest_monster((self.x + action[0], self.y + action[1]), wrld)
		if monster_found:
			monster_move, monster_pos = self.predict_aggressive_monster_move(closest_monster_pos, wrld)
			monster = sensed_world.monsters_at(monster_pos[0], monster_pos[1])
			if monster is not None:
				monster[0].move(monster_move[0], monster_move[1])

		next_world, events = sensed_world.next()
		return next_world, events
Example #23
    def expectimax_search(self, wrld):
        c_level = 0
        search_result = 0
        current_val = -math.inf
        max_val = -math.inf

        for w, val in self.cell(wrld):
            newWorld = SensedWorld.from_world(w)
            character = newWorld.me(self)
            character.x, character.y = val[0], val[1]

            # evaluate the cloned world in which the character was moved
            # (the original passed the pre-move world to expValue and also
            # shadowed the wrld parameter with the loop variable)
            current_val = max(current_val,
                              self.expValue(newWorld, val, c_level + 1))
            if current_val > max_val:
                max_val = current_val
                search_result = val

        return search_result
Example #24
 def expectimax_search(self, wrld, search_level):
     c_level = 0
     search_result = 0
     max_val = -inf
     current_val = -inf

     # val is an (x, y) tuple
     for w, val in self.look_for_cell(wrld):
         newWorld = SensedWorld.from_world(w)
         character = newWorld.me(self)
         character.x = val[0]
         character.y = val[1]
         # evaluate the cloned world in which the character was moved
         # (the original passed the pre-move world to expValue)
         current_val = max(current_val,
                           self.expValue(newWorld, val, c_level + 1))
         if current_val > max_val:
             max_val = current_val
             search_result = val

     return search_result
Example #25
 def update_characters(self):
     """Update character state"""
     # Event list
     ev = []
     # Update all the characters
     ncharacters = {}
     for i, clist in self.characters.items():
         for c in clist:
             # Call AI
             c.do(SensedWorld.from_world(self))
             # Attempt to place bomb
             if c.maybe_place_bomb:
                 c.maybe_place_bomb = False
                 can_bomb = True
                 # Make sure this character has not already placed another bomb
                 for k, b in self.bombs.items():
                     if b.owner == c:
                         can_bomb = False
                         break
                 if can_bomb:
                     self.add_bomb(c.x, c.y, c)
             # Update position and check for events
             ev2 = self.update_character_move(c, False)
             ev = ev + ev2
             # Character gets inserted in next step's list unless hit or
             # escaped
             if not (ev2 and ev2[0].tpe in [
                     Event.BOMB_HIT_CHARACTER, Event.CHARACTER_FOUND_EXIT
             ]):
                 # Update new index
                 ni = self.index(c.x, c.y)
                 np = ncharacters.get(ni, [])
                 np.append(c)
                 ncharacters[ni] = np
     # Save new index
     self.characters = ncharacters
     # Return events
     return ev
Example #26
 def update_monsters(self):
     """Update monster state"""
     # Event list
     ev = []
     # Update all the monsters
     nmonsters = {}
     for i, mlist in self.monsters.items():
         for m in mlist:
             # Call AI
             m.do(SensedWorld.from_world(self))
             # Update position and check for events
             ev2 = self.update_monster_move(m, False)
             ev = ev + ev2
             # Monster gets inserted in next step's list unless hit
             if not (ev2 and ev2[0].tpe == Event.BOMB_HIT_MONSTER):
                 # Update new index
                 ni = self.index(m.x, m.y)
                 np = nmonsters.get(ni, [])
                 np.append(m)
                 nmonsters[ni] = np
     # Save new index
     self.monsters = nmonsters
     # Return events
     return ev
Example #27
    def qLearn(self, wrld, state):

        # constants
        alpha = self.alpha
        gamma = self.gamma
        epsilon = self.epsilon
        action = (0, 0, -1)

        if random.uniform(0, 1) >= epsilon:
            for i in self.q_table:
                if i.state == state:
                    if max(i.action_value.values()) == 0:
                        key_list = []
                        for key, value in i.action_value.items():
                            if value == 0:
                                # if state[(key[0], key[1])] == 'a':
                                #     action = (key[0], key[1], 0)
                                key_list.append(key)

                        action = random.choice(key_list)
                        if self.debug_print:
                            print(action, i.action_value[action])

                    else:
                        action = max(i.action_value, key=(lambda x: i.action_value[x]))
                        if self.debug_print:
                            print(action, i.action_value[action])
                        break

        if self.train:
            if action == (0, 0, -1):
                action = random.choice(list(self.q_table[0].action_value.keys()))
                if self.debug_print:
                    print("rand", action)

            action_backup = action

            if 'illegal_move' in self.give_rewards:
                if ((self.x + action[0]) >= wrld.width() or (self.x + action[0]) < 0) and (
                        (self.y + action[1]) >= wrld.height() or (self.y + action[1]) < 0):
                    if self.debug_print:
                        print('illegal_move')
                    action = random.choice(list(self.q_table[0].action_value.keys()))
                elif (self.x + action[0]) >= wrld.width() or (self.x + action[0]) < 0:
                    if self.debug_print:
                        print('illegal_move')
                    action = (0, action[1], action[2])
                elif (self.y + action[1]) >= wrld.height() or (self.y + action[1]) < 0:
                    if self.debug_print:
                        print('illegal_move')
                    action = (action[0], 0, action[2])
                elif wrld.wall_at((self.x + action[0]), (self.y + action[1])):
                    if self.debug_print:
                        print('wall_move')
                    action = random.choice(list(self.q_table[0].action_value.keys()))
                elif wrld.wall_at((self.x + action[0]), (self.y)):
                    if self.debug_print:
                        print('wall_move')
                    action = (0, action[1], action[2])
                elif wrld.wall_at((self.x), (self.y + action[1])):
                    if self.debug_print:
                        print('wall_move')
                    action = (action[0], 0, action[2])

            sensed_world = SensedWorld.from_world(wrld)
            (sensed_world.me(self)).move(action[0], action[1])
            if action[2]:
                (sensed_world.me(self)).place_bomb()
            next_state, next_events = sensed_world.next()

            reward = (5000 / wrld.scores[self.name] * 0.5) if 'life_penalty' in self.give_rewards else 0

            if len(next_events) > 0:
                for i in next_events:
                    if i.tpe == Event.CHARACTER_FOUND_EXIT:
                        print("Found Exit!!!!!!!!!!!!!!!!!")
                    if 'wall_blow' in self.give_rewards and i.tpe == Event.BOMB_HIT_WALL and i.character.name == self.name:
                        if self.debug_print:
                            print('wall_blow')
                        reward += 10
                    if 'mon_blow' in self.give_rewards and i.tpe == Event.BOMB_HIT_MONSTER and i.character.name == self.name:
                        if self.debug_print:
                            print('mon_blow')
                        reward += 50
                    if i.tpe == Event.BOMB_HIT_CHARACTER:
                        if 'char_blow' in self.give_rewards and i.character.name == self.name and i.character.name != i.other.name:
                            if self.debug_print:
                                print('char_blow')
                            reward += 100
                        elif 'self_blow' in self.give_rewards and i.character.name == self.name and i.character.name == i.other.name:
                            if self.debug_print:
                                print('self_blow')
                            reward -= 500
                    if 'mon_die' in self.give_rewards and i.tpe == Event.CHARACTER_KILLED_BY_MONSTER and i.character.name == self.name:
                        if self.debug_print:
                            print('mon_die')
                        reward -= 500

            elif 'explosion_move' in self.give_rewards and (
                    next_state.explosion_at(self.x + action[0], self.y + action[1])
                    or wrld.explosion_at(self.x + action[0], self.y + action[1])):
                if self.debug_print:
                    print('explosion_move')
                reward -= 500

            if 'exit' in self.give_rewards and self.heuristic(
                    ((self.x + action[0]), (self.y + action[1])), next_state.exitcell) == 0:
                if self.debug_print:
                    print('exit')
                reward += 10000

            elif 'a_star_move' in self.give_rewards:
                temp_for_print = reward
                if 'a_star' in state:
                    if 'a_star_bomb' in self.give_rewards and wrld.wall_at(self.x + state['a_star'][0], self.y + state['a_star'][1]) and not state['bomb']:
                        if self.debug_print:
                            print('a_star_bomb')
                        reward += action[2] * 20

                    if (action[0], action[1]) != (0, 0):
                        if (state['a_star'][0] == action[0]) and (state['a_star'][1] == action[1]):
                            reward += 3

                        elif (state['a_star'][0] == action[0]) and (abs(state['a_star'][1] - action[1]) < 2):
                            reward += 1

                        elif (state['a_star'][1] == action[1]) and (abs(state['a_star'][0] - action[0]) < 2):
                            reward += 1

                        else:
                            reward -= 1

                    if self.debug_print:
                        print('a_star (%d, %d) -> %d' % (state['a_star'][0], state['a_star'][1], reward - temp_for_print))

            # Current State
            curr_state = state

            # Next state is created here
            next_state = self.wrld_to_state(next_state)

            if self.add_monster_move:
                ######## States are compared ##########
                # Monster position tracking
                (mon_curr_x, mon_curr_y) = (0,0)
                (mon_next_x, mon_next_y) = (0,0)
                offset_x = 4
                offset_y = 4

                # Current State
                curr_state_list = []
                for y in range(-3, 4):
                    row = []
                    for x in range(-3, 4):
                        state_item = state[(x, y)]
                        row.append(state_item)
                        if state_item == 'm':
                            (mon_curr_x, mon_curr_y) = (x + offset_x, y + offset_y)
                    curr_state_list.append(row)

                # Next State
                next_state_list = []
                for y in range(-3, 4):
                    row = []
                    for x in range(-3, 4):
                        state_item = next_state[(x, y)]
                        row.append(state_item)
                        if state_item == 'm':
                            (mon_next_x, mon_next_y) = (x + offset_x, y + offset_y)
                    next_state_list.append(row)

                print("current monster:", mon_curr_x , mon_curr_y)
                print("next monster:", mon_next_x, mon_next_y)

                # Check monster movement towards character
                current_monster_to_character = self.heuristic((offset_x, offset_y), (mon_curr_x, mon_curr_y))
                next_monster_to_character = self.heuristic((offset_x, offset_y), (mon_next_x, mon_next_y))

                # Check if monster moved in either axis
                (mon_dx,mon_dy) = (mon_curr_x - mon_next_x, mon_curr_y - mon_next_y)
                if abs(mon_dx) == 1 or abs(mon_dy) == 1:
                    if self.debug_print:
                        print("MONSTER MOVED")
                    if next_monster_to_character < current_monster_to_character:
                        if self.debug_print:
                            print("MONSTER CLOSER TO ME")
                        reward -= 5

                print("monster move: ", mon_dx, mon_dy)
                print("current mc: ", current_monster_to_character, "next mc: ", next_monster_to_character)
                print("next action: ", action[0], action[1])


                if self.debug_print:
                    print("Current State Character View Map")
                    for row in curr_state_list:
                        print(row)

                    print("Next State Character View Map")
                    for row in next_state_list:
                        print(row)

            # Manage rewards when a monster is seen
            if 'mon_move' in self.give_rewards:
                if 'm' in next_state.values():
                    for key, value in next_state.items():
                        if value == 'm':
                            mx = 1 if key[0] > 0 else (-1 if key[0] < 0 else 0)
                            my = 1 if key[1] > 0 else (-1 if key[1] < 0 else 0)

                            if (action[0], action[1]) != (0, 0):
                                if (mx == -action[0]) and (my == -action[1]):
                                    reward += 3

                                elif (mx == -action[0]) and (abs(my - action[1]) < 2):
                                    reward += 1

                                elif (my == -action[1]) and (abs(mx - action[0]) < 2):
                                    reward += 1

            old_value = 0
            old_value_backup = 0
            for i in self.q_table:
                if i.state == state:
                    old_value = i.action_value[action]
                    old_value_backup = i.action_value[action_backup]
                    break

            next_max = 0
            for i in self.q_table:
                if i.state == next_state:
                    next_max = max(i.action_value.values())

            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            new_value_backup = (1 - alpha) * old_value_backup + alpha * (reward + gamma * next_max)

            state_exists = False
            for i in range(self.q_table.size):
                if self.q_table[i].state == state:
                    state_exists = True
                    new_dic = self.q_table[i].action_value
                    new_dic[action] = new_value
                    if action != action_backup:
                        new_dic[action_backup] = new_value_backup
                    self.q_table[i] = QEntry(state, new_dic)
                    break

            if not state_exists:
                new_entry = QEntry(state)
                new_entry.action_value[action] = new_value
                if action != action_backup:
                    new_entry.action_value[action_backup] = new_value_backup
                self.q_table = np.append(self.q_table, [new_entry])

            if self.show_rewards:
                # context manager so the file handle is closed on every call
                with open('rewards.csv', 'a') as file:
                    file.write('(' + str(self.x) + ' ' + str(self.y) + '),('
                               + str(action[0]) + ' ' + str(action[1]) + ' '
                               + str(action[2]) + '),' + str(reward) + ',\n')

        return action
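The heart of Example #27 is the standard tabular Q-learning update used for new_value above: Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max over a' of Q(s', a')). A self-contained sketch of that update on a plain dict keyed by (state, action) (the one-step episode is made up for illustration):

    def q_update(q, state, action, reward, next_state, actions,
                 alpha=0.3, gamma=0.9):
        # Tabular Q-learning update; unseen (state, action) pairs default to 0.
        old = q.get((state, action), 0.0)
        next_max = max((q.get((next_state, a), 0.0) for a in actions), default=0.0)
        q[(state, action)] = (1 - alpha) * old + alpha * (reward + gamma * next_max)

    q = {}
    actions = [(0, 1), (1, 0), (0, 0)]
    q_update(q, "s0", (1, 0), reward=10, next_state="s1", actions=actions)
    print(q)  # {('s0', (1, 0)): 3.0}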
Example #28
 def aientity_do(self, entities):
     """Call AI to get actions for next step"""
     for i, elist in entities.items():
         for e in elist:
             # Call AI
             e.do(SensedWorld.from_world(self))