Example #1
    def explore_and_exploit(self, ant):
        '''
        Update weights and decide whether to explore or exploit.
        This is where all the magic happens.
        '''

        actions = self.world.get_passable_directions(ant.location, AIM.keys())
        random.shuffle(actions)
        if not actions:
            return 'halt'
        
        # if this is a newborn ant, initialize its reward and quality records
        if not hasattr(ant, 'prev_value'):
            ant.prev_value = 0
            ant.previous_reward_events = RewardEvents()
            ant.prev_features = self.features.extract(self.world, self.state,
                                                      ant.location, actions[0])
            return actions[0]
        
        # step 1: update Q(s,a) based on going from the last state, taking
        # the action issued last round, and arriving at the current state
        R = self.get_reward(ant.previous_reward_events)
        
        # step size; it helps to make this inversely proportional to the
        # number of features, so gradient descent doesn't bounce out of the
        # bowl we are trying to descend into
        alpha = 0.00001

        # near-zero discount: almost totally greedy, future rewards count
        # for next to nothing
        discount = 0.00001

        # max_a' Q(s',a'), where we are now in state s' and the previous
        # state was s; self.value(self.state, ant.location, action) scores
        # each candidate action
        max_next_action = actions[0]
        max_next_value = self.value(self.state, ant.location, actions[0])
        for action in actions[1:]:
            value = self.value(self.state, ant.location, action)
            if value > max_next_value:
                max_next_value = value
                max_next_action = action

        # now that we have all the quantities needed, adjust the weights
        self.update_weights(alpha, discount, R, max_next_value, ant.prev_value,
                            ant.prev_features)
        # step 2: explore or exploit?  The exploration probability decays
        # with the number of games played so far (self.ngames).
        explore = 0.7 / max(self.ngames, 1)  # guard against ngames == 0
        if random.random() < explore:
            return actions[0]  # actions is shuffled, so this is a random move
        else:
            return max_next_action
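
None of these snippets show `update_weights` itself. Given the call signature above and the linear features, it presumably performs the standard Q-learning update under linear function approximation. A minimal sketch, assuming `self.weights` is a plain list parallel to the feature vector (not the course's actual implementation):

    def update_weights(self, alpha, discount, R, max_next_value,
                       prev_value, prev_features):
        # Hypothetical sketch of the linear Q-learning update implied by
        # the call sites in these examples.
        # TD error: R + gamma * max_a' Q(s',a') - Q(s,a)
        correction = R + discount * max_next_value - prev_value
        # With a linear Q, dQ/dw_i is just feature i, so nudge each
        # weight along its feature by alpha times the TD error.
        for i in range(len(self.weights)):
            self.weights[i] += alpha * correction * prev_features[i]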
Example #2
    def explore_and_exploit(self, ant):
        '''
        Update weights and decide whether to explore or exploit.
        This is where all the magic happens.
        '''

        actions = self.world.get_passable_directions(ant.location, AIM.keys())
        random.shuffle(actions)
        if not actions:
            return 'halt'
        
        # if this is a newborn ant, initialize its reward and quality records
        if not hasattr(ant, 'prev_value'):
            ant.prev_value = 0
            ant.previous_reward_events = RewardEvents()
            ant.prev_features = self.features.extract(self.world, self.state,
                                                      ant.location, actions[0])
            return actions[0]
        
        # step 1: update Q(s,a) based on going from the last state, taking
        # the action issued last round, and arriving at the current state
        R = self.get_reward(ant.previous_reward_events)
        
        # step size; inversely proportional to the number of features, so
        # gradient descent doesn't bounce out of the bowl we are descending
        alpha = 0.01 / len(self.weights)

        # full discount: future rewards count as much as immediate ones
        discount = 1.0
        
        # max_a' Q(s',a'), where we are now in state s' and the previous
        # state was s; max_by returns the (value, action) pair maximizing
        # the value function
        (max_next_value, max_next_action) = max_by(
            actions, lambda a: self.value(self.state, ant.location, a))
        
        # now that we have all the quantities needed, adjust the weights
        self.update_weights(alpha, discount, R, max_next_value, ant.prev_value,
                            ant.prev_features)

                
        # step 2: explore or exploit?  Explore unconditionally for the
        # first explore_start games, then anneal the exploration
        # probability linearly to zero by game explore_stop (constants
        # assumed to be defined elsewhere in the module).
        if self.ngames < explore_start:
            decide_to_explore = True
        elif self.ngames < explore_stop:
            p = 1.0 * (explore_stop - self.ngames) / (explore_stop - explore_start)
            decide_to_explore = random.random() < p
        else:
            decide_to_explore = False

        if decide_to_explore:
            return actions[0]
        else:
            return max_next_action
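
This version leans on a `max_by` helper that the snippet doesn't define. Judging from how its result is unpacked, it returns a `(value, item)` pair for the item maximizing the key function; a minimal sketch under that assumption:

    def max_by(items, key):
        # Assumed helper: return (best_value, best_item) over a non-empty
        # sequence, where best_value = key(best_item) is maximal.
        best_item = items[0]
        best_value = key(best_item)
        for item in items[1:]:
            value = key(item)
            if value > best_value:
                best_value = value
                best_item = item
        return (best_value, best_item)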
Example #3
    def get_direction(self, ant):
        '''Finds a direction for this ant to move in according to the
           food, enemy, exploration routine.'''

        # Build the list of directions: toward food, toward the closest
        # enemy, then the remaining directions in random order.
        rand_dirs = list(AIM.keys())  # list() so it can be shuffled (Python 3)
        random.shuffle(rand_dirs)
        dirs = (ant.toward(ant.closest_food()) +
                ant.toward(ant.closest_enemy()) + rand_dirs)

        # Return the first passable direction from that long list.
        return ant.get_passable_direction(dirs)
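
`ant.get_passable_direction` isn't shown in these examples. Given how it is used, it presumably walks the candidate list and returns the first direction whose destination square is passable; a sketch under that assumption (the attribute names mirror the other snippets):

    def get_passable_direction(self, dirs):
        # Assumed helper: return the first direction in dirs whose
        # destination square is passable, or None if none are.
        for d in dirs:
            if self.world.passable(self.world.next_position(self.location, d)):
                return d
        return None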
Example #4
    def get_successors(self, loc):
        '''
        Returns a list of valid next reachable locations from the input LOC.
        All derived classes should use this function; otherwise testing your
        implementation might fail.
        '''

        successors = []
        for d in AIM.keys():
            next_loc = self.world.next_position(loc, d)
            if self.world.passable(next_loc):
                successors.append(next_loc)
        return successors
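
The docstring suggests derived classes build their search on top of `get_successors`. For instance, a breadth-first search for the shortest path length between two squares could expand nodes through it, along these lines (a sketch, not part of the original assignment code):

    from collections import deque

    def bfs_distance(searcher, start, goal):
        # Shortest number of moves from start to goal, expanding nodes
        # via searcher.get_successors(); returns None if unreachable.
        frontier = deque([(start, 0)])
        visited = {start}
        while frontier:
            loc, dist = frontier.popleft()
            if loc == goal:
                return dist
            for succ in searcher.get_successors(loc):
                if succ not in visited:
                    visited.add(succ)
                    frontier.append((succ, dist + 1))
        return None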
Example #5
    def get_direction(self, ant):
        """Evaluates each of the currently passable directions and picks
        the one with maximum value."""

        # get the passable directions, in random order to break ties
        rand_dirs = self.world.get_passable_directions(ant.location, AIM.keys())
        random.shuffle(rand_dirs)

        # evaluate the value function for each direction and keep the best
        max_value = float("-inf")
        max_dir = None
        for direction in rand_dirs:
            value = self.value(self.state, ant.location, direction)
            if value > max_value:
                max_value = value
                max_dir = direction

        # take the direction with maximum value
        self.world.L.info("Chose: %s, value: %.2f" % (max_dir, max_value))
        return max_dir
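
Several snippets call `self.value(self.state, ant.location, action)` without showing it. With the linear features used in Examples #1 and #2, it is presumably just the dot product of the learned weights with the feature vector; a sketch under that assumption:

    def value(self, state, location, action):
        # Assumed implementation: Q(s, a) as a linear function of the
        # extracted features, i.e. weights dot features.
        features = self.features.extract(self.world, state, location, action)
        return sum(w * f for w, f in zip(self.weights, features))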
Example #6
File: dfabot.py  Project: zilehuda/CIS521
    def get_direction(self, state, ant):
        '''Returns the ant's least-visited adjacent location, prioritizing by
           food direction when multiple adjacent locations are equally explored.'''
        # Of the 4 possible squares to move to, determine which don't currently
        # contain an ant and are the least-visited.
        min_visits = float('Inf')
        min_visits_directions = []
        for direction in AIM.keys():
            test_position = ant.world.next_position(ant.location, direction)

            # Ignore water.
            if not ant.world.passable(test_position):
                continue

            # Don't move to a currently occupied location;
            # this helps somewhat mitigate collisions.
            if ant.world.ant_lookup[test_position] != -1:
                continue

            # Check how frequently this candidate location has been visited
            # in the past (unvisited squares default to 1).
            num_visits = state.get(test_position, 1)
            if num_visits < min_visits:
                min_visits = num_visits
                min_visits_directions = [direction]
            elif num_visits == min_visits:
                min_visits_directions.append(direction)

        if not min_visits_directions:
            # Will only reach here if ant is boxed in by water on all sides.
            return None
        elif len(min_visits_directions) > 1:
            # Try to break ties by considering food direction.
            food_directions = ant.toward(ant.closest_food())
            for fd in food_directions:
                if fd in min_visits_directions:
                    return fd

        return min_visits_directions[0]
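
The `state` dict here maps locations to visit counts, but none of these examples show it being maintained. The caller would presumably bump a square's count whenever an ant enters it, along the lines of this hypothetical bookkeeping (mirroring the default of 1 used above):

    def record_visit(state, location):
        # Hypothetical: increment the visit count for a square an ant
        # has just entered, starting unvisited squares at 1.
        state[location] = state.get(location, 1) + 1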