예제 #1
0
 def go_and_push(self, next_block_loc, actions, states):
     """Walk the agent to the push position and push the block once.

     The push position is obtained from ``self.get_push_location`` for the
     first pushable block and ``next_block_loc``.  Every executed action is
     appended to ``actions`` and the featurized state observed just before
     it is appended to ``states`` (both lists are modified in place).

     Args:
         next_block_loc: location passed to ``get_push_location`` together
             with the block's current location.
         actions: list that receives the executed action names.
         states: list that receives the matching featurized states.

     Returns:
         ``(actions, states, success)``; ``success`` is False, and nothing
         is executed, when the push position cannot be reached.
     """
     block = self.items_bytype['pushable_block'][0]
     push_from = self.get_push_location(block.attr['loc'], next_block_loc)
     parents, cost = dut.dijkstra_touch_cost(
         self, self.agent.attr['loc'], push_from)
     if not (cost < dut.get_big_cost()):
         # Push position is unreachable: report failure without acting.
         return actions, states, False

     walk = dut.path_to_actions(dut.collect_path(parents, push_from))
     actions.extend(walk)
     for step in walk:
         # Featurize before acting so each state pairs with its action.
         states.append(self.featurizer.featurize(self))
         self.act(step)
         self.update()

     # The push direction points from the agent's position towards the block.
     block_loc = block.attr['loc']
     if push_from[0] < block_loc[0]:
         push = 'push_right'
     elif push_from[1] < block_loc[1]:
         push = 'push_up'
     elif push_from[0] > block_loc[0]:
         push = 'push_left'
     else:
         push = 'push_down'

     states.append(self.featurizer.featurize(self))
     actions.append(push)
     self.act(push)
     self.update()
     return actions, states, True
예제 #2
0
 def get_supervision(self, featurizer):
     """Greedily visit every active, unvisited goal and record the trajectory.

     On each pass the cheapest-to-reach unvisited goal (according to
     ``dut.dijkstra_touch_cost``) is selected and the agent is walked to
     it, acting on the game as it goes.  The strategy is greedy, not
     optimal.

     Args:
         featurizer: object whose ``featurize(self)`` gives the state
             representation recorded before each action.

     Returns:
         A list of ``(state, action)`` pairs for the executed actions.  A
         single ``[state, 'stop']`` entry is returned instead when a
         remaining goal is unreachable, or when there was nothing to do
         (no unvisited goal on entry).
     """
     states = []
     actions = []
     # this is greedy, not optimal:
     while True:
         mincost = dut.get_big_cost() + 1
         goal_remaining = False
         aloc = self.agent.attr['loc']
         for j in range(self.ngoals_active):
             g = self.items_bytype['goal'][self.goal_order[j]]
             gloc = g.attr['loc']
             v = g.attr.get('_visited')
             # A goal counts as unvisited when the flag is missing or False.
             if v is None or v is False:
                 goal_remaining = True
                 p, cost = dut.dijkstra_touch_cost(self, aloc, gloc)
                 if cost < mincost:
                     mincost = cost
                     mingloc = gloc
                     minp = p
         if not goal_remaining:
             # All goals are visited.  Without this exit the final pass
             # leaves mincost at its sentinel value and would be
             # misreported as "unreachable", discarding the trajectory.
             break
         if mincost >= dut.get_big_cost():  # necessary goal is unreachable
             return [[featurizer.featurize(self), 'stop']]
         path = dut.collect_path(minp, mingloc)
         thisgoal_actions = dut.path_to_actions(path)
         actions.extend(thisgoal_actions)
         for a in thisgoal_actions:
             # Featurize before acting so each state pairs with its action.
             states.append(featurizer.featurize(self))
             self.act(a)
             self.update()
     if not actions:
         # Nothing had to be done; keep the single 'stop' sample this
         # method has always produced in that situation.
         return [[featurizer.featurize(self), 'stop']]
     return list(zip(states, actions))
예제 #3
0
 def get_supervision(self, featurizer):
     """Walk the agent to ``self.goal_loc`` and record the trajectory.

     Args:
         featurizer: object whose ``featurize(self)`` gives the state
             representation recorded before each action.

     Returns:
         A list of ``(state, action)`` pairs, or a single
         ``[state, 'stop']`` entry when the goal cannot be reached.
     """
     target = self.goal_loc
     parents, cost = dut.dijkstra_touch_cost(
         self, self.agent.attr['loc'], target)
     if cost >= dut.get_big_cost():
         return [[featurizer.featurize(self), 'stop']]

     plan = dut.path_to_actions(dut.collect_path(parents, target))
     trajectory = []
     for step in plan:
         # Capture the observation first, then advance the game.
         observation = featurizer.featurize(self)
         self.act(step)
         self.update()
         trajectory.append((observation, step))
     return trajectory
예제 #4
0
 def get_supervision(self, featurizer):
     """Visit the active goals in ``self.goal_order`` and record the trajectory.

     For each of the first ``self.ngoals_active`` entries of
     ``self.goal_order`` the goal item whose ``'@goal'`` attribute equals
     ``'goal<index>'`` is located and the agent is walked to it.

     Args:
         featurizer: object whose ``featurize(self)`` gives the state
             representation recorded before each action.

     Returns:
         A list of ``(state, action)`` pairs, or a single
         ``[state, 'stop']`` entry as soon as a goal cannot be reached.
     """
     trajectory = []
     for order_idx in range(self.ngoals_active):
         wanted = 'goal' + str(self.goal_order[order_idx])
         # Scan all goal items; the last one with a matching name wins.
         for goal_item in self.items_bytype['goal']:
             if goal_item.attr['@goal'] == wanted:
                 gloc = goal_item.attr['loc']
         parents, cost = dut.dijkstra_touch_cost(
             self, self.agent.attr['loc'], gloc)
         if cost >= dut.get_big_cost():
             return [[featurizer.featurize(self), 'stop']]
         plan = dut.path_to_actions(dut.collect_path(parents, gloc))
         for step in plan:
             # Capture the observation first, then advance the game.
             observation = featurizer.featurize(self)
             self.act(step)
             self.update()
             trajectory.append((observation, step))
     return trajectory
예제 #5
0
    def get_supervision(self, featurizer):
        """Push the first pushable block to ``self.goal_loc``, recording steps.

        A path for the block is planned from its current location to the
        goal.  For every cell on that path after the first, the agent walks
        to the location given by ``block.get_push_location`` and pushes the
        block once.

        Args:
            featurizer: object whose ``featurize(self)`` gives the state
                representation recorded before each action.

        Returns:
            A list of ``(state, action)`` pairs.  A single
            ``[state, 'stop']`` entry is returned when the block already
            sits on the goal or a push location cannot be reached; an empty
            list is returned when no path exists for the block itself.
        """
        block = self.items_bytype['pushable_block'][0]
        block_parents, block_cost = dut.dijkstra_touch_cost(
            self, block.attr['loc'], self.goal_loc)
        if block.attr['loc'] == self.goal_loc:
            return [[featurizer.featurize(self), 'stop']]
        if not (block_cost < dut.get_big_cost()):
            # The block has no path to the goal: nothing is recorded.
            return []

        trajectory = []

        def perform(step):
            # Capture the observation first, then advance the game.
            observation = featurizer.featurize(self)
            self.act(step)
            self.update()
            trajectory.append((observation, step))

        block_path = dut.collect_path(block_parents, self.goal_loc)
        # Skip the first path entry, as the original slice ``path[1:]`` did.
        for next_loc in block_path[1:]:
            push_from = block.get_push_location(next_loc)
            parents, walk_cost = dut.dijkstra_touch_cost(
                self, self.agent.attr['loc'], push_from)
            if not (walk_cost < dut.get_big_cost()):
                return [[featurizer.featurize(self), 'stop']]
            walk = dut.path_to_actions(dut.collect_path(parents, push_from))
            for step in walk:
                perform(step)

            # The push direction points from the agent towards the block.
            block_loc = block.attr['loc']
            if push_from[0] < block_loc[0]:
                push = 'push_right'
            elif push_from[1] < block_loc[1]:
                push = 'push_up'
            elif push_from[0] > block_loc[0]:
                push = 'push_left'
            else:
                push = 'push_down'
            perform(push)

        return trajectory
예제 #6
0
    def get_supervision(self, featurizer):
        """Reach the first goal, pushing the block out of the way if needed.

        If the goal is directly reachable the agent simply walks to it.
        Otherwise the agent walks to a cell next to the first pushable
        block (on the agent's side), performs push / step / push in one
        direction, and then walks to the goal.

        Args:
            featurizer: object whose ``featurize(self)`` gives the state
                representation recorded before each action.

        Returns:
            A list of ``(state, action)`` pairs, or a single
            ``[state, 'stop']`` entry when no push position is available,
            there is no room behind the block, or the goal is still
            unreachable after pushing.
        """
        states = []
        actions = []

        def perform(step):
            # Record the observation before acting so pairs line up.
            states.append(featurizer.featurize(self))
            actions.append(step)
            self.act(step)
            self.update()

        gloc = self.items_bytype['goal'][0].attr['loc']
        aloc = self.agent.attr['loc']
        parents, cost = dut.dijkstra_touch_cost(self, aloc, gloc)
        if cost < dut.get_big_cost():  # goal is on this side...
            for step in dut.path_to_actions(dut.collect_path(parents, gloc)):
                perform(step)
            return list(zip(states, actions))

        # gonna have to move the block; find where to push from.
        bloc = self.items_bytype['pushable_block'][0].attr['loc']
        bw, bh = bloc[0], bloc[1]
        # Sign of the agent's offset from the block along each axis
        # (0 when aligned); the +.01 avoids dividing by zero.
        dw = round((aloc[0] - bw) / (abs(aloc[0] - bw) + .01))
        dh = round((aloc[1] - bh) / (abs(aloc[1] - bh) + .01))

        if self.is_loc_reachable((bw + dw, bh)):
            push_loc = (bw + dw, bh)
            behind_block = ((bw - dw, bh), (bw - 2 * dw, bh))
            push_direction = 'left' if dw > 0 else 'right'
        elif self.is_loc_reachable((bw, bh + dh)):
            push_loc = (bw, bh + dh)
            behind_block = ((bw, bh - dh), (bw, bh - 2 * dh))
            push_direction = 'down' if dh > 0 else 'up'
        else:  # push loc is blocked, return with only 'stop'
            return [[featurizer.featurize(self), 'stop']]

        # Both cells on the far side of the block must be free.
        if not all(self.is_loc_reachable(cell) for cell in behind_block):
            return [[featurizer.featurize(self), 'stop']]

        parents, _ = dut.dijkstra_touch_cost(self, aloc, push_loc)
        for step in dut.path_to_actions(dut.collect_path(parents, push_loc)):
            perform(step)

        # Push, step after the block, push again.
        push = 'push_' + push_direction
        for step in (push, push_direction, push):
            perform(step)

        parents, cost = dut.dijkstra_touch_cost(
            self, self.agent.attr['loc'], gloc)
        if cost >= dut.get_big_cost():
            # goal still unreachable (e.g. blocks next to door or pushed
            # block onto goal)
            # todo FIXME
            return [[states[0], 'stop']]
        for step in dut.path_to_actions(dut.collect_path(parents, gloc)):
            perform(step)

        return list(zip(states, actions))
예제 #7
0
    def get_supervision(self, featurizer, additional_featurizers=None):
        """Reach the first goal, toggling cycle switches first if it is blocked.

        If the goal is not directly reachable, the agent visits every
        ``cycle_switch`` item in turn and issues ``'toggle_close'`` until
        the switch colour equals the colour of the first
        ``cycle_switch_opened_door`` item.  It then walks to the goal.

        Args:
            featurizer: object whose ``featurize(self)`` gives the primary
                state representation recorded before each action.
            additional_featurizers: optional sequence of further
                featurizers; each contributes one extra element to every
                returned entry.  ``None`` (the default) means no extras.

        Returns:
            A list of ``(state, action, *extra_features)`` tuples, or a
            single ``[state, 'stop', *extra_features]`` entry when a switch
            cannot be reached.
        """
        if additional_featurizers is None:
            additional_featurizers = []
        gloc = self.items_bytype['goal'][0].attr['loc']
        all_actions = []
        all_states = []
        num_featurizers = len(additional_featurizers)
        # One independent list per extra featurizer.  ``[[]] * n`` would
        # repeat a single shared list, mixing every featurizer's output.
        additional_rep = [[] for _ in range(num_featurizers)]

        def perform(action):
            # Record all representations before acting so they line up
            # with the action taken from that state.
            all_states.append(featurizer.featurize(self))
            for rep, extra in zip(additional_rep, additional_featurizers):
                rep.append(extra.featurize(self))
            all_actions.append(action)
            self.act(action)
            self.update()

        door = self.items_bytype['cycle_switch_opened_door'][0]
        p, cost = dut.dijkstra_touch_cost(self, self.agent.attr['loc'], gloc)
        if cost >= dut.get_big_cost():
            # switch
            # note: order issue when multiple switches exist
            for switch in self.items_bytype['cycle_switch']:
                switch_loc = switch.attr['loc']
                p_switch, cost_switch = dut.dijkstra_touch_cost(
                    self, self.agent.attr['loc'], switch_loc)
                if cost_switch >= dut.get_big_cost():
                    feat_list = [
                        f.featurize(self) for f in additional_featurizers
                    ]
                    return [[
                        featurizer.featurize(self), 'stop', *feat_list
                    ]]
                path = dut.collect_path(p_switch, switch_loc)
                for a in dut.path_to_actions(path):
                    perform(a)
                while switch.color != door.color:
                    perform('toggle_close')
        # go to destination after toggling all switches to the same color as door
        # NOTE(review): the cost returned here is not checked, so the goal
        # is assumed to be reachable at this point -- confirm.
        p, cost = dut.dijkstra_touch_cost(self, self.agent.attr['loc'], gloc)
        path = dut.collect_path(p, gloc)
        for a in dut.path_to_actions(path):
            perform(a)
        return list(zip(all_states, all_actions, *additional_rep))