def get_supervision(self, featurizer):
    """Build a greedy demonstration that visits every active goal.

    Repeatedly picks the cheapest not-yet-visited goal (as measured by
    ``dut.dijkstra_touch_cost`` from the agent's current location), walks
    the agent there while recording the featurized state before each
    action, and continues until no unvisited goal remains.

    The game itself is advanced (``self.act`` / ``self.update``) while the
    demonstration is generated.

    Args:
        featurizer: object whose ``featurize(self)`` result is stored as
            the state of each (state, action) pair.

    Returns:
        A list of ``(state, action)`` pairs.  ``[[state, 'stop']]`` is
        returned when a remaining goal is unreachable (cost at or above
        ``dut.get_big_cost()``) or when there was nothing to do.
    """
    big_cost = dut.get_big_cost()
    states = []
    actions = []
    # this is greedy, not optimal:
    while True:
        # The agent does not move while the goals are being scanned.
        agent_loc = self.agent.attr['loc']
        best_cost = big_cost + 1
        best_loc = None
        best_parents = None
        goal_remaining = False
        for j in range(self.ngoals_active):
            goal = self.items_bytype['goal'][self.goal_order[j]]
            visited = goal.attr.get('_visited')
            # The flag may be missing or explicitly False (the original
            # author's note: "why is v False?"); both mean "not visited".
            if visited is None or visited is False:
                goal_remaining = True
                goal_loc = goal.attr['loc']
                parents, cost = dut.dijkstra_touch_cost(
                    self, agent_loc, goal_loc)
                if cost < best_cost:
                    best_cost = cost
                    best_loc = goal_loc
                    best_parents = parents

        if not goal_remaining:
            # Every active goal has been visited: the demonstration is
            # complete.  Without this exit the "unreachable" branch below
            # would fire and throw the whole trajectory away.
            break
        if best_cost >= big_cost:
            # necessary goal is unreachable
            return [[featurizer.featurize(self), 'stop']]

        path = dut.collect_path(best_parents, best_loc)
        goal_actions = dut.path_to_actions(path)
        if not goal_actions:
            # No action would be executed, so the game state cannot change
            # and the same goal would be selected forever.
            break
        actions.extend(goal_actions)
        for action in goal_actions:
            states.append(featurizer.featurize(self))
            self.act(action)
            self.update()

    if not actions:
        # Nothing to demonstrate; keep the single 'stop' sample the
        # original produced in this situation.
        return [[featurizer.featurize(self), 'stop']]
    return list(zip(states, actions))
def go_and_push(self, next_block_loc, actions, states):
    """Walk to the push position for one block move, then push the block.

    Uses ``self.featurizer`` (which must already be set on the instance)
    to record the state observed before every executed action.  The
    ``actions`` and ``states`` lists are extended in place and also
    returned.

    Args:
        next_block_loc: location passed to ``self.get_push_location``
            together with the block's current location.
        actions: list of actions recorded so far.
        states: list of featurized states recorded so far.

    Returns:
        ``(actions, states, success)``; ``success`` is ``False`` (and the
        lists are untouched) when the push position cannot be reached.
    """
    block = self.items_bytype['pushable_block'][0]
    push_loc = self.get_push_location(block.attr['loc'], next_block_loc)
    parents, cost = dut.dijkstra_touch_cost(
        self, self.agent.attr['loc'], push_loc)
    reachable = cost < dut.get_big_cost()
    if not reachable:
        return actions, states, False

    # Walk the agent to the cell from which the block will be pushed.
    walk = dut.path_to_actions(dut.collect_path(parents, push_loc))
    actions.extend(walk)
    for step in walk:
        states.append(self.featurizer.featurize(self))
        self.act(step)
        self.update()

    # The push direction follows from where the agent stands relative to
    # the block.
    block_loc = block.attr['loc']
    if push_loc[0] < block_loc[0]:
        push = 'push_right'
    elif push_loc[1] < block_loc[1]:
        push = 'push_up'
    elif push_loc[0] > block_loc[0]:
        push = 'push_left'
    else:
        push = 'push_down'

    actions.append(push)
    states.append(self.featurizer.featurize(self))
    self.act(push)
    self.update()
    return actions, states, True
def get_supervision(self, featurizer):
    """Build a demonstration that pushes the block onto ``self.goal_loc``.

    A path for the block is planned with ``dut.dijkstra_touch_cost`` from
    the block's location to the goal.  For every cell on that path the
    agent walks to the block's push location and pushes once.  The game is
    advanced (``self.act`` / ``self.update``) while the demonstration is
    generated, and the featurized state seen before each action is
    recorded.

    Args:
        featurizer: object whose ``featurize(self)`` result is stored as
            the state of each (state, action) pair.

    Returns:
        * ``[[state, 'stop']]`` if the block already sits on the goal, or
          if some push location cannot be reached by the agent;
        * ``[]`` if no path exists for the block itself;
        * otherwise the list of ``(state, action)`` pairs.
    """
    block = self.items_bytype['pushable_block'][0]
    # Cheap check first: no planning is needed when the block is in place.
    if block.attr['loc'] == self.goal_loc:
        return [[featurizer.featurize(self), 'stop']]

    big_cost = dut.get_big_cost()
    parents, cost = dut.dijkstra_touch_cost(
        self, block.attr['loc'], self.goal_loc)
    if cost >= big_cost:
        # The block's route is blocked: nothing is recorded.
        return []

    states = []
    actions = []

    def execute(action):
        # Record the state seen before the action, then advance the game.
        states.append(featurizer.featurize(self))
        actions.append(action)
        self.act(action)
        self.update()

    # The first path entry is skipped; the loop handles the cells the
    # block still has to move through.
    block_path = dut.collect_path(parents, self.goal_loc)[1:]
    for next_loc in block_path:
        push_loc = block.get_push_location(next_loc)
        parents, cost = dut.dijkstra_touch_cost(
            self, self.agent.attr['loc'], push_loc)
        if cost >= big_cost:
            return [[featurizer.featurize(self), 'stop']]

        for action in dut.path_to_actions(
                dut.collect_path(parents, push_loc)):
            execute(action)

        # The push direction follows from where the agent stands relative
        # to the block.
        block_loc = block.attr['loc']
        if push_loc[0] < block_loc[0]:
            push = 'push_right'
        elif push_loc[1] < block_loc[1]:
            push = 'push_up'
        elif push_loc[0] > block_loc[0]:
            push = 'push_left'
        else:
            push = 'push_down'
        execute(push)

    return list(zip(states, actions))
def get_supervision(self, featurizer):
    """Build a demonstration that walks the agent to ``self.goal_loc``.

    The game is advanced (``self.act`` / ``self.update``) while the
    demonstration is generated.

    Args:
        featurizer: object whose ``featurize(self)`` result is stored as
            the state of each (state, action) pair.

    Returns:
        ``[[state, 'stop']]`` when the planned cost is at or above
        ``dut.get_big_cost()``; otherwise a list of ``(state, action)``
        pairs, one per action on the planned path.
    """
    target = self.goal_loc
    parents, cost = dut.dijkstra_touch_cost(
        self, self.agent.attr['loc'], target)
    if cost >= dut.get_big_cost():
        return [[featurizer.featurize(self), 'stop']]

    route = dut.collect_path(parents, target)
    demonstration = []
    for move in dut.path_to_actions(route):
        # The state is captured before the move is applied.
        observation = featurizer.featurize(self)
        self.act(move)
        self.update()
        demonstration.append((observation, move))
    return demonstration
def get_supervision(self, featurizer):
    """Build a demonstration that visits the active goals in order.

    For each of the first ``self.ngoals_active`` entries of
    ``self.goal_order`` the goal item whose ``'@goal'`` attribute equals
    ``'goal<entry>'`` is looked up, and the agent is walked to it.  The
    game is advanced (``self.act`` / ``self.update``) while the
    demonstration is generated.

    Args:
        featurizer: object whose ``featurize(self)`` result is stored as
            the state of each (state, action) pair.

    Returns:
        ``[[state, 'stop']]`` as soon as a goal's cost is at or above
        ``dut.get_big_cost()`` (anything recorded so far is dropped);
        otherwise the list of ``(state, action)`` pairs.
    """
    demonstration = []
    for position in range(self.ngoals_active):
        wanted_name = 'goal' + str(self.goal_order[position])
        # Scan every goal item; the last one carrying the wanted name wins.
        for goal_item in self.items_bytype['goal']:
            if goal_item.attr['@goal'] == wanted_name:
                target = goal_item.attr['loc']

        parents, cost = dut.dijkstra_touch_cost(
            self, self.agent.attr['loc'], target)
        if cost >= dut.get_big_cost():
            return [[featurizer.featurize(self), 'stop']]

        route = dut.collect_path(parents, target)
        for move in dut.path_to_actions(route):
            # The state is captured before the move is applied.
            observation = featurizer.featurize(self)
            self.act(move)
            self.update()
            demonstration.append((observation, move))
    return demonstration
def get_supervision(self, featurizer):
    """Build a demonstration that pushes the block to a goal.

    ``self.find_good_goal()`` supplies the cost, goal location and path
    data for the block.  Each step of the block's path is then delegated
    to ``self.go_and_push``, which walks the agent to the push position,
    pushes, and records the states and actions.

    Args:
        featurizer: object whose ``featurize(self)`` result is stored as
            the state of each (state, action) pair.  It is also stored on
            ``self.featurizer`` because ``go_and_push`` reads it from the
            instance.

    Returns:
        * ``[]`` when the cost is at or above ``dut.get_big_cost()``;
        * ``[[state, 'stop']]`` when the path has at most one entry or a
          push position cannot be reached;
        * otherwise the list of ``(state, action)`` pairs.
    """
    self.featurizer = featurizer
    cost, goal_loc, parents = self.find_good_goal()
    if cost >= dut.get_big_cost():
        # No usable goal: nothing is recorded.
        return []

    block_path = dut.collect_path(parents, goal_loc)
    if len(block_path) <= 1:
        return [[featurizer.featurize(self), 'stop']]

    states = []
    actions = []
    # TODO fix easy mistakes
    # The first path entry is skipped; only the remaining cells need a push.
    for next_loc in block_path[1:]:
        # TODO put get_push_location into pushable_block object
        actions, states, success = self.go_and_push(
            next_loc, actions, states)
        if not success:
            return [[featurizer.featurize(self), 'stop']]
    return list(zip(states, actions))
def get_supervision(self, featurizer):
    """Build a demonstration that reaches the goal, moving the block if needed.

    If the goal is directly reachable the agent simply walks there.
    Otherwise the agent walks to the cell next to the pushable block on
    its own side, pushes the block, steps after it, pushes again, and then
    walks to the goal.  The game is advanced (``self.act`` /
    ``self.update``) while the demonstration is generated, and the
    featurized state seen before each action is recorded.

    Args:
        featurizer: object whose ``featurize(self)`` result is stored as
            the state of each (state, action) pair.

    Returns:
        The list of ``(state, action)`` pairs, or a single
        ``[[state, 'stop']]`` sample when the block cannot be pushed
        (push cell blocked or unreachable, no room behind the block) or
        the goal is still unreachable after pushing.
    """
    big_cost = dut.get_big_cost()
    goal_loc = self.items_bytype['goal'][0].attr['loc']
    agent_loc = self.agent.attr['loc']
    states = []
    actions = []

    def execute(action):
        # Record the state seen before the action, then advance the game.
        states.append(featurizer.featurize(self))
        actions.append(action)
        self.act(action)
        self.update()

    parents, cost = dut.dijkstra_touch_cost(self, agent_loc, goal_loc)
    if cost < big_cost:
        # goal is on this side...
        for action in dut.path_to_actions(
                dut.collect_path(parents, goal_loc)):
            execute(action)
        return list(zip(states, actions))

    # gonna have to move the block.
    # find where to push:
    block_loc = self.items_bytype['pushable_block'][0].attr['loc']
    bw = block_loc[0]
    bh = block_loc[1]
    # Sign of the agent's offset from the block on each axis: the small
    # constant avoids a division by zero, and rounding yields -1, 0 or 1
    # for whole-number offsets.
    dw = round((agent_loc[0] - bw) / (abs(agent_loc[0] - bw) + .01))
    dh = round((agent_loc[1] - bh) / (abs(agent_loc[1] - bh) + .01))

    if self.is_loc_reachable((bw + dw, bh)):
        push_loc = (bw + dw, bh)
        # Two pushes are made, so two cells beyond the block must be free.
        has_space = (self.is_loc_reachable((bw - dw, bh))
                     and self.is_loc_reachable((bw - 2 * dw, bh)))
        push_direction = 'left' if dw > 0 else 'right'
    elif self.is_loc_reachable((bw, bh + dh)):
        push_loc = (bw, bh + dh)
        has_space = (self.is_loc_reachable((bw, bh - dh))
                     and self.is_loc_reachable((bw, bh - 2 * dh)))
        push_direction = 'down' if dh > 0 else 'up'
    else:
        # push loc is blocked, return with only 'stop'
        return [[featurizer.featurize(self), 'stop']]
    if not has_space:
        return [[featurizer.featurize(self), 'stop']]

    parents, block_cost = dut.dijkstra_touch_cost(self, agent_loc, push_loc)
    if block_cost >= big_cost:
        # The push cell is free but the agent has no route to it; do not
        # hand an unreachable target to collect_path.
        return [[featurizer.featurize(self), 'stop']]
    for action in dut.path_to_actions(dut.collect_path(parents, push_loc)):
        execute(action)

    # Push, follow the block, push again.
    push = 'push_' + push_direction
    for action in (push, push_direction, push):
        execute(action)

    agent_loc = self.agent.attr['loc']
    parents, cost = dut.dijkstra_touch_cost(self, agent_loc, goal_loc)
    if cost >= big_cost:
        # goal still unreachable (e.g. blocks next to door or pushed block
        # onto goal)
        # todo FIXME
        # Note: this pairs 'stop' with the first recorded state, although
        # the game has already been advanced past it.
        return [[states[0], 'stop']]

    for action in dut.path_to_actions(dut.collect_path(parents, goal_loc)):
        execute(action)
    return list(zip(states, actions))
def get_supervision(self, featurizer, additional_featurizers=()):
    """Build a demonstration that reaches the goal, toggling switches if needed.

    If the goal is not directly reachable, the agent visits every item in
    ``self.items_bytype['cycle_switch']`` in turn and issues
    ``'toggle_close'`` until the switch's ``color`` equals the door's
    ``color``, then walks to the goal.  The game is advanced
    (``self.act`` / ``self.update``) while the demonstration is
    generated, and the featurized state seen before each action is
    recorded.

    Args:
        featurizer: object whose ``featurize(self)`` result is stored as
            the state of each sample.
        additional_featurizers: optional sequence of further featurizers;
            each contributes one extra element per sample, in the order
            given.  ``None`` is treated as empty.

    Returns:
        A list of samples ``(state, action, *extra_features)``.  When a
        switch is unreachable, or the goal is still unreachable after all
        switches were toggled, a single
        ``[[state, 'stop', *extra_features]]`` sample for the current
        state is returned instead.
    """
    big_cost = dut.get_big_cost()
    goal_loc = self.items_bytype['goal'][0].attr['loc']
    extra_featurizers = list(additional_featurizers or ())
    all_states = []
    all_actions = []
    # One independent list per extra featurizer.  ``[[]] * n`` would alias
    # a single list n times and mix every featurizer's output together.
    extra_reps = [[] for _ in extra_featurizers]
    door = self.items_bytype['cycle_switch_opened_door'][0]

    def stop_sample():
        # Single 'stop' sample for the current state, in the same layout
        # as the regular samples.
        extras = [f.featurize(self) for f in extra_featurizers]
        return [[featurizer.featurize(self), 'stop', *extras]]

    def execute(action):
        # Record every representation of the state seen before the action,
        # then advance the game.
        all_states.append(featurizer.featurize(self))
        for rep, extra in zip(extra_reps, extra_featurizers):
            rep.append(extra.featurize(self))
        all_actions.append(action)
        self.act(action)
        self.update()

    parents, cost = dut.dijkstra_touch_cost(
        self, self.agent.attr['loc'], goal_loc)
    if cost >= big_cost:
        # switch
        # note: order issue when multiple switches exist
        for switch in self.items_bytype['cycle_switch']:
            switch_loc = switch.attr['loc']
            switch_parents, switch_cost = dut.dijkstra_touch_cost(
                self, self.agent.attr['loc'], switch_loc)
            if switch_cost >= big_cost:
                return stop_sample()
            for action in dut.path_to_actions(
                    dut.collect_path(switch_parents, switch_loc)):
                execute(action)
            while switch.color != door.color:
                execute('toggle_close')

        # go to destination after toggling all switches to the same color
        # as door
        parents, cost = dut.dijkstra_touch_cost(
            self, self.agent.attr['loc'], goal_loc)
        if cost >= big_cost:
            # Toggling did not open a route; do not hand an unreachable
            # target to collect_path.
            return stop_sample()

    for action in dut.path_to_actions(dut.collect_path(parents, goal_loc)):
        execute(action)
    # With no extra featurizers this is simply zip(states, actions).
    return list(zip(all_states, all_actions, *extra_reps))