def find_good_goal(self):
    """Pick the cheapest cell on the target edge of the map for the block.

    The edge is selected by ``self.direction``: 'up' scans the row
    ``mapsize[1] - 1``, 'down' scans row 0, 'right' scans the column
    ``mapsize[0] - 1`` and any other value scans column 0.  Each candidate
    is scored with ``dut.dijkstra_touch_cost`` starting from the first
    pushable block's location.

    Returns:
        A ``(mincost, goal_loc, minp)`` tuple: the lowest cost found, the
        cell it belongs to and the structure returned by the Dijkstra
        helper for that cell.  When no candidate is strictly cheaper than
        ``dut.get_big_cost()`` the result is ``(big_cost, None, None)``
        instead of raising ``UnboundLocalError`` as it did before.
    """
    block = self.items_bytype['pushable_block'][0]
    last_col = self.mapsize[0] - 1
    last_row = self.mapsize[1] - 1

    # Candidates are listed in the same order the per-direction loops used,
    # so ties are still resolved in favour of the first cell scanned.
    if self.direction == 'up':
        candidates = [(i, last_row) for i in range(self.mapsize[0])]
    elif self.direction == 'down':
        candidates = [(i, 0) for i in range(self.mapsize[0])]
    elif self.direction == 'right':
        candidates = [(last_col, i) for i in range(self.mapsize[1])]
    else:
        candidates = [(0, i) for i in range(self.mapsize[1])]

    mincost = dut.get_big_cost()
    # Defined up front so the return below is valid even if nothing beats
    # the initial big cost.
    goal_loc = None
    minp = None
    for loc in candidates:
        p, cost = dut.dijkstra_touch_cost(self, block.attr['loc'], loc)
        if cost < mincost:
            mincost = cost
            goal_loc = loc
            minp = p
    return mincost, goal_loc, minp
def get_supervision(self, featurizer):
    """Build a greedy demonstration that visits every active goal.

    On each round the cheapest pending goal (per
    ``dut.dijkstra_touch_cost`` from the agent's current location) is
    chosen, the agent is walked there with ``self.act`` / ``self.update``,
    and the featurized state before every action is recorded.

    Args:
        featurizer: object whose ``featurize(self)`` output is stored as
            the state of each sample.

    Returns:
        A list of ``(state, action)`` pairs.  A single
        ``[[state, 'stop']]`` sample is returned when a pending goal is
        unreachable, or when there was nothing to do at all.
    """
    states = []
    actions = []
    # this is greedy, not optimal:
    while True:
        mincost = dut.get_big_cost() + 1
        goal_remaining = False
        for j in range(self.ngoals_active):
            aloc = self.agent.attr['loc']
            g = self.items_bytype['goal'][self.goal_order[j]]
            gloc = g.attr['loc']
            v = g.attr.get('_visited')
            # A goal is pending when '_visited' is absent or explicitly
            # False (original author's note: "why is v False?").
            if v is None or v is False:
                goal_remaining = True
                p, cost = dut.dijkstra_touch_cost(self, aloc, gloc)
                if cost < mincost:
                    mincost = cost
                    mingloc = gloc
                    minp = p
        if not goal_remaining:
            # All active goals are done.  Previously this case fell into
            # the "unreachable" branch below and threw the recorded
            # trajectory away, leaving the final return unreachable.
            break
        if mincost >= dut.get_big_cost():
            # necessary goal is unreachable
            return [[featurizer.featurize(self), 'stop']]
        path = dut.collect_path(minp, mingloc)
        thisgoal_actions = dut.path_to_actions(path)
        actions.extend(thisgoal_actions)
        for a in thisgoal_actions:
            states.append(featurizer.featurize(self))
            self.act(a)
            self.update()
    if not actions:
        # No goal was pending to begin with: keep the single 'stop' sample
        # that callers received in this situation before.
        return [[featurizer.featurize(self), 'stop']]
    return list(zip(states, actions))
def go_and_push(self, next_block_loc, actions, states):
    """Walk the agent to the push spot and push the block one step.

    The push spot comes from ``self.get_push_location`` for the first
    pushable block and ``next_block_loc``.  Every action taken is appended
    to ``actions`` and the featurized state preceding it (via
    ``self.featurizer``) to ``states``; both lists are modified in place.

    Returns:
        ``(actions, states, ok)`` where ``ok`` is False (and nothing was
        executed) when the push spot's Dijkstra cost is not below
        ``dut.get_big_cost()``, and True after the push has been performed.
    """
    block = self.items_bytype['pushable_block'][0]
    push_from = self.get_push_location(block.attr['loc'], next_block_loc)
    prev, walk_cost = dut.dijkstra_touch_cost(
        self, self.agent.attr['loc'], push_from)

    # Guard clause: bail out untouched when the agent cannot get there.
    if walk_cost >= dut.get_big_cost():
        return actions, states, False

    approach = dut.path_to_actions(dut.collect_path(prev, push_from))
    actions.extend(approach)
    for move in approach:
        states.append(self.featurizer.featurize(self))
        self.act(move)
        self.update()

    # The push direction is derived from where the push spot lies relative
    # to the block; the x axis is tested before the y axis.
    block_loc = block.attr['loc']
    if push_from[0] < block_loc[0]:
        push = 'push_right'
    elif push_from[1] < block_loc[1]:
        push = 'push_up'
    elif push_from[0] > block_loc[0]:
        push = 'push_left'
    else:
        push = 'push_down'

    actions.append(push)
    states.append(self.featurizer.featurize(self))
    self.act(push)
    self.update()
    return actions, states, True
def get_supervision(self, featurizer):
    """Build a demonstration that pushes the block onto ``self.goal_loc``.

    A block path to the goal is computed with the Dijkstra helper.  For
    each cell on that path after the first, the agent walks to the
    location given by ``b.get_push_location(cell)`` and issues the matching
    ``push_*`` action, recording the featurized state before every action.

    Args:
        featurizer: object whose ``featurize(self)`` output is stored as
            the state of each sample.

    Returns:
        A list of ``(state, action)`` pairs, or a single
        ``[[state, 'stop']]`` sample when the block is already on the goal,
        when the block has no route to the goal, or when the agent cannot
        reach a push location.  (The no-route case used to return an empty
        list, unlike the other failure paths.)
    """
    b = self.items_bytype['pushable_block'][0]
    p, cost = dut.dijkstra_touch_cost(self, b.attr['loc'], self.goal_loc)
    states = []
    actions = []
    if b.attr['loc'] == self.goal_loc:
        return [[featurizer.featurize(self), 'stop']]
    if cost >= dut.get_big_cost():
        # The block cannot be routed to the goal: emit a 'stop' sample so
        # callers always receive at least one entry.
        return [[featurizer.featurize(self), 'stop']]

    path = dut.collect_path(p, self.goal_loc)
    # Skip the first path entry (presumably the block's current cell --
    # TODO confirm against dut.collect_path).
    for loc in path[1:]:
        pl = b.get_push_location(loc)
        p, cost = dut.dijkstra_touch_cost(self, self.agent.attr['loc'], pl)
        if cost >= dut.get_big_cost():
            # Agent cannot reach the push location; the partial trajectory
            # is dropped in favour of a single 'stop' sample.
            return [[featurizer.featurize(self), 'stop']]
        path_to_push = dut.collect_path(p, pl)
        to_push_loc = dut.path_to_actions(path_to_push)
        actions.extend(to_push_loc)
        for a in to_push_loc:
            states.append(featurizer.featurize(self))
            self.act(a)
            self.update()
        # Direction follows from where the push location lies relative to
        # the block; the x axis is tested before the y axis.
        if pl[0] < b.attr['loc'][0]:
            a = 'push_right'
        elif pl[1] < b.attr['loc'][1]:
            a = 'push_up'
        elif pl[0] > b.attr['loc'][0]:
            a = 'push_left'
        else:
            a = 'push_down'
        actions.append(a)
        states.append(featurizer.featurize(self))
        self.act(a)
        self.update()
    return list(zip(states, actions))
def get_supervision(self, featurizer):
    """Build a demonstration that walks the agent to ``self.goal_loc``.

    Args:
        featurizer: object whose ``featurize(self)`` output is stored as
            the state of each sample.

    Returns:
        A list of ``(state, action)`` pairs, one per executed action, or a
        single ``[[state, 'stop']]`` sample when the Dijkstra cost to the
        goal is at least ``dut.get_big_cost()``.
    """
    target = self.goal_loc
    prev, travel_cost = dut.dijkstra_touch_cost(
        self, self.agent.attr['loc'], target)
    if travel_cost >= dut.get_big_cost():
        return [[featurizer.featurize(self), 'stop']]

    moves = dut.path_to_actions(dut.collect_path(prev, target))
    trajectory = []
    for move in moves:
        # Capture the state *before* the move is applied to the game.
        trajectory.append((featurizer.featurize(self), move))
        self.act(move)
        self.update()
    return trajectory
def get_supervision(self, featurizer):
    """Build a demonstration that visits the active goals in order.

    For each of the first ``self.ngoals_active`` entries of
    ``self.goal_order`` the goal item whose ``'@goal'`` attribute equals
    ``'goal<index>'`` is located and the agent is walked to it.

    Args:
        featurizer: object whose ``featurize(self)`` output is stored as
            the state of each sample.

    Returns:
        A list of ``(state, action)`` pairs, or a single
        ``[[state, 'stop']]`` sample as soon as one goal's Dijkstra cost is
        at least ``dut.get_big_cost()``.
    """
    recorded_states = []
    recorded_actions = []
    for rank in range(self.ngoals_active):
        wanted = 'goal' + str(self.goal_order[rank])
        # Scan all goal items; the last one carrying the wanted tag wins.
        for candidate in self.items_bytype['goal']:
            if candidate.attr['@goal'] == wanted:
                gloc = candidate.attr['loc']

        prev, travel_cost = dut.dijkstra_touch_cost(
            self, self.agent.attr['loc'], gloc)
        if travel_cost >= dut.get_big_cost():
            return [[featurizer.featurize(self), 'stop']]

        leg = dut.path_to_actions(dut.collect_path(prev, gloc))
        recorded_actions.extend(leg)
        for move in leg:
            recorded_states.append(featurizer.featurize(self))
            self.act(move)
            self.update()
    return list(zip(recorded_states, recorded_actions))
def get_supervision(self, featurizer):
    """Build a demonstration that reaches the goal, moving a block if needed.

    If the goal is reachable the agent simply walks there.  Otherwise the
    agent walks to a cell next to the first pushable block (on the agent's
    side), performs ``push_<dir>``, ``<dir>``, ``push_<dir>`` and then
    walks to the goal.

    Args:
        featurizer: object whose ``featurize(self)`` output is stored as
            the state of each sample.

    Returns:
        A list of ``(state, action)`` pairs, or a single
        ``[[state, 'stop']]`` sample when no usable push position exists,
        when there is no room behind the block, when the push position
        cannot be reached, or when the goal is still unreachable after the
        pushes.
    """
    gloc = self.items_bytype['goal'][0].attr['loc']
    aloc = self.agent.attr['loc']
    p, cost = dut.dijkstra_touch_cost(self, aloc, gloc)
    states = []
    actions = []

    def step(action):
        # Record the state seen before the action, then apply it.
        states.append(featurizer.featurize(self))
        actions.append(action)
        self.act(action)
        self.update()

    if cost < dut.get_big_cost():
        # goal is on this side...
        for a in dut.path_to_actions(dut.collect_path(p, gloc)):
            step(a)
        return list(zip(states, actions))

    # gonna have to move the block.
    # find where to push:
    bloc = self.items_bytype['pushable_block'][0].attr['loc']
    # The +.01 avoids division by zero; for integer coordinates these
    # evaluate to -1, 0 or 1 (the sign of the agent-minus-block offset).
    dw = round((aloc[0] - bloc[0]) / (abs(aloc[0] - bloc[0]) + .01))
    dh = round((aloc[1] - bloc[1]) / (abs(aloc[1] - bloc[1]) + .01))
    bw = bloc[0]
    bh = bloc[1]
    if self.is_loc_reachable((bw + dw, bh)):
        push_loc = (bw + dw, bh)
        # Two cells beyond the block must be free because it is pushed
        # twice below.
        hasspace = (self.is_loc_reachable((bw - dw, bh))
                    and self.is_loc_reachable((bw - 2 * dw, bh)))
        push_direction = 'left' if dw > 0 else 'right'
    elif self.is_loc_reachable((bloc[0], bloc[1] + dh)):
        push_loc = (bloc[0], bloc[1] + dh)
        hasspace = (self.is_loc_reachable((bw, bh - dh))
                    and self.is_loc_reachable((bw, bh - 2 * dh)))
        push_direction = 'down' if dh > 0 else 'up'
    else:
        # push loc is blocked, return with only 'stop'
        return [[featurizer.featurize(self), 'stop']]
    if not hasspace:
        return [[featurizer.featurize(self), 'stop']]

    p, bcost = dut.dijkstra_touch_cost(self, aloc, push_loc)
    if bcost >= dut.get_big_cost():
        # The push position's cost used to be ignored; check it like every
        # other Dijkstra result before handing it to collect_path.
        return [[featurizer.featurize(self), 'stop']]
    for a in dut.path_to_actions(dut.collect_path(p, push_loc)):
        step(a)

    # Push, step into the vacated cell, push again.
    step('push_' + push_direction)
    step(push_direction)
    step('push_' + push_direction)

    aloc = self.agent.attr['loc']
    p, cost = dut.dijkstra_touch_cost(self, aloc, gloc)
    if cost >= dut.get_big_cost():
        # goal still unreachable (e.g. blocks next to door or pushed block
        # onto goal)
        # todo FIXME: this pairs the *initial* state with 'stop' although
        # the game has already been advanced by the actions above.
        return [[states[0], 'stop']]
    for a in dut.path_to_actions(dut.collect_path(p, gloc)):
        step(a)
    return list(zip(states, actions))
def get_supervision(self, featurizer, additional_featurizers=None):
    """Build a demonstration that reaches the goal, toggling switches if needed.

    If the goal is not reachable at the start, the agent visits every
    ``cycle_switch`` item in turn and issues ``'toggle_close'`` until the
    switch's ``color`` equals the door's ``color``; afterwards it walks to
    the goal.

    Args:
        featurizer: object whose ``featurize(self)`` output is stored as
            the state of each sample.
        additional_featurizers: optional sequence of extra featurizers;
            each contributes one more column to every sample.  ``None``
            (the default) behaves like an empty sequence.

    Returns:
        A list of ``(state, action, *extra_features)`` tuples, or a single
        ``[[state, 'stop', *extra_features]]`` sample when a switch cannot
        be reached.
    """
    if additional_featurizers is None:
        additional_featurizers = []
    gloc = self.items_bytype['goal'][0].attr['loc']
    all_actions = []
    all_states = []
    # One independent list per featurizer.  The previous `[[]] * n` made
    # every slot refer to the same list, which mixed the outputs of all
    # featurizers together whenever more than one was supplied.
    additional_rep = [[] for _ in additional_featurizers]
    door = self.items_bytype['cycle_switch_opened_door'][0]

    def step(action):
        # Record every representation of the current state, then act.
        all_states.append(featurizer.featurize(self))
        for rep, extra in zip(additional_rep, additional_featurizers):
            rep.append(extra.featurize(self))
        all_actions.append(action)
        self.act(action)
        self.update()

    p, cost = dut.dijkstra_touch_cost(self, self.agent.attr['loc'], gloc)
    if cost >= dut.get_big_cost():
        # switch
        # note: order issue when multiple switches exist
        for switch in self.items_bytype['cycle_switch']:
            switch_loc = switch.attr['loc']
            p_switch, cost_switch = dut.dijkstra_touch_cost(
                self, self.agent.attr['loc'], switch_loc)
            if cost_switch >= dut.get_big_cost():
                feat_list = [
                    f.featurize(self) for f in additional_featurizers
                ]
                return [[featurizer.featurize(self), 'stop', *feat_list]]
            path = dut.collect_path(p_switch, switch_loc)
            for a in dut.path_to_actions(path):
                step(a)
            while switch.color != door.color:
                step('toggle_close')

    # go to destination after toggling all switches to the same color as door
    # NOTE(review): the cost of this final search is not checked; confirm
    # that the goal is always reachable once the colors match.
    p, cost = dut.dijkstra_touch_cost(self, self.agent.attr['loc'], gloc)
    path = dut.collect_path(p, gloc)
    for a in dut.path_to_actions(path):
        step(a)

    # With no additional featurizers this yields plain (state, action) pairs.
    return list(zip(all_states, all_actions, *additional_rep))