def maybe_get_location_memory(interpreter, speaker, d):
    """Resolve a reference-object location dict to a position and memories.

    Args:
        interpreter: the running interpreter; ``interpreter.memory`` is
            queried for recent entities and updated with the resolved memory.
        speaker: speaker name, forwarded to ``interpret_reference_object``.
        d: location dict.  Keys read here: ``location_type``,
            ``relative_direction``, ``reference_object``,
            ``reference_object_1`` and ``reference_object_2``.

    Returns:
        ``(loc, mems)``.  For ``BETWEEN`` ``loc`` is the midpoint of the two
        reference memories and ``mems`` holds both; otherwise ``loc`` is the
        position of the single resolved memory and ``mems`` is a one-element
        list.  ``(None, None)`` is returned when ``d`` is not a
        reference-object location.

    Raises:
        ErrorWithResponse: if a required reference object is missing from
            ``d`` or cannot be resolved to a memory.
    """
    location_type = d.get("location_type", "SPEAKER_LOOK")
    if location_type != "REFERENCE_OBJECT" and d.get("reference_object") is None:
        return None, None

    if d.get("relative_direction") == "BETWEEN":
        if d.get("reference_object_1"):
            mems = []
            for key in ("reference_object_1", "reference_object_2"):
                ref_dict = d.get(key)
                found = (
                    interpret_reference_object(
                        interpreter, speaker, ref_dict, loose_speakerlook=True
                    )
                    if ref_dict
                    else []
                )
                # An empty result used to raise IndexError on ``[0]``.
                if not found or found[0] is None:
                    # TODO specify the ref object in the error message
                    raise ErrorWithResponse("I don't know what you're referring to")
                mems.append(found[0])
        else:
            ref_dict = d.get("reference_object")
            found = (
                interpret_reference_object(
                    interpreter, speaker, ref_dict, limit=2, loose_speakerlook=True
                )
                if ref_dict is not None
                else []
            )
            if len(found) < 2 or found[0] is None or found[1] is None:
                # TODO specify the ref object in the error message
                raise ErrorWithResponse("I don't know what you're referring to")
            mems = [found[0], found[1]]
        midpoint = np.add(mems[0].get_pos(), mems[1].get_pos()) / 2
        return (midpoint[0], midpoint[1], midpoint[2]), mems

    ref_dict = d.get("reference_object")
    if ref_dict is None:
        raise ErrorWithResponse("I don't know what you're referring to")
    mems = interpret_reference_object(interpreter, speaker, ref_dict)
    if len(mems) == 0:
        # Nothing matched directly: fall back to recently mentioned entities
        # sharing at least one tag with the reference object.
        # NOTE(review): interpret_reference_location uses "Mob"/"BlockObject"
        # (singular) for the same lookup -- confirm which spelling the
        # memory expects.
        tags = set(tags_from_dict(ref_dict))
        for memtype in ("Mobs", "BlockObjects"):
            cands = interpreter.memory.get_recent_entities(memtype)
            mems = [c for c in cands if tags.intersection(c.get_tags())]
            if mems:
                break
        if len(mems) == 0:
            raise ErrorWithResponse("I don't know what you're referring to")

    # Several recent entities may share a tag; take the first one instead of
    # asserting that exactly one matched.
    mem = mems[0]
    interpreter.memory.update_recent_entities([mem])
    return mem.get_pos(), [mem]
def interpret_reference_location(interpreter, speaker, d):
    """Resolve a location dict to the reference-object memories it names.

    Side effect: the returned memories are passed to
    ``interpreter.memory.update_recent_entities``.

    Args:
        interpreter: the running interpreter.
        speaker: speaker name, forwarded to ``interpret_reference_object``.
        d: location dict.  Keys read here: ``relative_direction``,
            ``reference_object``, ``reference_object_1`` and
            ``reference_object_2``.  When ``reference_object`` is absent,
            ``SPEAKERLOOK["reference_object"]`` is used.

    Returns:
        A list of memories: two for ``BETWEEN``, one otherwise.  Coordinates
        are not computed here.

    Raises:
        ErrorWithResponse: if fewer memories than required can be found.
    """
    between = d.get("relative_direction") == "BETWEEN"
    loose_speakerlook = between
    expected_num = 2 if between else 1

    if between:
        ref_obj_1 = d.get("reference_object_1")
        ref_obj_2 = d.get("reference_object_2")
        if ref_obj_1 and ref_obj_2:
            found_1 = interpret_reference_object(
                interpreter, speaker, ref_obj_1, loose_speakerlook=loose_speakerlook
            )
            found_2 = interpret_reference_object(
                interpreter, speaker, ref_obj_2, loose_speakerlook=True
            )
            # Check for empty results before indexing; ``[0]`` on an empty
            # list used to raise IndexError before the None check could run.
            if not found_1 or not found_2 or found_1[0] is None or found_2[0] is None:
                raise ErrorWithResponse("I don't know what you're referring to")
            mems = [found_1[0], found_2[0]]
            interpreter.memory.update_recent_entities(mems)
            return mems

    ref_obj = d.get("reference_object", SPEAKERLOOK["reference_object"])
    mems = interpret_reference_object(
        interpreter,
        speaker,
        ref_obj,
        limit=expected_num,
        loose_speakerlook=loose_speakerlook,
    )

    if len(mems) < expected_num:
        # Fall back to recently mentioned entities that share a tag with the
        # reference object, trying each memory type in turn.
        tags = set(tags_from_dict(ref_obj))
        for memtype in ("Mob", "BlockObject"):
            cands = interpreter.memory.get_recent_entities(memtype)
            mems = [c for c in cands if tags.intersection(c.get_tags())]
            if len(mems) >= expected_num:
                break

    if len(mems) < expected_num:
        raise ErrorWithResponse("I don't know what you're referring to")

    mems = mems[:expected_num]
    interpreter.memory.update_recent_entities(mems)
    # TODO: are there any memories where get_pos() doesn't return something?
    return mems
def compute_location_heuristic(interpreter, speaker, d, mems):
    """Turn reference memories plus a relative direction into a location.

    Args:
        interpreter: the running interpreter (its agent and memory are used).
        speaker: speaker name; the speaker's look yaw orients LEFT/RIGHT/etc.
        d: location dict.  Keys read here: ``relative_direction``, ``steps``
            (defaults to ``"5"``) and ``reference_object``.
        mems: non-empty list of reference memories; two are needed for
            ``BETWEEN``.

    Returns:
        Whatever ``post_process_loc(loc, interpreter)`` returns for the
        computed location.

    Raises:
        ErrorWithResponse: if ``INSIDE`` cannot be resolved, or ``BETWEEN``
            is requested with fewer than two memories.
    """
    reldir = d.get("relative_direction")
    loc = mems[0].get_pos()

    if reldir is None:
        # steps without relative direction
        if "steps" in d:
            num_steps = word_to_num(d.get("steps", "5"))
            # assumes to_block_center returns a numpy array so that ``+`` is
            # element-wise -- TODO confirm
            loc = to_block_center(loc) + [0, 0, num_steps]
    elif reldir == "BETWEEN":
        if len(mems) < 2:
            raise ErrorWithResponse("I don't know what you're referring to")
        midpoint = np.add(mems[0].get_pos(), mems[1].get_pos()) / 2
        loc = (midpoint[0], midpoint[1], midpoint[2])
    elif reldir == "INSIDE":
        ref_obj_dict = d.get("reference_object", SPEAKERLOOK["reference_object"])
        if ref_obj_dict.get("special_reference"):
            raise ErrorWithResponse("I don't know how to go inside there")
        # Use the first memory that has any interior location.
        locs = []
        for mem in mems:
            locs = heuristic_perception.find_inside(mem)
            if len(locs) > 0:
                break
        if len(locs) == 0:
            raise ErrorWithResponse("I don't know how to go inside there")
        interpreter.memory.update_recent_entities([mem])
        loc = locs[0]
    elif reldir == "AWAY":
        apos = pos_to_np(interpreter.agent.get_player().pos)
        offset = apos - loc
        dist = np.linalg.norm(offset)
        # If the agent stands exactly on the reference there is no "away"
        # direction; use a zero displacement instead of dividing by zero,
        # which would produce NaN coordinates.
        dir_vec = offset / dist if dist > 0 else np.zeros(len(offset))
        num_steps = word_to_num(d.get("steps", "5"))
        loc = num_steps * np.array(dir_vec) + to_block_center(loc)
    elif reldir == "NEAR":
        pass
    else:  # LEFT, RIGHT, etc...
        reldir_vec = rotation.DIRECTIONS[reldir]
        look = (
            interpreter.agent.perception_modules["low_level"]
            .get_player_struct_by_name(speaker)
            .look
        )
        # this should be an inverse transform so we set inverted=True
        dir_vec = rotation.transform(reldir_vec, look.yaw, 0, inverted=True)
        num_steps = word_to_num(d.get("steps", "5"))
        loc = num_steps * np.array(dir_vec) + to_block_center(loc)

    return post_process_loc(loc, interpreter)
def interpret_facing(interpreter, speaker, d):
    """Translate a facing dict into a turn/look instruction.

    Args:
        interpreter: the running interpreter; the agent's current look is
            read so that a yaw-only or pitch-only request keeps the other
            angle unchanged.
        speaker: speaker name, forwarded to ``interpret_location``.
        d: facing dict.  The first truthy key among ``yaw_pitch``, ``yaw``,
            ``pitch``, ``relative_yaw``, ``relative_pitch`` and ``location``
            decides the result.

    Returns:
        A dict with one of the keys ``head_yaw_pitch``, ``relative_yaw``,
        ``relative_pitch`` or ``head_xyz``.  ``None`` is returned when
        ``relative_yaw`` / ``relative_pitch`` is present without an
        ``angle``.

    Raises:
        ErrorWithResponse: if none of the recognised keys is present.
    """

    def without_degree_suffix(span):
        # str.strip(" degrees") strips a *set of characters* from both ends
        # (turning "three degrees" into "th"); remove the literal suffix.
        words = span.strip()
        for suffix in ("degrees", "degree"):
            if words.endswith(suffix):
                return words[: -len(suffix)].strip()
        return words

    look = interpreter.agent.get_player().look
    current_pitch = look.pitch
    current_yaw = look.yaw

    if d.get("yaw_pitch"):
        # accepted formats: "(yaw, pitch)", "yaw, pitch" or "yaw pitch";
        # commas are dropped too so that "10," parses as an int
        span = d["yaw_pitch"]
        yp = span.replace("(", " ").replace(")", " ").replace(",", " ").split()
        return {"head_yaw_pitch": (int(yp[0]), int(yp[1]))}
    elif d.get("yaw"):
        # for now assumed span is yaw as word or number
        w = without_degree_suffix(d["yaw"])
        return {"head_yaw_pitch": (word_to_num(w), current_pitch)}
    elif d.get("pitch"):
        # for now assumed span is pitch as word or number
        w = without_degree_suffix(d["pitch"])
        return {"head_yaw_pitch": (current_yaw, word_to_num(w))}
    elif d.get("relative_yaw"):
        # TODO in the task use turn angle
        if d["relative_yaw"].get("angle"):
            return {"relative_yaw": int(d["relative_yaw"]["angle"])}
        return None
    elif d.get("relative_pitch"):
        if d["relative_pitch"].get("angle"):
            # TODO in the task make this relative!
            return {"relative_pitch": int(d["relative_pitch"]["angle"])}
        return None
    elif d.get("location"):
        loc, _ = interpret_location(interpreter, speaker, d["location"])
        return {"head_xyz": loc}
    else:
        raise ErrorWithResponse("I am not sure where you want me to turn")
def get_special_reference_object(interpreter, speaker, S):
    """Return the memory for a special (non-filter) reference.

    Args:
        interpreter: the running interpreter; its memory and agent are used.
        speaker: name of the speaker whose look or position is referenced.
        S: ``"SPEAKER_LOOK"``, ``"SPEAKER"``, ``"AGENT"``, or a dict with a
            ``coordinates_span`` string containing three numbers.

    Returns:
        A location memory (for ``SPEAKER_LOOK`` and coordinates, newly added
        via ``interpreter.memory.add_location``) or a player memory (for
        ``SPEAKER`` and ``AGENT``).

    Raises:
        ErrorWithResponse: if the coordinates span does not hold exactly
            three numbers, or ``S`` is not a recognised special reference.
    """
    # TODO/FIXME! add things to workspace memory

    def location_memory(xyz):
        # Store an integer location and fetch the memory created for it.
        memid = interpreter.memory.add_location(
            (int(xyz[0]), int(xyz[1]), int(xyz[2]))
        )
        return interpreter.memory.get_location_by_id(memid)

    if S == "SPEAKER_LOOK":
        player_struct = interpreter.agent.perception_modules[
            "low_level"
        ].get_player_struct_by_name(speaker)
        return location_memory(capped_line_of_sight(interpreter.agent, player_struct))

    if S == "SPEAKER":
        p = interpreter.agent.perception_modules[
            "low_level"
        ].get_player_struct_by_name(speaker)
        return interpreter.memory.get_player_by_eid(p.entityId)

    if S == "AGENT":
        p = interpreter.agent.get_player()
        return interpreter.memory.get_player_by_eid(p.entityId)

    if isinstance(S, dict):
        coord_span = S.get("coordinates_span") or ""
        # Match complete numbers only: the previous character class also
        # matched a lone "-" or ".", which made float() raise ValueError.
        numbers = re.findall(r"[-+]?(?:\d+\.?\d*|\.\d+)", coord_span)
        coords = [int(float(w)) for w in numbers]
        if len(coords) != 3:
            logging.error("Bad coordinates: {}".format(coord_span))
            raise ErrorWithResponse(
                "I don't understand what location you're referring to"
            )
        return location_memory(coords)

    # Previously an unknown value fell through to ``return mem`` with ``mem``
    # unbound, raising UnboundLocalError.
    raise ErrorWithResponse("I don't understand what location you're referring to")
def interpret_point_target(interpreter, speaker, d):
    """Work out what the agent should point at.

    The ``location`` entry of ``d`` is resolved with ``interpret_location``.
    If that yields a reference memory, the memory's own
    ``get_point_at_target()`` result is returned; otherwise a 6-tuple is
    built from the resolved coordinates with the second component raised by
    one, repeated twice.

    NOTE(review): another ``interpret_point_target`` appears later in this
    source; if both live in the same module the later one wins.

    Raises:
        ErrorWithResponse: if ``d`` has no ``location``.
    """
    location_d = d.get("location")
    if location_d is None:
        # TODO other facings
        raise ErrorWithResponse("I am not sure where you want me to point")

    loc, mem = interpret_location(interpreter, speaker, location_d)
    if mem is None:
        corner = (loc[0], loc[1] + 1, loc[2])
        return corner + corner
    return mem.get_point_at_target()
def interpret_point_target(interpreter, speaker, d):
    """Work out what the agent should point at.

    The ``location`` entry of ``d`` is resolved to reference memories with
    ``interpret_reference_location``; ``compute_locations`` then produces the
    coordinates.  The result is a 6-tuple: the coordinates with the second
    component raised by one, repeated twice.

    NOTE(review): ``compute_locations`` is handed ``d`` itself rather than
    ``d["location"]`` -- confirm that is the dict it expects.

    Raises:
        ErrorWithResponse: if ``d`` has no ``location``.
    """
    location_d = d.get("location")
    if location_d is None:
        # TODO other facings
        raise ErrorWithResponse("I am not sure where you want me to point")

    # TODO: We might want to specifically check for BETWEEN/INSIDE, I'm not sure
    # what the +1s are in the return value
    ref_mems = interpret_reference_location(interpreter, speaker, location_d)
    loc, _ = compute_locations(interpreter, speaker, d, ref_mems)
    corner = (loc[0], loc[1] + 1, loc[2])
    return corner + corner
def interpret_named_schematic(
    interpreter, speaker, d
) -> Tuple[List[Block], Optional[str], List[Tuple[str, str]]]:
    """Look up a schematic by name, optionally recolouring it.

    Args:
        interpreter: the running interpreter; schematics and their triples
            are read from ``interpreter.memory``.
        speaker: speaker name, forwarded to ``interpret_shape_schematic``.
        d: schematic dict.  ``has_name`` is required; ``has_colour`` is
            optional.

    Returns:
        A tuple of 3 values:
        - the schematic blocks, list[(xyz, idm)]
        - a SchematicNode memid, or None (for special shapes)
        - a list of (pred, val) tags

    Raises:
        ErrorWithResponse: if ``has_name`` is missing or no schematic with
            that name is known.
    """
    if "has_name" not in d:
        raise ErrorWithResponse("I don't know what you want me to build.")
    name = d["has_name"]

    # Special shapes are generated rather than loaded from memory.
    shapename = SPECIAL_SHAPES_CANONICALIZE.get(name)
    if shapename:
        shape_blocks, tags = interpret_shape_schematic(
            interpreter, speaker, d, shapename=shapename
        )
        return shape_blocks, None, tags

    # The earlier "stemmed name" retry used the unmodified name, so it
    # repeated this exact lookup; a single lookup is equivalent.
    schematic = interpreter.memory.get_schematic_by_name(name)
    if schematic is None:
        raise ErrorWithResponse("I don't know what you want me to build.")

    tags = [
        (p, v) for (_, p, v) in interpreter.memory.get_triples(subj=schematic.memid)
    ]

    # Work on a copy so recolouring never alters the schematic object itself.
    blocks = dict(schematic.blocks)

    # TODO generalize to more general block properties
    # Longer term: remove and put a call to the modify model here
    colour = d.get("has_colour")
    if colour:
        colour_idms = block_data.COLOR_BID_MAP.get(colour)
        if colour_idms is not None and len(colour_idms) > 0:
            # Swap the dominant block type for a random block of that colour.
            old_idm = most_common_idm(blocks.values())
            new_idm = random.choice(colour_idms)
            for xyz, idm in blocks.items():
                if idm == old_idm:
                    blocks[xyz] = new_idm

    return list(blocks.items()), schematic.memid, tags
def interpret_facing(interpreter, speaker, d):
    """Translate a facing dict into a turn/look instruction.

    Args:
        interpreter: the running interpreter; the agent's current look is
            read so that a yaw-only or pitch-only request keeps the other
            angle unchanged.
        speaker: speaker name, forwarded to ``interpret_reference_location``
            and ``compute_locations``.
        d: facing dict.  The first truthy key among ``yaw_pitch``, ``yaw``,
            ``pitch``, ``relative_yaw``, ``relative_pitch`` and ``location``
            decides the result.

    Returns:
        A dict with one of the keys ``head_yaw_pitch``, ``relative_yaw``,
        ``relative_pitch`` or ``head_xyz``.  ``None`` is returned when
        ``relative_yaw`` / ``relative_pitch`` is present but carries neither
        an ``angle`` nor a span.

    Raises:
        ErrorWithResponse: if none of the recognised keys is present.
    """

    def without_degree_suffix(span):
        # str.strip(" degrees") strips a *set of characters* from both ends
        # (turning "three degrees" into "th"); remove the literal suffix.
        words = span.strip()
        for suffix in ("degrees", "degree"):
            if words.endswith(suffix):
                return words[: -len(suffix)].strip()
        return words

    look = interpreter.agent.get_player().look
    current_pitch = look.pitch
    current_yaw = look.yaw

    if d.get("yaw_pitch"):
        # accepted formats: "(yaw, pitch)", "yaw, pitch" or "yaw pitch";
        # commas are dropped too so that "10," parses as an int
        span = d["yaw_pitch"]
        yp = span.replace("(", " ").replace(")", " ").replace(",", " ").split()
        return {"head_yaw_pitch": (int(yp[0]), int(yp[1]))}
    elif d.get("yaw"):
        # for now assumed span is yaw as word or number
        w = without_degree_suffix(d["yaw"])
        return {"head_yaw_pitch": (word_to_num(w), current_pitch)}
    elif d.get("pitch"):
        # for now assumed span is pitch as word or number
        w = without_degree_suffix(d["pitch"])
        return {"head_yaw_pitch": (current_yaw, word_to_num(w))}
    elif d.get("relative_yaw"):
        # TODO in the task use turn angle
        rel_yaw = d["relative_yaw"]
        if rel_yaw.get("angle"):
            return {"relative_yaw": int(rel_yaw["angle"])}
        if rel_yaw.get("yaw_span"):
            span = rel_yaw.get("yaw_span")
            left = "left" in span or "leave" in span  # lemmatizer :)
            degrees = number_from_span(span) or 90
            # a positive angle turning left is reported as negative
            # (leftover debug print() calls removed)
            if degrees > 0 and left:
                return {"relative_yaw": -degrees}
            return {"relative_yaw": degrees}
        return None
    elif d.get("relative_pitch"):
        rel_pitch = d["relative_pitch"]
        if rel_pitch.get("angle"):
            # TODO in the task make this relative!
            return {"relative_pitch": int(rel_pitch["angle"])}
        if rel_pitch.get("pitch_span"):
            span = rel_pitch.get("pitch_span")
            down = "down" in span
            degrees = number_from_span(span) or 90
            # a positive angle looking down is reported as negative
            if degrees > 0 and down:
                return {"relative_pitch": -degrees}
            return {"relative_pitch": degrees}
        return None
    elif d.get("location"):
        mems = interpret_reference_location(interpreter, speaker, d["location"])
        # NOTE(review): ``d`` (the facing dict) is passed here, not
        # ``d["location"]`` -- confirm that is what compute_locations expects.
        loc, _ = compute_locations(interpreter, speaker, d, mems)
        return {"head_xyz": loc}
    else:
        raise ErrorWithResponse("I am not sure where you want me to turn")
def filter_by_sublocation(
    interpreter,
    speaker,
    candidates: List[Tuple[XYZ, T]],
    location: Dict,
    limit=1,
    all_proximity=10,
    loose=False,
) -> List[Tuple[XYZ, T]]:
    """Select from a list of candidate (xyz, object) tuples given a sublocation

    If limit == 'ALL', return all matching candidates

    Args:
        interpreter: the running interpreter.
        speaker: speaker name; the speaker's look is used for SPEAKER_LOOK
            and for directional filters.
        candidates: (xyz, mem) tuples to choose from.  The list is not
            modified.
        location: location dict.  Keys read here: ``location_type``,
            ``relative_direction`` and ``reference_object``.
        limit: maximum number of results, or ``"ALL"``.
        all_proximity: distance cut-off applied when ``limit == "ALL"`` and
            there is no direction (or the direction is ``NEAR``).
        loose: forwarded to ``object_looked_at`` for SPEAKER_LOOK.

    Returns:
        a list of (xyz, mem) tuples

    Raises:
        ErrorWithResponse: for ``INSIDE`` when no candidate is inside the
            reference object, and always for ``AWAY``.
    """
    # handle SPEAKER_LOOK separately due to slightly different semantics
    # (proximity to ray instead of point)
    if location.get("location_type") == "SPEAKER_LOOK":
        player_struct = interpreter.agent.perception_modules[
            "low_level"
        ].get_player_struct_by_name(speaker)
        return object_looked_at(
            interpreter.agent, candidates, player_struct, limit=limit, loose=loose
        )

    reldir = location.get("relative_direction")

    if reldir == "INSIDE":
        ref_obj = location.get("reference_object")
        if ref_obj:
            # this is ugly, should probably return from interpret_location...
            ref_mems = interpret_reference_object(interpreter, speaker, ref_obj)
            # An unresolved reference object means nothing can be inside it.
            if ref_mems:
                for xyz, candidate_mem in candidates:
                    if heuristic_perception.check_inside([candidate_mem, ref_mems[0]]):
                        return [(xyz, candidate_mem)]
        raise ErrorWithResponse("I can't find something inside that")

    if reldir == "AWAY":
        raise ErrorWithResponse("I don't know which object you mean")

    if reldir == "BETWEEN":
        ref_loc, _ = interpret_location(interpreter, speaker, location)
        by_distance = sorted(candidates, key=lambda c: euclid_dist(c[0], ref_loc))
        # Slicing with the string "ALL" would raise TypeError.
        return by_distance if limit == "ALL" else by_distance[:limit]

    if not reldir or reldir == "NEAR":
        # No reference direction, or NEAR (which falls back to the same
        # rule): choose the candidates closest to the reference location.
        # NEAR previously fell through and returned [].
        ref_loc, _ = interpret_location(
            interpreter, speaker, location, ignore_reldir=True
        )
        if limit == "ALL":
            return [
                c for c in candidates if euclid_dist(c[0], ref_loc) <= all_proximity
            ]
        return sorted(candidates, key=lambda c: euclid_dist(c[0], ref_loc))[:limit]

    # Directional filter (LEFT, RIGHT, etc...)
    # reference object location, i.e. the "X" in "left of X"
    ref_loc, _ = interpret_location(interpreter, speaker, location, ignore_reldir=True)
    # relative direction, i.e. the "LEFT" in "left of X"
    reldir_vec = rotation.DIRECTIONS[reldir]

    # transform each object into the speaker look coordinate system,
    # and project onto the reldir vector
    look = (
        interpreter.agent.perception_modules["low_level"]
        .get_player_struct_by_name(speaker)
        .look
    )
    proj = [
        rotation.transform(np.array(xyz) - ref_loc, look.yaw, 0) @ reldir_vec
        for (xyz, _) in candidates
    ]

    # filter by relative dir, e.g. "left of Y"
    proj_cands = [(p, c) for (p, c) in zip(proj, candidates) if p > 0]

    # "the X left of Y" = the right-most X that is left of Y
    if limit == "ALL":
        limit = len(proj_cands)
    return [c for (_, c) in sorted(proj_cands, key=lambda pc: pc[0])][:limit]
def interpret_reference_object(
    interpreter, speaker, d, ignore_mobs=False, limit=1, loose_speakerlook=False
) -> List[ReferenceObjectNode]:
    """Find the memories a reference-object dict refers to.

    Three phases, in order:
    1. If ``d["contains_coreference"]`` already holds a
       ``ReferenceObjectNode``, return it.
    2. If no clarification has been answered yet
       (``interpreter.progeny_data`` is empty), look up candidates by tag
       and filter them by sublocation.  With no candidates, push a
       ``ConfirmReferenceObject`` dialogue for the object the speaker is
       looking at and raise ``NextDialogueStep``.
    3. Otherwise read the clarification answer and return the provisional
       object on "yes".

    Args:
        interpreter: the running interpreter.
        speaker: name of the speaker.
        d: reference-object dict.
        ignore_mobs: when true, candidates come from ``get_objects`` instead
            of ``get_reference_objects``.
        limit: number of memories wanted, or ``"ALL"``.  A value of 1 is
            replaced by ``get_repeat_num(d)`` when filtering candidates.
        loose_speakerlook: forwarded to ``filter_by_sublocation`` as
            ``loose``.

    Returns:
        A list of matching memories; empty when the clarification was
        declined or no provisional object is stored.

    Raises:
        ErrorWithResponse: if there are no candidates and the speaker is not
            looking at any object.
        NextDialogueStep: after a clarification dialogue has been pushed.
    """
    if d.get("contains_coreference", "NULL") != "NULL":
        mem = d["contains_coreference"]
        if isinstance(mem, ReferenceObjectNode):
            return [mem]
        logging.error("bad coref_resolve -> {}".format(mem))

    if len(interpreter.progeny_data) > 0:
        # clarification answered
        response = interpreter.progeny_data[-1].get("response")
        if response != "yes":
            # TODO: error handling here ?
            return []
        # TODO: learn from the tag!  put it in memory!
        confirmed = interpreter.provisional.get("object_mem")
        if confirmed is None:
            return []
        # ``limit`` may be the string "ALL", which cannot multiply a list;
        # only one object was confirmed, so return it once in that case.
        count = 1 if limit == "ALL" else limit
        return [confirmed] * count

    tags = tags_from_dict(d)
    # TODO Add ignore_player maybe?
    if ignore_mobs:
        candidates = get_objects(interpreter, *tags)
    else:
        candidates = get_reference_objects(interpreter, *tags)

    if len(candidates) > 0:
        location_d = d.get("location", {"location_type": "SPEAKER_LOOK"})
        if limit == 1:
            # override with input value
            limit = get_repeat_num(d)
        r = filter_by_sublocation(
            interpreter,
            speaker,
            candidates,
            location_d,
            limit=limit,
            loose=loose_speakerlook,
        )
        return [mem for _, mem in r]

    # no candidates found; ask Clarification
    # TODO: move ttad call to dialogue manager and remove this logic
    interpreter.action_dict_frozen = True
    player_struct = interpreter.agent.perception_modules[
        "low_level"
    ].get_player_struct_by_name(speaker)
    confirm_candidates = get_objects(interpreter)  # no tags
    objects = object_looked_at(
        interpreter.agent, confirm_candidates, player_struct, limit=1
    )
    if len(objects) == 0:
        raise ErrorWithResponse("I don't know what you're referring to")
    _, mem = objects[0]
    blocks = list(mem.blocks.keys())
    # Stash what is being confirmed so the answer can be resolved on re-entry.
    interpreter.provisional["object_mem"] = mem
    interpreter.provisional["object"] = blocks
    interpreter.provisional["d"] = d
    interpreter.dialogue_stack.append_new(ConfirmReferenceObject, blocks)
    raise NextDialogueStep()
def interpret_location(interpreter, speaker, d,
                       ignore_reldir=False) -> Tuple[XYZ, Any]:
    """Location dict -> coordinates, maybe ref obj memory

    Side effect:  adds mems to agent_memory.recent_entities
    if a reference object is interpreted (done inside
    ``maybe_get_location_memory``).

    Args:
        interpreter: the running interpreter.
        speaker: name of the speaker.
        d: location dict.  Keys read here: ``location_type`` (defaults to
            ``"SPEAKER_LOOK"``), ``coordinates``, ``relative_direction`` and
            ``steps`` (defaults to ``"5"``).
        ignore_reldir: when true, ``relative_direction`` is not applied.

    Returns:
        ``(to_block_pos(loc), mem)`` where ``mem`` is the reference memory,
        or ``None`` when there is none or when ``INSIDE`` replaced the
        location.

    Raises:
        ErrorWithResponse: for malformed coordinates or an unresolvable
            ``INSIDE``.
        ValueError: if the location type cannot be handled.
    """
    mem = None
    mems = None
    location_type = d.get("location_type", "SPEAKER_LOOK")

    if location_type == "SPEAKER_LOOK":
        player_struct = interpreter.agent.perception_modules[
            "low_level"
        ].get_player_struct_by_name(speaker)
        loc = capped_line_of_sight(interpreter.agent, player_struct)
    elif location_type == "SPEAKER_POS":
        loc = pos_to_np(
            interpreter.agent.perception_modules["low_level"]
            .get_player_struct_by_name(speaker)
            .pos
        )
    elif location_type == "AGENT_POS":
        loc = pos_to_np(interpreter.agent.get_player().pos)
    elif location_type == "COORDINATES":
        coord_span = d.get("coordinates") or ""
        # Match complete numbers only: the previous character class also
        # matched a lone "-" or ".", which made float() raise ValueError.
        numbers = re.findall(r"[-+]?(?:\d+\.?\d*|\.\d+)", coord_span)
        loc = cast(XYZ, tuple(int(float(w)) for w in numbers))
        if len(loc) != 3:
            logging.error("Bad coordinates: {}".format(coord_span))
            raise ErrorWithResponse(
                "I don't understand what location you're referring to"
            )
    else:
        loc, mems = maybe_get_location_memory(interpreter, speaker, d)
        # Check before indexing: an unsupported type yields (None, None),
        # and ``mems[0]`` would mask the intended ValueError with TypeError.
        if loc is None:
            raise ValueError("Can't handle Location type: {}".format(location_type))
        mem = mems[0]

    # handle relative direction
    reldir = d.get("relative_direction")
    if reldir is not None and not ignore_reldir:
        if reldir == "BETWEEN":
            pass  # loc already handled when getting mems above
        elif reldir == "INSIDE":
            if location_type == "REFERENCE_OBJECT":
                locs = heuristic_perception.find_inside(mems[0])
                if len(locs) == 0:
                    raise ErrorWithResponse("I don't know how to go inside there")
                loc = locs[0]
                mem = None
        elif reldir == "AWAY":
            apos = pos_to_np(interpreter.agent.get_player().pos)
            offset = apos - loc
            dist = np.linalg.norm(offset)
            # If the agent stands exactly on the reference there is no
            # "away" direction; use a zero displacement instead of dividing
            # by zero, which would produce NaN coordinates.
            dir_vec = offset / dist if dist > 0 else np.zeros(len(offset))
            num_steps = word_to_num(d.get("steps", "5"))
            loc = num_steps * np.array(dir_vec) + to_block_center(loc)
        elif reldir == "NEAR":
            pass
        else:  # LEFT, RIGHT, etc...
            reldir_vec = rotation.DIRECTIONS[reldir]
            look = (
                interpreter.agent.perception_modules["low_level"]
                .get_player_struct_by_name(speaker)
                .look
            )
            # this should be an inverse transform so we set inverted=True
            dir_vec = rotation.transform(reldir_vec, look.yaw, 0, inverted=True)
            num_steps = word_to_num(d.get("steps", "5"))
            loc = num_steps * np.array(dir_vec) + to_block_center(loc)
    # if steps without relative direction
    # NOTE(review): this branch also runs when ignore_reldir is true and a
    # relative direction IS present -- confirm that is intended.
    elif "steps" in d:
        num_steps = word_to_num(d.get("steps", "5"))
        loc = to_block_center(loc) + [0, 0, num_steps]

    return to_block_pos(loc), mem
def interpret_reference_object(
    interpreter,
    speaker,
    d,
    only_voxels=False,
    only_physical=False,
    only_destructible=False,
    not_location=False,
    limit=1,
    loose_speakerlook=False,
) -> List[ReferenceObjectNode]:
    """this tries to find a ref obj memory matching the criteria from the
    ref_obj_dict

    Phases, in order:
    1. A ``special_reference`` in ``d`` is resolved with
       ``get_special_reference_object``.
    2. If ``d["filters"]["contains_coreference"]`` already holds a
       ``ReferenceObjectNode``, return it.
    3. If no clarification has been answered yet
       (``interpreter.progeny_data`` is empty), look up candidates by tag
       and filter them by sublocation.  With no candidates, push a
       ``ConfirmReferenceObject`` dialogue for the object the speaker is
       looking at and raise ``NextDialogueStep``.
    4. Otherwise read the clarification answer and return the provisional
       object on "yes".

    Args:
        interpreter: the running interpreter.
        speaker: name of the speaker.
        d: reference-object dict with a ``filters`` and/or
            ``special_reference`` entry.
        only_voxels, only_physical, only_destructible: each adds a
            restricting tag to the candidate query.
        not_location: adds the ``_not_location`` tag to the main query only.
        limit: number of memories wanted, or ``"ALL"``.
        loose_speakerlook: forwarded to ``filter_by_sublocation`` as
            ``loose``.

    Returns:
        A list of matching memories; empty when the clarification was
        declined or no provisional object is stored.

    Raises:
        AssertionError: if ``d`` has neither filters nor a special reference.
        ErrorWithResponse: if there are no candidates and the speaker is not
            looking at any suitable object.
        NextDialogueStep: after a clarification dialogue has been pushed.
    """
    F = d.get("filters")
    special = d.get("special_reference")
    # F can be empty...
    assert (
        F is not None
    ) or special, "no filters or special_reference sub-dicts {}".format(d)
    if special:
        return [get_special_reference_object(interpreter, speaker, special)]

    if F.get("contains_coreference", "NULL") != "NULL":
        mem = F["contains_coreference"]
        if isinstance(mem, ReferenceObjectNode):
            return [mem]
        logging.error("bad coref_resolve -> {}".format(mem))

    if len(interpreter.progeny_data) > 0:
        # clarification answered
        response = interpreter.progeny_data[-1].get("response")
        if response != "yes":
            # TODO: error handling here ?
            return []
        # TODO: learn from the tag!  put it in memory!
        confirmed = interpreter.provisional.get("object_mem")
        if confirmed is None:
            return []
        # ``limit`` may be the string "ALL", which cannot multiply a list;
        # only one object was confirmed, so return it once in that case.
        count = 1 if limit == "ALL" else limit
        return [confirmed] * count

    # Tags restricting the kind of object; shared by the main query and the
    # clarification fallback.
    type_tags = []
    if only_voxels:
        type_tags.append("_voxel_object")
    if only_physical:
        type_tags.append("_physical_object")
    if only_destructible:
        type_tags.append("_destructible")

    tags = list(tags_from_dict(F)) + type_tags
    # FIXME hack until memory_filters supports "not"
    if not_location:
        tags.append("_not_location")
    # TODO Add ignore_player maybe?
    candidates = get_reference_objects(interpreter, *tags)
    if len(candidates) > 0:
        r = filter_by_sublocation(
            interpreter, speaker, candidates, d, limit=limit, loose=loose_speakerlook
        )
        return [mem for _, mem in r]

    # no candidates found; ask Clarification
    # TODO: move ttad call to dialogue manager and remove this logic
    interpreter.action_dict_frozen = True
    player_struct = interpreter.agent.perception_modules[
        "low_level"
    ].get_player_struct_by_name(speaker)
    confirm_candidates = get_reference_objects(interpreter, *type_tags)
    objects = object_looked_at(
        interpreter.agent, confirm_candidates, player_struct, limit=1
    )
    if len(objects) == 0:
        raise ErrorWithResponse("I don't know what you're referring to")
    _, mem = objects[0]
    # Stash what is being confirmed so the answer can be resolved on re-entry.
    interpreter.provisional["object_mem"] = mem
    interpreter.provisional["F"] = F
    interpreter.dialogue_stack.append_new(ConfirmReferenceObject, mem)
    raise NextDialogueStep()