Example #1
    def __init__(self,
                 env_list=None,
                 instr_negatives=False,
                 instr_negatives_similar_only=False,
                 seg_level=False,
                 img_scale=1,
                 yaw_rand_range=0,
                 pos_rand_range=0
                 ):
        # If data is already loaded in memory, use it
        self.env_list = env_list
        self.train_instr, self.dev_instr, self.test_instr, corpus = get_all_instructions()
        self.all_instr = {**self.train_instr, **self.dev_instr, **self.test_instr}
        self.token2term, self.word2token = get_word_to_token_map(corpus)
        self.thesaurus = load_thesaurus()
        self.include_instr_negatives = instr_negatives
        if instr_negatives:
            self.similar_instruction_map = load_similar_instruction_map()
        self.instr_negatives_similar_only = instr_negatives_similar_only
        self.img_scale = img_scale

        self.yaw_rand_range = yaw_rand_range
        self.pos_rand_range = pos_rand_range
        self.pos_rand_image = 0

        # If the data is supposed to be at seg level (not nested envs + segs), then we can support batching
        # but we need to correctly infer the dataset size
        self.seg_level = seg_level
        if seg_level:
            self.seg_list = []
            for env in self.env_list:
                for set_idx, instr_set in enumerate(self.all_instr[env]):
                    for seg_idx, seg in enumerate(instr_set["instructions"]):
                        self.seg_list.append([env, set_idx, seg_idx])
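The seg_level branch flattens the nested env → set → segment hierarchy into a flat index list, which is what makes segment-level batching possible. Below is a minimal, self-contained sketch of that flattening with made-up stand-in data (in the real class, all_instr comes from get_all_instructions()):

# Minimal sketch of the seg_level flattening, with hypothetical stand-in data.
all_instr = {
    "env_0": [{"instructions": ["seg_a", "seg_b"]}],
    "env_1": [{"instructions": ["seg_c"]}, {"instructions": ["seg_d", "seg_e"]}],
}

seg_list = []
for env in ["env_0", "env_1"]:
    for set_idx, instr_set in enumerate(all_instr[env]):
        for seg_idx, _seg in enumerate(instr_set["instructions"]):
            # Each triple addresses exactly one instruction segment
            seg_list.append([env, set_idx, seg_idx])

print(len(seg_list))  # 5 -> the dataset size at segment level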
Example #2
    def __init__(self,
                 env_list=None,
                 instr_negatives=False,
                 instr_negatives_similar_only=False,
                 seg_level=False,
                 yaw_rand_range=0,
                 img_w=512,
                 img_h=512,
                 map_w=None,
                 map_h=None,
                 incl_path=True,
                 incl_endpoint=False,
                 use_semantic_maps=False):

        # If data is already loaded in memory, use it
        self.cuda = False
        self.env_list = env_list
        self.train_instr, self.dev_instr, self.test_instr, corpus = get_all_instructions()
        self.all_instr = {**self.train_instr, **self.dev_instr, **self.test_instr}
        self.token2term, self.word2token = get_word_to_token_map(corpus)
        self.thesaurus = load_thesaurus()
        self.include_instr_negatives = instr_negatives
        #if instr_negatives:
        #    self.similar_instruction_map = load_similar_instruction_map()
        self.instr_negatives_similar_only = instr_negatives_similar_only

        self.use_semantic_maps = use_semantic_maps

        self.img_w = img_w
        self.img_h = img_h

        if map_w is None:
            self.map_w = self.img_w
            self.map_h = self.img_h
        else:
            self.map_w = map_w
            self.map_h = map_h

        self.yaw_rand_range = yaw_rand_range
        self.latest_img_dbg = None
        self.latest_rot_img_dbg = None

        self.incl_endpoint = incl_endpoint
        self.incl_path = incl_path

        # If the data is supposed to be at seg level (not nested envs + segs), then we can support batching
        # but we need to correctly infer the dataset size
        self.seg_level = seg_level
        if seg_level:
            self.seg_list = []
            for env in self.env_list:
                for set_idx, instr_set in enumerate(self.all_instr[env]):
                    for seg_idx, seg in enumerate(instr_set["instructions"]):
                        self.seg_list.append([env, set_idx, seg_idx])

        print("Initialzied dataset!")
        print("   yaw range : " + str(self.yaw_rand_range))
        print("   map size: ", self.map_w, self.map_h)
        print("   img size: ", self.img_w, self.img_h)
Example #3
    def __init__(self):
        super(AuxLabelsNL, self).__init__()
        self.thesaurus = load_thesaurus()
        train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions()
        self.all_instructions = {
            **train_instructions,
            **dev_instructions,
            **test_instructions
        }
        self.corpus = corpus
        # get_word_to_token_map returns (token2term, word2token); unpack in
        # that order, matching the examples above.
        self.token2term, self.word2token = get_word_to_token_map(corpus)
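As in the first two examples, the three instruction splits are merged into a single dict keyed by environment, so downstream lookups don't need to know which split an environment belongs to. A toy sketch of the merge with stand-in data (the real dicts come from get_all_instructions()):

# Hedged sketch: merging the train/dev/test instruction dicts into one lookup.
train_instructions = {"env_0": ["walk to the tree"]}
dev_instructions = {"env_1": ["circle the pond"]}
test_instructions = {"env_2": ["stop at the bench"]}

all_instructions = {
    **train_instructions,
    **dev_instructions,
    **test_instructions,
}
print(sorted(all_instructions))  # ['env_0', 'env_1', 'env_2']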
Example #4
def get_mentioned_landmarks_nl(str_instruction):
    thesaurus = load_thesaurus()
    if thesaurus is None:
        return [], []
    split_instr = split_instruction(clean_instruction(str_instruction))
    word2term = thesaurus["word2term"]
    term_groundings = thesaurus["term_groundings"]
    lm_name2index = get_landmark_name_to_index()

    # Map each word in the instruction to its corresponding term:
    split_instr_terms = words_to_terms(split_instr, word2term)

    mentioned_landmark_names = set()

    # For each term, find all the landmarks that have been mentioned
    for term in split_instr_terms:
        for landmark_name in term_groundings[term]["landmarks"]:
            mentioned_landmark_names.add(landmark_name)

    mentioned_landmark_names = list(mentioned_landmark_names)
    mentioned_landmark_indices = [
        lm_name2index[name] for name in mentioned_landmark_names
    ]
    return mentioned_landmark_names, mentioned_landmark_indices
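A hedged usage sketch of the function above. It assumes the project's thesaurus and landmark index data are loadable (load_thesaurus() returns None otherwise, in which case the function returns two empty lists); the instruction string is made up for illustration:

# Usage sketch with a hypothetical instruction; requires the project's
# thesaurus and landmark index data to be available.
names, indices = get_mentioned_landmarks_nl("turn left at the big oak tree")
print(names)    # e.g. ["OakTree"] - actual names depend on the loaded thesaurus
print(indices)  # indices from get_landmark_name_to_index() for those names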