Example #1
0
    def __init__(self,
                 data_dir,
                 set,
                 goldstandard_features=True,
                 resnet_features=False,
                 fasttext_features=False,
                 T=2):
        """Precompute one sample per (configuration, action sequence) pair.

        Every tuple of ``T`` actions over UP/DOWN/LEFT/RIGHT is replayed,
        starting from each configuration's target location, and the features
        returned by every enabled loader at the ``T + 1`` visited locations
        are stored in ``self.data``.

        Args:
            data_dir: Directory containing ``configurations.<set>.json`` and
                the files read by the enabled feature loaders.
            set: Split name substituted into the configurations file name.
            goldstandard_features: If true, store ``GoldstandardFeatures``
                observations under ``data['goldstandard']``.
            resnet_features: If true, store ``ResnetFeatures`` observations
                under ``data['resnet']``.
            fasttext_features: If true, store ``FasttextFeatures``
                observations under ``data['fasttext']``.
            T: Number of actions in each trajectory.

        Raises:
            AssertionError: If no feature type is enabled.
        """
        self.data_dir = data_dir
        self.map = Map(data_dir, neighborhoods, include_empty_corners=True)
        self.T = T
        self.act_dict = ActionAgnosticDictionary()

        config_path = os.path.join(data_dir,
                                   'configurations.{}.json'.format(set))
        # Use a context manager so the handle is closed deterministically.
        with open(config_path) as config_file:
            self.configs = json.load(config_file)

        self.feature_loaders = dict()
        self.data = {}
        if fasttext_features:
            textfeatures = dict()
            for n in neighborhoods:
                with open(os.path.join(data_dir, n,
                                       "text.json")) as text_file:
                    textfeatures[n] = json.load(text_file)
            self.feature_loaders['fasttext'] = FasttextFeatures(
                textfeatures, os.path.join(data_dir, 'wiki.en.bin'))
            self.data['fasttext'] = list()
        if resnet_features:
            self.feature_loaders['resnet'] = ResnetFeatures(
                os.path.join(data_dir, 'resnetfeat.json'))
            # The key has to match the loader name: the sample loop below
            # appends to self.data[k] for every k in self.feature_loaders.
            self.data['resnet'] = list()
        if goldstandard_features:
            self.feature_loaders['goldstandard'] = GoldstandardFeatures(
                self.map)
            self.data['goldstandard'] = list()
        assert len(self.feature_loaders) > 0, \
            'at least one feature type must be enabled'

        self.data['actions'] = list()
        self.data['landmarks'] = list()
        self.data['target'] = list()

        # Cartesian product of the four moves with itself T times, i.e. every
        # possible action sequence of length T.
        action_set = [['UP', 'DOWN', 'LEFT', 'RIGHT']] * self.T
        all_possible_actions = list(itertools.product(*action_set))

        for config in self.configs:
            # These lookups do not depend on the action sequence.
            neighborhood = config['neighborhood']
            target_loc = config['target_location']
            boundaries = config['boundaries']

            for a in all_possible_actions:
                obs = {k: list() for k in self.feature_loaders}
                actions = list()
                # Work on a copy so the configuration's own target location
                # is never modified while stepping.
                loc = copy.deepcopy(target_loc)
                for p in range(self.T + 1):
                    # Observe first, then move: T actions -> T + 1 observations.
                    for k, feature_loader in self.feature_loaders.items():
                        obs[k].append(feature_loader.get(neighborhood, loc))

                    if p != self.T:
                        actions.append(self.act_dict.encode(a[p]))
                        loc = step_agnostic(a[p], loc, boundaries)

                if self.T == 0:
                    # Keep the action list non-empty when no step is taken.
                    actions.append(0)

                for k in self.feature_loaders:
                    self.data[k].append(obs[k])

                self.data['actions'].append(actions)
                landmarks, label_index = self.map.get_landmarks(
                    neighborhood, boundaries, target_loc)
                self.data['landmarks'].append(landmarks)
                self.data['target'].append(label_index)
Example #2
0
class TalkTheWalkEmergent(Dataset):
    """Dataset loading for emergent language experiments.

    For every configuration, ``4 ** T`` tourist trajectories of length ``T``
    are generated from the target location. In this implementation each
    action is drawn with ``random.choice``; the enumerated action tuples only
    determine how many trajectories are produced per configuration.
    """

    def __init__(self,
                 data_dir,
                 set,
                 goldstandard_features=True,
                 resnet_features=False,
                 fasttext_features=False,
                 T=2):
        """Load the configurations and precompute all samples.

        Args:
            data_dir: Directory containing ``configurations.<set>.json`` and
                the files read by the enabled feature loaders.
            set: Split name substituted into the configurations file name.
            goldstandard_features: If true, store ``GoldstandardFeatures``
                observations under ``data['goldstandard']``.
            resnet_features: If true, store ``ResnetFeatures`` observations
                under ``data['resnet']``.
            fasttext_features: If true, store ``FasttextFeatures``
                observations under ``data['fasttext']``.
            T: Number of actions in each trajectory.

        Raises:
            AssertionError: If no feature type is enabled.
        """
        self.data_dir = data_dir
        self.map = Map(data_dir, neighborhoods, include_empty_corners=True)
        self.T = T
        self.act_dict = ActionAgnosticDictionary()

        config_path = os.path.join(data_dir,
                                   'configurations.{}.json'.format(set))
        # Use a context manager so the handle is closed deterministically.
        with open(config_path) as config_file:
            self.configs = json.load(config_file)

        self.feature_loaders = dict()
        self.data = {}
        if fasttext_features:
            textfeatures = dict()
            for n in neighborhoods:
                with open(os.path.join(data_dir, n,
                                       "text.json")) as text_file:
                    textfeatures[n] = json.load(text_file)
            self.feature_loaders['fasttext'] = FasttextFeatures(
                textfeatures, os.path.join(data_dir, 'wiki.en.bin'))
            self.data['fasttext'] = list()
        if resnet_features:
            self.feature_loaders['resnet'] = ResnetFeatures(
                os.path.join(data_dir, 'resnetfeat.json'))
            # The key has to match the loader name: the sample loop below
            # appends to self.data[k] for every k in self.feature_loaders.
            self.data['resnet'] = list()
        if goldstandard_features:
            self.feature_loaders['goldstandard'] = GoldstandardFeatures(
                self.map)
            self.data['goldstandard'] = list()
        assert len(self.feature_loaders) > 0, \
            'at least one feature type must be enabled'

        self.data['actions'] = list()
        self.data['landmarks'] = list()
        self.data['target'] = list()

        action_list = ['UP', 'DOWN', 'LEFT', 'RIGHT']
        action_set = [action_list] * self.T
        all_possible_actions = list(itertools.product(*action_set))

        for config in self.configs:
            # These lookups do not depend on the trajectory.
            neighborhood = config['neighborhood']
            target_loc = config['target_location']
            boundaries = config['boundaries']

            # NOTE(review): the enumerated tuple itself is unused; it only
            # fixes the number of random trajectories per configuration.
            for _ in all_possible_actions:
                obs = {k: list() for k in self.feature_loaders}
                actions = list()
                # Work on a copy so the configuration's own target location
                # is never modified while stepping.
                loc = copy.deepcopy(target_loc)
                for p in range(self.T + 1):
                    # Observe first, then move: T actions -> T + 1 observations.
                    for k, feature_loader in self.feature_loaders.items():
                        obs[k].append(feature_loader.get(neighborhood, loc))

                    if p != self.T:
                        sampled_act = random.choice(action_list)
                        actions.append(self.act_dict.encode(sampled_act))
                        loc = step_agnostic(sampled_act, loc, boundaries)

                if self.T == 0:
                    # Keep the action list non-empty when no step is taken.
                    actions.append(0)

                for k in self.feature_loaders:
                    self.data[k].append(obs[k])

                self.data['actions'].append(actions)
                landmarks, label_index = self.map.get_landmarks(
                    neighborhood, boundaries, target_loc)
                self.data['landmarks'].append(landmarks)
                self.data['target'].append(label_index)

    def __getitem__(self, index):
        """Return the sample at ``index`` as a dict with one entry per field."""
        return {key: values[index] for key, values in self.data.items()}

    def __len__(self):
        """Return the number of precomputed samples."""
        return len(self.data['actions'])
Example #3
0
    def __init__(self,
                 data_dir,
                 set,
                 last_turns=1,
                 min_freq=3,
                 min_sent_len=2,
                 orientation_aware=False,
                 include_guide_utterances=True):
        """Build one sample per qualifying tourist utterance in the dialogues.

        Each dialogue is replayed from its start location. Tourist actions
        move the current location and accumulate actions/observations; each
        tourist utterance longer than ``min_sent_len`` words emits a sample
        holding the utterance, the actions and observations gathered since
        the previous sample, and the landmarks/target at the current
        location.

        Args:
            data_dir: Directory containing ``talkthewalk.<set>.json`` and
                ``dict.txt``.
            set: Split name substituted into the dialogue file name.
            last_turns: Number of most recent context messages concatenated
                into each utterance.
            min_freq: Forwarded to ``Dictionary`` as ``min_freq``.
            min_sent_len: A tourist message becomes a sample only if it has
                more than this many space-separated tokens.
            orientation_aware: If true, record every recognised action id;
                otherwise record only forward moves, encoded from the
                location change.
            include_guide_utterances: If true, messages from non-tourist
                speakers are added to the dialogue context.
        """
        dialogue_path = os.path.join(data_dir,
                                     'talkthewalk.{}.json'.format(set))
        # Use a context manager so the handle is closed deterministically.
        with open(dialogue_path) as dialogue_file:
            self.dialogues = json.load(dialogue_file)
        self.dict = Dictionary(file=os.path.join(data_dir, 'dict.txt'),
                               min_freq=min_freq)
        self.map = Map(data_dir, neighborhoods, include_empty_corners=True)
        self.act_dict = ActionAgnosticDictionary()
        self.act_aware_dict = ActionAwareDictionary()

        self.feature_loader = GoldstandardFeatures(self.map)

        self.data = dict()
        self.data['actions'] = list()
        self.data['goldstandard'] = list()
        self.data['landmarks'] = list()
        self.data['target'] = list()
        self.data['utterance'] = list()

        for config in self.dialogues:
            loc = config['start_location']
            neighborhood = config['neighborhood']
            boundaries = config['boundaries']
            act_memory = list()
            # The observation at the starting location always comes first.
            obs_memory = [self.feature_loader.get(neighborhood, loc)]

            dialogue_context = list()
            for msg in config['dialog']:
                if msg['id'] == 'Tourist':
                    act = msg['text']
                    act_id = self.act_aware_dict.encode(act)
                    # presumably encode() yields a negative id for text that
                    # is not an action -- confirm against
                    # ActionAwareDictionary.
                    if act_id >= 0:
                        new_loc = step_aware(act, loc, boundaries)
                        old_loc = loc
                        loc = new_loc

                        if orientation_aware:
                            act_memory.append(act_id)
                            obs_memory.append(
                                self.feature_loader.get(neighborhood, new_loc))
                        elif act == 'ACTION:FORWARD':  # went forward
                            # Without orientation only forward moves are
                            # recorded, as a direction derived from the
                            # location change.
                            act_dir = self.act_dict.encode_from_location(
                                old_loc, new_loc)
                            act_memory.append(act_dir)
                            obs_memory.append(
                                self.feature_loader.get(neighborhood, loc))
                    elif len(msg['text'].split(' ')) > min_sent_len:
                        dialogue_context.append(self.dict.encode(msg['text']))
                        # NOTE: with last_turns == 0 the slice [-0:] selects
                        # the entire context rather than nothing.
                        recent_tokens = [
                            y for x in dialogue_context[-last_turns:]
                            for y in x
                        ]
                        utt = (self.dict.encode(START_TOKEN) + recent_tokens +
                               self.dict.encode(END_TOKEN))
                        self.data['utterance'].append(utt)

                        landmarks, tgt = self.map.get_landmarks(
                            neighborhood, boundaries, loc)
                        self.data['landmarks'].append(landmarks)
                        self.data['target'].append(tgt)

                        self.data['actions'].append(act_memory)
                        self.data['goldstandard'].append(obs_memory)

                        # Start a fresh segment at the current location.
                        act_memory = list()
                        obs_memory = [
                            self.feature_loader.get(neighborhood, loc)
                        ]
                elif include_guide_utterances:
                    dialogue_context.append(self.dict.encode(msg['text']))
class TalkTheWalkLanguage(Dataset):
    """Dataset loading for natural language experiments.

    Only contains trajectories taken by human annotators. Every sample is
    built twice: once against ``self.map`` (stored in ``self.data``) and once
    against ``self.map2``, which is constructed with ``imperf=False`` (stored
    in ``self.data2``).
    """

    def __init__(self,
                 data_dir,
                 set,
                 last_turns=1,
                 min_freq=3,
                 min_sent_len=2,
                 orientation_aware=False,
                 include_guide_utterances=True):
        """Build one sample pair per qualifying tourist utterance.

        Each dialogue is replayed from its start location. Tourist actions
        move the current location and accumulate actions/observations; each
        tourist utterance longer than ``min_sent_len`` words emits a sample
        into both ``self.data`` and ``self.data2``.

        Args:
            data_dir: Directory containing ``talkthewalk.<set>.json`` and
                ``dict.txt``.
            set: Split name substituted into the dialogue file name.
            last_turns: Number of most recent context messages concatenated
                into each utterance.
            min_freq: Forwarded to ``Dictionary`` as ``min_freq``.
            min_sent_len: A tourist message becomes a sample only if it has
                more than this many space-separated tokens.
            orientation_aware: If true, record every recognised action id;
                otherwise record only forward moves, encoded from the
                location change.
            include_guide_utterances: If true, messages from non-tourist
                speakers are added to the dialogue context.
        """
        dialogue_path = os.path.join(data_dir,
                                     'talkthewalk.{}.json'.format(set))
        # Use a context manager so the handle is closed deterministically.
        with open(dialogue_path) as dialogue_file:
            self.dialogues = json.load(dialogue_file)
        self.dict = Dictionary(file=os.path.join(data_dir, 'dict.txt'),
                               min_freq=min_freq)
        self.map = Map(data_dir, neighborhoods, include_empty_corners=True)
        self.map2 = Map(data_dir,
                        neighborhoods,
                        include_empty_corners=True,
                        imperf=False)
        self.act_dict = ActionAgnosticDictionary()
        self.act_aware_dict = ActionAwareDictionary()

        self.feature_loader = GoldstandardFeatures(self.map)
        self.feature_loader2 = GoldstandardFeatures(self.map2)

        # Both stores expose the same fields, in the same order.
        fields = ('actions', 'goldstandard', 'landmarks', 'target',
                  'utterance')
        self.data = {field: list() for field in fields}
        self.data2 = {field: list() for field in fields}

        for config in self.dialogues:
            loc = config['start_location']
            neighborhood = config['neighborhood']
            boundaries = config['boundaries']
            act_memory = list()
            # The observation at the starting location always comes first.
            obs_memory = [self.feature_loader.get(neighborhood, loc)]
            obs_memory2 = [self.feature_loader2.get(neighborhood, loc)]

            dialogue_context = list()
            for msg in config['dialog']:
                if msg['id'] == 'Tourist':
                    act = msg['text']
                    act_id = self.act_aware_dict.encode(act)
                    # presumably encode() yields a negative id for text that
                    # is not an action -- confirm against
                    # ActionAwareDictionary.
                    if act_id >= 0:
                        new_loc = step_aware(act, loc, boundaries)
                        old_loc = loc
                        loc = new_loc

                        if orientation_aware:
                            act_memory.append(act_id)
                            obs_memory.append(
                                self.feature_loader.get(neighborhood, new_loc))
                            obs_memory2.append(
                                self.feature_loader2.get(
                                    neighborhood, new_loc))
                        elif act == 'ACTION:FORWARD':  # went forward
                            # Without orientation only forward moves are
                            # recorded, as a direction derived from the
                            # location change.
                            act_dir = self.act_dict.encode_from_location(
                                old_loc, new_loc)
                            act_memory.append(act_dir)
                            obs_memory.append(
                                self.feature_loader.get(neighborhood, loc))
                            obs_memory2.append(
                                self.feature_loader2.get(neighborhood, loc))
                    elif len(msg['text'].split(' ')) > min_sent_len:
                        dialogue_context.append(self.dict.encode(msg['text']))
                        # NOTE: with last_turns == 0 the slice [-0:] selects
                        # the entire context rather than nothing.
                        recent_tokens = [
                            y for x in dialogue_context[-last_turns:]
                            for y in x
                        ]
                        utt = (self.dict.encode(START_TOKEN) + recent_tokens +
                               self.dict.encode(END_TOKEN))
                        self.data['utterance'].append(utt)

                        landmarks, tgt = self.map.get_landmarks(
                            neighborhood, boundaries, loc)
                        self.data['landmarks'].append(landmarks)
                        self.data['target'].append(tgt)

                        self.data['actions'].append(act_memory)
                        self.data['goldstandard'].append(obs_memory)

                        # Same utterance and actions, but landmarks, target
                        # and observations come from the second map.
                        self.data2['utterance'].append(utt)

                        landmarks, tgt = self.map2.get_landmarks(
                            neighborhood, boundaries, loc)
                        self.data2['landmarks'].append(landmarks)
                        self.data2['target'].append(tgt)

                        self.data2['actions'].append(act_memory)
                        self.data2['goldstandard'].append(obs_memory2)

                        # Start a fresh segment at the current location.
                        act_memory = list()
                        obs_memory = [
                            self.feature_loader.get(neighborhood, loc)
                        ]
                        obs_memory2 = [
                            self.feature_loader2.get(neighborhood, loc)
                        ]
                elif include_guide_utterances:
                    dialogue_context.append(self.dict.encode(msg['text']))

    def __getitem__(self, index):
        """Return ``(sample, sample2)``: the ``data`` and ``data2`` entries at ``index``."""
        first = {key: values[index] for key, values in self.data.items()}
        second = {key: values[index] for key, values in self.data2.items()}
        return (first, second)

    def __len__(self):
        """Return the number of samples (length of ``data['target']``)."""
        return len(self.data['target'])