parser.add_argument('--beam-width', type=int, default=4,
                    help='Beam width for beam search (only applies when `decoding-strategy` is beam_search)')

args = parser.parse_args()

exp_dir = os.path.join(args.exp_dir, args.exp_name)
if not os.path.exists(exp_dir):
    os.mkdir(exp_dir)

logger = create_logger(os.path.join(exp_dir, 'log.txt'))
logger.info(args)

data_dir = args.data_dir

if args.trajectories == 'all':
    # Emergent-communication setting: random walks of length T with a shared dictionary.
    dictionary = Dictionary(file=os.path.join(data_dir, 'dict.txt'), min_freq=3)
    train_data = TalkTheWalkEmergent(data_dir, 'train', T=args.T)
    train_data.dict = dictionary
    valid_data = TalkTheWalkEmergent(data_dir, 'valid', T=args.T)
    valid_data.dict = dictionary
    test_data = TalkTheWalkEmergent(data_dir, 'test', T=args.T)
    test_data.dict = dictionary
elif args.trajectories == 'human':
    # Natural-language setting: trajectories taken by human annotators.
    train_data = TalkTheWalkLanguage(data_dir, 'train')
    valid_data = TalkTheWalkLanguage(data_dir, 'valid')
    test_data = TalkTheWalkLanguage(data_dir, 'test')

train_loader = DataLoader(train_data, args.batch_sz, collate_fn=get_collate_fn(args.cuda))
valid_loader = DataLoader(valid_data, args.batch_sz, collate_fn=get_collate_fn(args.cuda))
test_loader = DataLoader(test_data, args.batch_sz, collate_fn=get_collate_fn(args.cuda))
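# A minimal sketch of how these loaders are typically consumed. This is
# illustrative only: `model`, `optimizer`, and `args.num_epochs` are
# hypothetical placeholders not defined in this script, and the model is
# assumed to return a dict with a 'loss' entry.
#
# for epoch in range(args.num_epochs):
#     for batch in train_loader:
#         out = model.forward(batch)   # batch is a dict keyed by the dataset fields
#         optimizer.zero_grad()
#         out['loss'].backward()
#         optimizer.step()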
def __init__(self, data_dir, set, last_turns=1, min_freq=3, min_sent_len=2,
             orientation_aware=False, include_guide_utterances=True):
    self.dialogues = json.load(
        open(os.path.join(data_dir, 'talkthewalk.{}.json'.format(set))))
    self.dict = Dictionary(file=os.path.join(data_dir, 'dict.txt'), min_freq=min_freq)
    self.map = Map(data_dir, neighborhoods, include_empty_corners=True)
    self.act_dict = ActionAgnosticDictionary()
    self.act_aware_dict = ActionAwareDictionary()
    self.feature_loader = GoldstandardFeatures(self.map)

    self.data = dict()
    self.data['actions'] = list()
    self.data['goldstandard'] = list()
    self.data['landmarks'] = list()
    self.data['target'] = list()
    self.data['utterance'] = list()

    for config in self.dialogues:
        loc = config['start_location']
        neighborhood = config['neighborhood']
        boundaries = config['boundaries']
        act_memory = list()
        obs_memory = [self.feature_loader.get(neighborhood, loc)]

        dialogue_context = list()
        for msg in config['dialog']:
            if msg['id'] == 'Tourist':
                act = msg['text']
                act_id = self.act_aware_dict.encode(act)
                if act_id >= 0:
                    # Movement action: simulate the step on the grid.
                    new_loc = step_aware(act, loc, boundaries)
                    old_loc = loc
                    loc = new_loc

                    if orientation_aware:
                        act_memory.append(act_id)
                        obs_memory.append(self.feature_loader.get(neighborhood, new_loc))
                    else:
                        if act == 'ACTION:FORWARD':  # went forward
                            act_dir = self.act_dict.encode_from_location(old_loc, new_loc)
                            act_memory.append(act_dir)
                            obs_memory.append(self.feature_loader.get(neighborhood, loc))
                elif len(msg['text'].split(' ')) > min_sent_len:
                    # Utterance: emit one training example for this segment.
                    dialogue_context.append(self.dict.encode(msg['text']))

                    utt = self.dict.encode(START_TOKEN) \
                        + [y for x in dialogue_context[-last_turns:] for y in x] \
                        + self.dict.encode(END_TOKEN)
                    self.data['utterance'].append(utt)

                    landmarks, tgt = self.map.get_landmarks(config['neighborhood'], boundaries, loc)
                    self.data['landmarks'].append(landmarks)
                    self.data['target'].append(tgt)

                    self.data['actions'].append(act_memory)
                    self.data['goldstandard'].append(obs_memory)

                    # Reset trajectory memory for the next segment.
                    act_memory = list()
                    obs_memory = [self.feature_loader.get(neighborhood, loc)]
            elif include_guide_utterances:
                dialogue_context.append(self.dict.encode(msg['text']))
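# Each dialogue is segmented at tourist utterances: whenever a sufficiently
# long tourist message occurs, the actions and observations accumulated since
# the previous utterance become one example. Illustrative record layout
# (field names from the code above; values are hypothetical):
#
#   'utterance':    [<start id>, w1, ..., wn, <end id>]  # last `last_turns` messages, flattened
#   'actions':      [a1, a2, ...]                        # movement ids for the segment
#   'goldstandard': [obs0, obs1, ...]                    # observations along the path
#   'landmarks':    grid of landmark features around the current location
#   'target':       index of the tourist's position within that grid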
if args.communication == 'discrete':
    # (the discrete tourist model is loaded just above this excerpt)
    guide = GuideDiscrete.load(args.guide_model)
    if args.cuda:
        tourist = tourist.cuda()
        guide = guide.cuda()
    T = tourist.T

    def _predict_location(batch):
        # Tourist emits discrete communication vectors; guide localizes from them.
        t_out = tourist(batch)
        if args.cuda:
            t_out['comms'] = [x.cuda() for x in t_out['comms']]
        g_out = guide(t_out['comms'], batch)
        return g_out['prob'], t_out['comms']

elif args.communication == 'natural':
    tourist = TouristLanguage.load(args.tourist_model)
    guide = GuideLanguage.load(args.guide_model)
    dictionary = Dictionary(os.path.join(args.data_dir, 'dict.txt'), min_freq=0)

    if args.cuda:
        tourist = tourist.cuda()
        guide = guide.cuda()
    T = args.T

    def _predict_location(batch):
        # Tourist generates a natural-language utterance; guide localizes from it.
        t_out = tourist(batch, train=False, decoding_strategy=args.decoding_strategy)
        batch['utterance'] = t_out['utterance']
        batch['utterance_mask'] = t_out['utterance_mask']
        g_out = guide(batch, add_rl_loss=False)
        return g_out['prob'], batch['utterance']

collate_fn = get_collate_fn2(args.cuda)
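# Sketch of how _predict_location is typically driven. Illustrative only:
# the accuracy bookkeeping below is an assumption, not taken from this
# script, and it presumes `prob` is a tensor over grid locations and
# batch['target'] holds the true location indices.
#
# loader = DataLoader(test_data, args.batch_sz, collate_fn=collate_fn)
# correct, total = 0, 0
# for batch in loader:
#     prob, _ = _predict_location(batch)
#     pred = prob.argmax(dim=-1)            # most probable grid location
#     correct += (pred == batch['target'].squeeze(-1)).sum().item()
#     total += pred.numel()
# print('localization accuracy: {:.3f}'.format(correct / total))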
class TalkTheWalkLanguage(Dataset):
    """Dataset for the natural language experiments.

    Only contains trajectories taken by human annotators. Builds two parallel
    sets of examples: `data` from the default map, and `data2` from a second
    map loaded with imperf=False.
    """

    def __init__(self, data_dir, set, last_turns=1, min_freq=3, min_sent_len=2,
                 orientation_aware=False, include_guide_utterances=True):
        self.dialogues = json.load(
            open(os.path.join(data_dir, 'talkthewalk.{}.json'.format(set))))
        self.dict = Dictionary(file=os.path.join(data_dir, 'dict.txt'), min_freq=min_freq)
        self.map = Map(data_dir, neighborhoods, include_empty_corners=True)
        self.map2 = Map(data_dir, neighborhoods, include_empty_corners=True, imperf=False)
        self.act_dict = ActionAgnosticDictionary()
        self.act_aware_dict = ActionAwareDictionary()
        self.feature_loader = GoldstandardFeatures(self.map)
        self.feature_loader2 = GoldstandardFeatures(self.map2)

        self.data = dict()
        self.data['actions'] = list()
        self.data['goldstandard'] = list()
        self.data['landmarks'] = list()
        self.data['target'] = list()
        self.data['utterance'] = list()

        self.data2 = dict()
        self.data2['actions'] = list()
        self.data2['goldstandard'] = list()
        self.data2['landmarks'] = list()
        self.data2['target'] = list()
        self.data2['utterance'] = list()

        for config in self.dialogues:
            loc = config['start_location']
            neighborhood = config['neighborhood']
            boundaries = config['boundaries']
            act_memory = list()
            obs_memory = [self.feature_loader.get(neighborhood, loc)]
            obs_memory2 = [self.feature_loader2.get(neighborhood, loc)]

            dialogue_context = list()
            for msg in config['dialog']:
                if msg['id'] == 'Tourist':
                    act = msg['text']
                    act_id = self.act_aware_dict.encode(act)
                    if act_id >= 0:
                        # Movement action: simulate the step on the grid.
                        new_loc = step_aware(act, loc, boundaries)
                        old_loc = loc
                        loc = new_loc

                        if orientation_aware:
                            act_memory.append(act_id)
                            obs_memory.append(self.feature_loader.get(neighborhood, new_loc))
                            obs_memory2.append(self.feature_loader2.get(neighborhood, new_loc))
                        else:
                            if act == 'ACTION:FORWARD':  # went forward
                                act_dir = self.act_dict.encode_from_location(old_loc, new_loc)
                                act_memory.append(act_dir)
                                obs_memory.append(self.feature_loader.get(neighborhood, loc))
                                obs_memory2.append(self.feature_loader2.get(neighborhood, loc))
                    elif len(msg['text'].split(' ')) > min_sent_len:
                        # Utterance: emit one example into each parallel dataset.
                        dialogue_context.append(self.dict.encode(msg['text']))

                        utt = self.dict.encode(START_TOKEN) \
                            + [y for x in dialogue_context[-last_turns:] for y in x] \
                            + self.dict.encode(END_TOKEN)
                        self.data['utterance'].append(utt)

                        landmarks, tgt = self.map.get_landmarks(config['neighborhood'], boundaries, loc)
                        self.data['landmarks'].append(landmarks)
                        self.data['target'].append(tgt)

                        self.data['actions'].append(act_memory)
                        self.data['goldstandard'].append(obs_memory)

                        self.data2['utterance'].append(utt)

                        landmarks, tgt = self.map2.get_landmarks(config['neighborhood'], boundaries, loc)
                        self.data2['landmarks'].append(landmarks)
                        self.data2['target'].append(tgt)

                        self.data2['actions'].append(act_memory)
                        self.data2['goldstandard'].append(obs_memory2)

                        # Reset trajectory memory for the next segment.
                        act_memory = list()
                        obs_memory = [self.feature_loader.get(neighborhood, loc)]
                        obs_memory2 = [self.feature_loader2.get(neighborhood, loc)]
                elif include_guide_utterances:
                    dialogue_context.append(self.dict.encode(msg['text']))

    def __getitem__(self, index):
        return ({key: self.data[key][index] for key in self.data.keys()},
                {key: self.data2[key][index] for key in self.data2.keys()})

    def __len__(self):
        return len(self.data['target'])
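# Minimal usage sketch. Assumes the Talk The Walk data lives in './data';
# note that each item is a *pair* of example dicts (one per map variant), so
# PyTorch's default collate will not stack them and a pair-aware collate
# function (e.g. the get_collate_fn2 used elsewhere) is assumed for batching.
if __name__ == '__main__':
    valid_data = TalkTheWalkLanguage('./data', 'valid')
    full, imperf = valid_data[0]
    print(len(valid_data), 'examples')
    # Both views describe the same segment; their landmark features may differ.
    print(full['target'], imperf['target'])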