Example 1
    def score_results(self, results, verbose=False):
        # results should be a dictionary mapping instr_ids to dictionaries,
        # with each dictionary containing (at least) a 'words' field
        instr_ids = set(self.instr_ids)
        instr_count = 0
        results_by_base_id = {}
        mismatches = []
        for instr_id, result in results.items():
            if instr_id in instr_ids:
                instr_ids.remove(instr_id)

                base_id = int(instr_id.split('_')[0])

                if base_id in results_by_base_id:
                    old_predicted = results_by_base_id[base_id]['words']
                    new_predicted = result['words']
                    if old_predicted != new_predicted:
                        mismatches.append((old_predicted, new_predicted))
                else:
                    results_by_base_id[base_id] = result

        if mismatches:
            print("mismatching outputs for sentences:")
            for old_pred, new_pred in mismatches:
                print(old_pred)
                print(new_pred)
                print()

        assert len(instr_ids) == 0, \
            'Missing %d of %d instruction ids from %s' % (
            len(instr_ids), len(self.instr_ids), ",".join(self.splits))

        all_refs = []
        all_hyps = []

        model_scores = []

        instruction_replaced_gt = []

        skip_count = 0
        skipped_refs = set()
        for base_id, result in sorted(results_by_base_id.items()):
            instr_count += 1
            gt = self.gt[base_id]
            tokenized_refs = [
                Tokenizer.split_sentence(ref) for ref in gt['instructions']
            ]
            tokenized_hyp = result['words']

            replaced_gt = gt.copy()
            replaced_gt['instructions'] = [' '.join(tokenized_hyp)]
            instruction_replaced_gt.append(replaced_gt)

            if 'score' in result:
                model_scores.append(result['score'])

            if len(tokenized_refs) != self.instructions_per_path:
                skip_count += 1
                skipped_refs.add(base_id)
                continue
            all_refs.append(tokenized_refs)
            all_hyps.append(tokenized_hyp)

            if verbose and instr_count % 100 == 0:
                for i, ref in enumerate(tokenized_refs):
                    print("ref {}:\t{}".format(i, ' '.join(ref)))
                print("pred  :\t{}".format(' '.join(tokenized_hyp)))
                print()

        if skip_count != 0:
            print("skipped {} instructions without {} refs: {}".format(
                skip_count, self.instructions_per_path,
                ' '.join(str(i) for i in skipped_refs)))

        # guard against an empty list (no result carried a 'score' field)
        model_score = np.mean(model_scores) if model_scores else float('nan')
        bleu, unpenalized_bleu = multi_bleu(all_refs, all_hyps)

        score_summary = {
            'model_score': model_score,
            'bleu': bleu,
            'unpenalized_bleu': unpenalized_bleu,
        }
        return score_summary, instruction_replaced_gt
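# Usage sketch (hypothetical names): `evaluator` stands for whatever object this
# method is bound to; it must expose self.instr_ids, self.gt, self.splits and
# self.instructions_per_path, populated from the dataset split being scored.
# Result keys follow the '<path_id>_<instruction_index>' pattern used above, and
# the 'score' field is optional.
#
# results = {
#     '1234_0': {'words': ['walk', 'past', 'the', 'sofa'], 'score': -1.2},
#     '1234_1': {'words': ['walk', 'past', 'the', 'sofa'], 'score': -1.5},
# }
# score_summary, replaced_gt = evaluator.score_results(results, verbose=True)
# print(score_summary['bleu'], score_summary['unpenalized_bleu'])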
Example 2
# for item in data:
#     for instr in item['instructions']:
#         count.update(Tokenizer.split_sentence(instr))
# vocab = list(start_vocab)
# for word, num in count.most_common():
#     if num >= min_count:
#         vocab.append(word)
#     else:
#         break

import pickle
from collections import Counter
import matplotlib.pyplot as plt
import numpy as np

# `tok` is assumed to be a Tokenizer instance that provides split_sentence().
inf_count = Counter()  # word frequencies in generated (inferred) instructions
ref_count = Counter()  # word frequencies in ground-truth (train) instructions

all_results = pickle.load(open("results.pkl", 'rb'))
for env_name, results in all_results.items():
    print(env_name)
    for path_id, result in results.items():
        print(result)
        inf = tok.split_sentence(result['inference'])
        inf_count.update(inf)
        ref = tok.split_sentence(np.random.choice(result['gt']))
        ref_count.update(ref)

# Sort word frequencies in descending order to form rank-frequency curves.
infs = sorted(inf_count.values(), reverse=True)
refs = sorted(ref_count.values(), reverse=True)

plt.plot(infs, label="Inferred Language")
plt.plot(refs, label="Train Language")
plt.title("Distribution of Vocabulary")
plt.xlabel("Words")
plt.legend(loc="upper right")
plt.ylabel('Amount of Usage')
plt.savefig("vocab_dist.png")
plt.cla()
Example 3
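# d_1 .. d_3 are defined earlier in the original script (not shown in this excerpt);
# together with d_4 .. d_10 they make up the direction-word pairs flattened into d_ls below.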
d_4 = ['front','back']
d_5 = ['above','under']
d_6 = ['enter','exit']
d_7 = ['backward','forward']
d_8 = ['away from', 'towards']
d_9 = ['into','out of']
d_10 = ['inside','outside']
#d_ls = [d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10]
d_ls = d_1+d_2+d_3+d_4+d_5+d_6+d_7+d_8+d_9+d_10

# Collect `perturb_num` instruction indices whose text mentions a direction word.
direct = []
while len(direct) < perturb_num:
    i = np.random.randint(len(pairs_idx))
    t_i = pairs_idx[i]
    ins_i = data[t_i[0]]['instructions'][t_i[1]].lower()
    words = Tokenizer.split_sentence(ins_i)
    # single words are matched as tokens; multi-word phrases ('away from',
    # 'out of') are matched as substrings of the lowered instruction
    if any(d in ins_i if ' ' in d else d in words for d in d_ls):
        direct.append(pairs_idx.pop(i))
        
# preselect viewpoint swap  
upper = 0.6
lower = 0.3

import networkx as nx
from ndtw import DTW, load_nav_graphs

# Load connectivity graphs for every scan that appears in the data
scans = []
for traj in data:
    if traj['scan'] not in scans:
        scans.append(traj['scan'])
graphs = load_nav_graphs(scans)
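
# Sketch of a typical next step (assumptions: `load_nav_graphs` returns a dict
# mapping scan id -> networkx.Graph with 'weight' edge attributes, as in the
# standard R2R connectivity loaders). Shortest-path distances between viewpoints
# are commonly precomputed for nDTW-style scoring and viewpoint selection.
distances = {}
for scan, G in graphs.items():
    distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G, weight='weight'))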