Exemple #1
0
def segment(ex, vocab, threshold=0.25):
    """Split a question around the chunk that best matches the example's span.

    Every contiguous slice of ``ex['question']`` is detokenized and scored
    against the detokenized span ``ex['inp'][s:e + 1]`` with ``compute_f1``.
    The first slice reaching the highest score wins.

    Args:
        ex: mapping with ``'span'`` (an inclusive ``(start, end)`` pair
            indexing ``ex['inp']``), ``'inp'`` and ``'question'``.
        vocab: object exposing ``word2index``; it is called with a word list.
        threshold: the best score must be strictly greater than this value.

    Returns:
        ``None`` when no slice scores above ``threshold``. Otherwise a dict
        with ``'before'`` / ``'after'`` (``get_orig`` applied to the question
        tokens on either side of the best slice) and ``'before_vids'`` /
        ``'after_vids'`` (long tensors built from those words plus ``'eos'``).
    """
    start, end = ex['span']
    span_text = detokenize(ex['inp'][start:end + 1])
    question = ex['question']
    n_tokens = len(question)

    # Exhaustive search over all (lo, hi) slices; the strict '>' keeps the
    # earliest candidate among ties.
    top_f1, top_lo, top_hi = -1, None, None
    for lo in range(n_tokens):
        for hi in range(lo, n_tokens):
            f1 = compute_f1(span_text, detokenize(question[lo:hi + 1]))
            if f1 > top_f1:
                top_f1, top_lo, top_hi = f1, lo, hi

    if not (top_f1 > threshold):
        return None

    pieces = {
        'before': get_orig(question[:top_lo]),
        'after': get_orig(question[top_hi + 1:]),
    }
    result = dict(pieces)
    for name, words in pieces.items():
        # Each side is terminated with 'eos' before being mapped to ids.
        result[name + '_vids'] = torch.tensor(
            vocab.word2index(words + ['eos']), dtype=torch.long)
    return result
Exemple #2
0
 def compute_metrics(self, preds, data):
     """Extend the module-level metrics with an average span F1.

     For each prediction/example pair, the predicted spans (``p['spans']``)
     and the gold spans (``ex['feat']['spans']``) are detokenized from
     ``ex['feat']['inp']`` (inclusive ``(start, end)`` pairs), joined with
     newlines, and compared with ``compute_f1(gold, predicted)``.

     Args:
         preds: iterable of prediction dicts carrying a ``'spans'`` list.
         data: iterable of examples aligned with ``preds``.

     Returns:
         The dict returned by the module-level ``compute_metrics`` with an
         added ``'span_f1'`` entry: the mean per-example F1, or ``0.0`` when
         there are no prediction/example pairs.
     """
     # NOTE: this calls the module-level compute_metrics, not this method.
     metrics = compute_metrics(preds, data)
     f1s = []
     for pred, ex in zip(preds, data):
         pred_text = '\n'.join(
             detokenize(ex['feat']['inp'][s:e + 1]) for s, e in pred['spans'])
         gold_text = '\n'.join(
             detokenize(ex['feat']['inp'][s:e + 1])
             for s, e in ex['feat']['spans'])
         f1s.append(compute_f1(gold_text, pred_text))
     # Avoid ZeroDivisionError on an empty evaluation set.
     metrics['span_f1'] = sum(f1s) / len(f1s) if f1s else 0.0
     return metrics
Exemple #3
0
 def compute_entailment(self, spans, ex):
     """Score each span against the dialogue history and the scenario.

     Each span (an inclusive ``(start, end)`` pair into ``ex['feat']['inp']``)
     is detokenized and compared with ``compute_f1`` against every entry of
     ``ex['ann']['hquestion']`` and against ``ex['ann']['scenario']``.

     Args:
         spans: iterable of ``(start, end)`` pairs.
         ex: example providing ``ex['feat']['inp']`` and ``ex['ann']``.

     Returns:
         A float tensor on ``self.device`` with one row per span and two
         columns: the best history-question score and the scenario score,
         both floored at 0.
     """
     chunks = [detokenize(ex['feat']['inp'][s:e + 1]) for s, e in spans]
     history, scenario = [], []
     for chunk in chunks:
         question_f1s = [
             compute_f1(chunk, detokenize(q)) for q in ex['ann']['hquestion']
         ]
         # Leading 0 keeps the floor even when there are no history questions.
         history.append(max([0] + question_f1s))
         scenario_f1 = compute_f1(chunk, detokenize(ex['ann']['scenario']))
         scenario.append(max(0, scenario_f1))
     stacked = torch.tensor([history, scenario],
                            dtype=torch.float,
                            device=self.device)
     # Built as (2, n_spans); transpose so spans index the rows.
     return stacked.t()
Exemple #4
0
    def extract_preds(self, out, batch, top_k=20):
        """Decode the most probable answer span for every example.

        ``out['scores']`` is split along its last dimension into start and end
        scores, each softmaxed over dimension 1. For each example the top
        ``top_k`` start and end candidates from ``self.get_top_k`` are paired
        (it is unpacked here as ``(index, probability)`` pairs), pairs whose
        end precedes the start are dropped, and the rest are ranked by the
        product of their probabilities.

        Args:
            out: model output holding ``'scores'``.
            batch: examples aligned with the first dimension of the scores.
            top_k: number of start/end candidates and of ranked spans kept.

        Returns:
            One dict per example with ``'utterance_id'``, ``'top_k'`` (ranked
            ``(text, start, end, prob)`` tuples), ``'answer'`` (best text),
            ``'spans'`` (the best ``(start, end)`` in a list) and
            ``'retrieve_span'`` fixed to 0.
        """
        start_scores, end_scores = out['scores'].split(1, dim=-1)
        start_probs = F.softmax(start_scores.squeeze(-1), dim=1)
        end_probs = F.softmax(end_scores.squeeze(-1), dim=1)

        def joint_prob(candidate):
            return candidate[-1]

        preds = []
        for ex_start, ex_end, ex in zip(start_probs, end_probs, batch):
            starts = self.get_top_k(ex_start, top_k)
            ends = self.get_top_k(ex_end, top_k)
            # Only spans whose end is not before their start are valid.
            candidates = [
                (s, e, ps * pe)
                for s, ps in starts
                for e, pe in ends
                if e >= s
            ]
            candidates.sort(key=joint_prob, reverse=True)
            ranked = [
                (detokenize(ex['feat']['inp'][s:e + 1]), s, e, p)
                for s, e, p in candidates[:top_k]
            ]
            best_text, best_start, best_end, _ = ranked[0]
            preds.append({
                'utterance_id': ex['utterance_id'],
                'top_k': ranked,
                'answer': best_text,
                'spans': [(best_start, best_end)],
                'retrieve_span': 0,
            })
        return preds
Exemple #5
0
 def extract_preds(self, out, batch, top_k=20):
     """Turn classifier, retrieval and edit outputs into predictions.

     The arg-max over dimension 1 of ``out['clf_scores']`` selects an entry
     of ``CLASSES`` and the arg-max of ``out['retrieve_scores']`` selects a
     span. Edit scores are arg-maxed over dimension 2 and decoded through
     ``self.vocab.index2word``, truncated at the first ``'eos'``.

     Args:
         out: model output with ``'clf_scores'``, ``'retrieve_scores'``,
             ``'spans'`` and ``'edit_scores'``.
         batch: examples aligned with the outputs.
         top_k: accepted for interface compatibility; unused here.

     Returns:
         One dict per example with ``'utterance_id'``, ``'retrieval'`` (the
         retrieved span text, or ``None`` unless the class is ``'more'``),
         ``'answer'`` (the class name, or the decoded edit for the retrieved
         span when the class is ``'more'``) and ``'spans'``.
     """
     class_ids = out['clf_scores'].max(1)[1].tolist()
     retrieve_ids = out['retrieve_scores'].max(1)[1].tolist()
     rows = zip(batch, class_ids, retrieve_ids, out['spans'],
                out['edit_scores'])

     preds = []
     for ex, class_id, span_id, ex_spans, ex_edit_scores in rows:
         answer = CLASSES[class_id]

         edits = []
         for word_ids in ex_edit_scores.max(2)[1].tolist():
             words = self.vocab.index2word(word_ids)
             # Keep only the words before the first 'eos', if there is one.
             cut = words.index('eos') if 'eos' in words else len(words)
             edits.append(' '.join(words[:cut]))

         retrieval = None
         if answer == 'more':
             s, e = ex_spans[span_id]
             retrieval = detokenize(ex['feat']['inp'][s:e + 1])
             answer = edits[span_id]

         preds.append({
             'utterance_id': ex['utterance_id'],
             'retrieval': retrieval,
             'answer': answer,
             'spans': ex_spans,
         })
     return preds
Exemple #6
0
 def extract_bullets(self, spans, ex):
     """Replace candidate spans with bullet-delimited spans where possible.

     The snippet region is located from ``ex['feat']['pointer_mask']``: it
     starts 5 positions after the first 1 and ends at the first 0 after
     that. Tokens in that region whose ``'sub'`` field is ``'*'`` are
     treated as bullet markers, and the text between consecutive markers
     (or the last marker and the snippet end) forms one bullet span.

     Args:
         spans: candidate inclusive ``(start, end)`` pairs.
         ex: example providing ``ex['feat']['pointer_mask']`` (needs
             ``tolist()``) and ``ex['feat']['inp']`` (token dicts with a
             ``'sub'`` key).

     Returns:
         ``spans`` unchanged when the snippet has no bullet marker.
         Otherwise the bullet spans plus the candidate spans whose own text
         has no ``'*'`` or newline, visited longest first; a span is kept
         unless every position it covers is already covered by kept spans.

     Raises:
         ValueError: if the mask has no 1, or no 0 after the snippet start.
     """
     tokens = ex['feat']['inp']
     mask = ex['feat']['pointer_mask'].tolist()
     classes_start = mask.index(1)
     # NOTE(review): the offset of 5 presumably skips the class tokens that
     # precede the snippet; confirm against the featurizer.
     snippet_start = classes_start + 5
     snippet_end = snippet_start + mask[snippet_start:].index(0)
     bullet_inds = [
         i for i in range(snippet_start, snippet_end)
         if tokens[i]['sub'] == '*'
     ]
     if not bullet_inds:
         return spans

     # A bullet's content runs from just after its marker to just before
     # the next marker (or the snippet end); empty bullets are dropped.
     bullets = [
         (s + 1, e - 1)
         for s, e in zip(bullet_inds, bullet_inds[1:] + [snippet_end])
         if e - 1 >= s + 1
     ]
     non_bullet_spans = []
     for s, e in spans:
         # Inspect the text of this span only. Detokenizing the whole input
         # made the filter independent of the span being tested.
         gloss = detokenize(tokens[s:e + 1])
         if '*' not in gloss and '\n' not in gloss:
             non_bullet_spans.append((s, e))

     all_spans = bullets + non_bullet_spans
     all_spans.sort(key=lambda tup: tup[1] - tup[0], reverse=True)
     covered = [False] * len(tokens)
     keep = []
     for s, e in all_spans:
         if not all(covered[s:e + 1]):
             for i in range(s, e + 1):
                 covered[i] = True
             keep.append((s, e))
     return keep
Exemple #7
0
 def extract_preds(self, out, batch, top_k=20):
     """Augment the parent's predictions with per-example score details.

     Each prediction dict from ``super().extract_preds`` is updated in
     place with softmaxed class scores keyed by ``CLASSES``, softmaxed
     retrieval scores keyed by span text, the non-padding sub-tokens,
     selected original fields of the example, the raw start/end score
     columns, and the two entailment columns keyed by span text.

     Args:
         out: model output with ``'span_scores'``, ``'clf_scores'``,
             ``'retrieve_scores'`` and ``'entail'``.
         batch: examples aligned with the outputs.
         top_k: forwarded to the parent implementation.

     Returns:
         The list of prediction dicts returned by the parent, augmented.
     """
     preds = super().extract_preds(out, batch, top_k=top_k)
     original_keys = ('snippet', 'scenario', 'question', 'history', 'answer')
     rows = zip(batch, preds, out['span_scores'], out['clf_scores'],
                out['retrieve_scores'], out['entail'])
     for ex, pred, span_scores, clf_scores, retrieve_scores, entail in rows:
         class_probs = F.softmax(clf_scores, dim=0).tolist()
         pred['clf_scores'] = dict(zip(CLASSES, class_probs))

         span_texts = [
             detokenize(ex['feat']['inp'][s:e + 1])
             for s, e in pred['spans']
         ]
         retrieve_probs = F.softmax(retrieve_scores, dim=0).tolist()
         # Keyed by span text: identical texts collapse to a single entry.
         pred['span_scores'] = dict(zip(span_texts, retrieve_probs))

         pred['words'] = [
             tok['sub'] for tok in ex['feat']['inp'] if tok['orig'] != 'pad'
         ]
         pred['og'] = {
             key: value
             for key, value in ex.items() if key in original_keys
         }
         pred['start_scores'] = span_scores[:, 0].tolist()
         pred['end_scores'] = span_scores[:, 1].tolist()
         pred['entail_hist_scores'] = dict(
             zip(span_texts, entail[:, 0].tolist()))
         pred['entail_scen_scores'] = dict(
             zip(span_texts, entail[:, 1].tolist()))
     return preds
Exemple #8
0
 def extract_preds(self, out, batch, top_k=20):
     """Build predictions from classifier and span-retrieval outputs.

     The arg-max over dimension 1 of ``out['clf_scores']`` picks an entry
     of ``CLASSES``. When that entry is ``'more'``, the answer becomes the
     text of the span chosen by the arg-max of ``out['retrieve_scores']``.

     Args:
         out: model output with ``'clf_scores'``, ``'retrieve_scores'`` and
             ``'spans'``.
         batch: examples aligned with the outputs.
         top_k: accepted for interface compatibility; unused here.

     Returns:
         One dict per example with ``'utterance_id'``, ``'answer'``,
         ``'spans'`` and ``'retrieve_span'`` (the chosen span index).
     """
     class_ids = out['clf_scores'].max(1)[1].tolist()
     span_ids = out['retrieve_scores'].max(1)[1].tolist()

     preds = []
     for ex, class_id, span_id, ex_spans in zip(batch, class_ids, span_ids,
                                                out['spans']):
         answer = CLASSES[class_id]
         if answer == 'more':
             # Replace the class label by the text of the retrieved span.
             start, end = ex_spans[span_id]
             answer = detokenize(ex['feat']['inp'][start:end + 1])
         preds.append({
             'utterance_id': ex['utterance_id'],
             'answer': answer,
             'spans': ex_spans,
             'retrieve_span': span_id,
         })
     return preds
Exemple #9
0
 def extract_preds(self, out, batch):
     """Assemble an answer from decoded prefix, span text and decoded suffix.

     ``out['before']`` and ``out['after']`` are arg-maxed over dimension 2,
     mapped to words with ``self.vocab.index2word`` and cut at the first
     ``'eos'``. The middle is the detokenized ``ex['inp']`` slice given by
     the inclusive ``ex['span']``.

     Args:
         out: model output with ``'before'`` and ``'after'`` score tensors.
         batch: examples aligned with the outputs.

     Returns:
         One dict per example with ``'utterance_id'`` and ``'answer'``, the
         three parts joined by single spaces.
     """
     def decode(word_ids):
         # Map ids to words and drop everything from the first 'eos' onward.
         words = self.vocab.index2word(word_ids)
         return words[:words.index('eos')] if 'eos' in words else words

     before_ids = out['before'].max(2)[1].tolist()
     after_ids = out['after'].max(2)[1].tolist()

     preds = []
     for ids_before, ids_after, ex in zip(before_ids, after_ids, batch):
         prefix = decode(ids_before)
         suffix = decode(ids_after)
         start, end = ex['span']
         middle = detokenize(ex['inp'][start:end + 1])
         answer = '{} {} {}'.format(' '.join(prefix), middle,
                                    ' '.join(suffix))
         preds.append({
             'utterance_id': ex['utterance_id'],
             'answer': answer,
         })
     return preds
Exemple #10
0
 def extract_spans(self, span_scores, batch):
     """Extract thresholded spans from start/end scores for each example.

     ``span_scores`` is split along its last dimension into start and end
     scores. A position triggers as a start when its score is at least
     ``min(max start score, self.args.thresh)``. Each start is paired with
     the first position at or after it whose end score is at least
     ``min(max end score from that start onward, self.args.thresh)``.

     Args:
         span_scores: tensor whose last dimension holds (start, end) scores.
         batch: examples aligned with the first dimension of the scores.

     Returns:
         One list per example of ``(start, end, text, start_score,
         end_score)`` tuples, with inclusive indices and ``text`` the
         detokenized ``ex['feat']['inp'][start:end + 1]``.
     """
     start_scores, end_scores = span_scores.split(1, dim=-1)
     start_scores = start_scores.squeeze(-1)
     end_scores = end_scores.squeeze(-1)

     all_spans = []
     for ex_start, ex_end, ex in zip(start_scores, end_scores, batch):
         found = []
         # Capping at the max guarantees at least one start triggers.
         start_cut = min(ex_start.max(), self.args.thresh)
         for si, is_start in enumerate(ex_start.ge(start_cut).tolist()):
             if not is_start:
                 continue
             tail = ex_end[si:]
             end_cut = min(tail.max(), self.args.thresh)
             for offset, is_end in enumerate(tail.ge(end_cut).tolist()):
                 if not is_end:
                     continue
                 ei = si + offset
                 found.append((si, ei,
                               detokenize(ex['feat']['inp'][si:ei + 1]),
                               ex_start[si].item(), ex_end[ei].item()))
                 # Only the nearest qualifying end is used for each start.
                 break
         all_spans.append(found)
     return all_spans
Exemple #11
0
 def compute_metrics(self, preds, batch):
     """Average the F1 between predicted answers and reference questions.

     Each ``p['answer']`` is compared with the detokenized ``e['question']``
     of the aligned example using ``compute_f1``.

     Args:
         preds: iterable of prediction dicts carrying an ``'answer'``.
         batch: iterable of examples aligned with ``preds``.

     Returns:
         ``{'f1': mean_f1}``; the mean is ``0.0`` when there are no
         prediction/example pairs.
     """
     f1s = [
         compute_f1(p['answer'], detokenize(e['question']))
         for p, e in zip(preds, batch)
     ]
     if not f1s:
         # Avoid ZeroDivisionError on an empty evaluation set.
         return {'f1': 0.0}
     return {'f1': sum(f1s) / len(f1s)}