Example #1
def get_score():
    # Flask handler: tokenizes the sentence in request.json["s0"] and each
    # record in request.json["s1"], runs the model on all resulting pairs and
    # returns the first pair group's score, class and rel values.
    # Assumes `request`, `jsonify`, `loader`, `task` and `model` are module-level names.
    if not request.json["s0"]:
        return jsonify({"matches": []}), 200
    s0toks = request.json["s0"].split(" ")
    s1toks = [s1["text"].split(" ") for s1 in request.json["s1"]]

    # Build the parallel lists expected by task.load_set(): the s0 tokens are
    # repeated once per s1 sentence, with placeholder labels (0.5) and qids (0).
    l_s0 = [s0toks for s1 in s1toks]
    l_s1 = s1toks
    l_y = [0.5 for s1 in s1toks]
    l_qids = [0 for s1 in s1toks]
    # If the s1 records carry more than just "text", load the auxiliary features.
    if len(request.json["s1"][0]) > 1:
        l_xtra = loader.load_hypev_xtra(request.json["s1"])
    else:
        l_xtra = None
    l_types = None
    lists = l_s0, l_s1, l_y, l_qids, l_xtra, l_types

    s0, s1, y, qids, xtra, types = lists
    gr, y, _ = task.load_set(None, lists=lists)

    cl, rel, sc = [], [], []
    for ogr in task.sample_pairs(gr, 16384, shuffle=False, once=True):
        # Run the model once per batch and reuse its outputs.
        res = model.predict(ogr)
        cl += list(res["class"])
        rel += list(res["rel"])
        sc += list(res["score"])
    return (
        jsonify(
            {
                "score": sc[0].tolist()[0],
                "class": [x[0] for x in cl[0].tolist()],
                "rel": [x[0] for x in rel[0].tolist()],
            }
        ),
        200,
    )
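The handler above expects a JSON body with a single s0 sentence and a list of s1 records that each carry at least a "text" field. Below is a minimal client sketch; the host, port and route are assumptions, since the snippet does not show how the handler is registered with the Flask app.

import requests

payload = {
    "s0": "the sky is blue",
    "s1": [
        {"text": "the sky appears blue on a clear day"},
        {"text": "grass is green"},
    ],
}
# Hypothetical URL; use whatever route and port the Flask app actually binds.
resp = requests.post("http://localhost:5000/score", json=payload)
print(resp.json())  # {"score": ..., "class": [...], "rel": [...]}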
Example #2
File: hypev.py  Project: brmson/dataset-sts
    def load_set(self, fname, cache_dir=None, lists=None):
        # Load a hypothesis-evaluation dataset either from a TSV file (fname)
        # or from pre-built in-memory lists, vectorize it through the vocabulary
        # and return (gr, y, vocab).  Relies on module-level pickle, re, csv,
        # loader, nlp, Vocabulary and graph_input_anssel.
        # TODO: Make the cache-handling generic,
        # and offer a way to actually pass cache_dir
        save_cache = False
        if cache_dir:
            # Cache key is the md5 of the absolute dataset path; if the cached
            # pickle cannot be loaded, fall through to a fresh load and re-save it.
            import os.path
            fname_abs = os.path.abspath(fname)
            from hashlib import md5
            cache_filename = "%s/%s.p" % (cache_dir, md5(fname_abs.encode("utf-8")).hexdigest())
            try:
                with open(cache_filename, "rb") as f:
                    return pickle.load(f)
            except (IOError, TypeError, KeyError):
                save_cache = True

        if lists is not None:
            s0, s1, y, qids, xtra, types = lists
        else:
            xtra = None
            if '/mc' in fname:
                s0, s1, y, qids, types = loader.load_mctest(fname)
            else:
                s0, s1, y, qids = loader.load_hypev(fname)
                try:
                    dsfile = re.sub(r'\.([^.]*)$', '_aux.tsv', fname)  # train.tsv -> train_aux.tsv
                    with open(dsfile) as f:
                        rows = csv.DictReader(f, delimiter='\t')
                        xtra = loader.load_hypev_xtra(rows)
                        print(dsfile + ' loaded and available')
                except Exception as e:
                    if self.c['aux_r'] or self.c['aux_c']:
                        raise e
                types = None

        if self.vocab is None:
            vocab = Vocabulary(s0 + s1, prune_N=self.c['embprune'], icase=self.c['embicase'])
        else:
            vocab = self.vocab

        # mcqtypes pruning must happen *after* Vocabulary has been constructed!
        if types is not None:
            s0 = [x for x, t in zip(s0, types) if t in self.c['mcqtypes']]
            s1 = [x for x, t in zip(s1, types) if t in self.c['mcqtypes']]
            y = [x for x, t in zip(y, types) if t in self.c['mcqtypes']]
            qids = [x for x, t in zip(qids, types) if t in self.c['mcqtypes']]
            print('Retained %d questions, %d hypotheses (%s types)' % (len(set(qids)), len(set([' '.join(s) for s in s0])), self.c['mcqtypes']))

        si0, sj0 = vocab.vectorize(s0, self.emb, spad=self.s0pad)
        si1, sj1 = vocab.vectorize(s1, self.emb, spad=self.s1pad)
        f0, f1 = nlp.sentence_flags(s0, s1, self.s0pad, self.s1pad)
        gr = graph_input_anssel(si0, si1, sj0, sj1, None, None, y, f0, f1, s0, s1)
        if qids is not None:
            gr['qids'] = qids
        if xtra is not None:
            gr['#'] = xtra['#']
            gr['@'] = xtra['@']
        gr, y = self.merge_questions(gr)
        if save_cache:
            with open(cache_filename, "wb") as f:
                pickle.dump((s0, s1, y, vocab, gr), f)
                print("save")

        return (gr, y, vocab)
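Both loading paths can be driven as sketched below; the task object, file path and example sentences are illustrative assumptions, mirroring how the get_score() handler above builds its lists argument.

# File-based path: a tokenized TSV dataset (plus optional *_aux.tsv features).
gr, y, vocab = task.load_set("data/hypev/train.tsv")

# In-memory path: pre-tokenized sentences with placeholder labels and qids.
s0toks = "the sky is blue".split(" ")
s1toks = [["the", "sky", "appears", "blue"], ["grass", "is", "green"]]
lists = ([s0toks for _ in s1toks], s1toks,
         [0.5 for _ in s1toks], [0 for _ in s1toks], None, None)
gr, y, vocab = task.load_set(None, lists=lists)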