def get_score():
    """Score the request's ``s0`` sentence against its ``s1`` candidates.

    Reads the JSON request body: ``s0`` is a space-separated string and
    ``s1`` is a list of entries that each carry a ``text`` field.  Every
    candidate is paired with the same tokenized ``s0``, the pairs are loaded
    through ``task.load_set`` and fed to ``model.predict``.

    Returns:
        A ``(response, 200)`` tuple.  When ``s0`` is empty the payload is
        ``{"matches": []}``; otherwise it holds ``score``, ``class`` and
        ``rel`` taken from the first prediction row.
    """
    payload = request.json
    if not payload["s0"]:
        return jsonify({"matches": []}), 200

    s0toks = payload["s0"].split(" ")
    s1toks = [cand["text"].split(" ") for cand in payload["s1"]]
    n_pairs = len(s1toks)

    # One row per candidate: the same s0 tokens, a placeholder label of 0.5
    # and a shared question id of 0.
    l_s0 = [s0toks] * n_pairs
    l_s1 = s1toks
    l_y = [0.5] * n_pairs
    l_qids = [0] * n_pairs

    # Entries with more than one field are handed to the auxiliary loader.
    # NOTE(review): an empty "s1" list raises IndexError on the next line
    # (and again at sc[0] below) - confirm what the API should answer then.
    if len(payload["s1"][0]) > 1:
        l_xtra = loader.load_hypev_xtra(payload["s1"])
    else:
        l_xtra = None
    l_types = None

    lists = l_s0, l_s1, l_y, l_qids, l_xtra, l_types
    gr, _, _ = task.load_set(None, lists=lists)

    cl, rel, sc = [], [], []
    for ogr in task.sample_pairs(gr, 16384, shuffle=False, once=True):
        # Run inference once per batch and read all three outputs from it,
        # instead of calling predict() separately for each key.
        pred = model.predict(ogr)
        cl.extend(pred["class"])
        rel.extend(pred["rel"])
        sc.extend(pred["score"])

    return (
        jsonify(
            {
                "score": sc[0].tolist()[0],
                "class": [x[0] for x in cl[0].tolist()],
                "rel": [x[0] for x in rel[0].tolist()],
            }
        ),
        200,
    )
def get_score():
    """Score the request's ``s0`` sentence against its ``s1`` candidates.

    Reads the JSON request body: ``s0`` is a space-separated string and
    ``s1`` is a list of entries that each carry a ``text`` field.  Every
    candidate is paired with the same tokenized ``s0``, the pairs are loaded
    through ``task.load_set`` and fed to ``model.predict``.

    Returns:
        A ``(response, 200)`` tuple.  When ``s0`` is empty the payload is
        ``{'matches': []}``; otherwise it holds ``score``, ``class`` and
        ``rel`` taken from the first prediction row.
    """
    payload = request.json
    if not payload['s0']:
        return jsonify({'matches': []}), 200

    s0toks = payload['s0'].split(' ')
    s1toks = [cand['text'].split(' ') for cand in payload['s1']]
    n_pairs = len(s1toks)

    # One row per candidate: the same s0 tokens, a placeholder label of 0.5
    # and a shared question id of 0.
    l_s0 = [s0toks] * n_pairs
    l_s1 = s1toks
    l_y = [0.5] * n_pairs
    l_qids = [0] * n_pairs

    # Entries with more than one field are handed to the auxiliary loader.
    # NOTE(review): an empty 's1' list raises IndexError on the next line
    # (and again at sc[0] below) - confirm what the API should answer then.
    if len(payload['s1'][0]) > 1:
        l_xtra = loader.load_hypev_xtra(payload['s1'])
    else:
        l_xtra = None
    l_types = None

    lists = l_s0, l_s1, l_y, l_qids, l_xtra, l_types
    gr, _, _ = task.load_set(None, lists=lists)

    cl, rel, sc = [], [], []
    for ogr in task.sample_pairs(gr, 16384, shuffle=False, once=True):
        # Run inference once per batch and read all three outputs from it,
        # instead of calling predict() separately for each key.
        pred = model.predict(ogr)
        cl.extend(pred['class'])
        rel.extend(pred['rel'])
        sc.extend(pred['score'])

    return jsonify({
        'score': sc[0].tolist()[0],
        'class': [x[0] for x in cl[0].tolist()],
        'rel': [x[0] for x in rel[0].tolist()],
    }), 200
def load_set(self, fname, cache_dir=None, lists=None):
    """Build the model input graph for one dataset split.

    Args:
        fname: Path of the dataset file.  Read only when ``lists`` is None;
            a path containing ``'/mc'`` goes through ``loader.load_mctest``,
            anything else through ``loader.load_hypev`` (plus an optional
            ``*_aux.tsv`` companion file).  Also used to derive the cache key.
        cache_dir: If truthy, directory holding pickled results keyed by the
            MD5 of the absolute ``fname``.  A hit short-circuits all loading;
            a miss causes the freshly built result to be written there.
        lists: Optional pre-loaded ``(s0, s1, y, qids, xtra, types)`` tuple
            that replaces reading from ``fname``.

    Returns:
        ``(gr, y, vocab)`` - the graph and labels as returned by
        ``self.merge_questions`` and the vocabulary used for vectorization.
        The same shape is returned on a cache hit.
    """
    # TODO: Make the cache-handling generic,
    # and offer a way to actually pass cache_dir
    save_cache = False
    if cache_dir:
        import os.path
        from hashlib import md5
        fname_abs = os.path.abspath(fname)
        cache_filename = "%s/%s.p" % (cache_dir, md5(fname_abs.encode("utf-8")).hexdigest())
        try:
            # NOTE: pickle can execute arbitrary code while loading; only
            # point cache_dir at directories you trust.
            with open(cache_filename, "rb") as f:
                # The cache stores (s0, s1, y, vocab, gr); hand back the same
                # (gr, y, vocab) triple as the uncached path so callers can
                # unpack the result uniformly.
                _s0, _s1, y, vocab, gr = pickle.load(f)
            return (gr, y, vocab)
        except (IOError, TypeError, KeyError, ValueError):
            # Missing or unusable cache entry: rebuild and save below.
            save_cache = True

    if lists is not None:
        s0, s1, y, qids, xtra, types = lists
    else:
        xtra = None
        if '/mc' in fname:
            s0, s1, y, qids, types = loader.load_mctest(fname)
        else:
            s0, s1, y, qids = loader.load_hypev(fname)
            try:
                dsfile = re.sub(r'\.([^.]*)$', '_aux.tsv', fname)  # train.tsv -> train_aux.tsv
                with open(dsfile) as f:
                    rows = csv.DictReader(f, delimiter='\t')
                    xtra = loader.load_hypev_xtra(rows)
                    print(dsfile + ' loaded and available')
            except Exception:
                # The aux file is optional unless the config asks for the
                # auxiliary features; only then is a failure fatal.
                if self.c['aux_r'] or self.c['aux_c']:
                    raise
            types = None

    if self.vocab is None:
        vocab = Vocabulary(s0 + s1, prune_N=self.c['embprune'], icase=self.c['embicase'])
    else:
        vocab = self.vocab

    # mcqtypes pruning must happen *after* Vocabulary has been constructed!
    if types is not None:
        wanted = self.c['mcqtypes']
        # Compute the keep-mask once and apply it to every parallel list.
        keep = [t in wanted for t in types]
        s0 = [x for x, k in zip(s0, keep) if k]
        s1 = [x for x, k in zip(s1, keep) if k]
        y = [x for x, k in zip(y, keep) if k]
        qids = [x for x, k in zip(qids, keep) if k]
        print('Retained %d questions, %d hypotheses (%s types)'
              % (len(set(qids)), len({' '.join(s) for s in s0}), wanted))

    si0, sj0 = vocab.vectorize(s0, self.emb, spad=self.s0pad)
    si1, sj1 = vocab.vectorize(s1, self.emb, spad=self.s1pad)
    f0, f1 = nlp.sentence_flags(s0, s1, self.s0pad, self.s1pad)
    gr = graph_input_anssel(si0, si1, sj0, sj1, None, None, y, f0, f1, s0, s1)
    if qids is not None:
        gr['qids'] = qids
    if xtra is not None:
        gr['#'] = xtra['#']
        gr['@'] = xtra['@']
    gr, y = self.merge_questions(gr)

    if save_cache:
        # On-disk layout is kept as (s0, s1, y, vocab, gr) so existing cache
        # files stay readable.
        with open(cache_filename, "wb") as f:
            pickle.dump((s0, s1, y, vocab, gr), f)
            print("save")

    return (gr, y, vocab)
def load_set(self, fname, cache_dir=None, lists=None):
    """Build the model input graph for one dataset split.

    Args:
        fname: Path of the dataset file.  Read only when ``lists`` is None;
            a path containing ``'/mc'`` goes through ``loader.load_mctest``,
            anything else through ``loader.load_hypev`` (plus an optional
            ``*_aux.tsv`` companion file).  Also used to derive the cache key.
        cache_dir: If truthy, directory holding pickled results keyed by the
            MD5 of the absolute ``fname``.  A hit short-circuits all loading;
            a miss causes the freshly built result to be written there.
        lists: Optional pre-loaded ``(s0, s1, y, qids, xtra, types)`` tuple
            that replaces reading from ``fname``.

    Returns:
        ``(gr, y, vocab)`` - the graph and labels as returned by
        ``self.merge_questions`` and the vocabulary used for vectorization.
        The same shape is returned on a cache hit.
    """
    # TODO: Make the cache-handling generic,
    # and offer a way to actually pass cache_dir
    save_cache = False
    if cache_dir:
        import os.path
        from hashlib import md5
        fname_abs = os.path.abspath(fname)
        cache_filename = "%s/%s.p" % (cache_dir, md5(fname_abs.encode("utf-8")).hexdigest())
        try:
            # NOTE: pickle can execute arbitrary code while loading; only
            # point cache_dir at directories you trust.
            with open(cache_filename, "rb") as f:
                # The cache stores (s0, s1, y, vocab, gr); hand back the same
                # (gr, y, vocab) triple as the uncached path so callers can
                # unpack the result uniformly.
                _s0, _s1, y, vocab, gr = pickle.load(f)
            return (gr, y, vocab)
        except (IOError, TypeError, KeyError, ValueError):
            # Missing or unusable cache entry: rebuild and save below.
            save_cache = True

    if lists is not None:
        s0, s1, y, qids, xtra, types = lists
    else:
        xtra = None
        if '/mc' in fname:
            s0, s1, y, qids, types = loader.load_mctest(fname)
        else:
            s0, s1, y, qids = loader.load_hypev(fname)
            try:
                dsfile = re.sub(r'\.([^.]*)$', '_aux.tsv', fname)  # train.tsv -> train_aux.tsv
                with open(dsfile) as f:
                    rows = csv.DictReader(f, delimiter='\t')
                    xtra = loader.load_hypev_xtra(rows)
                    print(dsfile + ' loaded and available')
            except Exception:
                # The aux file is optional unless the config asks for the
                # auxiliary features; only then is a failure fatal.
                if self.c['aux_r'] or self.c['aux_c']:
                    raise
            types = None

    if self.vocab is None:
        vocab = Vocabulary(s0 + s1, prune_N=self.c['embprune'], icase=self.c['embicase'])
    else:
        vocab = self.vocab

    # mcqtypes pruning must happen *after* Vocabulary has been constructed!
    if types is not None:
        wanted = self.c['mcqtypes']
        # Compute the keep-mask once and apply it to every parallel list.
        keep = [t in wanted for t in types]
        s0 = [x for x, k in zip(s0, keep) if k]
        s1 = [x for x, k in zip(s1, keep) if k]
        y = [x for x, k in zip(y, keep) if k]
        qids = [x for x, k in zip(qids, keep) if k]
        print('Retained %d questions, %d hypotheses (%s types)'
              % (len(set(qids)), len({' '.join(s) for s in s0}), wanted))

    si0, sj0 = vocab.vectorize(s0, self.emb, spad=self.s0pad)
    si1, sj1 = vocab.vectorize(s1, self.emb, spad=self.s1pad)
    f0, f1 = nlp.sentence_flags(s0, s1, self.s0pad, self.s1pad)
    gr = graph_input_anssel(si0, si1, sj0, sj1, None, None, y, f0, f1, s0, s1)
    if qids is not None:
        gr['qids'] = qids
    if xtra is not None:
        gr['#'] = xtra['#']
        gr['@'] = xtra['@']
    gr, y = self.merge_questions(gr)

    if save_cache:
        # On-disk layout is kept as (s0, s1, y, vocab, gr) so existing cache
        # files stay readable.
        with open(cache_filename, "wb") as f:
            pickle.dump((s0, s1, y, vocab, gr), f)
            print("save")

    return (gr, y, vocab)