from collections import defaultdict


# get_best_span and get_phrase are module-level helpers defined elsewhere.
def ensemble3(context, wordss, y1_list, y2_list):
    # Vote over answer phrases: each model's best span contributes its score
    # to the phrase it selects; the highest-scoring phrase wins.
    d = defaultdict(float)
    for y1, y2 in zip(y1_list, y2_list):
        span, score = get_best_span(y1, y2)
        phrase = get_phrase(context, wordss, span)
        d[phrase] += score
    return max(d.items(), key=lambda pair: pair[1])[0]
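# For reference: a minimal, self-contained demo of the phrase-voting idea in
# ensemble3. The toy_get_best_span / toy_get_phrase helpers below are
# hypothetical stand-ins for the module's real helpers, and the probability
# lists are made-up data.
def _demo_ensemble3():
    def toy_get_best_span(y1, y2):
        # argmax start, then argmax stop at or after it, first sentence only
        j1 = max(range(len(y1[0])), key=lambda j: y1[0][j])
        j2 = max(range(j1, len(y2[0])), key=lambda j: y2[0][j])
        return ((0, j1), (0, j2 + 1)), y1[0][j1] * y2[0][j2]

    def toy_get_phrase(context, wordss, span):
        (f, j1), (_, j2) = span
        return " ".join(wordss[f][j1:j2])

    wordss = [["the", "cat", "sat"]]                   # one tokenized sentence
    y1_list = [[[0.1, 0.8, 0.1]], [[0.2, 0.7, 0.1]]]   # per-model start probs
    y2_list = [[[0.1, 0.1, 0.8]], [[0.1, 0.2, 0.7]]]   # per-model stop probs
    d = defaultdict(float)
    for y1, y2 in zip(y1_list, y2_list):
        span, score = toy_get_best_span(y1, y2)
        d[toy_get_phrase("the cat sat", wordss, span)] += score
    assert max(d.items(), key=lambda pair: pair[1])[0] == "cat sat"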
def get_evaluation(self, sess, batch):
    idxs, data_set = batch
    assert isinstance(data_set, DataSet)
    feed_dict = self.model.get_feed_dict(data_set, False)
    global_step, yp, yp2, loss, vals = sess.run(
        [self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())],
        feed_dict=feed_dict)
    yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
    spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    def _get2(context, xi, span):
        if len(xi) <= span[0][0]:
            return ""
        if len(xi[span[0][0]]) <= span[1][1]:
            return ""
        return get_phrase(context, xi, span)

    id2answer_dict = {id_: _get2(context, xi, span)
                      for id_, xi, span, context
                      in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
    id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
    id2answer_dict['scores'] = id2score_dict
    tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
    e = ForwardEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(),
                          float(loss), id2answer_dict, tensor_dict=tensor_dict)
    return e
def get_evaluation(self, sess, batch):
    idxs, data_set = self._split_batch(batch)
    assert isinstance(data_set, DataSet)
    feed_dict = self._get_feed_dict(batch)
    global_step, yp, yp2, loss, vals = sess.run(
        [self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())],
        feed_dict=feed_dict)
    y = data_set.data['y']
    if self.config.squash:
        # Flatten (sentence, word) gold coordinates into offsets over the whole passage.
        new_y = []
        for xi, yi in zip(data_set.data['x'], y):
            new_yi = []
            for start, stop in yi:
                start_offset = sum(map(len, xi[:start[0]]))
                stop_offset = sum(map(len, xi[:stop[0]]))
                new_start = 0, start_offset + start[1]
                new_stop = 0, stop_offset + stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y
    if self.config.single:
        # Drop the sentence index, keeping only within-sentence positions.
        new_y = []
        for yi in y:
            new_yi = []
            for start, stop in yi:
                new_start = 0, start[1]
                new_stop = 0, stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y

    yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
    spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    def _get2(context, xi, span):
        if len(xi) <= span[0][0]:
            return ""
        if len(xi[span[0][0]]) <= span[1][1]:
            return ""
        return get_phrase(context, xi, span)

    id2answer_dict = {id_: _get2(context, xi, span)
                      for id_, xi, span, context
                      in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
    id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
    id2answer_dict['scores'] = id2score_dict
    correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
    # NOTE: this variant scales span F1 by a factor of 20; the otherwise
    # identical version below reports the raw span F1.
    f1s = [20 * self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
    tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
    e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(),
                     y, correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
    return e
def get_evaluation(self, sess, batch):
    idxs, data_set = self._split_batch(batch)
    assert isinstance(data_set, DataSet)
    feed_dict = self._get_feed_dict(batch)
    global_step, yp, yp2, loss, vals = sess.run(
        [self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())],
        feed_dict=feed_dict)
    y = data_set.data['y']
    if self.config.squash:
        new_y = []
        for xi, yi in zip(data_set.data['x'], y):
            new_yi = []
            for start, stop in yi:
                start_offset = sum(map(len, xi[:start[0]]))
                stop_offset = sum(map(len, xi[:stop[0]]))
                new_start = 0, start_offset + start[1]
                new_stop = 0, stop_offset + stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y
    if self.config.single:
        new_y = []
        for yi in y:
            new_yi = []
            for start, stop in yi:
                new_start = 0, start[1]
                new_stop = 0, stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y

    yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
    spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    def _get2(context, xi, span):
        if len(xi) <= span[0][0]:
            return ""
        if len(xi[span[0][0]]) <= span[1][1]:
            return ""
        return get_phrase(context, xi, span)

    id2answer_dict = {id_: _get2(context, xi, span)
                      for id_, xi, span, context
                      in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
    id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
    id2answer_dict['scores'] = id2score_dict
    correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
    f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
    tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
    e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(),
                     y, correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
    return e
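# The config.squash branch above flattens (sentence, word) gold coordinates
# into offsets over the whole passage. A standalone toy sketch of that
# transform (variable names mirror the loop above; the data is made up):
def _demo_squash():
    xi = [["the", "cat", "sat"], ["it", "slept"]]  # two tokenized sentences
    yi = [((1, 0), (1, 2))]                        # gold span: "it slept"
    new_yi = []
    for start, stop in yi:
        # flat offset = number of tokens in all preceding sentences
        start_offset = sum(map(len, xi[:start[0]]))
        stop_offset = sum(map(len, xi[:stop[0]]))
        new_yi.append(((0, start_offset + start[1]), (0, stop_offset + stop[1])))
    assert new_yi == [((0, 3), (0, 5))]  # flat words 3..5 are "it slept"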
def ensemble2(context, wordss, y1_list, y2_list):
    start_dict = defaultdict(float)
    stop_dict = defaultdict(float)
    for y1, y2 in zip(y1_list, y2_list):
        span, score = get_best_span(y1, y2)
        start_dict[span[0]] += y1[span[0][0]][span[0][1]]
        stop_dict[span[1]] += y2[span[1][0]][span[1][1]]
    start = max(start_dict.items(), key=lambda pair: pair[1])[0]
    stop = max(stop_dict.items(), key=lambda pair: pair[1])[0]
    best_span = (start, stop)
    return get_phrase(context, wordss, best_span)
def ensemble1(context, wordss, y1_list, y2_list):
    """
    :param context: original context
    :param wordss: tokenized words (nested 2D list)
    :param y1_list: list of start-index probabilities (each element corresponds to the probabilities from a single model)
    :param y2_list: list of stop-index probabilities
    :return: answer phrase for the combined distributions
    """
    sum_y1 = combine_y_list(y1_list)
    sum_y2 = combine_y_list(y2_list)
    span, score = get_best_span(sum_y1, sum_y2)
    return get_phrase(context, wordss, span)
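# combine_y_list is not defined in this file. The sum_y1 / sum_y2 names above
# suggest an elementwise combination of the nested [sentence][word]
# probability lists across models; a hypothetical sketch assuming summation
# (the real helper may combine differently, e.g. by product):
def _assumed_combine_y_list(y_list):
    return [[sum(vals) for vals in zip(*sent_group)]
            for sent_group in zip(*y_list)]

# e.g. _assumed_combine_y_list([[[0.1, 0.8]], [[0.3, 0.5]]]) -> [[0.4, 1.3]]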
import gzip
import http.server
import json
import os
import pickle
import shutil
import socketserver

from jinja2 import Environment, FileSystemLoader
from tqdm import tqdm


def accuracy2_visualizer(args):
    model_name = args.model_name
    data_type = args.data_type
    num_per_page = args.num_per_page
    data_dir = args.data_dir
    run_id = args.run_id.zfill(2)
    step = args.step

    eval_path = os.path.join("out", model_name, run_id, "eval",
                             "{}-{}.pklz".format(data_type, str(step).zfill(6)))
    print("loading {}".format(eval_path))
    eval_ = pickle.load(gzip.open(eval_path, 'r'))

    _id = 0
    html_dir = "/tmp/list_results%d" % _id
    while os.path.exists(html_dir):
        _id += 1
        html_dir = "/tmp/list_results%d" % _id

    if os.path.exists(html_dir):
        shutil.rmtree(html_dir)
    os.mkdir(html_dir)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    templates_dir = os.path.join(cur_dir, 'templates')
    env = Environment(loader=FileSystemLoader(templates_dir))
    env.globals.update(zip=zip, reversed=reversed)
    template = env.get_template(args.template_name)

    data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
    shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
    print("loading {}".format(data_path))
    data = json.load(open(data_path, 'r'))
    print("loading {}".format(shared_path))
    shared = json.load(open(shared_path, 'r'))

    rows = []
    for i, (idx, yi, ypi, yp2i, wypi) in tqdm(
            enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2', 'wyp')])),
            total=len(eval_['idxs'])):
        id_, q, rx, answers = (data[key][idx] for key in ('ids', 'q', '*x', 'answerss'))
        x = shared['x'][rx[0]][rx[1]]
        ques = [" ".join(q)]
        para = [[word for word in sent] for sent in x]
        span, score = get_best_span_wy(wypi, 0.5) if args.wy else get_best_span(ypi, yp2i)
        ap = get_segment(para, span)
        # score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1]-1])
        row = {
            'id': id_,
            'title': "Hello world!",
            'ques': ques,
            'para': para,
            'y': yi[0][0],
            'y2': yi[0][1],
            'yp': wypi if args.wy else ypi,
            'yp2': wypi if args.wy else yp2i,
            'a': answers,
            'ap': ap,
            'score': score
        }
        rows.append(row)

        if i % num_per_page == 0:
            html_path = os.path.join(html_dir, "%s.html" % str(i).zfill(8))
        if (i + 1) % num_per_page == 0 or (i + 1) == len(eval_['y']):
            var_dict = {'title': "Accuracy Visualization", 'rows': rows}
            with open(html_path, "wb") as f:
                f.write(template.render(**var_dict).encode('UTF-8'))
            rows = []

    os.chdir(html_dir)
    port = args.port
    host = args.host

    # Overridden to suppress log messages.
    class MyHandler(http.server.SimpleHTTPRequestHandler):
        def log_message(self, format, *args):
            pass

    handler = MyHandler
    httpd = socketserver.TCPServer((host, port), handler)
    if args.open == 'True':
        os.system("open http://%s:%d" % (args.host, args.port))
    print("serving at %s:%d" % (host, port))
    httpd.serve_forever()
def get_evaluation(self, sess, batch):
    idxs, data_set = self._split_batch(batch)
    assert isinstance(data_set, DataSet)
    feed_dict = self._get_feed_dict(batch)
    if self.config.na:
        global_step, gen_q, yp, yp2, wyp, loss, na, vals = sess.run(
            [self.global_step, self.gen_q, self.yp, self.yp2, self.wyp, self.loss, self.na,
             list(self.tensor_dict.values())],
            feed_dict=feed_dict)
    else:
        global_step, gen_q, yp, yp2, wyp, loss, vals = sess.run(
            [self.global_step, self.gen_q, self.yp, self.yp2, self.wyp, self.loss,
             list(self.tensor_dict.values())],
            feed_dict=feed_dict)
    y = data_set.data['y']
    if self.config.squash:
        new_y = []
        for xi, yi in zip(data_set.data['x'], y):
            new_yi = []
            for start, stop in yi:
                start_offset = sum(map(len, xi[:start[0]]))
                stop_offset = sum(map(len, xi[:stop[0]]))
                new_start = 0, start_offset + start[1]
                new_stop = 0, stop_offset + stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y
    if self.config.single:
        new_y = []
        for yi in y:
            new_yi = []
            for start, stop in yi:
                new_start = 0, start[1]
                new_stop = 0, stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y

    yp, yp2, wyp = yp[:data_set.num_examples], yp2[:data_set.num_examples], wyp[:data_set.num_examples]
    if self.config.wy:
        spans, scores = zip(*[get_best_span_wy(wypi, self.config.th) for wypi in wyp])
    else:
        spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    def _get2(context, xi, span):
        if len(xi) <= span[0][0]:
            return ""
        if len(xi[span[0][0]]) <= span[1][1]:
            return ""
        return get_phrase(context, xi, span)

    if self.config.mode == "train":
        id2answer_dict = {id_: _get2(context, xi, span)
                          for id_, xi, span, context
                          in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
    else:
        id2answer_dict = {id_: [_get2(context, xi, span),
                                ' '.join([data_set.shared['idx2word'][elem] for elem in gq if elem > 1])]
                          for id_, xi, span, context, gq
                          in zip(data_set.data['ids'], data_set.data['x'], spans,
                                 data_set.data['p'], gen_q)}
    id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
    id2answer_dict['scores'] = id2score_dict
    if self.config.na:
        id2na_dict = {id_: float(each) for id_, each in zip(data_set.data['ids'], na)}
        id2answer_dict['na'] = id2na_dict
    correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
    f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
    tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
    e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(),
                     y, correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
    print("[Loss] :", float(loss))
    if self.config.wy:
        e.dict['wyp'] = wyp.tolist()
    return e
def get_scores(self, sess, batches, get_summary=False, k=10):
    assert isinstance(sess, tf.Session)
    feed_dict = {}
    yis = []
    for batch, model in zip(batches, self.models):
        _, ds = batch
        paragraph_pointers = ds.data['*p'][0]
        par = ds.shared['p'][paragraph_pointers[0]]
        yis.append(ds.data['y'])
        feed_dict.update(model.get_feed_dict(ds, True))

    # ASSUMPTION: only 1 replica model.
    # NOTE: sometimes len(y) < batch size; if so, pad y with zeros.
    y = yis[0]
    loss_mask = [True] * len(y)
    while len(y) < model.config.batch_size:
        y.append([[[0, 0], [0, 1]]])
        loss_mask.append(False)

    yps, yp2s = sess.run([self.yps, self.yp2s], feed_dict=feed_dict)

    top_k_spans = []
    top_k_scores = []
    top_k_matches = []
    start_features = []
    end_features = []
    span_lengths = []
    yp = yps[0]
    yp2 = yp2s[0]
    _, data_set = batches[0]
    for _ in range(k):
        spans_, scores_ = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])
        if len(top_k_spans) == 0:
            print("Appending spans")
            for _ in range(len(spans_)):
                start_features.append([])
                end_features.append([])
                top_k_spans.append([])
                top_k_scores.append([])
                top_k_matches.append([])
                span_lengths.append([])
        for i in range(0, len(spans_)):
            cur_span = spans_[i]
            cur_score = scores_[i]
            # Zero out the chosen start/stop probabilities so the next
            # iteration extracts the next-best span.
            yp[i][cur_span[0][0]][cur_span[0][1]] = 0
            yp2[i][cur_span[1][0]][cur_span[1][1] - 1] = 0
            top_k_spans[i].append(cur_span)
            top_k_scores[i].append(cur_score)
            span_lengths[i].append(cur_span[1][1] - cur_span[0][1])

    top_k_f1s = np.array([list(map(lambda sp: F1Evaluator.span_f1(yi, sp), top_k_span))
                          for yi, top_k_span in zip(y, top_k_spans)])
    top_k_matches = np.array([list(map(lambda sp: F1Evaluator.compare2(yi, sp), top_k_span))
                              for yi, top_k_span in zip(y, top_k_spans)])
    top_k_scores = np.array(top_k_scores)
    best_f1_indices = np.argmax(top_k_f1s, axis=1)
    k_scores = []
    predicted_f1_scores = top_k_f1s[:, 0]
    predicted_matches = top_k_matches[:, 0]
    predicted_scores = top_k_scores[:, 0]
    predicted_spans = np.array(top_k_spans)[:, 0]
    print("Span shape %s f1 indices shape %s" % (len(top_k_spans), len(best_f1_indices)))
    best_spans = np.array(top_k_spans)[range(len(top_k_spans)), best_f1_indices]
    best_scores = top_k_scores[range(len(top_k_scores)), best_f1_indices]
    best_matches = np.max(top_k_matches, axis=1)
    best_f1_scores = np.max(top_k_f1s, axis=1)

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    top_k_answers = np.array([list(map(lambda sp: " ".join(_get(xi, sp)), spans))
                              for xi, spans in zip(data_set.data['x'], top_k_spans)])
    best_answers = top_k_answers[range(len(top_k_answers)), best_f1_indices[0:len(top_k_answers)]]
    predicted_answers = top_k_answers[:, 0]

    results = {}
    results['q'] = data_set.data['q']
    results['answerss'] = data_set.data['answerss']
    results['x'] = data_set.data['x']
    results['predicted_answers'] = predicted_answers
    results['best_answers'] = best_answers
    results['best_f1_indices'] = best_f1_indices
    results['top_k_f1_scores'] = top_k_f1s
    results['best_f1_scores'] = best_f1_scores
    results['predicted_f1_scores'] = predicted_f1_scores
    results['best_spans'] = best_spans
    results['predicted_spans'] = predicted_spans
    results['top_k_spans'] = top_k_spans
    results['loss_mask'] = loss_mask
    return [results]  # hack: we only have one batch
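# The k-loop above extracts the top-k spans greedily: after taking the best
# span, it zeroes that span's start/stop probabilities so the next pass
# returns the runner-up. The same idea on a single flat distribution
# (toy data, demo only):
def _demo_topk():
    probs = [0.1, 0.5, 0.3, 0.1]
    top_k = []
    for _ in range(3):
        j = max(range(len(probs)), key=lambda i: probs[i])
        top_k.append((j, probs[j]))
        probs[j] = 0  # suppress so the next pass finds the next-best cell
    assert top_k == [(1, 0.5), (2, 0.3), (0, 0.1)]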
def accuracy2_visualizer(args):
    model_name = args.model_name
    data_type = args.data_type
    num_per_page = args.num_per_page
    data_dir = args.data_dir
    run_id = args.run_id.zfill(2)
    step = args.step

    eval_path = os.path.join("out", model_name, run_id, "eval",
                             "{}-{}.json".format(data_type, str(step).zfill(6)))
    print("loading {}".format(eval_path))
    eval_ = json.load(open(eval_path, 'r'))

    _id = 0
    html_dir = "/tmp/list_results%d" % _id
    while os.path.exists(html_dir):
        _id += 1
        html_dir = "/tmp/list_results%d" % _id

    if os.path.exists(html_dir):
        shutil.rmtree(html_dir)
    os.mkdir(html_dir)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    templates_dir = os.path.join(cur_dir, 'templates')
    env = Environment(loader=FileSystemLoader(templates_dir))
    env.globals.update(zip=zip, reversed=reversed)
    template = env.get_template(args.template_name)

    data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
    shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
    print("loading {}".format(data_path))
    data = json.load(open(data_path, 'r'))
    print("loading {}".format(shared_path))
    shared = json.load(open(shared_path, 'r'))

    rows = []
    for i, (idx, yi, ypi, yp2i) in tqdm(
            enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2')])),
            total=len(eval_['idxs'])):
        id_, q, rx, answers = (data[key][idx] for key in ('ids', 'q', '*x', 'answerss'))
        x = shared['x'][rx[0]][rx[1]]
        ques = [" ".join(q)]
        para = [[word for word in sent] for sent in x]
        # get_best_span returns (span, score); the display score is recomputed below.
        span, _ = get_best_span(ypi, yp2i)
        ap = get_segment(para, span)
        score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1] - 1])
        row = {
            'id': id_,
            'title': "Hello world!",
            'ques': ques,
            'para': para,
            'y': yi[0][0],
            'y2': yi[0][1],
            'yp': ypi,
            'yp2': yp2i,
            'a': answers,
            'ap': ap,
            'score': score
        }
        rows.append(row)

        if i % num_per_page == 0:
            html_path = os.path.join(html_dir, "%s.html" % str(i).zfill(8))
        if (i + 1) % num_per_page == 0 or (i + 1) == len(eval_['y']):
            var_dict = {'title': "Accuracy Visualization", 'rows': rows}
            with open(html_path, "wb") as f:
                f.write(template.render(**var_dict).encode('UTF-8'))
            rows = []

    os.chdir(html_dir)
    port = args.port
    host = args.host

    # Overridden to suppress log messages.
    class MyHandler(http.server.SimpleHTTPRequestHandler):
        def log_message(self, format, *args):
            pass

    handler = MyHandler
    httpd = socketserver.TCPServer((host, port), handler)
    if args.open == 'True':
        os.system("open http://%s:%d" % (args.host, args.port))
    print("serving at %s:%d" % (host, port))
    httpd.serve_forever()
def get_evaluation(self, sess, batch):
    idxs, data_set = batch
    assert isinstance(data_set, DataSet)
    feed_dict = self.model.get_feed_dict(data_set, False)
    if self.config.mode == 'server':
        global_step, yp, yp2, loss, vals, na, u, h, p0, g1, g2 = sess.run(
            [self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values()),
             self.na, self.u, self.h, self.p0, self.g1, self.g2],
            feed_dict=feed_dict)
        u_l, h_l, p0_l, g1_l, g2_l = [x.tolist() for x in (u, h, p0, g1, g2)]
    else:
        global_step, yp, yp2, loss, vals, na = sess.run(
            [self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values()),
             self.na],
            feed_dict=feed_dict)
        u_l = h_l = p0_l = g1_l = g2_l = []

    yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
    spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    def _get2(context, xi, span):
        if len(xi) <= span[0][0]:
            return ""
        if len(xi[span[0][0]]) <= span[1][1]:
            return ""
        return get_phrase(context, xi, span)

    id2answer_dict = {id_: _get2(context, xi, span)
                      for id_, xi, span, context
                      in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
    id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
    id2answer_dict['scores'] = id2score_dict
    id2na_dict = {id_: float(each) for id_, each in zip(data_set.data['ids'], na)}
    id2answer_dict['na'] = id2na_dict
    tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
    e = ForwardEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(),
                          float(loss), id2answer_dict, na.tolist(),
                          u_l, h_l, p0_l, g1_l, g2_l, tensor_dict=tensor_dict)
    return e
def get_evaluation(self, sess, batch):
    idxs, data_set = self._split_batch(batch)
    assert isinstance(data_set, DataSet)
    feed_dict = self._get_feed_dict(batch)
    if self.config.na:
        global_step, yp, yp2, wyp, loss, na, vals = sess.run(
            [self.global_step, self.yp, self.yp2, self.wyp, self.loss, self.na,
             list(self.tensor_dict.values())],
            feed_dict=feed_dict)
    else:
        global_step, yp, yp2, wyp, loss, vals = sess.run(
            [self.global_step, self.yp, self.yp2, self.wyp, self.loss,
             list(self.tensor_dict.values())],
            feed_dict=feed_dict)
    y = data_set.data['y']
    yp, yp2, wyp = yp[:data_set.num_examples], yp2[:data_set.num_examples], wyp[:data_set.num_examples]
    if self.config.wy:
        spans, scores = zip(*[get_best_span_wy(wypi, self.config.th) for wypi in wyp])
    else:
        spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    def _get2(context, xi, span):
        if len(xi) <= span[0][0]:
            return ""
        if len(xi[span[0][0]]) <= span[1][1]:
            return ""
        return get_phrase(context, xi, span)

    if self.config.split_supports:
        id2answer_dict = {id_: _get2(context[0], xi, span)
                          for id_, xi, span, context
                          in zip(data_set.data['ids'], data_set.data['x2'], spans, data_set.data['p2'])}
    else:
        id2answer_dict = {id_: _get2(context[0], xi, span)
                          for id_, xi, span, context
                          in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
    id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
    id2answer_dict['scores'] = id2score_dict
    if self.config.na:
        id2na_dict = {id_: float(each) for id_, each in zip(data_set.data['ids'], na)}
        id2answer_dict['na'] = id2na_dict
    correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
    f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
    tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
    e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(),
                     y, correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
    if self.config.wy:
        e.dict['wyp'] = wyp.tolist()
    return e
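# When config.na is set, id2answer_dict carries a parallel 'na' map of
# no-answer probabilities keyed by question id. A hedged sketch of how a
# consumer might use it -- the 0.5 cutoff and the blank-answer policy are
# assumptions, not part of this code:
def _apply_na_threshold(id2answer_dict, threshold=0.5):
    na = id2answer_dict.get('na', {})
    return {id_: ("" if na.get(id_, 0.0) > threshold else ans)
            for id_, ans in id2answer_dict.items()
            if id_ not in ('scores', 'na')}  # skip the auxiliary maps

# e.g. _apply_na_threshold({'q1': 'the cat', 'q2': 'a dog',
#                           'scores': {'q1': 0.9, 'q2': 0.4},
#                           'na': {'q1': 0.1, 'q2': 0.8}})
# -> {'q1': 'the cat', 'q2': ''}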
def get_evaluation(self, sess, batch):
    """
    :param sess:
    :param batch: tuple of batch indices, e.g. 0 to 59 or 60 to 119
    :return:
    """
    idxs, data_set = self._split_batch(batch)  # idxs e.g. 0 to 59; data_set is a DataSet instance
    assert isinstance(data_set, DataSet)
    feed_dict = self._get_feed_dict(batch)
    global_step, yp, yp2, loss, vals = sess.run(
        [self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())],
        feed_dict=feed_dict)
    y = data_set.data['y']
    ind = 0
    if self.print:
        for y_t, y1_p, y2_p in zip(y, yp, yp2):
            y1_index = np.argmax(y1_p)
            y2_index = np.argmax(y2_p)
            print(str(y_t) + ", " + str(y1_index) + ", " + str(y2_index))
            print("the question is: {}".format(data_set.data['q'][ind]))
            # print("the answer is: {}".format(data_set.data['answerss'][ind]))
            print("the answer is: {}".format(
                data_set.data['x'][ind][0][y_t[0][0][1]:y_t[0][1][1]]))
            print("the pred answer is: {}".format(
                data_set.data['x'][ind][0][min(y1_index, y2_index):max(y1_index, y2_index) + 1]))
            ind += 1
    if self.config.squash:
        new_y = []
        for xi, yi in zip(data_set.data['x'], y):
            new_yi = []
            for start, stop in yi:
                start_offset = sum(map(len, xi[:start[0]]))
                stop_offset = sum(map(len, xi[:stop[0]]))
                new_start = 0, start_offset + start[1]
                new_stop = 0, stop_offset + stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y
    if self.config.single:
        new_y = []
        for yi in y:
            new_yi = []
            for start, stop in yi:
                new_start = 0, start[1]
                new_stop = 0, stop[1]
                new_yi.append((new_start, new_stop))
            new_y.append(new_yi)
        y = new_y

    yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
    spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

    def _get(xi, span):
        if len(xi) <= span[0][0]:
            return [""]
        if len(xi[span[0][0]]) <= span[1][1]:
            return [""]
        return xi[span[0][0]][span[0][1]:span[1][1]]

    def _get2(context, xi, span):
        if len(xi) <= span[0][0]:
            return ""
        if len(xi[span[0][0]]) <= span[1][1]:
            return ""
        return get_phrase(context, xi, span)

    id2answer_dict = {id_: _get2(context, xi, span)
                      for id_, xi, span, context
                      in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
    id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
    id2answer_dict['scores'] = id2score_dict
    correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
    f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
    tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
    e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(),
                     y, correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
    return e
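# A sketch of how these get_evaluation methods are typically driven. The
# batch source and the use of + to aggregate Evaluation objects are
# assumptions about surrounding code that is not shown here:
#
#   e = None
#   for batch in data_set.get_batches(config.batch_size, num_batches=num_steps):
#       ei = evaluator.get_evaluation(sess, batch)
#       e = ei if e is None else e + ei  # assumes Evaluation defines __add__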