Example #1
def ensemble3(context, wordss, y1_list, y2_list):
    d = defaultdict(float)
    for y1, y2 in zip(y1_list, y2_list):
        span, score = get_best_span(y1, y2)
        phrase = get_phrase(context, wordss, span)
        d[phrase] += score
    return max(d.items(), key=lambda pair: pair[1])[0]
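# Note on Example #1: get_best_span is not defined on this page. Based on how the
# examples use its result (a ((sentence, start), (sentence, stop)) span with an
# exclusive word stop, plus a start_prob * stop_prob score), a minimal sketch could
# look like the following. This is an illustrative assumption, not the repository's
# exact implementation.
def get_best_span_sketch(ypi, yp2i):
    """ypi / yp2i: per-sentence lists of start / stop probabilities for one example."""
    best_score, best_span = 0.0, ((0, 0), (0, 1))
    for f, (start_probs, stop_probs) in enumerate(zip(ypi, yp2i)):
        best_start_prob, best_start_j = 0.0, 0
        for j, (p1, p2) in enumerate(zip(start_probs, stop_probs)):
            if p1 > best_start_prob:  # best start seen so far in this sentence
                best_start_prob, best_start_j = p1, j
            score = best_start_prob * p2  # pair it with the current stop
            if score > best_score:
                best_score = score
                best_span = ((f, best_start_j), (f, j + 1))  # stop index is exclusive
    return best_span, best_score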
Example #2
    def get_evaluation(self, sess, batch):
        idxs, data_set = batch
        assert isinstance(data_set, DataSet)
        feed_dict = self.model.get_feed_dict(data_set, False)
        global_step, yp, yp2, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)

        yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
        spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)
        id2answer_dict = {id_: _get2(context, xi, span)
                          for id_, xi, span, context in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
        id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
        id2answer_dict['scores'] = id2score_dict
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = ForwardEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), float(loss), id2answer_dict, tensor_dict=tensor_dict)
        return e
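# Note on the evaluators above: get_phrase is not shown on this page. The callers pass
# the raw paragraph string (data_set.data['p']), the tokenized sentences
# (data_set.data['x']) and a span, and expect the answer back as a substring of the raw
# paragraph. A minimal sketch under that assumption, which locates each token in the
# context and returns the character range from the span's first token to the end of its
# last token (tokens are assumed to appear verbatim in the context):
def get_phrase_sketch(context, wordss, span):
    (f1, j1), (f2, j2) = span  # j2 is an exclusive word index
    char_idx = 0
    start_char = stop_char = 0
    for f, words in enumerate(wordss):
        for j, word in enumerate(words):
            char_idx = context.find(word, char_idx)
            if f == f1 and j == j1:
                start_char = char_idx
            char_idx += len(word)
            if f == f2 and j == j2 - 1:
                stop_char = char_idx
    return context[start_char:stop_char]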
Example #3
    def get_evaluation(self, sess, batch):
        idxs, data_set = batch
        assert isinstance(data_set, DataSet)
        feed_dict = self.model.get_feed_dict(data_set, False)
        global_step, yp, yp2, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)

        yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
        spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)

        id2answer_dict = {id_: _get2(context, xi, span)
                          for id_, xi, span, context in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
        id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
        id2answer_dict['scores'] = id2score_dict
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = ForwardEvaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), float(loss), id2answer_dict, tensor_dict=tensor_dict)
        return e
Example #4
    def get_evaluation(self, sess, batch):
        idxs, data_set = self._split_batch(batch)
        assert isinstance(data_set, DataSet)
        feed_dict = self._get_feed_dict(batch)
        global_step, yp, yp2, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
        y = data_set.data['y']
        if self.config.squash:
            new_y = []
            for xi, yi in zip(data_set.data['x'], y):
                new_yi = []
                for start, stop in yi:
                    start_offset = sum(map(len, xi[:start[0]]))
                    stop_offset = sum(map(len, xi[:stop[0]]))
                    new_start = 0, start_offset + start[1]
                    new_stop = 0, stop_offset + stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y
        if self.config.single:
            new_y = []
            for yi in y:
                new_yi = []
                for start, stop in yi:
                    new_start = 0, start[1]
                    new_stop = 0, stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y
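        # Worked example of the two label rewrites above (illustrative): with
        # xi = [["a", "b", "c"], ["d", "e"]] and a gold span ((1, 0), (1, 2)),
        # `squash` flattens sentence offsets into word offsets over the whole
        # paragraph, giving ((0, 3), (0, 5)), while `single` simply drops the
        # sentence index, giving ((0, 0), (0, 2)).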

        yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
        spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)

        id2answer_dict = {id_: _get2(context, xi, span)
                          for id_, xi, span, context in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
        id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
        id2answer_dict['scores'] = id2score_dict
        correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
        f1s = [20 * self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
        
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y,
                         correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
        return e
Example #5
    def get_evaluation(self, sess, batch):
        idxs, data_set = self._split_batch(batch)
        assert isinstance(data_set, DataSet)
        feed_dict = self._get_feed_dict(batch)
        global_step, yp, yp2, loss, vals = sess.run([self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
        y = data_set.data['y']
        if self.config.squash:
            new_y = []
            for xi, yi in zip(data_set.data['x'], y):
                new_yi = []
                for start, stop in yi:
                    start_offset = sum(map(len, xi[:start[0]]))
                    stop_offset = sum(map(len, xi[:stop[0]]))
                    new_start = 0, start_offset + start[1]
                    new_stop = 0, stop_offset + stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y
        if self.config.single:
            new_y = []
            for yi in y:
                new_yi = []
                for start, stop in yi:
                    new_start = 0, start[1]
                    new_stop = 0, stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y

        yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
        spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)

        id2answer_dict = {id_: _get2(context, xi, span)
                          for id_, xi, span, context in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
        id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
        id2answer_dict['scores'] = id2score_dict
        correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
        f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y,
                         correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
        return e
Example #6
def ensemble2(context, wordss, y1_list, y2_list):
    start_dict = defaultdict(float)
    stop_dict = defaultdict(float)
    for y1, y2 in zip(y1_list, y2_list):
        span, score = get_best_span(y1, y2)
        start_dict[span[0]] += y1[span[0][0]][span[0][1]]
        stop_dict[span[1]] += y2[span[1][0]][span[1][1]]
    start = max(start_dict.items(), key=lambda pair: pair[1])[0]
    stop = max(stop_dict.items(), key=lambda pair: pair[1])[0]
    best_span = (start, stop)
    return get_phrase(context, wordss, best_span)
Example #7
def ensemble1(context, wordss, y1_list, y2_list):
    """

    :param context: Original context
    :param wordss: tokenized words (nested 2D list)
    :param y1_list: list of start index probs (each element corresponds to probs from a single model)
    :param y2_list: list of stop index probs
    :return: answer phrase for the best span of the combined start/stop probabilities
    """
    sum_y1 = combine_y_list(y1_list)
    sum_y2 = combine_y_list(y2_list)
    span, score = get_best_span(sum_y1, sum_y2)
    return get_phrase(context, wordss, span)
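# Note on Example #7: combine_y_list is not shown on this page. ensemble1 appears to
# merge the per-model probability tensors element-wise before picking the best span; a
# minimal element-wise sum over the nested lists/arrays could look like this
# (an illustrative assumption, not the repository's exact code):
def combine_y_list_sketch(y_list):
    def add(a, b):
        # recurse over the nested list structure; leaves (numbers or arrays) are summed
        if isinstance(a, (list, tuple)):
            return [add(ai, bi) for ai, bi in zip(a, b)]
        return a + b
    combined = y_list[0]
    for y in y_list[1:]:
        combined = add(combined, y)
    return combined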
Example #8
def accuracy2_visualizer(args):
    model_name = args.model_name
    data_type = args.data_type
    num_per_page = args.num_per_page
    data_dir = args.data_dir
    run_id = args.run_id.zfill(2)
    step = args.step

    eval_path = os.path.join(
        "out", model_name, run_id, "eval",
        "{}-{}.pklz".format(data_type,
                            str(step).zfill(6)))
    print("loading {}".format(eval_path))
    eval_ = pickle.load(gzip.open(eval_path, 'r'))

    _id = 0
    html_dir = "/tmp/list_results%d" % _id
    while os.path.exists(html_dir):
        _id += 1
        html_dir = "/tmp/list_results%d" % _id

    if os.path.exists(html_dir):
        shutil.rmtree(html_dir)
    os.mkdir(html_dir)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    templates_dir = os.path.join(cur_dir, 'templates')
    env = Environment(loader=FileSystemLoader(templates_dir))
    env.globals.update(zip=zip, reversed=reversed)
    template = env.get_template(args.template_name)

    data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
    shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
    print("loading {}".format(data_path))
    data = json.load(open(data_path, 'r'))
    print("loading {}".format(shared_path))
    shared = json.load(open(shared_path, 'r'))

    rows = []
    for i, (idx, yi, ypi, yp2i, wypi) in tqdm(enumerate(
            zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2', 'wyp')])),
                                              total=len(eval_['idxs'])):
        id_, q, rx, answers = (data[key][idx]
                               for key in ('ids', 'q', '*x', 'answerss'))
        x = shared['x'][rx[0]][rx[1]]
        ques = [" ".join(q)]
        para = [[word for word in sent] for sent in x]
        span, score = get_best_span_wy(
            wypi, 0.5) if args.wy else get_best_span(ypi, yp2i)
        ap = get_segment(para, span)
        # score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1]-1])

        row = {
            'id': id_,
            'title': "Hello world!",
            'ques': ques,
            'para': para,
            'y': yi[0][0],
            'y2': yi[0][1],
            'yp': wypi if args.wy else ypi,
            'yp2': wypi if args.wy else yp2i,
            'a': answers,
            'ap': ap,
            'score': score
        }
        rows.append(row)

        if i % num_per_page == 0:
            html_path = os.path.join(html_dir, "%s.html" % str(i).zfill(8))

        if (i + 1) % num_per_page == 0 or (i + 1) == len(eval_['y']):
            var_dict = {'title': "Accuracy Visualization", 'rows': rows}
            with open(html_path, "wb") as f:
                f.write(template.render(**var_dict).encode('UTF-8'))
            rows = []

    os.chdir(html_dir)
    port = args.port
    host = args.host

    # Overriding to suppress log message
    class MyHandler(http.server.SimpleHTTPRequestHandler):
        def log_message(self, format, *args):
            pass

    handler = MyHandler
    httpd = socketserver.TCPServer((host, port), handler)
    if args.open == 'True':
        os.system("open http://%s:%d" % (args.host, args.port))
    print("serving at %s:%d" % (host, port))
    httpd.serve_forever()
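# Note on Example #8: the `args` namespace is not defined on this page. A hypothetical
# argparse setup covering the attributes the visualizer reads (model_name, data_type,
# num_per_page, data_dir, run_id, step, template_name, wy, host, port, open) might be:
import argparse

def get_args_sketch():
    parser = argparse.ArgumentParser(description="accuracy visualizer (illustrative)")
    parser.add_argument("--model_name", default="basic")
    parser.add_argument("--data_type", default="dev")
    parser.add_argument("--num_per_page", type=int, default=100)
    parser.add_argument("--data_dir", default="data/squad")
    parser.add_argument("--run_id", default="0")
    parser.add_argument("--step", type=int, default=0)
    parser.add_argument("--template_name", default="visualizer.html")
    parser.add_argument("--wy", action="store_true")  # use get_best_span_wy instead of get_best_span
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument("--open", default="False")  # the string 'True' opens a browser tab
    return parser.parse_args()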
Example #9
    def get_evaluation(self, sess, batch):
        idxs, data_set = self._split_batch(batch)
        assert isinstance(data_set, DataSet)
        feed_dict = self._get_feed_dict(batch)
        if self.config.na:
            global_step, gen_q, yp, yp2, wyp, loss, na, vals = sess.run([self.global_step, self.gen_q, self.yp, self.yp2, self.wyp, self.loss, self.na, list(self.tensor_dict.values())], feed_dict=feed_dict)
        else:
            global_step, gen_q, yp, yp2, wyp, loss, vals = sess.run([self.global_step, self.gen_q, self.yp, self.yp2, self.wyp, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
        y = data_set.data['y']
        if self.config.squash:
            new_y = []
            for xi, yi in zip(data_set.data['x'], y):
                new_yi = []
                for start, stop in yi:
                    start_offset = sum(map(len, xi[:start[0]]))
                    stop_offset = sum(map(len, xi[:stop[0]]))
                    new_start = 0, start_offset + start[1]
                    new_stop = 0, stop_offset + stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y
        if self.config.single:
            new_y = []
            for yi in y:
                new_yi = []
                for start, stop in yi:
                    new_start = 0, start[1]
                    new_stop = 0, stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y

        yp, yp2, wyp = yp[:data_set.num_examples], yp2[:data_set.num_examples], wyp[:data_set.num_examples]
        if self.config.wy:
            spans, scores = zip(*[get_best_span_wy(wypi, self.config.th) for wypi in wyp])
        else:
            spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)
        if self.config.mode == "train":
            id2answer_dict = {id_: _get2(context, xi, span)
                              for id_, xi, span, context in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
        else:
            id2answer_dict = {id_: [_get2(context, xi, span), ' '.join([data_set.shared['idx2word'][elem] for elem in gq if elem > 1])]
                              for id_, xi, span, context, gq in zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'], gen_q)}
        id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
        id2answer_dict['scores'] = id2score_dict
        if self.config.na:
            id2na_dict = {id_: float(each) for id_, each in zip(data_set.data['ids'], na)}
            id2answer_dict['na'] = id2na_dict
        correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
        f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y,
                         correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
        print("[Loss] :", float(loss))
        if self.config.wy:
            e.dict['wyp'] = wyp.tolist()
        return e
Example #10
    def get_scores(self, sess, batches, get_summary=False, k=10):
        assert isinstance(sess, tf.Session)
        feed_dict = {}
        yis = []
        for batch, model in zip(batches, self.models):
            _, ds = batch
            paragraph_pointers = ds.data['*p'][0]
            par = ds.shared['p'][paragraph_pointers[0]]
            
            """
            print(par)
            print(paragraph_pointers)
            print(ds.data.keys())
            print(ds.shared.keys())
            assert(False)
            print(ds.data['span_answerss'][0])
            print(ds.data['answerss'][0])
            print(ds.data['*p'][0])
            print(ds.shared.keys())
            print(ds.shared['p'])
            assert(False)
            """
            yis.append(ds.data['y'])
            feed_dict.update(model.get_feed_dict(ds, True))

        # ASSUMPTION: Only 1 replica model
        # NOTE: sometimes y < batch size. If so pad y with zeros
        y = yis[0]
        loss_mask = [True] * len(y)
        while len(y) < model.config.batch_size:
            y.append([[[0, 0], [0, 1]]])
            loss_mask.append(False)

        yps, yp2s = sess.run([self.yps, self.yp2s], feed_dict=feed_dict)

        top_k_spans = []
        top_k_scores = []
        top_k_matches = []

        start_features = []
        end_features = []
        span_lengths = []

        yp = yps[0]
        yp2 = yp2s[0]
        _, data_set = batches[0]
        for _ in range(k):
            spans_, scores_ = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])
            if len(top_k_spans) == 0:
                print("Appending spans")
                for _ in range(len(spans_)):
                    start_features.append([])
                    end_features.append([])
                    top_k_spans.append([])
                    top_k_scores.append([])
                    top_k_matches.append([])
                    span_lengths.append([])

            for i in range(0, len(spans_)):
                cur_span = spans_[i]
                cur_score = scores_[i]

                yp[i][cur_span[0][0]][cur_span[0][1]] = 0  # suppress this start prob so the next pass finds the next-best span
                yp2[i][cur_span[1][0]][cur_span[1][1] - 1] = 0  # suppress the matching (inclusive) stop prob

                top_k_spans[i].append(cur_span)
                top_k_scores[i].append(cur_score)
                span_lengths[i].append(cur_span[1][1] - cur_span[0][1])


        top_k_f1s = np.array([list(map(lambda sp: F1Evaluator.span_f1(yi, sp), \
            top_k_span)) for yi, top_k_span in zip(y, top_k_spans)])
        top_k_matches = np.array([list(map(lambda sp: F1Evaluator.compare2(yi, sp), \
            top_k_span)) for yi, top_k_span in zip(y, top_k_spans)])
        top_k_scores = np.array(top_k_scores)
        best_f1_indices = np.argmax(top_k_f1s, axis=1)

        k_scores = [] 
        predicted_f1_scores = top_k_f1s[:, 0]
        predicted_matches = top_k_matches[:, 0]
        predicted_scores = top_k_scores[:, 0]
        predicted_spans = np.array(top_k_spans)[:, 0]

        print("Span shape %s f1 indices shape %s" % (len(top_k_spans), len(best_f1_indices)))
        best_spans = np.array(top_k_spans)[range(len(top_k_spans)), best_f1_indices]
        best_scores = top_k_scores[range(len(top_k_scores)), best_f1_indices]
        best_matches = np.max(top_k_matches, axis=1)
        best_f1_scores = np.max(top_k_f1s, axis=1)


        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        top_k_answers = np.array([list(map(lambda sp: " ".join(_get(xi, sp)), spans))
                          for xi, spans in zip(data_set.data['x'], top_k_spans)])
        best_answers = top_k_answers[range(len(top_k_answers)), best_f1_indices[0:len(top_k_answers)]]
        predicted_answers = top_k_answers[:, 0]



        results = {}
        results['q'] = data_set.data['q']
        results['answerss'] = data_set.data['answerss']
        results['x'] = data_set.data['x']
        results['predicted_answers'] = predicted_answers 
        results['best_answers'] = best_answers
        results['best_f1_indices'] = best_f1_indices
        results['top_k_f1_scores'] = top_k_f1s
        results['best_f1_scores'] = best_f1_scores 
        results['predicted_f1_scores'] = predicted_f1_scores
        results['best_spans'] = best_spans
        results['predicted_spans'] = predicted_spans
        results['top_k_spans'] = top_k_spans
        results['loss_mask'] = loss_mask
        return [results] #Hack since we only have one batch

        """
def accuracy2_visualizer(args):
    model_name = args.model_name
    data_type = args.data_type
    num_per_page = args.num_per_page
    data_dir = args.data_dir
    run_id = args.run_id.zfill(2)
    step = args.step

    eval_path = os.path.join("out", model_name, run_id, "eval", "{}-{}.json".format(data_type, str(step).zfill(6)))
    print("loading {}".format(eval_path))
    eval_ = json.load(open(eval_path, 'r'))

    _id = 0
    html_dir = "/tmp/list_results%d" % _id
    while os.path.exists(html_dir):
        _id += 1
        html_dir = "/tmp/list_results%d" % _id

    if os.path.exists(html_dir):
        shutil.rmtree(html_dir)
    os.mkdir(html_dir)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    templates_dir = os.path.join(cur_dir, 'templates')
    env = Environment(loader=FileSystemLoader(templates_dir))
    env.globals.update(zip=zip, reversed=reversed)
    template = env.get_template(args.template_name)

    data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
    shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
    print("loading {}".format(data_path))
    data = json.load(open(data_path, 'r'))
    print("loading {}".format(shared_path))
    shared = json.load(open(shared_path, 'r'))

    rows = []
    for i, (idx, yi, ypi, yp2i) in tqdm(enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2')])), total=len(eval_['idxs'])):
        id_, q, rx, answers = (data[key][idx] for key in ('ids', 'q', '*x', 'answerss'))
        x = shared['x'][rx[0]][rx[1]]
        ques = [" ".join(q)]
        para = [[word for word in sent] for sent in x]
        span = get_best_span(ypi, yp2i)
        ap = get_segment(para, span)
        score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1]-1])

        row = {
            'id': id_,
            'title': "Hello world!",
            'ques': ques,
            'para': para,
            'y': yi[0][0],
            'y2': yi[0][1],
            'yp': ypi,
            'yp2': yp2i,
            'a': answers,
            'ap': ap,
            'score': score
        }
        rows.append(row)

        if i % num_per_page == 0:
            html_path = os.path.join(html_dir, "%s.html" % str(i).zfill(8))

        if (i + 1) % num_per_page == 0 or (i + 1) == len(eval_['y']):
            var_dict = {'title': "Accuracy Visualization",
                        'rows': rows
                        }
            with open(html_path, "wb") as f:
                f.write(template.render(**var_dict).encode('UTF-8'))
            rows = []

    os.chdir(html_dir)
    port = args.port
    host = args.host
    # Overriding to suppress log message
    class MyHandler(http.server.SimpleHTTPRequestHandler):
        def log_message(self, format, *args):
            pass
    handler = MyHandler
    httpd = socketserver.TCPServer((host, port), handler)
    if args.open == 'True':
        os.system("open http://%s:%d" % (args.host, args.port))
    print("serving at %s:%d" % (host, port))
    httpd.serve_forever()
Example #12
    def get_evaluation(self, sess, batch):
        idxs, data_set = batch
        assert isinstance(data_set, DataSet)
        feed_dict = self.model.get_feed_dict(data_set, False)
        if self.config.mode == 'server':
            global_step, yp, yp2, loss, vals, na, u, h, p0, g1, g2 = sess.run(
                [
                    self.global_step, self.yp, self.yp2, self.loss,
                    list(self.tensor_dict.values()), self.na, self.u, self.h,
                    self.p0, self.g1, self.g2
                ],
                feed_dict=feed_dict)
            u_l, h_l, p0_l, g1_l, g2_l = [
                x.tolist() for x in (u, h, p0, g1, g2)
            ]
        else:
            global_step, yp, yp2, loss, vals, na = sess.run(
                [
                    self.global_step, self.yp, self.yp2, self.loss,
                    list(self.tensor_dict.values()), self.na
                ],
                feed_dict=feed_dict)
            u_l = h_l = p0_l = g1_l = g2_l = []

        yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
        spans, scores = zip(
            *[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)

        id2answer_dict = {
            id_: _get2(context, xi, span)
            for id_, xi, span, context in
            zip(data_set.data['ids'], data_set.data['x'], spans,
                data_set.data['p'])
        }
        id2score_dict = {
            id_: score
            for id_, score in zip(data_set.data['ids'], scores)
        }
        id2answer_dict['scores'] = id2score_dict
        id2na_dict = {
            id_: float(each)
            for id_, each in zip(data_set.data['ids'], na)
        }
        id2answer_dict['na'] = id2na_dict
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = ForwardEvaluation(data_set.data_type,
                              int(global_step),
                              idxs,
                              yp.tolist(),
                              yp2.tolist(),
                              float(loss),
                              id2answer_dict,
                              na.tolist(),
                              u_l,
                              h_l,
                              p0_l,
                              g1_l,
                              g2_l,
                              tensor_dict=tensor_dict)
        return e
Example #13
    def get_evaluation(self, sess, batch):
        idxs, data_set = self._split_batch(batch)
        assert isinstance(data_set, DataSet)
        feed_dict = self._get_feed_dict(batch)
        if self.config.na:
            global_step, yp, yp2, wyp, loss, na, vals = sess.run(
                [
                    self.global_step, self.yp, self.yp2, self.wyp, self.loss,
                    self.na,
                    list(self.tensor_dict.values())
                ],
                feed_dict=feed_dict)
        else:
            global_step, yp, yp2, wyp, loss, vals = sess.run(
                [
                    self.global_step, self.yp, self.yp2, self.wyp, self.loss,
                    list(self.tensor_dict.values())
                ],
                feed_dict=feed_dict)
        y = data_set.data['y']

        yp, yp2, wyp = yp[:data_set.num_examples], yp2[:data_set.num_examples], wyp[:data_set.num_examples]
        if self.config.wy:
            spans, scores = zip(
                *[get_best_span_wy(wypi, self.config.th) for wypi in wyp])
        else:
            spans, scores = zip(
                *[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)

        if self.config.split_supports:
            id2answer_dict = {
                id_: _get2(context[0], xi, span)
                for id_, xi, span, context in
                zip(data_set.data['ids'], data_set.data['x2'], spans,
                    data_set.data['p2'])
            }
        else:
            id2answer_dict = {
                id_: _get2(context[0], xi, span)
                for id_, xi, span, context in
                zip(data_set.data['ids'], data_set.data['x'], spans,
                    data_set.data['p'])
            }
        id2score_dict = {
            id_: score
            for id_, score in zip(data_set.data['ids'], scores)
        }
        id2answer_dict['scores'] = id2score_dict
        if self.config.na:
            id2na_dict = {
                id_: float(each)
                for id_, each in zip(data_set.data['ids'], na)
            }
            id2answer_dict['na'] = id2na_dict
        correct = [
            self.__class__.compare2(yi, span) for yi, span in zip(y, spans)
        ]
        f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = F1Evaluation(data_set.data_type,
                         int(global_step),
                         idxs,
                         yp.tolist(),
                         yp2.tolist(),
                         y,
                         correct,
                         float(loss),
                         f1s,
                         id2answer_dict,
                         tensor_dict=tensor_dict)
        if self.config.wy:
            e.dict['wyp'] = wyp.tolist()
        return e
Example #14
    def get_evaluation(self, sess, batch):
        """

        :param sess: TensorFlow session used to run the evaluation ops
        :param batch: (indices, DataSet) pair for one batch, e.g. examples 0 to 59 or 60 to 119
        :return: an F1Evaluation for the batch
        """
        idxs, data_set = self._split_batch(batch)  # idxs from 0 to 59, data_set a DataSet instance
        assert isinstance(data_set, DataSet)
        feed_dict = self._get_feed_dict(batch)
        global_step, yp, yp2, loss, vals = sess.run(
            [self.global_step, self.yp, self.yp2, self.loss, list(self.tensor_dict.values())], feed_dict=feed_dict)
        y = data_set.data['y']
        ind = 0

        if self.print:
            for y_t, y1_p, y2_p in zip(y, yp, yp2):
                y1_index = np.argmax(y1_p)
                y2_index = np.argmax(y2_p)
                print("{}, {}, {}".format(y_t, y1_index, y2_index))
                print("the question is: {}".format(data_set.data['q'][ind]))
                # print("the answer is: {}".format(data_set.data['answerss'][ind]))
                print("the answer is: {}".format(data_set.data['x'][ind][0][y_t[0][0][1]:y_t[0][1][1]]))
                print(
                    "the pred answer is: {}".format(
                        data_set.data['x'][ind][0][min(y1_index, y2_index):max(y1_index, y2_index) + 1]))

                ind += 1

        if self.config.squash:
            new_y = []
            for xi, yi in zip(data_set.data['x'], y):
                new_yi = []
                for start, stop in yi:
                    start_offset = sum(map(len, xi[:start[0]]))
                    stop_offset = sum(map(len, xi[:stop[0]]))
                    new_start = 0, start_offset + start[1]
                    new_stop = 0, stop_offset + stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y
        if self.config.single:
            new_y = []
            for yi in y:
                new_yi = []
                for start, stop in yi:
                    new_start = 0, start[1]
                    new_stop = 0, stop[1]
                    new_yi.append((new_start, new_stop))
                new_y.append(new_yi)
            y = new_y

        yp, yp2 = yp[:data_set.num_examples], yp2[:data_set.num_examples]
        spans, scores = zip(*[get_best_span(ypi, yp2i) for ypi, yp2i in zip(yp, yp2)])

        def _get(xi, span):
            if len(xi) <= span[0][0]:
                return [""]
            if len(xi[span[0][0]]) <= span[1][1]:
                return [""]
            return xi[span[0][0]][span[0][1]:span[1][1]]

        def _get2(context, xi, span):
            if len(xi) <= span[0][0]:
                return ""
            if len(xi[span[0][0]]) <= span[1][1]:
                return ""
            return get_phrase(context, xi, span)

        id2answer_dict = {id_: _get2(context, xi, span)
                          for id_, xi, span, context in
                          zip(data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])}
        id2score_dict = {id_: score for id_, score in zip(data_set.data['ids'], scores)}
        id2answer_dict['scores'] = id2score_dict
        correct = [self.__class__.compare2(yi, span) for yi, span in zip(y, spans)]
        f1s = [self.__class__.span_f1(yi, span) for yi, span in zip(y, spans)]
        tensor_dict = dict(zip(self.tensor_dict.keys(), vals))
        e = F1Evaluation(data_set.data_type, int(global_step), idxs, yp.tolist(), yp2.tolist(), y,
                         correct, float(loss), f1s, id2answer_dict, tensor_dict=tensor_dict)
        return e