def generate_extra_feat(self, sc):
    ans_size_disc = Discretizer([2, 3, 5, 10, 50], output_mode='list')  # 5+1 buckets
    # Layout: E/T/Tm/Ord sizes, E/T/Tm/Ord indicators, 2_hop, with_med, ans_size_discrete
    # 4 + 4 + 2 + 6 = 16
    extra_feat_list = []
    constr_size_dict = {}
    main_pred_seq = []
    for category, _, pred_seq in sc.raw_paths:
        if category == 'Main':
            main_pred_seq = pred_seq
        constr_size_dict[category] = 1 + constr_size_dict.get(category, 0)
    for cate in ('Entity', 'Type', 'Time', 'Ordinal'):
        extra_feat_list.append(constr_size_dict.get(cate, 0))  # how many constraint paths of this category
    for cate in ('Entity', 'Type', 'Time', 'Ordinal'):
        extra_feat_list.append(min(1, constr_size_dict.get(cate, 0)))  # whether such a constraint exists
    is_two_hop = 1 if sc.hops == 2 else 0
    with_med = 1 if is_mediator_as_expect(main_pred_seq[0]) else 0
    extra_feat_list += [is_two_hop, with_med]
    extra_feat_list += ans_size_disc.convert(sc.ans_size)
    assert len(extra_feat_list) == self.extra_feat_size
    return np.array(extra_feat_list, dtype='float32')
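# A worked sketch of the 16-dim layout above (hypothetical schema, for illustration
# only): a 2-hop schema whose raw_paths hold one 'Main' path through a mediator plus
# one 'Type' constraint, with ans_size = 7, yields
#   constraint sizes       [0, 1, 0, 0]        (Entity / Type / Time / Ordinal)
#   constraint indicators  [0, 1, 0, 0]
#   [is_two_hop, with_med] [1, 1]
#   ans_size buckets       [0, 0, 0, 1, 0, 0]  (5 <= 7 < 10)
# i.e. 4 + 4 + 2 + 6 = 16 features, matching the assert on extra_feat_size.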
def __init__(self, lexicon):
    self.lexicon = lexicon
    self.year_re = re.compile(r'^[1-2][0-9][0-9][0-9]$')
    self.punc_str = u"?!:',."
    self.trivial_set = {'the', 'a', 'an', 'of', 'on', 'at', 'by'}  # Lukov et al., Sec 2.2.1
    self.log_wiki_pop_disc = Discretizer([1, 2, 3, 4, 6], output_mode='list', name='log_wiki_pop')
    self.log_fb_pop_disc = Discretizer([3, 4, 6, 8], output_mode='list', name='log_fb_pop')
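# Note on the two discretizers above: both expect popularity scores already in log
# scale (an assumption from the "log_*" naming; the log base is applied by the
# caller and is not fixed here). With split points [1, 2, 3, 4, 6], log_wiki_pop_disc
# emits a 6-dim one-hot list; e.g. a log-popularity of 3.2 falls into the [3, 4)
# bucket, giving [0, 0, 0, 1, 0, 0]. log_fb_pop_disc behaves the same with 5 buckets.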
def work(exp_dir, data_dir, best_epoch, qa_list, yih_ret_dict):
    log_fp = '%s/yih_compare_%03d.txt' % (exp_dir, best_epoch)
    pick_sc_dict = {q_idx: (-1, 0.) for q_idx in range(3778, 5810)}
    ret_fp = '%s/result/full.t.%03d' % (exp_dir, best_epoch)
    with open(ret_fp, 'r') as br:
        for line in br.readlines():
            spt = line.strip().split('\t')
            q_idx = int(spt[0])
            line_no = int(spt[1])
            ours_f1 = float(spt[2])
            pick_sc_dict[q_idx] = (line_no, ours_f1)
    disc = Discretizer([-0.99, -0.50, -0.25, -0.01, 0.01, 0.25, 0.50, 0.99])
    delta_tup_list = []
    avg_yih_f1 = 0.
    avg_ours_f1 = 0.
    for q_idx in range(3778, 5810):
        qa = qa_list[q_idx]
        q = qa['utterance']
        gold_answer_list = qa['targetValue']
        yih_answer_list = json.loads(yih_ret_dict[q])
        _, _, yih_f1 = compute_f1(goldList=gold_answer_list, predictedList=yih_answer_list)
        ours_f1 = pick_sc_dict[q_idx][1]
        avg_yih_f1 += yih_f1
        avg_ours_f1 += ours_f1
        delta = ours_f1 - yih_f1
        disc.convert(delta)
        delta_tup_list.append((q_idx, delta))
    avg_yih_f1 /= 2032  # 2032 = 5810 - 3778, the size of the WebQ test split
    avg_ours_f1 /= 2032
    delta_tup_list.sort(key=lambda _tup: _tup[1])
    LogInfo.logs('%d questions delta sorted.', len(delta_tup_list))
    total_size = len(delta_tup_list)
    worse_size = len(filter(lambda _tup: _tup[1] < 0., delta_tup_list))
    better_size = len(filter(lambda _tup: _tup[1] > 0., delta_tup_list))
    equal_size = total_size - worse_size - better_size
    bw = codecs.open(log_fp, 'w', 'utf-8')
    LogInfo.redirect(bw)
    LogInfo.logs('Avg_Yih_F1 = %.6f, Avg_Ours_F1 = %.6f', avg_yih_f1, avg_ours_f1)
    LogInfo.logs(' Worse cases = %d (%.2f%%)', worse_size, 100. * worse_size / total_size)
    LogInfo.logs(' Equal cases = %d (%.2f%%)', equal_size, 100. * equal_size / total_size)
    LogInfo.logs('Better cases = %d (%.2f%%)', better_size, 100. * better_size / total_size)
    disc.show_distribution()
    LogInfo.logs()
    for q_idx, _ in delta_tup_list:
        qa = qa_list[q_idx]
        line_no, ours_f1 = pick_sc_dict[q_idx]
        q = qa['utterance']
        yih_answer_list = json.loads(yih_ret_dict[q])
        if line_no == -1:
            continue
        single_question(q_idx=q_idx, qa=qa, data_dir=data_dir, line_no=line_no,
                        yih_answer_list=yih_answer_list, ours_f1=ours_f1)
    LogInfo.stop_redirect()
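# compute_f1 above is imported from elsewhere; the sketch below only documents the
# set-style precision/recall/F1 semantics this script relies on. _compute_f1_sketch
# is a hypothetical stand-in for illustration, not the actual implementation.
def _compute_f1_sketch(gold_list, predicted_list):
    if len(gold_list) == 0 or len(predicted_list) == 0:
        return 0., 0., 0.
    hit = len(set(gold_list) & set(predicted_list))  # answers shared by both sides
    precision = 1. * hit / len(predicted_list)
    recall = 1. * hit / len(gold_list)
    if precision + recall == 0.:
        return precision, recall, 0.
    f1 = 2. * precision * recall / (precision + recall)
    return precision, recall, f1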
class LinkingWrapper:

    def __init__(self, base='/home/xianyang/aqqu/aqqu',
                 parser_ip='202.120.38.146', parser_port=9601,
                 linking_mode='Raw', q_links_dict=None, lukov_linker=None):
        """
        Raw: the raw version, won't read anything from S-MART or our Lukov's implementation
        S-MART: read from S-MART result (only available in WebQ)
        Lukov: read from our lukov_ngram linker data
        """
        self.base = base
        self.linking_mode = linking_mode
        self.q_links_dict = q_links_dict  # save S-MART results
        self.lukov_linker = lukov_linker
        assert linking_mode in ('Raw', 'S-MART', 'Lukov')
        if linking_mode == 'Lukov':
            assert self.lukov_linker is not None
        LogInfo.logs('Initiating parser ... ')
        self.parser = parser.CoreNLPParser('http://%s:%d/parse' % (parser_ip, parser_port))  # just open the parser
        self.is_data_loaded = False
        self.surface_index = None
        self.entity_linker = None
        self.type_linker = None
        self.smart_score_disc = Discretizer(
            split_list=[2, 3, 8, 50, 2000, 12500, 25000, 40000], output_mode='list')
        # the split distribution is manually designed by observing S-MART data
        # in both CompQ & WebQ datasets
        self.pop_filter_num = 5
        # Only used in LukovLinker: for each span,
        # we just keep the top entities sorted by popularity

    def load_data(self):
        if self.is_data_loaded:
            return
        LogInfo.begin_track('EL-Wrapper initializing ... ')
        LogInfo.logs('Initiating index ...')
        self.surface_index = surface_index_memory.EntitySurfaceIndexMemory(
            self.base + '/data/entity-list',
            self.base + '/data/entity-surface-map',
            self.base + '/data/entity-index')
        LogInfo.logs('Initiating entity_linker')
        self.entity_linker = entity_linker.EntityLinker(self.surface_index, 7)
        LogInfo.logs('Initiating type_linker [KQ]')
        self.type_linker = type_linker.TypeLinker()
        LogInfo.end_track('Initialized.')
        self.is_data_loaded = True

    # Key function: return tokens, entities, types and times
    def link(self, q_idx, sentence):
        parse_result = self.parser.parse(sentence)
        tokens = parse_result.tokens
        linking_mode = self.linking_mode
        el_result = []
        tl_result = []
        tml_result = []
        if linking_mode in ('Raw', 'S-MART'):
            self.load_data()
            raw_result = self.entity_linker.identify_entities_in_tokens(tokens)  # entity & time
            for item in raw_result:
                if isinstance(item.entity, entity_linker.KBEntity):
                    el_result.append(item)
                elif isinstance(item.entity, entity_linker.DateValue):
                    tml_result.append(item)
            if linking_mode == 'S-MART':
                # won't use the previous results, but just read S-MART data
                el_result = []
                smart_list = self.q_links_dict.get(q_idx, [])
                for smart_item in smart_list:  # enumerate each candidate in S-MART result
                    use_tokens = []  # determine the tokens covered by the current EL result
                    start = smart_item.st_pos
                    end = smart_item.st_pos + smart_item.length
                    cur_pos = 0
                    for t in tokens:
                        if start <= cur_pos < end:
                            use_tokens.append(t)
                        cur_pos += len(t.token) + 1
                    obj_entity = KBEntity(name=smart_item.e_name,
                                          identifier=smart_item.mid,
                                          score=0, aliases=None)
                    perfect = (smart_item.e_name.lower().replace('_', ' ') ==
                               smart_item.surface_form.lower())  # Chicago_Ohio --> chicago ohio
                    el_item = IdentifiedEntity(tokens=use_tokens,
                                               name=smart_item.e_name,
                                               entity=obj_entity, score=0,
                                               surface_score=smart_item.score,
                                               perfect_match=perfect)
                    link_feat = self.smart_score_disc.convert(score=smart_item.score)
                    setattr(el_item, 'link_feat', link_feat)
                    el_result.append(el_item)
            tl_result = self.type_linker.identiy_types_in_tokens(tokens)  # type link
        elif linking_mode == 'Lukov':
            # All the Entity/Type/Time linking will be performed by the Lukov linker
            el_result, tl_result, tml_result = self.lukov_linker.link_single_question(tokens)
            # if self.q_links_dict is None:
            #     self.q_links_dict = load_lukov_link_result(link_fp=self.aux_fp)
            # el_result = []
            # tl_result = []
            # tml_result = []
            # lukov_link_list = self.q_links_dict.get(q_idx, [])
            # group_link_dict = {}  # <st_ed, links>
            # """ separate link results into several groups by [st, ed) """
            # for link_tup in lukov_link_list:
            #     st = link_tup.start
            #     ed = st + link_tup.length
            #     key = '%s_%s' % (st, ed)
            #     group_link_dict.setdefault(key, []).append(link_tup)
            # """ judge tagging: at least one NN(P) or JJ occurs in the span """
            # postag_available_groups = []  # store all st-ed pairs satisfying the pos-tag limitation
            # for st_ed in group_link_dict.keys():
            #     st, ed = [int(x) for x in st_ed.split('_')]
            #     flag = False
            #     for idx in range(st, ed):
            #         postag = tokens[idx].pos
            #         if postag.startswith('NN') or postag.startswith('JJ'):
            #             flag = True
            #             break
            #     if flag:
            #         postag_available_groups.append((st, ed))
            # """ longest match filtering """
            # longest_match_groups = []
            # sz = len(postag_available_groups)
            # for i in range(sz):
            #     st_i, ed_i = postag_available_groups[i]
            #     filter_flag = False
            #     for j in range(sz):
            #         if i == j:
            #             continue
            #         st_j, ed_j = postag_available_groups[j]
            #         if st_j <= st_i and ed_j >= ed_i:  # [st_i, ed_i) \in [st_j, ed_j)
            #             filter_flag = True  # found a longer span, filter the current one
            #             break
            #     if not filter_flag:
            #         longest_match_groups.append((st_i, ed_i))
            # """ Popularity filtering at each position """
            # for st, ed in longest_match_groups:
            #     key = '%s_%s' % (st, ed)
            #     links = group_link_dict[key]
            #     links.sort(key=lambda tup: tup.popularity, reverse=True)  # E/T/Tm
            #     for link_tup in links[: self.pop_filter_num]:
            #         LogInfo.logs('[%s] [%d, %d): %s (%s)',
            #                      link_tup.category, link_tup.start,
            #                      link_tup.start + link_tup.length,
            #                      link_tup.mid, link_tup.name.encode('utf-8'))
            #         if link_tup.category in ('Entity', 'Type'):
            #             obj_item = KBEntity(name=link_tup.name,
            #                                 identifier=link_tup.mid,
            #                                 score=link_tup.score,
            #                                 aliases=None)
            #             perfect = (link_tup.name.lower() == link_tup.surface.lower())
            #             el_item = IdentifiedEntity(tokens=tokens[st: ed],
            #                                        name=link_tup.name,
            #                                        entity=obj_item, score=0.,
            #                                        surface_score=link_tup.score,
            #                                        perfect_match=perfect)
            #             if link_tup.category == 'Entity':
            #                 el_result.append(el_item)
            #             else:
            #                 tl_result.append(el_item)
            #         else:  # Time obj
            #             tmv = DateValue(name=link_tup.name, date=link_tup.mid)
            #             # either name or date is the year surface
            #             tml_item = IdentifiedEntity(tokens=tokens[st: ed],
            #                                         name=link_tup.name,
            #                                         entity=tmv, score=0.,
            #                                         surface_score=link_tup.score,
            #                                         perfect_match=True)
            #             tml_result.append(tml_item)
        return tokens, el_result, tl_result, tml_result

    def parse(self, sentence):
        return self.parser.parse(sentence)

    # contains the time identification
    def entity_identify_with_parse(self, tokens):
        self.load_data()
        return self.entity_linker.identify_entities_in_tokens(tokens)

    def time_identify_with_parse(self, tokens):
        self.load_data()
        return self.entity_linker.identify_dates(tokens)

    # ==== Used in SimpleQuestions: given an entity, return its mention ==== #
    def link_with_ground_truth(self, sentence, focus_name, focus_mid):
        """
        ** ONLY USED IN SIMPLEQUESTIONS SCENARIO **
        Given the focus entity name, return the most likely mention span.
        The best span would:
        1. exactly match the entity name
        2. be the longest substring of the entity name
        We allow the mention to start with a useless "the".
        :param sentence: the question surface
        :param focus_name: the focus name
        :param focus_mid: the corresponding mid
        :return: the identified entities (but there should be only one)
        """
        tokens = self.parser.parse(sentence).tokens
        q_word_list = [tok.token.lower() for tok in tokens]
        focus_word_list = ['']  # the default list, just an empty string
        if focus_name != '':
            focus_tokens = self.parser.parse(focus_name).tokens
            focus_word_list = [tok.token.lower() for tok in focus_tokens]
        n = len(q_word_list)
        m = len(focus_word_list)
        st = ed = -1
        best_match_words = 0.
        best_match_chars = 0.
        for i in range(n):
            if best_match_words == m:
                break  # already found exact match
            for j in range(i + 1, n + 1):
                if best_match_words == m:
                    break  # already found exact match
                span = q_word_list[i:j]
                if self.is_contained(span, focus_word_list):
                    match_words = len(span)
                    match_chars = len(''.join(span))
                    if match_words < best_match_words:
                        continue
                    if match_words == best_match_words and match_chars < best_match_chars:
                        continue
                    # now update the interval
                    st = i
                    ed = j - 1  # closed interval
                    best_match_words = match_words
                    best_match_chars = match_chars
        if st > 0 and q_word_list[st - 1] == 'the':
            st -= 1
        obj_entity = KBEntity(name=focus_name, identifier=focus_mid, score=0, aliases=None)
        el_item = IdentifiedEntity(tokens=tokens[st:ed + 1],
                                   name=focus_name, entity=obj_entity, score=0,
                                   surface_score=1. * best_match_words / m,
                                   perfect_match=best_match_words == m)
        LogInfo.logs('Q surface: %s', q_word_list)
        LogInfo.logs('Focus surface: %s', focus_word_list)
        LogInfo.logs('EL result: [%d, %d] "%s" --> %s',
                     st, ed, ' '.join(q_word_list[st:ed + 1]).encode('utf-8'),
                     focus_name.encode('utf-8'))
        if st == -1 or ed == -1:
            LogInfo.logs('Warning: no suitable span found.')
        el_result = [el_item]
        tl_result = []
        tml_result = []
        return tokens, el_result, tl_result, tml_result

    @staticmethod
    def is_contained(span, target_word_list):
        """ Check whether the span is a contiguous sub word sequence of the target word list """
        len_diff = len(target_word_list) - len(span)
        if len_diff < 0:
            return False
        for st in range(len_diff + 1):
            flag = True
            for i in range(len(span)):
                if span[i] != target_word_list[st + i]:
                    flag = False
                    break
            if flag:
                return True
        return False
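# A minimal sanity check for the pure helper above, using hypothetical token lists;
# no parser or KB data is needed. This is an illustrative sketch, not part of the
# original module.
if __name__ == '__main__':
    assert LinkingWrapper.is_contained(['barack', 'obama'], ['barack', 'obama', 'jr'])
    assert not LinkingWrapper.is_contained(['obama', 'barack'], ['barack', 'obama', 'jr'])
    LogInfo.logs('is_contained sanity checks passed.')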
    get_dt_preds_given_type, get_ord_preds_given_type, \
    is_mediator_as_expect, inverse_predicate, get_end_dt_pred, load_sup_sub_types

from kangqi.util.discretizer import Discretizer
# from kangqi.util.LogUtil import LogInfo

tml_comp_dict = {
    '==': u'm.__in__',
    '<': u'm.__before__',
    '>': u'm.__after__',
    '>=': u'm.__since__'
}  # convert time comparison into a virtual mid
ordinal_dict = {'max': u'm.__max__', 'min': u'm.__min__'}

ans_size_disc = Discretizer([2, 3, 5, 10, 50], output_mode='list')  # 5+1
# ans < 2
# 2 <= ans < 3
# 3 <= ans < 5
# 5 <= ans < 10
# 10 <= ans < 50
# ans >= 50

RawPath = namedtuple('RawPath', ['path_cate', 'focus', 'pred_seq'])


class CompqSchema(object):

    def __init__(self):
        self.q_idx = None
        self.gather_linkings = None  # all related linkings of this question (either used or not used)
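        # Illustration (inferred from usage in generate_extra_feat, stated here as an
        # assumption): each element of a schema's raw_paths is a RawPath such as
        #   RawPath(path_cate='Main', focus=<linked entity>, pred_seq=[u'film.film.director']),
        # where path_cate is one of 'Main' / 'Entity' / 'Type' / 'Time' / 'Ordinal',
        # and time constraints splice the virtual mids above (e.g. u'm.__before__')
        # into pred_seq so that comparisons behave like ordinary predicates downstream.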
import os
import json
import math
import codecs
import cPickle
import shutil

from ...candgen.smart import load_webq_linking_data
from ...eff_candgen.combinator import LinkData

from kangqi.util.LogUtil import LogInfo
from kangqi.util.discretizer import Discretizer

log_score_disc = Discretizer(split_list=[0, 2, 4, 6, 8, 10, 12])  # 7+1
ratio_disc = Discretizer(split_list=[0.001, 0.01, 0.1, 0.2, 0.5])  # 5+1
feat_len = log_score_disc.len + ratio_disc.len


def build_feature_vector(score, max_score):
    log_score = math.log(score)
    ratio = 1. * score / max_score
    log_score_vec = log_score_disc.convert(score=log_score).tolist()
    ratio_vec = ratio_disc.convert(score=ratio).tolist()
    return log_score_vec + ratio_vec


def single_question(schema_fp, ans_fp, links_fp, smart_item_list):
    if os.path.isfile(schema_fp + '.ori') and os.path.isfile(links_fp + '.ori'):
        LogInfo.logs('Skip, already done.')
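# Worked example for build_feature_vector (values follow from the split lists above):
# score=100, max_score=1000 gives log_score = ln(100) ~= 4.6, which falls in the
# [4, 6) bucket of log_score_disc (8 buckets), and ratio = 0.1, which falls in the
# [0.1, 0.2) bucket of ratio_disc (6 buckets); the returned list therefore has
# feat_len = 8 + 6 = 14 entries, exactly two of them set to 1.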
""" Author: Kangqi Luo Date: 180118 Goal: Generate Structural Data """ import numpy as np from .dataset_schema_reader import schema_classification from kangqi.util.discretizer import Discretizer from kangqi.util.LogUtil import LogInfo ans_size_disc = Discretizer([2, 3, 5, 10, 50]) # 5+1 # ans < 2 # 2 <= ans < 3 # 3 <= ans < 5 # 5 <= ans < 10 # 10 <= ans < 50 # ans >= 50 def build_structural_data(all_cands_tup_list): """ Given all the candidate schemas, build structural-based data """ cand_size = len(all_cands_tup_list) LogInfo.begin_track('Build Structural Data for %d candidates:', cand_size) data_list = [] # store all extra list of candidates for data_idx, q_idx, sc in all_cands_tup_list: if data_idx % 50000 == 0:
def work(data_name, exp_dir_1, data_dir_1, exp_dir_2, data_dir_2, out_detail_fp, out_anno_fp):
    qa_list = load_compq()
    detail_fp_1 = exp_dir_1 + '/detail/full.t.best'
    detail_fp_2 = exp_dir_2 + '/detail/full.t.best'
    qidx_meta_dict_1 = read_ours(detail_fp_1)
    qidx_meta_dict_2 = read_ours(detail_fp_2)
    bw_detail = codecs.open(out_detail_fp, 'w', 'utf-8')
    bw_anno = codecs.open(out_anno_fp, 'w', 'utf-8')
    LogInfo.redirect(bw_detail)
    for bw in (bw_detail, bw_anno):
        bw.write('detail_fp_1: [%s] --> [%s]\n' % (data_dir_1, detail_fp_1))
        bw.write('detail_fp_2: [%s] --> [%s]\n\n' % (data_dir_2, detail_fp_2))
    missing_list = []
    first_only_list = []
    second_only_list = []
    compare_list = []
    if data_name == 'WebQ':
        range_list = range(3778, 5810)
    else:
        assert data_name == 'CompQ'
        range_list = range(1300, 2100)
    for q_idx in range_list:
        if q_idx not in qidx_meta_dict_1 and q_idx not in qidx_meta_dict_2:
            missing_list.append(q_idx)
        elif q_idx not in qidx_meta_dict_2:
            first_only_list.append(q_idx)
        elif q_idx not in qidx_meta_dict_1:
            second_only_list.append(q_idx)
        else:
            compare_list.append(q_idx)
    LogInfo.logs('Missing questions: %s', missing_list)
    LogInfo.logs('First only questions: %s', first_only_list)
    LogInfo.logs('Second only questions: %s\n', second_only_list)
    time_f1_list = [[], []]
    nontime_f1_list = [[], []]
    mark_counter = {}
    disc = Discretizer(split_list=[-0.5, -0.1, -0.000001, 0.000001, 0.1, 0.5])
    compare_list.sort(key=lambda x: qidx_meta_dict_1[x]['f1'] - qidx_meta_dict_2[x]['f1'])
    for q_idx in compare_list:
        info_dict_1 = qidx_meta_dict_1[q_idx]
        info_dict_2 = qidx_meta_dict_2[q_idx]
        f1_1 = info_dict_1['f1']
        f1_2 = info_dict_2['f1']
        delta = f1_1 - f1_2
        disc.convert(delta)
        qa = qa_list[q_idx]
        LogInfo.logs('============================\n')
        LogInfo.begin_track('Q-%04d: [%s]', q_idx, qa['utterance'])
        LogInfo.logs('f1_1 = %.6f, f1_2 = %.6f, delta = %.6f', f1_1, f1_2, delta)
        upb_list = []
        for d_idx, (data_dir, info_dict) in enumerate([(data_dir_1, info_dict_1),
                                                       (data_dir_2, info_dict_2)]):
            LogInfo.begin_track('Schema-%d, line = %d', d_idx, info_dict['line_no'])
            upb = retrieve_schema(data_dir, q_idx, info_dict['line_no'])
            upb_list.append(upb)
            LogInfo.end_track()
        LogInfo.end_track()
        LogInfo.logs('')
        bw_anno.write('Q-%04d: [%s]\n' % (q_idx, qa['utterance']))
        bw_anno.write('f1_1 = %.6f, f1_2 = %.6f, delta = %.6f\n' % (f1_1, f1_2, delta))
        if abs(delta) >= 0.5:
            hml = 'H'
        elif abs(delta) >= 0.1:
            hml = 'M'
        elif abs(delta) >= 1e-6:
            hml = 'L'
        else:
            hml = '0'
        if delta >= 1e-6:
            sgn = '+'
        elif delta <= -1e-6:
            sgn = '-'
        else:
            sgn = ''
        bw_anno.write('# Change: [%s%s]\n' % (sgn, hml))
        has_time = 'N'
        for tok in qa['tokens']:
            if re.match('^[1-2][0-9][0-9][0-9]$', tok.token[:4]):
                has_time = 'Y'
                break
        if has_time == 'Y':
            time_f1_list[0].append(f1_1)
            time_f1_list[1].append(f1_2)
        else:
            nontime_f1_list[0].append(f1_1)
            nontime_f1_list[1].append(f1_2)
        bw_anno.write('# Time: [%s]\n' % has_time)
        upb1, upb2 = upb_list
        if upb1 - upb2 <= -1e-6:
            upb_mark = 'Less'
        elif upb1 - upb2 >= 1e-6:
            upb_mark = 'Greater'
        else:
            upb_mark = 'Equal'
        bw_anno.write('# Upb: [%s] (%.3f --> %.3f)\n' % (upb_mark, upb1, upb2))
        overall = '%s%s_%s_%s' % (sgn, hml, has_time, upb_mark)
        mark_counter[overall] = 1 + mark_counter.get(overall, 0)
        bw_anno.write('# Overall: [%s]\n' % overall)
        bw_anno.write('\n\n')
    disc.show_distribution()
    LogInfo.logs('')
    for has_time in ('Y', 'N'):
        LogInfo.logs('Related to DateTime: [%s]', has_time)
        LogInfo.logs(' \tLess\tEqual\tGreater')
        for hml in ('-H', '-M', '-L', '0', '+L', '+M', '+H'):
            line = '%4s' % hml
            for upb_mark in ('Less', 'Equal', 'Greater'):
                overall = '%s_%s_%s' % (hml, has_time, upb_mark)
                count = mark_counter.get(overall, 0)
                line += '\t%4d' % count
                # LogInfo.logs('[%s]: %d (%.2f%%)', overall, count, 100. * count / 800)
            LogInfo.logs(line)
        LogInfo.logs('')
    LogInfo.logs('DateTime-related F1: %.6f v.s. %.6f, size = %d',
                 np.mean(time_f1_list[0]), np.mean(time_f1_list[1]), len(time_f1_list[0]))
    LogInfo.logs('DateTime-not-related F1: %.6f v.s. %.6f, size = %d',
                 np.mean(nontime_f1_list[0]), np.mean(nontime_f1_list[1]), len(nontime_f1_list[0]))
    LogInfo.stop_redirect()
    bw_detail.close()
    bw_anno.close()
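# Helpers used above but defined elsewhere in this script (semantics inferred from
# usage, stated here as assumptions): read_ours(detail_fp) returns a dict
# {q_idx: {'f1': float, 'line_no': int}} describing each question's best-ranked
# schema, and retrieve_schema(data_dir, q_idx, line_no) prints the schema detail
# and returns the F1 upper bound ("Upb") among that question's candidates.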