Example #1
0
def cal_confidence_score(sample, model_name):
    '''
    给未标注数据打标签,并且计算得分,返回结果
    :param model: 模型
    :param sample: 对象
    :return: 预测的功能点名称  置信度
    '''
    model = Tagger()
    model.open(model_name)
    # unlabeled sample features
    feature_sequence = build_model_features(sample, 17, False)
    # words
    # words = sample.sen_words
    chars = list(sample.sentence)
    model.set(feature_sequence)
    predicted_labels = model.tag()

    # get predicted_fps
    fp_list = []
    fp = ''
    for index, label in enumerate(predicted_labels):
        if label == 'B' or label == 'I' or label == 'E':
            fp += chars[index]
        if label == 'N' and len(fp) > 0:
            fp_list.append(fp)
            fp = ''

    # calculate the probability of tagging
    crf_confidence = model.probability(predicted_labels)

    lan_confidence = 0
    filtered_fp_list = []
    for fp_name in fp_list:
        filtered_fp_list.append(fp_name)

    if len(filtered_fp_list) == 0:
        predicted_fps = 'null'
    else:
        predicted_fps = ' '.join(filtered_fp_list)

    # print(str(sample.story_id) +' '+ sample.sentence +' '+ fp +' '+ str(confidence))
    # 为防止多进程乱序执行导致结果跟sample不对应,因此同时返回sample信息
    return sample.story_id, sample.sentence, predicted_fps, crf_confidence
Example #2
0
    pickle.dump({
        'dataset': dataset,
        'thetas': thetas
    }, open(FLEXCRF_TEST_DATA_FILE, 'wb'))
else:
    dd = pickle.load(open(FLEXCRF_TEST_DATA_FILE))
    dataset = dd['dataset']
    thetas = dd['thetas']

# -- Start classification ------------------------------------------------

for seq in range(len(dataset)):
    # -- with crfsuite
    s_ = tagger.tag(data['X'][seq])
    y_ = np.array([int(model.labels[s]) for s in s_])
    prob_ = tagger.probability(s_)

    print "\n-- With crfsuite:"
    print "labels:\n", s_, "\n", y_
    print "probability:\t %f" % prob_

    # -- with flexcrf
    f_xy, y = dataset[seq]
    theta = thetas[seq]

    m_xy, f_m_xy = _compute_all_potentials(f_xy, theta)

    y_pred = viterbi_decoder(m_xy)

    # ADD CODE TO COMPUTE POSTERIOR PROBABILITY WITH FLEXCRF HERE ....