コード例 #1
0
ファイル: test.py プロジェクト: loveyoghurt123/Linear-CRF
def test_file(model_path, test_file_path):
    """Tag a labelled test file with a saved model and write the predictions.

    Input file format (UTF-8): one whitespace-separated ``<char> <tag>`` pair
    per line, with sentences separated by a blank line::

        今   B
        晚   E
        月   B
        色   E
        真   S
        美   S
        。   S

        我   S

    Output file format (``test_result.txt`` in the current working directory,
    UTF-8): one tab-separated ``<char> <gold tag> <predicted tag>`` line per
    character, followed by a blank line after every sentence::

        今   B   preTag
        晚   E   preTag

    Args:
        model_path: Path of the saved model handed to ``LinearCRF.load``.
        test_file_path: Path of the labelled test file described above.

    Returns:
        None. If either path is not an existing file, a message is printed
        and nothing else is done.

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            whitespace-separated fields.
    """
    if not os.path.isfile(model_path) or not os.path.isfile(test_file_path):
        print("File don't exist!")
        # Stop here: continuing would only fail later on the missing file.
        return

    model = LinearCRF()
    model.load(model_path)

    sentences = []
    labels = []
    sentence = []
    label = []
    with codecs.open(test_file_path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, 1):
            fields = line.split()
            if not fields:
                # Blank (or whitespace-only, e.g. "\r\n") line ends a sentence.
                # Repeated separators must not produce empty sentences.
                if sentence:
                    sentences.append(sentence)
                    labels.append(label)
                    sentence = []
                    label = []
                continue
            if len(fields) != 2:
                raise ValueError(
                    'line {} of {!r}: expected "<char> <tag>", got {!r}'.format(
                        line_no, test_file_path, line))
            char, tag = fields
            sentence.append(char)
            label.append(tag)

    # Keep the last sentence when the file has no trailing blank line.
    if sentence:
        sentences.append(sentence)
        labels.append(label)

    pre_tags = [model.inference_viterbi(sen) for sen in sentences]

    # Write explicitly as UTF-8 so the result does not depend on the locale.
    with codecs.open('test_result.txt', 'w', encoding='utf-8') as out:
        for sen, sen_tag, sen_pre in zip(sentences, labels, pre_tags):
            for char, gold, predicted in zip(sen, sen_tag, sen_pre):
                out.write('{}\t{}\t{}\n'.format(char, gold, predicted))
            out.write('\n')

    print('Test finished!')
コード例 #2
0
ファイル: demo.py プロジェクト: loveyoghurt123/Linear-CRF
class Segmentation(object):
    """Word segmenter backed by a saved ``LinearCRF`` model."""

    def __init__(self, model_path='model/linear_crf.model'):
        """Create the model and load its saved state from ``model_path``."""
        self.model = LinearCRF()
        self.model.load(model_path)

    def seg(self, sentence):
        """Split ``sentence`` into words using the model's per-character tags.

        Surrounding whitespace is removed before decoding. Each character is
        paired with the tag returned by ``inference_viterbi``; a tag of
        ``'S'`` or ``'E'`` closes the current word.

        Args:
            sentence: The text to segment, as a string.

        Returns:
            A list of word strings; an empty list for empty or
            whitespace-only input.
        """
        # str.strip() returns a new string, so the result must be rebound.
        sentence = sentence.strip()
        if not sentence:
            # Nothing to decode; do not hand an empty sequence to the model.
            return []

        tags = self.model.inference_viterbi(sentence)

        pieces = []
        for char, tag in zip(sentence, tags):
            pieces.append(char)
            if tag in ('S', 'E'):
                pieces.append(' ')
        # Join once, then split on whitespace to obtain the word list.
        return ''.join(pieces).split()