Code example #1
def test():
    # Load the character vocabulary built from the training corpus
    vocab_path = '../../data/people_char_vocab.pkl'
    vocabs = load_vocab(vocab_path)
    train_data_path = '../../data/people.txt'

    # Training-sample generator; unused here because training is commented out
    gen = train_generator(train_data_path, vocabs=vocabs)
    states = ['B', 'M', 'E', 'S']
    hmm = HMM(vocabs=vocabs, states=states)
    # hmm.train(train_generator=gen)
    model_dir = '../../models/hmm'
    # hmm.save_model(model_dir=model_dir)
    hmm.load_model(model_dir=model_dir)

    sentence = "我是中国人,我爱我的祖国"
    decode_states={0: 'B', 1: 'M', 2: 'E', 3: 'S'}
    hiddens = hmm.decode(outputs=sentence, decode_states=decode_states)

    words = hmm.format_hiddens(hiddens, sentence)

    print(hiddens)
    print('/ '.join(words))

    sentence = '4月29日,雄浑悠长的钟声响起,关闭了近百日的武汉黄鹤楼重新开门迎客。这钟声,传递出中华民族从磨难中奋起的昂扬斗志,彰显出伟大民族精神在新时代焕发出的熠熠光辉。'
    hiddens = hmm.decode(outputs=sentence, decode_states=decode_states)
    words = hmm.format_hiddens(hiddens, sentence)
    print('/ '.join(words))
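`format_hiddens` is not defined in these listings. Below is a minimal sketch of the behavior its usage implies, assuming `hiddens` is a list of BMES tags aligned one-to-one with the characters of `sentence`:

def format_hiddens(hiddens, sentence):
    # Assumed behavior: group characters into words along BMES boundaries.
    # 'B' starts a word, 'M' continues it, 'E' ends it, 'S' is a single-character word.
    words, buf = [], ''
    for char, tag in zip(sentence, hiddens):
        buf += char
        if tag in ('E', 'S'):   # a word boundary has been reached
            words.append(buf)
            buf = ''
    if buf:                     # flush a trailing, unterminated word
        words.append(buf)
    return words

The final flush guards against decoded tag sequences that end mid-word; the actual implementation in the repository may handle this differently.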
Code example #2
def generate_text():
    vocab_path = '../../data/people_char_vocab.pkl'
    model_dir = '../../models/hmm'
    states = ['B', 'M', 'E', 'S']

    vocabs = load_vocab(vocab_path)
    query_vocabs = {idx: char for char, idx in vocabs.items()}  # inverse map: index -> character
    hmm = HMM(vocabs=vocabs, states=states)
    hmm.load_model(model_dir=model_dir)

    pi = hmm.pi_cnt         # initial-state statistics, [S]
    tran_p = hmm.trans_cnt  # transition statistics, [S, S]
    emit_p = hmm.emit_cnt   # emission statistics, [S, V]

    # Cumulative distribution over next states for each current state, [S, S]
    trans_cdfs = [compute_cdf(tran_p[s, :]) for s in range(tran_p.shape[0])]

    # Cumulative distribution over emitted characters for each state, [S, V]
    emit_cdfs = [compute_cdf(emit_p[s, :]) for s in range(emit_p.shape[0])]

    # Sample the first hidden state from the start distribution, then emit the first character
    state_idx = sample_start(pi)
    out_idx = sample_output(state_idx, emit_cdfs)
    out_char = query_vocabs[out_idx]

    num_text = 1000  # total number of characters to generate
    print(out_char, end='')

    for i in range(num_text - 1):
        # Walk the chain: sample the next state from the transition CDFs,
        # then a character index from that state's emission CDF
        state_idx = sample_output(state=state_idx, cdfs=trans_cdfs)
        out_idx = sample_output(state=state_idx, cdfs=emit_cdfs)
        out_char = query_vocabs[out_idx]
        print(out_char, end='')
        if (i + 1) % 50 == 0:  # line break every 50 characters
            print()
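The helpers `compute_cdf`, `sample_start`, and `sample_output` are not shown. A minimal sketch, assuming each row of counts is normalized into a cumulative distribution and sampled by inverse transform with NumPy (names and signatures taken from the usage above):

import numpy as np

def compute_cdf(counts):
    # Normalize a row of counts into probabilities, then accumulate into a CDF.
    probs = counts / counts.sum()
    return np.cumsum(probs)

def sample_start(pi):
    # Draw the initial state index from the (normalized) start distribution.
    cdf = compute_cdf(pi)
    return int(np.searchsorted(cdf, np.random.rand()))

def sample_output(state, cdfs):
    # Draw an index from the CDF belonging to the given state.
    return int(np.searchsorted(cdfs[state], np.random.rand()))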
Code example #3
def train():
    vocab_path = '../../data/people_char_vocab.pkl'
    vocabs = load_vocab(vocab_path)
    train_data_path = '../../data/people.txt'

    # Stream training samples from the corpus, fit the HMM on them, and save the model
    gen = train_generator(train_data_path, vocabs=vocabs)
    states = ['B', 'M', 'E', 'S']
    hmm = HMM(vocabs=vocabs, states=states)
    hmm.train(train_generator=gen)
    model_dir = '../../models/hmm'
    hmm.save_model(model_dir=model_dir)
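`train_generator` is not listed either. For context, a common way to turn a whitespace-segmented corpus line into the per-character BMES tags a supervised HMM segmenter is trained on; the corpus format assumed here (space-separated words per line) is an assumption, not something the listings confirm:

def line_to_bmes(line):
    # Convert a whitespace-segmented line into per-character BMES tags.
    chars, tags = [], []
    for word in line.split():
        chars.extend(word)
        if len(word) == 1:
            tags.append('S')  # single-character word
        else:
            tags.extend(['B'] + ['M'] * (len(word) - 2) + ['E'])
    return chars, tags

# Example: "我 是 中国人" -> chars ['我', '是', '中', '国', '人'], tags ['S', 'S', 'B', 'M', 'E']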
Code example #4
def test_hmm():
    vocab_path = '../../data/people_char_vocab.pkl'
    model_dir = '../../models/hmm'
    states = ['B', 'M', 'E', 'S']
    decode_states = {0: 'B', 1: 'M', 2: 'E', 3: 'S'}

    vocabs = load_vocab(vocab_path)
    hmm = HMM(vocabs=vocabs, states=states)
    hmm.load_model(model_dir=model_dir)
    sentence = "我是中国人,我爱我的祖国"

    hiddens = hmm.decode(outputs=sentence, decode_states=decode_states)
    words = hmm.format_hiddens(hiddens, sentence)

    print(hiddens)
    print('/ '.join(words))

    sentence = '4月29日,雄浑悠长的钟声响起,关闭了近百日的武汉黄鹤楼重新开门迎客。' \
               '这钟声,传递出中华民族从磨难中奋起的昂扬斗志,彰显出伟大民族精神在新时代焕发出的熠熠光辉。'
    # Note: the assignment below overwrites the Chinese sentence above,
    # so only the English input is actually decoded here
    sentence = 'I love you china'
    hiddens = hmm.decode(outputs=sentence, decode_states=decode_states)
    words = hmm.format_hiddens(hiddens, sentence)
    print('/ '.join(words))
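`hmm.decode` itself is not listed. A minimal log-space Viterbi sketch of what such a decode can look like, assuming the model exposes start/transition/emission probability arrays `pi` [S], `trans` [S, S], `emit` [S, V] (these names are assumptions, not the class's actual attributes):

import numpy as np

def viterbi_decode(sentence, pi, trans, emit, vocabs, decode_states):
    # Log-space Viterbi over the state space; unseen characters fall back to a uniform emission.
    S = len(pi)
    obs = [vocabs.get(ch, -1) for ch in sentence]

    def emit_col(o):
        return np.log(emit[:, o] + 1e-12) if o >= 0 else np.full(S, -np.log(S))

    delta = np.log(pi + 1e-12) + emit_col(obs[0])        # best score ending in each state
    back = []
    for o in obs[1:]:
        scores = delta[:, None] + np.log(trans + 1e-12)  # [prev, cur]
        back.append(scores.argmax(axis=0))               # best predecessor per state
        delta = scores.max(axis=0) + emit_col(o)
    # Backtrack the highest-scoring path
    path = [int(delta.argmax())]
    for bp in reversed(back):
        path.append(int(bp[path[-1]]))
    path.reverse()
    return [decode_states[s] for s in path]

The uniform fallback for unknown characters is one reason an all-OOV input like 'I love you china' can still be decoded without errors, though the resulting segmentation is essentially arbitrary.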
Code example #5
    # ... (body of the preceding function omitted in this listing)
    return 0


if __name__ == '__main__':
    # Absolute paths to the pre-extracted training instances/features and the model directory
    base_dir = "/Users/flyingman/Developer/github/chinese_segmentation"

    instance_path = os.path.join(base_dir, "data/people_instance.txt")
    feature_path = os.path.join(base_dir, "data/people_feature.txt")
    model_dir = os.path.join(base_dir, "models/max_entropy")

    states = ['B', 'M', 'E', 'S']

    vocab_path = '../../data/people_char_vocab.pkl'
    train_data_path = '../../data/people.txt'

    # Feature functions for the maximum-entropy model (not referenced below)
    features = [func_cur, func_is_symbol]
    # Note: this relative path overrides the model_dir built from base_dir above
    model_dir = '../../models/max_entropy'
    epochs = 1000

    vocab = load_vocab(vocab_path=vocab_path)
    # print(vocab)

    # Train the maximum-entropy segmenter on the pre-extracted feature file
    max_ent = MaximumEntropy(line_limit=100, states=states)
    max_ent.train(feature_path=feature_path, epochs=epochs)
    """
    sentences = "我是中国人"
    seg_words = max_ent.predict(sentences=sentences)
    print(seg_words)
    """