Beispiel #1
0
def test():
    """Evaluate four sequence models on the test data and print a report.

    HMM, MEMM and CRF_WORD instances are built from inline configs and
    trained through ``prep_data().shuffle(0xfab1e).split(0).train()``; the
    RNN is not trained here but restored from ``rnn_model.bin``.  Every
    model is then passed to ``TestModel`` together with the loaded test
    data, and the returned confusion matrix and edit-distance figures are
    printed for stages 1 through 3.
    """
    data = LoadTestData()

    hmm_cfg = {'ngram': 3, 'est': 'add-delta', 'delta': 0.3}
    memm_cfg = {
        'ftrs': ('IS_FIRST', 'IS_LAST', 'VAL', 'PRV_VAL', 'NXT_VAL',
                 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL')
    }
    # Same feature set as the MEMM plus 'IDX'.
    crf_cfg = {
        'ftrs': ('IS_FIRST', 'IS_LAST', 'IDX', 'VAL', 'PRV_VAL', 'NXT_VAL',
                 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL')
    }
    rnn_cfg = {
        'n_layers': 3,
        'hidden_dim': 32,
        'embedding': 'mds',
        'win_len': 4,
        "device": "cpu"
    }

    # Each entry pairs a fresh model with the label printed above its report.
    pending = [
        (HMM(hmm_cfg), 'HMM. config: {}'.format(hmm_cfg)),
        (MEMM(memm_cfg), 'MEMM. config: {}'.format(memm_cfg)),
        (CRF_WORD(crf_cfg), 'CRF. config: {}'.format(crf_cfg)),
    ]

    evaluated = []
    for candidate, label in pending:
        # Fixed shuffle seed; NOTE(review): split(0) presumably keeps all
        # data for training -- confirm against the model base class.
        prepared = candidate.prep_data().shuffle(0xfab1e)
        evaluated.append((prepared.split(0).train(), label))

    # The RNN comes from a saved checkpoint instead of being trained.
    rnn = RNN(rnn_cfg)
    restored = rnn.prep_model().load('rnn_model.bin')
    evaluated.append((restored, 'RNN. config: {}'.format(rnn_cfg)))

    thin_rule = '-' * 50
    summary = '   Average = {}\n   Median = {}\n   Min = {}\n   Max = {}'
    for fitted, label in evaluated:
        confusion, distances = TestModel(fitted, data)
        print('\n')
        print(label)
        print('=' * 80)
        print('Vowel metrics:')
        print(thin_rule)
        PrintConfMat(confusion)
        print(thin_rule)
        print('Edit distance:')
        print(thin_rule)
        for stage in (1, 2, 3):
            print('Stage = {}:'.format(stage_names[stage]))
            stats = distances[stage]
            print(summary.format(stats[0], stats[1], stats[2], stats[3]))
Beispiel #2
0
def f9(x, y):
    """Return a random 0 or 1; both arguments are ignored."""
    draw = random.random()
    return round(draw)


def f10(x, y):
    """Return a random 0 or 1; both arguments are ignored."""
    draw = random.random()
    return round(draw)


if __name__ == '__main__':
    # Script entry point: build the tag inventory, then train a MEMM with
    # ten feature functions and ten zero-initialised parameters.
    data, tag = create_dataset()
    tag1 = list(set(tag))

    # Tags listed here are dropped from the inventory handed to the model.
    # NOTE(review): these look like punctuation tags -- confirm against
    # what create_dataset() actually emits.
    punctuations = ['.', ',', ':', ';', '\"', '\'', '``', '\'\'']
    all_tag = [t for t in tag1 if t not in punctuations]

    # One initial parameter per feature function passed below.
    param = [0] * 10

    # Single-argument print(...) emits the same text on Python 2 and is
    # valid syntax on Python 3, unlike the bare print statement.
    print('Profiling started')

    # NOTE(review): the meaning of the trailing 0 argument is not visible
    # here -- check the MEMM constructor.
    memm = MEMM(data, tag, all_tag, param,
                [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10], 0)

    # Optional cProfile scaffold, currently disabled.  To profile training,
    # enable the two lines before train() and the block after it.
    # prof = cProfile.Profile()
    # prof.enable()
    memm.train()
    # prof.disable()
    # s = StringIO.StringIO()
    # sortby = 'cumulative'
    # ps = pstats.Stats(prof, stream=s).sort_stats(sortby)
    # ps.print_stats()
    # print(s.getvalue())
Beispiel #3
0
# Evaluation script: fit a MEMM on the training features and score its
# token-level predictions on the test sequences.
#
# print(...) is called with a single pre-formatted argument throughout so
# the output is identical on Python 2 and the code is valid on Python 3.
print("testing set feats: %s" % (test_feat.shape,))

print("moving to a sparse representation...")
train_feat = sparsify(train_feat)
test_feat = sparsify(test_feat)

# Flatten the per-sequence label lists into one token-level label list.
train_labl = [s.labels for s in train_sequences]
train_labl = [item for sublist in train_labl for item in sublist]

# Gold labels for the test set: one space-joined string per sequence
# (test_seq) and one flat token-level list (test_tok).
obsr_labl = [s.labels for s in test_sequences]
test_seq = [" ".join(x) for x in obsr_labl]
test_tok = [item for sublist in obsr_labl for item in sublist]

obsr_list = get_observations(test_feat, test_sequences)

# NOTE(review): the meaning of the two constructor arguments (10, 0.0001)
# is not visible here -- confirm against the MEMM class.
memm = MEMM(10, 0.0001)
memm.fit(train_feat, train_labl, fe.num_feats)
pred_tok, pred_seq = memm.predict_sequences(obsr_list)

# Alternative model, disabled: Structured Perceptron using viterbi in the
# inference step.
# percep = StructuredPerceptron(10, fe, 0.1)
# percep.fit(train_sequences)
# pred_tok, pred_seq = percep.predict_sequences(test_sequences)
#
# obsr_labl = [s.labels for s in test_sequences]
# test_seq = [" ".join(x) for x in obsr_labl]
# test_tok = [item for sublist in obsr_labl for item in sublist]

# print results
print(metrics.classification_report(test_tok, pred_tok))
print(metrics.accuracy_score(test_tok, pred_tok))