예제 #1
0
def main():
    """Train an Estimator tagger, report test accuracy, and tag a sample.

    Loads the data via ``pos.load_data()``, converts it with
    ``to_train_seq`` / ``to_test_seq``, fits a ``tf.estimator.Estimator``
    built from ``model_fn``, prints the element-wise accuracy on the test
    split, and finally prints the predicted tags for a short sentence.
    """
    (x_train, y_train, x_test, y_test,
     vocab_size, n_class, word2idx, tag2idx) = pos.load_data()
    X_train, Y_train = to_train_seq(x_train, y_train)
    X_test, Y_test = to_test_seq(x_test, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    estimator = tf.estimator.Estimator(
        model_fn, params={'vocab_size': vocab_size, 'n_class': n_class})

    # --- training ---
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'inputs': X_train},
        y=Y_train,
        batch_size=BATCH_SIZE,
        num_epochs=NUM_EPOCH,
        shuffle=True)
    estimator.train(train_input_fn)

    # --- evaluation on the test split ---
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'inputs': X_test}, batch_size=BATCH_SIZE, shuffle=False)
    # predict() is consumed through list(), so materialise it before comparing.
    test_preds = np.array(list(estimator.predict(test_input_fn)))
    accuracy = (test_preds == Y_test).mean()
    print("final testing accuracy: %.4f" % accuracy)

    # --- tag one hand-written sentence ---
    sample = ['I', 'love', 'you']
    # Right-pad the word ids with zeros up to SEQ_LEN.
    padding = [0] * (SEQ_LEN - len(sample))
    sample_ids = np.atleast_2d([word2idx[w] for w in sample] + padding)
    sample_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'inputs': sample_ids}, batch_size=1, shuffle=False)
    sample_preds = np.array(list(estimator.predict(sample_input_fn)))

    # Invert the tag -> index mapping so predictions can be printed as tags.
    idx2tag = {tag_id: tag for tag, tag_id in tag2idx.items()}
    print(' '.join(sample))
    # Only the first len(sample) positions belong to real words.
    print(' '.join(idx2tag[tag_id] for tag_id in sample_preds[0][:len(sample)]))
예제 #2
0
SEQ_LEN = 20
BATCH_SIZE = 32
sample = ['I', 'love', 'you']


def to_seq(*args):
    """Cut each flat sequence into rows of ``SEQ_LEN`` items.

    Trailing items that do not fill a complete row are dropped before the
    reshape.

    Returns:
        A list with one array of shape ``[-1, SEQ_LEN]`` per positional
        argument, in the same order as the arguments.
    """
    def _chunk(flat):
        # Largest multiple of SEQ_LEN that fits into the sequence.
        usable = len(flat) - len(flat) % SEQ_LEN
        return np.reshape(flat[:usable], [-1, SEQ_LEN])

    return [_chunk(flat) for flat in args]


if __name__ == '__main__':
    # Load the data splits together with the vocabulary / tag lookup tables.
    (x_train, y_train, x_test, y_test,
     vocab_size, n_class, word2idx, tag2idx) = pos.load_data()

    # to_seq returns its results in argument order: inputs first, then labels.
    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, n_class)
    fit_options = {
        'val_data': (X_test, Y_test),
        'keep_prob': 0.8,
        'n_epoch': 5,
        'batch_size': BATCH_SIZE,
    }
    clf.fit(X_train, Y_train, **fit_options)

    # Predictions are compared against the flattened labels.
    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    matches = y_pred == Y_test.ravel()
    final_acc = matches.mean()
    print("final testing accuracy: %.4f" % final_acc)

    # Reverse lookup: tag index -> tag string.
    idx2tag = {tag_id: tag for tag, tag_id in tag2idx.items()}
예제 #3
0
import pos
import numpy as np
from sklearn.tree import DecisionTreeClassifier

if __name__ == '__main__':
    # Only the four data splits are used; the remaining return values of
    # load_data() are discarded.
    x_train, y_train, x_test, y_test, _, _, _, _ = pos.load_data()

    # Insert a new axis at position 1 before handing the data to the model.
    X_train = np.expand_dims(x_train, axis=1)
    X_test = np.expand_dims(x_test, axis=1)

    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    # Report the score on the training split first, then on the test split.
    for features, targets in ((X_train, y_train), (X_test, y_test)):
        print(model.score(features, targets))
예제 #4
0
from birnn_crf_clf import BiRNN_CRF


SEQ_LEN = 20
BATCH_SIZE = 32
sample = ['I', 'love', 'you']


def to_seq(*args):
    """Reshape every argument into rows of ``SEQ_LEN`` elements.

    Each argument is first truncated to the largest length that is a
    multiple of ``SEQ_LEN``; the leftover tail is discarded.

    Returns:
        A list of arrays shaped ``[-1, SEQ_LEN]``, one per argument and in
        argument order.
    """
    return [
        # Floor-divide then multiply: the number of items in whole rows.
        np.reshape(seq[:len(seq) // SEQ_LEN * SEQ_LEN], [-1, SEQ_LEN])
        for seq in args
    ]


if __name__ == '__main__':
    # Load the data splits together with the vocabulary / tag lookup tables.
    (x_train, y_train, x_test, y_test,
     vocab_size, n_class, word2idx, tag2idx) = pos.load_data()

    # to_seq returns its results in argument order: inputs first, then labels.
    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, n_class)
    clf.fit(
        X_train,
        Y_train,
        val_data=(X_test, Y_test),
        keep_prob=0.8,
        n_epoch=5,
        batch_size=BATCH_SIZE,
    )

    # Predictions are compared against the flattened labels.
    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    matches = y_pred == Y_test.ravel()
    final_acc = matches.mean()
    print("final testing accuracy: %.4f" % final_acc)

    # Tag the sample sentence and print the words above their tags.
    idx2tag = {tag_id: tag for tag, tag_id in tag2idx.items()}
    sample_ids = [word2idx[w] for w in sample]
    labels = clf.infer(sample_ids)
    print(' '.join(sample))
    print(' '.join(idx2tag[tag_id] for tag_id in labels))
예제 #5
0
import pos
import numpy as np
from sklearn.tree import DecisionTreeClassifier


if __name__ == '__main__':
    # load_data() returns eight values; only the first four are needed here.
    x_train, y_train, x_test, y_test, _, _, _, _ = pos.load_data()

    # Add an axis at position 1 to both input arrays.
    X_train, X_test = (np.expand_dims(split, 1) for split in (x_train, x_test))

    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    # Training score is printed first, test score second.
    train_score = model.score(X_train, y_train)
    print(train_score)
    test_score = model.score(X_test, y_test)
    print(test_score)