# Sentence that is segmented and printed at the end of the script.
sample = '我来到大学读书,希望学到知识'
# Major version of the running interpreter. Read from sys.version_info
# rather than parsed out of the human-readable sys.version string.
py = sys.version_info[0]


def to_seq(*args):
    """Cut every input into rows of SEQ_LEN items.

    Each argument is truncated to the largest multiple of SEQ_LEN that fits
    (the trailing remainder is dropped) and then reshaped with
    ``np.reshape(x, [-1, SEQ_LEN])``.

    Args:
        *args: sequences accepted by ``len``, slicing and ``np.reshape``.

    Returns:
        list: one reshaped array per argument, in argument order.
    """
    data = []
    for x in args:
        usable = len(x) - len(x) % SEQ_LEN
        data.append(np.reshape(x[:usable], [-1, SEQ_LEN]))
    return data


if __name__ == '__main__':
    (x_train, y_train, x_test, y_test,
     vocab_size, char2idx, idx2char) = chseg.load_data()
    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print('Vocab size: %d' % vocab_size)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, N_CLASS)
    clf.fit(X_train, Y_train, val_data=(X_test, Y_test), n_epoch=N_EPOCH)

    # Split the sample into characters exactly once; the non-Python-3 branch
    # decodes it from UTF-8 first.
    chars = list(sample) if py == 3 else list(sample.decode('utf-8'))
    labels = clf.infer([char2idx[c] for c in chars])

    # Append a space after every character whose label is 2 or 3
    # (presumably the word-final / single-character tags -- confirm against
    # chseg). The characters come from `chars`, so the sample is not decoded
    # again per label, and the pieces are joined in one pass.
    res = ''.join(
        ch + ' ' if label in (2, 3) else ch
        for ch, label in zip(chars, labels))
    print(res)
def to_seq(*args):
    """Reshape each input into rows of SEQ_LEN items.

    Every argument is first truncated to the largest multiple of SEQ_LEN that
    fits, then passed through ``np.reshape(..., [-1, SEQ_LEN])``.

    Returns:
        list: the reshaped arrays, in the order the arguments were given.
    """
    return [
        np.reshape(arr[:(len(arr) - len(arr) % SEQ_LEN)], [-1, SEQ_LEN])
        for arr in args
    ]


if __name__ == '__main__':
    # Load the tagging data together with its vocabularies.
    (x_train, y_train, x_test, y_test,
     vocab_size, n_class, word2idx, tag2idx) = pos.load_data()

    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    # Train with the test split as validation data.
    clf = BiRNN_CRF(SEQ_LEN, vocab_size, n_class)
    clf.fit(
        X_train,
        Y_train,
        val_data=(X_test, Y_test),
        keep_prob=0.8,
        n_epoch=5,
        batch_size=BATCH_SIZE)

    # Accuracy: fraction of predictions equal to the flattened test labels.
    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    matches = y_pred == Y_test.ravel()
    final_acc = matches.mean()
    print("final testing accuracy: %.4f" % final_acc)

    # Invert tag -> index so inferred indices can be printed as tags.
    idx2tag = {index: name for name, index in tag2idx.items()}
    sample_ids = [word2idx[w] for w in sample]
    labels = clf.infer(sample_ids)
    print(' '.join(sample))
    print(' '.join([idx2tag[idx] for idx in labels]))
# NOTE: this line is a whitespace-collapsed fragment. It opens with the tail
# (`return data`) of a definition whose header is not visible here.
#
# iter_seq(x): returns np.array of the slices x[i:i + SEQ_LEN] for every i in
#   range(0, len(x) - SEQ_LEN).
#   NOTE(review): the range stops before i == len(x) - SEQ_LEN, so the last
#   full-length window is never produced, and an input of exactly SEQ_LEN
#   items yields no windows -- confirm whether this is intended.
#
# __main__ script: loads data with chseg.load_data(), builds the train/test
#   arrays with to_train_seq / to_test_seq (not visible in this fragment),
#   fits a BiRNN_CRF using the test arrays as val_data, runs clf.infer on the
#   character indices of `sample`, and prints the sample with a space appended
#   after every character whose label is 2 or 3.
#   NOTE(review): the output loop re-reads/re-decodes `sample` for each label
#   although `chars` already holds the same characters.
return data def iter_seq(x): return np.array([x[i:i + SEQ_LEN] for i in range(0, len(x) - SEQ_LEN)]) if __name__ == '__main__': x_train, y_train, x_test, y_test, vocab_size, char2idx, idx2char = chseg.load_data( ) X_train, Y_train = to_train_seq(x_train, y_train) X_test, Y_test = to_test_seq(x_test, y_test) print('Vocab size: %d' % vocab_size) clf = BiRNN_CRF(SEQ_LEN, vocab_size, N_CLASS) clf.fit(X_train, Y_train, val_data=(X_test, Y_test), n_epoch=N_EPOCH, batch_size=BATCH_SIZE) chars = list(sample) if py == 3 else list(sample.decode('utf-8')) labels = clf.infer([char2idx[c] for c in chars]) res = '' for i, l in enumerate(labels): c = sample[i] if py == 3 else sample.decode('utf-8')[i] if l == 2 or l == 3: c += ' ' res += c print(res)
from birnn_crf_clf import BiRNN_CRF


# Row width produced by to_seq; also the first argument given to BiRNN_CRF.
SEQ_LEN = 20
# Batch size handed to both clf.fit and clf.predict.
BATCH_SIZE = 32
# Tokens that are tagged and printed once training has finished.
sample = ['I', 'love', 'you']


def to_seq(*args):
    """Reshape each input into rows of SEQ_LEN items.

    Every argument is first truncated to the largest multiple of SEQ_LEN that
    fits, then passed through ``np.reshape(..., [-1, SEQ_LEN])``.

    Returns:
        list: the reshaped arrays, in the order the arguments were given.
    """
    return [
        np.reshape(arr[:(len(arr) - len(arr) % SEQ_LEN)], [-1, SEQ_LEN])
        for arr in args
    ]


if __name__ == '__main__':
    # Load the tagging data together with its vocabularies.
    (x_train, y_train, x_test, y_test,
     vocab_size, n_class, word2idx, tag2idx) = pos.load_data()

    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    # Train with the test split as validation data.
    clf = BiRNN_CRF(SEQ_LEN, vocab_size, n_class)
    clf.fit(
        X_train,
        Y_train,
        val_data=(X_test, Y_test),
        keep_prob=0.8,
        n_epoch=5,
        batch_size=BATCH_SIZE)

    # Accuracy: fraction of predictions equal to the flattened test labels.
    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    matches = y_pred == Y_test.ravel()
    final_acc = matches.mean()
    print("final testing accuracy: %.4f" % final_acc)

    # Invert tag -> index so inferred indices can be printed as tags.
    idx2tag = {index: name for name, index in tag2idx.items()}
    sample_ids = [word2idx[w] for w in sample]
    labels = clf.infer(sample_ids)
    print(' '.join(sample))
    print(' '.join([idx2tag[idx] for idx in labels]))
# NOTE: this line is a whitespace-collapsed fragment. It opens inside the body
# of a definition whose header is not visible here: a loop that truncates each
# `x` in `args` to a multiple of SEQ_LEN, reshapes it to [-1, SEQ_LEN],
# appends it to `data`, and finally returns `data`.
#
# iter_seq(x, text_iter_step=1): returns np.array of the slices
#   x[i:i + SEQ_LEN] for i in range(0, len(x) - SEQ_LEN, text_iter_step).
#   NOTE(review): the range end excludes i == len(x) - SEQ_LEN, so the last
#   full-length window is not produced even when the step would land on it --
#   confirm whether this is intended.
#
# __main__ script: loads data with pos.load_data(), builds the train/test
#   arrays with to_train_seq / to_test_seq (not visible in this fragment),
#   fits BiRNN_CRF(vocab_size, n_class) for one epoch, prints the mean of
#   (y_pred == Y_test) cast to float32 as the test accuracy, then prints
#   `sample` and the tags inferred for it (indices mapped back through the
#   inverted tag2idx).
for x in args: x = x[:(len(x) - len(x) % SEQ_LEN)] data.append(np.reshape(x, [-1, SEQ_LEN])) return data def iter_seq(x, text_iter_step=1): return np.array( [x[i:i + SEQ_LEN] for i in range(0, len(x) - SEQ_LEN, text_iter_step)]) if __name__ == '__main__': x_train, y_train, x_test, y_test, vocab_size, n_class, word2idx, tag2idx = pos.load_data( ) X_train, Y_train = to_train_seq(x_train, y_train) X_test, Y_test = to_test_seq(x_test, y_test) print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape) clf = BiRNN_CRF(vocab_size, n_class) clf.fit(X_train, Y_train, keep_prob=0.8, n_epoch=1, batch_size=BATCH_SIZE) y_pred = clf.predict(X_test, batch_size=BATCH_SIZE) final_acc = (y_pred == Y_test).astype(np.float32).mean() print("final testing accuracy: %.4f" % final_acc) idx2tag = {idx: tag for tag, idx in tag2idx.items()} labels = clf.infer([word2idx[w] for w in sample]) print(' '.join(sample)) print(' '.join([idx2tag[idx] for idx in labels]))
# Number of training epochs passed to clf.fit.
N_EPOCH = 1
# Sentence that is segmented and printed at the end of the script.
sample = '我来到大学读书,希望学到知识'
# Major version of the running interpreter. Read from sys.version_info
# rather than parsed out of the human-readable sys.version string.
py = sys.version_info[0]


def to_seq(*args):
    """Cut every input into rows of SEQ_LEN items.

    Each argument is truncated to the largest multiple of SEQ_LEN that fits
    (the trailing remainder is dropped) and then reshaped with
    ``np.reshape(x, [-1, SEQ_LEN])``.

    Args:
        *args: sequences accepted by ``len``, slicing and ``np.reshape``.

    Returns:
        list: one reshaped array per argument, in argument order.
    """
    data = []
    for x in args:
        usable = len(x) - len(x) % SEQ_LEN
        data.append(np.reshape(x[:usable], [-1, SEQ_LEN]))
    return data


if __name__ == '__main__':
    (x_train, y_train, x_test, y_test,
     vocab_size, char2idx, idx2char) = chseg.load_data()
    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print('Vocab size: %d' % vocab_size)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, N_CLASS)
    clf.fit(X_train, Y_train, val_data=(X_test, Y_test), n_epoch=N_EPOCH)

    # Split the sample into characters exactly once; the non-Python-3 branch
    # decodes it from UTF-8 first.
    chars = list(sample) if py == 3 else list(sample.decode('utf-8'))
    labels = clf.infer([char2idx[c] for c in chars])

    # Append a space after every character whose label is 2 or 3
    # (presumably the word-final / single-character tags -- confirm against
    # chseg). The characters come from `chars`, so the sample is not decoded
    # again per label, and the pieces are joined in one pass.
    res = ''.join(
        ch + ' ' if label in (2, 3) else ch
        for ch, label in zip(chars, labels))
    print(res)
# NOTE: this line is a whitespace-collapsed fragment. It opens inside the body
# of a definition whose header is not visible here: `x` is truncated to a
# multiple of SEQ_LEN, reshaped to [-1, SEQ_LEN], appended to `data`, and
# `data` is returned.
#
# iter_seq(x, text_iter_step=5): returns np.array of the slices
#   x[i:i + SEQ_LEN] for i in range(0, len(x) - SEQ_LEN, text_iter_step).
#   NOTE(review): the range end excludes i == len(x) - SEQ_LEN, so the last
#   full-length window is not produced even when the step would land on it --
#   confirm whether this is intended.
#
# __main__ script: loads data with chseg.load_data(), builds the train/test
#   arrays with to_train_seq / to_test_seq (not visible in this fragment),
#   fits a BiRNN_CRF without validation data, runs clf.infer on the character
#   indices of `sample`, and prints the sample with a space appended after
#   every character whose label is 2 or 3.
#   NOTE(review): X_test / Y_test are built but not used in the visible code.
#   NOTE(review): the output loop re-reads/re-decodes `sample` for each label
#   although `chars` already holds the same characters.
x = x[:(len(x) - len(x) % SEQ_LEN)] data.append(np.reshape(x, [-1, SEQ_LEN])) return data def iter_seq(x, text_iter_step=5): return np.array( [x[i:i + SEQ_LEN] for i in range(0, len(x) - SEQ_LEN, text_iter_step)]) if __name__ == '__main__': x_train, y_train, x_test, y_test, vocab_size, char2idx, idx2char = chseg.load_data( ) X_train, Y_train = to_train_seq(x_train, y_train) X_test, Y_test = to_test_seq(x_test, y_test) print('Vocab size: %d' % vocab_size) clf = BiRNN_CRF(SEQ_LEN, vocab_size, N_CLASS) clf.fit(X_train, Y_train, n_epoch=N_EPOCH, batch_size=BATCH_SIZE) chars = list(sample) if py == 3 else list(sample.decode('utf-8')) labels = clf.infer([char2idx[c] for c in chars]) res = '' for i, l in enumerate(labels): c = sample[i] if py == 3 else sample.decode('utf-8')[i] if l == 2 or l == 3: c += ' ' res += c print(res)