Example #1
0
def demo_lstm():
    """Train and evaluate the demo LSTM text classifier, one label column at a time.

    Loads the demo data set, splits it 80/20 (by row order) into training
    and validation parts, tokenises the text column, trains word2vec
    embeddings over the full text, and then, for every column from the third
    onwards, trains the LSTM, predicts on the validation rows, prints the
    macro F1 score and saves the model to ``lstm_model_demo_<col>.h5``.
    Finally prints the mean F1 across all label columns.

    Returns:
        bool: always ``True``.
    """
    demo_data = load_demo_data()

    # Compute the split point once so the frame split and the feature-matrix
    # split below are guaranteed to use the same boundary.
    split_at = int(0.8 * demo_data.shape[0])
    demo_train = demo_data.iloc[:split_at]
    demo_valid = demo_data.iloc[split_at:]

    # Second column holds the text that is tokenised and embedded.
    text_fullset = demo_data.iloc[:, 1]
    x_cut = word_cut(text_fullset)
    token_counts = x_cut.map(len)
    print(token_counts)
    print(max(token_counts))

    index_dict, word_vectors, x_combined = word2vec_train(x_cut)
    print('Text Words Length:{}'.format(len(index_dict)))
    n_symbols, embedding_weights = get_model_data(index_dict, word_vectors)
    # Report both split sizes (the original printed the training shape twice).
    print(demo_train.shape, demo_valid.shape)

    # NOTE(review): a single model instance is reused for every label column,
    # so unless ChatTextLSTM.train() re-initialises the weights, later columns
    # start from the weights fitted on earlier ones -- confirm this is intended.
    text_lstm = ChatTextLSTM(input_dim=n_symbols,
                             embedding_dim=setting.VOCABULARY_VECTOR_DIM,
                             embedding_weights=embedding_weights)
    x_train = x_combined[:split_at]
    x_valid = x_combined[split_at:]

    f1_score_dict = dict()
    for col in demo_data.columns[2:]:
        # Labels are shifted by +2 before training; presumably this maps the
        # raw label range onto non-negative class indices -- TODO confirm.
        y_train = demo_train[col] + 2
        y_valid = demo_valid[col] + 2
        text_lstm.train(x_train, y_train, x_valid, y_valid)

        # Predict the whole validation set in one batched call instead of one
        # predict() call per row; argmax over axis 1 picks the class per row.
        class_scores = text_lstm.model.predict(x_valid)
        y_valid_pred = pd.Series(np.argmax(class_scores, axis=1))
        print(y_valid_pred)

        f1_score_dict[col] = f1_score(y_valid, y_valid_pred, average='macro')
        print('{} F1 Score:{}'.format(col, f1_score_dict[col]))
        text_lstm.model.save('lstm_model_demo_{}.h5'.format(col))

    f1_score_mn = np.mean(list(f1_score_dict.values()))
    print(f1_score_mn)
    return True
Example #2
0
def cnn_train(X):
    """Build word2vec features from ``X`` and fit a ``ChatTextCNN`` on them.

    Args:
        X: input passed to ``word_cut``; it must also support
            ``.iloc[:, 1]``, whose result is handed to ``get_model_data``
            (presumably a pandas DataFrame whose second column holds the
            targets -- TODO confirm against the caller).

    Returns:
        The ``model`` attribute of the trained ``ChatTextCNN`` instance.
    """
    tokens = word_cut(X)
    vocab_index, vectors, combined = word2vec_train(tokens)

    # get_model_data yields the vocabulary size, the embedding matrix and the
    # train/test split, in that order.
    prepared = get_model_data(vocab_index, vectors, combined, X.iloc[:, 1])
    vocab_size, weights, train_x, train_y, test_x, test_y = prepared

    classifier = ChatTextCNN(
        input_dim=vocab_size,
        embedding_dim=setting.VOCABULARY_VECTOR_DIM,
        embedding_weights=weights,
    )
    classifier.train(train_x, train_y, test_x, test_y)
    return classifier.model