예제 #1
0
def demo_lstm():
    """Train and evaluate one LSTM classifier per label column of the demo data.

    The demo frame is split 80/20 by row order into a training and a
    validation part. Column 1 is used as the text: it is tokenised with
    ``word_cut`` and passed through ``word2vec_train`` over the full set.
    Every column from index 2 onward is used as a label column. For each
    label column the same ``ChatTextLSTM`` instance is trained, the macro
    F1 score on the validation rows is printed, and the model is saved to
    ``lstm_model_demo_<col>.h5`` in the current working directory. The mean
    of the per-column F1 scores is printed at the end.

    Returns:
        bool: always ``True``.
    """
    demo_data = load_demo_data()
    # Compute the 80/20 split point once so the frame split and the feature
    # split below cannot drift apart.
    split_at = int(0.8 * demo_data.shape[0])
    demo_train = demo_data.iloc[:split_at]
    demo_valid = demo_data.iloc[split_at:]

    text_fullset = demo_data.iloc[:, 1]
    x_cut = word_cut(text_fullset)
    token_counts = x_cut.map(len)
    print(token_counts)
    print(max(token_counts))

    index_dict, word_vectors, x_combined = word2vec_train(x_cut)
    print('Text Words Length:{}'.format(len(index_dict)))
    n_symbols, embedding_weights = get_model_data(index_dict, word_vectors)
    # Show both partitions (the training shape used to be printed twice).
    print(demo_train.shape, demo_valid.shape)

    text_lstm = ChatTextLSTM(input_dim=n_symbols,
                             embedding_dim=setting.VOCABULARY_VECTOR_DIM,
                             embedding_weights=embedding_weights)
    x_train = x_combined[:split_at]
    x_valid = x_combined[split_at:]

    # Flatten every validation sample to one row, matching the per-sample
    # ``reshape(1, -1)`` that single-row prediction would apply.
    n_valid = x_valid.shape[0]
    x_valid_rows = x_valid.reshape(n_valid, -1) if n_valid else x_valid

    f1_score_dict = dict()
    for col in demo_data.columns[2:]:
        # Labels are shifted by +2 before training; the predicted class is
        # an arg-max index and is compared against the shifted labels.
        # NOTE(review): presumably this maps raw labels onto 0-based class
        # ids -- confirm against the data set's label range.
        y_train = demo_train[col] + 2
        y_valid = demo_valid[col] + 2
        text_lstm.train(x_train, y_train, x_valid, y_valid)

        # One batched predict call instead of one call per validation row.
        if n_valid:
            scores = np.asarray(text_lstm.model.predict(x_valid_rows))
            labels = scores.reshape(n_valid, -1).argmax(axis=1)
        else:
            labels = []
        y_valid_pred = pd.Series(labels)
        print(y_valid_pred)

        f1_score_dict[col] = f1_score(y_valid, y_valid_pred, average='macro')
        print('{} F1 Score:{}'.format(col, f1_score_dict[col]))
        text_lstm.model.save('lstm_model_demo_{}.h5'.format(col))

    f1_score_mn = np.mean(list(f1_score_dict.values()))
    print(f1_score_mn)
    return True
예제 #2
0
def cnn_train(X):
    """Train a ``ChatTextCNN`` on ``X`` and return the trained model object.

    ``X`` is tokenised with ``word_cut`` and passed through
    ``word2vec_train``. The result, together with the second column of
    ``X`` (``X.iloc[:, 1]``), is handed to ``get_model_data``, which
    supplies the vocabulary size, the embedding weights and the train/test
    arrays.
    NOTE(review): the second column is presumably the label column --
    confirm against ``get_model_data``.

    Args:
        X: a pandas object supporting ``.iloc[:, 1]``.

    Returns:
        The ``model`` attribute of the trained ``ChatTextCNN`` instance.
    """
    tokens = word_cut(X)
    vocab_index, vectors, sequences = word2vec_train(tokens)

    model_data = get_model_data(vocab_index, vectors, sequences, X.iloc[:, 1])
    vocab_size, weights, train_x, train_y, test_x, test_y = model_data

    classifier = ChatTextCNN(
        input_dim=vocab_size,
        embedding_dim=setting.VOCABULARY_VECTOR_DIM,
        embedding_weights=weights,
    )
    classifier.train(train_x, train_y, test_x, test_y)
    return classifier.model
예제 #3
0
def sentiment_train_manager():
    """Train, evaluate and save one model per label column of the training set.

    Steps, in order:

    1. ``train_preprocess`` supplies the tokenised train/validation text
       and the train/validation frames.
    2. ``word2vec_train`` is run on the training tokens; the validation
       tokens are converted with ``input_transform``.
    3. A model wrapper is obtained from ``model_select``.
    4. For every column from index 2 onward of the training frame, the
       same wrapper is trained, the macro F1 score on the validation set
       is printed, and the model is saved under ``MODELS_SAVE_DIR`` as
       ``<algorithm>_model_<col>_<VERSION>.h5``.
    5. The per-column F1 scores are written to ``F1_SCORE_PATH`` as
       ``col:score`` lines and their mean is printed.

    Returns:
        bool: always ``True``.
    """
    print('Train Text Preprocess')
    x_train_cut, x_valid_cut, train_set, valid_set = train_preprocess()
    print('Train Text Word Embedding')
    index_dict, word_vectors, x_combined = word2vec_train(x_train_cut)
    n_symbols, embedding_weights = get_model_data(index_dict, word_vectors)
    x_valid = input_transform(x_valid_cut)
    x_train = x_combined
    print('Text Words Length:{}'.format(len(index_dict)))
    print('Model Select:{}'.format(ALGORITHM))
    text_train_model = model_select(n_symbols, embedding_weights)

    # Flatten every validation sample to one row, matching the per-sample
    # ``reshape(1, -1)`` that single-row prediction would apply.
    n_valid = x_valid.shape[0]
    x_valid_rows = x_valid.reshape(n_valid, -1) if n_valid else x_valid

    f1_score_dict = dict()
    print('Start Model Training....')
    label_columns = train_set.columns[2:]
    col_len = len(label_columns)
    for i, col in enumerate(label_columns):
        print('{} column is training, finish {}%!'.format(
            col,
            float(i) / float(col_len) * 100))
        # Labels are shifted by +2 before training; the predicted class is
        # an arg-max index and is compared against the shifted labels.
        # NOTE(review): presumably this maps raw labels onto 0-based class
        # ids -- confirm against the data set's label range.
        y_train = train_set[col] + 2
        y_valid = valid_set[col] + 2
        text_train_model.train(x_train, y_train, x_valid, y_valid)

        # One batched predict call instead of one call per validation row.
        if n_valid:
            scores = np.asarray(text_train_model.model.predict(x_valid_rows))
            labels = scores.reshape(n_valid, -1).argmax(axis=1)
        else:
            labels = []
        y_valid_pred = pd.Series(labels)

        f1_score_dict[col] = f1_score(y_valid, y_valid_pred, average='macro')
        print('{} F1 Score:{}'.format(col, f1_score_dict[col]))
        model_save_path = os.path.join(
            MODELS_SAVE_DIR,
            '{}_model_{}_{}.h5'.format(ALGORITHM.lower(), col, VERSION))
        text_train_model.model.save(model_save_path)

    f1_score_mn = np.mean(list(f1_score_dict.values()))
    with open(F1_SCORE_PATH, 'w', encoding='utf-8') as fp:
        # writelines() expects an iterable of lines; a bare string would be
        # iterated one character at a time.
        fp.writelines('{}:{}\n'.format(col, f1_score_)
                      for col, f1_score_ in f1_score_dict.items())
    print('Train Finished, F1 Score:{}'.format(f1_score_mn))
    return True