Example #1
import numpy as np
import pandas as pd
from RMDL import text_feature_extraction as txt
from RMDL import RMDL_Text as RMDL
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    file_x = "X.csv"
    file_y = "Y_.csv"
    content = pd.read_csv(file_x, encoding="utf-8")
    Label = pd.read_csv(file_y, encoding="utf-8")
    # keep only the text column and flatten it to a 1-D array
    content = content.iloc[:, 1]
    content = np.array(content).ravel()
    print(content.shape)
    # DataFrame.as_matrix() was removed in recent pandas; use the values array
    Label = np.matrix(Label.values)
    np.random.seed(7)
    # print(Label)
    content = [txt.text_cleaner(x, deep_clean=True) for x in content]
    X_train, X_test, y_train, y_test = train_test_split(content,
                                                        Label,
                                                        test_size=0.1,
                                                        random_state=42)
    batch_size = 256
    sparse_categorical = 0
    n_epochs = [100, 100, 100]  ## DNN--RNN-CNN
    Random_Deep = [2, 2, 2]  ## DNN--RNN-CNN

    RMDL.Text_Classification(X_train,
                             y_train,
                             X_test,
                             y_test,
                             batch_size=batch_size,
                             sparse_categorical=True,
                             random_deep=Random_Deep,
                             epochs=n_epochs)
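Example #1 reads its data from two local files, X.csv and Y_.csv, whose layout is not shown. Judging only from how they are read above (the text is taken from the second column of X.csv, the labels from Y_.csv), a minimal pair of hypothetical placeholder files could be written like this, purely to exercise the pipeline; the column names are assumptions:

import pandas as pd

# Hypothetical stand-in files; only the column positions matter to Example #1.
pd.DataFrame({"id": range(4),
              "text": ["doc one", "doc two", "doc three", "doc four"]}).to_csv("X.csv", index=False)
pd.DataFrame({"label": [0, 1, 0, 1]}).to_csv("Y_.csv", index=False)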
Example #2

import os
import numpy as np
from RMDL import text_feature_extraction as txt
from RMDL.Download import Download_WOS as WOS
from RMDL import RMDL_Text as RMDL
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer scikit-learn

if __name__ == "__main__":
    path_WOS = WOS.download_and_extract()
    fname = os.path.join(path_WOS, "WebOfScience/WOS5736/X.txt")
    fnamek = os.path.join(path_WOS, "WebOfScience/WOS5736/Y.txt")
    with open(fname, encoding="utf-8") as f:
        content = f.readlines()
        content = [txt.text_cleaner(x) for x in content]
    with open(fnamek) as fk:
        contentk = fk.readlines()
    contentk = [x.strip() for x in contentk]
    Label = np.matrix(contentk, dtype=int)
    Label = np.transpose(Label)
    np.random.seed(7)
    print(Label.shape)
    X_train, X_test, y_train, y_test = train_test_split(content,
                                                        Label,
                                                        test_size=0.2,
                                                        random_state=42)

    batch_size = 100
    sparse_categorical = 0
    n_epochs = [100, 100, 100]  ## DNN--RNN-CNN
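The listing for Example #2 is cut off after the epoch settings. A plausible continuation, mirroring the call shown in Example #1 (the number of random models per architecture is an assumption), would be:

    Random_Deep = [3, 3, 3]  ## DNN--RNN-CNN

    RMDL.Text_Classification(X_train,
                             y_train,
                             X_test,
                             y_test,
                             batch_size=batch_size,
                             sparse_categorical=True,
                             random_deep=Random_Deep,
                             epochs=n_epochs)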
Example #3
from RMDL import text_feature_extraction as txt
from keras.datasets import imdb
import numpy as np
from RMDL import RMDL_Text as RMDL

if __name__ == "__main__":
    print("Load IMDB dataset....")
    MAX_NB_WORDS = 75000
    (X_train, y_train), (X_test,
                         y_test) = imdb.load_data(num_words=MAX_NB_WORDS)
    print(len(X_train))
    print(y_test)
    # Map integer indices back to words. Note that imdb.load_data() offsets the
    # word indices by 3 (0 = padding, 1 = start token, 2 = out-of-vocabulary by
    # default), so this direct lookup yields only an approximate reconstruction.
    word_index = imdb.get_word_index()
    index_word = {v: k for k, v in word_index.items()}
    X_train = [
        txt.text_cleaner(' '.join(index_word.get(w) for w in x))
        for x in X_train
    ]
    X_test = [
        txt.text_cleaner(' '.join(index_word.get(w) for w in x))
        for x in X_test
    ]
    X_train = np.array(X_train).ravel()
    print(X_train.shape)
    X_test = np.array(X_test).ravel()

    batch_size = 100
    sparse_categorical = 0
    n_epochs = [500, 500, 500]  ## DNN--RNN-CNN
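Example #3 is likewise truncated after the hyper-parameters. Following the same pattern as Example #1, a plausible continuation (the Random_Deep values are an assumption) would be:

    Random_Deep = [3, 3, 3]  ## DNN--RNN-CNN

    RMDL.Text_Classification(X_train,
                             y_train,
                             X_test,
                             y_test,
                             batch_size=batch_size,
                             sparse_categorical=True,
                             random_deep=Random_Deep,
                             epochs=n_epochs)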