# WOS5736 example: load pre-cleaned abstracts and integer class labels, then
# train an RMDL text classifier (3 DNN + 3 RNN + 3 CNN random models).
# NOTE(review): `fname` (the text-file path) and `path_WOS` are expected to be
# defined earlier in the file -- only `fnamek` is built here; confirm upstream.
fnamek = os.path.join(path_WOS, "WebOfScience/WOS5736/Y.txt")

with open(fname, encoding="utf-8") as f:
    content = f.readlines()
content = [txt.text_cleaner(x) for x in content]

# Label file: one integer class id per line. encoding specified for
# consistency with the text-file open above.
with open(fnamek, encoding="utf-8") as fk:
    contentk = fk.readlines()
contentk = [x.strip() for x in contentk]

# Column vector of integer labels, shape (n_samples, 1).
Label = np.matrix(contentk, dtype=int)
Label = np.transpose(Label)

np.random.seed(7)
print(Label.shape)
X_train, X_test, y_train, y_test = train_test_split(content, Label,
                                                    test_size=0.2,
                                                    random_state=42)

batch_size = 100
n_epochs = [100, 100, 100]  # epochs per model family: DNN, RNN, CNN
Random_Deep = [3, 3, 3]     # random models per family: DNN, RNN, CNN

# sparse_categorical=True: labels are integer class ids, not one-hot vectors.
# (The original also assigned an unused local `sparse_categorical = 0` that
# contradicted this keyword argument; the dead assignment has been removed.)
RMDL.Text_Classification(X_train, y_train, X_test, y_test,
                         batch_size=batch_size,
                         sparse_categorical=True,
                         random_deep=Random_Deep,
                         epochs=n_epochs,
                         no_of_classes=12)
# IMDB example: decode integer word-id sequences back to text, clean them,
# and train an RMDL text classifier on the raw strings.
print(y_test)

word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

# Any id absent from index_word (e.g. ids pushed out of range by the Keras
# index_from offset) now decodes to '' instead of None -- previously
# ' '.join(...) would raise TypeError on a missing id.
# NOTE(review): keras.datasets.imdb.load_data offsets ids by index_from=3 by
# default; a faithful decode would look up index_word.get(w - 3). Confirm
# against how X_train/X_test were loaded before changing the lookup.
X_train = [txt.text_cleaner(' '.join(index_word.get(w, '') for w in x))
           for x in X_train]
X_test = [txt.text_cleaner(' '.join(index_word.get(w, '') for w in x))
          for x in X_test]

# Single conversion + ravel per set (the original converted each set to
# np.array twice back-to-back).
X_train = np.array(X_train).ravel()
print(X_train.shape)
X_test = np.array(X_test).ravel()

batch_size = 100
sparse_categorical = 0          # labels passed as-is, not sparse integer ids
n_epochs = [500, 500, 500]      # epochs per model family: DNN, RNN, CNN
Random_Deep = [3, 3, 3]         # random models per family: DNN, RNN, CNN

RMDL.Text_Classification(X_train, y_train, X_test, y_test,
                         batch_size=batch_size,
                         sparse_categorical=sparse_categorical,
                         random_deep=Random_Deep,
                         epochs=n_epochs)
# Tiny binary-classification demo: train RMDL on a small in-memory
# train/test split of sentences and labels.
test_labels = labels[split_data:]

# batch_size should be neither very small nor too big; 2 fits this toy set.
batch_size = 2
sparse_categorical = 0  # labels passed as-is, not sparse integer ids
# epochs per model family: DNN, RNN, CNN
n_epochs = [5, 5, 5]
Random_Deep = [3, 3, 3]  # random models per family: DNN, RNN, CNN
no_of_classes = 2        # binary problem

RMDL.Text_Classification(np.array(train_sentences), np.array(train_labels),
                         np.array(test_sentences), np.array(test_labels),
                         batch_size=batch_size,
                         sparse_categorical=sparse_categorical,
                         random_deep=Random_Deep,
                         epochs=n_epochs,
                         # pass the variable defined above instead of a
                         # hard-coded literal 2 (same value, kept in sync)
                         no_of_classes=no_of_classes)
#output
# # Found 129 unique tokens.
# (10, 500)
# Total 400000 word vectors.
# 2
# DNN 0
# <keras.optimizers.Adagrad object at 0x7f00801bbb70>
# Train on 8 samples, validate on 2 samples
# Epoch 1/5
# - 0s - loss: 0.8781 - acc: 0.5000 - val_loss: 0.1762 - val_acc: 1.0000
# WOS example using pre-trained GloVe embeddings (300-d vectors, sequences
# truncated/padded to 100 tokens, vocabulary capped at 50k words).
# NOTE(review): `fnamek` (label-file path) and `content` (cleaned texts) are
# expected to be defined earlier in the file; only label loading onward is
# visible here -- confirm upstream.
# assumes fnamek points to a file with one integer class id per line
with open(fnamek) as fk:
    contentk = fk.readlines()
contentk = [x.strip() for x in contentk]

# Column vector of integer labels, shape (n_samples, 1).
Label = np.matrix(contentk, dtype=int)
Label = np.transpose(Label)

np.random.seed(7)
print(Label.shape)
# NOTE(review): with shuffle=False the split is a plain head/tail cut, so
# random_state=0 has no effect here (sklearn documents random_state as
# ignored when shuffle=False).
X_train, X_test, y_train, y_test = train_test_split(content, Label, test_size=0.2, random_state=0, shuffle=False)

batch_size = 128
# epochs per model family: DNN, RNN, CNN -- DNN disabled (0 epochs, 0 models)
n_epochs = [0, 2, 2]
Random_Deep = [0, 1, 1]

# sparse_categorical=True: labels are integer class ids, not one-hot.
# GloVe_dir/GloVe_file must point at an existing glove.6B.300d.txt download.
RMDL.Text_Classification(X_train, y_train, X_test, y_test, batch_size=batch_size, sparse_categorical=True, random_deep=Random_Deep, epochs=n_epochs, GloVe_dir="../dataset/", GloVe_file="glove.6B.300d.txt", EMBEDDING_DIM=300, MAX_SEQUENCE_LENGTH=100, MAX_NB_WORDS=50000)