def load_sentences(load_sentences_from, input_dir):
    """Load sentences, clean them and encode them as padded ID sequences.

    Besides its arguments, the function reads the module-level names
    ``config``, ``categories`` and ``sequence_length``.

    Args:
        load_sentences_from: Where the sentences come from. ``'manually'``
            uses the list hard-coded below, ``'txt'`` reads one sentence per
            line from ``sentences.txt`` and ``'csv'`` reads every cell of the
            header-less ``sentences.csv``.
        input_dir: Prefix that is concatenated directly with the file name,
            so it must already end with a path separator. Not used for
            ``'manually'``.

    Returns:
        Tuple ``(sentences, sentences_encoded)``: the sentences after
        ``data_helpers.clean_str_new`` and the output of ``pad_sequences``
        for them (post-padded, ``maxlen=sequence_length``).

    Raises:
        ValueError: If ``load_sentences_from`` is not a supported source.
            Previously this only surfaced further down as an
            ``UnboundLocalError`` on ``sentences``.
    """
    # call file 'sentences'
    if load_sentences_from == 'manually':
        # Just write them below
        sentences = ['I am a boy.', 'I am a boy', 'I am a girl']
    elif load_sentences_from == 'txt':
        # Load from txt
        with open(input_dir + 'sentences.txt', 'r') as f:
            sentences = f.read().splitlines()
    elif load_sentences_from == 'csv':
        # Load from csv.
        sentences = np.array(
            pd.read_csv(input_dir + 'sentences.csv', header=None)).flatten()
    else:
        # Fail before any data is loaded, with a message that names the
        # offending value.
        raise ValueError(
            "load_sentences_from must be 'manually', 'txt' or 'csv', got %r"
            % (load_sentences_from,))

    # Preprocess/clean text
    sentences = [
        data_helpers.clean_str_new(sentence) for sentence in sentences
    ]

    # Turn words into IDs based on training data.
    # Only the texts are needed to fit the tokenizer, so the labels are
    # discarded. load_all_data is given both the train and validation paths.
    Xtrain, _ = data_helpers.load_all_data(
        config.train_path, config.validation_path, categories, shuffle=False)
    Xtest, _ = data_helpers.load_data(config.test_path, categories)

    tokenizer = Tokenizer(
        filters='')  # depending on word embedding, set lower=False.
    # The vocabulary is fitted on the loaded training texts and the test
    # texts together.
    tokenizer.fit_on_texts(np.append(np.array(Xtrain), np.array(Xtest)))

    sequences = tokenizer.texts_to_sequences(sentences)
    sentences_encoded = pad_sequences(sequences,
                                      maxlen=sequence_length,
                                      padding='post')
    return sentences, sentences_encoded
'word21': pd.DataFrame(layer_output_21), 'word27': pd.DataFrame(layer_output_27), 'word33': pd.DataFrame(layer_output_33) } return layer_output_last, timestep_outputs else: return layer_output_last else: layer_output_all = pd.DataFrame(layer_output_all) return layer_output_all
# NOTE(review): the line above is the tail of a definition whose header lies
# outside the visible source. It is reproduced exactly as found because its
# nesting depth cannot be determined from here.

# Load data
##============================================================================================
# Texts and labels for training. The call passes the train and validation
# paths to load_all_data.
# NOTE(review): the trailing comment says "train and test", but no test path
# is passed here — confirm which is meant.
Xtrain, Ytrain = data_helpers.load_all_data(
    config.train_path, config.validation_path, categories, shuffle=False)  # I changed this so it combines train and test
# Test split, loaded through load_data and again through load_data_raw.
# NOTE(review): presumably the raw variant skips text cleaning — verify in
# data_helpers.
Xtest, Ytest = data_helpers.load_data(config.test_path, categories)
Xtest_raw, Ytest_raw = data_helpers.load_data_raw(config.test_path, categories)
# One combined load over the train, validation and test paths, requested
# unshuffled and with one_hot=False.
X, y = data_helpers.load_whole_dataset(config.train_path, config.validation_path, config.test_path, categories, load_all=True, shuffle=False, one_hot=False)
# Re-import data_helpers from disk. This runs after the loads above, so it
# only affects calls made later in the script.
import importlib
importlib.reload(data_helpers)
## Encode Ytrain
import tensorflow as tf
from autoencoder import StackedAutoencoderClassifier
from cnn import CNNLayer
import pandas as pd
from gensim.models import Word2Vec
import os
import data_helpers
import datetime
import time
from numeric_cnn import NumericCNN
from cnn_rnn import CNN_RNN
import sys

# Load both data files at import time. Each call is unpacked into three
# values: text features, numeric features and labels.
text_features_train, numeric_features_train, labels_train = data_helpers.load_all_data('../data/trainingset.txt')
text_features_test, numeric_features_test, labels_test = data_helpers.load_all_data('../data/testset.txt')

# Longest text, counted in space-separated pieces, over train and test
# together. The two collections are joined with `+`.
max_sentence_length = max([len(x.split(' ')) for x in (text_features_train + text_features_test)])
# Length of the first numeric feature row.
# NOTE(review): assumes every row has the same length — confirm.
numeric_feature_num = len(numeric_features_train[0])
# Word2Vec model previously saved with gensim, loaded from disk.
init_words_embedded_model = Word2Vec.load('../data/word2vec.model')
# Length of the first label row.
# NOTE(review): presumably labels are one-hot vectors, making this the class
# count — confirm in data_helpers.
num_classes = len(labels_train[0])
# NOTE(review): presumably the L2 regularisation weight passed to the models;
# its use is not visible in this excerpt.
l2_reg_lambda = 0.6
# Expose only CUDA device 0 to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def train(x_train_cnn, x_train_cnn_rnn, x_train_ncnn, y_train, x_test_cnn, x_test_cnn_rnn, x_test_ncnn, y_test):
    # NOTE(review): this definition continues past the visible source. Only
    # its first two statements are shown, and they are unchanged.
    # Longest CNN input text, in space-separated pieces, over train and test.
    cnn_max_sentence_length = max([len(x.split(' ')) for x in (x_train_cnn + x_test_cnn)])
    # Length of the first numeric-CNN training row.
    ncnn_feature_num = len(x_train_ncnn[0])
# layer_name = 'kmaxpool_1'
# intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output_shape)

# Load data
##============================================================================================
# Earlier variants of the loading step, kept disabled for reference:
#   importlib.reload(data_helpers)
#   Xtrain, Ytrain = data_helpers.load_all_data(config.train_path,config.validation_path, categories, shuffle=False) # I changed this so it combines train and validation
#   Xvalidation, Yvalidation = data_helpers.load_data(config.validation_path, categories)
#   Xvalidation_raw, Yvalidation_raw = data_helpers.load_data_raw(config.validation_path, categories)
#   Xtrain, Ytrain = data_helpers.load_data(config.train_path, categories)

# Training texts and labels. load_all_data receives both the train and the
# validation path.
# NOTE(review): the validation path is loaded again right below. If
# load_all_data merges both sets, the validation samples are also part of the
# training data — confirm this is intended.
Xtrain, Ytrain = data_helpers.load_all_data(
    config.train_path,
    config.validation_path,
    categories,
    shuffle=False,
)

# Validation split, once through load_data and once through load_data_raw.
Xvalidation, Yvalidation = data_helpers.load_data(
    config.validation_path, categories)
Xvalidation_raw, Yvalidation_raw = data_helpers.load_data_raw(
    config.validation_path, categories)

## Encode Ytrain
# =====================================================================================
# Keep every label set in two forms: wrapped as a numpy array of the labels
# as loaded, and one-hot encoded.
Ytrain_integer, Yvalidation_integer = np.array(Ytrain), np.array(Yvalidation)
# NOTE(review): to_categorical is called without num_classes, so each matrix
# gets its width from the largest label in its own input. The two widths only
# agree if both splits contain the highest class id — confirm.
Ytrain_encoded, Yvalidation_encoded = (
    np_utils.to_categorical(Ytrain),
    np_utils.to_categorical(Yvalidation),
)

# Zero pad (encode) Xtrain and Xvalidation