def load_raw_data(self, nrows=None, save_to_hdf=False):
    """Load the data from the csv or hdf (if it exists), clean it and cache it.

    Both tables get the same treatment: the columns listed in
    ``self.drop_features`` are removed, the dates are corrected and
    converted, and a ``diff_days`` column (whole days between
    ``dt_listen`` and ``dt_media``) replaces the two date columns.
    In the test table, rows whose ``listen_type`` is 0 are set to 1.
    The cleaned tables are pickled under ``../input/`` and stored on
    ``self.train`` / ``self.test``.

    Parameters
    ----------
    nrows : int or None
        The number of lines to load in the train.
    save_to_hdf : bool
        If the file is saved in hdf after being loaded.
    """
    train = load_train(nrows=nrows, save_to_hdf=save_to_hdf)
    test = load_test(save_to_hdf=save_to_hdf)
    frames = (train, test)

    # Remove the unwanted features from both tables first.
    for frame in frames:
        frame.drop(self.drop_features, axis=1, inplace=True)

    # Fix and parse the date columns (train first, then test).
    for frame in frames:
        correct_dates(frame)
        to_datetime(frame, keep_dates=True)

    # Only the test table is patched: listen_type 0 becomes 1.
    test.loc[test.listen_type == 0, 'listen_type'] = 1

    # Number of days between the listen date and the media date.
    for frame in frames:
        frame['diff_days'] = (frame.dt_listen - frame.dt_media).dt.days

    # The raw dates are no longer needed once diff_days exists.
    date_columns = ['dt_listen', 'dt_media']
    for frame in frames:
        frame.drop(date_columns, axis=1, inplace=True)

    pickle_paths = ('../input/train_clean.pkl', '../input/test_clean.pkl')
    for frame, path in zip(frames, pickle_paths):
        frame.to_pickle(path)

    self.train = train
    self.test = test
import loading max_features = 10000 # number of words to consider as features maxlen = 500 # cut texts after this number of words (among top max_features most common words) batch_size = 32 #print('Loading data...') #(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features) #print(len(input_train), 'train sequences') #print(len(input_test), 'test sequences') #print('Pad sequences (samples x time)') #input_train = sequence.pad_sequences(input_train, maxlen=maxlen) #input_test = sequence.pad_sequences(input_test, maxlen=maxlen) #print('input_train shape:', input_train.shape) #print('input_test shape:', input_test.shape) input_train, y_train, x_val, y_val = loading.load_train() input_test, y_test = loading.load_test() print('input_train shape:', input_train.shape) print('input_test shape:', input_test.shape) from keras.models import Sequential from keras.layers import Dense, SimpleRNN, Embedding model = Sequential() model.add(Embedding(max_features, 32)) model.add(SimpleRNN(32)) model.add(Dense(3, activation='sigmoid')) model.summary() #model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc']) model.compile(optimizer='adam', loss='categorical_crossentropy',
"""
Load train/test data through the project's ``loading`` module and start
building an Embedding + LSTM Keras model.

Created on Fri Mar  1 08:59:14 2019

@author: xabuka
using RNN, LSTM, GRU , BI
"""
import loading

# Settings forwarded to the loader; max_features and max_len are also
# reused as the Embedding layer's input dimension and input length.
max_features = 10000
max_len = 100
training_samples = 1000  # training sample count handed to loading.load_train
validation_samples = 2000  # validation sample count handed to loading.load_train
data_dir = '../data/SplitedPalSent'
# Alternative data directory kept from the original script:
# '/Users/xabuka/PycharmProjects/measuring_acceptability/python-files/aclImdb'

# With Validation=False the loader's result is unpacked into two values
# only (inputs and labels), so no validation split is bound here.
input_train, y_train = loading.load_train(
    data_dir,
    max_len,
    training_samples,
    validation_samples,
    max_features,
    Validation=False,
)
input_test, y_test = loading.load_test(data_dir, max_len, max_features)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

from keras.models import Sequential
from keras.layers import Dense, Embedding, GRU, LSTM

model = Sequential()
model.add(Embedding(max_features, 64, input_length=max_len))
model.add(LSTM(32))
# Alternatives noted by the author for the recurrent layer: GRU, or a
# bidirectional LSTM:
# model.add(layers.Bidirectional(layers.LSTM(32)))
"""
Load train/validation/test data through the project's ``loading`` module
and start building a 1D convolutional Keras model.

Created on Fri Mar  1 08:59:14 2019

@author: xabuka
using CNN
"""
import loading

# Settings forwarded to the loader; max_features and max_len are also
# reused as the Embedding layer's input dimension and input length.
max_features = 10000  # number of words to consider as features
max_len = 500
training_samples = 700  # training sample count handed to loading.load_train
validation_samples = 200  # validation sample count handed to loading.load_train

x_train, y_train, x_val, y_val = loading.load_train(
    max_len,
    training_samples,
    validation_samples,
    max_features,
)
x_test, y_test = loading.load_test(max_len, max_features)
print('input_train shape:', x_train.shape)
print('input_test shape:', x_test.shape)

from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop

# Layer stack, in order: embedding, convolution, pooling, convolution,
# global max pooling.
model = Sequential([
    layers.Embedding(max_features, 128, input_length=max_len),
    layers.Conv1D(32, 7, activation='relu'),
    layers.MaxPooling1D(5),
    layers.Conv1D(32, 7, activation='relu'),
    layers.GlobalMaxPooling1D(),
])