Exemple #1
0
    def load_raw_data(self, nrows=None, save_to_hdf=False):
        """Load, clean and cache the raw train and test frames.

        Both frames are obtained from ``load_train`` / ``load_test``,
        stripped of ``self.drop_features``, run through ``correct_dates``
        and ``to_datetime``, given a ``diff_days`` column, pickled under
        ``../input/`` and finally stored on ``self.train`` / ``self.test``.

        Parameters
        ----------
        nrows : int or None
            Forwarded to ``load_train`` only (per the original note: the
            number of lines to load in the train); ``load_test`` is called
            without it.
        save_to_hdf : bool
            Forwarded to both loaders (per the original note: whether the
            file is saved in hdf after being loaded).
        """
        train = load_train(nrows=nrows, save_to_hdf=save_to_hdf)
        test = load_test(save_to_hdf=save_to_hdf)
        frames = (train, test)

        # Discard the configured feature columns from both frames, in place.
        for frame in frames:
            frame.drop(self.drop_features, axis=1, inplace=True)

        # Date clean-up, train first and then test; the helpers are called
        # for their effect on the frame (return values are not used).
        for frame in frames:
            correct_dates(frame)
            to_datetime(frame, keep_dates=True)

        # Test only: rows with listen_type 0 are rewritten to 1.
        # NOTE(review): the reason is not visible here -- confirm with the
        # author before changing.
        zero_listen_type = test.listen_type == 0
        test.loc[zero_listen_type, 'listen_type'] = 1

        # diff_days = day component of (dt_listen - dt_media).
        for frame in frames:
            frame['diff_days'] = (frame.dt_listen - frame.dt_media).dt.days

        # The two source date columns are dropped once diff_days exists.
        date_columns = ['dt_listen', 'dt_media']
        for frame in frames:
            frame.drop(date_columns, axis=1, inplace=True)

        pickle_paths = ('../input/train_clean.pkl', '../input/test_clean.pkl')
        for frame, path in zip(frames, pickle_paths):
            frame.to_pickle(path)

        self.train = train
        self.test = test
Exemple #2
0
import loading

# Hyper-parameters for the SimpleRNN experiment.
# max_features is also used as the input dimension of the Embedding layer.
# NOTE(review): maxlen and batch_size are not referenced in the visible part
# of this script, and loading.load_train() is called without arguments --
# confirm whether the loader applies the same limits.
max_features = 10000  # number of words to consider as features
maxlen = 500  # cut texts after this number of words (among top max_features most common words)
batch_size = 32
# Earlier Keras IMDB loading/padding path, left commented out for reference:
#print('Loading data...')
#(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)
#print(len(input_train), 'train sequences')
#print(len(input_test), 'test sequences')
#print('Pad sequences (samples x time)')
#input_train = sequence.pad_sequences(input_train, maxlen=maxlen)
#input_test = sequence.pad_sequences(input_test, maxlen=maxlen)
#print('input_train shape:', input_train.shape)
#print('input_test shape:', input_test.shape)

# Data comes from the project-local `loading` module instead.
# load_train() is unpacked into four values (presumably train inputs/labels
# and validation inputs/labels -- verify against `loading`); load_test()
# into two.
input_train, y_train, x_val, y_val = loading.load_train()
input_test, y_test = loading.load_test()
# Report the array shapes as a quick sanity check on what was loaded.
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Embedding

# Embedding -> SimpleRNN -> 3-unit classification head.
model = Sequential()
model.add(Embedding(max_features, 32))
model.add(SimpleRNN(32))
# The model is compiled with categorical_crossentropy, which expects the
# three outputs to form a single probability distribution over the classes.
# Softmax guarantees that; independent sigmoid units do not sum to 1.
model.add(Dense(3, activation='softmax'))
# Print the layer/parameter overview before compiling.
model.summary()
#model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
Exemple #3
0
Created on Fri Mar  1 08:59:14 2019

@author: xabuka
using RNN, LSTM, GRU , BI 
"""

import loading

# Vocabulary size: handed to the loaders and used as the Embedding input
# dimension further down.
max_features = 10000
# Sequence length: handed to the loaders and used as the Embedding
# input_length further down.
max_len = 100
# Sample counts passed to loading.load_train; how they are applied is decided
# inside the project-local `loading` module (not visible here).
training_samples = 1000
validation_samples = 2000
data_dir = '../data/SplitedPalSent'
# Alternative data directory left by the author:
# '/Users/xabuka/PycharmProjects/measuring_acceptability/python-files/aclImdb'

# With Validation=False the loader's result is unpacked into two values only.
input_train, y_train = loading.load_train(
    data_dir,
    max_len,
    training_samples,
    validation_samples,
    max_features,
    Validation=False,
)
input_test, y_test = loading.load_test(data_dir, max_len, max_features)

# Report the array shapes as a quick sanity check on what was loaded.
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)


from keras.models import Sequential
from keras.layers import Dense, Embedding,GRU,LSTM

# Embedding followed by a single recurrent layer; the layers are added in the
# order they are listed.
model = Sequential([
    Embedding(max_features, 64, input_length=max_len),
    LSTM(32),  # the author notes GRU (also imported) as the alternative
])
# Bidirectional variant noted by the author.
# NOTE(review): `layers` is not imported in the visible part of this script.
#model.add(layers.Bidirectional(layers.LSTM(32)))
Exemple #4
0
"""
Created on Fri Mar  1 08:59:14 2019

@author: xabuka
using CNN
"""

import loading

# Vocabulary size: handed to the loaders and used as the Embedding input
# dimension further down.
max_features = 10000
# Sequence length: handed to the loaders and used as the Embedding
# input_length further down.
max_len = 500
# Sample counts passed to loading.load_train; how they are applied is decided
# inside the project-local `loading` module (not visible here).
training_samples = 700
validation_samples = 200

# load_train is unpacked into four values (presumably train and validation
# inputs/labels -- verify against `loading`); load_test into two.
x_train, y_train, x_val, y_val = loading.load_train(
    max_len,
    training_samples,
    validation_samples,
    max_features,
)
x_test, y_test = loading.load_test(max_len, max_features)
# Report the array shapes as a quick sanity check on what was loaded.
print('input_train shape:', x_train.shape)
print('input_test shape:', x_test.shape)

from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
#from keras.layers import Embedding, Conv1D,MaxPooling1D, GlobalMaxPooling1D, Dense
# 1D convolutional stack over the embedded sequence; the layers are added in
# the order they are listed.
model = Sequential([
    layers.Embedding(max_features, 128, input_length=max_len),
    layers.Conv1D(32, 7, activation='relu'),
    layers.MaxPooling1D(5),
    layers.Conv1D(32, 7, activation='relu'),
    layers.GlobalMaxPooling1D(),
])