from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.datasets import imdb
from nlp_code.get_data import input_data

max_features = 20000
maxlen = 100  # cut texts after this number of words (among the top max_features most common words)
batch_size = 32

print('Loading data...')
# (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
#                                                       test_split=0.2)
X_train, y_train, X_test, y_test = input_data()
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)  # (samples, maxlen)
print('X_test shape:', X_test.shape)    # labels y are binary, 1 / 0

print('Build model...')
model = Sequential()
# model.add(Embedding(max_features, 128, input_length=maxlen, dropout=0.5))
model.add(Embedding(max_features, 128, input_length=maxlen))
# model.add(LSTM(128, dropout_W=0.5, dropout_U=0.5))  # try using a GRU instead, for fun
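# The script stops right after the embedding layer. Below is a minimal, hedged
# completion following the stock Keras 1.x imdb_lstm example that the
# commented-out lines above point to; the layer size, optimizer, and nb_epoch
# are assumptions, not the author's confirmed settings.
model.add(LSTM(128))                      # single LSTM over the embedded sequence
model.add(Dropout(0.5))                   # regularise before the classifier head
model.add(Dense(1))                       # one unit for the binary (1 / 0) label
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])

print('Train...')
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=5,
          validation_data=(X_test, y_test))
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)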
__author__ = 'bohaohan'

from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding
from nlp_code.get_data import input_data

print('Loading data...')
x_train, y_train, x_val, y_val = input_data()
print('end load')

data_dim = 300
timesteps = len(x_train[0])
nb_classes = 1
nb_epoch = 2000

print('build model')
# expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
# NOTE: the first Embedding argument is the vocabulary size; the value 1 from
# the original source only allows a single token id and is almost certainly
# too small for real text input.
model.add(Embedding(1, 128, input_length=timesteps))
model.add(LSTM(200, return_sequences=True))  # returns the full sequence of 200-d vectors
model.add(LSTM(100, return_sequences=True))  # returns the full sequence of 100-d vectors
model.add(LSTM(32))                          # returns a single 32-d vector
# With a single output unit, softmax would always emit 1.0 and the model could
# never learn; a sigmoid with binary cross-entropy fits the binary label.
model.add(Dense(nb_classes, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
print('finish model')
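# The file ends before any training call (the "# generate dummy training data"
# remark was left over from the Keras stacked-LSTM example it was adapted
# from). A minimal, hedged training sketch on the data loaded above; the
# batch size is an assumption.
model.fit(x_train, y_train,
          batch_size=32,
          nb_epoch=nb_epoch,
          validation_data=(x_val, y_val))
score = model.evaluate(x_val, y_val, batch_size=32)
print('validation loss:', score)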
'''
CPU run command: python cnn.py
'''
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
import random

from nlp_code.get_data import input_data

# load data
data, label = input_data()

# shuffle data
index = [i for i in range(len(data))]
random.shuffle(index)
data = data[index]
data = data / 255.0   # scale pixel values to [0, 1] (255.0 avoids integer division)
label = label[index]
print(data.shape[0], ' samples')

# 20 classes: convert integer labels to the one-hot (binary class matrix)
# format Keras expects for categorical_crossentropy
label = np_utils.to_categorical(label, 20)
# print(data)

###############
# start to build CNN model
###############
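# The banner above is where the model definition would go, but the file stops
# here. A minimal, hedged sketch using only the layers already imported: the
# filter counts, kernel sizes, and SGD settings are assumptions, and the input
# shape is taken from the loaded data rather than a known spec.
model = Sequential()
model.add(Convolution2D(32, 3, 3, border_mode='valid',
                        input_shape=data.shape[1:]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(20))                      # 20 classes, matching to_categorical above
model.add(Activation('softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd,
              metrics=['accuracy'])

model.fit(data, label, batch_size=32, nb_epoch=10,
          validation_split=0.2, shuffle=True)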