def train_model(self):
    tokenizer = Tokenizer(num_words=self.max_words)
    # Vectorize the index sequences into fixed-size binary bag-of-words vectors
    x_train = tokenizer.sequences_to_matrix(self.x_train, mode='binary')
    x_test = tokenizer.sequences_to_matrix(self.x_test, mode='binary')
    y_train = tokenizer.sequences_to_matrix(self.y_train, mode='binary')
    y_test = tokenizer.sequences_to_matrix(self.y_test, mode='binary')
    # y_train = keras.utils.to_categorical(self.y_train, self.num_classes)
    # y_test = keras.utils.to_categorical(self.y_test, self.num_classes)

    # One hidden layer; sigmoid outputs paired with MSE loss and binary accuracy
    self.model = Sequential()
    self.model.add(Dense(1024, input_shape=(self.max_words,)))
    self.model.add(Activation('tanh'))
    self.model.add(Dense(self.num_classes))
    self.model.add(Activation('sigmoid'))
    self.model.compile(loss='mean_squared_error',
                       optimizer='rmsprop',
                       metrics=['binary_accuracy'])

    history = self.model.fit(x_train, y_train,
                             batch_size=self.batch_size,
                             epochs=self.epochs,
                             verbose=1,
                             validation_data=(x_test, y_test))

    # Plot training vs. validation loss per epoch
    pyplot.plot(history.history['loss'])
    pyplot.plot(history.history['val_loss'])
    pyplot.title('model train vs validation loss')
    pyplot.ylabel('loss')
    pyplot.xlabel('epoch')
    pyplot.legend(['train', 'validation'], loc='upper right')
    pyplot.show()
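# Minimal, self-contained sketch (not part of the snippet above) of what
# sequences_to_matrix(mode='binary') produces: each sequence of word indices
# becomes a fixed-width 0/1 row with a 1 at every index that occurs in it.
from keras.preprocessing.text import Tokenizer

demo_tokenizer = Tokenizer(num_words=10)
demo_sequences = [[1, 3, 3, 7], [2, 5]]
demo_matrix = demo_tokenizer.sequences_to_matrix(demo_sequences, mode='binary')
print(demo_matrix.shape)  # (2, 10)
print(demo_matrix[0])     # 1.0 at columns 1, 3 and 7, 0.0 elsewhere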
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from sklearn.decomposition import PCA


def closure(mu):
    # Load the IMDB reviews as integer word-index sequences
    (x_train, y_train), (_, _) = imdb.load_data()
    tokenizer = Tokenizer(num_words=5000)
    tokenizer.fit_on_sequences(x_train)
    # Vectorize each review into a 5000-dimensional tf-idf vector
    x_train = tokenizer.sequences_to_matrix(x_train, mode='tfidf')
    # Note: svd_solver='full' is needed on the GPU server
    x_train = PCA(n_components=100, svd_solver='full').fit_transform(x_train)
    ds = {"data": x_train, "target": y_train}
    # Apply noise and return
    res = preprocess_and_noise(dataset=ds, mu=mu)
    return res
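# Rough, self-contained sketch (toy data; not part of closure() above) of the
# tf-idf vectorization plus PCA projection step that closure() applies to the
# IMDB matrix, shown here on a handful of short sequences.
from keras.preprocessing.text import Tokenizer
from sklearn.decomposition import PCA

toy_sequences = [[1, 2, 2, 3], [2, 4], [1, 1, 5, 6], [3, 3, 3, 7]]
toy_tokenizer = Tokenizer(num_words=8)
toy_tokenizer.fit_on_sequences(toy_sequences)  # collects document counts for tf-idf
toy_tfidf = toy_tokenizer.sequences_to_matrix(toy_sequences, mode='tfidf')
toy_reduced = PCA(n_components=2, svd_solver='full').fit_transform(toy_tfidf)
print(toy_tfidf.shape, '->', toy_reduced.shape)  # (4, 8) -> (4, 2)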
print(y_train[0])
print(word_index['the'])

# Invert the word index so integer ids map back to words, then decode a review
index_to_word = {}
for key, value in word_index.items():
    index_to_word[value] = key
print(' '.join(index_to_word[x] for x in x_train[9]))

from keras_preprocessing.text import Tokenizer

# Vectorize the index sequences into binary bag-of-words vectors
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')

# One-hot encode the class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

print(x_train.shape)
print(x_train[0])
print(y_train.shape)
print(y_train[0])

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

model = Sequential()
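# The snippet above stops right after `model = Sequential()`. A plausible
# continuation (an assumption mirroring the dense classifier used elsewhere in
# this section, not the original author's exact layers) would be:
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])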
def load(self):
    # Setup train and test splits
    if self.source in ['mnist', 'cifar10']:
        (x_train, y_train), (x_test, y_test) = self.db(self.source).load_data()
    elif self.source == 'fashion_mnist':
        (x_train, y_train), (x_test, y_test) = self.db(self.source).load_data()
        # Scale pixel values to [0, 1]
        x_train = x_train / 255.0
        x_test = x_test / 255.0
        self.class_names = [
            'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
            'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
        ]
    elif self.source == 'cifar100':
        (x_train, y_train), (x_test, y_test) = self.db(
            self.source).load_data(label_mode='fine')
    elif self.source == 'imdb':
        (x_train, y_train), (x_test, y_test) = self.db(self.source).load_data(
            path="imdb.npz",
            num_words=10000,  # memory limitation
            skip_top=0,
            maxlen=None,
            seed=113,
            start_char=1,
            oov_char=2,
            index_from=3)
        # Merge both splits, vectorize, then re-split: first 10000 become the test set
        data = np.concatenate((x_train, x_test), axis=0)
        targets = np.concatenate((y_train, y_test), axis=0)
        data = self.__vectorize(data)
        targets = np.array(targets).astype("float32")
        self.x_test = data[:10000]
        self.y_test = targets[:10000]
        self.x_train = data[10000:]
        self.y_train = targets[10000:]
        self.is_text = True
    elif self.source == 'reuters':
        (x_train, y_train), (x_test, y_test) = self.db(
            self.source).load_data(path="reuters.npz",
                                   num_words=None,
                                   skip_top=0,
                                   maxlen=None,
                                   test_split=0.2,
                                   seed=113,
                                   start_char=1,
                                   oov_char=2,
                                   index_from=3)
        self.word_index = self.db(
            self.source).get_word_index(path="reuters_word_index.json")
        self.is_text = True
    else:
        (x_train, y_train), (x_test, y_test) = self.db(self.source).load_data()

    print("Training label shape: ", y_train.shape)  # (60000,) -- 60000 numbers (all 0-9)
    print("First 5 training labels: ", y_train[:5])  # [5, 0, 4, 1, 9]

    # Flatten the images
    train_count = x_train.shape[0]
    test_count = x_test.shape[0]
    image_size = 0
    image_vector_size = 0
    if len(x_train.shape) == 4:
        channels = x_train.shape[3]
    else:
        channels = 1
    if len(x_train.shape) >= 2:
        image_size = x_train.shape[1]
        # Include the channel dimension so RGB datasets (cifar) flatten correctly
        image_vector_size = image_size * image_size * channels

    if self.source in ['mnist', 'cifar10', 'cifar100']:
        self.x_train = x_train
        self.x_test = x_test
        self.y_train_label = y_train.copy()
        self.y_test_label = y_test.copy()
        self.x_train1d = x_train.reshape(train_count, image_vector_size)
        self.x_test1d = x_test.reshape(test_count, image_vector_size)
        self.x_train2d = x_train.reshape(-1, image_size, image_size, channels)
        self.x_test2d = x_test.reshape(-1, image_size, image_size, channels)
        # Convert to "one-hot" vectors using the to_categorical function
        self.num_classes = self.db_categories(self.source)
        self.y_train = keras.utils.to_categorical(y_train, self.num_classes)
        self.y_test = keras.utils.to_categorical(y_test, self.num_classes)
    elif self.source == 'reuters':
        self.num_classes = np.max(y_train) + 1
        max_words = 10000
        # Vectorizing sequence data
        tokenizer = Tokenizer(num_words=max_words)
        self.x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
        self.x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
        print('x_train shape:', x_train.shape)
        print('x_test shape:', x_test.shape)
        # Convert class vector to binary class matrix
        self.y_train = keras.utils.to_categorical(y_train, self.num_classes)
        self.y_test = keras.utils.to_categorical(y_test, self.num_classes)

    print("First 5 training labels as one-hot encoded vectors:\n", y_train[:5])
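# Minimal, self-contained sketch (not part of load() above) of the label
# handling it performs for the image datasets: integer class ids become
# one-hot rows via keras.utils.to_categorical.
import numpy as np
import keras

demo_labels = np.array([5, 0, 4, 1, 9])
demo_one_hot = keras.utils.to_categorical(demo_labels, 10)
print(demo_one_hot.shape)  # (5, 10)
print(demo_one_hot[0])     # 1.0 in column 5, 0.0 elsewhere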
from keras.datasets import reuters
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)
print(train_data)
print(train_labels)
print(test_data)
print(test_labels)
print('done')

word_index = reuters.get_word_index()
num_classes = max(train_labels) + 1
max_words = 10000

# Vectorize the word-index sequences into binary bag-of-words vectors
token = Tokenizer(num_words=max_words)
train_data = token.sequences_to_matrix(train_data, mode='binary')
test_data = token.sequences_to_matrix(test_data, mode='binary')

# One-hot encode the topic labels
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

# Single hidden layer classifier over the bag-of-words features
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
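# Minimal follow-on sketch (batch size, epoch count, and validation split are
# assumptions, not taken from the snippet above) showing how the compiled
# model could be trained and scored on the held-out articles:
history = model.fit(train_data, train_labels,
                    batch_size=32,
                    epochs=5,
                    validation_split=0.1,
                    verbose=1)
score = model.evaluate(test_data, test_labels, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])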