def train(x_train, y_train, x_test, y_test, dim_img, nb_filters, nb_conv, batch_size, nb_epoch, nb_classes):
    """
    Train the CNN classifier and report its test performance.

    Parameters
    ----------
    x_train, y_train : ndarray
        Training images and integer class labels.
    x_test, y_test : ndarray
        Test images and integer class labels.
    dim_img : int
        Size of the square input images.
    nb_filters : int
        Number of convolutional filters.
    nb_conv : int
        Size of the convolution kernels.
    batch_size : int
        Number of samples per gradient update.
    nb_epoch : int
        Number of training epochs.
    nb_classes : int
        Number of target classes.

    Returns
    -------
    mdl
        The trained Keras model.
    """
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

    mdl = model(dim_img, nb_filters, nb_conv, nb_classes)
    mdl.fit(x_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
            verbose=1, validation_data=(x_test, y_test))
    mdl.save_weights('weight_center.h5')

    score = mdl.evaluate(x_test, y_test, show_accuracy=True, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    return mdl
def train_model(model, train, test, nb_classes):
    X_train = train[0].reshape(train[0].shape[0], 1, img_rows, img_cols)
    X_test = test[0].reshape(test[0].shape[0], 1, img_rows, img_cols)
    X_train = X_train.astype("float32")
    X_test = X_test.astype("float32")
    X_train /= 255
    X_test /= 255
    print("X_train shape:", X_train.shape)
    print(X_train.shape[0], "train samples")
    print(X_test.shape[0], "test samples")

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(train[1], nb_classes)
    Y_test = np_utils.to_categorical(test[1], nb_classes)

    model.compile(loss="categorical_crossentropy", optimizer="adadelta")

    t = now()
    model.fit(
        X_train,
        Y_train,
        batch_size=batch_size,
        nb_epoch=nb_epoch,
        show_accuracy=True,
        verbose=1,
        validation_data=(X_test, Y_test),
    )
    print("Training time: %s" % (now() - t))
    score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
    print("Test score:", score[0])
    print("Test accuracy:", score[1])
def test_TensorBoard_with_ReduceLROnPlateau(tmpdir):
    import shutil
    np.random.seed(np.random.randint(1, 1e7))
    filepath = str(tmpdir / 'logs')

    (X_train, y_train), (X_test, y_test) = get_test_data(num_train=train_samples,
                                                         num_test=test_samples,
                                                         input_shape=(input_dim,),
                                                         classification=True,
                                                         num_classes=num_class)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    model = Sequential()
    model.add(Dense(num_hidden, input_dim=input_dim, activation='relu'))
    model.add(Dense(num_class, activation='softmax'))
    model.compile(loss='binary_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    cbks = [
        callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=4, verbose=1),
        callbacks.TensorBoard(log_dir=filepath)]

    model.fit(X_train, y_train, batch_size=batch_size,
              validation_data=(X_test, y_test), callbacks=cbks, epochs=2)

    assert os.path.isdir(filepath)
    shutil.rmtree(filepath)
    assert not tmpdir.listdir()
def train_top_model(X_train, X_test, y_train, y_test):
    model = util.load_alex_finetune56_finetune567(
        nb_class=config.nb_class,
        weights_path=config.alexnet_weights_path,
        top_model_weight_path="models/alex_finetune56_weights" + str(fold_count) + ".h5")

    print("\nTraining CNN..")

    Y_train = np_utils.to_categorical(y_train, config.nb_class)
    Y_test = np_utils.to_categorical(y_test, config.nb_class)

    shape = X_train.shape[1:]

    model.compile(
        loss='categorical_crossentropy',
        # optimizer=SGD(lr=0.00001, decay=1e-6, momentum=0.9, nesterov=True),
        optimizer=SGD(lr=0.00001, momentum=0.9),
        metrics=['accuracy'])

    hist = model.fit(X_train, Y_train,
                     nb_epoch=2, batch_size=32, verbose=1,
                     validation_data=(X_test, Y_test))
    util.save_history(hist, "finetune56_finetune567_fold" + str(fold_count), fold_count)

    scores = model.evaluate(X_test, Y_test, verbose=0)

    model.save_weights("models/alex_finetune56_finetune567" + str(fold_count) + ".h5")
    # model.save_weights("model/alex_topmodel" + str(fold_count) + ".h5")

    print("Softmax %s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    return scores[1]
def init_cascade_training(self, x3, x6, y, x3_unif, x6_unif, y_unif):
    '''
    helper function to initialize the training of the cascade model:
    shuffle the training set and make the targets categorical
    :param x3: 33x33 patches
    :param x6: 65x65 patches
    :param y: labels
    :param x3_unif: 33x33 uniformly distributed patches
    :param x6_unif: 65x65 uniformly distributed patches
    :param y_unif: uniformly distributed labels
    :return: shuffled patches and one-hot targets for both training sets
    '''
    Y_train = np_utils.to_categorical(y, 5)

    # shuffle training set (list() keeps this working on Python 3, where zip is lazy)
    shuffle = list(zip(x3, x6, Y_train))
    np.random.shuffle(shuffle)

    # transform shuffled training set back to numpy arrays
    X33_train = np.array([shuffle[i][0] for i in range(len(shuffle))])
    X65_train = np.array([shuffle[i][1] for i in range(len(shuffle))])
    Y_train = np.array([shuffle[i][2] for i in range(len(shuffle))])  # .reshape((len(shuffle), 5, 1, 1))

    Y_uniftrain = np_utils.to_categorical(y_unif, 5)

    # shuffle uniformly distributed training set
    shuffle = list(zip(x3_unif, x6_unif, Y_uniftrain))
    np.random.shuffle(shuffle)

    # transform shuffled uniformly distributed training set back to numpy arrays
    X33_uniftrain = np.array([shuffle[i][0] for i in range(len(shuffle))])
    X65_uniftrain = np.array([shuffle[i][1] for i in range(len(shuffle))])
    Y_uniftrain = np.array([shuffle[i][2] for i in range(len(shuffle))])  # .reshape((len(shuffle), 5, 1, 1))

    return X33_train, X65_train, Y_train, X33_uniftrain, X65_uniftrain, Y_uniftrain
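# An equivalent shuffle that avoids building Python lists: draw one permutation
# and index all three arrays with it. A sketch, assuming x3, x6 and y are numpy
# arrays sharing their first dimension.
perm = np.random.permutation(len(y))
x3_shuffled, x6_shuffled, y_shuffled = x3[perm], x6[perm], np_utils.to_categorical(y, 5)[perm]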
def test_ReduceLROnPlateau():
    np.random.seed(1337)
    (X_train, y_train), (X_test, y_test) = get_test_data(num_train=train_samples,
                                                         num_test=test_samples,
                                                         input_shape=(input_dim,),
                                                         classification=True,
                                                         num_classes=num_class)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    def make_model():
        np.random.seed(1337)
        model = Sequential()
        model.add(Dense(num_hidden, input_dim=input_dim, activation='relu'))
        model.add(Dense(num_class, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.SGD(lr=0.1),
                      metrics=['accuracy'])
        return model

    model = make_model()
    # This should reduce the LR after the first epoch (due to high epsilon).
    cbks = [callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                        epsilon=10, patience=1, cooldown=5)]
    model.fit(X_train, y_train, batch_size=batch_size,
              validation_data=(X_test, y_test), callbacks=cbks, epochs=5, verbose=2)
    assert np.allclose(float(K.get_value(model.optimizer.lr)), 0.01, atol=K.epsilon())

    model = make_model()
    cbks = [callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                        epsilon=0, patience=1, cooldown=5)]
    model.fit(X_train, y_train, batch_size=batch_size,
              validation_data=(X_test, y_test), callbacks=cbks, epochs=5, verbose=2)
    assert np.allclose(float(K.get_value(model.optimizer.lr)), 0.1, atol=K.epsilon())
def test_LambdaCallback():
    np.random.seed(1337)
    (X_train, y_train), (X_test, y_test) = get_test_data(num_train=train_samples,
                                                         num_test=test_samples,
                                                         input_shape=(input_dim,),
                                                         classification=True,
                                                         num_classes=num_class)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)
    model = Sequential()
    model.add(Dense(num_hidden, input_dim=input_dim, activation='relu'))
    model.add(Dense(num_class, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    # Start an arbitrary process that should run during model
    # training and be terminated after training has completed.
    def f():
        while True:
            pass

    p = multiprocessing.Process(target=f)
    p.start()
    cleanup_callback = callbacks.LambdaCallback(on_train_end=lambda logs: p.terminate())

    cbks = [cleanup_callback]
    model.fit(X_train, y_train, batch_size=batch_size,
              validation_data=(X_test, y_test), callbacks=cbks, epochs=5)
    p.join()
    assert not p.is_alive()
def data():
    nb_classes = 10
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    # this will do preprocessing and realtime data augmentation
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    return datagen, X_train, Y_train, X_test, Y_test
def test_EarlyStopping():
    np.random.seed(1337)
    (X_train, y_train), (X_test, y_test) = get_test_data(num_train=train_samples,
                                                         num_test=test_samples,
                                                         input_shape=(input_dim,),
                                                         classification=True,
                                                         num_classes=num_class)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)
    model = Sequential()
    model.add(Dense(num_hidden, input_dim=input_dim, activation='relu'))
    model.add(Dense(num_class, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    mode = 'max'
    monitor = 'val_acc'
    patience = 0
    cbks = [callbacks.EarlyStopping(patience=patience, monitor=monitor, mode=mode)]
    history = model.fit(X_train, y_train, batch_size=batch_size,
                        validation_data=(X_test, y_test), callbacks=cbks, epochs=20)

    mode = 'auto'
    monitor = 'val_acc'
    patience = 2
    cbks = [callbacks.EarlyStopping(patience=patience, monitor=monitor, mode=mode)]
    history = model.fit(X_train, y_train, batch_size=batch_size,
                        validation_data=(X_test, y_test), callbacks=cbks, epochs=20)
def feature_scaled_nn_acc(mds, type):
    train, validation = validation_split(mds)

    # Multiply by 1 to convert to bool
    y_train = train['Up'] * 1
    X_train = train.drop('Up', axis=1)
    y_validation = validation['Up'] * 1
    X_validation = validation.drop('Up', axis=1)

    pre = PCA(n_components=19, whiten=True)
    X_train_pca = pre.fit_transform(X_train)
    # use transform (not fit_transform) so the validation set is projected
    # with the components learned on the training set
    X_validation_pca = pre.transform(X_validation)

    model = create_model(X_train_pca.shape[1], type)

    # Convert to Keras format
    y_train = to_categorical(y_train.values)
    y_validation = to_categorical(y_validation.values)

    model.fit(X_train_pca, y_train, nb_epoch=5, batch_size=16)
    time.sleep(0.1)

    # Fit and guess
    guess_train = model.predict_classes(X_train_pca)
    guess_train = to_categorical(guess_train)
    guess_validation = model.predict_classes(X_validation_pca)
    guess_validation = to_categorical(guess_validation)

    train_acc = accuracy_score(y_train, guess_train)
    validation_acc = accuracy_score(y_validation, guess_validation)
    print("\n neural net train accuracy is {}".format(train_acc))
    print("\n neural net validation accuracy is {}".format(validation_acc))
    return guess_validation
def nn_acc(mds, type, epoch=5, batch=16):
    train, validation = validation_split(mds)

    # Multiply by 1 to convert to bool
    y_train = train['Up'] * 1
    X_train = train.drop('Up', axis=1)
    y_validation = validation['Up'] * 1
    X_validation = validation.drop('Up', axis=1)

    # Create Model
    model = create_model(X_train.shape[1], type)

    # Convert to Keras format
    X_train = X_train.as_matrix()
    X_validation = X_validation.as_matrix()
    y_train = to_categorical(y_train.values)
    y_validation = to_categorical(y_validation.values)

    # Fit and guess
    model.fit(X_train, y_train, nb_epoch=epoch, batch_size=batch)
    guess_train = model.predict_classes(X_train)
    guess_train = to_categorical(guess_train)
    guess_validation = model.predict_classes(X_validation)
    guess_validation = to_categorical(guess_validation)

    train_acc = accuracy_score(y_train, guess_train)
    validation_acc = accuracy_score(y_validation, guess_validation)
    print("\n neural net train accuracy is {}".format(train_acc))
    print("\n neural net validation accuracy is {}".format(validation_acc))
    return guess_validation
def dataset_mnist():
    from keras.datasets import mnist

    dataset = {"name": "mnist"}
    dataset["classes"] = {0: "0", 1: "1", 2: "2", 3: "3", 4: "4",
                          5: "5", 6: "6", 7: "7", 8: "8", 9: "9"}
    dataset["img_rows"], dataset["img_cols"] = 28, 28
    dataset["nb_classes"] = 10
    dataset["batch_size"] = 128
    dataset["epochs_until_report"] = 1
    dataset["img_channels"] = 1
    dataset["input_shape"] = (dataset["img_channels"], dataset["img_rows"], dataset["img_cols"])
    dataset["PNG_mode"] = "L"

    (dataset["x_train"], dataset["y_train"]), (dataset["x_test"], dataset["y_test"]) = mnist.load_data()
    dataset["x_train"] = dataset["x_train"].reshape(dataset["x_train"].shape[0],
                                                    dataset["img_channels"],
                                                    dataset["img_rows"],
                                                    dataset["img_cols"])
    dataset["x_test"] = dataset["x_test"].reshape(dataset["x_test"].shape[0],
                                                  dataset["img_channels"],
                                                  dataset["img_rows"],
                                                  dataset["img_cols"])
    dataset["x_train"] = dataset["x_train"].astype('float32')
    dataset["x_test"] = dataset["x_test"].astype('float32')
    dataset["x_train"] /= 255
    dataset["x_test"] /= 255
    dataset["y_train"] = np_utils.to_categorical(dataset["y_train"], dataset["nb_classes"])
    dataset["y_test"] = np_utils.to_categorical(dataset["y_test"], dataset["nb_classes"])
    return dataset
def dataset_cifar10():
    from keras.datasets import cifar10

    dataset = {"name": "CIFAR 10"}
    dataset["classes"] = {0: "Plane", 1: "Car", 2: "Bird", 3: "Cat", 4: "Deer",
                          5: "Dog", 6: "Frog", 7: "Horse", 8: "Ship", 9: "Truck"}
    dataset["img_rows"], dataset["img_cols"] = 32, 32
    dataset["img_channels"] = 3
    dataset["nb_classes"] = 10
    dataset["batch_size"] = 128
    dataset["epochs_until_report"] = 1
    dataset["input_shape"] = (dataset["img_channels"], dataset["img_rows"], dataset["img_cols"])
    dataset["PNG_mode"] = "RGB"

    (dataset["x_train"], dataset["y_train"]), (dataset["x_test"], dataset["y_test"]) = cifar10.load_data()
    dataset["x_train"] = dataset["x_train"].astype('float32')
    dataset["x_test"] = dataset["x_test"].astype('float32')
    dataset["x_train"] /= 255
    dataset["x_test"] /= 255
    dataset["y_train"] = np_utils.to_categorical(dataset["y_train"], dataset["nb_classes"])
    dataset["y_test"] = np_utils.to_categorical(dataset["y_test"], dataset["nb_classes"])
    return dataset
def CNN_3_layer(activation):
    Xtrain, ytrain, XCV, yCV, Xtest, ytest = load_data("mnist.pkl.gz")
    Xtrain = Xtrain.reshape(Xtrain.shape[0], 1, 28, 28)
    Xtest = Xtest.reshape(Xtest.shape[0], 1, 28, 28)
    XCV = XCV.reshape(XCV.shape[0], 1, 28, 28)  # was Xtest.reshape(...), a copy-paste bug

    # 0~9, ten classes
    ytrain = np_utils.to_categorical(ytrain, 10)
    ytest = np_utils.to_categorical(ytest, 10)
    yCV = np_utils.to_categorical(yCV, 10)

    # Build the model
    model = Sequential()
    model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation(activation))
    model.add(Convolution2D(32, 3, 3))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Convolution2D(16, 3, 3))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation(activation))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # fit model
    print("fit model")
    model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
    model.fit(Xtrain, ytrain, batch_size=100, nb_epoch=20, verbose=1,
              validation_data=(XCV, yCV))
    score = model.evaluate(Xtest, ytest, verbose=0)
    print(score[0])
    print(score[1])
def evaluate(lr, pos):
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train.astype("float32").reshape((60000, 784))
    X_test = X_test.astype("float32").reshape((10000, 784))
    X_train /= 255
    X_test /= 255
    Y_train = np_utils.to_categorical(y_train, 10)
    Y_test = np_utils.to_categorical(y_test, 10)

    model = Sequential()
    model.add(Dense(output_dim=layer1, input_dim=784))
    if pos == 0:
        model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(Dense(output_dim=layer2, input_dim=layer1))
    if pos == 1:
        model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(Dense(output_dim=10, input_dim=layer2))
    if pos == 2:
        model.add(BatchNormalization())
    model.add(Activation("softmax"))

    model.compile(
        loss="categorical_crossentropy",
        optimizer=SGD(lr=lr, momentum=0.9, nesterov=True),
        metrics=["accuracy"]
    )
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=0, validation_data=(X_test, Y_test))
    score = model.evaluate(X_test, Y_test, verbose=0)
    return score[1]
def test_img_clf(self):
    print('image classification data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000,
                                                         nb_test=200,
                                                         input_shape=(3, 8, 8),
                                                         classification=True,
                                                         nb_class=2)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    model = Sequential()
    model.add(Convolution2D(8, 8, 8, input_shape=(3, 8, 8)))
    model.add(Activation('sigmoid'))
    model.add(Flatten())
    model.add(Dense(y_test.shape[-1]))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd')
    history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16,
                        validation_data=(X_test, y_test),
                        show_accuracy=True, verbose=0)
    print(history.history['val_acc'][-1])
    self.assertTrue(history.history['val_acc'][-1] > 0.9)
def test_vector_clf(self):
    nb_hidden = 10
    print('vector classification data:')
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000,
                                                         nb_test=200,
                                                         input_shape=(10,),
                                                         classification=True,
                                                         nb_class=2)
    print('X_train:', X_train.shape)
    print('X_test:', X_test.shape)
    print('y_train:', y_train.shape)
    print('y_test:', y_test.shape)

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(X_train.shape[-1],)))
    model.add(Activation('relu'))
    model.add(Dense(y_train.shape[-1]))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    history = model.fit(X_train, y_train, nb_epoch=15, batch_size=16,
                        validation_data=(X_test, y_test),
                        show_accuracy=True, verbose=0)
    self.assertTrue(history.history['val_acc'][-1] > 0.8)
def load_mnist(image_dim_ordering):
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    if image_dim_ordering == 'th':
        X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
        X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
    else:
        X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
        X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    X_train = normalization(X_train)
    X_test = normalization(X_test)

    nb_classes = len(np.unique(np.hstack((y_train, y_test))))

    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
    return X_train, Y_train, X_test, Y_test
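# The `normalization` helper used by load_mnist above is not shown; a minimal
# sketch, assuming it rescales uint8 pixel values from [0, 255] to [-1, 1]
# (a common convention for GAN-style MNIST pipelines):
def normalization(X):
    return X / 127.5 - 1.0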
def data(path2indir):
    data_train = np.load(os.path.join(path2indir, 'train.npy'))
    q_train = data_train[0][:, 1:]
    a_train = data_train[1][:]
    data_dev = np.load(os.path.join(path2indir, 'dev.npy'))
    q_dev = data_dev[0][:, 1:]
    a_dev = data_dev[1][:]
    data_val = np.load(os.path.join(path2indir, 'val.npy'))
    q_val = data_val[0][:, 1:]
    a_val = data_val[1][:]

    # pickles must be opened in binary mode
    fread = open(os.path.join(path2indir, 'qdict.pkl'), 'rb')
    qdict = pickle.load(fread)
    fread.close()
    fread = open(os.path.join(path2indir, 'adict.pkl'), 'rb')
    adict = pickle.load(fread)
    fread.close()

    nb_ans = len(adict)
    a_train = np_utils.to_categorical(a_train, nb_ans)
    a_dev = np_utils.to_categorical(a_dev, nb_ans)
    a_val = np_utils.to_categorical(a_val, nb_ans)

    return q_train, q_dev, q_val, a_train, a_dev, a_val, qdict, adict
def train(model, X_train_original, X_test_original, y_train, y_test,
          nb_classes, batch_size, nb_epoch):
    mean_value = np.mean(X_train_original)
    std_value = np.std(X_train_original)  # was misleadingly named max_value
    print("mean_value", mean_value)
    print("std_value", std_value)
    X_train = (X_train_original - mean_value) / std_value
    if X_test_original is not None:
        X_test = (X_test_original - mean_value) / std_value

    # convert class vectors to binary class matrices
    if nb_classes > 1:
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        if X_test_original is not None:
            Y_test = np_utils.to_categorical(y_test, nb_classes)
    else:
        Y_train = y_train
        if X_test_original is not None:
            Y_test = y_test

    if X_test_original is not None:
        history = model.fit(X_train, Y_train, batch_size=batch_size,
                            nb_epoch=nb_epoch, verbose=1,
                            validation_data=(X_test, Y_test))
        score = model.evaluate(X_test, Y_test, verbose=0)
        return score, std_value, mean_value
    else:
        history = model.fit(X_train, Y_train, batch_size=batch_size,
                            nb_epoch=nb_epoch, verbose=1)
        return None, std_value, mean_value
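# The returned mean/std must be reapplied at inference time so new inputs see
# the same scaling; a sketch, where `model`, the data splits and `x_new` are
# assumed to exist:
score, std_value, mean_value = train(model, X_tr, X_te, y_tr, y_te,
                                     nb_classes=10, batch_size=32, nb_epoch=5)
x_new_scaled = (x_new - mean_value) / std_value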
def read_data(self, trainfile, testfile, dev=None, freeze=False):
    """
    reads in CoNLL files, maps tokens and tags to indices, pads sequences
    sets self.train_X, self.train_Y [and same for test_X|Y, and dev_X|Y]
    """
    print(trainfile)
    # convert words to indices, labels to numbers
    train_X_in, train_Y_in, self.w2i, self.t2i = self.make_data(trainfile)
    self.test_X_in, self.test_Y_in, self.test_X_org = self.make_data(
        testfile, w2i=self.w2i, t2i=self.t2i, freeze=True)  # keep test X for later

    self.max_sentence_len = max([len(s) for s in train_X_in] +
                                [len(s) for s in self.test_X_in])
    print("max_sentence_len:", self.max_sentence_len, file=sys.stderr)

    # pad sequences
    self.train_X = pad_sentences(train_X_in, self.max_sentence_len, self.w2i["<pad>"])
    train_Y_padded = pad_sentences(train_Y_in, self.max_sentence_len, 0)
    self.test_X = pad_sentences(self.test_X_in, self.max_sentence_len, self.w2i["<pad>"])
    test_Y_padded = pad_sentences(self.test_Y_in, self.max_sentence_len, 0)

    nb_classes = len(self.t2i)
    # convert class vectors to one-hot
    self.train_Y = np.array([list(np_utils.to_categorical(seq, nb_classes))
                             for seq in train_Y_padded])
    self.test_Y = np.array([list(np_utils.to_categorical(seq, nb_classes))
                            for seq in test_Y_padded])

    if dev:
        dev_X_in, dev_Y_in, dev_X_org = self.make_data(dev, w2i=self.w2i,
                                                       t2i=self.t2i, freeze=True)
        self.dev_X = pad_sentences(dev_X_in, self.max_sentence_len, self.w2i["<pad>"])
        dev_Y_padded = pad_sentences(dev_Y_in, self.max_sentence_len, 0)
        self.dev_Y = [list(np_utils.to_categorical(seq, nb_classes))
                      for seq in dev_Y_padded]
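# `pad_sentences` is assumed to right-pad index sequences to a fixed length
# with a given pad symbol; a minimal sketch under that assumption:
def pad_sentences(sequences, max_len, pad_value):
    return np.array([list(seq) + [pad_value] * (max_len - len(seq))
                     for seq in sequences])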
def make_batch(self):
    # make datasets
    x_dataset, y_dataset = ps.make_sente_datasets(1, 100)
    # print(x_dataset[110])
    # print(y_dataset[110])
    x_dataset = np.asarray(x_dataset)
    y_dataset = np.asarray(y_dataset)
    nb_data = x_dataset.shape[0]

    # split 90% / 10%; the split index must be an integer
    split_at = int(nb_data * 0.9)
    x_train, x_test = np.split(x_dataset, [split_at])
    y_train, y_test = np.split(y_dataset, [split_at])

    # x_train = x_train.reshape(x_train.shape[0], 1, 15, 9)
    # x_test = x_test.reshape(x_test.shape[0], 1, 15, 9)
    x_train = x_train.reshape(x_train.shape[0], 1, 11, 9)
    x_test = x_test.reshape(x_test.shape[0], 1, 11, 9)
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)
    print("x_train shape:", x_train.shape)
    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")
    return x_train, y_train, x_test, y_test
def test_temporal_classification():
    '''
    Classify temporal sequences of float numbers of length 3 into 2 classes
    using a single layer of GRU units and softmax applied to the last
    activations of the units.
    '''
    np.random.seed(1337)
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
                                                         nb_test=500,
                                                         input_shape=(3, 5),
                                                         classification=True,
                                                         nb_class=2)
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    model = Sequential()
    model.add(GRU(y_train.shape[-1],
                  input_shape=(X_train.shape[1], X_train.shape[2]),
                  activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adagrad',
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train, nb_epoch=20, batch_size=32,
                        validation_data=(X_test, y_test), verbose=0)
    assert(history.history['val_acc'][-1] >= 0.85)
def get_cifar100():
    (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    Y_train = np_utils.to_categorical(y_train, 100).astype("float32")
    Y_test = np_utils.to_categorical(y_test, 100).astype("float32")
    X_train = X_train.astype("float32") / 255
    X_test = X_test.astype("float32") / 255
    return (X_train, Y_train), (X_test, Y_test)
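# Minimal usage sketch for get_cifar100; with the standard CIFAR-100 split the
# shapes come out as below:
(X_train, Y_train), (X_test, Y_test) = get_cifar100()
print(X_train.shape, Y_train.shape)  # (50000, 32, 32, 3) (50000, 100)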
def load_whale_data(train_file, test_file, nb_classes=447):
    print("loading whale data")

    # normalize train data
    print("--> loading training data")
    train_data = read_csv(train_file)
    X_train = train_data[:, 1:]
    X_train = X_train.astype(np.float32)
    X_train = X_train / 255
    y_train = np.vstack(train_data[:, 0])
    y_train = y_train.astype(np.uint16)
    X_train, y_train = shuffle(X_train, y_train, random_state=42)
    X_train = X_train.reshape(-1, 1, 96, 96)
    Y_train = np_utils.to_categorical(y_train, nb_classes)  # use the parameter instead of hard-coding 447
    print("--> training data loaded")

    # normalize test data
    print("--> loading test data")
    test_data = read_csv(test_file)
    X_test = test_data[:, 1:]
    X_test = X_test.astype(np.float32)
    X_test = X_test / 255
    y_test = np.vstack(test_data[:, 0])
    y_test = y_test.astype(np.uint16)
    X_test, y_test = shuffle(X_test, y_test, random_state=42)
    X_test = X_test.reshape(-1, 1, 96, 96)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    print("--> test data loaded")

    return (X_train, Y_train, X_test, Y_test)
def mlp_model(X_train, y_train, X_test, y_test):
    tokenizer = Tokenizer(nb_words=1000)
    nb_classes = np.max(y_train) + 1
    X_train = tokenizer.sequences_to_matrix(X_train, mode="freq")
    X_test = tokenizer.sequences_to_matrix(X_test, mode="freq")
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    print("Building model...")
    model = Sequential()
    # sequences_to_matrix yields nb_words columns, so the input layer must
    # match that width (the original used max_len here, a shape mismatch)
    model.add(Dense(512, input_shape=(X_train.shape[1],)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  class_mode='categorical')

    history = model.fit(X_train, Y_train, nb_epoch=nb_epoch,
                        batch_size=batch_size, verbose=1,
                        show_accuracy=True, validation_split=0.1)
    model.evaluate(X_test, Y_test, batch_size=batch_size,
                   verbose=1, show_accuracy=True)
    # print('Test score:', score[0])
    # print('Test accuracy:', score[1])

    pred_labels = model.predict_classes(X_test)
    # print(pred_labels)
    # print(y_test)
    accuracy = accuracy_score(y_test, pred_labels)
    precision, recall, f1, supp = precision_recall_fscore_support(
        y_test, pred_labels, average='weighted')
    print(precision, recall, f1, supp)
    return accuracy, precision, recall, f1
def DNN(X_train, Y_train, X_test, Y_test):
    batch_size = 64
    nb_classes = 10
    nb_epoch = 20
    np.random.seed(1337)

    X_train = X_train.reshape(60000, 784)
    X_test = X_test.reshape(10000, 784)
    X_train = X_train.astype("float32")
    X_test = X_test.astype("float32")  # was not assigned back, so the cast was lost
    X_train /= 255
    X_test /= 255
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    Y_train = np_utils.to_categorical(Y_train, nb_classes)
    Y_test = np_utils.to_categorical(Y_test, nb_classes)

    model = Sequential()
    model.add(Dense(128, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10))
    model.add(Activation('softmax'))

    rms = RMSprop()
    model.compile(loss='categorical_crossentropy', optimizer=rms)
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=True, verbose=2, validation_data=(X_test, Y_test))
    score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
def imdb_test():
    # set parameters:
    max_features = 5000  # vocabulary size
    maxlen = 200  # padding length
    batch_size = 16
    nb_epoch = 10

    print('Loading data...')
    (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
                                                          test_split=0.2)

    print("Pad sequences (samples x time)")
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    nb_classes = 2
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)

    model = imdb_cnn()
    plot(model, to_file='./images/imdb_model.png')

    # try using different optimizers and different optimizer configs
    # model.compile(loss='binary_crossentropy', optimizer='adagrad', class_mode="binary")
    model.compile(loss='categorical_crossentropy', optimizer='adagrad')

    print("Train...")
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              validation_data=(X_test, y_test), show_accuracy=True,
              callbacks=[early_stopping])
    score, acc = model.evaluate(X_test, y_test, batch_size=batch_size,
                                show_accuracy=True)
    print('Test score:', score)
    print('Test accuracy:', acc)
def load_data(trainfile, devfile, testfile):
    ### load data
    train_sents, train_y = load_animacy_sentences_and_labels(trainfile)
    dev_sents, dev_y = load_animacy_sentences_and_labels(devfile)
    test_sents, test_y = load_animacy_sentences_and_labels(testfile)

    ### create mapping from words to indices
    word2idx = {"_UNK": 0}  # reserve 0 for OOV

    ### convert training etc. data to indices
    X_train = [[get_index(w, word2idx) for w in x] for x in train_sents]
    freeze = True
    X_dev = [[get_index(w, word2idx, freeze) for w in x] for x in dev_sents]
    X_test = [[get_index(w, word2idx, freeze) for w in x] for x in test_sents]

    vocab_size = len(word2idx)
    X_train = convert_to_n_hot(X_train, vocab_size)
    X_dev = convert_to_n_hot(X_dev, vocab_size)
    X_test = convert_to_n_hot(X_test, vocab_size)

    ### convert labels to one-hot
    label2idx = {label: i for i, label in enumerate(set(train_y + dev_y))}
    num_labels = len(label2idx.keys())
    train_y = np_utils.to_categorical([label2idx[label] for label in train_y],
                                      num_classes=num_labels)
    dev_y = np_utils.to_categorical([label2idx[label] for label in dev_y],
                                    num_classes=num_labels)
    test_y = np_utils.to_categorical([label2idx[label] for label in test_y],
                                     num_classes=num_labels)

    return X_train, train_y, X_dev, dev_y, X_test, test_y, word2idx, label2idx
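# `convert_to_n_hot` is assumed to turn each list of word indices into a
# fixed-size binary bag-of-words vector; a minimal sketch under that assumption:
def convert_to_n_hot(X, vocab_size):
    out = np.zeros((len(X), vocab_size), dtype='float32')
    for i, sentence in enumerate(X):
        for idx in sentence:
            out[i, idx] = 1.0
    return out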
def keras_mnist_data():
    """ retrieve the MNIST database for keras """
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    img_rows, img_cols = 28, 28  # should be computed from the data

    if K.image_dim_ordering() == 'th':
        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
        X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    else:
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    # convert class vectors to binary class matrices
    nb_classes = len(set(y_train))
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    return (X_train, Y_train), (X_test, Y_test)
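# Minimal usage sketch: keras_mnist_data returns ready-to-train splits in the
# backend's preferred dim ordering, e.g. (60000, 28, 28, 1) under TensorFlow.
(X_train, Y_train), (X_test, Y_test) = keras_mnist_data()
print(X_train.shape, Y_train.shape)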
chars = sorted(list(set(samples)))
concatenated_samples = 'B' + 'EB'.join(samples) + 'E'
X = concatenated_samples[0:(len(concatenated_samples) - 1)]
Y = concatenated_samples[1:len(concatenated_samples)]
chars = sorted(list(set(X)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# Array = np.asarray([char_indices[c] for c in X])
categorized = np_utils.to_categorical([char_indices[c] for c in X])
categorizedY = np_utils.to_categorical([char_indices[c] for c in Y])
decategorized = np.argmax(categorized, axis=1)
decoded = ''.join([indices_char[i] for i in decategorized])

def batchify(X, Y, num_batches, batch_size, batch_length):
    retX = np.ndarray(shape=np.append([num_batches, batch_size, batch_length], X.shape[1:]))
    retY = np.ndarray(shape=np.append([num_batches, batch_size, batch_length], Y.shape[1:]))
    for i in range(num_batches):
        for j in range(batch_size):
            for k in range(batch_length):
                retX[i][j][k] = X[j + i * batch_length + k]
                retY[i][j][k] = Y[j + i * batch_length + k]
    return retX, retY
    if r[3] == 0:
        test_text.append(processed_text)
        test_output.append(r[2])
    else:
        train_text.append(processed_text)
        train_output.append(r[2])

conn.close()

tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_text)
dictionary = tokenizer.word_index

train_text = prep_training(train_text, dictionary, tokenizer)

encoder = LabelEncoder()
train_output = np_utils.to_categorical(encoder.fit_transform(train_output))

model = Sequential()
model.add(Dense(512, input_shape=(len(dictionary) + 1,), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='tanh'))
model.add(Dropout(0.5))
model.add(Dense(num_categories, activation='sigmoid'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(train_text, train_output, batch_size=32,
import os

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_test = X_test.reshape(len(X_test), 784)
X_test = X_test.astype('float32')
X_test /= 255

# load the model and create predictions on the test set
model = load_model("keras_model_1000.h5")
model.load_weights("mnist_weight_1000.h5")

n_classes = 10
print("Shape before one-hot encoding: ", y_train.shape)
Y_train = np_utils.to_categorical(y_train, n_classes)
Y_test = np_utils.to_categorical(y_test, n_classes)
print("Shape after one-hot encoding: ", Y_train.shape)

loss_and_metrics = model.evaluate(X_test, Y_test, verbose=2)
print("Test Loss", loss_and_metrics[0])
print("Test Accuracy", loss_and_metrics[1])

predicted_classes = model.predict_classes(X_test)

# see which we predicted correctly and which not
correct_indices = np.nonzero(predicted_classes == y_test)[0]
incorrect_indices = np.nonzero(predicted_classes != y_test)[0]
print()
print(len(correct_indices), " classified correctly")
print(len(incorrect_indices), " classified incorrectly")
def one_hot_encode(y):
    # one hot encode outputs
    y = np_utils.to_categorical(y)
    num_classes = y.shape[1]
    return y, num_classes
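# Round-trip check (numpy and np_utils assumed imported as in the snippets
# above): to_categorical infers the class count from the largest label, and
# np.argmax inverts the encoding.
labels = np.array([0, 2, 1, 2])
encoded, num_classes = one_hot_encode(labels)
print(num_classes)                 # 3
print(np.argmax(encoded, axis=1))  # [0 2 1 2]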
eye_trainingsamples = len(eye_training_list)
nose_trainingsamples = len(nose_training_list)

eye_traininglabels = numpy.zeros((eye_trainingsamples, ), dtype=int)
nose_traininglabels = numpy.zeros((nose_trainingsamples, ), dtype=int)

eye_traininglabels[0:66] = 0
eye_traininglabels[66:113] = 1
eye_traininglabels[113:156] = 2
nose_traininglabels[0:66] = 0
nose_traininglabels[66:113] = 1
nose_traininglabels[113:156] = 2

eye_traininglabels = np_utils.to_categorical(eye_traininglabels, 3)
nose_traininglabels = np_utils.to_categorical(nose_traininglabels, 3)

etraining_data = [eye_training_list, eye_traininglabels]
(etrainingframes, etraininglabels) = (etraining_data[0], etraining_data[1])
etraining_set = numpy.zeros((eye_trainingsamples, 1, 32, 32, 18))
for h in range(eye_trainingsamples):
    # index with a single tuple instead of chained [:][:][:] slices
    etraining_set[h, 0, :, :, :] = etrainingframes[h, :, :, :]

etraining_set = etraining_set.astype('float32')
etraining_set -= numpy.mean(etraining_set)
etraining_set /= numpy.max(etraining_set)

ntraining_data = [nose_training_list, nose_traininglabels]
(ntrainingframes, ntraininglabels) = (ntraining_data[0], ntraining_data[1])
ntraining_set = numpy.zeros((nose_trainingsamples, 1, 32, 32, 18))
                                       1)), ::], cmap=plt.get_cmap("gray"))
        axs[j][i].axis("off")
        if i == 2:
            axs[j][i].set_title(str(j))
            num_of_samples.append(len(x_selected))

num_of_samples

plt.figure(figsize=(12, 6))
plt.bar(range(0, num_classes), num_of_samples)
plt.title("Distribution of the training dataset")
plt.xlabel("Class Number")
plt.ylabel("Number of images")
plt.show()

y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

X_train = X_train / 255
X_test = X_test / 255

num_pixels = 784
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

# Define the model
def create_model():
    model = Sequential()
    model.add(Conv2D(30, (5, 5), input_shape=(28, 28, 1), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(15, (3, 3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# we need a 60K x [1 x 28 x 28] shape as input to the CONVNET
X_train = X_train[:, :, :, np.newaxis]
X_test = X_test[:, :, :, np.newaxis]

print(X_train.shape, 'train samples')
print(X_test.shape, 'test samples')
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = np_utils.to_categorical(y_train, NB_CLASSES)
y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# initialize the optimizer and model
model = LeNet.build(input_shape=INPUT_SHAPE, classes=NB_CLASSES)
model.compile(loss="categorical_crossentropy", optimizer=OPTIMIZER,
              metrics=["accuracy"])

history = model.fit(X_train, y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)
def generate_text(model, num_terzine=33):
    # Evaluation step (generating text using the learned model)
    first_char = chr(int(np.random.randint(ord('a'), ord('z') + 1)))
    # print(first_char)
    # print(tokenizer.texts_to_sequences(first_char))
    # print(tokenizer.texts_to_sequences(first_char)[0])

    # Converting our start string to numbers (vectorizing)
    input_eval = np_utils.to_categorical([
        tokenizer.texts_to_sequences(first_char)
    ], num_classes=n_tokens)

    # Empty list to store our results
    text_generated = []

    # Low temperatures result in more predictable text.
    # Higher temperatures result in more surprising text.
    temperature = 1.5

    # Given the number of terzine to write, we call the model, which returns a
    # terzina; we have to pass as argument of the next call (for the next
    # terzina) the end of the last generated terzina.
    end = False
    model.reset_states()
    for _ in range(num_terzine):
        # one array for each TrainingLine
        line_output = [[], [], []]
        for _ in range(max_line_length):
            # print("START")
            # print(input_eval)
            predictions = model((input_eval, X_syllables[0, 0]), training=False)
            cont = 0
            # print(predictions)
            # predictions is an array that contains 3 arrays, each one a new predicted char
            for pred in predictions:
                char = sample(pred[0, 0], temperature)
                # print("CHAR")
                # print(char)
                if char == 1 and not end:
                    end = True
                if char != 1 and end:
                    next_char = char
                    char = 1
                line_output[cont].append(char)
                cont += 1
        # use as input the last predicted line
        # print("END")
        input_eval = np_utils.to_categorical([
            tokenizer.texts_to_sequences(
                tokenizer.sequences_to_texts([line_output[2]])[0])
        ], num_classes=n_tokens)

        terzina = []
        # print(line_output)
        for i in range(3):
            # print(line_output[i])
            cleaned_text = tokenizer.sequences_to_texts([
                line_output[i]
            ])[0].strip()[1:].replace(' ', '\n').replace(' ', '').replace('\n', ' ')
            terzina.append(cleaned_text)
        # print(terzina)
        text_generated.append(terzina)

    return text_generated
# load training and validation data
filename_train_set = '../trainingData/train_set_laosheng_phonemeSeg_mfccBands_neighbor.pickle.gz'
filename_train_validation_set = '../trainingData/train_set_all_laosheng_phonemeSeg_mfccBands_neighbor.pickle.gz'
filename_validation_set = '../trainingData/validation_set_laosheng_phonemeSeg_mfccBands_neighbor.pickle.gz'

with gzip.open(filename_train_set, 'rb') as f:
    X_train, Y_train = cPickle.load(f)

with gzip.open(filename_train_validation_set, 'rb') as f:
    X_train_validation, Y_train_validation = cPickle.load(f)

with gzip.open(filename_validation_set, 'rb') as f:
    X_validation, Y_validation = cPickle.load(f)

# X_train = np.transpose(X_train)
Y_train = to_categorical(Y_train)
Y_train_validation = to_categorical(Y_train_validation)
Y_validation = to_categorical(Y_validation)

space = {
    'choice': hp.choice('num_layers', [
        {'layers': 'two'},
        {'layers': 'three'},
    ]),
    'units1': hp.uniform('units1', 64, 512),
    'dropout1': hp.uniform('dropout1', .25, .75),
    'batch_size': hp.uniform('batch_size', 28, 128),
    'nb_epochs': 500,
    'optimizer': hp.choice('optimizer', ['adadelta', 'adam']),
    'activation': 'relu'
def onehot(x):
    return to_categorical(x)
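# Caveat: without an explicit class count, to_categorical sizes the matrix from
# the largest label it sees, so different batches can disagree in width; pass
# the count as a second argument to pin the shape.
print(to_categorical([0, 1, 3]).shape)  # (3, 4)
print(to_categorical([0, 1], 4).shape)  # (2, 4)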
numpy.random.seed(seed)

# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# reshape the images to 28x28
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')

# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

# turn the vector of possible outputs into a binary matrix
# one hot encoding
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

# now we can make the neural network
# define the baseline model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Conv2D(32, (5, 5), input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(AveragePooling2D(pool_size=(2, 2)))
    # Dropout helps prevent overfitting
    model.add(Dropout(0.2))
    model.add(Flatten())
df['%s_out' % i] = (df[str(i)] + '\n' + df[str(i + 1)].str[0]).str.pad(
    max_line_length + 2, 'right', '\n')

max_line_length += 2

inputs = df[['0_in', '1_in', '2_in']].values

tokenizer = Tokenizer(filters='', char_level=True)
tokenizer.fit_on_texts(inputs.flatten())
n_tokens = len(tokenizer.word_counts) + 1

print(df)

# X is the input for each line in sequences of one-hot-encoded values
X = np_utils.to_categorical([
    tokenizer.texts_to_sequences(inputs[:, i]) for i in range(3)
], num_classes=n_tokens)

outputs = df[['0_out', '1_out', '2_out']].values

# Y is the output for each line in sequences of one-hot-encoded values
Y = np_utils.to_categorical([
    tokenizer.texts_to_sequences(outputs[:, i]) for i in range(3)
], num_classes=n_tokens)

# X_syllables is the count of syllables for each line
X_syllables = df[['0_syllables', '1_syllables', '2_syllables']].values

# The latent dimension of the LSTM
latent_dim = 2048

model = BasicDanteRNN(latent_dim, n_tokens, tokenizer)
import numpy as np
import pandas as pd  # needed for read_csv below
import matplotlib.pyplot as plt
from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.utils import np_utils

x = pd.read_csv("./fashion-mnist_test.csv")
X_ = np.array(x)
X = X_[:, 1:]
X = X / 255.0
y = X_[:, 0]
print(X.shape, y.shape)

y = np_utils.to_categorical(y)
print(y.shape, y[:10])

X_train = X[:8000, :]
Y_train = y[:8000, :]
X_val = X[8000:, :]
Y_val = y[8000:, :]

model = Sequential()
model.add(Dense(256, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(10))
def train_top_model(num_neurons):
    # np.load needs the file object opened in binary mode
    train_data = np.load(open('bottleneck_features_train.npy', 'rb'))
    train_labels = np.array(
        [0] * 360 + [1] * 225 + [2] * 225 + [3] * 216 + [4] * 198 +
        [5] * 189 + [6] * 180 + [7] * 180 + [8] * 180 + [9] * 180 +
        [10] * 171 + [11] * 162 + [12] * 162 + [13] * 162 + [14] * 153 +
        [15] * 153 + [16] * 153 + [17] * 144 + [18] * 144 + [19] * 144)
    t_labels = to_categorical(train_labels)

    validation_data = np.load(open('bottleneck_features_validation.npy', 'rb'))
    validation_labels = np.array(
        [0] * 40 + [1] * 25 + [2] * 25 + [3] * 24 + [4] * 22 +
        [5] * 21 + [6] * 20 + [7] * 20 + [8] * 20 + [9] * 20 +
        [10] * 19 + [11] * 18 + [12] * 18 + [13] * 18 + [14] * 17 +
        [15] * 17 + [16] * 17 + [17] * 16 + [18] * 16 + [19] * 16)
    v_labels = to_categorical(validation_labels)

    # getting per-class counts
    train_dict = defaultdict(int)
    for w in train_labels:
        train_dict[w] += 1
    validation_dict = defaultdict(int)
    for w in validation_labels:
        validation_dict[w] += 1

    # defining the FC model
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(num_neurons, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(20, activation='softmax'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # integer division so ratio can be used as a loop bound
    ratio = (train_dict[0] + validation_dict[0]) // validation_dict[0]

    accuracy = dict()
    index_dict = dict()
    val_index_dict = dict()
    index_dict[-1] = 0
    val_index_dict[-1] = 0
    accuracy[num_neurons] = []

    # cumulative per-class offsets into the training and validation arrays
    for j in train_dict:
        index_dict[j] = 0
        for k in range(0, j + 1):
            index_dict[j] = index_dict[j] + train_dict[k]
    for j in validation_dict:
        val_index_dict[j] = 0
        for k in range(0, j + 1):
            val_index_dict[j] = val_index_dict[j] + validation_dict[k]

    for i in range(ratio):
        model.fit(train_data, t_labels,
                  nb_epoch=nb_epoch, batch_size=32,
                  validation_data=(validation_data, v_labels))
        y_pred = model.predict_classes(validation_data)
        acc = accuracy_score(validation_labels, y_pred)
        accuracy[num_neurons].append(acc)

        if i < 9:
            # swap the current validation fold with the matching training
            # fold in place, using the add/subtract exchange trick
            for j in range(len(index_dict) - 1):
                train_slice = slice(index_dict[j - 1] + i * validation_dict[j],
                                    index_dict[j - 1] + (i + 1) * validation_dict[j])
                val_slice = slice(val_index_dict[j - 1], val_index_dict[j])
                train_data[train_slice, :, :, :] = (
                    train_data[train_slice, :, :, :] +
                    validation_data[val_slice, :, :, :])
                validation_data[val_slice, :, :, :] = (
                    train_data[train_slice, :, :, :] -
                    validation_data[val_slice, :, :, :])
                train_data[train_slice, :, :, :] = (
                    train_data[train_slice, :, :, :] -
                    validation_data[val_slice, :, :, :])
    return accuracy
def temp_network(filePath, number_of_con_filters, con_step_length,
                 max_pooling_feature_map_size, number_of_full_layer_nodes,
                 learning_ratio, train_decay):
    # get the train data, train label, validate data, validate label,
    # test data, test label
    train_dataset, valid_dataset, test_dataset = loadData(filePath + ".mat")

    # the dimension of the input signal's channel
    channel_length = train_dataset[0].shape[1]
    sample_counts = train_dataset[0].shape[0]
    # train_dataset, test_dataset = imdb.load_data()

    # initialize parameters
    layer1_input_length = len(test_dataset[0][0])
    con_filter_length = int((math.ceil(
        (layer1_input_length / con_step_length) / 9)) * con_step_length)
    destinations = numpy.max(test_dataset[1])

    #############################
    # Network Information Display
    #############################
    print("The network has ", channel_length, "input nodes in the 1st layer.")
    print("The number of samples in the dataset is ", sample_counts)
    print("The number of classification classes is ", destinations)
    print("The size of the first convolutional layer is ", layer1_input_length)
    print('The number of convolutional filters is ', number_of_con_filters,
          ", each kernel sizes ", con_filter_length, "x1.")
    print("There are ", number_of_full_layer_nodes, " nodes in the fully connected layer.")

    #########################
    # Construct the CNN model
    #########################
    model = Sequential()

    # the first convolutional layer
    layer1 = Convolution2D(number_of_con_filters,
                           nb_row=con_filter_length,
                           nb_col=1,
                           border_mode='valid',
                           subsample=(1, 1),
                           dim_ordering='th',
                           bias=True,
                           input_shape=(1, layer1_input_length, 1))
    print("The input to the first convolutional layer shapes",
          (1, layer1_input_length, 1))
    model.add(layer1)
    model.add(Activation('tanh'))

    # the max pooling layer after the first convolutional layer
    first_feature_map_size = (layer1_input_length - con_filter_length) / con_step_length + 1
    max_pooling_kernel_size = int(
        math.ceil(first_feature_map_size / max_pooling_feature_map_size))
    print("The max pooling kernel size is ", max_pooling_kernel_size)
    layer2 = MaxPooling2D(pool_size=(max_pooling_kernel_size, 1),
                          strides=(max_pooling_kernel_size, 1),
                          border_mode='valid',
                          dim_ordering='th')
    model.add(layer2)

    # flatten the variables output by the max pooling layer
    model.add(Flatten())

    # the fully connected layer
    layer3 = Dense(number_of_full_layer_nodes, bias=True)
    model.add(layer3)
    model.add(Activation('tanh'))

    # the output layer which will produce the final classification result
    layer4 = Dense(destinations + 1, activation='tanh', bias=True)
    # layer4 = Activation('tanh')
    model.add(layer4)
    layer5 = Activation('softmax')
    model.add(layer5)

    # the optimizer
    sgd = SGD(lr=learning_ratio, decay=train_decay, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # train the constructed model
    # the input shape of the train_dataset should be a list.
    # train_dataset_data = train_dataset[0].tolist()
    # test_dataset_data = test_dataset[0].tolist()
    # train_dataset_data = []
    # for tempTrainMark in range(train_dataset[0].shape[0]):
    #     train_dataset_data.append(train_dataset[0][tempTrainMark])
    # train_dataset_data = numpy.array(train_dataset_data)
    # test_dataset_data = []
    # for tempTestMark in range(test_dataset[0].shape[0]):
    #     test_dataset_data.append(test_dataset[0][tempTestMark])
    # test_dataset_data = numpy.array(test_dataset_data)
    # train_dataset_data = sequence.pad_sequences(train_dataset_data, )
    # train_dataset_data = numpy.expand_dims(train_dataset[0], 1)
    train_dataset_data = train_dataset[0].reshape(train_dataset[0].shape[0], 1,
                                                  train_dataset[0].shape[1], 1)
    train_dataset_label = np_utils.to_categorical(train_dataset[1])
    print("The dataset used to train the model shapes ", train_dataset_data.shape)
    print("The label corresponding to the train data shapes ", train_dataset_label.shape)
    # print(train_dataset_data)

    # prepare the validation dataset
    validation_dataset_data = valid_dataset[0].reshape(
        valid_dataset[0].shape[0], 1, valid_dataset[0].shape[1], 1)
    validation_dataset_label = np_utils.to_categorical(valid_dataset[1])
    validation_data = (validation_dataset_data, validation_dataset_label)

    history = model.fit(train_dataset_data, train_dataset_label,
                        batch_size=10, nb_epoch=100, verbose=1,
                        validation_data=validation_data, shuffle=True)
    model.save_weights(filePath + 'Model.h5', overwrite=True)
    sio.savemat(filePath + "Result.mat", {
        'loss': history.history['loss'],
        'accuracy': history.history['acc']
    })

    # test the model
    # prepare the testing dataset the same way as the training dataset
    test_dataset_data = test_dataset[0].reshape(test_dataset[0].shape[0], 1,
                                                test_dataset[0].shape[1], 1)
    test_dataset_label = np_utils.to_categorical(test_dataset[1])
    classes = model.predict_classes(test_dataset_data, verbose=1)
    test_dataset_label = test_dataset[1].astype(numpy.int)
    print("Predictions on the test dataset:", classes)
    print("Ground-truth labels of the test dataset:", test_dataset_label)
    print("Got", len(classes), "predictions for", len(test_dataset_label), "labels.")

    count = 0
    correctCount = 0
    # comparison = zip(classes, test_dataset_label)
    # print(comparison)
    test_accuracy = numpy.mean(numpy.equal(test_dataset_label, classes))
    print("The accuracy of the trained CNN model is ", test_accuracy)
    return classes, test_dataset_label
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print('Distribution of Training Classes:', np.bincount(y_train))

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_valid = X_valid.astype('float32')
X_Pool = X_Pool.astype('float32')
X_train /= 255
X_valid /= 255
X_Pool /= 255
X_test /= 255

Y_test = np_utils.to_categorical(y_test, nb_classes)
Y_valid = np_utils.to_categorical(y_valid, nb_classes)
Y_Pool = np_utils.to_categorical(y_Pool, nb_classes)

# loss values in each experiment
Pool_Valid_Loss = np.zeros(shape=(nb_epoch, 1))
Pool_Train_Loss = np.zeros(shape=(nb_epoch, 1))
Pool_Valid_Acc = np.zeros(shape=(nb_epoch, 1))
Pool_Train_Acc = np.zeros(shape=(nb_epoch, 1))
x_pool_All = np.zeros(shape=(1))

Y_train = np_utils.to_categorical(y_train, nb_classes)

print('Training Model Without Acquisitions in Experiment', e)
file.close()

mean_image0 = np.mean(X_train[:, 0, :, :])
mean_image1 = np.mean(X_train[:, 1, :, :])
mean_image2 = np.mean(X_train[:, 2, :, :])
X_train[:, 0, :, :] -= mean_image0
X_train[:, 1, :, :] -= mean_image1
X_train[:, 2, :, :] -= mean_image2

index = np.arange(X_train.shape[0])
np.random.shuffle(index)
X_train = X_train[index]
y_true = y_true[index]
tempmap = np.copy(index)

Y_true = np_utils.to_categorical(y_true, nb_classes)

# parameters
batch_size = 32
epoch = 50
nb_epoch = 10
upper = 0.99
lower = 0.75
th = upper
eta = (upper - lower) / epoch
nb = 1000

# model
# ==============================================================================
# X_train = np.transpose(X_train, (0, 2, 3, 1))
# ==============================================================================
inp_ = Input(shape=(img_channels, img_rows, img_cols))
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation, Convolution2D, MaxPooling2D, Flatten
from keras.optimizers import Adam

np.random.seed(123)

# X_train: (60,000, 28, 28) images; Y_train: (60,000,) labels
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape(-1, 1, 28, 28)
X_test = X_test.reshape(-1, 1, 28, 28)
Y_train = np_utils.to_categorical(Y_train, num_classes=10)  # one hot
Y_test = np_utils.to_categorical(Y_test, num_classes=10)    # 0000000001 ===> 9

model = Sequential()
model.add(Convolution2D(filters=32,
                        kernel_size=(5, 5),
                        padding='same',
                        input_shape=(1, 28, 28)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),
                       strides=(2, 2),
                       padding='same'))
model.add(Convolution2D(64, (5, 5), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(
    model.add(Activation('softmax'))
    return model

batch_size = 128      # BATCH SIZE OF TRAINING AND TESTING
nb_classes = 10       # NUMBER OF CLASSES
nb_epoch = 50         # NUMBER OF EPOCHS DURING CASCADE TRAINING
lr = 0.01             # INITIAL LEARNING RATE
weightDecay = 10.e-4  # WEIGHT DECAY OF THE TRAINING PROCEDURE
sgd = SGD(lr=lr, momentum=0.9)  # OPTIMIZER
saveResults = False

(X_train, y_train), (X_test, y_test) = cifar10.load_data()  # LOAD DATA

stringOfHistory = './TheAllCNN_Results/theAllCNN_BiggerNet_NoDropout'
print(stringOfHistory)

Y_train = np_utils.to_categorical(y_train, nb_classes)  # CONVERT CLASS VECTORS INTO AN OUTPUT MATRIX
Y_test = np_utils.to_categorical(y_test, nb_classes)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

outNeurons = 256

# SMALLER SET FOR DEBUGGING PURPOSES
# X_train = X_train[0:100]
# Y_train = Y_train[0:100]
# X_test = X_test[0:100]
# Y_test = Y_test[0:100]

# GET VALIDATION DATA FROM TRAINING SET
max_features = 50000
maxlen = 1000
batch_size = 32
embedding_dims = 80
epochs = 10

print('Loading data...')
num_train = 11314
num_test = 7532

f = codecs.open('../temp/texts.pkl', 'rb')
texts = pickle.load(f)
f.close()

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(texts[:num_train])
sequences = np.asarray(tokenizer.texts_to_sequences(texts))

y_train = to_categorical(np.load('../temp/Ytrain.npy'))
y_test = to_categorical(np.load('../temp/Ytest.npy'))
x_train = sequences[:num_train]
x_test = sequences[num_train:]

indice1 = np.arange(num_train)
np.random.shuffle(indice1)
x_train = x_train[indice1]
y_train = y_train[indice1]
indice2 = np.arange(num_test)
np.random.shuffle(indice2)
x_test = x_test[indice2]
y_test = y_test[indice2]

print(len(x_train), 'train sequences')
toWrite = ''
for inst in X_test:
    toWrite += inst
    toWrite += '\n'

file = open(_X_TEST_FILE, 'w')
file.write(str(toWrite))
file.close()
print("WRITING DONE")

encoder = LabelEncoder()
Y_train = [_healpy.latlon2healpix(i[0], i[1], math.pow(4, 6)) for i in Y_train]
Y_test = [_healpy.latlon2healpix(i[0], i[1], math.pow(4, 6)) for i in Y_test]
fit_trans = encoder.fit_transform(Y_train + Y_test)
_encoder = np_utils.to_categorical(fit_trans)
_newencoder = _encoder.astype(int)
_NUMCLASSES = len(_newencoder[0])
print('NUM OF CLASSES --->', _NUMCLASSES)
Y_train = _newencoder[:-len(Y_test)]
Y_test = _newencoder[-len(Y_test):]
pickle.dump(encoder, open(_ENCODER, 'wb'))

model = classification_model(_NUMCLASSES)
opt = keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                            epsilon=None, decay=0.0, amsgrad=True)
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, split=' ')
tokenizer.fit_on_texts(df['question'].values)
X = tokenizer.texts_to_sequences(df['question'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
Y = df['type']

with open(VECTORIZER_PATH, 'wb') as fil:
    pkl.dump(tokenizer, fil)

word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

le = preprocessing.LabelEncoder()
le.fit(Y)
Y = le.transform(Y)
labels = to_categorical(np.asarray(Y))

with open(LABEL_ENCODER_PATH, 'wb') as fil:
    pkl.dump(le, fil)

# split the data into a training set and a validation set
indices = np.arange(X.shape[0])
np.random.shuffle(indices)
X = X[indices]
labels = labels[indices]
nb_validation_samples = int(VALIDATION_SPLIT * X.shape[0])

x_train = X[:-nb_validation_samples]
y_train = labels[:-nb_validation_samples]
x_val = X[-nb_validation_samples:]
y_val = labels[-nb_validation_samples:]
# (tail of a train_test_split(df, ..., random_state=0, stratify=df['Class']) call)
                                 random_state=0, stratify=df['Class'])
X_train = train.drop(['Class'], axis=1)
X_test = test.drop(['Class'], axis=1)
Y_train = train['Class']
Y_test = test['Class']
print('X_train: {}'.format(X_train.shape))
print('Y_train: {}'.format(Y_train.shape))
print('X_test: {}'.format(X_test.shape))
print('Y_test: {}'.format(Y_test.shape))
print(Y_train.head())

Y_train = to_categorical(Y_train, num_classes=2)
Y_test = to_categorical(Y_test, num_classes=2)

np.random.seed(0)
model = Sequential()
model.add(Dense(64, input_dim=30, activation='relu'))
model.add(Dropout(0.9))  # drops 90% of units, an unusually aggressive rate
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(2, activation='softmax'))
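# A minimal sketch (assumed continuation) of compiling and training the dense
# network defined above; optimizer and epoch count are illustrative choices.
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=20, batch_size=32,
          validation_data=(X_test, Y_test), verbose=1)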
def main():
    print("===================================================================")
    print("Model training (Keras version)")
    print("Trains on the specified image files and generates a model.")
    print("===================================================================")

    # Create the output directory
    if not os.path.isdir(OUTPUT_MODEL_DIR):
        os.mkdir(OUTPUT_MODEL_DIR)
    # Delete any files inside the directory
    delete_dir(OUTPUT_MODEL_DIR, False)

    num_classes = 2
    batch_size = 32
    epochs = 30

    # Load the training image files
    train_file_list = load_images(TRAIN_IMAGE_DIR)
    # Label the training image files
    x_train, y_train = labeling_images(train_file_list)
    # plt.imshow(x_train[0])
    # plt.show()
    # print(y_train[0])

    # Load the test image files
    test_file_list = load_images(TEST_IMAGE_DIR)
    # Label the test image files
    x_test, y_test = labeling_images(test_file_list)
    # plt.imshow(x_test[0])
    # plt.show()
    # print(y_test[0])

    # Check the dimensions of the images and labels
    print("x_train.shape:", x_train.shape)
    print("y_train.shape:", y_train.shape)
    print("x_test.shape:", x_test.shape)
    print("y_test.shape:", y_test.shape)

    # One-hot encode the class labels (makes them easier to separate linearly)
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    # Check the dimensions again after encoding
    print("x_train.shape:", x_train.shape)
    print("y_train.shape:", y_train.shape)
    print("x_test.shape:", x_test.shape)
    print("y_test.shape:", y_test.shape)

    # Define the model
    model = Sequential()
    # Create a 2D convolution layer that performs spatial convolution over the
    # images. Below, 32 different 3x3 filters produce 32 outputs, and the features
    # (weights) are computed through the ReLU activation.
    #   input_shape  size of the input data: 64 x 64 x 3 (RGB)
    #   filters      number of filters (kernels), i.e. the output dimensionality
    #   kernel_size  filter (kernel) size; odd squares such as 3x3 or 5x5 are typical
    #   strides      stride width (the number of pixels the filter moves)
    #   padding      how the data edges are handled ('same' zero-pads the input,
    #                'valid' does not)
    #   activation   activation function
    model.add(Conv2D(input_shape=(64, 64, 3), filters=32, kernel_size=(3, 3),
                     strides=(1, 1), padding="same", activation='relu'))
    # Downscale the output by splitting it into 2x2 regions and taking the maximum
    # of each region. The parameter is a tuple of two integers that sets the
    # downscaling factors. Ignoring positional differences within each region
    # makes the model robust to small translations.
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Convolution 2
    model.add(Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                     padding="same", activation='relu'))
    # Downscale the output 2
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Dropout 1
    model.add(Dropout(0.01))
    # Convolution 3
    model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                     padding="same", activation='relu'))
    # Downscale the output 3
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Dropout 2
    model.add(Dropout(0.05))
    # Fully connected layers (the pooling output is a 4D tensor, so flatten it
    # into a 1D vector)
    model.add(Flatten())
    # Prediction layer 1
    model.add(Dense(512, activation='sigmoid'))
    # Prediction layer 2
    model.add(Dense(128, activation='sigmoid'))
    # Prediction layer 3
    model.add(Dense(num_classes, activation='softmax'))

    # Compile
    model.compile(optimizer='sgd', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # Print the summary
    model.summary()
    # Visualize the model
    plot_file_path = os.path.join(OUTPUT_MODEL_DIR, OUTPUT_PLOT_FILE)
    plot_model(model, to_file=plot_file_path, show_shapes=True)

    if OUTPUT_MODEL_ONLY:
        # Train only
        model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
    else:
        # Train and keep the history for plotting
        history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                            verbose=1, validation_data=(x_test, y_test))
        # Evaluate and display the generalization accuracy
        test_loss, test_acc = model.evaluate(x_test, y_test,
                                             batch_size=batch_size, verbose=0)
        print(f"validation loss:{test_loss}\r\nvalidation accuracy:{test_acc}")

        # Plot acc (training accuracy) and val_acc (validation accuracy)
        plt.plot(history.history["acc"], label="acc", ls="-", marker="o")
        plt.plot(history.history["val_acc"], label="val_acc", ls="-", marker="x")
        plt.title('model accuracy')
        plt.xlabel("epoch")
        plt.ylabel("accuracy")
        plt.legend(loc="best")
        plt.show()

        # Plot the loss history
        plt.plot(history.history['loss'], label="loss", ls="-", marker="o")
        plt.plot(history.history['val_loss'], label="val_loss", ls="-", marker="x")
        plt.title('model loss')
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.legend(loc='lower right')
        plt.show()

    # Save the model
    model_file_path = os.path.join(OUTPUT_MODEL_DIR, OUTPUT_MODEL_FILE)
    model.save(model_file_path)

    return RETURN_SUCCESS
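# A minimal sketch (an illustrative assumption, not part of the original script)
# of reloading the saved model and classifying a single 64x64 RGB image; `img`
# stands in for any (1, 64, 64, 3) float array prepared like the training data.
import numpy as np
from keras.models import load_model

model = load_model(os.path.join(OUTPUT_MODEL_DIR, OUTPUT_MODEL_FILE))
img = np.zeros((1, 64, 64, 3), dtype='float32')  # placeholder input
probs = model.predict(img)                       # softmax probabilities per class
print('predicted class:', np.argmax(probs, axis=1)[0])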
import numpy as np
import re
import itertools
from collections import Counter
from keras.utils import np_utils

import config_reader
import utils
from rnn_model import *

config = config_reader.read_config(utils.abs_path_of("config/default.ini"))
train_file = config.get_rel_path("PATHS", "training_file")
validation_file = config.get_rel_path("PATHS", "validation_file")

trainX = np.asarray(unpickle(train_file)['data'])
trainY = np.asarray(unpickle(train_file)['label'])
trainY = np_utils.to_categorical(trainY, 2)
ValidX = np.asarray(unpickle(validation_file)['data'])
ValidY = np.asarray(unpickle(validation_file)['label'])
ValidY = np_utils.to_categorical(ValidY, 2)


def objective(args):
    params = {}
    params['rnn_num_layers'] = args['rnn_num_layers']
    params['rnn_num_hidden'] = args['rnn_num_hidden']
    params['learning_rate'] = args['learning_rate']
    params['rnn_batch_size'] = args['rnn_batch_size']
    params['dropout_keep_probability'] = args['dropout_keep_probability']
    params['validation_window'] = args['validation_window']
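# A minimal sketch (an assumption about how `objective` is driven; hyperopt's
# fmin/tpe API is real, but the search-space bounds are illustrative, and
# `objective` is assumed to return a validation loss to minimize).
from hyperopt import fmin, tpe, hp, Trials

space = {
    'rnn_num_layers': hp.choice('rnn_num_layers', [1, 2, 3]),
    'rnn_num_hidden': hp.choice('rnn_num_hidden', [64, 128, 256]),
    'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-2)),
    'rnn_batch_size': hp.choice('rnn_batch_size', [32, 64, 128]),
    'dropout_keep_probability': hp.uniform('dropout_keep_probability', 0.5, 1.0),
    'validation_window': hp.choice('validation_window', [5, 10]),
}
trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
print('best hyperparameters:', best)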
print(df.shape)
y_user_id = df.iloc[:, 0].values  # .as_matrix() was removed in newer pandas
y_phone_id = df['phone id@{1,2,3,4,5}'].values
y_doc_id = df['doc id@{1,2,3,4,5,6,7}'].values
df.drop(df.columns[[0]], axis=1, inplace=True)
df.drop(['phone id@{1,2,3,4,5}', 'doc id@{1,2,3,4,5,6,7}'], axis=1, inplace=True)

##################################################################################################

encoder_user_id = LabelEncoder()
encoder_user_id.fit(y_user_id)
encoded_Y_user_id = encoder_user_id.transform(y_user_id)
dummy_Y_user_id = np_utils.to_categorical(encoded_Y_user_id)

model = Sequential()
model.add(Dense(500, input_dim=X.shape[1], activation='hard_sigmoid',
                kernel_initializer='uniform'))
model.add(Dropout(0.01))
model.add(Dense(1000, activation='hard_sigmoid', kernel_initializer='uniform'))
model.add(Dropout(0.01))
model.add(Dense(500, activation='hard_sigmoid', kernel_initializer='uniform'))
model.add(Dropout(0.01))
# the original breaks off mid-call; closed to match the layers above
model.add(Dense(dummy_Y_user_id.shape[1], activation='softmax',
                kernel_initializer='uniform'))
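# A minimal sketch (assumed continuation) of compiling and fitting on the one-hot
# user-id targets; `X` is the feature matrix referenced by the input layer above.
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X, dummy_Y_user_id, epochs=10, batch_size=64, validation_split=0.1)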
plt.imshow(X_train[8], interpolation="bicubic")
plt.grid(False)
plt.subplot(144)
plt.imshow(X_train[12], interpolation="bicubic")
plt.grid(False)
plt.show()

# type conversion and scaling
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
print(X_train.shape, X_train.dtype)

# one-hot encoding
Y_train = np_utils.to_categorical(y_train0, 100)
Y_test = np_utils.to_categorical(y_test0, 100)
Y_train[:4]

# build the model
seed = 0
np.random.seed(seed)
tf.set_random_seed(seed)

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3),
                 activation='relu'))
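# A minimal sketch (an illustrative assumption) of finishing this CIFAR-100
# network: pool, flatten, and map to the 100 softmax classes used by the
# one-hot labels above.
from keras.layers import MaxPooling2D, Flatten, Dense

model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=128, epochs=10,
          validation_data=(X_test, Y_test))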
def AUC(test_labels, test_prediction, nb):  # signature restored from the call below
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(nb):
        # (actual labels, predicted probabilities)
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    return [round(roc_auc[x], 3) for x in range(nb)]


nb_classes = 2
y_test2 = dataTest.loc[:, 'SURV2'].values  # pd.Series.as_matrix() was removed in newer pandas
y_test2 = np_utils.to_categorical(y_test2, nb_classes)
y_pred2 = np.row_stack([1 - y_pred, y_pred]).T
ROC2 = AUC(y_test2, y_pred2, nb_classes)
print('AUC:', ROC2[1])

# normalize=False returns the count of correct predictions, not a fraction
acc = accuracy_score(np.asarray(y_test), np.round(y_pred), normalize=False)
print('Accuracy:', acc)


def AUCalt(test_labels, test_prediction):
    # convert to non-categorical
    test_prediction = np.array([x[1] for x in test_prediction])
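# A minimal sketch (illustrative, not from the original script) of plotting the
# ROC curve for the positive class with the same sklearn primitives used above.
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

fpr_pos, tpr_pos, _ = roc_curve(y_test2[:, 1], y_pred2[:, 1])
plt.plot(fpr_pos, tpr_pos, label='AUC = %.3f' % auc(fpr_pos, tpr_pos))
plt.plot([0, 1], [0, 1], ls='--')  # chance line
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.show()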
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns:", n_patterns)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

# define the LSTM model
model = Sequential()
model.add(LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
# model.add(LSTM(256, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(128, return_sequences=True))
# model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

# load the network weights
# filename = "weights/weights-improvement-most-recent.hdf5"
# filename = "weights/multilayer-weights.hdf5"
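# A minimal sketch (an illustrative assumption; `int_to_char` is the assumed
# inverse of the `char_to_int` mapping above) of sampling text from the trained
# model: seed with a random pattern, predict, emit, and slide the window forward.
import sys

start = numpy.random.randint(0, len(dataX) - 1)
pattern = list(dataX[start])  # seed sequence of character indices
for _ in range(200):
    x = numpy.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    sys.stdout.write(int_to_char[index])
    pattern.append(index)
    pattern = pattern[1:]  # drop the oldest character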
print(y_train[0])

###################### preprocess data ####################
# set the maximum sentence length to 20
max_len = 20
# pad the texts so they all have the same length
x_train = preprocessing.sequence.pad_sequences(x_train, maxlen=max_len)
x_test = preprocessing.sequence.pad_sequences(x_test, maxlen=max_len)
print(x_train.shape)
print(x_train[0])

# one-hot encode the labels
num_class = 2
y_train = to_categorical(y_train, num_class)
y_test = to_categorical(y_test, num_class)
print(y_train.shape)
print(y_train[0])

###################### build network ####################
# word vector dimensionality
word_dim = 8

# network structure
model = Sequential()
# Embedding layer
model.add(Embedding(input_dim=1000, output_dim=word_dim, input_length=max_len))
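# A minimal sketch (assumed continuation) of completing the network: average the
# word vectors over the sequence, classify with softmax, and train on the padded
# sequences and one-hot labels prepared above.
from keras.layers import GlobalAveragePooling1D, Dense

model.add(GlobalAveragePooling1D())
model.add(Dense(num_class, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=5,
          validation_data=(x_test, y_test))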