def train(data, file_name, params, num_epochs=50, batch_size=128,
          train_temp=1, init=None, lr=0.01, decay=1e-6, momentum=0.9):
    """Train a small convolutional classifier and optionally save it.

    The network is two pairs of 3x3 convolutions (each pair followed by
    2x2 max-pooling), then two dense layers and a 10-unit output layer
    that emits raw logits.

    Parameters
    ----------
    data : object
        Must expose ``train_data``, ``train_labels``, ``validation_data``
        and ``validation_labels``.
    file_name : str or None
        Where to save the trained model; nothing is saved when ``None``.
    params : sequence
        Six sizes: ``params[0:4]`` are the filter counts of the four
        convolutions, ``params[4:6]`` the widths of the two dense layers.
    num_epochs, batch_size : int
        Passed straight to ``model.fit``.
    train_temp : number
        The logits are divided by this value inside the loss.
    init : str or None
        Optional weights file loaded before training.
    lr, decay, momentum : float
        SGD hyper-parameters (defaults are the previously hard-coded
        values).

    Returns
    -------
    The trained Keras model.
    """
    model = Sequential()

    print(data.train_data.shape)

    # First convolutional stage.
    model.add(Conv2D(params[0], (3, 3),
                     input_shape=data.train_data.shape[1:]))
    model.add(Activation('relu'))
    model.add(Conv2D(params[1], (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Second convolutional stage.
    model.add(Conv2D(params[2], (3, 3)))
    model.add(Activation('relu'))
    model.add(Conv2D(params[3], (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Dense head; the last layer has no activation, so it outputs logits.
    model.add(Flatten())
    model.add(Dense(params[4]))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(params[5]))
    model.add(Activation('relu'))
    model.add(Dense(10))

    if init is not None:
        model.load_weights(init)

    def fn(correct, predicted):
        # Softmax cross-entropy computed on temperature-scaled logits.
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    sgd = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)

    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)

    if file_name is not None:
        model.save(file_name)

    return model
def train(data, file_name, params, num_epochs=50, batch_size=128,
          train_temp=1, init=None, lr=0.01, decay=1e-5, momentum=0.9):
    """Train a 2-layer simple network for MNIST and CIFAR.

    The model flattens the input, applies one hidden dense layer of
    width ``params[0]`` with a scaled softplus activation, and ends in a
    10-unit dense layer that emits raw logits.

    Parameters
    ----------
    data : object
        Must expose ``train_data``, ``train_labels``, ``validation_data``
        and ``validation_labels``.
    file_name : str or None
        Where to save the trained model; nothing is saved when ``None``.
    params : sequence
        Only ``params[0]`` (hidden layer width) is read.
    num_epochs, batch_size : int
        Passed straight to ``model.fit``.
    train_temp : number
        The logits are divided by this value inside the loss.
    init : str or None
        Optional weights file loaded before training.
    lr, decay, momentum : float
        SGD hyper-parameters.

    Returns
    -------
    The trained Keras model.
    """
    # create a Keras sequential model
    model = Sequential()
    # reshape the input (28*28*1) or (32*32*3) to 1-D
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    # first dense layer (the hidden layer)
    model.add(Dense(params[0]))
    # The Keras softplus activation has no alpha parameter, so alpha = 10
    # is emulated: scale the input by 10, apply softplus, scale by 0.1.
    model.add(Lambda(lambda x: x * 10))
    model.add(Activation('softplus'))
    model.add(Lambda(lambda x: x * 0.1))
    # the output layer, with 10 classes
    model.add(Dense(10))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # the loss is the cross entropy between prediction and true label,
    # computed on temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    # initiate the SGD optimizer with given hyper parameters
    sgd = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    # run training with given dataset, and print progress
    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)

    return model
class LSTM(object):
    """Long Short Term Memory regressor built on a Keras ``Sequential`` model.

    The compiled network is stored in ``self.model``.
    """

    def __init__(self, layers, pct_dropout=0.2):
        """Build and compile the computational graph.

        Parameters
        ----------
        layers: list | [input, hidden_1, hidden_2, output]
            Dimensions of each layer
        pct_dropout: float | 0.0 to 1.0
            Percentage of dropout for hidden LSTM layers

        Raises
        ------
        TypeError
            If ``layers`` is not a list.
        ValueError
            If ``layers`` does not hold exactly four entries.
        """
        if not isinstance(layers, list):
            raise TypeError(
                'layers was expected to be of type %s, received %s' %
                (type([]), type(layers)))
        if len(layers) != 4:
            # The count can be too large as well as too small, so the
            # message must not say "only".
            raise ValueError(
                'exactly 4 layer dimensions required, received %d' %
                len(layers))

        self.model = Sequential()
        # NOTE(review): the sequence length given to Keras is layers[1],
        # the same value used as the first hidden size -- confirm that
        # this coupling is intended.
        self.model.add(
            _LSTM(layers[1],
                  input_shape=(layers[1], layers[0]),
                  return_sequences=True,
                  dropout=pct_dropout))
        self.model.add(
            _LSTM(layers[2], return_sequences=False, dropout=pct_dropout))
        self.model.add(Dense(layers[3], activation='linear'))
        self.model.compile(loss="mse", optimizer="rmsprop")

    def fit(self, X, y, **kwargs):
        """Train the model; extra keyword arguments go to ``model.fit``.

        Returns whatever ``model.fit`` returns, so the training history
        is no longer discarded.
        """
        return self.model.fit(X, y, **kwargs)

    def predict(self, series):
        """Prediction using provided series"""
        return self.model.predict(series)
def train(data, file_name, params, num_epochs=50, batch_size=128,
          train_temp=1, init=None, lr=0.01, decay=1e-5, momentum=0.9):
    """Train a n-layer simple network for MNIST and CIFAR.

    The model flattens the input, stacks one ReLU dense layer per entry
    of ``params`` and ends in a 10-unit dense layer that emits raw
    logits.

    Parameters
    ----------
    data : object
        Must expose ``train_data``, ``train_labels``, ``validation_data``
        and ``validation_labels``.
    file_name : str or None
        Where to save the trained model; nothing is saved when ``None``.
    params : iterable of int
        Width of each hidden dense layer, in order.
    num_epochs, batch_size : int
        Passed straight to ``model.fit``.
    train_temp : number
        The logits are divided by this value inside the loss.
    init : str or None
        Optional weights file loaded before training.
    lr, decay, momentum : float
        SGD hyper-parameters.

    Returns
    -------
    dict
        ``{'model': <trained model>, 'history': <result of model.fit>}``.
    """
    # create a Keras sequential model
    model = Sequential()
    # reshape the input (28*28*1) or (32*32*3) to 1-D
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    # hidden dense layers, each followed by a ReLU activation
    for param in params:
        model.add(Dense(param))
        model.add(Activation('relu'))
    # the output layer, with 10 classes
    model.add(Dense(10))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # the loss is the cross entropy between prediction and true label,
    # computed on temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    # initiate the SGD optimizer with given hyper parameters
    sgd = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    model.summary()
    print("Traing a {} layer model, saving to {}".format(
        len(params) + 1, file_name))

    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)

    return {'model': model, 'history': history}
def train(data, file_name, params, num_epochs=50, batch_size=256,
          train_temp=1, init=None, lr=0.01, decay=1e-5, momentum=0.9,
          activation="relu", optimizer_name="sgd"):
    """Train a n-layer simple network for MNIST and CIFAR.

    The model flattens the input, stacks one dense layer per entry of
    ``params`` (He-uniform initialised, each followed by the chosen
    activation) and ends in a 10-unit dense layer that emits raw logits.

    Parameters
    ----------
    data : object
        Must expose ``train_data``, ``train_labels``, ``validation_data``
        and ``validation_labels``.
    file_name : str or None
        Where to save the trained model; nothing is saved when ``None``.
    params : iterable of int
        Width of each hidden dense layer, in order.
    num_epochs, batch_size : int
        Passed straight to ``model.fit``.
    train_temp : number
        The logits are divided by this value inside the loss.
    init : str or None
        Optional weights file loaded before training.
    lr, decay, momentum : float
        Optimizer hyper-parameters (``momentum`` is used by SGD only).
    activation : str
        ``"arctan"`` selects a ``tf.atan`` Lambda layer; any other value
        is handed to the Keras ``Activation`` layer.
    optimizer_name : str
        ``"sgd"`` or ``"adam"``.

    Returns
    -------
    dict
        ``{'model': <trained model>, 'history': <result of model.fit>}``.

    Raises
    ------
    ValueError
        If ``optimizer_name`` is neither ``"sgd"`` nor ``"adam"``.
    """
    # Reject unknown optimizers before building anything; previously the
    # function failed late with an unbound ``optimizer`` variable.
    if optimizer_name not in ("sgd", "adam"):
        raise ValueError(
            "optimizer_name must be 'sgd' or 'adam', got {!r}".format(
                optimizer_name))

    # create a Keras sequential model
    model = Sequential()
    # reshape the input (28*28*1) or (32*32*3) to 1-D
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    # hidden dense layers; activation layers are numbered from 1 so that
    # every layer name is unique
    for n, param in enumerate(params, 1):
        model.add(Dense(param, kernel_initializer='he_uniform'))
        layer_name = activation + "_" + str(n)
        if activation == "arctan":
            model.add(Lambda(lambda x: tf.atan(x), name=layer_name))
        else:
            model.add(Activation(activation, name=layer_name))
    # the output layer, with 10 classes
    model.add(Dense(10, kernel_initializer='he_uniform'))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # the loss is the cross entropy between prediction and true label,
    # computed on temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    if optimizer_name == "sgd":
        # initiate the SGD optimizer with given hyper parameters
        optimizer = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)
    else:
        optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None,
                         decay=decay, amsgrad=False)

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])

    model.summary()
    print("Traing a {} layer model, saving to {}".format(
        len(params) + 1, file_name))

    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)
        print('model saved to ', file_name)

    return {'model': model, 'history': history}
# Second hidden layer: 6 ReLU units, uniformly initialised.
classifier.add(
    Dense(activation='relu', kernel_initializer='uniform', units=6))

# Output layer: a single sigmoid unit.
classifier.add(
    Dense(activation='sigmoid', kernel_initializer='uniform', units=1))

# Compile the ANN with binary cross-entropy, Adam, and accuracy tracking.
classifier.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

# Fit on the training set, holding out 10% of it for validation.
classifier.fit(X_train,
               y_train,
               epochs=100,
               batch_size=10,
               validation_split=0.1)

# Part 3 - Making predictions and evaluating the model

# Predict on the test set and threshold the outputs at 0.5.
y_pred = classifier.predict(X_test) > 0.5

# Build and show the confusion matrix.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)
print(cm)

# Drop the Keras backend session now that evaluation is finished.
backend.clear_session()
class AmazonKerasClassifier:
    """Build, train and query a Keras multi-label image classifier.

    The Keras model lives in ``self.classifier``. It starts as an empty
    ``Sequential`` and is either extended layer by layer (``vgg``,
    ``add_conv_layer``, ``add_flatten_layer``, ``add_ann_layer``) or
    replaced by a functional ``Model`` (``squeezenet``, ``resnet``,
    ``inception``, ``densenet``, ``alexnet``).
    """

    def __init__(self):
        self.losses = []
        self.classifier = Sequential()
        # Validation split stored by train_model(); read back by
        # setBestThreshold() and train_model_generator().
        self.x_vail = []
        self.y_vail = []
        self.train_filepath = ''
        self.train_img_filepath = ''
        self.valid_filepath = ''
        self.valid_img_filepath = ''
        self.test_img_filepath = ''
        self.test_addition_img_filepath = ''
        # Replaced with a list of file names by generate_test_img_from_file().
        self.test_img_name_list = ''
        # index -> label name, filled by generate_trainOrValid_img_from_file().
        self.y_map = {}

    # ------------------------------------------------------------------
    # Plain accessors for the path attributes.
    # ------------------------------------------------------------------
    def setTrainFilePath(self, value):
        self.train_filepath = value

    def getTrainFilePath(self):
        return self.train_filepath

    def setValidFilePath(self, value):
        self.valid_filepath = value

    def getValidFilePath(self):
        return self.valid_filepath

    def setTrainImgFilePath(self, value):
        self.train_img_filepath = value

    def getTrainImgFilePath(self):
        return self.train_img_filepath

    def setValidImgFilePath(self, value):
        self.valid_img_filepath = value

    def getValidImgFilePath(self):
        return self.valid_img_filepath

    def setTestImgFilePath(self, value):
        self.test_img_filepath = value

    def getTestImgFilePath(self):
        return self.test_img_filepath

    def setTestAdditionImgFilePath(self, value):
        self.test_addition_img_filepath = value

    def getTestAdditionImgFilePath(self):
        return self.test_addition_img_filepath

    def getTestImgNameList(self):
        return self.test_img_name_list

    def getYMap(self):
        return self.y_map

    # ------------------------------------------------------------------
    # Architecture builders.
    # ------------------------------------------------------------------
    def _build_functional_model(self, builder, img_size, img_channels,
                                output_size, **builder_kwargs):
        """Create an input tensor, run ``builder`` on it, store the Model."""
        input_tensor = Input(shape=(*img_size, img_channels))
        outputs = builder(input_tensor, num_classes=output_size,
                          **builder_kwargs)
        self.classifier = Model(inputs=input_tensor, outputs=outputs)

    def vgg(self, type=16, bn=False, img_size=(224, 224), img_channels=3,
            output_size=1000):
        """Append a VGG network to ``self.classifier``.

        ``type`` selects the depth (11, 13, 16 or 19) and a truthy ``bn``
        selects the batch-normalised variant. An unsupported depth prints
        a hint and leaves the classifier untouched; it used to fall
        through to an undefined ``layer_list``.
        """
        if type not in (11, 13, 16, 19):
            print("请输入11,13,16,19这四个数字中的一个!")
            return
        builder_name = 'vgg{:d}{}'.format(int(type), '_bn' if bn else '')
        layer_list = getattr(vgg, builder_name)(num_classes=output_size)
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        for value in layer_list:
            # NOTE(review): every entry is evaluated as Python source via
            # eval(); this is only safe while the layer descriptions come
            # from the project's own ``vgg`` module.
            self.classifier.add(eval(value))

    def squeezenet(self, type, img_size=(64, 64), img_channels=3,
                   output_size=1000):
        """Replace the classifier with SqueezeNet 1.0 (``type == 1``) or
        1.1 (``type == 1.1``).

        Any other ``type`` prints a hint and returns without touching the
        classifier; it used to fall through to an undefined ``x``.
        """
        if type == 1:
            builder_name = 'squeezenet1_0'
        elif type == 1.1:
            builder_name = 'squeezenet1_1'
        else:
            # The hint used to list "1.0", which the code never accepted.
            print("请输入1,1.1这两个数字中的一个!")
            return
        self._build_functional_model(getattr(squeezenet, builder_name),
                                     img_size, img_channels, output_size)

    def resnet(self, type, img_size=(64, 64), img_channels=3,
               output_size=1000):
        """Replace the classifier with a ResNet of depth ``type``
        (18, 34, 50, 101 or 152); other values print a hint and return."""
        if type not in (18, 34, 50, 101, 152):
            print("请输入18,34,50,101,152这五个数字中的一个!")
            return
        builder = getattr(resnet, 'resnet{:d}'.format(int(type)))
        self._build_functional_model(builder, img_size, img_channels,
                                     output_size)

    def inception(self, img_size=(299, 299), img_channels=3,
                  output_size=1000):
        """Replace the classifier with Inception v3 (aux logits enabled)."""
        self._build_functional_model(inception.inception_v3, img_size,
                                     img_channels, output_size,
                                     aux_logits=True, transform_input=False)

    def densenet(self, type, img_size=(299, 299), img_channels=3,
                 output_size=1000):
        """Replace the classifier with a DenseNet of depth ``type``
        (121, 161, 169 or 201); other values print a hint and return."""
        if type not in (121, 161, 169, 201):
            print("请输入161,121,169,201这四个数字中的一个!")
            return
        builder = getattr(densenet, 'densenet{:d}'.format(int(type)))
        self._build_functional_model(builder, img_size, img_channels,
                                     output_size)

    def alexnet(self, img_size=(299, 299), img_channels=3, output_size=1000):
        """Replace the classifier with AlexNet."""
        self._build_functional_model(alexnet.alexnet, img_size, img_channels,
                                     output_size)

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        """Append input batch-norm followed by four convolutional stages.

        Each stage is two 3x3 ReLU convolutions (the first with 'same'
        padding), 2x2 max-pooling and 25% dropout; the filter count
        doubles from 32 up to 256.
        """
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        for filters in (32, 64, 128, 256):
            self.classifier.add(
                Conv2D(filters, (3, 3), padding='same', activation='relu'))
            self.classifier.add(Conv2D(filters, (3, 3), activation='relu'))
            self.classifier.add(MaxPooling2D(pool_size=2))
            self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        """Append a Flatten layer."""
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        """Append the dense head: 512 ReLU units, batch-norm, 50% dropout
        and ``output_size`` sigmoid outputs."""
        self.classifier.add(Dense(512, activation='relu'))
        self.classifier.add(BatchNormalization())
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    # ------------------------------------------------------------------
    # Scoring helpers.
    # ------------------------------------------------------------------
    def _get_fbeta_score2(self, classifier, X_valid, y_valid):
        """Predict on ``X_valid`` and grid-search per-class thresholds.

        Returns ``(thresholds, score)`` from grid_search_best_threshold().
        """
        p_valid = classifier.predict(X_valid)
        return self.grid_search_best_threshold(y_valid, np.array(p_valid))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        """Return the sample-averaged F2 score at a fixed 0.2 threshold."""
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def grid_search_best_threshold(self, y_valid, p_valid):
        """Search one decision threshold per class, one class at a time.

        Every class starts at 0.2. For each class in turn the candidates
        0.1 .. 0.9 are tried while the other classes keep their current
        value, and the candidate with the highest sample-averaged F2
        score is kept. The class count is read from ``p_valid`` (it used
        to be hard-coded to 17).

        Returns ``(thresholds, score)`` where ``score`` is the best score
        seen while tuning the last class.
        """
        candidates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        p_valid = np.asarray(p_valid)
        n_classes = p_valid.shape[1]
        trial_thresholds = [0.2] * n_classes
        best_thresholds = [0.2] * n_classes
        score_result = 0
        for i in range(n_classes):
            score_result = 0
            for candidate in candidates:
                trial_thresholds[i] = candidate
                score_temp = fbeta_score(y_valid,
                                         p_valid > trial_thresholds,
                                         beta=2,
                                         average='samples')
                if score_result < score_temp:
                    score_result = score_temp
                    best_thresholds[i] = candidate
            # Freeze this class at its best value before tuning the next.
            trial_thresholds[i] = best_thresholds[i]
        return best_thresholds, score_result

    # ------------------------------------------------------------------
    # Training.
    # ------------------------------------------------------------------
    def train_model(self,
                    x_train,
                    y_train,
                    learn_rate=0.001,
                    epoch=5,
                    batch_size=128,
                    validation_split_size=0.2,
                    train_callbacks=()):
        """Train on in-memory arrays with a random validation split.

        The split is stored in ``self.x_vail`` / ``self.y_vail``. The
        model is compiled with Adam and binary cross-entropy and trained
        with early stopping on ``val_loss`` (patience 3).

        Returns ``[train_losses, val_losses, fbeta_score]``.
        """
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)
        self.x_vail = X_valid
        self.y_vail = y_valid

        opt = Adam(lr=learn_rate)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')

        self.classifier.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])

        score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, score]

    @staticmethod
    def _count_batches(sample_count, batch_size):
        """Return how many batches of ``batch_size`` cover ``sample_count``."""
        return int(-(-sample_count // batch_size))

    def train_model_generator(self,
                              generator_train,
                              generator_valid,
                              learn_rate=0.001,
                              epoch=5,
                              batchSize=128,
                              steps=32383,
                              validation_steps=8096,
                              train_callbacks=()):
        """Train from generators.

        ``steps`` and ``validation_steps`` are sample counts; they are
        converted to whole batch counts here. The divisibility check used
        to run after the division and the results were floats.

        Returns ``[train_losses, val_losses, fbeta_score]``. The score is
        computed on the split stored by a previous train_model() call and
        is ``None`` when no such split exists; this method used to read
        undefined ``X_valid`` / ``y_valid`` names.
        """
        history = LossHistory()

        opt = Adam(lr=learn_rate)

        # NOTE(review): nine batches are subtracted from the training
        # step count, as in the original code; the reason is not visible
        # here -- confirm.
        steps = max(1, self._count_batches(steps, batchSize) - 9)
        validation_steps = self._count_batches(validation_steps, batchSize)
        print(steps, validation_steps)

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')

        self.classifier.fit_generator(
            generator_train,
            steps_per_epoch=steps,
            epochs=epoch,
            verbose=1,
            validation_data=generator_valid,
            validation_steps=validation_steps,
            callbacks=[history, *train_callbacks, earlyStopping])

        score = None
        if len(self.x_vail):
            score = self._get_fbeta_score(self.classifier, self.x_vail,
                                          self.y_vail)
        return [history.train_losses, history.val_losses, score]

    # ------------------------------------------------------------------
    # Data generators.
    # ------------------------------------------------------------------
    def generate_trainOrValid_img_from_file(self,
                                            train_set_folder,
                                            train_csv_file,
                                            img_resize=(32, 32),
                                            batchSize=128,
                                            process_count=cpu_count()):
        """Yield ``(images, targets)`` batches for the images in a CSV.

        The CSV rows are unpacked as ``(file_name, tags)`` and must have
        a ``tags`` column of space-separated labels. Images are read from
        ``<train_set_folder>/<file_name>.jpg`` on a thread pool.
        ``self.y_map`` is set to the index -> label mapping once
        iteration starts.

        Batches hold exactly ``batchSize`` items, except a final shorter
        one when the data does not divide evenly. The previous version
        emitted a first batch that was one item short and dropped the
        trailing partial batch. The generator makes a single pass over
        the data.
        """
        labels_df = pd.read_csv(train_csv_file)
        labels = sorted(
            set(
                chain.from_iterable(
                    tags.split(" ") for tags in labels_df['tags'].values)))
        labels_map = {l: i for i, l in enumerate(labels)}
        self.y_map = {v: k for k, v in labels_map.items()}

        jobs = [('{}/{}.jpg'.format(train_set_folder, file_name), tags,
                 labels_map, img_resize)
                for file_name, tags in labels_df.values]

        X = []
        Y = []
        with ThreadPoolExecutor(process_count) as pool:
            results = pool.map(self._train_transform_to_matrices, jobs)
            for img_array, targets in tqdm(results, total=len(jobs)):
                X.append(img_array)
                Y.append(targets)
                if len(X) == batchSize:
                    yield (np.array(X), np.array(Y))
                    X = []
                    Y = []
        if X:
            yield (np.array(X), np.array(Y))

    def _train_transform_to_matrices(self, *args):
        """Load one training image and build its multi-hot target vector.

        ``args[0]`` is ``(file_path, tags, labels_map, img_resize)``.
        Returns ``(img_array, targets)``; pixel values are scaled to 0..1.
        """
        file_path, tags, labels_map, img_resize = args[0]
        # The context manager closes the underlying file handle.
        with Image.open(file_path) as img:
            # NOTE(review): thumbnail() preserves the aspect ratio, so
            # non-square inputs would not come out as exactly img_resize.
            img.thumbnail(img_resize)
            img_array = np.asarray(img.convert("RGB"),
                                   dtype=np.float32) / 255
        targets = np.zeros(len(labels_map))
        for t in tags.split(' '):
            targets[labels_map[t]] = 1
        return img_array, targets

    def generate_test_img_from_file(self,
                                    test_set_folder,
                                    img_resize=(32, 32),
                                    batchSize=128,
                                    process_count=cpu_count()):
        """Yield batches of test images read from ``test_set_folder``.

        Test images carry no labels, so each yielded item is just an
        image array. The previous version appended an undefined
        ``targets`` and called the transform helper without ``self``.
        File names are collected, in yield order, in
        ``self.test_img_name_list``. Batches hold ``batchSize`` images
        except for a possibly shorter final batch.
        """
        files_name = os.listdir(test_set_folder)
        jobs = [(test_set_folder, file_name, img_resize)
                for file_name in files_name]

        x_test_filename = []
        self.test_img_name_list = x_test_filename
        X = []
        with ThreadPoolExecutor(process_count) as pool:
            results = pool.map(self._test_transform_to_matrices, jobs)
            for img_array, file_name in tqdm(results, total=len(jobs)):
                x_test_filename.append(file_name)
                X.append(img_array)
                if len(X) == batchSize:
                    yield np.array(X)
                    X = []
        if X:
            yield np.array(X)

    def _test_transform_to_matrices(self, *args):
        """Load one test image.

        ``args[0]`` is ``(test_set_folder, file_name, img_resize)``.
        Returns ``(img_array, file_name)``; pixel values are scaled to 0..1.
        """
        test_set_folder, file_name, img_resize = args[0]
        with Image.open('{}/{}'.format(test_set_folder, file_name)) as img:
            img.thumbnail(img_resize)
            # Convert to RGB and normalize
            img_array = np.array(img.convert("RGB"), dtype=np.float32) / 255
        return img_array, file_name

    # ------------------------------------------------------------------
    # Persistence and inference.
    # ------------------------------------------------------------------
    def save_weights(self, weight_file_path):
        """Save the classifier weights to ``weight_file_path``."""
        self.classifier.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        """Load classifier weights from ``weight_file_path``."""
        self.classifier.load_weights(weight_file_path)

    def setBestThreshold(self):
        """Grid-search thresholds on the stored validation split, print
        the best score and thresholds, and return the thresholds."""
        result_threshold_list_final, score_result = self._get_fbeta_score2(
            self.classifier, self.x_vail, self.y_vail)
        print('最好得分:{}'.format(score_result))
        print('最好的阈值:{}'.format(result_threshold_list_final))
        return result_threshold_list_final

    def predict(self, x_test):
        """Return the classifier's predictions for ``x_test``."""
        return self.classifier.predict(x_test)

    def predict_generator(self, generator):
        """Return the classifier's predictions for a batch generator."""
        # The method name used to be misspelled ("predcit_generator").
        return self.classifier.predict_generator(generator)

    def map_predictions(self, predictions, labels_map, thresholds):
        """Map each prediction row to the labels whose value exceeds the
        per-class threshold.

        ``labels_map`` is indexed by class position; ``thresholds`` holds
        one value per class. Returns a list of label lists.
        """
        return [[
            labels_map[i] for i, value in enumerate(prediction)
            if value > thresholds[i]
        ] for prediction in predictions]

    def close(self):
        """Clear the Keras backend session."""
        backend.clear_session()
import numpy as np
from tensorflow.contrib.keras.api.keras.models import Sequential
from tensorflow.contrib.keras.api.keras.layers import Dense, Activation
from tensorflow.contrib.keras.api.keras.optimizers import SGD, Adam
# Restored: the plotting calls at the end of the script need ``plt``.
import matplotlib.pyplot as plt

# Fit a small MLP to the (x, y) samples stored column-wise in sin.csv.
data = np.loadtxt('sin.csv', delimiter=',', unpack=True)

x = data[0]
y = data[1]

# 1 -> 30 -> 40 -> 1 network with sigmoid hidden activations.
model = Sequential()
model.add(Dense(30, input_shape=(1, )))
model.add(Activation('sigmoid'))
model.add(Dense(40))
model.add(Activation('sigmoid'))
model.add(Dense(1))

# NOTE: despite its name, ``sgd`` holds an Adam optimizer.
sgd = Adam(lr=0.1)
model.compile(loss='mean_squared_error', optimizer=sgd)

model.fit(x, y, epochs=1000, batch_size=20, verbose=0)

print('save model')
model.save('sin_model.h5')

predictions = model.predict(x)

# The single-unit output layer gives predictions one column per sample,
# while ``y`` is 1-D. Flatten before subtracting so the error is
# element-wise rather than an N x N broadcast.
print(np.mean(np.square(predictions.ravel() - y)))

# Reuse the predictions instead of running inference a second time.
preds = predictions
plt.plot(x, y, 'b', x, preds, 'r--')
plt.show()
class AmazonKerasClassifier:
    """Small convolutional multi-label classifier built on Keras.

    The model lives in ``self.classifier`` and is assembled by calling
    ``add_conv_layer``, ``add_flatten_layer`` and ``add_ann_layer``.
    """

    def __init__(self):
        self.losses = []
        self.classifier = Sequential()

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        """Append input batch-norm followed by four convolutional stages.

        Each stage is two 3x3 ReLU convolutions (the first with 'same'
        padding), 2x2 max-pooling and 25% dropout; the filter count
        doubles from 32 up to 256.
        """
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        for filters in (32, 64, 128, 256):
            self.classifier.add(
                Conv2D(filters, (3, 3), padding='same', activation='relu'))
            self.classifier.add(Conv2D(filters, (3, 3), activation='relu'))
            self.classifier.add(MaxPooling2D(pool_size=2))
            self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        """Append a Flatten layer."""
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        """Append the dense head: 512 ReLU units, batch-norm, 50% dropout
        and ``output_size`` sigmoid outputs."""
        self.classifier.add(Dense(512, activation='relu'))
        self.classifier.add(BatchNormalization())
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        """Return the sample-averaged F2 score at a fixed 0.2 threshold."""
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def train_model(self,
                    x_train,
                    y_train,
                    learn_rate=0.001,
                    epoch=5,
                    batch_size=128,
                    validation_split_size=0.2,
                    train_callbacks=()):
        """Train one epoch at a time, printing the F2 score after each.

        A random validation split of size ``validation_split_size`` is
        used for ``val_loss`` and for the F2 score. Training stops early
        once ``val_loss`` has failed to improve for 3 consecutive epochs.

        Returns ``[train_losses, val_losses, fbeta_score]`` with the
        score of the last evaluated epoch.
        """
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)

        opt = Nadam(lr=learn_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    schedule_decay=0.004)

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        # An EarlyStopping callback restarts its bookkeeping at the
        # beginning of every fit() call, so it can never fire when each
        # call runs a single epoch. The same rule (monitor val_loss,
        # patience 3) is therefore tracked here across the calls.
        patience = 3
        best_val_loss = None
        epochs_without_improvement = 0
        score = None

        for _ in range(epoch):
            fit_result = self.classifier.fit(
                X_train,
                y_train,
                batch_size=batch_size,
                epochs=1,
                verbose=2,
                validation_data=(X_valid, y_valid),
                callbacks=[history, *train_callbacks])
            score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
            print('fbeta score: %s' % score)

            val_loss = fit_result.history['val_loss'][-1]
            if best_val_loss is None or val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    break

        if score is None:
            # epoch == 0: nothing was trained, but still return a score
            # instead of failing on an unbound variable.
            score = self._get_fbeta_score(self.classifier, X_valid, y_valid)

        return [history.train_losses, history.val_losses, score]

    def save_weights(self, weight_file_path):
        """Save the classifier weights to ``weight_file_path``."""
        self.classifier.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        """Load classifier weights from ``weight_file_path``."""
        self.classifier.load_weights(weight_file_path)

    def predict(self, x_test):
        """Return the classifier's predictions for ``x_test``."""
        return self.classifier.predict(x_test)

    def map_predictions(self, predictions, labels_map, thresholds):
        """
        Return the predictions mapped to their labels
        :param predictions: the predictions from the predict() method
        :param labels_map: the map, indexed by class position
        :param thresholds: The threshold of each class to be considered as existing or not existing
        :return: the predictions list mapped to their labels
        """
        return [[
            labels_map[i] for i, value in enumerate(prediction)
            if value > thresholds[i]
        ] for prediction in predictions]

    def close(self):
        """Clear the Keras backend session."""
        backend.clear_session()
# Specified via input_shape: the number of samples is not needed (see the example).
# Specified via batch_input_shape: the number of samples must be given.
# 2D layers give the size of each dimension via input_dim; 3D layers use
# the two parameters input_dim and input_length.
# A Keras LSTM layer works on a 3-D numeric array (N, W, F), where N is
# the number of training sequences, W the sequence length and F the
# number of features per sequence.
TIME_STEPS = 30
INPUT_SIZE = 1

#model.add(LSTM(1,batch_input_shape=(None, TIME_STEPS, INPUT_SIZE)))
model.add(LSTM(1, input_shape=(TIME_STEPS, INPUT_SIZE)))
model.add(Dropout(0.2))
model.add(Dense(1))
model.add(Activation("linear"))

# Time the compile step.
start = time.time()
model.compile(optimizer="rmsprop", loss="mse")
print("Compilation Time : ", time.time() - start)

# NOTE(review): the TensorBoard callback is attached to the model here
# but is not passed to fit() below -- confirm this is intended.
tbCallBack.set_model(model)

model.fit(train_x, train_y, epochs=5, batch_size=32)
score = model.evaluate(train_x, train_y, batch_size=32)
#model.save_weights('w1.hdf5')

# Predict on the test inputs and flatten the result to 1-D.
predicted = model.predict(test_x, verbose=2, batch_size=32)
predicted = np.reshape(predicted, (predicted.size, ))
print(predicted)
print(score)
plot_results(predicted, test_y)

# The original text opened a triple-quoted block of disabled code at this
# point; the block is cut off in this view, so the visible part is kept
# as comments:
# model.add(Dense(128,activation='relu',input_shape=[None,5],input_dim=2))
# model.add(Dense(3,activation='softmax'))
# model.compile(loss='categorical_crossentropy',
def main_fun(args, ctx):
    """Run one cluster node: serve as parameter server or train as a worker.

    A "ps" node blocks in ``server.join()``. A "worker" node builds a
    784-512-512-10 MLP, trains it either on the in-memory MNIST arrays
    (``args.input_mode == 'tf'``) or on batches pulled from the data feed
    (any other input mode), and on worker 0 optionally exports a
    SavedModel to ``args.export_dir``.

    Reads ``args.input_mode``, ``args.model_dir``, ``args.steps_per_epoch``,
    ``args.epochs`` and ``args.export_dir``, plus ``ctx.job_name``,
    ``ctx.task_index`` and ``ctx.mgr``.
    """
    import numpy
    import os
    import tensorflow as tf
    import tensorflow.contrib.keras as keras
    from tensorflow.contrib.keras.api.keras import backend as K
    from tensorflow.contrib.keras.api.keras.models import Sequential, load_model, save_model
    from tensorflow.contrib.keras.api.keras.layers import Dense, Dropout
    from tensorflow.contrib.keras.api.keras.optimizers import RMSprop
    from tensorflow.contrib.keras.python.keras.callbacks import LambdaCallback, TensorBoard
    from tensorflow.python.saved_model import builder as saved_model_builder
    from tensorflow.python.saved_model import tag_constants
    from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def
    from tensorflowonspark import TFNode

    cluster, server = TFNode.start_cluster_server(ctx)

    if ctx.job_name == "ps":
        server.join()
    elif ctx.job_name == "worker":

        def generate_rdd_data(tf_feed, batch_size):
            """Endlessly yield (images, labels) float32 batches from the feed."""
            print("generate_rdd_data invoked")
            while True:
                pixel_rows = []
                label_rows = []
                for record in tf_feed.next_batch(batch_size):
                    pixel_rows.append(record[0])
                    label_rows.append(record[1])
                # Pixels are scaled into 0..1; labels are only cast.
                images = numpy.array(pixel_rows).astype('float32') / 255
                labels = numpy.array(label_rows).astype('float32')
                yield (images, labels)

        device_setter = tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % ctx.task_index,
            cluster=cluster)
        with tf.device(device_setter):

            IMAGE_PIXELS = 28
            batch_size = 100
            num_classes = 10

            # the data, shuffled and split between train and test sets
            if args.input_mode == 'tf':
                from tensorflow.contrib.keras.api.keras.datasets import mnist
                (x_train, y_train), (x_test, y_test) = mnist.load_data()
                x_train = x_train.reshape(60000, 784).astype('float32') / 255
                x_test = x_test.reshape(10000, 784).astype('float32') / 255

                # convert class vectors to binary class matrices
                y_train = keras.utils.to_categorical(y_train, num_classes)
                y_test = keras.utils.to_categorical(y_test, num_classes)
            else:
                # any other input mode: only placeholders are created here
                x_train = tf.placeholder(tf.float32,
                                         [None, IMAGE_PIXELS * IMAGE_PIXELS],
                                         name="x_train")
                y_train = tf.placeholder(tf.float32, [None, 10],
                                         name="y_train")

            model = Sequential()
            model.add(Dense(512, activation='relu', input_shape=(784, )))
            model.add(Dropout(0.2))
            model.add(Dense(512, activation='relu'))
            model.add(Dropout(0.2))
            model.add(Dense(10, activation='softmax'))

            model.summary()

            model.compile(loss='categorical_crossentropy',
                          optimizer=RMSprop(),
                          metrics=['accuracy'])

        saver = tf.train.Saver()

        with tf.Session(server.target) as sess:
            K.set_session(sess)

            def save_checkpoint(epoch, logs=None):
                """Write the graph once (at epoch 1) and a checkpoint every epoch."""
                if epoch == 1:
                    tf.train.write_graph(sess.graph.as_graph_def(),
                                         args.model_dir, 'graph.pbtxt')
                checkpoint_path = os.path.join(args.model_dir, 'model.ckpt')
                saver.save(sess,
                           checkpoint_path,
                           global_step=epoch * args.steps_per_epoch)

            ckpt_callback = LambdaCallback(on_epoch_end=save_checkpoint)
            tb_callback = TensorBoard(log_dir=args.model_dir,
                                      histogram_freq=1,
                                      write_graph=True,
                                      write_images=True)

            # add callbacks to save model checkpoint and tensorboard events (on worker:0 only)
            if ctx.task_index == 0:
                callbacks = [ckpt_callback, tb_callback]
            else:
                callbacks = None

            if args.input_mode == 'tf':
                # train & validate on in-memory data
                history = model.fit(x_train,
                                    y_train,
                                    batch_size=batch_size,
                                    epochs=args.epochs,
                                    verbose=1,
                                    validation_data=(x_test, y_test),
                                    callbacks=callbacks)
            else:
                # train on data read from a generator which is producing data from a Spark RDD
                tf_feed = TFNode.DataFeed(ctx.mgr)
                history = model.fit_generator(
                    generator=generate_rdd_data(tf_feed, batch_size),
                    steps_per_epoch=args.steps_per_epoch,
                    epochs=args.epochs,
                    verbose=1,
                    callbacks=callbacks)

            if args.export_dir and ctx.job_name == 'worker' and ctx.task_index == 0:
                # save a local Keras model, so we can reload it with an inferencing learning_phase
                save_model(model, "tmp_model")

                # reload the model
                K.set_learning_phase(False)
                new_model = load_model("tmp_model")

                # export a saved_model for inferencing
                builder = saved_model_builder.SavedModelBuilder(
                    args.export_dir)
                signature = predict_signature_def(
                    inputs={'images': new_model.input},
                    outputs={'scores': new_model.output})
                builder.add_meta_graph_and_variables(
                    sess=sess,
                    tags=[tag_constants.SERVING],
                    signature_def_map={'predict': signature},
                    clear_devices=True)
                builder.save()

            if args.input_mode == 'spark':
                tf_feed.terminate()
class AmazonKerasClassifier:
    """Small convolutional multi-label classifier built on Keras.

    The model lives in ``self.classifier`` and is assembled by calling
    ``add_conv_layer``, ``add_flatten_layer`` and ``add_ann_layer``.
    """

    def __init__(self):
        self.losses = []
        self.classifier = Sequential()

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        """Append input batch-norm followed by four convolutional stages.

        Each stage is two 3x3 ReLU convolutions (the first with 'same'
        padding), 2x2 max-pooling and 25% dropout; the filter count
        doubles from 32 up to 256.
        """
        # The input shape must be the flat triple (height, width,
        # channels). The previous code nested img_size inside the tuple,
        # producing ((h, w), c).
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        for filters in (32, 64, 128, 256):
            self.classifier.add(
                Conv2D(filters, (3, 3), padding='same', activation='relu'))
            self.classifier.add(Conv2D(filters, (3, 3), activation='relu'))
            self.classifier.add(MaxPooling2D(pool_size=2))
            self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        """Append a Flatten layer."""
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        """Append the dense head: 512 ReLU units, batch-norm, 50% dropout
        and ``output_size`` sigmoid outputs."""
        self.classifier.add(Dense(512, activation='relu'))
        self.classifier.add(BatchNormalization())
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        """Return the sample-averaged F2 score at a fixed 0.2 threshold."""
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def train_model(self,
                    x_train,
                    y_train,
                    learn_rate=0.001,
                    epoch=5,
                    batch_size=128,
                    validation_split_size=0.2,
                    train_callbacks=()):
        """Train on in-memory arrays with a random validation split.

        The model is compiled with Adam and binary cross-entropy and
        trained with early stopping on ``val_loss`` (patience 3).

        Returns ``[train_losses, val_losses, fbeta_score]``.
        """
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)

        opt = Adam(lr=learn_rate)

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        # early stopping will auto-stop training process if model stops
        # learning after 3 epochs
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')

        self.classifier.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])

        score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, score]

    def save_weights(self, weight_file_path):
        """Save the classifier weights to ``weight_file_path``."""
        self.classifier.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        """Load classifier weights from ``weight_file_path``."""
        self.classifier.load_weights(weight_file_path)

    def predict(self, x_test):
        """Return the classifier's predictions for ``x_test``."""
        return self.classifier.predict(x_test)

    def map_predictions(self, predictions, labels_map, thresholds):
        """
        Return the predictions mapped to their labels
        :param predictions: the predictions from the predict() method
        :param labels_map: the map, indexed by class position
        :param thresholds: The threshold of each class to be considered as existing or not existing
        :return: the predictions list mapped to their labels
        """
        return [[
            labels_map[i] for i, value in enumerate(prediction)
            if value > thresholds[i]
        ] for prediction in predictions]

    def close(self):
        """Clear the Keras backend session."""
        backend.clear_session()
def fizzbuzz(i):
    """Return the one-hot FizzBuzz class of ``i``.

    Index 0 is "plain number", 1 is divisible by 3, 2 is divisible by 5 and
    3 is divisible by 15.
    """
    if i % 15 == 0:
        return np.array([0, 0, 0, 1])
    elif i % 5 == 0:
        return np.array([0, 0, 1, 0])
    elif i % 3 == 0:
        return np.array([0, 1, 0, 0])
    else:
        return np.array([1, 0, 0, 0])


def bin(i, num_digits):
    """Return the ``num_digits`` lowest bits of ``i``, least significant first.

    NOTE: this intentionally keeps the original name even though it shadows
    the built-in ``bin`` inside this module.
    """
    return np.array([i >> d & 1 for d in range(num_digits)])


NUM_DIGITS = 7

# Training set: the numbers 1..100 as bit vectors, with their FizzBuzz class.
trX = np.array([bin(i, NUM_DIGITS) for i in range(1, 101)])
trY = np.array([fizzbuzz(i) for i in range(1, 101)])

model = Sequential()
# The input width follows NUM_DIGITS so the encoding and the network cannot
# drift apart when the constant changes.
model.add(Dense(64, input_dim=NUM_DIGITS))
model.add(Activation('tanh'))
model.add(Dense(4))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(trX, trY, epochs=3600, batch_size=64)

# Save as HDF5, then convert that file to a TensorFlow Lite flat buffer.
model.save('fizzbuzz_model.h5')
converter = lite.TFLiteConverter.from_keras_model_file('fizzbuzz_model.h5')
tflite_model = converter.convert()
# Context manager guarantees the file is flushed and closed.
with open('fizzbuzz_model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)
# Second hidden layer, then the single-unit sigmoid output layer.
for layer_units, layer_activation in ((2, 'relu'), (1, 'sigmoid')):
    classifier.add(Dense(units=layer_units,
                         kernel_initializer='uniform',
                         activation=layer_activation))

# Compile the ANN.
# Alternative kept for reference:
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
#classifier.compile(loss='mean_squared_error', optimizer=sgd)
classifier.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

# Fit the ANN to the training set.
classifier.fit(X_train, y_train, epochs=10, batch_size=5)

# Part 3 - Making predictions and evaluating the model

# Predict on the test set and binarise the outputs at 0.5.
y_pred = classifier.predict(X_test) > 0.5

# Build and print the confusion matrix.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

backend.clear_session()

#predict a single dataset/single observation
from tensorflow.contrib.keras.api.keras.models import Sequential
from tensorflow.contrib.keras.api.keras.layers import Dense, Activation
from tensorflow.contrib.keras.api.keras.optimizers import SGD
from keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
import tensorflow as tf

# input = K.placeholder(shape=(10, 32))
# input2 = tf.placeholder(tf.float32, shape=(10, 32))
# print(input)
# print(input2)

# model = Sequential()
# model.add(Dense(units=16, input_dim=784))
# model.add(Activation('softmax'))

# Single softmax layer over 784-dimensional inputs.
model = Sequential(
    [Dense(units=64, input_shape=(784, ), activation='softmax')])

model.compile(
    loss='categorical_crossentropy',
    # optimizer='sgd',
    # SGD is imported by name: the bare name `keras` is never bound in this
    # script, so `keras.optimizers.SGD` raised a NameError.
    optimizer=SGD(lr=0.02, momentum=0.8, nesterov=True),
    metrics=['accuracy'])

# Stop once val_loss has not improved for 10 epochs.
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=0,
                           patience=10,
                           verbose=0,
                           mode='auto')

# NOTE(review): fit() is called without training data, and `early_stop` is
# never passed as a callback; no dataset is visible in this script, so the
# call is left as written - supply x/y (and callbacks=[early_stop]) here.
model.fit()
import numpy as np
from tensorflow.contrib.keras.api.keras.models import Sequential, model_from_json
from tensorflow.contrib.keras.api.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.api.keras.optimizers import SGD, Adam
import tensorflow.contrib.lite as lite

# unpack=True transposes the CSV, so data[0] and data[1] are its first two
# columns; they are used as inputs and targets respectively.
data = np.loadtxt('sin.csv', delimiter=',', unpack=True)

# 1 -> 30 -> 40 -> 1 regression network with sigmoid hidden activations.
model = Sequential()
model.add(Dense(30, input_shape=(1, )))
model.add(Activation('sigmoid'))
model.add(Dense(40))
model.add(Activation('sigmoid'))
model.add(Dense(1))

# NOTE: the variable keeps its historical name `sgd`, but it holds Adam.
sgd = Adam(lr=0.1)
model.compile(loss='mean_squared_error', optimizer=sgd)

model.fit(data[0], data[1], epochs=1000, batch_size=20, verbose=0)

# Save as HDF5, then convert that file to a TensorFlow Lite flat buffer.
model.save('sin_model.h5')
converter = lite.TFLiteConverter.from_keras_model_file('sin_model.h5')
tflite_model = converter.convert()
# Context manager guarantees the file is flushed and closed.
with open('sin_model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)
# Part 2 - Building the RNN

# Initialising the RNN
regressor = Sequential()

# Adding the input layer and the LSTM layer
# (input_shape=(None, 1): any number of timesteps, one feature per step)
regressor.add(LSTM(units=4, activation='sigmoid', input_shape=(None, 1)))

# Adding the output layer
regressor.add(Dense(units=1))

# Compiling the RNN
regressor.compile(optimizer='adam', loss='mean_squared_error')

# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, batch_size=32, epochs=200)

# Part 3 - Making the predictions and visualising the results
script_dir = os.path.dirname(__file__)
test_set_path = os.path.join(script_dir,
                             '../dataset/Google_Stock_Price_Test.csv')

# Getting the real stock price of 2017 (second CSV column, kept 2-D)
test_set = pd.read_csv(test_set_path)
real_stock_price = test_set.iloc[:, 1:2].values

# Getting the predicted stock price of 2017
inputs = real_stock_price
# presumably `sc` is the scaler fitted on the training data earlier in the
# script - verify against the part of the file above this section
inputs = sc.transform(inputs)
# One timestep and one feature per sample; -1 lets NumPy infer the number of
# samples instead of assuming the test file has exactly 20 rows.
inputs = np.reshape(inputs, (-1, 1, 1))
predicted_stock_price = regressor.predict(inputs)
# Model definition: two conv layers, max-pooling, then a dense classifier.
input_shape = (mnist.img_rows, mnist.img_cols, 1)
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu',
           input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(mnist.n_classes, activation='softmax'),
])

# Training: categorical cross-entropy with Adam; the test split doubles as
# the validation set.
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train,
          y_train,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(x_test, y_test),
          verbose=1)

# Evaluation: score is [loss, accuracy]
score = model.evaluate(x_test, y_test, verbose=0)
print('Test accuracy: {:0.2f}%'.format(score[1] * 100))

# Persist the trained model
model.save('mnist_tfkeras.h5')
import numpy as np
from tensorflow.contrib.keras.api.keras.models import Sequential, model_from_json
from tensorflow.contrib.keras.api.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.api.keras.optimizers import SGD
import tensorflow.contrib.lite as lite

# 2 -> 8 -> 1 network: tanh hidden layer, sigmoid output.
model = Sequential()
model.add(Dense(8, input_dim=2))
model.add(Activation('tanh'))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.1))

# The full XOR truth table is the training set.
model.fit(
    np.array([[0, 0], [0, 1.0], [1.0, 0], [1.0, 1.0]]),
    np.array([[0.0], [1.0], [1.0], [0.0]]),
    batch_size=1,
    epochs=300)

# Save as HDF5, then convert that file to a TensorFlow Lite flat buffer.
model.save('xor_model.h5')
converter = lite.TFLiteConverter.from_keras_model_file("xor_model.h5")
tflite_model = converter.convert()
# Context manager guarantees the file is flushed and closed.
with open("xor_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)
# Seed NumPy before the remaining imports, as the original script does.
np.random.seed(1337)

from jellyfish_eye_k.data_set import load_data
from tensorflow.contrib.keras.api.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from tensorflow.contrib.keras.api.keras.models import Sequential, save_model

(x_train, y_train), (x_validation, y_validation), (x_test, y_test) = load_data()

# Two 5x5 conv layers, pooling, then a 512 -> 256 -> 3 dense classifier.
model = Sequential()
model.add(Conv2D(32, 5, activation='relu', input_shape=x_train[0].shape))
model.add(Conv2D(64, 5, activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# Sparse loss: the labels are passed to fit/evaluate as loaded.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train,
          y_train,
          epochs=5,
          batch_size=20,
          validation_data=(x_validation, y_validation),
          verbose=1)

# score is [loss, accuracy] on the held-out test split
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss: {0}'.format(score[0]))
print('Test accuracy: {0}'.format(score[1]))

save_model(model, './jellyfish_eye.h5')

del model
class AmazonKerasClassifier:
    """Small Keras CNN with a sigmoid output, built up by the caller in stages."""

    def __init__(self):
        self.losses = []
        self.classifier = Sequential()

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        """Append input normalisation and three conv / pool / dropout stages."""
        net = self.classifier
        net.add(BatchNormalization(input_shape=(*img_size, img_channels)))
        # (filters, kernel size) of each convolutional stage, in order
        for filters, kernel in ((32, (3, 3)), (64, (3, 3)), (16, (2, 2))):
            net.add(Conv2D(filters, kernel, activation='relu'))
            net.add(MaxPooling2D(pool_size=(2, 2)))
            net.add(Dropout(0.25))

    def add_flatten_layer(self):
        """Append the flattening layer that feeds the dense head."""
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        """Append two dense + dropout layers and the ``output_size`` sigmoid output."""
        net = self.classifier
        for width in (256, 512):
            net.add(Dense(width, activation='relu'))
            net.add(Dropout(0.5))
        net.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        """Sample-averaged F2 score, counting outputs above 0.2 as positive."""
        binarised = np.array(classifier.predict(X_valid)) > 0.2
        return fbeta_score(y_valid, binarised, beta=2, average='samples')

    def train_model(self, x_train, y_train, epoch=5, batch_size=128,
                    validation_split_size=0.2, train_callbacks=()):
        """Fit on a random train/validation split.

        Returns ``[train_losses, val_losses, fbeta_score]``; the losses are
        read from the ``LossHistory`` callback and the score is computed on
        the validation split.
        """
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)

        self.classifier.compile(optimizer='adam',
                                loss='binary_crossentropy',
                                metrics=['accuracy'])

        # the loss recorder always runs first, then the caller's callbacks
        callbacks = [history]
        callbacks.extend(train_callbacks)

        self.classifier.fit(X_train,
                            y_train,
                            epochs=epoch,
                            batch_size=batch_size,
                            validation_data=(X_valid, y_valid),
                            callbacks=callbacks,
                            verbose=1)

        validation_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, validation_score]

    def predict(self, x_test):
        """Return the raw network outputs for ``x_test``."""
        return self.classifier.predict(x_test)

    def map_predictions(self, predictions, labels_map, thresholds):
        """
        Translate score vectors into lists of label names.

        :param predictions: the predictions from the predict() method
        :param labels_map: lookup from class index to label
        :param thresholds: per-class threshold a score has to exceed for the
            label to be reported
        :return: one list of labels per prediction
        """
        return [
            [labels_map[idx]
             for idx, score in enumerate(scores)
             if score > thresholds[idx]]
            for scores in predictions
        ]

    def close(self):
        """Clear the Keras backend session."""
        backend.clear_session()
def train(data,
          file_name,
          filters,
          kernels,
          num_epochs=50,
          batch_size=128,
          train_temp=1,
          init=None,
          activation=tf.nn.relu,
          bn=False):
    """
    Train a n-layer CNN for MNIST and CIFAR

    :param data: object exposing train_data / train_labels and
        validation_data / validation_labels
    :param file_name: path the trained model is saved to; None skips saving
    :param filters: number of filters of each convolutional layer
    :param kernels: kernel size of each convolutional layer (paired with
        ``filters`` position by position)
    :param num_epochs: number of training epochs
    :param batch_size: mini-batch size
    :param train_temp: temperature the logits are divided by inside the loss
    :param init: path of initial weights to load; None starts from scratch
    :param activation: callable applied after every convolution
    :param bn: when true, add batch normalisation before each activation
    :return: dict with the trained ``'model'`` and the fit ``'history'``
    """
    # create a Keras sequential model
    model = Sequential()
    # the first convolution also fixes the input shape (batch axis dropped)
    model.add(
        Conv2D(filters[0], kernels[0], input_shape=data.train_data.shape[1:]))
    if bn:
        model.add(BatchNormalization())
    model.add(Lambda(activation))
    for f, k in zip(filters[1:], kernels[1:]):
        model.add(Conv2D(f, k))
        if bn:
            model.add(BatchNormalization())
        # activation after every convolution (ReLU by default)
        model.add(Lambda(activation))
    # the output layer, with 10 classes; it emits raw logits because the
    # softmax is applied inside the loss
    model.add(Flatten())
    model.add(Dense(10))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # define the loss function which is the cross entropy between prediction and true label
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    # initiate the Adam optimizer
    optimizer = Adam()

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])

    model.summary()
    print("Traing a {} layer model, saving to {}".format(
        len(filters) + 1, file_name))
    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)

    return {'model': model, 'history': history}
# Network layout, listed in the order the layers are stacked.
input_shape = (mnist.img_rows, mnist.img_cols, 1)
model = Sequential()
for layer in (
        Conv2D(32, kernel_size=(3, 3), activation='relu',
               input_shape=input_shape),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(mnist.n_classes, activation='softmax'),
):
    model.add(layer)

# Compile and train; validation runs on the test split.
model.compile(metrics=['accuracy'],
              loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam())
model.fit(x_train,
          y_train,
          validation_data=(x_test, y_test),
          batch_size=batch_size,
          epochs=epochs,
          verbose=1)

# Report accuracy on the test split (score[1] is the accuracy metric).
score = model.evaluate(x_test, y_test, verbose=0)
print('Test accuracy: {:0.2f}%'.format(score[1] * 100))

# Write the trained model to disk.
model.save('mnist_tfkeras.h5')
import tensorflow.contrib.keras.api.keras as keras
from tensorflow.contrib.keras.api.keras.models import Sequential
from tensorflow.contrib.keras.api.keras.layers import Dense, Dropout, Activation
from tensorflow.contrib.keras.api.keras.optimizers import SGD

import numpy as np

# Random stand-in data: 20 features per sample, 10 one-hot encoded classes.
x_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(
    np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = keras.utils.to_categorical(
    np.random.randint(10, size=(100, 1)), num_classes=10)

# 20 -> 64 -> 64 -> 10 perceptron with dropout after each hidden layer.
model = Sequential([
    Dense(64, activation='relu', input_dim=20),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

# SGD with Nesterov momentum and a small learning-rate decay.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(x_train, y_train, batch_size=128, epochs=20)
score = model.evaluate(x_test, y_test, batch_size=128)

# Predict a single random sample and show the class probabilities.
y_pred = model.predict(np.random.random((1, 20)), batch_size=128)
print(y_pred)
def onBeginTraining(self):
    """Train a small CNN on MNIST and keep the model and session on ``self``.

    Stores the trained model in ``self.model`` and the Keras backend session
    in ``self.session``.

    :return: dict with the trained ``'model'`` and its ``'session'``
    """
    ue.log("starting mnist keras cnn training")

    model_file_name = "mnistKerasCNN"
    model_directory = ue.get_content_dir() + "/Scripts/"
    # only referenced by the (currently disabled) persistence code below
    model_sess_path = model_directory + model_file_name + ".tfsess"
    model_json_path = model_directory + model_file_name + ".json"

    #reset the session each time we get training calls
    K.clear_session()

    #let's train
    batch_size = 128
    num_classes = 10
    epochs = 8

    # input image dimensions
    img_rows, img_cols = 28, 28

    # the data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # add the channel axis where the backend's image data format expects it
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    # divide the float pixel values by 255
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    ue.log('x_train shape:' + str(x_train.shape))
    ue.log(str(x_train.shape[0]) + 'train samples')
    ue.log(str(x_test.shape[0]) + 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(Conv2D(64, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    # self.stopcallback is supplied by the enclosing object
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[self.stopcallback])
    score = model.evaluate(x_test, y_test, verbose=0)
    ue.log("mnist keras cnn training complete.")
    ue.log('Test loss:' + str(score[0]))
    ue.log('Test accuracy:' + str(score[1]))

    self.session = K.get_session()
    self.model = model

    stored = {'model':model, 'session': self.session}

    #run a test evaluation
    ue.log(x_test.shape)
    # Build the single-sample batch from input_shape so the channel axis
    # matches the model for both 'channels_first' and 'channels_last'.
    result_test = model.predict(np.reshape(x_test[500], (1,) + input_shape))
    ue.log(result_test)

    #flush the architecture model data to disk
    #with open(model_json_path, "w") as json_file:
    #    json_file.write(model.to_json())

    #flush the whole model and weights to disk
    #saver = tf.train.Saver()
    #save_path = saver.save(K.get_session(), model_sess_path)
    #model.save(model_path)

    return stored
def train_cnn_7layer(data,
                     file_name,
                     params,
                     num_epochs=50,
                     batch_size=256,
                     train_temp=1,
                     init=None,
                     lr=0.01,
                     decay=1e-5,
                     momentum=0.9,
                     activation="relu",
                     optimizer_name="sgd"):
    """
    Train a 7-layer cnn network for MNIST and CIFAR (same as the cnn model in Clever)
    mnist: 32 32 64 64 200 200
    cifar: 64 64 128 128 256 256

    :param data: object exposing train_data / train_labels and
        validation_data / validation_labels
    :param file_name: path the trained model is saved to; None skips saving
    :param params: six layer widths: four conv filter counts followed by two
        dense layer sizes
    :param num_epochs: number of training epochs
    :param batch_size: mini-batch size
    :param train_temp: temperature the logits are divided by inside the loss
    :param init: path of initial weights to load; None starts from scratch
    :param lr: learning rate of the optimizer
    :param decay: learning-rate decay of the optimizer
    :param momentum: momentum (SGD only)
    :param activation: Keras activation name used after every hidden layer
    :param optimizer_name: "sgd" or "adam"
    :return: dict with the trained ``'model'`` and the fit ``'history'``
    :raises ValueError: if ``optimizer_name`` is neither "sgd" nor "adam"
    """
    # Reject unknown optimizers before any work is done; previously this only
    # surfaced as an unbound `optimizer` after the whole model had been built.
    if optimizer_name not in ("sgd", "adam"):
        raise ValueError(
            "unsupported optimizer_name {!r}; expected 'sgd' or 'adam'".format(
                optimizer_name))

    # create a Keras sequential model
    model = Sequential()

    print("training data shape = {}".format(data.train_data.shape))

    # define model structure: two conv/conv/pool stages, then the dense head
    model.add(Conv2D(params[0], (3, 3), input_shape=data.train_data.shape[1:]))
    model.add(Activation(activation))
    model.add(Conv2D(params[1], (3, 3)))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(params[2], (3, 3)))
    model.add(Activation(activation))
    model.add(Conv2D(params[3], (3, 3)))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(params[4]))
    model.add(Activation(activation))
    model.add(Dropout(0.5))
    model.add(Dense(params[5]))
    model.add(Activation(activation))
    # 10-class output emitting raw logits; softmax is applied inside the loss
    model.add(Dense(10))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # define the loss function which is the cross entropy between prediction and true label
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    if optimizer_name == "sgd":
        # initiate the SGD optimizer with given hyper parameters
        optimizer = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)
    else:  # "adam", guaranteed by the validation at the top
        optimizer = Adam(lr=lr,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=None,
                         decay=decay,
                         amsgrad=False)

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])

    model.summary()
    print("Traing a {} layer model, saving to {}".format(
        len(params) + 1, file_name))
    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)
        print('model saved to ', file_name)

    return {'model': model, 'history': history}
# Two more sequence-returning LSTM layers (the second one is the "third
# LSTM layer" of the stack) ...
regressor.add(LSTM(return_sequences=True, units=3))
regressor.add(LSTM(return_sequences=True, units=3))

# ... then the fourth LSTM layer, which only returns its final output
regressor.add(LSTM(units=3))

# Output layer
regressor.add(Dense(units=1))

# Compile the RNN
regressor.compile(loss='mean_squared_error', optimizer='rmsprop')

# Fit the RNN to the training set
regressor.fit(X_train, y_train, batch_size=32, epochs=100)

# Part 3 - Making the predictions and visualising the results

# Real stock price for February 1st 2012 - January 31st 2017:
# the first 1258 training rows followed by the test rows
path = os.path.join(script_dir, '../dataset/Google_Stock_Price_Test.csv')
dataset_test = pd.read_csv(path)
test_set = dataset_test.iloc[:, 1:2].values
real_stock_price = np.concatenate((training_set[0:1258], test_set), axis=0)

# Predicted stock price of 2017: for each of rows 1258..1277 take the 60
# scaled values immediately before it as the input window
scaled_real_stock_price = sc.fit_transform(real_stock_price)
inputs = np.array([
    scaled_real_stock_price[end - 60:end, 0] for end in range(1258, 1278)
])
def model_inference_critisism(model_name, Bayesian, seq_array1, seq_array2, label_array1, label_array2, sequence_length, N, D, H=100, H1=100, H2=50): less_50 = label_array2 <= 50 if not Bayesian: model = Sequential() if model_name == 'Fully Connected Layer': model.add(Dense(H, input_shape=[D], activation='tanh')) elif model_name == 'Simple RNN': model.add(SimpleRNN(input_shape=(sequence_length, D), units=H)) elif model_name == 'LSTM': model.add(LSTM(input_shape=(sequence_length, D), units=H)) elif model_name == 'GRU': model.add(GRU(input_shape=(sequence_length, D), units=H)) model.add(Dense(units=1)) elif model_name == 'Two Layer Simple RNN': model.add( SimpleRNN(input_shape=(sequence_length, D), units=H1, return_sequences=True)) model.add(SimpleRNN(units=H2)) elif model_name == 'Two Layer LSTM': model.add( LSTM(input_shape=(sequence_length, D), units=H1, return_sequences=True)) model.add(LSTM(units=H2)) elif model_name == 'Two Layer GRU': model.add( GRU(input_shape=(sequence_length, D), units=H1, return_sequences=True)) model.add(GRU(units=H2)) else: raise Exception('Please specify a valid model!') model.add(Dense(units=1)) if model_name == 'Fully Connected Layer': # inference nadam = Nadam(lr=0.05) model.compile(loss='mean_squared_error', optimizer=nadam, metrics=['mean_squared_error']) model.fit(seq_array1[:, sequence_length - 1, :], label_array1, epochs=300, batch_size=200, validation_data=(seq_array2[:, sequence_length - 1, :], label_array2), verbose=0) y_pred = np.squeeze( model.predict(seq_array2[:, sequence_length - 1, :])) else: model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['mean_squared_error']) model.fit(seq_array1, label_array1, epochs=300, batch_size=200, validation_data=(seq_array2, label_array2), verbose=0) y_pred = np.squeeze(model.predict(seq_array2)) # Critisim # Histogram sns.distplot(y_pred) plt.title('Histogram of RUL, Frequentist {}'.format(model_name)) plt.xlabel('RUL') plt.show() # RMSE print('Validation RMSE: 
{}'.format( np.sqrt(mean_squared_error(label_array2, y_pred)))) print('Validation RMSE for RUL under 50: {}'.format( np.sqrt(mean_squared_error(label_array2[less_50], y_pred[less_50])))) # Prediction time series pd.DataFrame([label_array2, y_pred]).transpose().rename(columns={ 0: 'True', 1: 'Pred' })[-1500:].plot() plt.title('Prediction of RUL, Frequentist {}'.format(model_name)) plt.xlabel('RUL') plt.show() elif Bayesian: if model_name == 'Fully Connected Layer': W_0 = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) W_1 = Normal(loc=tf.zeros([H, 1]), scale=tf.ones([H, 1])) b_0 = Normal(loc=tf.zeros(H), scale=tf.ones(H)) b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1)) x = tf.placeholder(tf.float32, [N, D]) y = Normal(loc=neural_network_with_2_layers(x, W_0, W_1, b_0, b_1), scale=tf.ones(N) * 0.1) # constant noise # BACKWARD MODEL A q_W_0 = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_W_1 = Normal(loc=tf.Variable(tf.random_normal([H, 1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, 1])))) q_b_0 = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_b_1 = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([1])))) # INFERENCE A # this will take a couple of minutes inference = ed.KLqp(latent_vars={ W_0: q_W_0, b_0: q_b_0, W_1: q_W_1, b_1: q_b_1 }, data={ x: seq_array1[:, sequence_length - 1, :], y: label_array1 }) inference.run(n_samples=5, n_iter=25000) xp = tf.placeholder(tf.float32, seq_array2[:, sequence_length - 1, :].shape) y_preds = [ sess.run( neural_network_with_2_layers(xp, q_W_0, q_W_1, q_b_0, q_b_1), {xp: seq_array2[:, sequence_length - 1, :]}) for _ in range(50) ] elif model_name == 'Simple RNN': Wh = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Wx = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wy = Normal(loc=tf.zeros([H, 1]), scale=tf.ones([H, 1])) bh = 
Normal(loc=tf.zeros(H), scale=tf.ones(H)) by = Normal(loc=tf.zeros(1), scale=tf.ones(1)) X = tf.placeholder(tf.float32, [N, sequence_length, D]) # X = tf.placeholder(tf.float32,[sequence_length,N,D]) y = Normal(loc=rnn_layer(X, Wh, Wx, bh, Wy, by, H), scale=1.) # BACKWARD MODEL A q_Wh = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Wx = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wy = Normal(loc=tf.Variable(tf.random_normal([H, 1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, 1])))) q_bh = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_by = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([1])))) # INFERENCE A # this will take a couple of minutes inference = ed.KLqp(latent_vars={ Wh: q_Wh, bh: q_bh, Wx: q_Wx, Wy: q_Wy, by: q_by }, data={ X: seq_array1, y: label_array1 }) inference.run(n_samples=5, n_iter=2500) Xp = tf.placeholder(tf.float32, seq_array2.shape) y_preds = [ sess.run(rnn_layer(Xp, q_Wh, q_Wx, q_bh, q_Wy, q_by, H), {Xp: seq_array2}) for _ in range(50) ] elif model_name == 'LSTM': Wf = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Uf = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wi = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Ui = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wo = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Uo = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wc = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Uc = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wy = Normal(loc=tf.zeros([H, 1]), scale=tf.ones([H, 1])) bf = Normal(loc=tf.zeros(H), scale=tf.ones(H)) bi = Normal(loc=tf.zeros(H), scale=tf.ones(H)) bo = Normal(loc=tf.zeros(H), scale=tf.ones(H)) bc = Normal(loc=tf.zeros(H), scale=tf.ones(H)) by = Normal(loc=tf.zeros(1), 
scale=tf.ones(1)) X = tf.placeholder(tf.float32, [N, sequence_length, D]) y = Normal(loc=LSTM_layer(X, Wf, Uf, Wi, Ui, Wo, Uo, Wc, Uc, bf, bi, bo, bc, Wy, by, H), scale=1.) # BACKWARD MODEL A q_Wf = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Uf = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wi = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Ui = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wo = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Uo = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wc = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Uc = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wy = Normal(loc=tf.Variable(tf.random_normal([H, 1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, 1])))) q_bf = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_bi = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_bo = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_bc = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_by = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([1])))) # INFERENCE A # this will take a couple of minutes inference = ed.KLqp(latent_vars={ Wf: q_Wf, Uf: q_Uf, Wi: q_Wi, Ui: q_Ui, Wo: q_Wo, Uo: q_Uo, Wc: q_Wc, Uc: q_Uc, bf: 
q_bf, bi: q_bi, bo: q_bo, bc: q_bc, Wy: q_Wy, by: q_by }, data={ X: seq_array1, y: label_array1 }) inference.run(n_samples=5, n_iter=2500) Xp = tf.placeholder(tf.float32, seq_array2.shape) y_preds = [ sess.run( LSTM_layer(Xp, q_Wf, q_Uf, q_Wi, q_Ui, q_Wo, q_Uo, q_Wc, q_Uc, q_bf, q_bi, q_bo, q_bc, q_Wy, q_by, H), {Xp: seq_array2}) for _ in range(50) ] elif model_name == 'GRU': Wz = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Uz = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wr = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Ur = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wh = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H])) Uh = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H])) Wy = Normal(loc=tf.zeros([H, 1]), scale=tf.ones([H, 1])) bz = Normal(loc=tf.zeros(H), scale=tf.ones(H)) br = Normal(loc=tf.zeros(H), scale=tf.ones(H)) bh = Normal(loc=tf.zeros(H), scale=tf.ones(H)) by = Normal(loc=tf.zeros(1), scale=tf.ones(1)) X = tf.placeholder(tf.float32, [N, sequence_length, D]) y = Normal(loc=GRU_layer(X, Wz, Uz, Wr, Ur, Wh, Uh, bz, br, bh, Wy, by, H), scale=1.) 
# BACKWARD MODEL A q_Wz = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Uz = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wr = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Ur = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wh = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, H])))) q_Uh = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H])))) q_Wy = Normal(loc=tf.Variable(tf.random_normal([H, 1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H, 1])))) q_bz = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_br = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_bh = Normal(loc=tf.Variable(tf.random_normal([H])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H])))) q_by = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([1])))) # INFERENCE A # this will take a couple of minutes inference = ed.KLqp(latent_vars={ Wz: q_Wz, Uz: q_Uz, Wr: q_Wr, Ur: q_Ur, Wh: q_Wh, Uh: q_Uh, bz: q_bz, bz: q_bz, bh: q_bh, Wy: q_Wy, by: q_by }, data={ X: seq_array1, y: label_array1 }) inference.run(n_samples=5, n_iter=2500) Xp = tf.placeholder(tf.float32, seq_array2.shape) y_preds = [ sess.run( GRU_layer(Xp, q_Wz, q_Uz, q_Wr, q_Ur, q_Wh, q_Uh, q_bz, q_br, q_bh, q_Wy, q_by, H), {Xp: seq_array2}) for _ in range(50) ] elif model_name == 'Two Layer Simple RNN': Wh1 = Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Wx1 = Normal(loc=tf.zeros([D, H1]), scale=tf.ones([D, H1])) Wh2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, 
H2])) Wx2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wy = Normal(loc=tf.zeros([H2, 1]), scale=tf.ones([H2, 1])) bh1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) bh2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) by = Normal(loc=tf.zeros(1), scale=tf.ones(1)) X = tf.placeholder(tf.float32, [N, sequence_length, D]) y = Normal(loc=two_rnn_layer(X, Wh1, Wx1, bh1, Wh2, Wx2, bh2, Wy, by, H1, H2), scale=1.) # BACKWARD MODEL A q_Wh1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Wx1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wh2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Wx2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wy = Normal(loc=tf.Variable(tf.random_normal([H2, 1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, 1])))) q_bh1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_bh2 = Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_by = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([1])))) # INFERENCE A # this will take a couple of minutes inference = ed.KLqp(latent_vars={ Wh1: q_Wh1, bh1: q_bh1, Wh2: q_Wh2, bh2: q_bh2, Wx1: q_Wx1, Wx2: q_Wx2, Wy: q_Wy, by: q_by }, data={ X: seq_array1, y: label_array1 }) inference.run(n_samples=5, n_iter=2500) Xp = tf.placeholder(tf.float32, seq_array2.shape) y_preds = [ sess.run( two_rnn_layer(Xp, q_Wh1, q_Wx1, q_bh1, q_Wh2, q_Wx2, q_bh2, q_Wy, q_by, H1, H2), {Xp: seq_array2}) for _ in range(50) ] elif model_name == 'Two Layer LSTM': Wf1 = Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Uf1 = Normal(loc=tf.zeros([D, H1]), 
scale=tf.ones([D, H1])) Wi1 = Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Ui1 = Normal(loc=tf.zeros([D, H1]), scale=tf.ones([D, H1])) Wo1 = Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Uo1 = Normal(loc=tf.zeros([D, H1]), scale=tf.ones([D, H1])) Wc1 = Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Uc1 = Normal(loc=tf.zeros([D, H1]), scale=tf.ones([D, H1])) Wf2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, H2])) Uf2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wi2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, H2])) Ui2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wo2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, H2])) Uo2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wc2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, H2])) Uc2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wy = Normal(loc=tf.zeros([H2, 1]), scale=tf.ones([H2, 1])) bf1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) bi1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) bo1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) bc1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) bf2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) bi2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) bo2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) bc2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) by = Normal(loc=tf.zeros(1), scale=tf.ones(1)) X = tf.placeholder(tf.float32, [N, sequence_length, D]) y = Normal(loc=two_LSTM_layer(X, Wf1, Uf1, Wi1, Ui1, Wo1, Uo1, Wc1, Uc1, bf1, bi1, bo1, bc1, Wf2, Uf2, Wi2, Ui2, Wo2, Uo2, Wc2, Uc2, bf2, bi2, bo2, bc2, Wy, by, H1, H2), scale=1.) 
# BACKWARD MODEL A q_Wf1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Uf1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wi1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Ui1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wo1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Uo1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wc1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Uc1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wf2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Uf2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wi2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Ui2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wo2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Uo2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wc2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Uc2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wy 
= Normal(loc=tf.Variable(tf.random_normal([H2, 1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, 1])))) q_bf1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_bi1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_bo1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_bc1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_bf2 = Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_bi2 = Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_bo2 = Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_bc2 = Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_by = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([1])))) # INFERENCE A # this will take a couple of minutes inference = ed.KLqp(latent_vars={ Wf1: q_Wf1, Uf1: q_Uf1, Wi1: q_Wi1, Ui1: q_Ui1, Wo1: q_Wo1, Uo1: q_Uo1, Wc1: q_Wc1, Uc1: q_Uc1, Wf2: q_Wf2, Uf2: q_Uf2, Wi2: q_Wi2, Ui2: q_Ui2, Wo2: q_Wo2, Uo2: q_Uo2, Wc2: q_Wc2, Uc2: q_Uc2, bf1: q_bf1, bi1: q_bi1, bo1: q_bo1, bc1: q_bc1, bf2: q_bf2, bi2: q_bi2, bo2: q_bo2, bc2: q_bc2, Wy: q_Wy, by: q_by }, data={ X: seq_array1, y: label_array1 }) inference.run(n_samples=5, n_iter=2500) Xp = tf.placeholder(tf.float32, seq_array2.shape) y_preds = [ sess.run( two_LSTM_layer(Xp, q_Wf1, q_Uf1, q_Wi1, q_Ui1, q_Wo1, q_Uo1, q_Wc1, q_Uc1, q_bf1, q_bi1, q_bo1, q_bc1, q_Wf2, q_Uf2, q_Wi2, q_Ui2, q_Wo2, q_Uo2, q_Wc2, q_Uc2, q_bf2, q_bi2, q_bo2, q_bc2, q_Wy, q_by, H1, H2), {Xp: seq_array2}) for _ in range(50) ] elif model_name == 'Two Layer GRU': Wz1 = 
Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Uz1 = Normal(loc=tf.zeros([D, H1]), scale=tf.ones([D, H1])) Wr1 = Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Ur1 = Normal(loc=tf.zeros([D, H1]), scale=tf.ones([D, H1])) Wh1 = Normal(loc=tf.zeros([H1, H1]), scale=tf.ones([H1, H1])) Uh1 = Normal(loc=tf.zeros([D, H1]), scale=tf.ones([D, H1])) Wz2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, H2])) Uz2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wr2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, H2])) Ur2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wh2 = Normal(loc=tf.zeros([H2, H2]), scale=tf.ones([H2, H2])) Uh2 = Normal(loc=tf.zeros([H1, H2]), scale=tf.ones([H1, H2])) Wy = Normal(loc=tf.zeros([H2, 1]), scale=tf.ones([H2, 1])) bz1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) br1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) bh1 = Normal(loc=tf.zeros(H1), scale=tf.ones(H1)) bz2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) br2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) bh2 = Normal(loc=tf.zeros(H2), scale=tf.ones(H2)) by = Normal(loc=tf.zeros(1), scale=tf.ones(1)) X = tf.placeholder(tf.float32, [N, sequence_length, D]) y = Normal(loc=two_GRU_layer(X, Wz1, Uz1, Wr1, Ur1, Wh1, Uh1, bz1, br1, bh1, Wz2, Uz2, Wr2, Ur2, Wh2, Uh2, bz2, br2, bh2, Wy, by, H1, H2), scale=1.) 
# BACKWARD MODEL A q_Wz1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Uz1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wr1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Ur1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wh1 = Normal(loc=tf.Variable(tf.random_normal([H1, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H1])))) q_Uh1 = Normal(loc=tf.Variable(tf.random_normal([D, H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([D, H1])))) q_Wz2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Uz2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wr2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Ur2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wh2 = Normal(loc=tf.Variable(tf.random_normal([H2, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, H2])))) q_Uh2 = Normal(loc=tf.Variable(tf.random_normal([H1, H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1, H2])))) q_Wy = Normal(loc=tf.Variable(tf.random_normal([H2, 1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2, 1])))) q_bz1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_br1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_bh1 = Normal(loc=tf.Variable(tf.random_normal([H1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H1])))) q_bz2 = 
Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_br2 = Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_bh2 = Normal(loc=tf.Variable(tf.random_normal([H2])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([H2])))) q_by = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus( tf.Variable(tf.random_normal([1])))) # INFERENCE A # this will take a couple of minutes inference = ed.KLqp(latent_vars={ Wz1: q_Wz1, Uz1: q_Uz1, Wr1: q_Wr1, Ur1: q_Ur1, Wh1: q_Wh1, Uh1: q_Uh1, Wz2: q_Wz2, Uz2: q_Uz2, Wr2: q_Wr2, Ur2: q_Ur2, Wh2: q_Wh2, Uh2: q_Uh2, bz1: q_bz1, bz1: q_bz1, bh1: q_bh1, bz2: q_bz2, bz2: q_bz2, bh2: q_bh2, Wy: q_Wy, by: q_by }, data={ X: seq_array1, y: label_array1 }) inference.run(n_samples=5, n_iter=2500) Xp = tf.placeholder(tf.float32, seq_array2.shape) y_preds = [ sess.run( two_GRU_layer(Xp, q_Wz1, q_Uz1, q_Wr1, q_Ur1, q_Wh1, q_Uh1, q_bz1, q_br1, q_bh1, q_Wz2, q_Uz2, q_Wr2, q_Ur2, q_Wh2, q_Uh2, q_bz2, q_br2, q_bh2, q_Wy, q_by, H1, H2), {Xp: seq_array2}) for _ in range(50) ] else: raise Exception('Please specify a valid model!') # Critisism # Histogram sns.distplot(y_preds[0]) plt.title('Histogram of RUL, Bayesian {}'.format(model_name)) plt.xlabel('RUL') plt.show() # RMSE print('Average Validation RMSE: {}'.format( np.mean([ np.sqrt(mean_squared_error(label_array2, y_pred)) for y_pred in y_preds ]))) print('Average Validation RMSE for RUL under 50: {}'.format( np.mean([ np.sqrt( mean_squared_error(label_array2[less_50], y_pred[less_50])) for y_pred in y_preds ]))) # Prediction time series pd.DataFrame([label_array2, y_preds[0]]).transpose().rename(columns={ 0: 'True', 1: 'Pred' })[-1500:].plot() plt.title('Prediction of RUL, Bayesian {}'.format(model_name)) plt.xlabel('RUL') plt.show() # Posterior prediction distribution 1500 [ plt.plot(y_pred[-1500:], color='black', alpha=0.1) for y_pred in y_preds ] 
plt.plot(label_array2[-1500:]) plt.title('Distribution of Prediction of RUL, Bayesian {}'.format( model_name)) plt.xlabel('RUL(last 1500 days)') plt.show() # Posterior prediction distribution 150 [ plt.plot(y_pred[-150:], color='black', alpha=0.1) for y_pred in y_preds ] plt.plot(label_array2[-150:]) plt.title('Distribution of Prediction of RUL, Bayesian {}'.format( model_name)) plt.xlabel('RUL(last 150 days)') plt.show() else: raise Exception( 'Please specify a boolean for use Bayesian inference or not!')