def _save_logs(self, hist, y_pred, y_true, duration, lr=True, plot_test_acc=True):
    """Write the training history, final metrics and best-epoch summary to disk.

    Three CSV files are written by plain string concatenation onto
    ``self.output_directory`` (so that attribute must already end with a
    path separator): ``history.csv``, ``df_metrics.csv`` and
    ``df_best_model.csv``.

    Args:
        hist: Object exposing a ``history`` mapping of per-epoch values
            (presumably a Keras ``History`` -- confirm against the caller).
        y_pred: Predictions, forwarded to ``self._calculate_metrics``.
        y_true: Targets, forwarded to ``self._calculate_metrics``.
        duration: Training duration, forwarded to ``self._calculate_metrics``.
        lr: When truthy, the ``'lr'`` history column of the best epoch is
            recorded; ``hist.history`` must then contain ``'lr'``.
        plot_test_acc: When truthy, the best epoch is the one with minimal
            ``val_loss`` and the per-epoch plots are produced; otherwise the
            best epoch is the one with minimal ``loss`` and nothing is plotted.

    Returns:
        The DataFrame returned by ``self._calculate_metrics``.
    """
    hist_df = pd.DataFrame(hist.history)
    hist_df.to_csv(self.output_directory + 'history.csv', index=False)

    df_metrics = self._calculate_metrics(y_true, y_pred, duration)
    df_metrics.to_csv(self.output_directory + 'df_metrics.csv', index=False)

    monitored = 'val_loss' if plot_test_acc else 'loss'
    print(f'using {monitored} to find best metrics')
    index_best_model = hist_df[monitored].idxmin()
    row_best_model = hist_df.loc[index_best_model]

    # The ``np.float`` alias was removed from NumPy; the builtin ``float`` is
    # the dtype it used to stand for.
    df_best_model = pd.DataFrame(
        data=np.zeros((1, 4), dtype=float),
        index=[0],
        columns=[
            'best_model_train_loss',
            'best_model_val_loss',
            'best_model_learning_rate',
            'best_model_nb_epoch',
        ],
    )
    df_best_model['best_model_train_loss'] = row_best_model['loss']
    if plot_test_acc:
        # Without validation data the column keeps its 0.0 placeholder.
        df_best_model['best_model_val_loss'] = row_best_model['val_loss']
    if lr:
        df_best_model['best_model_learning_rate'] = row_best_model['lr']
    df_best_model['best_model_nb_epoch'] = index_best_model
    df_best_model.to_csv(self.output_directory + 'df_best_model.csv', index=False)

    if plot_test_acc:
        # First call relies on plot_epochs_metric's default metric.
        plot_epochs_metric(hist, self.output_directory + 'epochs_loss.png')
        for target in ('DA', '5HT', 'pH', 'NE'):
            plot_epochs_metric(
                hist,
                self.output_directory + 'epochs_' + target + '.png',
                metric='tf_pmse_' + target,
            )

    return df_metrics
def fit(self, x_train, y_train, x_val, y_val, y_true, batch_size=16, nb_epochs=500):
    """Train ``self.model`` and return what ``self.model.fit`` returns.

    After training, the model is saved as ``last_model.hdf5`` under
    ``self.output_directory`` (joined by plain string concatenation), the
    loss curve is plotted and the Keras backend session is cleared.

    Args:
        x_train: Training inputs passed to ``self.model.fit``.
        y_train: Training targets passed to ``self.model.fit``.
        x_val: Validation inputs, passed as ``validation_data``.
        y_val: Validation targets, passed as ``validation_data``.
        y_true: Accepted for interface compatibility only; no result of this
            method depends on it.
        batch_size: Batch size passed to ``self.model.fit``.
        nb_epochs: Number of epochs passed to ``self.model.fit``.

    Returns:
        The object returned by ``self.model.fit``.

    Raises:
        SystemExit: If TensorFlow reports that no GPU is available.
    """
    # The function has to be *called*: testing the bare function object is
    # always truthy, which silently disabled this guard.
    if not tf.test.is_gpu_available():
        print('error')
        # ``exit()`` is a helper injected by ``site`` for interactive use and
        # reports success (status 0); raise SystemExit directly with a failure
        # status instead.
        raise SystemExit(1)

    hist = self.model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=nb_epochs,
        verbose=self.verbose,
        validation_data=(x_val, y_val),
        callbacks=self.callbacks,
    )

    self.model.save(self.output_directory + 'last_model.hdf5')

    # NOTE(review): other call sites in this file pass an output file path as
    # the second positional argument; here it is the literal 'loss' -- confirm
    # this is the intended file name rather than the metric.
    plot_epochs_metric(hist, 'loss')

    keras.backend.clear_session()
    return hist
def fit(self, x_train, y_train, x_val, y_val, y_true, batch_size, nb_epochs):
    """Train ``self.model`` with a size-capped mini-batch and return the fit result.

    The effective batch size is the smaller of ``batch_size`` and one tenth
    of the number of training samples, but never less than 1. After
    training, the model is saved as ``last_model.hdf5`` under
    ``self.output_directory`` (joined by plain string concatenation), the
    loss curve is plotted and the Keras backend session is cleared.

    Args:
        x_train: Training inputs; ``x_train.shape[0]`` is used as the sample
            count.
        y_train: Training targets.
        x_val: Validation inputs; only used to monitor the validation loss,
            not for training.
        y_val: Validation targets; only used to monitor the validation loss.
        y_true: Accepted for interface compatibility only; unused.
        batch_size: Upper bound for the mini-batch size.
        nb_epochs: Number of epochs passed to ``self.model.fit``.

    Returns:
        The object returned by ``self.model.fit``.

    Raises:
        SystemExit: If TensorFlow reports that no GPU is available.
    """
    # The function has to be *called*: testing the bare function object is
    # always truthy, which silently disabled this guard.
    if not tf.test.is_gpu_available():
        print('error')
        # ``exit()`` is a helper injected by ``site`` for interactive use and
        # reports success (status 0); raise SystemExit directly with a failure
        # status instead.
        raise SystemExit(1)

    # Clamp to 1 so that fewer than ten training samples cannot produce a
    # batch size of 0.
    mini_batch_size = max(1, int(min(x_train.shape[0] / 10, batch_size)))

    hist = self.model.fit(
        x_train,
        y_train,
        batch_size=mini_batch_size,
        epochs=nb_epochs,
        verbose=self.verbose,
        validation_data=(x_val, y_val),
        callbacks=self.callbacks,
    )

    self.model.save(self.output_directory + 'last_model.hdf5')

    # NOTE(review): other call sites in this file pass an output file path as
    # the second positional argument; here it is the literal 'loss' -- confirm
    # this is the intended file name rather than the metric.
    plot_epochs_metric(hist, 'loss')

    keras.backend.clear_session()
    return hist
class Regression_INCEPTION:
    """Inception-style 1D convolutional Keras model trained with an MSE loss.

    The network stacks ``depth`` inception modules (optionally with residual
    shortcuts every third module), applies global average pooling and ends
    in a ``softplus`` dense layer with ``output_shape`` units.

    All files are written by plain string concatenation onto
    ``output_directory``, so that value must already end with a path
    separator.
    """

    def __init__(self, output_directory, input_shape, output_shape, verbose=False, build=True,
                 batch_size=64, nb_filters=32, use_residual=True, use_bottleneck=True, depth=6,
                 kernel_size=41, nb_epochs=100, metrics=None, pre_model=None,
                 normalize_y=(lambda x: x, lambda x: x)):
        """Store the hyper-parameters and, when ``build`` is truthy, build the model.

        Args:
            output_directory: Prefix for every file this object writes.
            input_shape: Passed to ``keras.layers.Input``.
            output_shape: Number of units of the final dense layer.
            verbose: Forwarded as ``verbose`` to ``model.fit``.
            build: When truthy, the model is built and saved as
                ``model_init.hdf5``; otherwise ``self.model`` is not set.
            batch_size: Batch size for training and prediction; ``None``
                makes ``fit`` derive one from the training set size.
            nb_filters: Filters per convolution inside an inception module.
            use_residual: Whether residual shortcuts are inserted.
            use_bottleneck: Whether a 1x1 bottleneck convolution is applied.
            depth: Number of stacked inception modules.
            kernel_size: Base kernel size; ``kernel_size - 1`` is stored.
            nb_epochs: Number of training epochs.
            metrics: Optional list of metric callables; each needs a
                ``__name__`` attribute.
            pre_model: Optional model whose layer weights initialise every
                layer but the last one.
            normalize_y: Pair ``(forward, inverse)`` of callables applied
                row-wise to targets before training and to predictions
                afterwards.
        """
        self.output_directory = output_directory

        self.nb_filters = nb_filters
        self.use_residual = use_residual
        self.use_bottleneck = use_bottleneck
        self.depth = depth
        self.kernel_size = kernel_size - 1
        self.callbacks = None
        self.batch_size = batch_size
        self.bottleneck_size = 32
        self.nb_epochs = nb_epochs
        self.metrics = metrics
        self.pre_model = pre_model
        self.normalize_data = normalize_y[0]
        self.revert_data = normalize_y[1]
        # Set unconditionally: ``fit`` reads it even when the model was
        # attached after construction.
        self.verbose = verbose

        if build:
            self.model = self.build_model(input_shape, output_shape, pre_model=pre_model)
            self.model.save(self.output_directory + 'model_init.hdf5')

    def _calculate_metrics(self, y_true, y_pred, duration):
        """Return a one-row DataFrame with the four RMSE values and ``duration``.

        Both arrays are mapped through ``self.revert_data`` row by row
        (``axis=1``) before ``rmse`` is called.
        """
        # The ``np.float`` alias was removed from NumPy; builtin ``float`` is
        # the dtype it used to stand for.
        res = pd.DataFrame(data=np.zeros((1, 5), dtype=float), index=[0],
                           columns=['rmse_DA', 'rmse_5HT', 'rmse_pH', 'rmse_NE', 'duration'])

        y_pred = np.apply_along_axis(self.revert_data, axis=1, arr=y_pred)
        y_true = np.apply_along_axis(self.revert_data, axis=1, arr=y_true)

        # assumes rmse returns one value per target, ordered DA, 5HT, pH, NE
        # -- TODO confirm against the rmse helper.
        rmse4 = rmse(y_true, y_pred)
        for position, column in enumerate(('rmse_DA', 'rmse_5HT', 'rmse_pH', 'rmse_NE')):
            res[column] = rmse4[position]
        res['duration'] = duration
        return res

    def _save_logs(self, hist, y_pred, y_true, duration, lr=True, plot_test_acc=True):
        """Write history, metrics and best-epoch summary CSVs; return the metrics.

        Args:
            hist: Object exposing a ``history`` mapping of per-epoch values.
            y_pred: Predictions, forwarded to ``_calculate_metrics``.
            y_true: Targets, forwarded to ``_calculate_metrics``.
            duration: Training duration, forwarded to ``_calculate_metrics``.
            lr: When truthy, the ``'lr'`` history value of the best epoch is
                recorded; the history must then contain ``'lr'``.
            plot_test_acc: When truthy, the best epoch is chosen by
                ``val_loss`` and the per-epoch plots are produced; otherwise
                the best epoch is chosen by ``loss`` and nothing is plotted.

        Returns:
            The DataFrame produced by ``_calculate_metrics``.
        """
        hist_df = pd.DataFrame(hist.history)
        hist_df.to_csv(self.output_directory + 'history.csv', index=False)

        df_metrics = self._calculate_metrics(y_true, y_pred, duration)
        df_metrics.to_csv(self.output_directory + 'df_metrics.csv', index=False)

        monitored = 'val_loss' if plot_test_acc else 'loss'
        print(f'using {monitored} to find best metrics')
        index_best_model = hist_df[monitored].idxmin()
        row_best_model = hist_df.loc[index_best_model]

        df_best_model = pd.DataFrame(data=np.zeros((1, 4), dtype=float), index=[0],
                                     columns=['best_model_train_loss', 'best_model_val_loss',
                                              'best_model_learning_rate', 'best_model_nb_epoch'])
        df_best_model['best_model_train_loss'] = row_best_model['loss']
        if plot_test_acc:
            # Without validation data the column keeps its 0.0 placeholder.
            df_best_model['best_model_val_loss'] = row_best_model['val_loss']
        if lr:
            df_best_model['best_model_learning_rate'] = row_best_model['lr']
        df_best_model['best_model_nb_epoch'] = index_best_model
        df_best_model.to_csv(self.output_directory + 'df_best_model.csv', index=False)

        if plot_test_acc:
            # First call relies on plot_epochs_metric's default metric.
            plot_epochs_metric(hist, self.output_directory + 'epochs_loss.png')
            for target in ('DA', '5HT', 'pH', 'NE'):
                plot_epochs_metric(hist, self.output_directory + 'epochs_' + target + '.png',
                                   metric='tf_pmse_' + target)

        return df_metrics

    def _inception_module(self, input_tensor, stride=1, activation='linear'):
        """Build one inception module on ``input_tensor`` and return its output.

        Three parallel convolutions (kernel sizes ``self.kernel_size`` divided
        by 1, 2 and 4) run on the optionally bottlenecked input, a fourth
        branch is max-pooling followed by a 1x1 convolution; the branches are
        concatenated on axis 2, batch-normalised and passed through ReLU.
        """
        # The bottleneck is skipped when the input has a single channel.
        if self.use_bottleneck and int(input_tensor.shape[-1]) > 1:
            input_inception = keras.layers.Conv1D(filters=self.bottleneck_size, kernel_size=1,
                                                  padding='same', activation=activation,
                                                  use_bias=False)(input_tensor)
        else:
            input_inception = input_tensor

        kernel_size_s = [self.kernel_size // (2 ** i) for i in range(3)]

        conv_list = [
            keras.layers.Conv1D(filters=self.nb_filters, kernel_size=branch_kernel,
                                strides=stride, padding='same', activation=activation,
                                use_bias=False)(input_inception)
            for branch_kernel in kernel_size_s
        ]

        # The pooling branch reads the raw input, not the bottlenecked one.
        max_pool_1 = keras.layers.MaxPool1D(pool_size=3, strides=stride,
                                            padding='same')(input_tensor)
        conv_6 = keras.layers.Conv1D(filters=self.nb_filters, kernel_size=1, padding='same',
                                     activation=activation, use_bias=False)(max_pool_1)
        conv_list.append(conv_6)

        x = keras.layers.Concatenate(axis=2)(conv_list)
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.Activation(activation='relu')(x)
        return x

    def _shortcut_layer(self, input_tensor, out_tensor):
        """Add a projected copy of ``input_tensor`` to ``out_tensor`` and apply ReLU.

        The projection is a 1x1 convolution with as many filters as
        ``out_tensor`` has channels, followed by batch normalisation.
        """
        shortcut_y = keras.layers.Conv1D(filters=int(out_tensor.shape[-1]), kernel_size=1,
                                         padding='same', use_bias=False)(input_tensor)
        # Same public layer path as in _inception_module, instead of the
        # ``keras.layers.normalization`` submodule path.
        shortcut_y = keras.layers.BatchNormalization()(shortcut_y)

        x = keras.layers.Add()([shortcut_y, out_tensor])
        x = keras.layers.Activation('relu')(x)
        return x

    def build_model(self, input_shape, output_shape, pre_model=None):
        """Build and compile the network; also populate ``self.callbacks``.

        Args:
            input_shape: Passed to ``keras.layers.Input``.
            output_shape: Number of units of the final ``softplus`` dense layer.
            pre_model: Optional model; the weights of its layers are copied,
                by position, into every layer of the new model but the last.

        Returns:
            The compiled Keras model (MSE loss, Adam optimizer).
        """
        mirrored_strategy = tf.distribute.MirroredStrategy()
        with mirrored_strategy.scope():
            input_layer = keras.layers.Input(input_shape)

            x = input_layer
            input_res = input_layer

            for d in range(self.depth):
                x = self._inception_module(x)

                # A residual connection closes every third module.
                if self.use_residual and d % 3 == 2:
                    x = self._shortcut_layer(input_res, x)
                    input_res = x

            gap_layer = keras.layers.GlobalAveragePooling1D()(x)
            output_layer = keras.layers.Dense(output_shape, activation='softplus')(gap_layer)

            model = keras.models.Model(inputs=input_layer, outputs=output_layer)

            if pre_model is not None:
                print('loading previous weights (L-1 layers)...')
                for position, layer in enumerate(model.layers[:-1]):
                    layer.set_weights(pre_model.layers[position].get_weights())
            else:
                print('starting model from scratch...')

            metrics = [] if self.metrics is None else self.metrics

            print('Compiling with Adam and metrics: ', [m.__name__ for m in metrics])
            model.compile(loss='mse', optimizer=keras.optimizers.Adam(), metrics=metrics)

        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50,
                                                      min_lr=0.0001)

        file_path = self.output_directory + 'best_model.hdf5'
        model_checkpoint_val = keras.callbacks.ModelCheckpoint(filepath=file_path,
                                                               monitor='val_loss',
                                                               save_best_only=True)

        file_path = self.output_directory + 'best_train_model.hdf5'
        model_checkpoint_train = keras.callbacks.ModelCheckpoint(filepath=file_path,
                                                                 monitor='loss',
                                                                 save_best_only=True)

        self.callbacks = [reduce_lr, model_checkpoint_train, model_checkpoint_val]

        return model

    def fit(self, x_train, y_train, x_val, y_val, plot_test_acc=False):
        """Train the model, predict on the validation set and log the results.

        Targets are mapped through ``self.normalize_data`` row by row before
        training. After training, the model is saved as ``last_model.hdf5``,
        the validation set is predicted with the model returned by
        ``get_best_model``, and the reverted predictions and targets are
        saved as ``y_pred.npy`` and ``y_true.npy``.

        Args:
            x_train: Training inputs.
            y_train: Training targets (un-normalised).
            x_val: Validation inputs; used for monitoring and the final
                prediction, not for training.
            y_val: Validation targets (un-normalised).
            plot_test_acc: When truthy, validation data is passed to
                ``model.fit`` and the per-epoch plots are produced.

        Returns:
            The metrics DataFrame produced by ``_save_logs``.

        Raises:
            SystemExit: If no GPU is reported as available.
        """
        # NOTE(review): this relies on a private Keras backend helper that is
        # not part of the public API -- confirm it exists in the installed
        # Keras version.
        if len(keras.backend.tensorflow_backend._get_available_gpus()) == 0:
            print('error no gpu')
            # ``exit()`` is a ``site`` helper meant for interactive use and
            # reports success; raise SystemExit with a failure status instead.
            raise SystemExit(1)

        if self.batch_size is None:
            # Clamp to 1 so that fewer than twenty samples cannot yield 0.
            mini_batch_size = max(1, int(min(x_train.shape[0] / 20, 16)))
        else:
            mini_batch_size = self.batch_size
        print(f'mini batch size: {mini_batch_size}')

        start_time = time.time()

        # Callables such as functools.partial carry no __name__.
        normalizer_name = getattr(self.normalize_data, '__name__', repr(self.normalize_data))
        print(f'projecting y_train and y_val with {normalizer_name}')
        y_train = np.apply_along_axis(self.normalize_data, axis=1, arr=y_train)
        y_val = np.apply_along_axis(self.normalize_data, axis=1, arr=y_val)

        fit_kwargs = dict(batch_size=mini_batch_size, epochs=self.nb_epochs,
                          verbose=self.verbose, callbacks=self.callbacks)
        if plot_test_acc:
            fit_kwargs['validation_data'] = (x_val, y_val)
        hist = self.model.fit(x_train, y_train, **fit_kwargs)

        duration = time.time() - start_time

        self.model.save(self.output_directory + 'last_model.hdf5')

        print('predicting validation set...', end = '')
        y_pred = self.predict(x_val, y_val, x_train, y_train, return_df_metrics=False)
        print(' done.')

        # Predictions and targets are stored on the original (reverted) scale.
        np.save(self.output_directory + 'y_pred.npy',
                np.apply_along_axis(self.revert_data, axis=1, arr=y_pred))
        # The ground truth for these predictions is the (normalised) y_val;
        # the name ``y_true`` used here before is not defined in this method.
        np.save(self.output_directory + 'y_true.npy',
                np.apply_along_axis(self.revert_data, axis=1, arr=y_val))

        df_metrics = self._save_logs(hist, y_pred, y_val, duration, plot_test_acc=plot_test_acc)

        keras.backend.clear_session()

        return df_metrics

    def predict(self, x_test, y_test, x_train, y_train, return_df_metrics=True):
        """Predict ``x_test`` with the model returned by ``get_best_model``.

        Args:
            x_test: Inputs to predict.
            y_test: Targets; only used when ``return_df_metrics`` is truthy.
            x_train: Unused; kept for interface compatibility.
            y_train: Unused; kept for interface compatibility.
            return_df_metrics: When truthy, return the metrics DataFrame from
                ``_calculate_metrics`` (with a duration of 0.0); otherwise
                record the elapsed time via ``save_test_duration`` and return
                the raw predictions.

        Returns:
            A metrics DataFrame or the prediction array, see above.
        """
        start_time = time.time()
        model = self.get_best_model()
        y_pred = model.predict(x_test, batch_size=self.batch_size)
        if return_df_metrics:
            return self._calculate_metrics(y_test, y_pred, 0.0)

        test_duration = time.time() - start_time
        save_test_duration(self.output_directory + 'test_duration.csv', test_duration)
        return y_pred

    def get_best_model(self):
        """Load and return the best checkpoint written during training.

        ``best_model.hdf5`` (the checkpoint monitoring ``val_loss``) is
        preferred; if that file does not exist, ``best_train_model.hdf5``
        (the checkpoint monitoring ``loss``) is loaded instead. Every entry
        of ``self.metrics`` is registered as a custom object under its
        ``__name__``.
        """
        import os

        model_path = self.output_directory + 'best_model.hdf5'
        if not os.path.exists(model_path):
            # Presumably the val_loss checkpoint is never written when the
            # model was fitted without validation data; fall back to the
            # checkpoint selected on the training loss.
            model_path = self.output_directory + 'best_train_model.hdf5'

        custom_objects = {}
        if self.metrics is not None:
            custom_objects = {metric.__name__: metric for metric in self.metrics}
        return keras.models.load_model(model_path, custom_objects=custom_objects)