def fit(self, x_train, y_train, x_val, y_val, y_true):
    if len(keras.backend.tensorflow_backend._get_available_gpus()) == 0:
        print('error')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    mini_batch_size = 16
    nb_epochs = 2000

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_true, duration, lr=False)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_test, y_test, y_true):
    if not tf.test.is_gpu_available():
        print('error')
        exit()

    self.x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
    self.x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
    self.y_train = y_train
    self.y_test = y_test
    self.y_true = y_true

    print('x_train.shape: {0}'.format(self.x_train.shape))

    self.batch_size = 128
    self.nb_epochs = 2000

    self.model, hist, duration = self.train()

    # finally, predict on the test set
    y_pred = self.model.predict(self.x_test)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_dir, hist, y_pred, self.y_true, duration, self.verbose)
def fit(self, x, y, x_test, y_test, y_true):
    if not tf.test.is_gpu_available():
        print('error')
        exit()

    mini_batch_size = 16
    nb_epochs = 120

    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.33)

    x_test = self.prepare_input(x_test)
    x_train = self.prepare_input(x_train)
    x_val = self.prepare_input(x_val)

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    y_pred = model.predict(x_test)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_true, duration, lr=False)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true):
    if not tf.test.is_gpu_available():
        print('error')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 16
    nb_epochs = 5000

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=False,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_dir + 'last_model.hdf5')

    model = keras.models.load_model(self.output_dir + 'best_model.hdf5')

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_dir, hist, y_pred, y_true, duration, self.verbose)

    keras.backend.clear_session()
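# Several of the fit() variants in this collection gate training on a GPU check,
# either via the TF1-only keras.backend.tensorflow_backend._get_available_gpus()
# or via the deprecated tf.test.is_gpu_available(). A minimal sketch of an
# equivalent check with the TF2 API, assuming TensorFlow >= 2.1 is installed
# (gpu_available is an illustrative helper name, not taken from the code above):
import tensorflow as tf

def gpu_available():
    # list_physical_devices returns an empty list when no GPU is visible
    return len(tf.config.list_physical_devices('GPU')) > 0

# usage sketch:
# if not gpu_available():
#     print('error: no GPU available')
#     exit()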
def fit(self, x_train, y_train, x_val, y_val, y_true):
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 200
    nb_epochs = 50

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    # y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_true, duration)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true, x_train_agg, x_val_agg):
    if not tf.test.is_gpu_available():
        print('error')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 64
    nb_epochs = 1500

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    start_time = time.time()

    hist = self.model.fit([x_train, x_train_agg], y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=([x_val, x_val_agg], y_val),
                          callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    y_pred = self.predict([x_val, x_val_agg], y_true, x_train, y_train, y_val,
                          return_df_metrics=False)

    # save predictions
    np.save(self.output_directory + 'y_pred.npy', y_pred)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration)

    keras.backend.clear_session()

    return df_metrics
def fit(self, x_train, y_train, x_val, y_val, y_true):
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 200
    nb_epochs = 40

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    start_time = time.time()

    # the autoencoder is trained to reconstruct its input, hence x_train as the target
    hist = self.model.fit(x_train, x_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, x_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    rec = model.predict(x_val)
    # anomaly score: per-sample norm of the reconstruction error
    y_pred = np.array([np.linalg.norm(a - b) for a, b in zip(x_val, rec)])

    print("generating test embedding vectors ##########################")
    encod = self.encoder.predict(x_val)
    decod = self.decoder.predict(x_val)
    y_pred_emb = np.array([np.linalg.norm(a - b) for a, b in zip(encod, decod)])
    print("embedding vector shape is {}".format(encod.shape))

    np.save(self.output_directory + 'encod_test.npy', encod)
    np.save(self.output_directory + 'decod_test.npy', decod)
    np.save(self.output_directory + 'score_embd.npy', y_pred_emb)

    # print(y_pred.shape, y_true.shape)
    # convert the predicted from binary to integer
    # y_pred = np.argmax(y_pred, axis=1)
    print(y_pred.shape, y_true.shape)

    save_logs(self.output_directory, hist, y_pred, y_true, duration,
              le_type="reconstruction")

    keras.backend.clear_session()
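# The autoencoder variant above scores each validation window with a Python-level
# loop over np.linalg.norm. A minimal, equivalent vectorized sketch of the same
# per-sample 2-norm of the flattened reconstruction error; reconstruction_scores is
# an illustrative helper, and x_val/rec are assumed to be numpy arrays of identical
# shape (n_samples, ...):
import numpy as np

def reconstruction_scores(x_val, rec):
    # flatten everything but the sample axis, then take the per-row 2-norm;
    # for 2-D samples this matches np.linalg.norm(a - b) (Frobenius norm) per pair
    diff = (x_val - rec).reshape(len(x_val), -1)
    return np.linalg.norm(diff, axis=1)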
def fit(self, x_train, y_train, x_val, y_val, y_true, plot_test_acc=False):
    if len(keras.backend.tensorflow_backend._get_available_gpus()) == 0:
        print('error no gpu')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training

    if self.batch_size is None:
        mini_batch_size = int(min(x_train.shape[0] / 10, 16))
    else:
        mini_batch_size = self.batch_size

    start_time = time.time()

    if plot_test_acc:
        hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                              epochs=self.nb_epochs, verbose=self.verbose,
                              validation_data=(x_val, y_val), callbacks=self.callbacks)
    else:
        hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                              epochs=self.nb_epochs, verbose=self.verbose,
                              callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    y_pred = self.predict(x_val, y_true, x_train, y_train, y_val,
                          return_df_metrics=False)

    # save predictions
    np.save(self.output_directory + 'y_pred.npy', y_pred)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration,
                           plot_test_acc=plot_test_acc)

    keras.backend.clear_session()

    return df_metrics
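# All of these fit() variants rely on self.callbacks to write 'best_model.hdf5'
# (and, in some variants, to reduce the learning rate on a plateau). A minimal
# sketch of the callback list they appear to assume; the monitored quantity,
# factor, patience and file name here are assumptions, not taken from the code above:
from tensorflow import keras

def build_callbacks(output_directory):
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,
                                                  patience=50, min_lr=0.0001)
    model_checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=output_directory + 'best_model.hdf5',
        monitor='loss', save_best_only=True)
    return [reduce_lr, model_checkpoint]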
def fit(self, x_train, y_train, x_val, y_val, y_true, test_info_str,
        metrics_file_str, png_str):
    if not tf.test.is_gpu_available():
        print('error')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 12
    # nb_epochs = 100
    nb_epochs = ENCODER_EPOCHS

    mini_batch_size = batch_size

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_true, duration,
              test_info_str, metrics_file_str, png_str, lr=False)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true):
    batch_size = 16
    nb_epochs = 100

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    # split the provided validation data into a validation half and a test half
    print(x_val.shape[0] / 2)
    l = int(x_val.shape[0] / 2)
    x_test = x_val[l:]
    y_test = y_val[l:]
    x_val = x_val[:l]
    y_val = y_val[:l]
    y_true = y_true[int(y_true.shape[0] / 2):]

    print("train:")
    print(x_train)
    print("train label:")
    print(y_train)
    print("val:")
    print(x_val)
    print("val label:")
    print(y_val)
    print("test:")
    print(x_test)
    print("test label:")
    print(y_test)

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')
    model_val = keras.models.load_model(self.output_directory + 'best_model_val.hdf5')

    y_pred = model.predict(x_test)
    y_pred_val = model_val.predict(x_test)

    y_pred = np.argmax(y_pred, axis=1)
    y_pred_val = np.argmax(y_pred_val, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_pred_val, y_true, duration, lr=False)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true):
    # x_val and y_val are only used to monitor the test loss and NOT for training
    mini_batch_size = 16
    nb_epochs = 2000

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_true, duration, lr=False)

    # plot the training/validation loss curves from the logged history
    history = np.genfromtxt(self.output_directory + 'history.csv',
                            delimiter=',', names=True)
    fig, ax = plt.subplots(1)
    ax.plot(history['epoch'], history['loss'], label='training')
    ax.plot(history['epoch'], history['val_loss'], label='validation')
    ax.legend()
    ax.set(xlabel='epoch', ylabel='loss')
    plt.savefig('%s/loss.png' % self.output_directory)
    plt.close()

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true):
    print('TRAINING')
    if not tf.test.is_gpu_available():
        print('error')
        # exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    mini_batch_size = int(min(x_train.shape[0] / 10, self.batch_size))

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=self.nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    # the model returns two outputs; the second (gap) is only inspected for its shape
    y_pred, gap = model.predict(x_val)
    print(gap.shape)
    # print(hist.accuracy)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)
    print(y_pred)

    save_logs(self.output_directory, hist, y_pred, y_true, duration)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_test, y_true):
    # transform the labels from integers to one hot vectors
    self.enc = sklearn.preprocessing.OneHotEncoder()
    self.enc.fit(np.concatenate((y_train, y_true), axis=0).reshape(-1, 1))
    y_train_int = y_train
    y_train = self.enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = self.enc.transform(y_true.reshape(-1, 1)).toarray()

    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 16
    nb_epochs = 1

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))
    if len(x_train) > 4000:  # for ElectricDevices
        mini_batch_size = 128

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_test, y_test), callbacks=self.callbacks)

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    y_pred = model.predict(x_test)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    keras.backend.clear_session()

    save_logs(self.output_directory, hist, y_pred, y_true, 0.0)

    return y_pred
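# The variant above one-hot encodes the labels with sklearn's OneHotEncoder and
# later inverts the network's prediction with np.argmax. A minimal round-trip
# sketch under the same assumption (a 1-D array of class labels); the example
# labels are illustrative:
import numpy as np
import sklearn.preprocessing

y_train = np.array([0, 1, 2, 1])
enc = sklearn.preprocessing.OneHotEncoder()
enc.fit(y_train.reshape(-1, 1))
y_train_hot = enc.transform(y_train.reshape(-1, 1)).toarray()  # shape (4, 3)
y_back = np.argmax(y_train_hot, axis=1)                        # [0, 1, 2, 1]
# argmax recovers the column index, which equals the original label only when the
# labels are already 0..K-1; otherwise map back via enc.categories_[0][y_back]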
def fit(self, x_train, y_train, x_val, y_val, y_true='', class_weight=None):
    if not tf.test.is_gpu_available():
        print('error no gpu')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training

    if self.batch_size is None:
        mini_batch_size = int(min(x_train.shape[0] / 10, 16))
    else:
        mini_batch_size = self.batch_size

    start_time = time.time()

    # pass the provided class weights through to Keras
    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=self.nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks,
                          class_weight=class_weight)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    if y_true != '':
        y_pred, cam = self.predict(x_val, y_true, x_train, y_train, y_val,
                                   return_df_metrics=False)

        # save predictions
        np.save(self.output_directory + 'y_pred.npy', y_pred)
        # np.save(self.output_directory + 'cam.npy', cam)

        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true):
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 12
    nb_epochs = 100

    mini_batch_size = batch_size

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    model = keras.models.load_model(
        self.output_directory + 'best_model.hdf5',
        custom_objects={'InstanceNormalization': InstanceNormalization})

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_true, duration, lr=False)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true):
    if not tf.test.is_gpu_available():
        print('error')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    # mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    start_time = time.time()

    self.callbacks = [ComputeRDP(BATCH_SIZE, len(x_train), NOISE_MULTIPLIER)]

    hist = self.model.fit(x_train, y_train, batch_size=BATCH_SIZE,
                          epochs=NUMBER_OF_EPOCHS, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    y_pred = self.predict(x_val, y_true, x_train, y_train, y_val,
                          return_df_metrics=False)

    # save predictions
    np.save(self.output_directory + 'y_pred.npy', y_pred)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration)

    keras.backend.clear_session()

    return df_metrics
def old_fit(self, x_train, y_train, x_val, y_val, y_true):
    if not tf.test.is_gpu_available():
        print('error')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    mini_batch_size = int(min(x_train.shape[0] / 10, self.batch_size))

    # Train model
    start_time = time.time()
    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=self.nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)
    duration = time.time() - start_time

    # Save model
    model_file = self.get_file_path('last_model.hdf5')
    self.model.save(model_file)

    # Run prediction ...
    y_pred = self.predict(x_val, y_true, x_train, y_train, y_val,
                          return_df_metrics=False)

    # ... and save it
    pred_file = self.get_file_path('y_pred.npy')
    np.save(pred_file, y_pred)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    # Save logs
    df_metrics = utils.save_logs(self.output_directory, hist, y_pred, y_true, duration)

    keras.backend.clear_session()

    return df_metrics
def fit(self, x_train, y_train, x_val, y_val, y_true, batch_size=16,
        nb_epochs=5000, x_test=None, shuffle=True):
    mini_batch_size = int(min(x_train[0].shape[0] / 10, batch_size))

    GLOBAL_LOGGER.info("Fitting model")

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks,
                          shuffle=shuffle)

    duration = time.time() - start_time

    GLOBAL_LOGGER.info("Loading weights and predicting")

    self.model.load_weights(self.output_directory + 'best_model.hdf5')

    # testing the truth value of a numpy array raises a ValueError, so check for None
    y_pred_probabilities = self.model.predict(x_test if x_test is not None else x_val)
    y_pred = np.argmax(y_pred_probabilities, axis=1)

    return save_logs(self.output_directory, hist, y_pred, y_pred_probabilities,
                     y_true, duration)
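# The original expression `x_test if x_test else x_val` raises
# "ValueError: The truth value of an array with more than one element is ambiguous"
# whenever x_test is a multi-element numpy array, which is why the variant above now
# tests `x_test is not None`. A minimal sketch of the pitfall (array shape is
# illustrative):
import numpy as np

x_test = np.zeros((10, 5))
# bool(x_test)                  # would raise ValueError for a multi-element array
use_test = x_test is not None   # the explicit None check is the safe idiom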
def fit(self, x_train, y_train, x_val, y_val, x_true, y_true):
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 128
    nb_epochs = 40

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, validation_data=(x_val, y_val),
                          callbacks=self.callbacks)

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    predicted = model.predict(x_true)
    print(predicted)
    # predicted = np.exp(predicted)
    # predict_label = np.exp(predict_label)
    print('mean_predicted: %f' % (np.mean(predicted)))
    print('std: %f' % (np.std(predicted)))

    predicted = np.exp(predicted)
    diff = (predicted - y_true)
    rel_diff = diff / (predicted + y_true)
    print(rel_diff, np.min(rel_diff), np.max(rel_diff))

    mean_diff = np.mean(rel_diff)
    std = np.std(rel_diff)
    print('std: %f' % std)
    print('mean %f' % mean_diff)

    # can the model predict unnoised data?
    # no_noise = (predicted_no_noise - predict_label) / predicted_no_noise

    abs_rel_diff = np.abs(rel_diff)
    maximum = np.max(abs_rel_diff)
    # logbinning = 10**np.linspace(np.log10(1e-10), np.log10(maximum), 100)

    bad_points = []
    for i, j in zip(rel_diff, y_true):
        if np.abs(i) > 0.5:
            # print(j)
            bad_points.append(j)
    print(len(bad_points))

    # plot the training/validation loss curves from the logged history
    history = np.genfromtxt(self.output_directory + 'history.csv',
                            delimiter=',', names=True)
    fig, ax = plt.subplots(1)
    ax.plot(history['epoch'], history['loss'], label='training')
    ax.plot(history['epoch'], history['val_loss'], label='validation')
    ax.legend()
    ax.set(xlabel='epoch', ylabel='loss')
    plt.savefig('%s/loss.png' % self.output_directory)
    plt.close()

    plt.subplot(211)
    plt.hist(rel_diff, bins=50)
    plt.title('(e_predict - e_real)/(e_predict+e_real)')
    plt.yscale('log')
    plt.subplot(212)
    plt.hist(diff, bins=50)
    plt.title('(e_predict - e_real)')
    plt.yscale('log')
    plt.savefig('%s/acc.png' % self.output_directory)
    plt.close()

    # log the predictions (the original referenced an undefined y_pred here)
    save_logs(self.output_directory, hist, predicted, y_true)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true):
    if not tf.test.is_gpu_available():
        print('error')
        exit()
    # x_val and y_val are only used to monitor the test loss and NOT for training
    # mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    # DP training
    self.callbacks = [
        ComputeRDP(BATCH_SIZE, len(x_train), NOISE_MULTIPLIER,
                   self.threshold, self.model_dp_path),
        self.save_model(str(self.model_dp_path / 'best_model.hdf5'))
    ]

    start_time = time.time()

    hist = self.model_dp.fit(x_train, y_train, batch_size=BATCH_SIZE,
                             epochs=NUMBER_OF_EPOCHS, verbose=self.verbose,
                             validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model_dp.save(str(self.model_dp_path / 'last_model.hdf5'))

    model = tf.keras.models.load_model(str(self.model_dp_path / 'best_model.hdf5'))

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.model_dp_path, hist, y_pred, y_true, duration, lr=False)

    # non-DP training for the same number of epochs as the DP run
    stopped_epoch = len(hist.epoch)
    # reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,
    #                                                  patience=200, min_lr=0.00001)
    self.callbacks = [
        self.save_model(str(self.model_path / 'best_model.hdf5'))
    ]

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=BATCH_SIZE,
                          epochs=stopped_epoch, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(str(self.model_path / 'last_model.hdf5'))

    model = tf.keras.models.load_model(str(self.model_path / 'best_model.hdf5'))

    y_pred = model.predict(x_val)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    save_logs(self.model_path, hist, y_pred, y_true, duration, lr=False)

    tf.keras.backend.clear_session()
def fit(self, x_train, y_train, x_test, y_test, y_true):
    nb_epochs = 1000
    batch_size = 256
    nb_classes = y_train.shape[1]

    # limit the number of augmented time series if the series are too long or too many
    if x_train.shape[1] > 500 or x_train.shape[0] > 2000 or x_test.shape[0] > 2000:
        self.warping_ratios = [1]
        self.slice_ratio = 0.9

    # increase the slice if the series are too short
    if x_train.shape[1] * self.slice_ratio < 8:
        self.slice_ratio = 8 / x_train.shape[1]

    ####################
    ## pre-processing ##
    ####################

    x_train, y_train, x_test, y_test, tot_increase_num = self.pre_processing(
        x_train, y_train, x_test, y_test)

    print('Total increased number for each MTS: ', tot_increase_num)

    #########################
    ## done pre-processing ##
    #########################

    input_shape = x_train.shape[1:]
    model = self.build_model(input_shape, nb_classes)

    if self.verbose == True:
        model.summary()

    start_time = time.time()

    hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=nb_epochs,
                     verbose=self.verbose, validation_data=(x_test, y_test),
                     callbacks=self.callbacks)

    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    y_pred = model.predict(x_test, batch_size=batch_size)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    # get the true predictions of the test set by majority vote over the
    # augmented slices belonging to each original series
    y_predicted = []
    test_num_batch = int(x_test.shape[0] / tot_increase_num)

    for i in range(test_num_batch):
        unique_value, sub_ind, correspond_ind, count = np.unique(
            y_pred[i * tot_increase_num:(i + 1) * tot_increase_num], True, True, True)

        idx_max = np.argmax(count)
        predicted_label = unique_value[idx_max]

        y_predicted.append(predicted_label)

    y_pred = np.array(y_predicted)

    duration = time.time() - start_time

    save_logs(self.output_directory, hist, y_pred, y_true, duration)

    keras.backend.clear_session()
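# In the slicing-based variant above, each original test series is expanded into
# tot_increase_num augmented slices, and the final label is the majority vote over
# the slice predictions. A minimal standalone sketch of that vote; majority_vote
# and its arguments are illustrative names, not from the code above:
import numpy as np

def majority_vote(slice_preds, tot_increase_num):
    # slice_preds: 1-D integer predictions, grouped contiguously per original series
    n_series = len(slice_preds) // tot_increase_num
    voted = []
    for i in range(n_series):
        group = slice_preds[i * tot_increase_num:(i + 1) * tot_increase_num]
        values, counts = np.unique(group, return_counts=True)
        voted.append(values[np.argmax(counts)])
    return np.array(voted)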
def train(pre_model=None):
    # read train, val and test sets
    x_train = datasets_dict[dataset_name_tranfer][0]
    y_train = datasets_dict[dataset_name_tranfer][1]

    y_true_val = None
    y_pred_val = None

    x_test = datasets_dict[dataset_name_tranfer][-2]
    y_test = datasets_dict[dataset_name_tranfer][-1]

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # make the min of the labels zero
    y_train, y_test = transform_labels(y_train, y_test)

    # save the original y because later we will use the binary version
    y_true = y_test.astype(np.int64)

    # transform the labels from integers to one hot vectors
    y_train = keras.utils.to_categorical(y_train, nb_classes)
    y_test = keras.utils.to_categorical(y_test, nb_classes)

    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    start_time = time.time()

    # remove the last layer to replace it with a new one
    input_shape = (None, x_train.shape[2])
    model = build_model(input_shape, nb_classes, pre_model)

    pre_model = None

    if verbose == True:
        model.summary()

    # b = model.layers[1].get_weights()

    hist = model.fit(x_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,
                     verbose=verbose, validation_data=(x_test, y_test),
                     callbacks=callbacks)

    # a = model.layers[1].get_weights()
    # compare_weights(a, b)

    model = keras.models.load_model(file_path)

    y_pred = model.predict(x_test)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    duration = time.time() - start_time

    df_metrics = save_logs(write_output_dir, hist, y_pred, y_true, duration,
                           y_true_val, y_pred_val)

    print(df_metrics)

    keras.backend.clear_session()
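# transform_labels() is called above but not defined in this section; its stated
# purpose is to make the minimum of the labels zero so they can be one-hot encoded.
# A minimal sketch of one way to do that with sklearn's LabelEncoder (an assumption
# about the helper, not its actual implementation):
import numpy as np
from sklearn.preprocessing import LabelEncoder

def transform_labels(y_train, y_test):
    # fit on the union of train and test labels, then map both to 0..K-1
    encoder = LabelEncoder()
    encoder.fit(np.concatenate((y_train, y_test), axis=0))
    return encoder.transform(y_train), encoder.transform(y_test)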
def fit(self, x_train, y_train, x_val, y_val, y_true):
    self.y_true = y_true
    self.x_test = x_val
    self.y_test = y_val
    self.x_train, self.x_val, self.y_train, self.y_val = \
        train_test_split(x_train, y_train, test_size=0.2)

    # 1. Tune ESN and num_filter
    self.x_train, self.y_train, self.x_val, self.x_test, model_init, hist_init, \
        duration_init, acc_init, num_filter = self.tune_esn()

    current_acc = acc_init
    hist_final = hist_init
    model_final = model_init
    duration_final = duration_init
    ratio_final = [0.1, 0.2]

    for ratio in self.ratio[1:]:
        # 2. Build the model
        input_shape = (self.len_series, self.units, 1)
        model = self.build_model(input_shape, self.nb_classes, self.len_series,
                                 ratio, num_filter)

        # if self.verbose == True:
        #     model.summary()

        # 3. Train the model
        batch = self.batch
        epoch = self.epoch

        start_time = time.time()
        hist = model.fit(self.x_train, self.y_train, batch_size=batch, epochs=epoch,
                         verbose=False, validation_data=(self.x_val, self.y_val),
                         callbacks=self.callbacks)
        duration = time.time() - start_time

        model_loss, model_acc = model.evaluate(self.x_val, self.y_val, verbose=False)
        print('val_loss: {0}, val_acc: {1}'.format(model_loss, model_acc))

        y_pred = model.predict(self.x_test)
        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        df_metrics = calculate_metrics(self.y_true, y_pred, duration)
        temp_output_dir = self.output_dir + str(self.it) + '/'
        create_directory(temp_output_dir)
        df_metrics.to_csv(temp_output_dir + 'df_metrics.csv', index=False)
        model.save(temp_output_dir + 'model.hdf5')

        params = [self.final_params_selected[0], self.final_params_selected[1],
                  self.final_params_selected[2], ratio]
        param_print = pd.DataFrame(np.array([params], dtype=object),
                                   columns=['input_scaling', 'connectivity',
                                            'num_filter', 'ratio'])
        param_print.to_csv(temp_output_dir + 'df_params.csv', index=False)

        if model_acc > current_acc:
            print('New winner')
            hist_final = hist
            model_final = model
            duration_final = duration
            ratio_final = ratio
            current_acc = model_acc

        keras.backend.clear_session()
        self.it += 1

    print('Final ratio: {0}'.format(ratio_final))
    self.final_params_selected.append(ratio_final)

    self.model = model_final
    self.hist = hist_final

    y_pred = self.model.predict(self.x_test)
    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    param_print = pd.DataFrame(np.array([self.final_params_selected], dtype=object),
                               columns=['input_scaling', 'connectivity',
                                        'num_filter', 'ratio'])
    param_print.to_csv(self.output_dir + 'df_final_params.csv', index=False)

    save_logs(self.output_dir, self.hist, y_pred, self.y_true, duration_final,
              self.verbose, lr=False)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true, plot_test_acc=False):
    # if len(keras.backend.tensorflow) == 0:
    #     print('error no gpu')
    #     exit()
    # else:
    #     print('GPU found')

    # x_val and y_val are only used to monitor the test loss and NOT for training
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

    # batch_size defines the number of samples propagated through the network,
    # at least 16 if none is given; x_train.shape[0] is the length of the training data
    if self.batch_size is None:
        mini_batch_size = int(min(x_train.shape[0] / 10, 16))
    else:
        mini_batch_size = self.batch_size

    start_time = time.time()

    if plot_test_acc:
        hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                              epochs=self.nb_epochs, verbose=self.verbose,
                              validation_data=(x_val, y_val), callbacks=self.callbacks)
    else:
        hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                              epochs=self.nb_epochs, verbose=self.verbose,
                              callbacks=self.callbacks)

    duration = time.time() - start_time

    self.model.save(self.output_directory + 'last_model.hdf5')

    y_pred = self.predict(x_val, y_true, x_train, y_train, y_val,
                          return_df_metrics=False)

    # save predictions
    np.save(self.output_directory + 'y_pred.npy', y_pred)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)

    df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration,
                           plot_test_acc=plot_test_acc)

    keras.backend.clear_session()

    return df_metrics
def fit(self, x_train, y_train, x_val, y_val, y_true, nb_classes):
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 16
    nb_epochs = 200

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    # split the provided test set into val and test halves
    print(x_val.shape[0] / 2)
    l = int(x_val.shape[0] / 2)
    x_test = x_val[l:]
    y_test = y_val[l:]
    x_val = x_val[:l]
    y_val = y_val[:l]
    y_true = y_true[int(y_true.shape[0] / 2):]

    print("train:")
    print(x_train)
    print("train label:")
    print(y_train)
    print("val:")
    print(x_val)
    print("val label:")
    print(y_val)
    print("test:")
    print(x_test)
    print("test label:")
    print(y_test)

    start_time = time.time()

    hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, y_val), callbacks=self.callbacks)

    duration = time.time() - start_time

    model = keras.models.load_model(
        self.output_directory + 'best_model.hdf5',
        custom_objects={'precision': precision, 'recall': recall, 'f1': f1})
    model_val = keras.models.load_model(
        self.output_directory + 'best_model_val.hdf5',
        custom_objects={'precision': precision, 'recall': recall, 'f1': f1})

    # y_pred = model.predict(x_val)
    y_pred = model.predict(x_test)
    y_pred_val = model_val.predict(x_test)

    # convert the predicted from binary to integer
    y_pred = np.argmax(y_pred, axis=1)
    y_pred_val = np.argmax(y_pred_val, axis=1)

    save_logs(self.output_directory, hist, y_pred, y_pred_val, y_true, duration, nb_classes)

    keras.backend.clear_session()
def fit(self, x_train, y_train, x_val, y_val, y_true, test_spe=False):
    # x_val and y_val are only used to monitor the test loss and NOT for training
    batch_size = 400
    nb_epochs = 100

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

    start_time = time.time()

    hist = self.model.fit(x_train, x_train, batch_size=mini_batch_size,
                          epochs=nb_epochs, verbose=self.verbose,
                          validation_data=(x_val, x_val), callbacks=self.callbacks)
    # hist = None

    duration = time.time() - start_time

    print(self.output_directory)
    model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

    # reconstruction-error scores on the training set and their 99th percentile
    x_rec = model.predict(x_train)
    y_rec = np.array([np.linalg.norm(a - b) for a, b in zip(x_train, x_rec)])
    tresh_tr = np.percentile(y_rec, 99)

    print(x_val.shape)
    rec = model.predict(x_val)
    y_pred = np.array([np.linalg.norm(a - b) for a, b in zip(x_val, rec)])
    tresh = np.percentile(y_pred, 99)

    print("generating test embedding vectors ##########################")
    # encod = self.encoder.predict(x_val)
    # decod = self.decoder.predict(x_val)
    emb = self.emb.predict(x_val)
    # y_pred_emb = np.array([np.linalg.norm(a - b) for a, b in zip(encod, decod)])
    # print("embedding vector shape is {}".format(encod.shape))
    print(y_pred.shape, y_true.shape)
    # np.save(self.output_directory + 'encod_test.npy', encod)
    # np.save(self.output_directory + 'decod_test.npy', decod)
    # np.save(self.output_directory + 'score_embd.npy', y_pred_emb)
    np.save(self.output_directory + 'embd_test.npy', emb)

    # convert the predicted from binary to integer
    # y_pred = np.argmax(y_pred, axis=1)
    # hist = None
    print(y_pred.shape, y_true.shape)

    save_logs(self.output_directory, hist, y_pred, y_true, duration, tresh=tresh)

    print("testing !!!")
    if test_spe:
        hist = None
        l_atk = ['continuous', 'plateau', 'suppress', 'flooding', 'playback']

        if 'nofrq' in self.output_directory:
            print("nofreq testing*********")
            path_in = "/scratch/Project-CTI/data/SynCAN/classification_SOA/archives/mts_archive/const_nofrq_"
        else:
            print("with_freq testing*********")
            path_in = "/scratch/Project-CTI/data/SynCAN/classification_SOA/archives/mts_archive/const_"

        # x_nor = x_val
        # y_nor = y_true.reshape(-1)

        for atk in l_atk:
            start_time = time.time()

            outp_atk = self.output_directory + "const_nofrq_" + atk + "/"
            if not os.path.exists(outp_atk):
                os.makedirs(outp_atk)

            x_val = np.load(path_in + atk + "/x_test.npy")
            # x_val = np.concatenate([x_val, x_nor])
            y_true = np.load(path_in + atk + "/y_test.npy")
            # print(y_true.shape, y_nor.shape)
            # y_true = np.concatenate([y_true, y_nor])

            rec = model.predict(x_val)
            y_pred = np.array([np.linalg.norm(a - b) for a, b in zip(x_val, rec)])
            # tresh = y_pred[-3000]
            # encod = self.encoder.predict(x_val)
            # decod = self.decoder.predict(x_val)
            emb = self.emb.predict(x_val)
            # y_pred_emb = np.array([np.linalg.norm(a - b) for a, b in zip(encod, decod)])
            # print("embedding vector shape is {}".format(encod.shape))
            # print(y_pred.shape, y_true.shape)
            # np.save(outp_atk + 'encod_test.npy', encod)
            # np.save(outp_atk + 'decod_test.npy', decod)
            # np.save(outp_atk + 'score_embd.npy', y_pred_emb)
            np.save(outp_atk + 'embd_test.npy', emb)

            duration = time.time() - start_time
            save_logs(outp_atk, hist, y_pred, y_true, duration, tresh=tresh)

            # trash test
            print("Trash testing")
            x_val = np.load(path_in + atk + "/x_trash.npy")
            # x_val = np.concatenate([x_val, x_nor])
            y_true = np.load(path_in + atk + "/y_trash.npy")
            # print(y_true.shape, y_nor.shape)
            # y_true = np.concatenate([y_true, y_nor])

            rec = model.predict(x_val)
            y_pred = np.array([np.linalg.norm(a - b) for a, b in zip(x_val, rec)])
            # tresh = y_pred[-3000]
            # encod = self.encoder.predict(x_val)
            # decod = self.decoder.predict(x_val)
            emb = self.emb.predict(x_val)
            # y_pred_emb = np.array([np.linalg.norm(a - b) for a, b in zip(encod, decod)])
            # print("embedding vector shape is {}".format(encod.shape))
            # print(y_pred.shape, y_true.shape)
            # np.save(outp_atk + 'encod_test.npy', encod)
            # np.save(outp_atk + 'decod_test.npy', decod)
            # np.save(outp_atk + 'score_embd.npy', y_pred_emb)
            np.save(outp_atk + 'embd_trash.npy', emb)

            duration = time.time() - start_time
            outp_atk = outp_atk + "trash_"
            save_logs(outp_atk, hist, y_pred, y_true, duration, tresh=tresh)

    print(tresh, tresh_tr)
    keras.backend.clear_session()
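# The reconstruction-based variant above derives a detection threshold as the 99th
# percentile of the anomaly scores (tresh / tresh_tr). A minimal sketch of turning
# such scores into binary labels with a percentile threshold; the function and
# variable names are illustrative, not from the code above:
import numpy as np

def scores_to_labels(scores, threshold):
    # 1 = anomalous (score above threshold), 0 = normal
    return (scores > threshold).astype(int)

# usage sketch:
# threshold = np.percentile(train_scores, 99)
# y_pred_binary = scores_to_labels(test_scores, threshold)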