def main():
    config = Configuration()

    checker = ConfigChecker(config, None, 'preprocessing', training=None)
    checker.pre_init_checks()

    nbr_datasets = len(config.datasets)

    for i in range(nbr_datasets):
        print('-------------------------------')
        print('Importing dataset', i)
        print('-------------------------------')

        import_dataset(i)

        print('-------------------------------')
        print()
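# --- Hedged usage sketch, not part of the original scripts ---
# Each main() in this collection is presumably executed as a standalone script;
# a minimal, assumed module-level guard would look like this:
if __name__ == '__main__':
    main()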
def main():
    try:
        # suppress debugging messages of TensorFlow
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        config = Configuration()

        if config.case_base_for_inference:
            dataset: CBSDataset = CBSDataset(config.case_base_folder, config, training=False)
        else:
            dataset: CBSDataset = CBSDataset(config.training_data_folder, config, training=False)

        dataset.load()

        checker = ConfigChecker(config, dataset, 'cbs', training=False)
        checker.pre_init_checks()

        cbs = CBS(config, False, dataset)
        inference = Inference(config, cbs, dataset)

        checker.post_init_checks(cbs)

        print('Ensure the right model file is used:')
        print(config.directory_model_to_use, '\n')

        inference.infer_test_dataset()

        cbs.kill_threads()

    except KeyboardInterrupt:
        # stop the CBS worker threads even on manual abort
        try:
            cbs.kill_threads()
        except Exception:
            pass
def main():
    try:
        # suppress debugging messages of TensorFlow
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        # os.environ['report_tensor_allocations_upon_oom'] = '1'

        config = Configuration()

        dataset = CBSDataset(config.training_data_folder, config, training=True)
        dataset.load()

        checker = ConfigChecker(config, dataset, 'cbs', training=True)
        checker.pre_init_checks()

        print('Initializing case-based similarity measure ...\n')
        cbs = CBS(config, True, dataset)

        checker.post_init_checks(cbs)

        print('\nTraining:\n')
        optimizer = CBSOptimizer(cbs, dataset, config)
        optimizer.optimize()

    except KeyboardInterrupt:
        # stop the CBS worker threads even on manual abort
        try:
            cbs.kill_threads()
        except Exception:
            pass
def main():
    config = Configuration()

    if config.case_base_for_inference:
        dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False)
    else:
        dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False)

    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=False)
    checker.pre_init_checks()

    architecture = initialise_snn(config, dataset, False)
    checker.post_init_checks(architecture)

    inference = Inference(config, architecture, dataset)

    if config.print_model:
        tf.keras.utils.plot_model(architecture.encoder.model, to_file='model.png',
                                  show_shapes=True, expand_nested=True)

    print('Ensure the right model file is used:')
    print(config.directory_model_to_use, '\n')

    inference.infer_test_dataset()
def main():
    config = Configuration()  # get the config for the data directory

    checker = ConfigChecker(config, None, 'preprocessing', training=None)
    checker.pre_init_checks()

    number_data_sets = len(config.datasets)

    for i in range(number_data_sets):
        print('\n\nImporting dataframe ' + str(i) + '/' + str(number_data_sets - 1) + ' from file')

        # read the imported dataframe from the saved file
        path_to_file = config.datasets[i][0] + config.filename_pkl
        df: pd.DataFrame = pd.read_pickle(path_to_file)

        df = clean_up_dataframe(df, config)

        print('\nSaving dataframe as pickle file in', config.datasets[i][0])
        path_to_file = config.datasets[i][0] + config.filename_pkl_cleaned
        df.to_pickle(path_to_file)
        print('Saving finished')

        del df
        gc.collect()
def main():
    config = Configuration()
    config.print_detailed_config_used_for_training()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    if config.print_model:
        tf.keras.utils.plot_model(snn.encoder.model, to_file='model.png',
                                  show_shapes=True, expand_nested=True)

    checker.post_init_checks(snn)

    start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

    print('---------------------------------------------')
    print('Training:')
    print('---------------------------------------------')
    print()

    optimizer = SNNOptimizer(snn, dataset, config)
    optimizer.optimize()

    print()
    print('---------------------------------------------')
    print('Inference:')
    print('---------------------------------------------')
    print()

    change_model(config, start_time_string)

    if config.case_base_for_inference:
        dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False)
    else:
        dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False)

    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    snn = initialise_snn(config, dataset, False)

    inference = Inference(config, snn, dataset)
    inference.infer_test_dataset()
def main():
    # suppress debugging messages of TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = Configuration()
    config.print_detailed_config_used_for_training()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    checker.post_init_checks(snn)

    start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

    print('---------------------------------------------')
    print('Training:')
    print('---------------------------------------------')
    print()

    optimizer = SNNOptimizer(snn, dataset, config)
    optimizer.optimize()

    print()
    print('---------------------------------------------')
    print('Inference:')
    print('---------------------------------------------')
    print()

    change_model(config, start_time_string)

    if config.case_base_for_inference:
        dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False)
    else:
        dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False)

    dataset.load()

    snn = initialise_snn(config, dataset, False)

    inference = Inference(config, snn, dataset)
    inference.infer_test_dataset()
def main():
    config = Configuration()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    checker.post_init_checks(snn)

    print('Training:')
    optimizer = SNNOptimizer(snn, dataset, config)
    optimizer.optimize()
def main():
    # suppress debugging messages of TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = Configuration()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    checker.post_init_checks(snn)

    print('Training:')
    optimizer = SNNOptimizer(snn, dataset, config)
    optimizer.optimize()
def main():
    # suppress debugging messages of TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = Configuration()

    if config.case_base_for_inference:
        dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False)
    else:
        dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False)

    dataset.load()

    checker = ConfigChecker(config, dataset, 'snn', training=False)
    checker.pre_init_checks()

    architecture = initialise_snn(config, dataset, False)
    checker.post_init_checks(architecture)

    inference = Inference(config, architecture, dataset)

    if config.print_model:
        tf.keras.utils.plot_model(architecture.encoder.model, to_file='model.png',
                                  show_shapes=True, expand_nested=True)

    print('Ensure the right model file is used:')
    print(config.directory_model_to_use, '\n')

    inference.infer_test_dataset()
def main():
    config = Configuration()
    config.print_detailed_config_used_for_training()

    dataset = FullDataset(config.training_data_folder, config, training=True, model_selection=True)
    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    if config.print_model:
        tf.keras.utils.plot_model(snn.encoder.model, to_file='model.png',
                                  show_shapes=True, expand_nested=True)

    checker.post_init_checks(snn)

    start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

    print('---------------------------------------------')
    print('Training:')
    print('---------------------------------------------')
    print()

    optimizer = SNNOptimizer(snn, dataset, config)
    optimizer.optimize()

    print()
    print('---------------------------------------------')
    print('Selection of the model for the final evaluation:')
    print('---------------------------------------------')
    print()

    num_of_selection_tests = config.number_of_selection_tests
    config.use_masking_regularization = False

    # maps the validation score of each candidate model to its training loss
    score_valid_to_model_loss = {}

    for i in range(num_of_selection_tests):
        loss_of_selected_model = change_model(config, start_time_string, num_of_selction_iteration=i)

        if config.case_base_for_inference:
            dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False,
                                               model_selection=True)
        else:
            dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False,
                                               model_selection=True)

        dataset.load()
        dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

        snn = initialise_snn(config, dataset, False)

        inference = Inference(config, snn, dataset)
        curr_model_score = inference.infer_test_dataset()

        score_valid_to_model_loss[curr_model_score] = loss_of_selected_model
        print("score_valid_to_model_loss: ", score_valid_to_model_loss)

    print()
    print('---------------------------------------------')
    print('Inference:')
    print('---------------------------------------------')
    print()

    # select the model whose validation score is the highest
    max_score = max(score_valid_to_model_loss.keys())
    min_loss = score_valid_to_model_loss[max_score]
    print("Model with the following loss is selected for the final evaluation:", min_loss)

    change_model(config, start_time_string, get_model_by_loss_value=min_loss)

    if config.case_base_for_inference:
        dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False)
    else:
        dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False)

    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    snn = initialise_snn(config, dataset, False)

    inference = Inference(config, snn, dataset)
    inference.infer_test_dataset()
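# --- Hedged sketch (illustrative values, not from the script above) of the
# model selection logic: each candidate's validation score is used as the key
# that maps to its training loss, and the loss belonging to the highest score
# is the one later passed to change_model() for the final evaluation. ---
score_valid_to_model_loss = {0.81: 0.042, 0.85: 0.037, 0.79: 0.051}

best_score = max(score_valid_to_model_loss.keys())
selected_loss = score_valid_to_model_loss[best_score]
print('Model with the following loss is selected:', selected_loss)  # 0.037

# Caveat: if two candidates reach exactly the same validation score, the later
# entry silently overwrites the earlier one, because the score is the dict key.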
def main():
    config = Configuration()

    # Define different versions of the original configuration
    # config_2 = copy(config)
    # config_2.hyper_file = config_2.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_a.json'  # like the default, but with owl2vec added as graph features

    config_3 = copy(config)
    config_3.hyper_file = config_3.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_b.json'  # like the default, but with the linear transformation enabled

    config_4 = copy(config)
    config_4.hyper_file = config_4.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_c.json'  # like the default, but with the context output only

    ####
    '''
    config_2 = copy(config)
    config_2.batch_distribution = {
        BatchSubsetType.DISTRIB_BASED_ON_DATASET: 0.75,
        BatchSubsetType.EQUAL_CLASS_DISTRIB: 0.25
    }

    config_3 = copy(config)
    config_3.hyper_file = config_3.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2.json'  # Owl2vec after 2DCNN removed, film on

    config_4 = copy(config)
    config_4.hyper_file = config_4.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_3.json'  # Owl2vec after 2DCNN removed, film off

    config_5 = copy(config)
    config_5.hyper_file = config_5.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_4.json'  # like the default, but with gradient cap 1

    config_6 = copy(config)
    config_6.hyper_file = config_6.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_5.json'  # like the default, but with 256,128,64

    config_7 = copy(config)
    config_7.hyper_file = config_7.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_6.json'  # like the default, but with 512,256,128

    config_8 = copy(config)
    config_8.hyper_file = config_8.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_7.json'  # like the default, but with 128,64,32

    config_9 = copy(config)
    config_9.hyper_file = config_9.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_8.json'  # like the default, but with 256,128,128

    config_10 = copy(config)
    config_10.hyper_file = config_10.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_9.json'  # like the default, but with 128,128,128, FC 386-256, CNN2d 128,64,3

    config_11 = copy(config)
    config_11.hyper_file = config_11.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_10.json'  # like the default, but with 128,64,64, FC 386-256, CNN2d 128,64,3

    config_12 = copy(config)
    config_12.hyper_file = config_12.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_11.json'  # like the default, but with 256,128,128 and only everything enabled
    '''

    '''
    config_3 = copy(config)
    config_3.hyper_file = config_3.hyper_file_folder + 'cnn2d_with_graph_test_Readout_WOowl2vec.json'

    config_4 = copy(config)
    config_4.hyper_file = config_4.hyper_file_folder + 'cnn2d_with_graph_test_Readout_lrSmaller.json'

    config_5 = copy(config)
    config_5.hyper_file = config_5.hyper_file_folder + 'cnn2d_with_graph_test_Readout_WOAttributeWise.json'
    '''

    # list_of_configs = [config_3, config_4, config_5, config_6, config_7, config_8, config_9, config_10, config_11]
    list_of_configs = [config, config_3, config_4]
    # list_of_configs = [config, config_2, config_3, config_4, config_5, config_6, config_7, config_8, config_9, config_10, config_11]

    for i, config in enumerate(list_of_configs):
        print("Run number of config:", i)
        config.print_detailed_config_used_for_training()

        dataset = FullDataset(config.training_data_folder, config, training=True, model_selection=True)
        dataset.load()
        dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

        checker = ConfigChecker(config, dataset, 'snn', training=True)
        checker.pre_init_checks()

        snn = initialise_snn(config, dataset, True)
        snn.print_detailed_model_info()

        if config.print_model:
            tf.keras.utils.plot_model(snn.encoder.model, to_file='model.png',
                                      show_shapes=True, expand_nested=True)

        checker.post_init_checks(snn)

        start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

        print('---------------------------------------------')
        print('Training:')
        print('---------------------------------------------')
        print()

        optimizer = SNNOptimizer(snn, dataset, config)
        optimizer.optimize()

        print()
        print('---------------------------------------------')
        print('Evaluation of the current config:')
        print('---------------------------------------------')
        print()

        num_of_selection_tests = config.number_of_selection_tests

        # maps the validation score of each candidate model to its training loss
        score_valid_to_model_loss = {}

        # use j for the selection iterations to avoid shadowing the config index i
        for j in range(num_of_selection_tests):
            loss_of_selected_model = change_model(config, start_time_string, num_of_selction_iteration=j)

            if config.case_base_for_inference:
                dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False,
                                                   model_selection=True)
            else:
                dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False,
                                                   model_selection=True)

            dataset.load()
            dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

            snn = initialise_snn(config, dataset, False)

            inference = Inference(config, snn, dataset)
            curr_model_score = inference.infer_test_dataset()

            score_valid_to_model_loss[curr_model_score] = loss_of_selected_model

        # compute the mean loss and the mean score over all selection runs
        loss_mean = sum(score_valid_to_model_loss.values()) / len(score_valid_to_model_loss)
        mean_score = sum(score_valid_to_model_loss.keys()) / len(score_valid_to_model_loss)

        # print the result of this config run
        print("Run: ", i, " loss mean: " + str(loss_mean), " score mean: " + str(mean_score))
        print("Run: ", i, " score_valid_to_model_loss:", score_valid_to_model_loss)
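# --- Hedged sketch of an alternative way to build the config variants used in
# the script above without one copy(...) block per variant. DummyConfig and its
# folder path are hypothetical stand-ins for Configuration(); the two file names
# are the ones assigned to config_3 and config_4 above. ---
from copy import copy


class DummyConfig:
    hyper_file_folder = 'configuration/hyper_parameter_files/'  # assumed path
    hyper_file = hyper_file_folder + 'default.json'


config = DummyConfig()

variant_files = [
    'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_b.json',
    'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_c.json',
]

list_of_configs = [config]
for file_name in variant_files:
    variant = copy(config)
    variant.hyper_file = variant.hyper_file_folder + file_name
    list_of_configs.append(variant)

print([c.hyper_file for c in list_of_configs])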
def main():
    config = Configuration()  # get the config for the data directory

    checker = ConfigChecker(config, None, 'preprocessing', training=None)
    checker.pre_init_checks()

    config.import_timestamps()
    number_data_sets = len(config.datasets)

    # lists that collect the data of all examples
    examples: [np.ndarray] = []
    labels_of_examples: [str] = []
    failure_times_of_examples: [str] = []
    window_times_of_examples: [str] = []
    attributes = None

    for i in range(number_data_sets):
        print('\n\nImporting dataframe ' + str(i) + '/' + str(number_data_sets - 1) + ' from file')

        # read the imported dataframe from the saved file
        path_to_file = config.datasets[i][0] + config.filename_pkl_cleaned

        with open(path_to_file, 'rb') as f:
            df: pd.DataFrame = pickle.load(f)

        # cleaning moved to a separate script because of its computational demands
        # df = clean_up_dataframe(df, config)

        # split the dataframe into the configured cases
        cases_df, labels_df, failures_df = split_by_cases(df, i, config)
        print("cases_df: ", len(cases_df))
        print("labels_df: ", len(labels_df))
        print("failures_df: ", len(failures_df), ": ", failures_df)

        if i == 0:
            attributes = np.stack(df.columns, axis=0)

        del df
        gc.collect()

        # split each case into examples, which are added to the list of all examples
        number_cases = len(cases_df)
        for y in range(number_cases):
            df = cases_df[y]

            if len(df) <= 0:
                print(i, y, 'empty')
                print("df: ", df)
                continue

            start = df.index[0]
            end = df.index[-1]
            secs = (end - start).total_seconds()
            print('\nSplitting case', y, '/', number_cases - 1, 'into examples. Length:', secs,
                  " start: ", start, " end: ", end)

            split_into_examples(df, labels_df[y], examples, labels_of_examples,
                                config.time_series_length, config.interval_in_seconds, config,
                                failure_times_of_examples, failures_df[y],
                                window_times_of_examples, y, i)

        del cases_df, labels_df, failures_df
        gc.collect()

    # convert the lists of arrays into numpy arrays
    examples_array = np.stack(examples, axis=0)
    labels_array = np.stack(labels_of_examples, axis=0)
    failure_times_array = np.stack(failure_times_of_examples, axis=0)
    window_times_array = np.stack(window_times_of_examples, axis=0)

    del examples, labels_of_examples, failure_times_of_examples, window_times_of_examples
    gc.collect()

    # print("config.use_over_lapping_windows: ", config.use_over_lapping_windows)
    if config.use_over_lapping_windows:
        print('\nExecute train/test split with failure case consideration')

        # define the groups for GroupShuffleSplit: every failure time gets its own group id
        enc = OrdinalEncoder()
        enc.fit(failure_times_array.reshape(-1, 1))
        failure_times_array_groups = enc.transform(failure_times_array.reshape(-1, 1))
        # print("groups: ", failure_times_array_groups)
        # group_kfold = GroupKFold(n_splits=2)

        gss = GroupShuffleSplit(n_splits=1, test_size=config.test_split_size,
                                random_state=config.random_seed)

        for train_idx, test_idx in gss.split(examples_array, labels_array, failure_times_array_groups):
            print("TRAIN:", train_idx, "TEST:", test_idx)
        # split_idx in gss.split(examples_array, labels_array, failure_times_array_groups)
        # train_idx = split_idx[0]
        # test_idx = split_idx[1]
        # print("train_idx:", train_idx)

        x_train, x_test = examples_array[train_idx], examples_array[test_idx]
        y_train, y_test = labels_array[train_idx], labels_array[test_idx]
        failure_times_train, failure_times_test = failure_times_array[train_idx], failure_times_array[test_idx]
        window_times_train, window_times_test = window_times_array[train_idx], window_times_array[test_idx]

        print("X_train: ", x_train.shape, " X_test: ", x_test.shape)
        print("Y_train: ", y_train.shape, " Y_test: ", y_test.shape)
        print("Failure_times_train: ", failure_times_train.shape,
              " Failure_times_test: ", failure_times_test.shape)
        print("Window_times_train: ", window_times_train.shape,
              " Window_times_test: ", window_times_test.shape)
        print("Classes in the train set: ", np.unique(y_train))
        print("Classes in the test set: ", np.unique(y_test))
        # print("Classes in train and test set: ", np.unique(np.concatenate(y_train, y_test)))
    else:
        # split into train and test data set
        print('\nExecute train/test split')
        x_train, x_test, y_train, y_test = train_test_split(examples_array, labels_array,
                                                            test_size=config.test_split_size,
                                                            random_state=config.random_seed)

    # Sort both datasets by the cases for easier handling
    '''
    x_train = x_train[y_train.argsort()]
    y_train = np.sort(y_train)
    x_test = x_test[y_test.argsort()]
    y_test = np.sort(y_test)
    '''

    print('Training data set shape: ', x_train.shape)
    print('Training label set shape: ', y_train.shape)
    print('Test data set shape: ', x_test.shape)
    print('Test label set shape: ', y_test.shape, '\n')

    # normalize each sensor stream to contain values in [0, 1]
    x_train, x_test = normalise(x_train, x_test, config)
    x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

    # save the numpy arrays
    print('\nSave to np arrays in ' + config.training_data_folder)

    print('Step 1/5')
    np.save(config.training_data_folder + 'train_features_4_.npy', x_train)
    print('Step 2/5')
    np.save(config.training_data_folder + 'test_features_4_.npy', x_test)
    print('Step 3/5')
    np.save(config.training_data_folder + 'train_labels_4_.npy', y_train)
    print('Step 4/5')
    np.save(config.training_data_folder + 'test_labels_4_.npy', y_test)
    print('Step 5/5')
    np.save(config.training_data_folder + 'feature_names_4_.npy', attributes)
    print()

    if config.use_over_lapping_windows:
        print('Saving additional data if overlapping windows are used')

        # contains the associated failure time (if any) for each example
        print('Step 1/4')
        np.save(config.training_data_folder + 'train_failure_times_4_.npy', failure_times_train)
        print('Step 2/4')
        np.save(config.training_data_folder + 'test_failure_times_4_.npy', failure_times_test)

        # contains the start and end time stamp of each example
        print('Step 3/4')
        np.save(config.training_data_folder + 'train_window_times_4_.npy', window_times_train)
        print('Step 4/4')
        np.save(config.training_data_folder + 'test_window_times_4_.npy', window_times_test)
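# --- Hedged, self-contained sketch (with made-up data) of the group-aware
# train/test split used above: all windows that belong to the same failure case
# share one group id, so a whole case lands either in train or in test. ---
import numpy as np
from sklearn.model_selection import GroupShuffleSplit

examples = np.arange(8).reshape(8, 1)           # 8 dummy windows
labels = np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'])
groups = np.array([0, 0, 1, 1, 2, 2, 3, 3])     # failure-case id per window

gss = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
for train_idx, test_idx in gss.split(examples, labels, groups):
    # no failure case appears on both sides of the split
    assert set(groups[train_idx]).isdisjoint(groups[test_idx])
    print('TRAIN groups:', np.unique(groups[train_idx]),
          'TEST groups:', np.unique(groups[test_idx]))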