예제 #1
0
def main():
    """Run the preprocessing config checks, then import every configured dataset."""
    config = Configuration()

    # No dataset exists yet at preprocessing time, hence None.
    checker = ConfigChecker(config, None, 'preprocessing', training=None)
    checker.pre_init_checks()

    # Idiomatic range(n) instead of range(0, n).
    for i in range(len(config.datasets)):
        print('-------------------------------')
        print('Importing dataset', i)
        print('-------------------------------')

        import_dataset(i)

        print('-------------------------------')
        print()
예제 #2
0
def main():
    """Run CBS (case-based similarity) inference on the test dataset.

    Loads either the case base or the full training data (depending on the
    configuration), builds the CBS model and runs inference; worker threads
    are killed on normal completion and best-effort on Ctrl-C.
    """
    try:
        # suppress debugging messages of TensorFlow
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        config = Configuration()

        # Select the data source: the (smaller) case base or the full training data.
        if config.case_base_for_inference:
            data_folder = config.case_base_folder
        else:
            data_folder = config.training_data_folder
        dataset: CBSDataset = CBSDataset(data_folder, config, training=False)

        dataset.load()

        checker = ConfigChecker(config, dataset, 'cbs', training=False)
        checker.pre_init_checks()

        cbs = CBS(config, False, dataset)
        inference = Inference(config, cbs, dataset)

        checker.post_init_checks(cbs)

        print('Ensure right model file is used:')
        print(config.directory_model_to_use, '\n')

        inference.infer_test_dataset()
        cbs.kill_threads()

    except KeyboardInterrupt:
        # Best-effort cleanup: cbs is unbound (NameError) if the interrupt
        # arrived before its creation; the handler below covers that too.
        try:
            cbs.kill_threads()
        except Exception:  # was a bare except, which also swallowed SystemExit
            pass
예제 #3
0
def main():
    """Train the case-based similarity measure (CBS) on the training dataset."""
    try:
        # suppress debugging messages of TensorFlow
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        # os.environ['report_tensor_allocations_upon_oom'] = '1'
        config = Configuration()

        dataset = CBSDataset(config.training_data_folder, config, training=True)
        dataset.load()

        checker = ConfigChecker(config, dataset, 'cbs', training=True)
        checker.pre_init_checks()

        print('Initializing case based similarity measure ...\n')
        cbs = CBS(config, True, dataset)

        checker.post_init_checks(cbs)

        print('\nTraining:\n')
        optimizer = CBSOptimizer(cbs, dataset, config)
        optimizer.optimize()

    except KeyboardInterrupt:
        # Best-effort cleanup: cbs is unbound (NameError) if the interrupt
        # arrived before its creation; the handler below covers that too.
        try:
            cbs.kill_threads()
        except Exception:  # was a bare except, which also swallowed SystemExit
            pass
예제 #4
0
def main():
    """Evaluate a previously trained SNN on the configured test dataset."""
    config = Configuration()

    # Pick the data source according to the inference configuration.
    if config.case_base_for_inference:
        data_folder = config.case_base_folder
    else:
        data_folder = config.training_data_folder
    dataset: FullDataset = FullDataset(data_folder, config, training=False)

    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=False)
    checker.pre_init_checks()

    architecture = initialise_snn(config, dataset, False)
    checker.post_init_checks(architecture)

    inference = Inference(config, architecture, dataset)

    # Optionally export a diagram of the encoder network.
    if config.print_model:
        tf.keras.utils.plot_model(architecture.encoder.model,
                                  to_file='model.png',
                                  show_shapes=True,
                                  expand_nested=True)

    print('Ensure right model file is used:')
    print(config.directory_model_to_use, '\n')

    inference.infer_test_dataset()
예제 #5
0
def main():
    """Load each imported dataframe, clean it, and save the cleaned pickle."""
    config = Configuration()  # Get config for data directory

    checker = ConfigChecker(config, None, 'preprocessing', training=None)
    checker.pre_init_checks()

    number_data_sets = len(config.datasets)
    for i in range(number_data_sets):
        print('\n\nImporting dataframe ' + str(i) + '/' + str(number_data_sets - 1) + ' from file')

        # read the imported dataframe from the saved file
        path_to_file = config.datasets[i][0] + config.filename_pkl
        df: pd.DataFrame = pd.read_pickle(path_to_file)

        df = clean_up_dataframe(df, config)

        # BUG FIX: message typo 'datafrane' corrected to 'dataframe'.
        print('\nSaving dataframe as pickle file in', config.datasets[i][0])
        path_to_file = config.datasets[i][0] + config.filename_pkl_cleaned
        df.to_pickle(path_to_file)
        print('Saving finished')

        # Free memory before the next (potentially large) dataframe is loaded.
        del df
        gc.collect()
예제 #6
0
def main():
    """Train an SNN, then immediately run inference with the new model."""
    config = Configuration()
    config.print_detailed_config_used_for_training()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    # Optionally export a diagram of the encoder network.
    if config.print_model:
        tf.keras.utils.plot_model(snn.encoder.model,
                                  to_file='model.png',
                                  show_shapes=True,
                                  expand_nested=True)

    checker.post_init_checks(snn)

    start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

    print('---------------------------------------------')
    print('Training:')
    print('---------------------------------------------')
    print()
    SNNOptimizer(snn, dataset, config).optimize()

    print()
    print('---------------------------------------------')
    print('Inference:')
    print('---------------------------------------------')
    print()
    # Switch to the model trained in this run.
    change_model(config, start_time_string)

    if config.case_base_for_inference:
        inference_folder = config.case_base_folder
    else:
        inference_folder = config.training_data_folder
    dataset = FullDataset(inference_folder, config, training=False)

    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    snn = initialise_snn(config, dataset, False)

    Inference(config, snn, dataset).infer_test_dataset()
예제 #7
0
def main():
    """Train an SNN and evaluate it on the test dataset afterwards."""
    # suppress debugging messages of TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = Configuration()
    config.print_detailed_config_used_for_training()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    checker.post_init_checks(snn)

    start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

    separator = '---------------------------------------------'
    print(separator)
    print('Training:')
    print(separator)
    print()
    SNNOptimizer(snn, dataset, config).optimize()

    print()
    print(separator)
    print('Inference:')
    print(separator)
    print()
    # Switch to the model trained in this run.
    change_model(config, start_time_string)

    if config.case_base_for_inference:
        inference_folder = config.case_base_folder
    else:
        inference_folder = config.training_data_folder
    dataset = FullDataset(inference_folder, config, training=False)

    dataset.load()

    snn = initialise_snn(config, dataset, False)

    Inference(config, snn, dataset).infer_test_dataset()
예제 #8
0
def main():
    """Train an SNN on the full training dataset."""
    config = Configuration()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    checker.post_init_checks(snn)

    print('Training:')
    SNNOptimizer(snn, dataset, config).optimize()
예제 #9
0
def main():
    """Train an SNN on the full training dataset (TF log noise suppressed)."""
    # suppress debugging messages of TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = Configuration()

    dataset = FullDataset(config.training_data_folder, config, training=True)
    dataset.load()

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    checker.post_init_checks(snn)

    print('Training:')
    SNNOptimizer(snn, dataset, config).optimize()
예제 #10
0
def main():
    """Evaluate a trained SNN on the test dataset (TF log noise suppressed)."""
    # suppress debugging messages of TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = Configuration()

    # Pick the data source according to the inference configuration.
    if config.case_base_for_inference:
        data_folder = config.case_base_folder
    else:
        data_folder = config.training_data_folder
    dataset: FullDataset = FullDataset(data_folder, config, training=False)

    dataset.load()

    checker = ConfigChecker(config, dataset, 'snn', training=False)
    checker.pre_init_checks()

    architecture = initialise_snn(config, dataset, False)
    checker.post_init_checks(architecture)

    inference = Inference(config, architecture, dataset)

    # Optionally export a diagram of the encoder network.
    if config.print_model:
        tf.keras.utils.plot_model(architecture.encoder.model,
                                  to_file='model.png',
                                  show_shapes=True,
                                  expand_nested=True)

    print('Ensure right model file is used:')
    print(config.directory_model_to_use, '\n')

    inference.infer_test_dataset()
예제 #11
0
def main():
    """Train an SNN, select the best checkpoint via validation runs, then
    run the final inference with the selected model.
    """
    config = Configuration()
    config.print_detailed_config_used_for_training()

    dataset = FullDataset(config.training_data_folder, config, training=True, model_selection=True)
    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    checker = ConfigChecker(config, dataset, 'snn', training=True)
    checker.pre_init_checks()

    snn = initialise_snn(config, dataset, True)
    snn.print_detailed_model_info()

    if config.print_model:
        tf.keras.utils.plot_model(snn.encoder.model, to_file='model.png', show_shapes=True, expand_nested=True)

    checker.post_init_checks(snn)

    start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

    print('---------------------------------------------')
    print('Training:')
    print('---------------------------------------------')
    print()
    optimizer = SNNOptimizer(snn, dataset, config)
    optimizer.optimize()

    print()
    print('---------------------------------------------')
    print('Selecting (of the model for final evaluation):')
    print('---------------------------------------------')
    print()
    num_of_selection_tests = config.number_of_selection_tests
    config.use_masking_regularization = False
    # Maps validation score -> training loss of the evaluated checkpoint.
    # NOTE(review): two checkpoints with exactly the same score overwrite each
    # other in this dict — confirm that is acceptable.
    score_valid_to_model_loss = {}
    for i in range(num_of_selection_tests):
        loss_of_selected_model = change_model(config, start_time_string, num_of_selction_iteration=i)

        if config.case_base_for_inference:
            dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False, model_selection=True)
        else:
            dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False, model_selection=True)
        dataset.load()
        dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

        snn = initialise_snn(config, dataset, False)

        inference = Inference(config, snn, dataset)
        curr_model_score = inference.infer_test_dataset()

        score_valid_to_model_loss[curr_model_score] = loss_of_selected_model

    print("score_valid_to_model_loss: ", score_valid_to_model_loss)

    print()
    print('---------------------------------------------')
    print('Inference:')
    print('---------------------------------------------')
    print()

    # Pick the checkpoint with the best validation score; max() iterates the
    # dict keys directly (removed a redundant list(...keys()) wrapper).
    max_score = max(score_valid_to_model_loss)
    min_loss = score_valid_to_model_loss[max_score]
    print("Model with the following loss is selected for the final evaluation:", min_loss)

    change_model(config, start_time_string, get_model_by_loss_value=min_loss)

    if config.case_base_for_inference:
        dataset: FullDataset = FullDataset(config.case_base_folder, config, training=False)
    else:
        dataset: FullDataset = FullDataset(config.training_data_folder, config, training=False)

    dataset.load()
    dataset = Representation.convert_dataset_to_baseline_representation(config, dataset)

    snn = initialise_snn(config, dataset, False)

    inference = Inference(config, snn, dataset)
    inference.infer_test_dataset()
def main():
    """Train and evaluate a list of configuration variants back to back.

    For each configuration in list_of_configs: train an SNN, evaluate several
    selected checkpoints, and report the mean loss and mean validation score.
    """
    config = Configuration()

    # Define different versions of the original configuration
    #config_2 = copy(config)
    #config_2.hyper_file = config_2.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_a.json'  # like default, but owl2vec added as graph features
    config_3 = copy(config)
    config_3.hyper_file = config_3.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_b.json'  # like default, but linear transformation enabled
    config_4 = copy(config)
    config_4.hyper_file = config_4.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2_c.json'  # like default, but context output only

    ####
    '''
    config_2 = copy(config)
    config_2.batch_distribution = {
        BatchSubsetType.DISTRIB_BASED_ON_DATASET: 0.75,
        BatchSubsetType.EQUAL_CLASS_DISTRIB: 0.25
        }
    
    config_3 = copy(config)
    config_3.hyper_file = config_3.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_2.json' # Owl2vec after 2DCNN Removed, film on
    config_4 = copy(config)
    config_4.hyper_file = config_4.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_3.json'  # Owl2vec after 2DCNN Removed, film off
    config_5 = copy(config)
    config_5.hyper_file = config_5.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_4.json'  # like default, but gradient cap 1
    config_6 = copy(config)
    config_6.hyper_file = config_6.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_5.json'  # like default, but 256,128,64
    config_7 = copy(config)
    config_7.hyper_file = config_7.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_6.json'  # like default, but 512,256,128
    config_8 = copy(config)
    config_8.hyper_file = config_8.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_7.json'  # like default, but 128,64,32
    config_9 = copy(config)
    config_9.hyper_file = config_9.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_8.json'  # like default, but 256,128,128
    config_10 = copy(config)
    config_10.hyper_file = config_10.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_9.json'  # like default, but 128,128,128, FC 386-256, CNN2d 128,64,3
    config_11 = copy(config)
    config_11.hyper_file = config_11.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_10.json'  # like default, but 128,64,64, FC 386-256, CNN2d 128,64,3
    config_12 = copy(config)
    config_12.hyper_file = config_12.hyper_file_folder + 'cnn2d_withAddInput_Graph_o1_GlobAtt_o2_2_HO_11.json'  # like default, but 256,128,128 with everything enabled
    '''
    '''
    config_3 = copy(config)
    config_3.hyper_file = config_3.hyper_file_folder + 'cnn2d_with_graph_test_Readout_WOowl2vec.json'
    config_4 = copy(config)
    config_4.hyper_file = config_4.hyper_file_folder + 'cnn2d_with_graph_test_Readout_lrSmaller.json'
    config_5 = copy(config)
    config_5.hyper_file = config_5.hyper_file_folder + 'cnn2d_with_graph_test_Readout_WOAttributeWise.json'
    '''
    #list_of_configs = [config_3, config_4, config_5, config_6, config_7, config_8, config_9,config_10, config_11]
    list_of_configs = [config, config_3, config_4]
    #list_of_configs = [config, config_2, config_3,config_4,config_5, config_6,config_7,config_8,config_9,config_10,config_11]

    # BUG FIX: the run index used to be named i and was shadowed by the inner
    # selection loop, so the result prints below reported the wrong run number.
    for run_idx, config in enumerate(list_of_configs):
        print("Run number of config:", run_idx)
        config.print_detailed_config_used_for_training()

        dataset = FullDataset(config.training_data_folder,
                              config,
                              training=True,
                              model_selection=True)
        dataset.load()
        dataset = Representation.convert_dataset_to_baseline_representation(
            config, dataset)

        checker = ConfigChecker(config, dataset, 'snn', training=True)
        checker.pre_init_checks()

        snn = initialise_snn(config, dataset, True)
        snn.print_detailed_model_info()

        if config.print_model:
            tf.keras.utils.plot_model(snn.encoder.model,
                                      to_file='model.png',
                                      show_shapes=True,
                                      expand_nested=True)

        checker.post_init_checks(snn)

        start_time_string = datetime.now().strftime("%m-%d_%H-%M-%S")

        print('---------------------------------------------')
        print('Training:')
        print('---------------------------------------------')
        print()
        optimizer = SNNOptimizer(snn, dataset, config)
        optimizer.optimize()

        print()
        print('---------------------------------------------')
        print('Evaluation of the current config:')
        print('---------------------------------------------')
        print()
        num_of_selection_tests = config.number_of_selection_tests
        # Maps validation score -> training loss of the evaluated checkpoint.
        score_valid_to_model_loss = {}
        for sel_idx in range(num_of_selection_tests):
            loss_of_selected_model = change_model(config,
                                                  start_time_string,
                                                  num_of_selction_iteration=sel_idx)

            if config.case_base_for_inference:
                dataset: FullDataset = FullDataset(config.case_base_folder,
                                                   config,
                                                   training=False,
                                                   model_selection=True)
            else:
                dataset: FullDataset = FullDataset(config.training_data_folder,
                                                   config,
                                                   training=False,
                                                   model_selection=True)
            dataset.load()
            dataset = Representation.convert_dataset_to_baseline_representation(
                config, dataset)

            snn = initialise_snn(config, dataset, False)

            inference = Inference(config, snn, dataset)
            curr_model_score = inference.infer_test_dataset()

            score_valid_to_model_loss[
                curr_model_score] = loss_of_selected_model

        # BUG FIX: the score mean previously reused an accumulator that still
        # held the loss sum, so the reported score mean was wrong. Use sum()
        # over each view separately instead of hand-rolled loops.
        loss_mean = sum(score_valid_to_model_loss.values()) / len(score_valid_to_model_loss)
        mean_score = sum(score_valid_to_model_loss.keys()) / len(score_valid_to_model_loss)

        # printing result
        print("Run: ", run_idx, " loss mean:" + str(loss_mean),
              " score mean: " + str(mean_score))
        print("Run: ", run_idx, " score_valid_to_model_loss:",
              score_valid_to_model_loss)
        # NOTE(review): a dangling ''' followed these prints in the original
        # and would break parsing of everything after it; it was removed.
예제 #13
0
def main():
    """Build the train/test numpy arrays from the cleaned dataframes.

    Splits each cleaned dataframe into cases, the cases into fixed-length
    examples, performs the train/test split (group-aware when overlapping
    windows are used, so windows of the same failure occurrence never land in
    both sets), normalises the features and saves everything as .npy files.
    """
    config = Configuration()  # Get config for data directory

    checker = ConfigChecker(config, None, 'preprocessing', training=None)
    checker.pre_init_checks()

    config.import_timestamps()
    number_data_sets = len(config.datasets)

    # list of all examples
    examples: [np.ndarray] = []
    labels_of_examples: [str] = []
    failure_times_of_examples: [str] = []
    window_times_of_examples: [str] = []

    attributes = None

    for i in range(number_data_sets):
        print('\n\nImporting dataframe ' + str(i) + '/' +
              str(number_data_sets - 1) + ' from file')

        # read the imported dataframe from the saved file
        path_to_file = config.datasets[i][0] + config.filename_pkl_cleaned

        with open(path_to_file, 'rb') as f:
            df: pd.DataFrame = pickle.load(f)

        # cleaning moved to separate script because of computational demands
        # df = clean_up_dataframe(df, config)

        # split the dataframe into the configured cases
        cases_df, labels_df, failures_df = split_by_cases(df, i, config)
        print("cases_df: ", len(cases_df))
        print("labels_df: ", len(labels_df))
        print("failures_df: ", len(failures_df), ": ", failures_df)

        # column names are assumed identical across datasets, so store them once
        if i == 0:
            attributes = np.stack(df.columns, axis=0)

        del df
        gc.collect()

        # split the case into examples, which are added to the list of all examples
        number_cases = len(cases_df)
        for y in range(number_cases):
            df = cases_df[y]

            # skip cases without any rows
            if len(df) <= 0:
                print(i, y, 'empty')
                print(
                    "df: ",
                    df,
                )
                continue

            start = df.index[0]
            end = df.index[-1]
            secs = (end - start).total_seconds()
            print('\nSplitting case', y, '/', number_cases - 1,
                  'into examples. Length:', secs, " start: ", start, " end: ",
                  end)
            split_into_examples(df, labels_df[y], examples, labels_of_examples,
                                config.time_series_length,
                                config.interval_in_seconds, config,
                                failure_times_of_examples, failures_df[y],
                                window_times_of_examples, y, i)
        del cases_df, labels_df, failures_df
        gc.collect()

    # convert lists of arrays to numpy array
    examples_array = np.stack(examples, axis=0)
    labels_array = np.stack(labels_of_examples, axis=0)
    failure_times_array = np.stack(failure_times_of_examples, axis=0)
    window_times_array = np.stack(window_times_of_examples, axis=0)

    # release the (large) intermediate lists before the split allocates copies
    del examples, labels_of_examples, failure_times_of_examples, window_times_of_examples
    gc.collect()

    if config.use_over_lapping_windows:
        print('\nExecute train/test split with failure case consideration')
        # define groups for GroupShuffleSplit: examples of the same failure
        # occurrence share a group and therefore stay in the same split
        enc = OrdinalEncoder()
        enc.fit(failure_times_array.reshape(-1, 1))
        failure_times_array_groups = enc.transform(
            failure_times_array.reshape(-1, 1))

        gss = GroupShuffleSplit(n_splits=1,
                                test_size=config.test_split_size,
                                random_state=config.random_seed)

        # n_splits=1, so the loop body runs exactly once and the index arrays
        # stay bound after the loop
        for train_idx, test_idx in gss.split(examples_array, labels_array,
                                             failure_times_array_groups):
            print("TRAIN:", train_idx, "TEST:", test_idx)

        x_train, x_test = examples_array[train_idx], examples_array[test_idx]
        y_train, y_test = labels_array[train_idx], labels_array[test_idx]
        failure_times_train, failure_times_test = failure_times_array[
            train_idx], failure_times_array[test_idx]
        window_times_train, window_times_test = window_times_array[
            train_idx], window_times_array[test_idx]

        print("X_train: ", x_train.shape, " X_test: ", x_test.shape)
        # BUG FIX: the second label used to read "Y_train" although it prints
        # the test set shape.
        print("Y_train: ", y_train.shape, " Y_test: ", y_test.shape)
        print("Failure_times_train: ", failure_times_train.shape,
              " Failure_times_test: ", failure_times_test.shape)
        print("Window_times_train: ", window_times_train.shape,
              " Window_times_test: ", window_times_test.shape)
        print("Classes in the train set: ", np.unique(y_train))
        print("Classes in the test set: ", np.unique(y_test))

    else:
        # split into train and test data set
        print('\nExecute train/test split')
        x_train, x_test, y_train, y_test = train_test_split(
            examples_array,
            labels_array,
            test_size=config.test_split_size,
            random_state=config.random_seed)

    # Sort both datasets by the cases for easier handling
    '''
    x_train = x_train[y_train.argsort()]
    y_train = np.sort(y_train)

    x_test = x_test[y_test.argsort()]
    y_test = np.sort(y_test)
    '''

    print('Training data set shape: ', x_train.shape)
    print('Training label set shape: ', y_train.shape)
    print('Test data set shape: ', x_test.shape)
    print('Test label set shape: ', y_test.shape, '\n')

    # normalize each sensor stream to contain values in [0,1]
    x_train, x_test = normalise(x_train, x_test, config)

    # removed a stray trailing comma in the target list here
    x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

    # save the np arrays
    print('\nSave to np arrays in ' + config.training_data_folder)

    print('Step 1/5')
    np.save(config.training_data_folder + 'train_features_4_.npy', x_train)
    print('Step 2/5')
    np.save(config.training_data_folder + 'test_features_4_.npy', x_test)
    print('Step 3/5')
    np.save(config.training_data_folder + 'train_labels_4_.npy', y_train)
    print('Step 4/5')
    np.save(config.training_data_folder + 'test_labels_4_.npy', y_test)
    print('Step 5/5')
    np.save(config.training_data_folder + 'feature_names_4_.npy', attributes)
    print()

    if config.use_over_lapping_windows:
        print('Saving additional data if overlapping windows are used')

        # Contains the associated time of a failure (if not no failure) for each example
        print('Step 1/4')
        np.save(config.training_data_folder + 'train_failure_times_4_.npy',
                failure_times_train)
        print('Step 2/4')
        np.save(config.training_data_folder + 'test_failure_times_4_.npy',
                failure_times_test)
        print('Step 3/4')
        # Contains the start and end time stamp for each training example
        np.save(config.training_data_folder + 'train_window_times_4_.npy',
                window_times_train)
        print('Step 4/4')
        np.save(config.training_data_folder + 'test_window_times_4_.npy',
                window_times_test)