Python get_sliding_window_matrix Exemples, timely.utils.tools.get_sliding_window_matrix Python Exemples

Exemple #1

0

Afficher le fichier

def predict_anomaly(ds, model, kernel, with_skip=True):
    """Predict one value per row of ``ds`` from window-level predictions.

    The rows of ``ds`` are turned into sliding windows of length ``kernel``
    with ``get_sliding_window_matrix``, ``model.predict`` is called on the
    window matrix, and every window prediction is repeated ``stride`` times
    to obtain a row-aligned result.

    Args:
        ds: pandas object exposing ``.values``, ``.index`` and ``len()``.
        model: fitted object exposing ``predict(x)``.
        kernel: window length passed to ``get_sliding_window_matrix``.
        with_skip: when true the stride equals ``kernel``; otherwise the
            stride is 1.

    Returns:
        ``pd.Series`` named ``'features'`` indexed like ``ds``. Rows that
        receive no prediction keep the value 0.
    """
    stride = kernel if with_skip else 1

    # Create set
    print("Create testing set")
    x_test = get_sliding_window_matrix(ds.values, kernel, stride)

    # Testing
    print('Testing...')
    y_pred = model.predict(x_test)

    # Expand results: repeat each window prediction ``stride`` times
    y_pred = [val for val in y_pred for _ in range(stride)]
    res = np.zeros((len(ds)))

    # Guard the empty case: ``res[-0:]`` addresses the WHOLE array, so
    # assigning an empty prediction list to it would raise a broadcast error.
    if y_pred:
        if with_skip:
            # Predictions are aligned with the beginning of the series
            res[:len(y_pred)] = y_pred
        else:
            # Predictions are aligned with the end of the series
            res[-len(y_pred):] = y_pred

    y_pred = pd.Series(res, index=ds.index, name='features')

    return y_pred

Exemple #2

0

Afficher le fichier

def prepare_data(ds_list, labels, kernel, stride=1):
    """Stack the sliding-window matrices of several datasets with labels.

    Args:
        ds_list: iterable of objects exposing ``.values``.
        labels: iterable of labels, paired positionally with ``ds_list``.
        kernel: window length passed to ``get_sliding_window_matrix``.
        stride: window step passed to ``get_sliding_window_matrix``.

    Returns:
        Tuple ``(x, y)``: ``x`` is the vertical stack of all window matrices
        and ``y`` repeats each label once per window of its paired matrix.
    """
    # One window matrix per input dataset
    windows = []
    for ds in ds_list:
        windows.append(get_sliding_window_matrix(ds.values, kernel, stride))

    # One run of identical labels per window matrix
    label_runs = []
    for label, matrix in zip(labels, windows):
        label_runs.append([label] * len(matrix))
    y = np.hstack(label_runs)

    # Single matrix holding every window
    x = np.vstack(windows)

    return x, y

Exemple #3

0

Afficher le fichier

Fichier : anomaly.py Projet : softlab-unimore/SBDIOI40

def main():
    """Fit a model on a train CSV and print the anomalies of a test CSV.

    Parameters come from ``get_argument()``: the keys ``train``, ``test``,
    ``features_list``, ``kernel``, ``stride``, ``model_type`` and
    ``model_params`` are required; ``resample_rate`` (default 6400) and
    ``custom_resample`` (default False) are optional.
    """
    params = get_argument()

    # model input
    train_file, test_file = params['train'], params['test']

    # feature params
    features_list = params['features_list']
    kernel, stride = params['kernel'], params['stride']

    # feature extraction
    resample_rate = params.get('resample_rate', 6400)
    custom_resample = params.get('custom_resample', False)

    # model params
    model_type = params['model_type']
    params_file = params['model_params']

    def load_dataset(path, read_msg, length_msg):
        # Read a CSV, keep the selected features, resample it and report
        # the length before and after resampling.
        print(read_msg, os.path.basename(path))
        frame = pd.read_csv(path)

        if features_list:
            frame = frame[features_list]

        original_len = len(frame)
        if custom_resample:
            frame = resample_with_feature_extractor(frame, resample_rate)
        elif resample_rate > 1:
            frame = resample(frame, resample_rate)

        print(length_msg, original_len)
        print('New File Length {} {:.02f}'.format(
            len(frame), 100 * len(frame) / original_len))
        return frame

    # Train data
    ds_train = load_dataset(train_file, "Read Train File: ",
                            'Train Original File Length: ')

    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
    print('Train shape ', x_train.shape)

    # Build and fit the model
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    print("Training...")
    model.fit(x_train)

    # Test data
    ds_test = load_dataset(test_file, "Read Test File: ",
                           'Test Original File Length: ')

    print('Testing...')
    y_pred = predict_anomaly(ds_test, model, kernel, with_skip=False)

    # Encode the predictions as triplets and display them as a table
    results = create_triplet_time_series(y_pred, with_support=True)

    print("Results:")
    results = pd.DataFrame(results)
    print(tabulate(results, headers='keys', tablefmt='psql'))

Exemple #4

0

Afficher le fichier

def main():
    """Train one model per setting state and column, then evaluate files.

    Reads names that are not defined inside the function and are expected
    at module level: ``global_dir``, ``settings_file``, ``model_type``,
    ``constant_normal_files``, ``normal_file_id_list``, ``kernel`` and
    ``stride``.

    Steps:
      1. Associate each ``.CSV`` file of ``global_dir`` with a setting label
         and collect the files for which ``check_setup`` or
         ``lazy_check_setup`` is true.
      2. For every state present in ``constant_normal_files``, fit one
         model per column of the state's normal file and, for each file of
         that state, count the predictions equal to 1.
      3. Write the per-file rows to
         ``../results/<model_type>_evaluation.CSV`` and print accuracy,
         precision, recall and F-score (0.0 when a ratio is undefined).

    Returns ``None``; returns early when a normal file is ``None`` or
    unreadable, when a test file is unreadable, or when ``model_type`` is
    not one of the supported values.
    """
    # Get all .CSV files in global folder
    files = get_files(global_dir, ext='.CSV')
    print('Found {} files'.format(len(files)))

    # Get settings dataset, where each row represent a new setting entry
    ds_settings = get_time_series_dataset(settings_file, sep=';', col='DT')
    print('Found {} settings'.format(len(ds_settings)))

    # Identify settings label
    label_settings(ds_settings, ds_settings.columns[:13])
    ds_settings.ltime = pd.to_datetime(ds_settings.ltime)
    ds_settings.rtime = pd.to_datetime(ds_settings.rtime)
    print('Found {} unique settings'.format(len(np.unique(ds_settings.label))))

    settings_map = {}
    setup_files = []

    # Create settings map that associates a setting to each file
    print('\nSettings File identification')
    for file in files:
        # Read dataset
        ds = get_time_series_dataset(file, sep=';', col='DT')

        # Get nearest left setting
        setting = get_settings(ds, ds_settings)

        # Update settings_map
        settings_map.setdefault(str(setting.label), []).append(file)

        # Check if the setting start overlap with file timely interval
        if check_setup(ds, setting):
            print('Found setup {}: {} - {} in ds {} - {}'.format(
                setting.label, setting.ltime, setting.rtime, ds.index.min(),
                ds.index.max()))
            setup_files += [file]

        elif lazy_check_setup(ds, setting):
            print('Found lazy setup {}: {} - {} in ds {} - {}'.format(
                setting.label, setting.ltime, setting.rtime, ds.index.min(),
                ds.index.max()))
            setup_files += [file]

    print('Number of timely series with setup: {}'.format(len(setup_files)))

    y_pred_single = {}
    y_true_single = {}

    normal_files = {}

    # For each state we train a models with a "normal" file and predict anomalies
    print('\nTraining and Testing - {}'.format(model_type))
    for k, val in settings_map.items():
        print('\nState {} has {} files'.format(k, len(val)))

        # Get normal file from constant_normal_files dictionary
        if k not in constant_normal_files:
            # This branch means NO normal file is configured for the state
            print('Skip, normal file not found')
            continue

        normal_file = constant_normal_files[k]
        normal_files[k] = normal_file

        if normal_file is None:
            print('Impossible get normal file')
            return

        # Training
        ds_train = get_time_series_dataset(filename=normal_file,
                                           sep=';',
                                           col='DT')
        # Check train
        if ds_train is None:
            print('Impossible read train file')
            return

        y_pred_single[k] = {}
        y_true_single[k] = {}

        for col in ds_train.columns:
            x_train = ds_train[[col]]
            x_train = get_sliding_window_matrix(x_train.values, kernel, stride)

            # Selected models
            if model_type == 'pca':
                model = PCA(n_components=0.95, threshold=100, c_alpha=3.2905)
            elif model_type == 'clustering':
                model = SetupClustering(distance="cosine",
                                        max_dist=0.001,
                                        anomaly_threshold=0.0001)
            elif model_type == 'svm':
                model = OneClassSVM(nu=0.001,
                                    tol=0.001,
                                    kernel="rbf",
                                    gamma="scale")
            elif model_type == 'lof':
                model = LOF(n_neighbors=50,
                            algorithm='auto',
                            metric='minkowski',
                            contamination='auto')
            elif model_type == 'if':
                model = IsolationForest(n_estimators=200,
                                        max_samples=512,
                                        contamination=0.0003,
                                        max_features=0.8)
            else:
                print("Select the wrong models")
                return

            # Training
            print("Training... state {} col {}".format(k, col))
            model.fit(x_train)

            y_pred_single[k][col] = []
            y_true_single[k][col] = []

            print("Testing...")
            for file in val:
                # Provisional label only: y_true is recomputed from
                # normal_file_id_list before the evaluation below
                if file in setup_files:
                    y_true_single[k][col].append(1)
                else:
                    y_true_single[k][col].append(0)

                x_test = get_time_series_dataset(filename=file,
                                                 sep=';',
                                                 col='DT')

                # Check test
                if x_test is None:
                    print('Impossible read test file')
                    return

                # Create testing values (non-overlapping windows)
                x_test = x_test[[col]]
                x_test = get_sliding_window_matrix(x_test.values, kernel,
                                                   kernel)

                # Testing
                y_pred = model.predict(x_test)

                # Save number of detected anomalies
                # NOTE(review): assumes every model marks anomalies with 1;
                # confirm for each model class used above.
                y_pred_single[k][col].append(len(y_pred[y_pred == 1]))

    print('\nSelected normal files:')
    for k, file in normal_files.items():
        print("State {} -> {}".format(k, file))

    # Create result dataset: one row per (state, column, file)
    pred_rows = []
    true_rows = []
    col_rows = []
    file_rows = []
    state_rows = []

    for k, per_col in y_pred_single.items():
        for col, preds in per_col.items():
            # Predictions were appended in the order of settings_map[k]
            for pred, true, file in zip(preds, y_true_single[k][col],
                                        settings_map[k]):
                pred_rows.append(pred)
                true_rows.append(true)
                col_rows.append(col)
                file_rows.append(file)
                state_rows.append(k)

    res_ds = pd.DataFrame({
        'file': file_rows,
        'cols': col_rows,
        'states': state_rows,
        'y_pred': pred_rows,
        'y_true': true_rows
    })

    # Create real ground truth
    # Keep only the file name, accepting both '\\' and '/' as separators so
    # the comparison with normal_file_list also works on POSIX-style paths.
    res_ds['file'] = res_ds['file'].apply(
        lambda x: x.replace('\\', '/').rsplit('/', 1)[-1])
    normal_file_list = ["File ({}).CSV".format(x) for x in normal_file_id_list]
    res_ds['y_true'] = 1
    res_ds.loc[res_ds['file'].isin(normal_file_list), 'y_true'] = 0

    # Save results
    res_ds.to_csv('../results/{}_evaluation.CSV'.format(model_type),
                  sep=';',
                  index=False)

    # Evaluation
    print("\nEvaluation")
    true_positive = len(res_ds[(res_ds['y_pred'] > 0)
                               & (res_ds['y_true'] > 0)])
    false_positive = len(res_ds[(res_ds['y_pred'] > 0)
                                & (res_ds['y_true'] == 0)])
    true_negative = len(res_ds[(res_ds['y_pred'] <= 0)
                               & (res_ds['y_true'] == 0)])
    false_negative = len(res_ds[(res_ds['y_pred'] <= 0)
                                & (res_ds['y_true'] > 0)])

    def percentage(numerator, denominator):
        # Undefined ratios (empty denominator) are reported as 0.0 instead
        # of raising ZeroDivisionError.
        return 100 * numerator / denominator if denominator else 0.0

    acc = percentage(true_positive + true_negative, len(res_ds))
    print("Accuracy: {}".format(acc))

    precision = percentage(true_positive, true_positive + false_positive)
    print("Precision: {}".format(precision))
    recall = percentage(true_positive, true_positive + false_negative)
    print("Recall: {}".format(recall))
    if precision + recall:
        f_score = 2 * precision * recall / (precision + recall)
    else:
        f_score = 0.0
    print("F-score: {}".format(f_score))

Exemple #5

0

Afficher le fichier

Fichier : evaluation_behaviour_multi.py Projet : softlab-unimore/SBDIOI40

def main():
    """Train on one file per state and test on every other file.

    Parameters come from ``get_argument()``: ``all_state_folder``,
    ``features_list``, ``kernel``, ``stride`` and ``model_type`` are
    required; ``resample_rate`` (default 6400), ``with_decision_score``
    (default False) and ``custom_resample`` (default False) are optional.

    A train pack takes the i-th ``lvm`` file of each of the first three
    folders; only packs of three files are used. For every pack a model is
    fitted on the stacked window matrices and then applied to every file
    that is not part of the pack. One record per (pack, test file) is
    collected and written to ``./results/results_multi_<model_type>.csv``.
    """
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']
    model_type = params['model_type']
    resample_rate = params.get('resample_rate', 6400)
    with_decision_score = params.get('with_decision_score', False)
    custom_resample = params.get('custom_resample', False)

    with_skip = False
    # Loop-invariant: the stride used to build the test windows
    test_stride = kernel if with_skip else 1

    params_file = './params/params_{}.json'.format(model_type)
    save_result = True
    overwrite = True
    output_dir = './results'

    result_array = []

    # Get list of list of files, where for each state we have a list of file
    curr_files = []

    # Get list of test files
    test_files = []

    for folder in all_state_folder:
        files = get_files(folder, ext='lvm')
        curr_files.append(files)
        test_files += files

    # default=0 keeps the run alive (with no train pack) when no folder
    # was provided, instead of failing on min() of an empty sequence
    max_size = min((len(files) for files in curr_files[:3]), default=0)

    # Get train files where each element is a list of files for a single train
    train_files = []
    for i in range(max_size):
        train_pack = [files[i] for files in curr_files[:3]]

        for j in range(1, len(train_pack)):
            train_files.append(train_pack[:j + 1])

    for train_pack in train_files:
        # Only the complete packs (one file for each of three states) are used
        if len(train_pack) < 3:
            continue

        print('\n' + '\\\\//' * 20)

        selected_files = []
        train_states = []
        x_states = []

        print('\n Train Pack')
        for train_file in train_pack:
            # The state is the name of the folder that contains the file
            train_state = os.path.split(os.path.dirname(train_file))[-1]
            print("State: ", train_state)
            print("Read File: ", os.path.basename(train_file))
            ds_train = read_ds_lvm(train_file, get_header=False)

            # Check train
            if ds_train is None or ds_train.empty:
                print('Impossible read train file')
                continue

            # Select features
            ds_train = ds_train[features_list]

            # Resample
            train_len = len(ds_train)
            if custom_resample:
                ds_train = resample_with_feature_extractor(
                    ds_train, resample_rate)
            else:
                ds_train = resample(ds_train, resample_rate)

            print('Original File Length: ', train_len)
            print('New File Length {} {:.02f}'.format(
                len(ds_train), 100 * len(ds_train) / train_len))

            # Create training set
            print("Create set")
            x_train = get_sliding_window_matrix(ds_train.values, kernel,
                                                stride)
            print('Shape ', x_train.shape)

            selected_files.append(train_file)
            train_states.append(train_state)
            x_states.append(x_train)

        # np.vstack raises on an empty list, which happens when every
        # train file of the pack was unreadable: skip such a pack.
        if not x_states:
            print('No readable train file in pack, skip')
            continue

        x_states = np.vstack(x_states)
        print('\n Train Size: ', x_states.shape)
        print('Train state: ', train_states)

        # Model initialization
        print("Model initialization: {}".format(model_type))
        model = get_model(model_type, params_file=params_file)

        # Training
        print("Training...")
        model.fit(x_states)

        for test_file in test_files:

            test_state = os.path.split(os.path.dirname(test_file))[-1]

            # Never test on a file used for training
            if test_file in selected_files:
                continue

            print("\n State Test: ", test_state)
            print("Read Test File: ", os.path.basename(test_file))
            ds_test = read_ds_lvm(test_file, get_header=False)

            # Check test
            if ds_test is None or ds_test.empty:
                print('Impossible read test file')
                continue

            # Select features
            ds_test = ds_test[features_list]

            # Resample
            test_len = len(ds_test)
            if custom_resample:
                ds_test = resample_with_feature_extractor(
                    ds_test, resample_rate)
            else:
                ds_test = resample(ds_test, resample_rate)
            print('Test Original File Length: ', test_len)
            print('New File Length {} {:.02f}'.format(
                len(ds_test), 100 * len(ds_test) / test_len))

            # Create set
            print("Create testing set")
            x_test = get_sliding_window_matrix(ds_test.values, kernel,
                                               test_stride)
            print('Test shape ', x_test.shape)

            # Testing
            print('Testing...')
            if with_decision_score:
                y_pred = model.decision_score(x_test)
            else:
                y_pred = model.predict(x_test)

            # Number of positive predictions, mean over all predictions and
            # mean over the positive predictions only
            num_error = np.sum(y_pred > 0)
            mean_error = np.mean(y_pred)
            if num_error > 0:
                mean_only_error = np.mean(y_pred[y_pred > 0])
            else:
                mean_only_error = 0

            if not num_error:
                print("Results: NO Anomaly founded")
            else:
                print("Results: {} anomalies "
                      "({:.05f} total {})".format(num_error, mean_error,
                                                  len(x_test)))

            result_record = {
                'MODEL': model_type,
                'KERNEL': kernel,
                'STRIDE': stride,
                'TRAIN_STATE': train_states,
                'TRAIN': [
                    os.path.basename(selected_file)
                    for selected_file in selected_files
                ],
                'TEST_STATE': test_state,
                'TEST': os.path.basename(test_file),
                'NUM_SINGLE_ANOMALY': num_error,
                'PCT_ANOMALY': mean_error,
                'NUM_SAMPLE_ANOMALY': mean_only_error,
                'NUM_SAMPLE': len(x_test),
                # True when the test state was not seen during training
                'LABEL': test_state not in train_states
            }

            result_array.append(result_record)

    if save_result:
        os.makedirs(output_dir, exist_ok=True)

        filename = os.path.join(output_dir,
                                'results_multi_' + model_type + '.csv')

        result_ds = pd.DataFrame(result_array)

        # Append to the previous results unless overwriting
        if os.path.isfile(filename) and not overwrite:
            prev_result_ds = pd.read_csv(filename)
            result_ds = pd.concat([prev_result_ds, result_ds],
                                  axis=0,
                                  ignore_index=True)

        result_ds.to_csv(filename, index=False)

Exemple #6

0

Afficher le fichier

Fichier : evaluation_behaviour.py Projet : softlab-unimore/SBDIOI40

def main():
    """Fit a model on one file and report the anomalies of another file.

    Reads names that are not defined inside the function and are expected
    at module level: ``train_file``, ``test_file``, ``features_list``,
    ``custom_resample``, ``resample_rate``, ``kernel``, ``stride``,
    ``model_type``, ``params_file``, ``with_skip`` and
    ``with_decision_score``. Returns ``None``; returns early when the train
    or the test file cannot be read.
    """
    # Same resampling function for train and test
    resampler = resample_with_feature_extractor if custom_resample else resample

    # ---- Train data ----
    train_state = os.path.split(os.path.dirname(train_file))[-1]
    print("\n State Train: ", train_state)
    print("Read Train File: ", os.path.basename(train_file))
    ds_train = read_ds_lvm(train_file, get_header=False)

    if ds_train is None or ds_train.empty:
        print('Impossible read train file')
        return

    ds_train = ds_train[features_list]

    train_len = len(ds_train)
    ds_train = resampler(ds_train, resample_rate)
    print('Train Original File Length: ', train_len)
    print('New File Length {} {:.02f}'.format(len(ds_train),
                                              100 * len(ds_train) / train_len))

    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
    print('Train shape ', x_train.shape)

    # ---- Model ----
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    print("Training...")
    model.fit(x_train)

    # ---- Test data ----
    test_state = os.path.split(os.path.dirname(test_file))[-1]
    print("\n State Test: ", test_state)
    print("Read Test File: ", os.path.basename(test_file))
    ds_test = read_ds_lvm(test_file, get_header=False)

    if ds_test is None or ds_test.empty:
        print('Impossible read test file')
        return

    ds_test = ds_test[features_list]

    test_len = len(ds_test)
    ds_test = resampler(ds_test, resample_rate)
    print('Test Original File Length: ', test_len)
    print('New File Length {} {:.02f}'.format(len(ds_test),
                                              100 * len(ds_test) / test_len))

    # Windows do not overlap when skipping, otherwise they advance by one
    test_stride = kernel if with_skip else 1

    print("Create testing set")
    x_test = get_sliding_window_matrix(ds_test.values, kernel, test_stride)
    print('Test shape ', x_test.shape)

    # ---- Prediction ----
    print('Testing...')
    scorer = model.decision_score if with_decision_score else model.predict
    y_pred = scorer(x_test)

    # Count of positive predictions and the two means that are reported
    num_error = np.sum(y_pred > 0)
    mean_error = np.mean(y_pred)
    mean_only_error = np.mean(y_pred[y_pred > 0]) if num_error > 0 else 0

    if num_error:
        print("Results: {} anomalies "
              "({:.05f} {:.05f} total {})".format(num_error,
                                                  mean_error, mean_only_error,
                                                  len(x_test)))
    else:
        print("Results: NO Anomaly founded")

    # Encoding results into triplet formats
    results = pd.DataFrame(
        create_triplet_time_series(y_pred, with_support=True))

    if results.empty:
        print("Results: NO Anomaly founded")
        return

    # Number of test samples of kernel length
    test_sample = int((len(ds_test) - kernel) / test_stride) + 1

    # Number of single anomaly point
    tot = results['support'].sum()
    pct_tot = 100 * tot / (test_sample * test_stride)

    print("Results: {} (record {:.02f})".format(tot, pct_tot))

    if with_skip:
        # Number of anomaly sample
        tot_sample = int(tot / test_stride)
        print("Anomaly Sample: {} (test sample {:.02f})".format(
            int(tot_sample), test_sample))

Exemple #7

0

Afficher le fichier

Fichier : performance.py Projet : softlab-unimore/SBDIOI40

def main():
    """Measure fit and predict times of every model on growing data sets.

    For each model, train sets and test sets of increasing size are built
    from ``selected_files``; the fit time and the predict time of every
    (train set, test set) pair are stored and finally written to
    ``./results/performance.csv``.

    Raises:
        ValueError: when a train or a test file cannot be read.
    """
    output_dir = './results'

    selected_files = [
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 3/testaccelerometri_1.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 4/testaccelerometri.lvm",
    ]

    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3",
    ]

    stride = 1

    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]

    # These models use a window of 180, every other model a window of 10
    long_kernel_models = ['cnn', 'lstm', 'deep']

    resample_rate = 6400

    # Performance records, one per (model, train set, test set)
    result_array = []

    def build_windows(path, kernel, error_msg):
        # Read one file, keep the features, resample and window it
        ds = read_ds_lvm(path, get_header=False)
        if ds is None or ds.empty:
            raise ValueError(error_msg)
        ds = resample(ds[features_list], resample_rate)
        return get_sliding_window_matrix(ds.values, kernel, stride)

    for model_type in model_list:
        kernel = 180 if model_type in long_kernel_models else 10

        print('\n\n')
        print('\nModel: {}\n'.format(model_type))

        params_file = './params/params_{}.json'.format(model_type)

        # Train cycle over the prefixes of selected_files
        for train_count in range(1, len(selected_files) + 1):
            train_parts = []

            for pos, train_file in enumerate(selected_files[:train_count]):
                # The first file is used only when it is the whole prefix
                if pos == 0 and train_count > 1:
                    continue

                windows = build_windows(train_file, kernel,
                                        'Impossible read train file')

                # Only the first half of the first file is kept
                if pos == 0:
                    windows = windows[:len(windows) // 2]

                train_parts.append(windows)

            x_train = np.vstack(train_parts)

            print('\nTrain size: {}\n'.format(len(x_train)))

            model = get_model(model_type, params_file=params_file)

            # Timed fit
            train_start = datetime.now()
            model.fit(x_train)
            train_end = datetime.now()

            # Test cycle over the prefixes of selected_files
            for test_count in range(1, len(selected_files) + 1):
                test_parts = []

                for pos, test_file in enumerate(selected_files[:test_count]):
                    if pos == 0 and test_count > 1:
                        continue

                    windows = build_windows(test_file, kernel,
                                            'Impossible read test file')

                    # Only the first window of the first file is kept
                    if pos == 0:
                        windows = windows[:1]

                    test_parts.append(windows)

                x_test = np.vstack(test_parts)

                print('\nTest size: {}\n'.format(len(x_test)))

                # Timed predict
                test_start = datetime.now()
                model.predict(x_test)
                test_end = datetime.now()

                result_array.append({
                    'model': model_type,
                    'train_size': len(x_train),
                    'train_time': train_end - train_start,
                    'test_size': len(x_test),
                    'test_time': test_end - test_start,
                })

    # Save results
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir, exist_ok=True)
    result_ds = pd.DataFrame(result_array)
    result_ds.to_csv(os.path.join(output_dir, 'performance.csv'), index=False)

Exemple #8

0

Afficher le fichier

def main():
    """Fit a model on a train file, predict a test file, show the results.

    Reads names that are not defined inside the function and are expected
    at module level: ``train_file``, ``test_file``, ``sep``,
    ``datetime_col``, ``features_list``, ``model_type``, ``params_file``,
    ``kernel``, ``stride``, ``save_model``, ``save`` and ``output_dir``.

    Raises:
        ValueError: when the train or test file cannot be read, or when
            ``features_list`` names a column missing from the train data.
        AssertionError: when train and test columns differ.
    """
    print('Read input data')

    # Train dataset
    print('train: {}'.format(train_file))
    ds_train = get_time_series_dataset(filename=train_file, sep=sep, col=datetime_col)
    if ds_train is None:
        raise ValueError('Impossible read train file')

    # Test dataset
    print('test: {}'.format(test_file))
    ds_test = get_time_series_dataset(filename=test_file, sep=sep, col=datetime_col)
    if ds_test is None:
        raise ValueError('Impossible read test file')
    print('from {} to {}'.format(ds_test.index.min(), ds_test.index.max()))

    # Resolve the features: every train column when none was requested
    print('Select features')
    if features_list:
        if set(features_list).difference(set(ds_train.columns)):
            raise ValueError('Select the wrong features')
        features = features_list
    else:
        features = ds_train.columns.to_list()

    assert np.all(ds_train.columns == ds_test.columns), 'Train and Test file have different features'

    ds_train, ds_test = ds_train[features], ds_test[features]

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Training windows
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)

    # Training
    print("Training...")
    model.fit(x_train)

    # Option 1: Save trained models
    if save_model:
        model_path = os.path.join(output_dir, 'model_{}.pkl'.format(model_type))
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        # Persist the model, then continue with the reloaded copy
        joblib.dump(model, model_path)
        model = joblib.load(model_path)

    # Testing
    print('Testing...')
    y_pred = predict_anomaly(ds_test, model, kernel, with_skip=False)

    # Encoding results into triplet formats
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    print("Results:")
    results = pd.DataFrame(results)
    print(tabulate(results, headers='keys', tablefmt='psql'))

    # Save results next to the model, named after the test file
    if save:
        result_name = 'results_' + model_type + '_' + os.path.basename(test_file)
        result_path = os.path.join(output_dir, result_name)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        results.to_csv(result_path, sep=sep, index=False)

Exemple #9

0

Afficher le fichier

Fichier : full_evaluation_big.py Projet : softlab-unimore/SBDIOI40

def main():
    """Train every configured model on runs of files and score the others.

    For each model type, every run of ``size`` consecutive files whose parent
    directory (the "state") is the same is turned into one training set with
    ``get_sliding_window_matrix``. The fitted model then predicts on every
    file that was not part of that training run, and one summary record per
    (training run, test file) pair is appended to ``result_array``.

    When ``save_result`` is true, the records collected so far are written to
    ``<output_dir>/results_single_<size><model_type>.csv`` after each model.
    """
    params = get_argument()
    all_state_folder = params['all_state_folder']

    # Number of consecutive files stacked into a single training set
    size = 3

    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3"
    ]

    # Stride passed to get_sliding_window_matrix for the training data
    stride = 1

    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]

    # One kernel per model: 180 for cnn/lstm/deep, 10 for every other model
    kernel_list = [
        180 if model_type in ['cnn', 'lstm', 'deep'] else 10
        for model_type in model_list
    ]

    resample_rate = 6400

    save_result = True
    output_dir = './results'

    # Initialize result array to memorize result
    # for each train and test step
    result_array = []

    # Get files from selected folder to use for training and testing
    curr_files = []
    for folder in all_state_folder:
        curr_files.extend(get_files(folder, ext='lvm'))

    test_files = curr_files

    def state_of(path):
        # The state of a file is the name of the directory that contains it
        return os.path.split(os.path.dirname(path))[-1]

    for model_type, kernel in zip(model_list, kernel_list):
        print('\n' + '\\\\//' * 20)
        print('\n Model: {}\n'.format(model_type))

        params_file = './params/params_{}.json'.format(model_type)

        for pos, train_file in enumerate(curr_files):
            skip_step = False
            train_state = state_of(train_file)

            x_train = []

            print("\n State Train: ", train_state)

            for i in range(size):
                if pos + i >= len(curr_files):
                    print('Not enough files')
                    skip_step = True
                    break

                tmp_file = curr_files[pos + i]
                tmp_state = state_of(tmp_file)

                # A training run must not cross a state boundary
                if tmp_state != train_state:
                    print('Different state and skip current train')
                    skip_step = True
                    break

                print("Read {} Train File: {}".format(
                    i, os.path.basename(tmp_file)))

                ds_tmp = read_ds_lvm(tmp_file, get_header=False)

                # Check train
                if ds_tmp is None or ds_tmp.empty:
                    print('Impossible read train file')
                    skip_step = True
                    break

                # Select features
                ds_tmp = ds_tmp[features_list]
                # Resample
                ds_tmp = resample(ds_tmp, resample_rate)
                # Create training set
                x_tmp = get_sliding_window_matrix(ds_tmp.values, kernel,
                                                  stride)

                x_train.append(x_tmp)

            if skip_step:
                print('Skip current train')
                continue

            # Every file that contributed to this training set. Reaching this
            # point means all ``size`` files were read, so the slice is full.
            train_window = set(curr_files[pos:pos + size])

            # Train set
            x_train = np.vstack(x_train)
            train_len = len(x_train)
            print('\nTrain size: {}\n'.format(x_train.shape))

            # Model initialization
            print("Model initialization: {}".format(model_type))
            model = get_model(model_type, params_file=params_file)

            # Training
            print("Training...")
            model.fit(x_train)

            for test_file in test_files:
                # Never score a file the model was fitted on. Skipping only
                # ``train_file`` would leave the other ``size - 1`` training
                # files in the test set and leak training data into results.
                if test_file in train_window:
                    continue

                test_state = state_of(test_file)

                print("\n State Test: ", test_state)
                print("Read Test File: ", os.path.basename(test_file))
                ds_test = read_ds_lvm(test_file, get_header=False)

                # Check test
                if ds_test is None or ds_test.empty:
                    print('Impossible read test file')
                    continue

                # Select features
                ds_test = ds_test[features_list]

                # Resample (test_len keeps the length before resampling)
                test_len = len(ds_test)
                ds_test = resample(ds_test, resample_rate)
                print('Test Original File Length: ', test_len)
                print('New File Length {} {:.02f}'.format(
                    len(ds_test), 100 * len(ds_test) / test_len))

                test_stride = 1

                # Create set
                print("Create testing set")
                x_test = get_sliding_window_matrix(ds_test.values, kernel,
                                                   test_stride)
                print('Test shape ', x_test.shape)

                # Testing
                print('Testing...')
                y_pred = model.predict(x_test)

                # Predictions greater than zero are counted as anomalies
                anomaly_mask = y_pred > 0
                num_error = np.sum(anomaly_mask)
                mean_error = np.mean(y_pred)
                if num_error > 0:
                    mean_only_error = np.mean(y_pred[anomaly_mask])
                else:
                    mean_only_error = 0

                if num_error > 0:
                    print("Results: {} anomalies "
                          "({:.05f} total {})".format(num_error, mean_error,
                                                      len(x_test)))
                else:
                    print("Results: NO Anomaly founded")

                result_record = {
                    'MODEL': model_type,
                    'KERNEL': kernel,
                    'STRIDE': stride,
                    'TRAIN_STATE': train_state,
                    'TRAIN': os.path.basename(train_file),
                    'TRAIN_SIZE': train_len,
                    'TEST_STATE': test_state,
                    'TEST': os.path.basename(test_file),
                    'TEST_LEN': test_len,
                    'NUM_SINGLE_ANOMALY': num_error,
                    'PCT_ANOMALY': mean_error,
                    'NUM_SAMPLE_ANOMALY': mean_only_error,
                    'NUM_SAMPLE': len(x_test),
                    'LABEL': train_state != test_state
                }

                result_array.append(result_record)

        if save_result:
            # exist_ok makes a separate "does it exist" check unnecessary
            os.makedirs(output_dir, exist_ok=True)

            filename = os.path.join(
                output_dir,
                'results_single_{}'.format(size) + model_type + '.csv')

            # result_array is not reset between models, so each file also
            # contains the records of the models evaluated before this one.
            result_ds = pd.DataFrame(result_array)

            result_ds.to_csv(filename, index=False)