def predict_anomaly(ds, model, kernel, with_skip=True):
    if with_skip:
        stride = kernel
    else:
        stride = 1

    # Create set
    print("Create testing set")
    x_test = get_sliding_window_matrix(ds.values, kernel, stride)

    # Testing
    print('Testing...')
    y_pred = model.predict(x_test)

    # Expand results
    y_pred = [val for val in y_pred for _ in range(stride)]

    res = np.zeros(len(ds))
    if with_skip:
        res[:len(y_pred)] = y_pred
    else:
        res[-len(y_pred):] = y_pred

    y_pred = pd.Series(res, index=ds.index, name='features')
    return y_pred
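
# Usage sketch for predict_anomaly (illustrative only). It assumes `model` is whatever
# get_model() returns elsewhere in this repo: a fitted detector whose .predict() marks
# anomalous windows with values > 0. The variable names below are placeholders.
#
#   model = get_model(model_type, params_file=params_file)
#   model.fit(get_sliding_window_matrix(ds_train.values, kernel, stride))
#   y_pred = predict_anomaly(ds_test, model, kernel, with_skip=True)
#   anomalous_rows = y_pred[y_pred > 0]  # per-record flags aligned to ds_test.index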
def prepare_data(ds_list, labels, kernel, stride=1):
    # Create a sliding window matrix for each training series
    x_list = [
        get_sliding_window_matrix(ds.values, kernel, stride)
        for ds in ds_list
    ]

    # Assign a label to each matrix row
    y = np.hstack([[i] * len(x) for i, x in zip(labels, x_list)])

    # Concatenate the matrices
    x = np.vstack(x_list)
    return x, y
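
# NOTE (assumption): get_sliding_window_matrix is not defined in this file. From the way
# it is called above, it is assumed to slice a (n_rows, n_features) array into windows of
# `kernel` consecutive rows taken every `stride` rows. A minimal sketch of that assumption
# follows; the real helper may keep windows 3-D for the sequence models (cnn, lstm, deep).
def _sliding_window_matrix_sketch(data, kernel, stride=1):
    """Hypothetical reference implementation, for illustration only."""
    import numpy as np  # local import so the sketch stays self-contained

    n_windows = (len(data) - kernel) // stride + 1
    # Each window of `kernel` rows is flattened into a single row of the output matrix
    return np.array([
        data[i * stride:i * stride + kernel].reshape(-1)
        for i in range(n_windows)
    ])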
def main():
    params = get_argument()

    # model input
    train_file = params['train']
    test_file = params['test']

    # feature params
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']

    # feature extraction
    resample_rate = params.get('resample_rate', 6400)
    custom_resample = params.get('custom_resample', False)

    # model params
    model_type = params['model_type']
    params_file = params['model_params']

    # Read train file
    print("Read Train File: ", os.path.basename(train_file))
    ds_train = pd.read_csv(train_file)

    # Select features
    if features_list:
        ds_train = ds_train[features_list]

    # Resample
    train_len = len(ds_train)
    if custom_resample:
        ds_train = resample_with_feature_extractor(ds_train, resample_rate)
    elif resample_rate > 1:
        ds_train = resample(ds_train, resample_rate)

    print('Train Original File Length: ', train_len)
    print('New File Length {} {:.02f}'.format(len(ds_train),
                                              100 * len(ds_train) / train_len))

    # Create training set
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
    print('Train shape ', x_train.shape)

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Training
    print("Training...")
    model.fit(x_train)

    print("Read Test File: ", os.path.basename(test_file))
    ds_test = pd.read_csv(test_file)

    # Select features
    if features_list:
        ds_test = ds_test[features_list]

    # Resample
    test_len = len(ds_test)
    if custom_resample:
        ds_test = resample_with_feature_extractor(ds_test, resample_rate)
    elif resample_rate > 1:
        ds_test = resample(ds_test, resample_rate)

    print('Test Original File Length: ', test_len)
    print('New File Length {} {:.02f}'.format(len(ds_test),
                                              100 * len(ds_test) / test_len))

    print('Testing...')
    y_pred = predict_anomaly(ds_test, model, kernel, with_skip=False)

    # Encoding results into triplet format
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    print("Results:")
    results = pd.DataFrame(results)
    print(tabulate(results, headers='keys', tablefmt='psql'))
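
# For reference, main() above expects get_argument() to return a dict with at least the
# keys read there. The values below are illustrative placeholders (not taken from the
# repo), except the defaults already visible in the code.
EXAMPLE_PARAMS = {
    'train': './data/train.csv',              # hypothetical train file path
    'test': './data/test.csv',                # hypothetical test file path
    'features_list': [],                      # empty list keeps all columns
    'kernel': 10,                             # sliding window length
    'stride': 1,                              # sliding window step
    'resample_rate': 6400,                    # default used via params.get
    'custom_resample': False,                 # default used via params.get
    'model_type': 'pca',                      # any type supported by get_model
    'model_params': './params/params_pca.json',
}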
def main():
    # Get all .CSV files in the global folder
    files = get_files(global_dir, ext='.CSV')
    print('Found {} files'.format(len(files)))

    # Get settings dataset, where each row represents a new setting entry
    ds_settings = get_time_series_dataset(settings_file, sep=';', col='DT')
    print('Found {} settings'.format(len(ds_settings)))

    # Identify settings label
    label_settings(ds_settings, ds_settings.columns[:13])
    ds_settings.ltime = pd.to_datetime(ds_settings.ltime)
    ds_settings.rtime = pd.to_datetime(ds_settings.rtime)
    print('Found {} unique settings'.format(len(np.unique(ds_settings.label))))

    settings_map = {}
    setup_files = []

    # Create settings map that associates a setting with each file
    print('\nSettings File identification')
    for file in files:
        # Read dataset
        ds = get_time_series_dataset(file, sep=';', col='DT')

        # Get nearest left setting
        setting = get_settings(ds, ds_settings)

        # Update settings_map
        if str(setting.label) not in settings_map:
            settings_map[str(setting.label)] = [file]
        else:
            settings_map[str(setting.label)] += [file]

        # Check if the setting interval overlaps the file's time interval
        if check_setup(ds, setting):
            print('Found setup {}: {} - {} in ds {} - {}'.format(
                setting.label, setting.ltime, setting.rtime,
                ds.index.min(), ds.index.max()))
            setup_files += [file]
        elif lazy_check_setup(ds, setting):
            print('Found lazy setup {}: {} - {} in ds {} - {}'.format(
                setting.label, setting.ltime, setting.rtime,
                ds.index.min(), ds.index.max()))
            setup_files += [file]

    print('Number of time series with setup: {}'.format(len(setup_files)))

    y_pred_single = {}
    y_true_single = {}
    normal_files = {}

    # Save settings_map and setup_files list
    # with open('../results/settings_map.json', 'w') as outfile:
    #     json.dump(settings_map, outfile)
    #
    # with open('../results/setup_files.json', 'w') as outfile:
    #     json.dump(setup_files, outfile)

    # For each state we train a model on a "normal" file and predict anomalies
    print('\nTraining and Testing - {}'.format(model_type))
    for k, val in settings_map.items():
        print('\nState {} has {} files'.format(k, len(val)))

        # Get normal file from constant_normal_files dictionary
        if k not in constant_normal_files:
            print('Skip, no normal file found')
            continue

        normal_file = constant_normal_files[k]
        normal_files[k] = normal_file
        if normal_file is None:
            print('Impossible to get normal file')
            return

        # Training
        ds_train = get_time_series_dataset(filename=normal_file, sep=';', col='DT')

        # Check train
        if ds_train is None:
            print('Impossible to read train file')
            return

        y_pred_single[k] = {}
        y_true_single[k] = {}

        for col in ds_train.columns:
            x_train = ds_train[[col]]
            x_train = get_sliding_window_matrix(x_train.values, kernel, stride)

            # Select model
            if model_type == 'pca':
                model = PCA(n_components=0.95, threshold=100, c_alpha=3.2905)
            elif model_type == 'clustering':
                model = SetupClustering(distance="cosine", max_dist=0.001,
                                        anomaly_threshold=0.0001)
            elif model_type == 'svm':
                model = OneClassSVM(nu=0.001, tol=0.001, kernel="rbf",
                                    gamma="scale")
            elif model_type == 'lof':
                model = LOF(n_neighbors=50, algorithm='auto',
                            metric='minkowski', contamination='auto')
            elif model_type == 'if':
                model = IsolationForest(n_estimators=200, max_samples=512,
                                        contamination=0.0003, max_features=0.8)
            else:
                print("Selected the wrong model")
                return

            # Training
            print("Training... state {} col {}".format(k, col))
            model.fit(x_train)

            y_pred_single[k][col] = []
            y_true_single[k][col] = []

            print("Testing...")
            for file in val:
                # y_true_single is useless here:
                # setup_files doesn't have a value for every label
                if file in setup_files:
                    y_true_single[k][col].append(1)
                else:
                    y_true_single[k][col].append(0)

                x_test = get_time_series_dataset(filename=file, sep=';', col='DT')

                # Check test
                if x_test is None:
                    print('Impossible to read test file')
                    return

                # Create testing values
                x_test = x_test[[col]]
                x_test = get_sliding_window_matrix(x_test.values, kernel, kernel)

                # Testing
                y_pred = model.predict(x_test)

                # Save number of detected anomalies
                y_pred_single[k][col].append(len(y_pred[y_pred == 1]))

        # break

    print('\nSelected normal files:')
    for k, file in normal_files.items():
        print("State {} -> {}".format(k, file))

    # Create result dataset
    y_pred = []
    y_true = []
    cols = []
    files = []
    states = []
    for k in y_pred_single.keys():
        for col in y_pred_single[k].keys():
            i = 0
            for pred, true in zip(y_pred_single[k][col], y_true_single[k][col]):
                y_pred.append(pred)
                y_true.append(true)
                cols.append(col)
                files.append(settings_map[k][i])
                states.append(k)
                i += 1

    res_ds = pd.DataFrame({
        'file': files,
        'cols': cols,
        'states': states,
        'y_pred': y_pred,
        'y_true': y_true
    })

    # Create real ground truth
    res_ds['file'] = res_ds['file'].apply(lambda x: x.split('\\')[-1])
    normal_file_list = ["File ({}).CSV".format(x) for x in normal_file_id_list]
    res_ds['y_true'] = 1
    res_ds.loc[res_ds['file'].isin(normal_file_list), 'y_true'] = 0

    # Save results
    res_ds.to_csv('../results/{}_evaluation.CSV'.format(model_type),
                  sep=';', index=False)

    # Evaluation
    print("\nEvaluation")
    true_positive = len(res_ds[(res_ds['y_pred'] > 0) & (res_ds['y_true'] > 0)])
    false_positive = len(res_ds[(res_ds['y_pred'] > 0) & (res_ds['y_true'] == 0)])
    true_negative = len(res_ds[(res_ds['y_pred'] <= 0) & (res_ds['y_true'] == 0)])
    false_negative = len(res_ds[(res_ds['y_pred'] <= 0) & (res_ds['y_true'] > 0)])

    acc = 100 * (true_positive + true_negative) / len(res_ds)
    print("Accuracy: {}".format(acc))

    precision = 100 * true_positive / (true_positive + false_positive)
    print("Precision: {}".format(precision))

    recall = 100 * true_positive / (true_positive + false_negative)
    print("Recall: {}".format(recall))

    f_score = 2 * precision * recall / (precision + recall)
    print("F-score: {}".format(f_score))
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']
    model_type = params['model_type']
    resample_rate = params.get('resample_rate', 6400)
    with_decision_score = params.get('with_decision_score', False)
    custom_resample = params.get('custom_resample', False)
    # resample_rate = 12800  # 12800 samples are 1 second
    # num_sample = 1000000

    with_skip = False
    params_file = './params/params_{}.json'.format(model_type)
    save_result = True
    overwrite = True
    output_dir = './results'

    result_array = []

    # Get a list of file lists: one list of files per state
    curr_files = []
    # Get list of test files
    test_files = []
    for folder in all_state_folder:
        files = get_files(folder, ext='lvm')
        curr_files.append(files)
        test_files += files

    max_size = min([len(files) for files in curr_files[:3]])

    # Get train packs, where each element is a list of files for a single train
    train_files = []
    for i in range(max_size):
        train_pack = [files[i] for files in curr_files[:3]]
        for j in range(1, len(train_pack)):
            train_files.append(train_pack[:j + 1])

    for train_pack in train_files:
        if len(train_pack) < 3:
            continue

        print('\n' + '\\\\//' * 20)

        selected_files = []
        train_states = []
        x_states = []

        print('\n Train Pack')
        for train_file in train_pack:
            train_state = os.path.split(os.path.dirname(train_file))[-1]
            print("State: ", train_state)
            print("Read File: ", os.path.basename(train_file))
            ds_train = read_ds_lvm(train_file, get_header=False)

            # Check train
            if ds_train is None or ds_train.empty:
                print('Impossible to read train file')
                continue

            # Select features
            ds_train = ds_train[features_list]

            # Resample
            train_len = len(ds_train)
            if custom_resample:
                ds_train = resample_with_feature_extractor(ds_train, resample_rate)
            else:
                ds_train = resample(ds_train, resample_rate)
            # ds_train = ds_train[:num_sample]

            print('Original File Length: ', train_len)
            print('New File Length {} {:.02f}'.format(
                len(ds_train), 100 * len(ds_train) / train_len))

            # Create training set
            print("Create set")
            x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
            print('Shape ', x_train.shape)

            selected_files.append(train_file)
            train_states.append(train_state)
            x_states.append(x_train)

        x_states = np.vstack(x_states)
        print('\n Train Size: ', x_states.shape)
        print('Train state: ', train_states)

        # Model initialization
        print("Model initialization: {}".format(model_type))
        model = get_model(model_type, params_file=params_file)

        # Training
        print("Training...")
        model.fit(x_states)

        for test_file in test_files:
            test_state = os.path.split(os.path.dirname(test_file))[-1]

            if test_file in selected_files:
                continue

            # if test_state in train_states:
            #     continue

            print("\n State Test: ", test_state)
            print("Read Test File: ", os.path.basename(test_file))
            ds_test = read_ds_lvm(test_file, get_header=False)
            # t1 = datetime.now()

            # Check test
            if ds_test is None or ds_test.empty:
                print('Impossible to read test file')
                continue

            # Select features
            ds_test = ds_test[features_list]

            # Resample
            test_len = len(ds_test)
            if custom_resample:
                ds_test = resample_with_feature_extractor(ds_test, resample_rate)
            else:
                ds_test = resample(ds_test, resample_rate)
            # ds_test = ds_test[:num_sample]

            print('Test Original File Length: ', test_len)
            print('New File Length {} {:.02f}'.format(
                len(ds_test), 100 * len(ds_test) / test_len))

            if with_skip:
                test_stride = kernel
            else:
                test_stride = 1

            # Create testing set
            print("Create testing set")
            x_test = get_sliding_window_matrix(ds_test.values, kernel, test_stride)
            print('Test shape ', x_test.shape)

            # Testing
            print('Testing...')
            if with_decision_score:
                y_pred = model.decision_score(x_test)
            else:
                y_pred = model.predict(x_test)

            num_error = np.sum(y_pred > 0)
            mean_error = np.mean(y_pred)
            if num_error > 0:
                mean_only_error = np.mean(y_pred[y_pred > 0])
            else:
                mean_only_error = 0

            if not np.sum(y_pred > 0):
                print("Results: No anomaly found")
            else:
                print("Results: {} anomalies "
                      "({:.05f} total {})".format(num_error, mean_error,
                                                  len(x_test)))

            result_record = {
                'MODEL': model_type,
                'KERNEL': kernel,
                'STRIDE': stride,
                'TRAIN_STATE': train_states,
                'TRAIN': [os.path.basename(train_file)
                          for train_file in selected_files],
                'TEST_STATE': test_state,
                'TEST': os.path.basename(test_file),
                'NUM_SINGLE_ANOMALY': num_error,
                'PCT_ANOMALY': mean_error,
                'NUM_SAMPLE_ANOMALY': mean_only_error,
                'NUM_SAMPLE': len(x_test),
                'LABEL': test_state not in train_states
            }
            result_array.append(result_record)

    if save_result:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        filename = os.path.join(output_dir,
                                'results_multi_' + model_type + '.csv')

        result_ds = pd.DataFrame(result_array)
        if os.path.isfile(filename) and not overwrite:
            prev_result_ds = pd.read_csv(filename)
            result_ds = pd.concat([prev_result_ds, result_ds],
                                  axis=0, ignore_index=True)

        result_ds.to_csv(filename, index=False)
def main():
    train_state = os.path.split(os.path.dirname(train_file))[-1]
    print("\n State Train: ", train_state)
    print("Read Train File: ", os.path.basename(train_file))
    ds_train = read_ds_lvm(train_file, get_header=False)

    # Check train
    if ds_train is None or ds_train.empty:
        print('Impossible to read train file')
        return

    # Select features
    ds_train = ds_train[features_list]

    # Resample
    train_len = len(ds_train)
    if custom_resample:
        ds_train = resample_with_feature_extractor(ds_train, resample_rate)
    else:
        ds_train = resample(ds_train, resample_rate)
    # ds_train = ds_train[:num_sample]

    print('Train Original File Length: ', train_len)
    print('New File Length {} {:.02f}'.format(len(ds_train),
                                              100 * len(ds_train) / train_len))

    # Create training set
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
    print('Train shape ', x_train.shape)

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Training
    print("Training...")
    model.fit(x_train)

    test_state = os.path.split(os.path.dirname(test_file))[-1]
    print("\n State Test: ", test_state)
    print("Read Test File: ", os.path.basename(test_file))
    ds_test = read_ds_lvm(test_file, get_header=False)

    # Check test
    if ds_test is None or ds_test.empty:
        print('Impossible to read test file')
        return

    # Select features
    ds_test = ds_test[features_list]

    # Resample
    test_len = len(ds_test)
    if custom_resample:
        ds_test = resample_with_feature_extractor(ds_test, resample_rate)
    else:
        ds_test = resample(ds_test, resample_rate)
    # ds_test = ds_test[:num_sample]

    print('Test Original File Length: ', test_len)
    print('New File Length {} {:.02f}'.format(len(ds_test),
                                              100 * len(ds_test) / test_len))

    # Testing
    # y_pred = predict_anomaly(ds_test, model, kernel, with_skip=with_skip)
    if with_skip:
        test_stride = kernel
    else:
        test_stride = 1

    # Create testing set
    print("Create testing set")
    x_test = get_sliding_window_matrix(ds_test.values, kernel, test_stride)
    print('Test shape ', x_test.shape)

    # Testing
    print('Testing...')
    if with_decision_score:
        y_pred = model.decision_score(x_test)
    else:
        y_pred = model.predict(x_test)

    num_error = np.sum(y_pred > 0)
    mean_error = np.mean(y_pred)
    if num_error > 0:
        mean_only_error = np.mean(y_pred[y_pred > 0])
    else:
        mean_only_error = 0

    if not np.sum(y_pred > 0):
        print("Results: No anomaly found")
    else:
        print("Results: {} anomalies "
              "({:.05f} {:.05f} total {})".format(num_error, mean_error,
                                                  mean_only_error, len(x_test)))

    # Encoding results into triplet format
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    results = pd.DataFrame(results)
    if results.empty:
        print("Results: No anomaly found")
    else:
        # print(tabulate(results, headers='keys', tablefmt='psql'))
        test_stride = kernel if with_skip else 1

        # Number of test samples of kernel length
        test_sample = int((len(ds_test) - kernel) / test_stride) + 1

        # Number of single anomaly points
        tot = results['support'].sum()
        pct_tot = 100 * tot / (test_sample * test_stride)
        print("Results: {} (record {:.02f})".format(tot, pct_tot))

        if with_skip:
            # Number of anomaly samples
            tot_sample = int(tot / test_stride)
            print("Anomaly Sample: {} (test sample {:.02f})".format(
                int(tot_sample), test_sample))
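
# Worked example of the sample accounting above (illustrative numbers only):
# with len(ds_test) = 1000, kernel = 10 and with_skip = True (so test_stride = 10),
# test_sample = int((1000 - 10) / 10) + 1 = 100 non-overlapping windows are scored.
# If the triplet supports sum to tot = 50 anomalous records, then
# pct_tot = 100 * 50 / (100 * 10) = 5.0 and tot_sample = int(50 / 10) = 5 anomalous windows.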
def main():
    output_dir = './results'

    selected_files = [
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 3/testaccelerometri_1.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 4/testaccelerometri.lvm",
    ]
    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3"
    ]
    stride = 1
    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]
    kernel_list = [180 if model_type in ['cnn', 'lstm', 'deep'] else 10
                   for model_type in model_list]
    resample_rate = 6400

    # Initialize result array to store performance results
    result_array = []

    # Model cycle
    for model_type, kernel in zip(model_list, kernel_list):
        print('\n\n')
        print('\nModel: {}\n'.format(model_type))
        params_file = './params/params_{}.json'.format(model_type)

        # Train cycle
        for i in range(len(selected_files)):
            x_train = []

            # Get train
            for pos, train_file in enumerate(selected_files[:i + 1]):
                if i > 0 and pos == 0:
                    continue

                ds_train = read_ds_lvm(train_file, get_header=False)
                if ds_train is None or ds_train.empty:
                    raise ValueError('Impossible to read train file')

                ds_train = ds_train[features_list]
                ds_train = resample(ds_train, resample_rate)

                x = get_sliding_window_matrix(ds_train.values, kernel, stride)
                if pos == 0:
                    x = x[:len(x) // 2]
                x_train.append(x)

            # Train set
            x_train = np.vstack(x_train)
            print('\nTrain size: {}\n'.format(len(x_train)))

            # Model init
            model = get_model(model_type, params_file=params_file)

            # Model training
            train_start = datetime.now()
            model.fit(x_train)
            train_end = datetime.now()

            # Test cycle
            for j in range(len(selected_files)):
                x_test = []

                # Get test
                for pos, test_file in enumerate(selected_files[:j + 1]):
                    if j > 0 and pos == 0:
                        continue

                    ds_test = read_ds_lvm(test_file, get_header=False)
                    if ds_test is None or ds_test.empty:
                        raise ValueError('Impossible to read test file')

                    ds_test = ds_test[features_list]
                    ds_test = resample(ds_test, resample_rate)

                    x = get_sliding_window_matrix(ds_test.values, kernel, stride)
                    if pos == 0:
                        x = x[:1]
                    x_test.append(x)

                # Test set
                x_test = np.vstack(x_test)
                print('\nTest size: {}\n'.format(len(x_test)))

                # Model predict
                test_start = datetime.now()
                model.predict(x_test)
                test_end = datetime.now()

                result_record = {
                    'model': model_type,
                    'train_size': len(x_train),
                    'train_time': train_end - train_start,
                    'test_size': len(x_test),
                    'test_time': test_end - test_start,
                }
                result_array.append(result_record)

    # Save results
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    filename = os.path.join(output_dir, 'performance.csv')
    result_ds = pd.DataFrame(result_array)
    result_ds.to_csv(filename, index=False)
def main():
    print('Read input data')

    # Get train dataset
    print('train: {}'.format(train_file))
    ds_train = get_time_series_dataset(filename=train_file, sep=sep, col=datetime_col)

    # Check train
    if ds_train is None:
        raise ValueError('Impossible to read train file')

    # Get test dataset
    print('test: {}'.format(test_file))
    ds_test = get_time_series_dataset(filename=test_file, sep=sep, col=datetime_col)

    # Check test
    if ds_test is None:
        raise ValueError('Impossible to read test file')

    print('from {} to {}'.format(ds_test.index.min(), ds_test.index.max()))

    # Get features
    print('Select features')
    features = features_list
    if not features:
        features = ds_train.columns.to_list()
    elif set(features).difference(set(ds_train.columns)):
        raise ValueError('Selected the wrong features')

    assert np.all(ds_train.columns == ds_test.columns), \
        'Train and Test file have different features'

    # Select features
    ds_train = ds_train[features]
    ds_test = ds_test[features]

    # if visualize:
    #     fig, ax = plt.subplots(2, 1, figsize=(20, 10))
    #     ds_train.plot(ax=ax[0])
    #     ax[0].set_title('Training Data')
    #
    #     ds_test.plot(ax=ax[1])
    #     ax[1].set_title('Test Data')
    #     plt.show()

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Create training set
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)

    # Training
    print("Training...")
    model.fit(x_train)

    # Option 1: Save trained model
    if save_model:
        # Create output directory
        filename = os.path.join(output_dir, 'model_{}.pkl'.format(model_type))
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        # Save trained model
        joblib.dump(model, filename)

        # Load trained model
        model = joblib.load(filename)

    # Testing
    print('Testing...')
    y_pred = predict_anomaly(ds_test, model, kernel, with_skip=False)

    # Encoding results into triplet format
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    print("Results:")
    results = pd.DataFrame(results)
    print(tabulate(results, headers='keys', tablefmt='psql'))

    # Save results
    if save:
        filename = os.path.basename(test_file)
        filename = os.path.join(output_dir, 'results_' + model_type + '_' + filename)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir, exist_ok=True)
        results.to_csv(filename, sep=sep, index=False)
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']

    size = 3
    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3"
    ]
    stride = 1
    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]
    kernel_list = [
        180 if model_type in ['cnn', 'lstm', 'deep'] else 10
        for model_type in model_list
    ]
    resample_rate = 6400
    save_result = True
    output_dir = './results'

    # Initialize result array to store the result
    # of each train and test step
    result_array = []

    # Get files from the selected folders to use for training and testing
    curr_files = []
    for folder in all_state_folder:
        curr_files += get_files(folder, ext='lvm')[:]
    test_files = curr_files

    for model_type, kernel in zip(model_list, kernel_list):
        print('\n' + '\\\\//' * 20)
        print('\n Model: {}\n'.format(model_type))

        params_file = './params/params_{}.json'.format(model_type)

        for pos, train_file in enumerate(curr_files):
            skip_step = False
            train_state = os.path.split(os.path.dirname(train_file))[-1]
            x_train = []

            print("\n State Train: ", train_state)
            for i in range(size):
                if pos + i >= len(curr_files):
                    print('Not enough files')
                    skip_step = True
                    break

                tmp_file = curr_files[pos + i]
                tmp_state = os.path.split(os.path.dirname(tmp_file))[-1]
                if tmp_state != train_state:
                    print('Different state, skip current train')
                    skip_step = True
                    break

                print("Read {} Train File: {}".format(i, os.path.basename(tmp_file)))
                ds_tmp = read_ds_lvm(tmp_file, get_header=False)

                # Check train
                if ds_tmp is None or ds_tmp.empty:
                    print('Impossible to read train file')
                    skip_step = True
                    break

                # Select features
                ds_tmp = ds_tmp[features_list]

                # Resample
                ds_tmp = resample(ds_tmp, resample_rate)

                # Create training set
                x_tmp = get_sliding_window_matrix(ds_tmp.values, kernel, stride)
                x_train.append(x_tmp)

            if skip_step:
                print('Skip current train')
                continue

            # Train set
            x_train = np.vstack(x_train)
            train_len = len(x_train)
            print('\nTrain size: {}\n'.format(x_train.shape))

            # Model initialization
            print("Model initialization: {}".format(model_type))
            model = get_model(model_type, params_file=params_file)

            # Training
            print("Training...")
            model.fit(x_train)

            for test_file in test_files:
                test_state = os.path.split(os.path.dirname(test_file))[-1]

                if train_state == test_state and test_file == train_file:
                    continue

                print("\n State Test: ", test_state)
                print("Read Test File: ", os.path.basename(test_file))
                ds_test = read_ds_lvm(test_file, get_header=False)
                # t1 = datetime.now()

                # Check test
                if ds_test is None or ds_test.empty:
                    print('Impossible to read test file')
                    continue

                # Select features
                ds_test = ds_test[features_list]

                # Resample
                test_len = len(ds_test)
                ds_test = resample(ds_test, resample_rate)
                # ds_test = ds_test[:num_sample]

                print('Test Original File Length: ', test_len)
                print('New File Length {} {:.02f}'.format(
                    len(ds_test), 100 * len(ds_test) / test_len))

                test_stride = 1

                # Create testing set
                print("Create testing set")
                x_test = get_sliding_window_matrix(ds_test.values, kernel, test_stride)
                print('Test shape ', x_test.shape)

                # Testing
                print('Testing...')
                y_pred = model.predict(x_test)

                num_error = np.sum(y_pred > 0)
                mean_error = np.mean(y_pred)
                if num_error > 0:
                    mean_only_error = np.mean(y_pred[y_pred > 0])
                else:
                    mean_only_error = 0

                if not np.sum(y_pred > 0):
                    print("Results: No anomaly found")
                else:
                    print("Results: {} anomalies "
                          "({:.05f} total {})".format(num_error, mean_error,
                                                      len(x_test)))

                result_record = {
                    'MODEL': model_type,
                    'KERNEL': kernel,
                    'STRIDE': stride,
                    'TRAIN_STATE': train_state,
                    'TRAIN': os.path.basename(train_file),
                    'TRAIN_SIZE': train_len,
                    'TEST_STATE': test_state,
                    'TEST': os.path.basename(test_file),
                    'TEST_LEN': test_len,
                    'NUM_SINGLE_ANOMALY': num_error,
                    'PCT_ANOMALY': mean_error,
                    'NUM_SAMPLE_ANOMALY': mean_only_error,
                    'NUM_SAMPLE': len(x_test),
                    'LABEL': train_state != test_state
                }
                result_array.append(result_record)

        if save_result:
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir, exist_ok=True)

            filename = os.path.join(
                output_dir,
                'results_single_{}'.format(size) + model_type + '.csv')

            result_ds = pd.DataFrame(result_array)
            result_ds.to_csv(filename, index=False)