# Each main() in this section comes from a separate experiment script; the
# module-level imports (os, itertools, numpy as np, pandas as pd, sklearn's
# ParameterGrid and classification_report, plus the project helpers such as
# get_argument, get_files, read_ds_lvm, resample, transform_data,
# apply_transform, prepare_data, get_model and get_deep_model) are not part
# of this excerpt.
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder'][:]
    features_list = params['features_list']
    model_type = params['model_type']  # overridden by the sweep below
    resample_rate = 6400
    stride = 1
    epochs = 300
    save_result = True
    output_dir = './results'

    params_grid = {
        'kernel': [40, 80, 120, 200, 240, 360],
        'transform_type': ['minmax'],
        'with_lazy': [0.00],  # , 0.01, 0.015, 0.02],
        # 'loss': ['mae', 'mse'],
        # 'activation': [layers.LeakyReLU(alpha=0.3), 'relu', 'tanh']
    }
    # if model_type == 'bilstm':
    #     params_grid['activation'] = ['relu', 'tanh']

    for model_type in ['pca', 'svm', 'cluster', 'cnn', 'deep', 'lstm',
                       'bilstm']:
        skip_list = [0]
        train_list = [1]

        # Build every non-empty proper subset of states to hold out
        combs = []
        states = list(range(len(all_state_folder)))
        for i in range(len(all_state_folder) - 1):
            r = i + 1
            state_combs = [list(x) for x in itertools.combinations(states, r=r)]
            combs += state_combs

        for selected_states in combs:
            ds_train_list = []
            y_train_list = []
            ds_test_list = []
            y_test_list = []

            # Read train and test files
            print('Evaluation state: {}'.format(selected_states))
            for state_id, folder in enumerate(all_state_folder):
                print('Read state: ', os.path.basename(folder))
                files = get_files(folder, ext='lvm')
                for i, filename in enumerate(files):
                    if i in skip_list:
                        print('Skip: ', filename)
                        continue
                    ds = read_ds_lvm(filename, get_header=False)
                    ds = ds[features_list]
                    ds = resample(ds, resample_rate)
                    if i in train_list and state_id not in selected_states:
                        ds_train_list.append(ds)
                        print('Train state {} file: {}'.format(state_id,
                                                               filename))
                        y_train_list.append(state_id)
                    else:
                        ds_test_list.append(ds)
                        print('Test state {} file: {}'.format(state_id,
                                                              filename))
                        y_test_list.append(state_id)

            ds_res = []
            for grid in ParameterGrid(params_grid):
                print('\n Params:')
                print(grid)
                kernel = grid['kernel']
                transform_type = grid['transform_type']
                model_params = dict(grid)
                model_params.pop('kernel', None)
                model_params.pop('transform_type', None)
                if 'skernel' in model_params:
                    model_params['kernel'] = model_params['skernel']
                    model_params.pop('skernel', None)

                # Apply transform
                transformer = None
                if transform_type:
                    print('Apply transform: ', transform_type)
                    x_train_list, transformer = transform_data(
                        ds_train_list, transform_type)
                    x_test_list = [
                        apply_transform(ds, transformer)
                        for ds in ds_test_list
                    ]
                else:
                    print('No transform selected')
                    x_train_list = ds_train_list
                    x_test_list = ds_test_list

                # Create train and test matrix set
                x_train, y_train = prepare_data(x_train_list,
                                                labels=y_train_list,
                                                kernel=kernel,
                                                stride=stride)
                x_test, y_test = prepare_data(x_test_list,
                                              labels=y_test_list,
                                              kernel=kernel,
                                              stride=stride)
                print('Train size: ', x_train.shape)
                print('Train label size: ', y_train.shape)
                print('Test size: ', x_test.shape)
                print('Test label size: ', y_test.shape)

                order = np.random.permutation(len(x_train))
                x_new = x_train[order]
                y_new = y_train[order]

                # Model initialization
                print("Model initialization: {}".format(model_type))
                model = get_deep_model(model_type, model_params=model_params)

                # Training
                print("Training...")
                model.fit(x=x_new, epochs=epochs, batch_size=64, verbose=2)

                if model_type in ['cnn', 'deep', 'lstm', 'bilstm']:
                    original_threshold = model.threshold
                    print('Anomaly threshold: ', original_threshold)
                    thresholds = [0.00, 0.01, 0.015, 0.02, 0.03,
                                  0.05, 0.07, 0.1, 0.125, 0.15]
                    for th in thresholds:
                        model.threshold = original_threshold + th
                        print('\n Lazy ', model.threshold)
                        print("Anomaly accuracy")
                        y_pred = model.predict(x_test, classifier=False)
                        y_true = np.zeros(len(y_test))
                        for selected_state_id in selected_states:
                            y_true[y_test == selected_state_id] = 1
                        print(classification_report(y_true, y_pred))
                        report_dict = classification_report(y_true, y_pred,
                                                            output_dict=True)
                        record = get_classification_report_record(report_dict)
                        record.update(grid)
                        record['with_lazy'] = th
                        ds_res.append(record)
                else:
                    print("Anomaly accuracy")
                    y_pred = model.predict(x_test)
                    y_true = np.zeros(len(y_test))
                    for selected_state_id in selected_states:
                        y_true[y_test == selected_state_id] = 1
                    print(classification_report(y_true, y_pred))
                    report_dict = classification_report(y_true, y_pred,
                                                        output_dict=True)
                    record = get_classification_report_record(report_dict)
                    record.update(grid)
                    ds_res.append(record)

            ds_res = pd.DataFrame(ds_res)
            if save_result:
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                name = '_'.join(str(x) for x in selected_states)
                filename = os.path.join(
                    output_dir,
                    'results_grid_anomaly__{}__{}.csv'.format(name,
                                                              model_type))
                ds_res.to_csv(filename, index=True)
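# ---------------------------------------------------------------------------
# Editor's note: every script in this section windows the resampled signal
# through get_sliding_window_matrix(values, kernel, stride), whose definition
# lies outside this excerpt. The sketch below is an assumed, minimal reading
# of its contract (name suffixed _sketch to flag that it is illustrative):
# given an (n_samples, n_features) array, return an (n_windows, kernel,
# n_features) stack of overlapping windows. The window count matches the
# formula used later in this section: (n_samples - kernel) // stride + 1.
# ---------------------------------------------------------------------------
import numpy as np


def get_sliding_window_matrix_sketch(values, kernel, stride):
    """Stack sliding windows of length `kernel`, taken every `stride` rows."""
    n_samples, n_features = values.shape
    num_windows = (n_samples - kernel) // stride + 1
    windows = np.empty((num_windows, kernel, n_features), dtype=values.dtype)
    for w in range(num_windows):
        start = w * stride
        windows[w] = values[start:start + kernel]
    return windows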
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']
    resample_rate = 6400
    stride = 1
    epochs = 500
    transform_type = 'minmax'
    save_result = True
    output_dir = './results'

    cluster_models = {
        'agglomerative': AgglomerativeClustering,
        'kmeans': KMeans,
        'spectral': SpectralClustering
    }

    for train_id in [1, 2]:
        skip_list = []
        train_list = [train_id]

        ds_train_list = []
        y_train_list = []
        ds_test_list = []
        y_test_list = []

        # Read train and test files
        print('Read all datasets')
        for state_id, folder in enumerate(all_state_folder):
            print('\nRead state: ', os.path.basename(folder))
            files = get_files(folder, ext='lvm')
            selected_train_id = [
                x for x in range(len(files)) if x in train_list
            ]
            if not len(selected_train_id):
                selected_train_id = [1]
            for i, filename in enumerate(files):
                if i in skip_list:
                    print('Skip: {}'.format(filename))
                    continue
                ds = read_ds_lvm(filename, get_header=False)
                ds = ds[features_list]
                ds = resample(ds, resample_rate)
                if i in selected_train_id:
                    print('Train state {} file: {}'.format(state_id,
                                                           filename))
                    ds_train_list.append(ds)
                    y_train_list.append(state_id)
                else:
                    print('Test state {} file: {}'.format(state_id,
                                                          filename))
                    ds_test_list.append(ds)
                    y_test_list.append(state_id)

        # Apply transform
        transformer = None
        if transform_type:
            print('Apply transform: ', transform_type)
            x_train_list, transformer = transform_data(ds_train_list,
                                                       transform_type)
            x_test_list = [
                apply_transform(ds, transformer) for ds in ds_test_list
            ]
        else:
            print('No transform selected')
            x_train_list = ds_train_list
            x_test_list = ds_test_list

        for kernel in [40, 80, 120, 200, 240, 360]:
            # Create train and test matrix set
            x_train, y_train = prepare_data(x_train_list,
                                            labels=y_train_list,
                                            kernel=kernel,
                                            stride=stride)
            x_test, y_test = prepare_data(x_test_list,
                                          labels=y_test_list,
                                          kernel=kernel,
                                          stride=stride)
            print('Train size: ', x_train.shape)
            print('Train label size: ', y_train.shape)
            print('Test size: ', x_test.shape)
            print('Test label size: ', y_test.shape)

            order = np.random.permutation(len(x_train))
            x_new = x_train[order]
            y_new = y_train[order]

            # Cluster the raw (flattened) windows as a baseline
            record = {}
            for cluster_name, cluster_model in cluster_models.items():
                print('\n', cluster_name)
                # TODO: remove the hard-coded n_clusters param
                cls = cluster_model(n_clusters=4)
                enc_pred = x_test.reshape(len(x_test), -1)
                print(enc_pred.shape)
                y_pred = cls.fit_predict(enc_pred)
                ami = adjusted_mutual_info_score(y_test, y_pred)
                r_score = adjusted_rand_score(y_test, y_pred)
                hom_score = homogeneity_score(y_test, y_pred)
                record[cluster_name] = {
                    'adjusted_mutual_info_score': ami,
                    'adjusted_rand_score': r_score,
                    'homogeneity_score': hom_score
                }
                print(record[cluster_name])

            ds_res = pd.DataFrame(record)
            if save_result:
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                filename = os.path.join(
                    output_dir,
                    'results_{}_cluster_{}_{}.csv'.format(train_id, 'raw',
                                                          kernel))
                ds_res.to_csv(filename, index=True)

            # Cluster the deep encodings
            for model_type in ['cnn', 'deep', 'lstm', 'bilstm']:
                # Model initialization
                print("Model initialization: {}".format(model_type))
                model = get_model(model_type)

                # Training
                print("Training...")
                model.fit(x=x_new, epochs=epochs, verbose=2)

                enc_pred = model.encoder.predict(x_test)
                enc_pred = enc_pred.reshape((len(x_test), -1))

                record = {}
                for cluster_name, cluster_model in cluster_models.items():
                    print('\n', cluster_name)
                    print(enc_pred.shape)
                    # TODO: remove the hard-coded n_clusters param
                    cls = cluster_model(n_clusters=4)
                    y_pred = cls.fit_predict(enc_pred)
                    ami = adjusted_mutual_info_score(y_test, y_pred)
                    r_score = adjusted_rand_score(y_test, y_pred)
                    hom_score = homogeneity_score(y_test, y_pred)
                    record[cluster_name] = {
                        'adjusted_mutual_info_score': ami,
                        'adjusted_rand_score': r_score,
                        'homogeneity_score': hom_score
                    }
                    print(record[cluster_name])

                ds_res = pd.DataFrame(record)
                if save_result:
                    if not os.path.isdir(output_dir):
                        os.makedirs(output_dir, exist_ok=True)
                    filename = os.path.join(
                        output_dir,
                        'results_{}_cluster_{}_{}.csv'.format(train_id,
                                                              model_type,
                                                              kernel))
                    ds_res.to_csv(filename, index=True)
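# ---------------------------------------------------------------------------
# Editor's note: prepare_data(x_list, labels=..., kernel=..., stride=...) is
# called above but defined elsewhere in the repo. A plausible sketch, reusing
# get_sliding_window_matrix_sketch from above: window each dataset and repeat
# its state label once per window. Names and exact behavior are assumptions.
# ---------------------------------------------------------------------------
import numpy as np


def prepare_data_sketch(ds_list, labels, kernel, stride):
    """Build (x, y): stacked windowed matrices with one label per window."""
    x_parts, y_parts = [], []
    for ds, label in zip(ds_list, labels):
        windows = get_sliding_window_matrix_sketch(ds.values, kernel, stride)
        x_parts.append(windows)
        y_parts.append(np.full(len(windows), label))
    return np.vstack(x_parts), np.concatenate(y_parts)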
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']
    model_type = params['model_type']
    resample_rate = params.get('resample_rate', 6400)
    with_decision_score = params.get('with_decision_score', False)
    custom_resample = params.get('custom_resample', False)
    # resample_rate = 12800  # 12800 samples are 1 second
    # num_sample = 1000000
    with_skip = False
    params_file = './params/params_{}.json'.format(model_type)
    save_result = True
    overwrite = True
    output_dir = './results'
    result_array = []

    # Get a list of lists of files: for each state, the list of its files
    curr_files = []
    # Get the flat list of test files
    test_files = []
    for folder in all_state_folder:
        files = get_files(folder, ext='lvm')
        curr_files.append(files)
        test_files += files

    max_size = min([len(files) for files in curr_files[:3]])

    # Build the train packs: each element is the list of files for one train
    train_files = []
    for i in range(max_size):
        train_pack = [files[i] for files in curr_files[:3]]
        for j in range(1, len(train_pack)):
            train_files.append(train_pack[:j + 1])

    for train_pack in train_files:
        if len(train_pack) < 3:
            continue
        print('\n' + '\\\\//' * 20)

        selected_files = []
        train_states = []
        x_states = []
        print('\n Train Pack')
        for train_file in train_pack:
            train_state = os.path.split(os.path.dirname(train_file))[-1]
            print("State: ", train_state)
            print("Read File: ", os.path.basename(train_file))
            ds_train = read_ds_lvm(train_file, get_header=False)

            # Check train
            if ds_train is None or ds_train.empty:
                print('Unable to read train file')
                continue

            # Select features
            ds_train = ds_train[features_list]

            # Resample
            train_len = len(ds_train)
            if custom_resample:
                ds_train = resample_with_feature_extractor(ds_train,
                                                           resample_rate)
            else:
                ds_train = resample(ds_train, resample_rate)
            # ds_train = ds_train[:num_sample]
            print('Original File Length: ', train_len)
            print('New File Length {} {:.02f}'.format(
                len(ds_train), 100 * len(ds_train) / train_len))

            # Create training set
            print("Create set")
            x_train = get_sliding_window_matrix(ds_train.values, kernel,
                                                stride)
            print('Shape ', x_train.shape)

            selected_files.append(train_file)
            train_states.append(train_state)
            x_states.append(x_train)

        x_states = np.vstack(x_states)
        print('\n Train Size: ', x_states.shape)
        print('Train state: ', train_states)

        # Model initialization
        print("Model initialization: {}".format(model_type))
        model = get_model(model_type, params_file=params_file)

        # Training
        print("Training...")
        model.fit(x_states)

        for test_file in test_files:
            test_state = os.path.split(os.path.dirname(test_file))[-1]
            if test_file in selected_files:
                continue
            # if test_state in train_states:
            #     continue

            print("\n State Test: ", test_state)
            print("Read Test File: ", os.path.basename(test_file))
            ds_test = read_ds_lvm(test_file, get_header=False)

            # Check test
            if ds_test is None or ds_test.empty:
                print('Unable to read test file')
                continue

            # Select features
            ds_test = ds_test[features_list]

            # Resample
            test_len = len(ds_test)
            if custom_resample:
                ds_test = resample_with_feature_extractor(ds_test,
                                                          resample_rate)
            else:
                ds_test = resample(ds_test, resample_rate)
            # ds_test = ds_test[:num_sample]
            print('Test Original File Length: ', test_len)
            print('New File Length {} {:.02f}'.format(
                len(ds_test), 100 * len(ds_test) / test_len))

            test_stride = kernel if with_skip else 1

            # Create set
            print("Create testing set")
            x_test = get_sliding_window_matrix(ds_test.values, kernel,
                                               test_stride)
            print('Test shape ', x_test.shape)

            # Testing
            print('Testing...')
            if with_decision_score:
                y_pred = model.decision_score(x_test)
            else:
                y_pred = model.predict(x_test)

            num_error = np.sum(y_pred > 0)
            mean_error = np.mean(y_pred)
            mean_only_error = np.mean(y_pred[y_pred > 0]) if num_error else 0

            if not num_error:
                print("Results: NO anomaly found")
            else:
                print("Results: {} anomalies ({:.05f} total {})".format(
                    num_error, mean_error, len(x_test)))

            result_record = {
                'MODEL': model_type,
                'KERNEL': kernel,
                'STRIDE': stride,
                'TRAIN_STATE': train_states,
                'TRAIN': [os.path.basename(f) for f in selected_files],
                'TEST_STATE': test_state,
                'TEST': os.path.basename(test_file),
                'NUM_SINGLE_ANOMALY': num_error,
                'PCT_ANOMALY': mean_error,
                'NUM_SAMPLE_ANOMALY': mean_only_error,
                'NUM_SAMPLE': len(x_test),
                'LABEL': test_state not in train_states
            }
            result_array.append(result_record)

            if save_result:
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                filename = os.path.join(
                    output_dir, 'results_multi_' + model_type + '.csv')
                result_ds = pd.DataFrame(result_array)
                if os.path.isfile(filename) and not overwrite:
                    prev_result_ds = pd.read_csv(filename)
                    result_ds = pd.concat([prev_result_ds, result_ds],
                                          axis=0, ignore_index=True)
                result_ds.to_csv(filename, index=False)
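# ---------------------------------------------------------------------------
# Editor's note: transform_data / apply_transform are used by several scripts
# in this section ('minmax' is the only transform_type actually exercised,
# with 'std' mentioned as an option). A minimal sketch with sklearn scalers,
# assuming the transformer is fit on the concatenated training datasets and
# then applied dataset by dataset; the repo's real helpers may differ.
# ---------------------------------------------------------------------------
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def transform_data_sketch(ds_train_list, transform_type):
    """Fit a scaler on all training data; return scaled copies and the scaler."""
    scaler = MinMaxScaler() if transform_type == 'minmax' else StandardScaler()
    scaler.fit(pd.concat(ds_train_list, axis=0))
    x_train_list = [apply_transform_sketch(ds, scaler) for ds in ds_train_list]
    return x_train_list, scaler


def apply_transform_sketch(ds, transformer):
    """Apply an already-fit scaler, preserving the DataFrame structure."""
    return pd.DataFrame(transformer.transform(ds),
                        columns=ds.columns, index=ds.index)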
def main():
    # Note: this script's configuration (train_file, test_file, features_list,
    # kernel, stride, model_type, params_file, resample_rate, custom_resample,
    # with_skip, with_decision_score) is set up outside this excerpt.
    train_state = os.path.split(os.path.dirname(train_file))[-1]
    print("\n State Train: ", train_state)
    print("Read Train File: ", os.path.basename(train_file))
    ds_train = read_ds_lvm(train_file, get_header=False)

    # Check train
    if ds_train is None or ds_train.empty:
        print('Unable to read train file')
        return

    # Select features
    ds_train = ds_train[features_list]

    # Resample
    train_len = len(ds_train)
    if custom_resample:
        ds_train = resample_with_feature_extractor(ds_train, resample_rate)
    else:
        ds_train = resample(ds_train, resample_rate)
    # ds_train = ds_train[:num_sample]
    print('Train Original File Length: ', train_len)
    print('New File Length {} {:.02f}'.format(
        len(ds_train), 100 * len(ds_train) / train_len))

    # Create training set
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
    print('Train shape ', x_train.shape)

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Training
    print("Training...")
    model.fit(x_train)

    test_state = os.path.split(os.path.dirname(test_file))[-1]
    print("\n State Test: ", test_state)
    print("Read Test File: ", os.path.basename(test_file))
    ds_test = read_ds_lvm(test_file, get_header=False)

    # Check test
    if ds_test is None or ds_test.empty:
        print('Unable to read test file')
        return

    # Select features
    ds_test = ds_test[features_list]

    # Resample
    test_len = len(ds_test)
    if custom_resample:
        ds_test = resample_with_feature_extractor(ds_test, resample_rate)
    else:
        ds_test = resample(ds_test, resample_rate)
    # ds_test = ds_test[:num_sample]
    print('Test Original File Length: ', test_len)
    print('New File Length {} {:.02f}'.format(
        len(ds_test), 100 * len(ds_test) / test_len))

    # Testing
    # y_pred = predict_anomaly(ds_test, model, kernel, with_skip=with_skip)
    test_stride = kernel if with_skip else 1

    # Create set
    print("Create testing set")
    x_test = get_sliding_window_matrix(ds_test.values, kernel, test_stride)
    print('Test shape ', x_test.shape)

    # Testing
    print('Testing...')
    if with_decision_score:
        y_pred = model.decision_score(x_test)
    else:
        y_pred = model.predict(x_test)

    num_error = np.sum(y_pred > 0)
    mean_error = np.mean(y_pred)
    mean_only_error = np.mean(y_pred[y_pred > 0]) if num_error else 0

    if not num_error:
        print("Results: NO anomaly found")
    else:
        print("Results: {} anomalies ({:.05f} {:.05f} total {})".format(
            num_error, mean_error, mean_only_error, len(x_test)))

    # Encode the results in triplet format
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    results = pd.DataFrame(results)
    if results.empty:
        print("Results: NO anomaly found")
    else:
        # print(tabulate(results, headers='keys', tablefmt='psql'))
        test_stride = kernel if with_skip else 1
        # Number of test samples of kernel length
        test_sample = int((len(ds_test) - kernel) / test_stride) + 1
        # Number of single anomaly points
        tot = results['support'].sum()
        pct_tot = 100 * tot / (test_sample * test_stride)
        print("Results: {} (record {:.02f})".format(tot, pct_tot))
        if with_skip:
            # Number of anomaly samples
            tot_sample = int(tot / test_stride)
            print("Anomaly Sample: {} (test sample {:.02f})".format(
                int(tot_sample), test_sample))
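# ---------------------------------------------------------------------------
# Editor's note: create_triplet_time_series(y_pred, with_support=True) is not
# defined in this excerpt. Since the caller sums results['support'] to count
# anomalous points, one plausible reading is a run-length encoding that keeps
# each anomalous stretch as a (start, stop, support) triplet. This sketch is
# hypothetical, not the repo's implementation.
# ---------------------------------------------------------------------------
import numpy as np


def create_triplet_time_series_sketch(y_pred):
    """Run-length encode the anomalous stretches of a binary prediction series."""
    triplets = []
    start = None
    for i, val in enumerate(np.asarray(y_pred)):
        if val > 0 and start is None:
            start = i  # open a new anomalous run
        elif val <= 0 and start is not None:
            triplets.append({'start': start, 'stop': i - 1,
                             'support': i - start})
            start = None
    if start is not None:  # close a run that reaches the end of the series
        n = len(y_pred)
        triplets.append({'start': start, 'stop': n - 1, 'support': n - start})
    return triplets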
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']
    # model_type = params['model_type']
    resample_rate = 6400
    kernel = 120  # 40, 80, 120, 200
    stride = 1
    # model_type = 'cnn'  # 'cnn', 'deep', 'lstm'
    transform_type = 'minmax'  # 'std', 'minmax', None
    epochs = 200
    save_result = True
    output_dir = './results'

    model_params = {
        'with_lazy': 0.02,  # 0.00, 0.01, 0.015, 0.02
        # 'loss': 'mae'  # 'mae', 'mse'
    }

    skip_list = [0]
    train_list = [1]

    for selected_state_id, selected_state in enumerate(all_state_folder):
        ds_train_list = []
        y_train_list = []
        ds_test_list = []
        y_test_list = []

        # Read train and test files
        print('Evaluation state: {}'.format(selected_state_id))
        for state_id, folder in enumerate(all_state_folder):
            print('Read state: ', os.path.basename(folder))
            files = get_files(folder, ext='lvm')
            for i, filename in enumerate(files):
                if i in skip_list:
                    continue
                ds = read_ds_lvm(filename, get_header=False)
                ds = ds[features_list]
                ds = resample(ds, resample_rate)
                if i in train_list and state_id != selected_state_id:
                    ds_train_list.append(ds)
                    print('Train state {} file: {}'.format(state_id,
                                                           filename))
                    y_train_list.append(state_id)
                else:
                    ds_test_list.append(ds)
                    print('Test state {} file: {}'.format(state_id,
                                                          filename))
                    y_test_list.append(state_id)

        # Apply transform
        transformer = None
        if transform_type:
            print('Apply transform: ', transform_type)
            x_train_list, transformer = transform_data(ds_train_list,
                                                       transform_type)
            x_test_list = [
                apply_transform(ds, transformer) for ds in ds_test_list
            ]
        else:
            print('No transform selected')
            x_train_list = ds_train_list
            x_test_list = ds_test_list

        # Create train and test matrix set
        x_train, y_train = prepare_data(x_train_list,
                                        labels=y_train_list,
                                        kernel=kernel,
                                        stride=stride)
        x_test, y_test = prepare_data(x_test_list,
                                      labels=y_test_list,
                                      kernel=kernel,
                                      stride=stride)
        print('Train size: ', x_train.shape)
        print('Train label size: ', y_train.shape)
        print('Test size: ', x_test.shape)
        print('Test label size: ', y_test.shape)

        order = np.random.permutation(len(x_train))
        x_new = x_train[order]
        y_new = y_train[order]

        for model_type in ['cnn', 'deep', 'lstm', 'bilstm']:
            # Model initialization
            print("Model initialization: {}".format(model_type))
            model = get_deep_model(model_type, model_params=model_params)

            # Training
            print("Training...")
            model.fit(x=x_new, epochs=epochs, verbose=2)

            print("Anomaly accuracy")
            y_pred = model.predict(x_test, classifier=False)
            y_true = np.zeros(len(y_test))
            y_true[y_test == selected_state_id] = 1
            print(classification_report(y_true, y_pred))
            ds_res = pd.DataFrame(
                classification_report(y_true, y_pred, output_dict=True))

            if save_result:
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                filename = os.path.join(
                    output_dir,
                    'results_anomaly_{}_{}_.csv'.format(selected_state_id,
                                                        model_type))
                ds_res.to_csv(filename, index=True)

            print("Locate Anomaly")
            x_selected = x_test[y_test == selected_state_id]
            y_selected = y_test[y_test == selected_state_id]
            x_reconstructed = model.model.predict(x_selected)

            ds_res = []
            num_records = len(x_selected)
            for i in range(num_records):
                x_true = x_selected[i]
                x_pred = x_reconstructed[i]
                if transformer is not None:
                    x_true = transformer.inverse_transform(x_true)
                    x_pred = transformer.inverse_transform(x_pred)
                # Per-feature mean absolute reconstruction error
                diff = np.mean(np.abs(x_true - x_pred), axis=0)
                res = {k: val for k, val in zip(features_list, diff)}
                res['threshold'] = model.threshold
                res['score'] = y_selected[i]
                ds_res.append(res)

            ds_res = pd.DataFrame(ds_res)
            if save_result:
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                filename = os.path.join(
                    output_dir,
                    'results_locate__{}__{}__{}.csv'.format(
                        kernel, selected_state_id, model_type))
                ds_res.to_csv(filename, index=False)
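# ---------------------------------------------------------------------------
# Editor's note: the deep models above expose model.threshold, and the grid
# script earlier in this section shifts it by a "lazy" offset before calling
# predict(x_test, classifier=False). A hedged sketch of that decision rule
# for a reconstruction-based autoencoder (assumed semantics, consistent with
# the per-feature np.mean(np.abs(x_true - x_pred)) used in the locate step
# above):
# ---------------------------------------------------------------------------
import numpy as np


def predict_anomaly_sketch(autoencoder, x, threshold):
    """Flag windows whose mean absolute reconstruction error exceeds threshold."""
    x_rec = autoencoder.predict(x)                     # reconstruct each window
    errors = np.mean(np.abs(x - x_rec), axis=(1, 2))   # one score per window
    return (errors > threshold).astype(int)            # 1 = anomaly, 0 = normal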
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']
    resample_rate = 6400
    stride = 1
    epochs = 300
    transform_type = 'minmax'  # 'minmax'
    save_result = True
    output_dir = './results'

    for train_id in [1, 2, 0]:
        skip_list = []
        train_list = [train_id]

        ds_train_list = []
        y_train_list = []
        ds_test_list = []
        y_test_list = []

        # Read train and test files
        print('Read all datasets')
        for state_id, folder in enumerate(all_state_folder):
            print('\nRead state: ', os.path.basename(folder))
            files = get_files(folder, ext='lvm')
            selected_train_id = [
                x for x in range(len(files)) if x in train_list
            ]
            if not len(selected_train_id):
                selected_train_id = [1]
            for i, filename in enumerate(files):
                if i in skip_list:
                    print('Skip: {}'.format(filename))
                    continue
                ds = read_ds_lvm(filename, get_header=False)
                ds = ds[features_list]
                ds = resample(ds, resample_rate)
                if i in selected_train_id:
                    print('Train state {} file: {}'.format(state_id,
                                                           filename))
                    ds_train_list.append(ds)
                    y_train_list.append(state_id)
                else:
                    print('Test state {} file: {}'.format(state_id,
                                                          filename))
                    ds_test_list.append(ds)
                    y_test_list.append(state_id)

        # Apply transform
        transformer = None
        if transform_type:
            print('Apply transform: ', transform_type)
            x_train_list, transformer = transform_data(ds_train_list,
                                                       transform_type)
            x_test_list = [
                apply_transform(ds, transformer) for ds in ds_test_list
            ]
        else:
            print('No transform selected')
            x_train_list = ds_train_list
            x_test_list = ds_test_list

        for kernel in [40, 80, 120, 200, 240, 360]:
            # Create train and test matrix set
            x_train, y_train = prepare_data(x_train_list,
                                            labels=y_train_list,
                                            kernel=kernel,
                                            stride=stride)
            x_test, y_test = prepare_data(x_test_list,
                                          labels=y_test_list,
                                          kernel=kernel,
                                          stride=stride)
            print('Train size: ', x_train.shape)
            print('Train label size: ', y_train.shape)
            print('Test size: ', x_test.shape)
            print('Test label size: ', y_test.shape)

            order = np.random.permutation(len(x_train))
            x_new = x_train[order]
            y_new = y_train[order]

            for model_type in ['classifier', 'linear', 'cnn', 'deep', 'lstm',
                               'bilstm']:
                # Model initialization
                print("Model initialization: {}".format(model_type))
                model = get_model(model_type)

                # Training
                print("Training...")
                model.fit(x=x_new, y=y_new, epochs=epochs, batch_size=32,
                          verbose=2)

                y_pred = model.predict(x_test, classifier=True)
                print(classification_report(y_test, y_pred))
                ds_res = pd.DataFrame(
                    classification_report(y_test, y_pred, output_dict=True))

                if save_result:
                    if not os.path.isdir(output_dir):
                        os.makedirs(output_dir, exist_ok=True)
                    filename = os.path.join(
                        output_dir,
                        'results_{}_accuracy_{}_{}.csv'.format(
                            train_id, model_type, kernel))
                    ds_res.to_csv(filename, index=True)
def main():
    output_dir = './results'
    selected_files = [
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 3/testaccelerometri_1.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 4/testaccelerometri.lvm",
    ]
    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3"
    ]
    stride = 1
    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]
    kernel_list = [
        180 if model_type in ['cnn', 'lstm', 'deep'] else 10
        for model_type in model_list
    ]
    resample_rate = 6400

    # Initialize result array to record performance results
    result_array = []

    # Model cycle
    for model_type, kernel in zip(model_list, kernel_list):
        print('\n\n')
        print('\nModel: {}\n'.format(model_type))
        params_file = './params/params_{}.json'.format(model_type)

        # Train cycle
        for i in range(len(selected_files)):
            x_train = []

            # Get train
            for pos, train_file in enumerate(selected_files[:i + 1]):
                if i > 0 and pos == 0:
                    continue
                ds_train = read_ds_lvm(train_file, get_header=False)
                if ds_train is None or ds_train.empty:
                    raise ValueError('Unable to read train file')
                ds_train = ds_train[features_list]
                ds_train = resample(ds_train, resample_rate)
                x = get_sliding_window_matrix(ds_train.values, kernel, stride)
                if pos == 0:
                    x = x[:len(x) // 2]
                x_train.append(x)

            # Train set
            x_train = np.vstack(x_train)
            print('\nTrain size: {}\n'.format(len(x_train)))

            # Model init
            model = get_model(model_type, params_file=params_file)

            # Model training
            train_start = datetime.now()
            model.fit(x_train)
            train_end = datetime.now()

            # Test cycle
            for j in range(len(selected_files)):
                x_test = []

                # Get test
                for pos, test_file in enumerate(selected_files[:j + 1]):
                    if j > 0 and pos == 0:
                        continue
                    ds_test = read_ds_lvm(test_file, get_header=False)
                    if ds_test is None or ds_test.empty:
                        raise ValueError('Unable to read test file')
                    ds_test = ds_test[features_list]
                    ds_test = resample(ds_test, resample_rate)
                    x = get_sliding_window_matrix(ds_test.values, kernel,
                                                  stride)
                    if pos == 0:
                        x = x[:1]
                    x_test.append(x)

                # Test set
                x_test = np.vstack(x_test)
                print('\nTest size: {}\n'.format(len(x_test)))

                # Model predict
                test_start = datetime.now()
                model.predict(x_test)
                test_end = datetime.now()

                result_record = {
                    'model': model_type,
                    'train_size': len(x_train),
                    'train_time': train_end - train_start,
                    'test_size': len(x_test),
                    'test_time': test_end - test_start,
                }
                result_array.append(result_record)

    # Save results
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, 'performance.csv')
    result_ds = pd.DataFrame(result_array)
    result_ds.to_csv(filename, index=False)
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    size = 3
    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3"
    ]
    stride = 1
    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]
    kernel_list = [
        180 if model_type in ['cnn', 'lstm', 'deep'] else 10
        for model_type in model_list
    ]
    resample_rate = 6400
    save_result = True
    output_dir = './results'

    # Initialize result array to record the result
    # of each train and test step
    result_array = []

    # Get files from the selected folders to use for training and testing
    curr_files = []
    for folder in all_state_folder:
        curr_files += get_files(folder, ext='lvm')
    test_files = curr_files

    for model_type, kernel in zip(model_list, kernel_list):
        print('\n' + '\\\\//' * 20)
        print('\n Model: {}\n'.format(model_type))
        params_file = './params/params_{}.json'.format(model_type)

        for pos, train_file in enumerate(curr_files):
            skip_step = False
            train_state = os.path.split(os.path.dirname(train_file))[-1]
            x_train = []
            print("\n State Train: ", train_state)

            # Train on `size` consecutive files from the same state
            for i in range(size):
                if pos + i >= len(curr_files):
                    print('Not enough files')
                    skip_step = True
                    break
                tmp_file = curr_files[pos + i]
                tmp_state = os.path.split(os.path.dirname(tmp_file))[-1]
                if tmp_state != train_state:
                    print('Different state, skip current train')
                    skip_step = True
                    break
                print("Read {} Train File: {}".format(
                    i, os.path.basename(tmp_file)))
                ds_tmp = read_ds_lvm(tmp_file, get_header=False)

                # Check train
                if ds_tmp is None or ds_tmp.empty:
                    print('Unable to read train file')
                    skip_step = True
                    break

                # Select features
                ds_tmp = ds_tmp[features_list]

                # Resample
                ds_tmp = resample(ds_tmp, resample_rate)

                # Create training set
                x_tmp = get_sliding_window_matrix(ds_tmp.values, kernel,
                                                  stride)
                x_train.append(x_tmp)

            if skip_step:
                print('Skip current train')
                continue

            # Train set
            x_train = np.vstack(x_train)
            train_len = len(x_train)
            print('\nTrain size: {}\n'.format(x_train.shape))

            # Model initialization
            print("Model initialization: {}".format(model_type))
            model = get_model(model_type, params_file=params_file)

            # Training
            print("Training...")
            model.fit(x_train)

            for test_file in test_files:
                test_state = os.path.split(os.path.dirname(test_file))[-1]
                if train_state == test_state and test_file == train_file:
                    continue

                print("\n State Test: ", test_state)
                print("Read Test File: ", os.path.basename(test_file))
                ds_test = read_ds_lvm(test_file, get_header=False)

                # Check test
                if ds_test is None or ds_test.empty:
                    print('Unable to read test file')
                    continue

                # Select features
                ds_test = ds_test[features_list]

                # Resample
                test_len = len(ds_test)
                ds_test = resample(ds_test, resample_rate)
                # ds_test = ds_test[:num_sample]
                print('Test Original File Length: ', test_len)
                print('New File Length {} {:.02f}'.format(
                    len(ds_test), 100 * len(ds_test) / test_len))

                test_stride = 1

                # Create set
                print("Create testing set")
                x_test = get_sliding_window_matrix(ds_test.values, kernel,
                                                   test_stride)
                print('Test shape ', x_test.shape)

                # Testing
                print('Testing...')
                y_pred = model.predict(x_test)

                num_error = np.sum(y_pred > 0)
                mean_error = np.mean(y_pred)
                mean_only_error = (np.mean(y_pred[y_pred > 0])
                                   if num_error else 0)

                if not num_error:
                    print("Results: NO anomaly found")
                else:
                    print("Results: {} anomalies ({:.05f} total {})".format(
                        num_error, mean_error, len(x_test)))

                result_record = {
                    'MODEL': model_type,
                    'KERNEL': kernel,
                    'STRIDE': stride,
                    'TRAIN_STATE': train_state,
                    'TRAIN': os.path.basename(train_file),
                    'TRAIN_SIZE': train_len,
                    'TEST_STATE': test_state,
                    'TEST': os.path.basename(test_file),
                    'TEST_LEN': test_len,
                    'NUM_SINGLE_ANOMALY': num_error,
                    'PCT_ANOMALY': mean_error,
                    'NUM_SAMPLE_ANOMALY': mean_only_error,
                    'NUM_SAMPLE': len(x_test),
                    'LABEL': train_state != test_state
                }
                result_array.append(result_record)

                if save_result:
                    if not os.path.isdir(output_dir):
                        os.makedirs(output_dir, exist_ok=True)
                    filename = os.path.join(
                        output_dir,
                        'results_single_{}_{}.csv'.format(size, model_type))
                    result_ds = pd.DataFrame(result_array)
                    result_ds.to_csv(filename, index=False)