def load_sdrs(file_path, start_idx=1000, end_idx=-1, input_width=2048 * 32,
              active_cells_weight=0, predicted_active_cells_weight=1):
    """Load a trace file and build the weighted SDRs used for clustering.

    The trace is read with ``loadTraces`` and the ``actualCategory``,
    ``tmActiveCells`` and ``tmPredictedActiveCells`` columns are sliced to
    ``[start_idx:end_idx]``. Both cell columns are converted with
    ``convert_to_sdrs`` and combined as a weighted sum.

    :param file_path: path handed to ``loadTraces``.
    :param start_idx: first record to keep; a negative value counts back from
        the number of records.
    :param end_idx: slice end (exclusive); a negative value counts back from
        the number of records, so the default of -1 drops the last record.
    :param input_width: width passed to ``convert_to_sdrs``.
    :param active_cells_weight: weight applied to the active-cell SDRs.
    :param predicted_active_cells_weight: weight applied to the
        predicted-active-cell SDRs.
    :return: ``(sdrs, categories)`` where ``sdrs`` is the weighted sum as a
        numpy array and ``categories`` is the matching slice of
        ``traces['actualCategory']``.
    """
    traces = loadTraces(file_path)
    num_records = len(traces['sensorValue'])

    # Negative indices are relative to the end of the recording.
    start = num_records + start_idx if start_idx < 0 else start_idx
    end = num_records + end_idx if end_idx < 0 else end_idx

    categories = traces['actualCategory'][start:end]
    active_cells = traces['tmActiveCells'][start:end]
    predicted_active_cells = traces['tmPredictedActiveCells'][start:end]

    # generate sdrs to cluster
    active_cells_sdrs = np.array(
        convert_to_sdrs(active_cells, input_width))
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (float(active_cells_weight) * active_cells_sdrs
            + float(predicted_active_cells_weight)
            * predicted_active_cells_sdrs)

    return sdrs, categories
def load_sdrs(start_idx, end_idx, exp_name):
    """Load the traces of an experiment and build the SDRs used for clustering.

    The trace file name is resolved with ``get_file_name`` from ``exp_name``
    and a fixed network configuration, then read with ``loadTraces``. The
    ``actualCategory``, ``tmActiveCells`` and ``tmPredictedActiveCells``
    columns are sliced to ``[start_idx:end_idx]``; both cell columns are
    converted with ``convert_to_sdrs`` and combined as a weighted sum.

    :param start_idx: first record to keep; a negative value counts back from
        the number of records.
    :param end_idx: slice end (exclusive); a negative value counts back from
        the number of records.
    :param exp_name: experiment name passed to ``get_file_name``.
    :return: ``(sdrs, categories)`` where ``sdrs`` is the weighted sum as a
        numpy array and ``categories`` is the matching slice of
        ``traces['actualCategory']``.
    """
    # Params
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 1
    network_config = 'sp=True_tm=True_tp=False_SDRClassifier'

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    num_records = len(traces['sensorValue'])

    # start and end (negative indices are relative to the end of the trace)
    start = num_records + start_idx if start_idx < 0 else start_idx
    end = num_records + end_idx if end_idx < 0 else end_idx

    # input data
    categories = traces['actualCategory'][start:end]
    active_cells = traces['tmActiveCells'][start:end]
    predicted_active_cells = traces['tmPredictedActiveCells'][start:end]

    # generate sdrs to cluster
    active_cells_sdrs = np.array(
        convert_to_sdrs(active_cells, input_width))
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (float(active_cells_weight) * active_cells_sdrs
            + float(predicted_active_cells_weight)
            * predicted_active_cells_sdrs)

    return sdrs, categories
def main():
    """Run the clustering experiment on one trace file and plot the results.

    Steps: recreate the plot output directory (any existing one is deleted),
    load the trace CSV located relative to this file, smooth the raw anomaly
    scores with ``moving_average``, build the weighted SDRs, call ``run`` and
    finally draw the cluster-assignment, inter-category distance and accuracy
    plots. All experiment parameters are the local constants below.
    """
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClusteringV2]

    # Exp params
    moving_average_window = 2  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[1]
    distance_func = distance_functions[0]
    merge_threshold = 30  # Cutoff distance to merge clusters. 'None' to ignore.
    start_idx = 0
    end_idx = -1
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 10
    max_num_clusters = 3
    num_cluster_snapshots = 1
    show_plots = True
    distance_matrix_ignore_noise = False  # ignore label 0 if used to label noise.
    exp_name = 'body_acc_x_inertial_signals_train'

    # Clean and create output directory for the graphs
    plots_output_dir = 'plots/%s' % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             os.pardir, 'htm', 'traces',
                             'trace_%s.csv' % exp_name)
    traces = loadTraces(file_path)
    num_records = len(traces['scalarValue'])

    # start and end for the x axis of the graphs
    # (negative indices are relative to the end of the trace)
    start = num_records + start_idx if start_idx < 0 else start_idx
    end = num_records + end_idx if end_idx < 0 else end_idx
    xlim = [0, end - start]

    # input data
    sensor_values = traces['scalarValue'][start:end]
    categories = traces['label'][start:end]
    active_cells = traces['tmActiveCells'][start:end]
    predicted_active_cells = traces['tmPredictedActiveCells'][start:end]
    raw_anomaly_scores = traces['rawAnomalyScore'][start:end]

    # Each smoothed value feeds the next call, so this stays a plain loop.
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma,
                                          raw_anomaly_score,
                                          moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    # generate sdrs to cluster
    active_cells_sdrs = np.array(
        convert_to_sdrs(active_cells, input_width))
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (float(active_cells_weight) * active_cells_sdrs
            + float(predicted_active_cells_weight)
            * predicted_active_cells_sdrs)

    # list of timesteps specifying when a snapshot of the clusters will be
    # taken. Floor division keeps the step an integer on Python 3 as well
    # (range() rejects floats); the result is materialized so it can be
    # indexed and handed to run() as a real list.
    step = (end - start) // num_cluster_snapshots - 1
    cluster_snapshot_indices = list(range(step, end - start, step))

    # run clustering
    (clustering_accuracies,
     cluster_snapshots,
     closest_cluster_history) = run(sdrs,
                                    categories,
                                    anomaly_scores,
                                    distance_func,
                                    moving_average_window,
                                    max_num_clusters,
                                    ClusteringClass,
                                    merge_threshold,
                                    cluster_snapshot_indices)

    # cluster_categories = []
    # for c in closest_cluster_history:
    #   if c is not None:
    #     cluster_categories.append(c.label_distribution()[0]['label'])

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        # plot_id = 'inter-cluster_t=%s' % snapshot_index
        # plot_inter_sequence_distances(plots_output_dir,
        #                               plot_id,
        #                               distance_func,
        #                               sdrs[:snapshot_index],
        #                               cluster_categories[:snapshot_index],
        #                               distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = 'inter-category_t=%s ' % snapshot_index
        plot_inter_sequence_distances(plots_output_dir,
                                      plot_id,
                                      distance_func,
                                      sdrs[:snapshot_index],
                                      categories[:snapshot_index],
                                      distance_matrix_ignore_noise)

    # plot clustering accuracy over time
    plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                      moving_average_window)
    plot_accuracy(plots_output_dir,
                  plot_id,
                  sensor_values,
                  categories,
                  anomaly_scores,
                  clustering_accuracies,
                  xlim)

    if show_plots:
        plt.show()
def main():
    """Run the clustering experiment on one trace file and plot the results.

    Steps: recreate the plot output directory (any existing one is deleted),
    load the traces of the selected experiment, smooth the raw anomaly scores
    with ``moving_average``, build the weighted SDRs, call ``run`` and draw
    the cluster-assignment, inter-cluster / inter-category distance and
    accuracy plots. All experiment parameters are the local constants below.
    """
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClustering]
    network_config = 'sp=True_tm=True_tp=False_SDRClassifier'
    exp_names = [
        'binary_ampl=10.0_mean=0.0_noise=0.0',
        'binary_ampl=10.0_mean=0.0_noise=1.0',
        'sensortag_z',
    ]

    # Exp params
    moving_average_window = 1  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[0]
    distance_func = distance_functions[0]
    exp_name = exp_names[0]
    start_idx = 0
    end_idx = 100
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 1
    max_num_clusters = 3
    num_cluster_snapshots = 2
    show_plots = False
    distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

    # Clean and create output directory for the graphs
    plots_output_dir = 'plots/%s' % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    sensor_values = traces['sensorValue'][start_idx:end_idx]
    categories = traces['actualCategory'][start_idx:end_idx]
    raw_anomaly_scores = traces['rawAnomalyScore'][start_idx:end_idx]

    # Each smoothed value feeds the next call, so this stays a plain loop.
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma,
                                          raw_anomaly_score,
                                          moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    active_cells = traces['tmActiveCells'][start_idx:end_idx]
    predicted_active_cells = traces['tmPredictedActiveCells'][
        start_idx:end_idx]

    # generate sdrs to cluster
    active_cells_sdrs = np.array(
        convert_to_sdrs(active_cells, input_width))
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (active_cells_weight * active_cells_sdrs
            + predicted_active_cells_weight * predicted_active_cells_sdrs)

    # start and end for the x axis of the graphs
    start = start_idx
    if end_idx < 0:
        # sdrs was already sliced with the negative end index, so the absolute
        # end is simply the start plus the number of records actually loaded.
        end = start + len(sdrs)
    else:
        end = end_idx
    xlim = [start, end]

    # list of timesteps specifying when a snapshot of the clusters will be
    # taken. Floor division keeps the step an integer on Python 3 as well
    # (range() rejects floats); the result is materialized so it can be
    # indexed and handed to run() as a real list.
    step = (end - start) // num_cluster_snapshots - 1
    cluster_snapshot_indices = list(range(start + step, end, step))

    # run clustering
    (clustering_accuracies,
     cluster_snapshots,
     closest_cluster_history) = run(sdrs,
                                    categories,
                                    distance_func,
                                    moving_average_window,
                                    max_num_clusters,
                                    ClusteringClass,
                                    cluster_snapshot_indices)

    # Ids of the closest cluster at every step where one was found; this does
    # not depend on the snapshot, so it is computed once before the loop.
    cluster_ids = [c.id for c in closest_cluster_history if c is not None]

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        plot_id = 'inter-cluster_t=%s' % snapshot_index
        plot_inter_sequence_distances(plots_output_dir,
                                      plot_id,
                                      distance_func,
                                      sdrs[:snapshot_index],
                                      cluster_ids[:snapshot_index],
                                      distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = 'inter-category_t=%s ' % snapshot_index
        plot_inter_sequence_distances(plots_output_dir,
                                      plot_id,
                                      distance_func,
                                      sdrs[:snapshot_index],
                                      categories[:snapshot_index],
                                      distance_matrix_ignore_noise)

    # plot clustering accuracy over time
    plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                      moving_average_window)
    plot_accuracy(plots_output_dir,
                  plot_id,
                  sensor_values,
                  categories,
                  anomaly_scores,
                  clustering_accuracies,
                  xlim)

    if show_plots:
        plt.show()
def main():
    """Run the clustering experiment on one trace file and plot the results.

    Steps: recreate the plot output directory (any existing one is deleted),
    load the traces of the selected experiment, smooth the raw anomaly scores
    with ``moving_average``, build the weighted SDRs, call ``run`` and draw
    the cluster-assignment, inter-cluster / inter-category distance and
    accuracy plots. All experiment parameters are the local constants below.
    """
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClustering]
    network_config = 'sp=True_tm=True_tp=False_SDRClassifier'
    exp_names = ['binary_ampl=10.0_mean=0.0_noise=0.0',
                 'binary_ampl=10.0_mean=0.0_noise=1.0',
                 'sensortag_z']

    # Exp params
    moving_average_window = 1  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[0]
    distance_func = distance_functions[0]
    exp_name = exp_names[0]
    start_idx = 0
    end_idx = 100
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 1
    max_num_clusters = 3
    num_cluster_snapshots = 2
    show_plots = False
    distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

    # Clean and create output directory for the graphs
    plots_output_dir = 'plots/%s' % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    sensor_values = traces['sensorValue'][start_idx:end_idx]
    categories = traces['actualCategory'][start_idx:end_idx]
    raw_anomaly_scores = traces['rawAnomalyScore'][start_idx:end_idx]

    # The running average depends on its previous value, hence the loop.
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma,
                                          raw_anomaly_score,
                                          moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    active_cells = traces['tmActiveCells'][start_idx:end_idx]
    predicted_active_cells = traces['tmPredictedActiveCells'][
        start_idx:end_idx]

    # generate sdrs to cluster
    active_cells_sdrs = np.array(
        convert_to_sdrs(active_cells, input_width))
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (active_cells_weight * active_cells_sdrs
            + predicted_active_cells_weight * predicted_active_cells_sdrs)

    # start and end for the x axis of the graphs
    start = start_idx
    if end_idx < 0:
        # The data was already sliced with the negative end index, so the
        # absolute end is the start plus the number of records loaded.
        end = start + len(sdrs)
    else:
        end = end_idx
    xlim = [start, end]

    # list of timesteps specifying when a snapshot of the clusters will be
    # taken. Integer (floor) division is required: range() does not accept a
    # float step on Python 3. The indices are stored as a list because they
    # are indexed below and also passed to run().
    step = (end - start) // num_cluster_snapshots - 1
    cluster_snapshot_indices = list(range(start + step, end, step))

    # run clustering
    (clustering_accuracies,
     cluster_snapshots,
     closest_cluster_history) = run(sdrs,
                                    categories,
                                    distance_func,
                                    moving_average_window,
                                    max_num_clusters,
                                    ClusteringClass,
                                    cluster_snapshot_indices)

    # Same for every snapshot, so build it once instead of once per iteration.
    cluster_ids = [c.id for c in closest_cluster_history if c is not None]

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        plot_id = 'inter-cluster_t=%s' % snapshot_index
        plot_inter_sequence_distances(plots_output_dir,
                                      plot_id,
                                      distance_func,
                                      sdrs[:snapshot_index],
                                      cluster_ids[:snapshot_index],
                                      distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = 'inter-category_t=%s ' % snapshot_index
        plot_inter_sequence_distances(plots_output_dir,
                                      plot_id,
                                      distance_func,
                                      sdrs[:snapshot_index],
                                      categories[:snapshot_index],
                                      distance_matrix_ignore_noise)

    # plot clustering accuracy over time
    plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                      moving_average_window)
    plot_accuracy(plots_output_dir,
                  plot_id,
                  sensor_values,
                  categories,
                  anomaly_scores,
                  clustering_accuracies,
                  xlim)

    if show_plots:
        plt.show()
def main():
    """Run the clustering experiment on one trace file and plot the results.

    Steps: recreate the plot output directory (any existing one is deleted),
    load the traces of the selected experiment, smooth the raw anomaly scores
    with ``moving_average``, build the weighted SDRs, call ``run`` and draw
    the cluster-assignment, inter-category distance and accuracy plots. All
    experiment parameters are the local constants below.
    """
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClusteringV2]
    network_config = "sp=True_tm=True_tp=False_SDRClassifier"
    exp_names = [
        "body_acc_x",
        "binary_ampl=10.0_mean=0.0_noise=0.0",
        "binary_ampl=10.0_mean=0.0_noise=1.0",
        "sensortag_z",
    ]

    # Exp params
    moving_average_window = 2  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[1]
    distance_func = distance_functions[0]
    exp_name = exp_names[0]
    start_idx = 1000
    end_idx = 12000
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 10
    max_num_clusters = 3
    num_cluster_snapshots = 1
    show_plots = True
    distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

    # Clean and create output directory for the graphs
    plots_output_dir = "plots/%s" % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    num_records = len(traces["sensorValue"])

    # start and end for the x axis of the graphs
    # (negative indices are relative to the end of the trace)
    start = num_records + start_idx if start_idx < 0 else start_idx
    end = num_records + end_idx if end_idx < 0 else end_idx
    xlim = [0, end - start]

    # input data
    sensor_values = traces["sensorValue"][start:end]
    categories = traces["actualCategory"][start:end]
    active_cells = traces["tmActiveCells"][start:end]
    predicted_active_cells = traces["tmPredictedActiveCells"][start:end]
    raw_anomaly_scores = traces["rawAnomalyScore"][start:end]

    # Each smoothed value feeds the next call, so this stays a plain loop.
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(
            anomaly_score_ma, raw_anomaly_score, moving_average_window
        )
        anomaly_scores.append(anomaly_score_ma)

    # generate sdrs to cluster
    active_cells_sdrs = np.array(
        convert_to_sdrs(active_cells, input_width)
    )
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width)
    )
    sdrs = (
        float(active_cells_weight) * active_cells_sdrs
        + float(predicted_active_cells_weight) * predicted_active_cells_sdrs
    )

    # list of timesteps specifying when a snapshot of the clusters will be
    # taken. Floor division keeps the step an integer on Python 3 as well
    # (range() rejects floats); the result is materialized so it can be
    # indexed and handed to run() as a real list.
    step = (end - start) // num_cluster_snapshots - 1
    cluster_snapshot_indices = list(range(step, end - start, step))

    # run clustering
    (clustering_accuracies, cluster_snapshots, closest_cluster_history) = run(
        sdrs,
        categories,
        anomaly_scores,
        distance_func,
        moving_average_window,
        max_num_clusters,
        ClusteringClass,
        cluster_snapshot_indices,
    )

    # cluster_categories = []
    # for c in closest_cluster_history:
    #   if c is not None:
    #     cluster_categories.append(c.label_distribution()[0]['label'])

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        # plot_id = 'inter-cluster_t=%s' % snapshot_index
        # plot_inter_sequence_distances(plots_output_dir,
        #                               plot_id,
        #                               distance_func,
        #                               sdrs[:snapshot_index],
        #                               cluster_categories[:snapshot_index],
        #                               distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = "inter-category_t=%s " % snapshot_index
        plot_inter_sequence_distances(
            plots_output_dir,
            plot_id,
            distance_func,
            sdrs[:snapshot_index],
            categories[:snapshot_index],
            distance_matrix_ignore_noise,
        )

    # plot clustering accuracy over time
    plot_id = "file=%s | moving_average_window=%s" % (
        exp_name,
        moving_average_window,
    )
    plot_accuracy(
        plots_output_dir,
        plot_id,
        sensor_values,
        categories,
        anomaly_scores,
        clustering_accuracies,
        xlim,
    )

    if show_plots:
        plt.show()