def get_runtime(result_path):
    """Load serialized runtimes and aggregate them three ways.

    Returns three lists of (name, median runtime) pairs, each sorted
    ascending by median: per equalize method, per similarity method,
    and per "equalize:similarity" combination.
    """

    def _sorted_medians(grouped):
        # Collapse every group to its median, then order by that value.
        medians = {name: numpy.median(samples) for name, samples in grouped.items()}
        return sorted(medians.items(), key=lambda item: item[1])

    runtimes = DillSerializer(result_path).deserialize()
    per_equalize = defaultdict(list)
    per_similarity = defaultdict(list)
    per_combination = defaultdict(list)
    lens1, lens2, equalize_methods, similarity_methods = misc.get_all_keys(runtimes)
    for len1, len2, equalize, similarity in itertools.product(
            lens1, lens2, equalize_methods, similarity_methods):
        samples = runtimes[len1][len2][equalize][similarity]
        if len(samples) > 0:
            median_runtime = numpy.median(samples)
            per_equalize[equalize].append(median_runtime)
            per_similarity[similarity].append(median_runtime)
            per_combination[equalize + ":" + similarity].append(median_runtime)
    return (_sorted_medians(per_equalize),
            _sorted_medians(per_similarity),
            _sorted_medians(per_combination))
def runtime_total_per_operation_over_all_testbeds(testbed_data):
    """Print, per HE library and operation, the mean over all testbeds of
    the smallest/largest measured feature length and the corresponding
    smallest/largest median runtime.

    testbed_data[operation][testbed][he_library] yields a 4-tuple whose
    first entry is a sequence of feature lengths and whose last entry is
    indexed with .iloc — presumably a pandas Series of median runtimes
    (TODO confirm against the producer of testbed_data).
    """
    print(
        "# total mean runtime per operation per he library over all testbeds")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    for he_library in he_libraries:
        for operation in operations:
            min_feature_lengths = list()
            max_feature_lengths = list()
            min_runtime = list()
            max_runtime = list()
            # average over min and max values per testbed
            for testbed in testbeds:
                testbed_feature_lengths, _, _, testbed_median = testbed_data[
                    operation][testbed][he_library]
                # first/last entry: feature lengths appear ordered ascending,
                # so these are the min/max measurement points
                min_feature_lengths.append(testbed_feature_lengths[0])
                max_feature_lengths.append(testbed_feature_lengths[-1])
                min_runtime.append(testbed_median.iloc[0])
                max_runtime.append(testbed_median.iloc[-1])
            print(he_library)
            print(operation)
            print("mean min - feature length: ",
                  round(numpy.mean(min_feature_lengths), 2))
            print("mean max - feature length: ",
                  round(numpy.mean(max_feature_lengths), 2))
            print("mean min - runtime: ", round(numpy.mean(min_runtime), 2))
            print("mean max - runtime: ", round(numpy.mean(max_runtime), 2))
            print("---")
def runtime_analysis_tvgl_tsfresh_all():
    """Compare runtime scalability of TVGL coupling vs. the ML coupling
    that uses all tsfresh features.

    For every "*-coupling-simulation-tvgl" result file under raw-results,
    prints the median absolute runtime change between successive client
    counts and each method's share of the entire coupling runtime.
    """

    def scalability_runtime(results):
        # Median runtime per client count, for both coupling variants.
        runtimes_tvgl = dict()
        runtimes_ml_tsfresh_all = dict()
        for num_clients in sorted(results.keys()):
            runtime_coupling_tvgl = [result.coupling_tvgl.runtime
                                     for result in results[num_clients]]
            runtime_ml_tsfresh_all = [result.coupling_machine_learning_tsfresh_all.runtime
                                      for result in results[num_clients]]
            runtimes_tvgl[num_clients] = numpy.median(runtime_coupling_tvgl)
            runtimes_ml_tsfresh_all[num_clients] = numpy.median(runtime_ml_tsfresh_all)
        # Insertion order is sorted by num_clients (Python 3.7+ dicts),
        # so iterating values() later walks ascending client counts.
        return {"tvgl": runtimes_tvgl, "tsfresh all": runtimes_ml_tsfresh_all}

    print("Runtime analysis of TVGL and tsfresh all features")
    for path_evaluation_data in glob.glob(os.path.join(
            __location__, "raw-results", "*-coupling-simulation-tvgl")):
        evaluation_data = DillSerializer(path_evaluation_data).deserialize()
        num_clients, num_reject_clients, len_light_patterns, \
            sampling_period_couplings, coupling_compare_methods, \
            coupling_similarity_thresholds, equalize_methods, \
            sampling_period_localizations, sampling_period_ml_trains, \
            coupling_ml_classifiers = misc.get_all_keys(evaluation_data)
        all_results = list()
        structured_results = defaultdict(list)
        for num_client, num_reject_client, len_light_pattern, sampling_period_coupling, \
            coupling_compare_method, coupling_similarity_threshold, equalize_method, \
            sampling_period_localization, sampling_period_ml_train, \
            coupling_ml_classifier in itertools.product(
                num_clients, num_reject_clients, len_light_patterns,
                sampling_period_couplings, coupling_compare_methods,
                coupling_similarity_thresholds, equalize_methods,
                sampling_period_localizations, sampling_period_ml_trains,
                coupling_ml_classifiers):
            results = evaluation_data[num_client][num_reject_client][len_light_pattern] \
                [sampling_period_coupling][coupling_compare_method] \
                [coupling_similarity_threshold][equalize_method] \
                [sampling_period_localization][sampling_period_ml_train][coupling_ml_classifier]
            if len(results) > 0:
                all_results.extend(results)
                structured_results[num_client].extend(results)
        if not structured_results:
            continue  # no results in this file, nothing to analyze
        scalability_runtimes = scalability_runtime(structured_results)
        runtime_coupling = [result.runtime_coupling for result in all_results]
        for identifier, runtimes in scalability_runtimes.items():
            print(identifier)
            # FIX: materialize the dict view — numpy.diff/median on a
            # Python 3 dict_values object yields a 0-d object array and fails.
            runtime_values = list(runtimes.values())
            abs_decrease = numpy.median(abs(numpy.diff(runtime_values)))
            ratio = (abs_decrease / numpy.median(runtime_values)) * 100
            print("Scalability over num clients {0} s ({1} %)".format(
                round(abs_decrease, 2), round(ratio, 2)))
            ratio_runtime = [runtime / numpy.mean(runtime_coupling)
                             for runtime in runtime_values]
            # Only keep shares below 100% of the entire coupling runtime.
            ratio_runtime = [entry for entry in ratio_runtime if entry < 1]
            print("Ratio to entire coupling runtime: {0:.2f} %".format(
                numpy.mean(ratio_runtime) * 100))
def find_best_per_params(metric_results):
    """For every feature, return the parameter combination — coupling
    method, light-pattern length, number of users — whose mean metric
    is highest.

    Returns a list of (feature, coupling_method, len_light_pattern,
    num_user) tuples, one per feature.
    """
    best_params = list()
    features, coupling_methods, len_light_patterns, num_users = misc.get_all_keys(metric_results)
    for feature in features:
        mean_per_combination = dict()
        for combination in itertools.product(coupling_methods, len_light_patterns, num_users):
            coupling_method, len_light_pattern, num_user = combination
            result = metric_results[feature][coupling_method][len_light_pattern][num_user]
            if len(result) > 0:
                # Encode the combination as a "-"-joined key so it can be
                # split back apart after ranking.
                label = "-".join([coupling_method, str(len_light_pattern), str(num_user)])
                mean_per_combination[label] = numpy.mean(result)
        ranked = sorted(mean_per_combination.items(), key=lambda entry: entry[1], reverse=True)
        winner = ranked[0][0].split("-")
        best_params.append((feature, winner[0], int(winner[1]), int(winner[2])))
    return best_params
def comparison_baseline_time_series(baseline_data, testbed_data):
    """Compare baseline (scalar per measurement) against time-series
    runtimes per HE library and print absolute medians plus the
    length-normalized performance delta.

    Both data structures are indexed [operation][testbed][he_library];
    the baseline entry carries a single feature length and a scalar
    median, the testbed entry a sequence of lengths and a .iloc-indexed
    median — presumably a pandas Series (TODO confirm upstream).
    """
    print("# comparison baseline vs. time-series")
    # compare at the second measured feature length of each testbed series
    testbed_comparison_idx = 1
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    for he_library in he_libraries:
        baseline_total_values = list()
        testbed_total_values = list()
        relative_baseline_runtime = list()
        relative_testbed_runtime = list()
        testbed_total_lengths = list()
        baseline_total_lengths = list()
        for operation in operations:
            for testbed in testbeds:
                baseline_feature_length, _, _, baseline_median = baseline_data[
                    operation][testbed][he_library]
                testbed_feature_lengths, _, _, testbed_median = testbed_data[
                    operation][testbed][he_library]
                # sanity: baseline is a scalar, testbed series lengths align
                assert type(baseline_median) != list
                assert len(testbed_feature_lengths) == len(testbed_median)
                baseline_total_values.append(baseline_median)
                baseline_total_lengths.append(baseline_feature_length)
                testbed_total_values.append(
                    testbed_median.iloc[testbed_comparison_idx])
                testbed_total_lengths.append(
                    testbed_feature_lengths[testbed_comparison_idx])
                # relative runtime with a feature length of one > performance efficiency
                relative_baseline_runtime.append(baseline_median / baseline_feature_length)
                relative_testbed_runtime.append(
                    numpy.mean(testbed_median / testbed_feature_lengths))
        # collapse per-measurement relative runtimes to a single median each
        relative_testbed_runtime = numpy.median(relative_testbed_runtime)
        relative_baseline_runtime = numpy.median(relative_baseline_runtime)
        print(he_library)
        print("baseline: ", round(numpy.median(baseline_total_values), 2))
        print("testbed: ", round(numpy.median(testbed_total_values), 2))
        print("Mean testbed length: ", round(numpy.mean(testbed_total_lengths)))
        print("Mean baseline length: ", numpy.mean(baseline_total_lengths))
        print(
            "normalized testbed / baseline performance delta (%): ",
            round(
                100 * (1 - (relative_testbed_runtime / relative_baseline_runtime)), 2))
def runtime_relative_per_operation(testbed_data):
    """Print, per operation, each HE library's per-value runtime
    (mean and std) averaged over all test platforms, and compare the
    fastest against the slowest library.
    """
    print("# relative performance he library per operation")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    for operation in operations:
        per_library = defaultdict(list)
        for testbed, library in itertools.product(testbeds, he_libraries):
            lengths, _, _, medians = testbed_data[operation][testbed][library]
            # normalize runtime by feature length -> per-value runtime
            normalized = medians / lengths
            per_library[library].append((numpy.mean(normalized), numpy.std(normalized)))
        # calculate mean over all test platforms
        summary = [(library,
                    numpy.mean([mean for mean, _ in stats]),
                    numpy.mean([std for _, std in stats]))
                   for library, stats in per_library.items()]
        means = [entry[1] for entry in summary]
        slowest = numpy.argmax(means)
        fastest = numpy.argmin(means)
        names = [entry[0] for entry in summary]
        rounded_means = [round(entry[1], 3) for entry in summary]
        rounded_stds = [round(entry[2], 3) for entry in summary]
        print(operation)
        print(list(zip(names, rounded_means, rounded_stds)))
        print("faster: ", summary[fastest][0])
        print("slower: ", summary[slowest][0])
        print(
            "percentage ratio (% faster): ",
            round(100 * summary[fastest][1] / summary[slowest][1], 2))
        print(
            "percentage ratio (% saves): ",
            round(100 * (1 - summary[fastest][1] / summary[slowest][1]), 2))
        print(
            "multiple ratio (x faster): ",
            round(summary[slowest][1] / summary[fastest][1], 2))
def distortion_similarity_analysis(path_distortion_similarity, result_path, plot_format):
    """Print median signal similarity at a 50% distortion rate for three
    correlation measures, then plot mean similarity vs. distortion rate
    for every similarity method and save the figure.

    Parameters
    ----------
    path_distortion_similarity : path to a serialized nested dict keyed
        [len_light_pattern][distortion_rate][similarity_method].
    result_path : directory the figure is written to.
    plot_format : file extension / matplotlib output format (e.g. "pdf").
    """
    results = DillSerializer(path_distortion_similarity).deserialize()
    len_light_patterns, distortion_rates, similarity_methods = misc.get_all_keys(results)
    print("Similarity threshold by signal distortion")
    distortion_rate = 0.5
    for similarity_method in ["spearman", "pearson", "distance_correlation"]:
        similarity = list()
        for len_light_pattern in len_light_patterns:
            result = results[len_light_pattern][distortion_rate][similarity_method]
            similarity.extend(result)
        print(similarity_method, round(numpy.median(similarity), 2))
    fig, ax = plt.subplots()
    markers = itertools.cycle(misc.markers)
    colors = [plt.cm.tab20(i) for i in numpy.linspace(0, 1, len(vector_similarity.similarity_methods))]
    for i, similarity_method in enumerate(similarity_methods):
        distortion = list()
        similarity_mean = list()
        for distortion_rate in distortion_rates:
            # mean over light-pattern lengths, then the median of those means
            mean = list()
            for len_light_pattern in len_light_patterns:
                result = results[len_light_pattern][distortion_rate][similarity_method]
                mean.append(numpy.mean(result))
            distortion.append(distortion_rate)
            similarity_mean.append(numpy.median(mean))
        label = similarity_method.replace("_", " ").capitalize().replace("Dtw", "DTW")
        ax.plot(distortion, similarity_mean, label=label,
                marker=next(markers), color=colors[i])
    # ideal linear decay and the chosen distortion threshold for reference
    ax.plot([0, 1], [1, 0], color="black", linestyle="--")
    ax.axvline(0.4, color="red", linestyle="--")
    ax.grid()
    ax.set_xticks(distortion_rates)
    ax.set_ylabel("Signal similarity")
    ax.set_xlabel("Distortion rate")
    ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
              ncol=4, mode="expand", borderaxespad=0.)
    fig.set_figwidth(fig.get_figwidth() * 2.5)
    filename = "distortion-signal-similarity." + plot_format
    # FIX: savefig's keyword is "format"; the former "plot_format=" kwarg
    # was silently wrong and errors on current matplotlib versions.
    fig.savefig(os.path.join(result_path, filename),
                format=plot_format, bbox_inches="tight")
    plt.close(fig)
def find_best_per_params(metric_results):
    """Per feature, pick the highest-scoring parameter combination
    (coupling method, rooms, users, coupling frequency) whose mean metric
    is not exactly 1 (presumably to skip suspiciously perfect results —
    matches the `!= 1` filter below).

    Returns (feature, coupling_method, num_room, num_user,
    coupling_frequency, metric_result) tuples, one per feature.
    """
    best_params = list()
    features, coupling_methods, num_users, coupling_frequencies, num_rooms = misc.get_all_keys(metric_results)
    for feature in features:
        scores = dict()
        for params in itertools.product(coupling_methods, num_rooms, num_users, coupling_frequencies):
            coupling_method, num_room, num_user, coupling_frequency = params
            result = metric_results[feature][coupling_method][num_user][coupling_frequency][num_room]
            if len(result) > 0:
                # results are nested two deep -> flatten twice before averaging
                flattened = misc.flatten_list(misc.flatten_list(result))
                label = "-".join([coupling_method, str(num_room),
                                  str(num_user), str(coupling_frequency)])
                scores[label] = numpy.mean(flattened)
        ranked = sorted(scores.items(), key=lambda entry: entry[1], reverse=True)
        # first entry (best score) whose metric is not exactly 1
        metrics = numpy.asarray([metric for _, metric in ranked])
        idx = numpy.where(metrics != 1)[0][0]
        label, metric_result = ranked[idx]
        best_param = label.split("-")
        best_params.append((feature, best_param[0], int(best_param[1]),
                            int(best_param[2]), int(best_param[3]), metric_result))
    return best_params
def he_library_performance_per_platform(testbed_data):
    """Rank the test platforms per HE library by mean per-value runtime
    (averaged over all operations) and print pairwise comparisons.
    """
    print("# HE library performance per platform")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    mean_runtime = defaultdict(dict)
    for library in he_libraries:
        for platform in testbeds:
            per_operation = list()
            for operation in operations:
                lengths, _, _, medians = testbed_data[operation][platform][library]
                # normalize by feature length -> runtime per value
                per_operation.append(numpy.mean(medians / lengths))
            mean_runtime[library][platform] = numpy.mean(per_operation)
    for library in mean_runtime.keys():
        ranking = sorted(mean_runtime[library].items(), key=lambda entry: entry[1])
        # use relative runtime to identify order and difference between platforms, don't use runtime values
        print(library)
        print("result: ", [platform for platform, _ in ranking])
        for pair in itertools.combinations(ranking, 2):
            platforms = [entry[0] for entry in pair]
            runtimes = [entry[1] for entry in pair]
            faster = numpy.argmin(runtimes)
            slower = numpy.argmax(runtimes)
            print(platforms[faster] + " vs. " + platforms[slower])
            print(
                "slower ratio: ",
                round(runtimes[slower] / runtimes[faster], 2))
            print(
                "faster (%): ",
                round(100 * (1 - (runtimes[faster] / runtimes[slower])), 2))
def plot_distorted_light_signals(
        distortion_rates, path_light_signals, conversion_us_to_ms,
        result_path, plot_format, plot_round=0):
    """Plot, for every light-pattern length, the distorted light signal at
    each distortion rate as stacked subplots (one row per rate) and save
    one figure per pattern length.

    plot_round selects which recorded round of the signal to plot;
    conversion_us_to_ms divides the raw timestamps — presumably
    microseconds — into milliseconds (TODO confirm units upstream).
    """
    print("plot distorted light signals")
    light_signals = DillSerializer(path_light_signals).deserialize()
    len_light_patterns = misc.get_all_keys(light_signals)[0]
    for len_light_pattern in len_light_patterns:
        print("len light pattern:", len_light_pattern)
        # one subplot row per distortion rate
        fig, axarr = plt.subplots(len(distortion_rates))
        for i, distortion_rate in enumerate(distortion_rates):
            client = light_signals[len_light_pattern][plot_round]
            light_signal = client.get_distorted_light_signal(distortion_rate)
            light_signal_time = client.signal_time
            # time axis relative to the first sample, in ms
            relative_time_ms = (light_signal_time-light_signal_time[0]) / conversion_us_to_ms
            axarr[i].plot(relative_time_ms, light_signal)
            # only the bottom subplot shows x ticks (every 10 ms)
            xticks = [] if i+1 < len(distortion_rates) else numpy.arange(relative_time_ms[-1], step=10)
            axarr[i].set_xticks(xticks)
            #axarr[i].set_yticks([round(numpy.mean(light_signal))])
            axarr[i].yaxis.tick_right()
            # hide tick marks everywhere except the bottom row's x axis
            axis = "both" if i+1 < len(distortion_rates) else "y"
            axarr[i].tick_params(axis=axis, which='both', length=0)
            # abuse the y tick label to show the distortion rate per row
            axarr[i].set_yticks([numpy.mean(light_signal)])
            axarr[i].set_yticklabels([distortion_rate])
        axarr[-1].set_xlabel("Signal time (ms)")
        # invisible full-figure axes used only to carry shared axis labels
        ax = fig.add_subplot(111, frameon=False)
        ax.set_yticks([])
        ax.set_xticks([])
        ax.set_ylabel("Voltage signal (mV)")
        ax2 = ax.twinx()
        ax2.set_yticks([])
        ax2.set_xticks([])
        ax2.set_ylabel("Distortion rate", labelpad=50)
        filename = "distortion-rate-signal-len-" + str(len_light_pattern) + "." + plot_format
        filepath = os.path.join(result_path, filename)
        fig.savefig(filepath, format=plot_format, bbox_inches="tight")
        #plt.show()
        plt.close(fig)
def process_data(evaluation_data):
    """Aggregate static-simulation results.

    Walks the full parameter grid of evaluation_data, collects
    accuracy/precision/recall/F1 and runtimes into nested dicts keyed
    [feature][method][len_light_pattern][num_client], and determines the
    best parameter combination per feature for the similarity, machine
    learning, and localization approaches.

    Returns: best_similarity, similarity_runtime, best_ml,
    machine_learning_runtime, best_localization, localization_runtime,
    len_light_patterns, num_clients.
    """
    def find_best_per_params(metric_results):
        # Per feature: the (coupling method, pattern length, #users)
        # combination with the highest mean metric.
        best_params = list()
        features, coupling_methods, len_light_patterns, num_users = misc.get_all_keys(metric_results)
        for feature in features:
            per_feature_results = dict()
            for coupling_method, len_light_pattern, num_user in itertools.product(coupling_methods, len_light_patterns, num_users):
                result = metric_results[feature][coupling_method][len_light_pattern][num_user]
                if len(result) > 0:
                    key = coupling_method + "-" + str(len_light_pattern) + "-" + str(num_user)
                    per_feature_results[key] = numpy.mean(result)
            per_feature_selection = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
            # best entry first; decode the parameters from the key
            best_param = per_feature_selection[0][0].split("-")
            coupling_method = best_param[0]
            len_light_pattern = int(best_param[1])
            num_user = int(best_param[2])
            best_params.append((feature, coupling_method, len_light_pattern, num_user))
        return best_params

    def get_metrics(result):
        # Pair accept/reject values for every classification metric.
        accuracy = [result.accuracy_accept, result.accuracy_reject]
        precision = [result.precision_accept, result.precision_reject]
        recall = [result.recall_accept, result.recall_reject]
        f1 = [result.f1_accept, result.f1_reject]
        return (accuracy, precision, recall, f1), result.runtime

    def save_result(results, runtime_query_data, metric_results, runtime_results,
                    feature, coupling_method, len_light_pattern, num_client):
        # Append metrics and (query runtime, coupling runtime) under the
        # shared 4-level key.
        metrics, runtime_coupling = get_metrics(results)
        metric_results[feature][coupling_method][len_light_pattern][num_client].append(metrics)
        runtime_results[feature][coupling_method][len_light_pattern][num_client].append((runtime_query_data, runtime_coupling))

    num_clients, num_reject_clients, len_light_patterns, \
        sampling_period_couplings, coupling_compare_methods, \
        coupling_similarity_thresholds, equalize_methods, \
        sampling_period_localizations, sampling_period_ml_trains, \
        coupling_ml_classifiers = misc.get_all_keys(evaluation_data)
    print("############### Static simulation ###############")
    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Sampling period couplings: ", sampling_period_couplings)
    print("Coupling compare methods: ", coupling_compare_methods)
    print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
    print("Equalize methods: ", equalize_methods)
    print("Sampling period localizations: ", sampling_period_localizations)
    print("Sampling period ML trains: ", sampling_period_ml_trains)
    print("Coupling ML classifiers: ", coupling_ml_classifiers)
    # 4-level accumulators: [feature][method][len_light_pattern][num_client]
    similarity_metrics = nested_dict(4, list)
    machine_learning_metrics = nested_dict(4, list)
    localization_metrics = nested_dict(4, list)
    similarity_runtime = nested_dict(4, list)
    localization_runtime = nested_dict(4, list)
    machine_learning_runtime = nested_dict(4, list)
    for num_client, num_reject_client, len_light_pattern, sampling_period_coupling, \
        coupling_compare_method, coupling_similarity_threshold, equalize_method, \
        sampling_period_localization, sampling_period_ml_train, coupling_ml_classifier in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, sampling_period_couplings,
            coupling_compare_methods, coupling_similarity_thresholds, equalize_methods,
            sampling_period_localizations, sampling_period_ml_trains, coupling_ml_classifiers):
        results = evaluation_data[num_client][num_reject_client][len_light_pattern] \
            [sampling_period_coupling][coupling_compare_method] \
            [coupling_similarity_threshold][equalize_method] \
            [sampling_period_localization][sampling_period_ml_train][coupling_ml_classifier]
        if len(results) > 0:
            for result in results:
                #result.runtime_coupling
                #result.runtime_query_data
                # localization
                feature = "ble"
                save_result(result.localization_random_forest_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime, feature,
                            "random forest", len_light_pattern, num_client)
                save_result(result.localization_filtering_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime, feature,
                            "filtering", len_light_pattern, num_client)
                save_result(result.localization_svm_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime, feature,
                            "svm", len_light_pattern, num_client)
                feature = "wifi"
                save_result(result.localization_random_forest_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime, feature,
                            "random forest", len_light_pattern, num_client)
                save_result(result.localization_filtering_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime, feature,
                            "filtering", len_light_pattern, num_client)
                save_result(result.localization_svm_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime, feature,
                            "svm", len_light_pattern, num_client)
                # similarity metrics
                save_result(result.coupling_signal_pattern, result.runtime_query_pattern_light,
                            similarity_metrics, similarity_runtime, "signal pattern",
                            coupling_compare_method, len_light_pattern, num_client)
                save_result(result.coupling_signal_pattern_duration, result.runtime_query_pattern_light,
                            similarity_metrics, similarity_runtime, "signal pattern duration",
                            coupling_compare_method, len_light_pattern, num_client)
                save_result(result.coupling_signal_similarity, result.runtime_query_raw_light,
                            similarity_metrics, similarity_runtime, "signal similarity",
                            coupling_compare_method, len_light_pattern, num_client)
                # machine learning
                save_result(result.coupling_machine_learning_basic_all, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime, "basic all",
                            coupling_ml_classifier, len_light_pattern, num_client)
                save_result(result.coupling_machine_learning_basic_selected, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime, "basic selected",
                            coupling_ml_classifier, len_light_pattern, num_client)
                save_result(result.coupling_machine_learning_tsfresh_selected, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime, "tsfresh selected",
                            coupling_ml_classifier, len_light_pattern, num_client)
    # attach the full metric dicts to the winning parameter tuples
    best_ml = [(feature, coupling, len_light_pattern, num_user, machine_learning_metrics)
               for feature, coupling, len_light_pattern, num_user in find_best_per_params(machine_learning_metrics)]
    best_similarity = [(feature, coupling, len_light_pattern, num_user, similarity_metrics)
                       for feature, coupling, len_light_pattern, num_user in find_best_per_params(similarity_metrics)]
    best_localization = [(feature, coupling, len_light_pattern, num_user, localization_metrics)
                         for feature, coupling, len_light_pattern, num_user in find_best_per_params(localization_metrics)]
    return best_similarity, similarity_runtime, best_ml, machine_learning_runtime, best_localization, localization_runtime, len_light_patterns, num_clients
def process_data(evaluation_data):
    """Aggregate dynamic-simulation results.

    Walks the full parameter grid of evaluation_data, collects
    accuracy/precision/recall/F1 and runtimes into 5-level nested dicts
    keyed [feature][method][num_user][coupling_frequency][num_room], and
    determines the best parameter set per feature for similarity, machine
    learning, and localization.

    Returns: best_similarity, similarity_runtime, similarity_params,
    best_machine_learning, machine_learning_runtime,
    machine_learning_params, best_localization, localization_runtime,
    num_users, localization_params, coupling_frequencies, num_rooms.
    """
    def get_results(results):
        # Keep only valid entries: negative metric values and non-positive
        # runtimes mark missing measurements.
        accuracy = [result.accuracy for result in results if result.accuracy >= 0]
        precision = [result.precision for result in results if result.precision >= 0]
        recall = [result.recall for result in results if result.recall >= 0]
        f1 = [result.f1 for result in results if result.f1 >= 0]
        runtime = [result.runtime for result in results if result.runtime > 0]
        return (accuracy, precision, recall, f1), misc.flatten_list(runtime)

    def save_result(result, metric_results, runtime_results, coupling_ident, runtime_ident,
                    feature, coupling_method, num_user, coupling_frequency, num_room):
        # Look up the coupling result and its query runtime by string
        # identifier and append both under the shared 5-level key.
        metrics, runtime = get_results(result.coupling[coupling_ident])
        missing_metric = 0 in [len(metric) for metric in metrics]
        if not missing_metric: # remove empty result
            metric_results[feature][coupling_method][num_user][coupling_frequency][num_room].append(metrics)
            runtime_results[feature][coupling_method][num_user][coupling_frequency][num_room].append((result.runtime[runtime_ident], runtime))

    def find_best_per_params(metric_results):
        # Per feature: highest-scoring parameter combination whose mean
        # metric is not exactly 1.
        best_params = list()
        features, coupling_methods, num_users, coupling_frequencies, num_rooms = misc.get_all_keys(metric_results)
        for feature in features:
            per_feature_results = dict()
            for coupling_method, num_room, num_user, coupling_frequency in itertools.product(
                    coupling_methods, num_rooms, num_users, coupling_frequencies):
                result = metric_results[feature][coupling_method][num_user][coupling_frequency][num_room]
                if len(result) > 0:
                    # results are nested two deep -> flatten twice
                    result = misc.flatten_list(misc.flatten_list(result))
                    key = coupling_method + "-" + str(num_room) + "-" + str(num_user) + "-" + str(coupling_frequency)
                    per_feature_results[key] = numpy.mean(result)
            per_feature_results = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
            # first (best) entry whose metric is not exactly 1
            idx = numpy.where(numpy.asarray([metric for _, metric in per_feature_results])!=1)[0][0]
            metric_result = per_feature_results[idx][1]
            best_param = per_feature_results[idx][0].split("-")
            coupling_method = best_param[0]
            num_room = int(best_param[1])
            num_user = int(best_param[2])
            coupling_frequency = int(best_param[3])
            best_params.append((feature, coupling_method, num_room, num_user, coupling_frequency, metric_result))
        return best_params

    sampling_period_couplings, coupling_compare_methods, \
        coupling_similarity_thresholds, equalize_methods, \
        sampling_period_localizations, sampling_period_ml_trains, \
        coupling_ml_classifiers, num_users, num_rooms, \
        simulation_durations, coupling_frequencies = misc.get_all_keys(evaluation_data)
    print("############### Dynamic simulation ###############")
    print("Num users: ", num_users)
    print("Num rooms: ", num_rooms)
    print("Simulation duration: ", simulation_durations)
    print("Coupling frequency: ", coupling_frequencies)
    print("Sampling period couplings: ", sampling_period_couplings)
    print("Coupling compare methods: ", coupling_compare_methods)
    print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
    print("Equalize methods: ", equalize_methods)
    print("Sampling period localizations: ", sampling_period_localizations)
    print("Sampling period ML trains: ", sampling_period_ml_trains)
    print("Coupling ML classifiers: ", coupling_ml_classifiers)
    # 5-level accumulators: [feature][method][num_user][coupling_frequency][num_room]
    similarity_metrics = nested_dict(5, list)
    machine_learning_metrics = nested_dict(5, list)
    localization_metrics = nested_dict(5, list)
    similarity_runtime = nested_dict(5, list)
    machine_learning_runtime = nested_dict(5, list)
    localization_runtime = nested_dict(5, list)
    for sampling_period_coupling, coupling_compare_method, \
        coupling_similarity_threshold, equalize_method, \
        sampling_period_localization, sampling_period_ml_train, \
        coupling_ml_classifier, num_user, num_room, \
        simulation_duration, coupling_frequency in itertools.product(
            sampling_period_couplings, coupling_compare_methods,
            coupling_similarity_thresholds, equalize_methods,
            sampling_period_localizations, sampling_period_ml_trains,
            coupling_ml_classifiers, num_users, num_rooms, simulation_durations,
            coupling_frequencies):
        results = evaluation_data[sampling_period_coupling][coupling_compare_method] \
            [coupling_similarity_threshold][equalize_method] \
            [sampling_period_localization][sampling_period_ml_train] \
            [coupling_ml_classifier][num_user][num_room] \
            [simulation_duration][coupling_frequency]
        if len(results) > 0:
            for result in results:
                # localization
                feature = "ble"
                save_result(result, localization_metrics, localization_runtime,
                            "loc Random Forest BLE", "time query raw ble", feature,
                            "random forest", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc filtering BLE", "time query raw ble", feature,
                            "filtering", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc SVM BLE", "time query raw ble", feature,
                            "svm", num_user, coupling_frequency, num_room)
                feature = "wifi"
                save_result(result, localization_metrics, localization_runtime,
                            "loc Random Forest WiFi", "time query raw wifi", feature,
                            "random forest", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc filtering WiFi", "time query raw wifi", feature,
                            "filtering", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc SVM WiFi", "time query raw wifi", feature,
                            "svm", num_user, coupling_frequency, num_room)
                # similarity metrics
                feature = "signal pattern"
                save_result(result, similarity_metrics, similarity_runtime, feature,
                            "time query pattern light", feature, coupling_compare_method,
                            num_user, coupling_frequency, num_room)
                feature = "signal pattern duration"
                save_result(result, similarity_metrics, similarity_runtime, feature,
                            "time query pattern light", feature, coupling_compare_method,
                            num_user, coupling_frequency, num_room)
                feature = "signal similarity"
                save_result(result, similarity_metrics, similarity_runtime, feature,
                            "time query raw light", feature, coupling_compare_method,
                            num_user, coupling_frequency, num_room)
                # machine learning
                save_result(result, machine_learning_metrics, machine_learning_runtime,
                            "ml basic all features", "time query raw light", "basic all",
                            coupling_ml_classifier, num_user, coupling_frequency, num_room)
                save_result(result, machine_learning_metrics, machine_learning_runtime,
                            "ml basic selected features", "time query raw light", "basic selected",
                            coupling_ml_classifier, num_user, coupling_frequency, num_room)
                save_result(result, machine_learning_metrics, machine_learning_runtime,
                            "ml tsfresh selected features", "time query raw light", "tsfresh selected",
                            coupling_ml_classifier, num_user, coupling_frequency, num_room)
    machine_learning_params = find_best_per_params(machine_learning_metrics)
    similarity_params = find_best_per_params(similarity_metrics)
    localization_params = find_best_per_params(localization_metrics)
    # attach the full metric dicts to the winning parameter tuples
    best_machine_learning = [(feature, coupling_method, num_room, num_user, coupling_frequency, machine_learning_metrics)
                             for feature, coupling_method, num_room, num_user, coupling_frequency, _ in machine_learning_params]
    best_similarity = [(feature, coupling_method, num_room, num_user, coupling_frequency, similarity_metrics)
                       for feature, coupling_method, num_room, num_user, coupling_frequency, _ in similarity_params]
    best_localization = [(feature, coupling_method, num_room, num_user, coupling_frequency, localization_metrics)
                         for feature, coupling_method, num_room, num_user, coupling_frequency, _ in localization_params]
    return best_similarity, similarity_runtime, similarity_params, \
        best_machine_learning, machine_learning_runtime, machine_learning_params, \
        best_localization, localization_runtime, num_users, localization_params, \
        coupling_frequencies, num_rooms
def plot_runtime_per_operation(baseline_data,
                               testbed_data,
                               scaling,
                               total_feature_length=21000,
                               min_feature_length=200):
    """Plot runtime per HE operation for every testbed / HE-library combination.

    For each operation one log-log figure is written to
    ``<results>/<operation>.pdf`` comparing the baseline runtime (dashed) against
    the time-series runtime (solid); the de-duplicated legend is saved separately
    to ``<operation>-legend.pdf``.

    :param baseline_data: nested dict ``[operation][testbed][he_library]`` ->
        4-tuple whose last element is the baseline median runtime
    :param testbed_data: nested dict ``[operation][testbed][he_library]`` ->
        4-tuple ``(feature_lengths, _, _, median_runtimes)`` where
        ``median_runtimes`` is a pandas Series aligned with ``feature_lengths``
    :param scaling: dict providing the ``"time unit"`` label for the y-axis
    :param total_feature_length: upper bound; maximum lengths at or above it are
        not annotated with a vertical limit line
    :param min_feature_length: curves starting above this length are extrapolated
        down to it so all curves share the same x-range
    """
    print("# plot runtime per operation")
    plot_format = "pdf"
    colors = {"helib": "blue", "seal": "green"}
    markers = {"iot": "o", "server": "X", "nuc": "v"}
    markevery = {"iot": 5, "server": 5, "nuc": 5}
    translate = {
        "iot": "IoT",
        "nuc": "NUC",
        "server": "Server",
        "helib": "HElib",
        "seal": "SEAL"
    }
    result_path = os.path.join(__location__, "results")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    all_feature_lengths = list()
    for operation in operations:
        fig, ax = plt.subplots()
        max_feature_lengths = list()
        for testbed in testbeds:
            for he_library in he_libraries:
                _, _, _, he_library_baseline_median = baseline_data[operation][
                    testbed][he_library]
                feature_lengths, _, _, he_library_median = testbed_data[
                    operation][testbed][he_library]
                all_feature_lengths.extend(feature_lengths)
                if "helib" in he_libraries:
                    # NOTE(review): deliberately swaps the NUC/Server labels for
                    # HElib runs -- confirm this matches the measurement setup.
                    translate["nuc"] = "Server"
                    translate["server"] = "NUC"
                if feature_lengths[0] > min_feature_length:
                    # Extrapolate the missing small feature lengths from randomly
                    # chosen measured points so all curves start at the same x.
                    fill_feature_lengths = range(min_feature_length,
                                                 feature_lengths[0], 50)
                    fill_runtimes = list()
                    for fill in fill_feature_lengths:
                        rand = random.randint(0, len(he_library_median) - 1)
                        # Initialisation is length-independent: reuse the sample
                        # as-is; otherwise scale linearly with the fill length.
                        runtime = (
                            he_library_median[rand] / feature_lengths[rand]
                        ) * fill if "initialisation" not in operation else he_library_median[
                            rand]
                        fill_runtimes.append(runtime)
                    he_library_median = fill_runtimes + he_library_median.values.tolist()
                    # BUGFIX: a range object cannot be concatenated to a list
                    # with "+" in Python 3 (TypeError) -- materialize both sides.
                    feature_lengths = list(fill_feature_lengths) + list(feature_lengths)
                # Baseline as a flat dashed line over the same x-range.
                ax.plot(feature_lengths,
                        len(feature_lengths) * [he_library_baseline_median],
                        label=translate[testbed] + "-" +
                        translate[he_library] + " - Baseline",
                        marker=markers[testbed],
                        markevery=markevery[testbed],
                        linestyle="--",
                        color=colors[he_library])
                ax.plot(feature_lengths,
                        he_library_median,
                        label=translate[testbed] + "-" +
                        translate[he_library] + " - Time-series",
                        marker=markers[testbed],
                        markevery=markevery[testbed],
                        color=colors[he_library])
                # Annotate each distinct time-series length limit once.
                max_feature_length = feature_lengths[-1]
                if max_feature_length not in max_feature_lengths and max_feature_length < total_feature_length:
                    value = human_format(max_feature_length)
                    # Shift the "20K" annotation left so it stays inside the axes.
                    xscaling = 0.5 if value == "20K" else 1.02
                    ax.text(max_feature_length * xscaling, 4.5, value)
                    ax.axvline(max_feature_length,
                               color="black",
                               linestyle=":",
                               label="Length limit of time-series")
                    max_feature_lengths.append(max_feature_length)
                    print("time-series limit: ", max_feature_length,
                          "environment: ", operation, testbed, he_library)
        print("min feature length: ", min(all_feature_lengths))
        print("max feature length: ", max(all_feature_lengths))
        ax.set_xscale("log")
        ax.set_yscale("log")
        ax.set_ylabel("Duration (" + scaling["time unit"] + ")")
        ax.set_xlabel("# Time-series values")
        ax.grid()
        filepath = os.path.join(result_path, operation + "." + plot_format)
        fig.savefig(filepath, bbox_inches="tight", format=plot_format)
        # Build a separate legend figure with duplicate labels removed.
        fig_legend = plt.figure()
        handles, labels = ax.get_legend_handles_labels()
        unique_labels = list(set(labels))
        unique_handles = list()
        labels = numpy.array(labels)
        for ul in unique_labels:
            label_pos = numpy.where(labels == ul)[0][0]
            unique_handles.append(handles[label_pos])
        unique_labels, unique_handles = zip(
            *sorted(zip(unique_labels, unique_handles)))
        plt.figlegend(unique_handles, unique_labels, loc="center", ncol=3)
        fig_legend.savefig(os.path.join(result_path,
                                        operation + "-legend.pdf"),
                           format=plot_format,
                           bbox_inches="tight")
        plt.close(fig)
        plt.close(fig_legend)
def offline_analysis_ml_model(path_ml_offline_evaluation):
    """Analyse offline ML evaluation results and render one metric heatmap per classifier.

    Loads the serialized nested evaluation results, pools them per
    (classifier, sampling period), averages accuracy/precision/recall/F1 over
    accept and reject classes, and saves an annotated heatmap to
    results/machine-learning/vm/ml-param-<classifier>.pdf.

    :param path_ml_offline_evaluation: path to the dill-serialized evaluation data
    """
    evaluation_data = DillSerializer(path_ml_offline_evaluation).deserialize()
    # Parameter axes of the nested result dict.
    num_clients, num_reject_clients, len_light_patterns, \
        classifiers, sampling_periods = misc.get_all_keys(evaluation_data)
    analysis_result = nested_dict(2, list)
    # Pool results over clients/reject-clients/pattern lengths: only the
    # classifier and sampling period are kept as analysis dimensions.
    for num_client, num_reject_client, len_light_pattern, classifier, sampling_period in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, classifiers,
            sampling_periods):
        results = evaluation_data[num_client][num_reject_client][
            len_light_pattern][classifier][sampling_period]
        if len(results) > 0:
            analysis_result[classifier][sampling_period].extend(results)
    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Classifiers: ", classifiers)
    print("Sampling periods: ", sampling_periods)
    for classifier in classifiers:
        results = analysis_result[classifier]
        sub_results = list()
        for sampling_period in sampling_periods:
            # Combine accept- and reject-class metrics before averaging, so the
            # mean reflects both classes equally.
            accuracy = [entry.accuracy_accept for entry in results[sampling_period]] + \
                [entry.accuracy_reject for entry in results[sampling_period]]
            precision = [entry.precision_accept for entry in results[sampling_period]] + \
                [entry.precision_reject for entry in results[sampling_period]]
            recall = [entry.recall_accept for entry in results[sampling_period]] + \
                [entry.recall_reject for entry in results[sampling_period]]
            f1 = [entry.f1_accept for entry in results[sampling_period]] + \
                [entry.f1_reject for entry in results[sampling_period]]
            entry = [
                numpy.mean(accuracy),
                numpy.mean(precision),
                numpy.mean(recall),
                numpy.mean(f1)
            ]
            entry = [round(value, 2) for value in entry]
            sub_results.append(entry)
        # Heatmap: rows = sampling periods, columns = the four metrics.
        # vmax=1.4 (> 1.0) lightens the green so the text annotations stay legible.
        fig, ax = plt.subplots()
        ax.imshow(sub_results,
                  cmap="Greens",
                  aspect="auto",
                  interpolation="nearest",
                  vmin=0,
                  vmax=1.4)
        ax.set_ylabel("Sampling period (ms)")
        ytickpos = numpy.arange(len(sampling_periods))
        ax.set_yticks(ytickpos)
        # Sampling periods are stored in seconds; display in milliseconds.
        ax.set_yticklabels([
            int(sampling_period * 1e3) for sampling_period in sampling_periods
        ])
        xticks = ["Accuracy", "Precision", "Recall", "F1-score"]
        xtickpos = range(len(xticks))
        ax.set_xticks(xtickpos)
        ax.set_xticklabels(xticks, rotation=20, ha="right")
        # Write each metric value into its heatmap cell.
        for i in range(len(sub_results)):
            for j in range(len(sub_results[0])):
                ax.text(j, i, sub_results[i][j], ha="center", va="center")
        # Minor ticks halfway between cells create black grid lines separating them.
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(xtickpos)
        ]
        ax.set_xticks(ticks, minor=True)
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(ytickpos)
        ]
        ax.set_yticks(ticks, minor=True)
        ax.grid(which='minor', color="black")
        filepath = os.path.join(__location__, "results", "machine-learning",
                                "vm", "ml-param-" + classifier.lower() + ".pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
def analysis_runtime_tsfresh_selected_features(evaluate):
    """Measure or plot the extraction runtime of selected tsfresh features.

    With ``evaluate=True`` the runtime of extracting each pre-selected feature
    set from light-pattern samples is measured and serialized to disk; with
    ``evaluate=False`` the serialized measurements are loaded and the median
    runtime per number of features is plotted to
    results/feature-selection/vm/tsfresh-features-selected-runtime.pdf.

    :param evaluate: True to run the measurement, False to plot saved results
    """
    data_path = os.path.join(__location__, "raw-results", "feature-selection",
                             "tsfresh-selected-features-runtime")
    if evaluate:
        # Feature-set files are suffixed with their feature count; sort by it.
        features_path = glob.glob(
            os.path.join(__location__, "raw-results", "feature-selection",
                         "tsfresh-*-to-be-extracted-*"))
        features_path = sorted(
            features_path,
            key=lambda entry: int(os.path.basename(entry).split("-")[-1]))
        tsfresh_features = TsFreshFeatures()
        runtime = nested_dict(2, dict)
        for len_light_pattern in [2, 4, 6, 8, 10]:
            # Resample the light pattern at the coupling sampling period to get
            # a realistic input sample for feature extraction.
            light_signal, light_signal_time = light_analysis.load_light_pattern(
                len_light_pattern)
            coupling_data_provider = CouplingDataProvider(
                light_signal, light_signal_time, None, None)
            sampling_period_coupling = get_pattern_max_sampling_period()
            light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            print("len light pattern: ", len_light_pattern)
            print("sampling period: ", sampling_period_coupling)
            print("len sample: ", len(light_signal))
            for feature_path in features_path:
                num_features = int(
                    os.path.basename(feature_path).split("-")[-1])
                print("num features: ", num_features)
                features_to_extract = DillSerializer(
                    feature_path).deserialize()
                # Time only the feature extraction itself.
                start = time.time()
                X = tsfresh_features.extract_selected_features(
                    light_signal, features_to_extract, True)
                end = time.time()
                print("feature shape: ", X.shape)
                # Sanity check: extracted column count must match the file suffix.
                assert num_features == X.shape[1]
                runtime[len_light_pattern][num_features] = end - start
                print("duration: ", end - start)
        DillSerializer(data_path).serialize(runtime)
    else:
        runtime = DillSerializer(data_path).deserialize()
        # Pool runtimes over all pattern lengths, keyed by feature count.
        runtime_per_num_feature = defaultdict(list)
        len_light_patterns, num_features = get_all_keys(runtime)
        for len_light_pattern, num_feature in itertools.product(
                len_light_patterns, num_features):
            runtime_per_num_feature[num_feature].append(
                runtime[len_light_pattern][num_feature])
        fig, ax = plt.subplots()
        num_features = sorted(runtime_per_num_feature.keys())
        median_runtime = [
            numpy.median(runtime_per_num_feature[num_feature])
            for num_feature in num_features
        ]
        # Highlight the 10th entry with its runtime value and a vertical marker.
        # NOTE(review): nth_feature is used both as list index and as x-position;
        # this assumes num_features[10] == 10 -- confirm against the data files.
        nth_feature = 10
        ax.text(nth_feature + 0.3, median_runtime[nth_feature] + 0.015,
                round(median_runtime[nth_feature], 3))
        ax.axvline(nth_feature, linestyle="--", color="black")
        ax.plot(num_features,
                median_runtime,
                label="Virtual Machine",
                marker="o",
                color="#1f77b4")
        ax.set_ylabel("Runtime (s)")
        ax.set_xlabel("Number of features")
        # Thin out the x-ticks but always keep the last feature count visible.
        ax.set_xticks(num_features[::4] + [num_features[-1]])
        ax.grid()
        ax.set_ylim(bottom=0, top=0.3)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                  loc=3,
                  ncol=1,
                  mode="expand",
                  borderaxespad=0.)
        filepath = os.path.join(__location__, "results", "feature-selection",
                                "vm", "tsfresh-features-selected-runtime.pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
def client_similarity_analysis(path_client_similarity, path_runtimes,
                               nth_best, result_path, plot_format):
    """Evaluate similarity/equalize method combinations for client matching.

    Loads pairwise light-pattern similarity results, sweeps detection thresholds
    to compute accuracy/precision/recall/F1 per (similarity, equalize, threshold)
    combination, prints the runtime-weighted best combinations, and saves a
    heatmap of the raw median similarities.

    :param path_client_similarity: path to dill-serialized similarity results
    :param path_runtimes: path to dill-serialized runtime results (see get_runtime)
    :param nth_best: number of top-ranked combinations to weight and report
    :param result_path: output directory for the heatmap figure
    :param plot_format: figure format/extension, e.g. "pdf"
    """

    def adapt_ticklabels(labels):
        # "method_name" -> "Method name" for nicer axis labels.
        return [label.replace("_", " ").capitalize() for label in labels]

    def plot_raw_similarities(plot_data, similarity_methods, equalize_methods):
        # Heatmap of median similarity per (similarity method, equalize method),
        # each cell annotated with its rounded value.
        similarities = [list(similarites.values()) for similarites in plot_data.values()]
        fig, ax = plt.subplots()
        im = ax.imshow(similarities, cmap="jet", vmin=0, vmax=1)
        ax.set_xticks(numpy.arange(len(equalize_methods)))
        ax.set_yticks(numpy.arange(len(similarity_methods)))
        ax.set_xticklabels(adapt_ticklabels(equalize_methods))
        ax.set_yticklabels(adapt_ticklabels(similarity_methods))
        for i in range(len(similarity_methods)):
            for j in range(len(equalize_methods)):
                ax.text(j, i, round(similarities[i][j], 2), ha="center", va="center")
        ax.set_ylabel("Similarity")
        ax.set_xlabel("Equalize")
        ax.figure.colorbar(im)
        filename = "raw-similarities." + plot_format
        fig.savefig(os.path.join(result_path, filename),
                    format=plot_format, bbox_inches="tight")
        #plt.show()
        plt.close(fig)

    def find_best_similarity_equalize_threshold(total_similarity, path_runtimes,
                                                round_factor=2):
        # Rank combinations by mean metric score, then re-weight the nth_best
        # candidates: 80% classification quality, 20% runtime (lower is better).
        print("Best similarity equalize threshold")
        total_similarity = sorted(total_similarity.items(),
                                  key=lambda kv: numpy.mean(kv[1]), reverse=True)
        _, _, runtime_equalize_similarity_methods = get_runtime(path_runtimes)
        runtime_equalize_similarity_methods = dict(runtime_equalize_similarity_methods)
        best_similarity = dict()
        for similarity, metrics in total_similarity[:nth_best]:
            # Key format: "<similarity>:<equalize>:<threshold>"; runtimes are
            # keyed as "<equalize>:<similarity>".
            similarity_method, equalize_method, _ = similarity.split(":")
            runtime = runtime_equalize_similarity_methods[equalize_method + ":" + similarity_method]
            weight = 0.8 * numpy.mean(metrics) + 0.2 * (1-runtime)
            best_similarity[similarity] = round(weight, round_factor)
            print("Similarity / metrics / runtime (s):", similarity,
                  numpy.round(metrics, round_factor), round(runtime, 4))
        best_similarity = sorted(best_similarity.items(),
                                 key=lambda kv: kv[1], reverse=True)
        print("Weighted best results:", best_similarity)

    results = DillSerializer(path_client_similarity).deserialize()
    len_light_patterns1, len_light_patterns2, equalize_methods, similarity_methods = misc.get_all_keys(results)
    total_similarity = dict()
    plot_data = nested_dict(1, dict)
    for similarity_method in similarity_methods:
        for equalize_method in equalize_methods:
            # Ground truth: pattern pairs of equal length should match (1),
            # unequal lengths should not (0).
            y_true = list()
            similarities = list()
            for len_light_pattern1 in len_light_patterns1:
                for len_light_pattern2 in len_light_patterns2:
                    if len_light_pattern1 in results and len_light_pattern2 in results[len_light_pattern1]:
                        result = results[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method]
                        similarities.extend(result)
                        y_true.extend(len(result) * [1 if len_light_pattern1 == len_light_pattern2 else 0])
            plot_data[similarity_method][equalize_method] = numpy.median(similarities)
            assert len(similarities) == len(y_true)
            y_true = numpy.asarray(y_true)
            similarities = numpy.asarray(similarities)
            # Sweep decision thresholds 0.0, 0.1, ..., 0.9.
            similarity_thresholds = numpy.arange(1, step=0.1)
            for similarity_threshold in similarity_thresholds:
                similarity_threshold = round(similarity_threshold, 1)
                y_pred = numpy.zeros(len(y_true))
                y_pred[similarities >= similarity_threshold] = 1
                acc = accuracy_score(y_true, y_pred)
                prec = precision_score(y_true, y_pred)
                rec = recall_score(y_true, y_pred)
                f1 = f1_score(y_true, y_pred)
                key = similarity_method + ":" + equalize_method + ":" + str(similarity_threshold)
                total_similarity[key] = [acc, prec, rec, f1]
    find_best_similarity_equalize_threshold(total_similarity, path_runtimes)
    plot_raw_similarities(plot_data, similarity_methods, equalize_methods)