def tsfresh_performance_evaluation(single_light_pattern=False,
                                   range_len_light_pattern=range(2, 11, 2)):
    """Measure the tsfresh performance-evaluation runtime and serialize it.

    With ``single_light_pattern`` set, one measurement is taken per entry of
    ``range_len_light_pattern`` and a dict mapping pattern length to elapsed
    time is written. Otherwise a single measurement over ``LightData`` built
    without an explicit pattern-length list is written.

    Results are stored below ``raw-results/feature-selection`` relative to
    ``__location__``.
    """

    def measure(light_data):
        # One measurement: relevance computation followed by the timed
        # performance evaluation on the same tsfresh input.
        extractor = TsFreshFeatures()
        extracted, relevance = extractor.relevance(light_data.X_tsfresh,
                                                   light_data.y_tsfresh)
        return extractor.performance_evaluation(extracted, relevance,
                                                light_data.X_tsfresh)

    period = get_pattern_max_sampling_period()
    result_dir = os.path.join(__location__, "raw-results",
                              "feature-selection")

    if not single_light_pattern:
        # combined light patterns
        runtime = measure(LightData(period))
        target = os.path.join(
            result_dir, "combined-light-patterns-only-runtime-tsfresh")
        DillSerializer(target).serialize(runtime)
        return

    # single light patterns
    runtimes = {
        pattern_len: measure(LightData(period, [pattern_len]))
        for pattern_len in range_len_light_pattern
    }
    target = os.path.join(result_dir,
                          "single-light-patterns-only-runtime-tsfresh")
    DillSerializer(target).serialize(runtimes)
def test():
    """Run a single simulation with a fixed, hard-coded parameter set.

    Builds one ``SimulationData`` instance from literal values and hands it
    to ``run``.
    """
    testbed = "vm"  # server, vm

    # Arguments are positional; each one is labelled with the name it had
    # when it was a separate local so the mapping stays readable.
    parameter = SimulationData(
        "localhost",  # server_ip
        1026,  # server_port
        get_pattern_max_sampling_period(),  # data_period_coupling
        "pearson",  # coupling_compare_method
        0.7,  # coupling_similarity_threshold
        "dtw",  # equalize_method
        0.05,  # data_period_ml_train
        os.path.join(__location__, "..", "ml-train-data",
                     testbed),  # path_ml_train_data
        "Random Forest",  # coupling_ml_classifier
        os.path.join(__location__, "..", "..", "localization",
                     "data"),  # path_localization_data
        str(coupling_simulator.localization_room_to_pos
            ),  # localization_room_to_pos
        5,  # data_period_localization
        30,  # frequency_coupling
        1,  # num_users
        10,  # num_rooms
        60,  # simulation_duration
    )
    run(parameter)
def __init__(self):
    """Populate the instance with fixed parameter values and value ranges.

    Every attribute is either a constant or a collection of candidate
    values; paths are resolved relative to ``__location__``.
    """
    testbed_name = "vm"  # server, vm
    max_clients = 10
    fingerprint_root = os.path.join(__location__, "..", "..",
                                    "localization", "data")

    # Server endpoint.
    self.server_ip = "localhost"
    self.server_port = 10026

    # Client counts: 2..max_clients total, 0..max_clients-2 rejected.
    self.num_clients = range(2, max_clients + 1)
    self.num_reject_clients = range(max_clients - 1)

    # Coupling parameters.
    self.len_light_patterns = list(range(2, 11, 2))
    self.sampling_period_couplings = [get_pattern_max_sampling_period()]
    self.coupling_compare_methods = coupling_compare_methods.keys()
    self.coupling_similarity_thresholds = [0.7]

    # Localization parameters and fingerprint locations.
    self.sampling_period_localizations = [5]
    self.localization_pos_in_area = localization_pos_in_area
    self.path_wifi_scans = os.path.join(fingerprint_root,
                                        "wifi-fingerprints")
    self.path_ble_scans = os.path.join(fingerprint_root,
                                       "bluetooth-fingerprints")

    # Machine-learning based coupling.
    self.path_ml_train_data = os.path.join(__location__, "..", "..",
                                           "simulator", "ml-train-data",
                                           testbed_name)
    self.sampling_period_ml_trains = [0.05]
    self.coupling_ml_classifiers = coupling_ml_classifiers.keys()
    self.equalize_methods = equalize_methods.keys()
def __init__(self):
    """Populate the instance with fixed parameter values and value ranges.

    Every attribute is either a constant or a collection of candidate
    values; paths are resolved relative to ``__location__``.
    """
    testbed_name = "vm"  # server, vm
    project_root = os.path.join(__location__, "..", "..")

    # Server endpoint.
    self.server_ip = "localhost"
    self.server_port = 10026

    # Coupling parameters.
    self.sampling_period_couplings = [get_pattern_max_sampling_period()]
    self.coupling_compare_methods = coupling_compare_methods.keys()
    self.coupling_similarity_thresholds = [0.7]

    # Localization parameters and data locations.
    self.sampling_period_localizations = [5]
    self.path_ml_train_data = os.path.join(project_root, "simulator",
                                           "ml-train-data", testbed_name)
    self.path_localization_data = os.path.join(project_root,
                                               "localization", "data")
    self.localization_room_to_pos = localization_room_to_pos

    # Machine-learning based coupling.
    self.sampling_period_ml_trains = [0.05]
    self.coupling_ml_classifiers = coupling_ml_classifiers.keys()
    self.equalize_methods = equalize_methods.keys()

    # Simulation dimensions.
    self.coupling_frequency = [10, 20, 30]
    self.num_users = [3, 5, 10]
    self.num_rooms = range(1, 11)
    self.simulation_duration = [180]
def offline_test_ml_model(path_ml_offline_evaluation):
    """Evaluate each classifier offline on simulated clients and store results.

    For every admissible combination of client count, reject-client count
    and light-pattern length, clients are simulated: accepted clients read
    the loaded light pattern, rejected clients read Gaussian noise drawn
    with the mean and standard deviation of that pattern. Each classifier
    is then trained per sampling period on the stored training data and
    asked to classify every simulated client. The collected
    ``StaticCouplingResult`` objects are serialized to
    ``path_ml_offline_evaluation``.
    """

    def filter_params(param_grid):
        # Keep only combinations that leave at least two accepted clients.
        return [
            candidate for candidate in param_grid
            if candidate["num clients"] - candidate["num reject clients"] >= 2
        ]

    def add_clients(count, provider, period, registry, groundtruth):
        # Create `count` clients fed by `provider`, record them under a
        # random MAC and remember the MAC in the ground-truth list.
        for _ in range(count):
            mac = create_random_mac()
            client = Client()
            client.light_signal, _ = provider.get_light_data(period)
            registry[mac] = client
            groundtruth.append(mac)

    testbed = "vm"
    online_dir = os.path.join(__location__, "..", "online")
    path_ml_train_data = os.path.join(online_dir, "ml-train-data", testbed)
    raw_feature_path = glob.glob(
        os.path.join(path_ml_train_data, "combined-*-raw-feature-data"))[0]
    combined_raw_feature_data = DillSerializer(raw_feature_path).deserialize()
    selected_features_path = os.path.join(online_dir,
                                          "tsfresh-features-to-be-extracted")
    tsfresh_features_to_extract_selected = DillSerializer(
        selected_features_path).deserialize()
    sampling_periods = sorted(combined_raw_feature_data.keys())

    max_clients = 10
    param_grid = ParameterGrid({
        "num clients": range(2, max_clients + 1),
        "num reject clients": range(max_clients - 1),
        "len light pattern": range(2, 11, 2)
    })
    sampling_period_coupling = get_pattern_max_sampling_period()
    filtered_params = filter_params(param_grid)
    results = nested_dict(5, list)

    total_params = len(filtered_params)
    for position, param in enumerate(filtered_params, 1):
        print("Param: {0}/{1}".format(position, total_params))
        clients = dict()
        groundtruth_accept_clients = list()
        groundtruth_reject_clients = list()

        light_signal, light_signal_time = light_analysis.load_light_pattern(
            param["len light pattern"])

        # accept client
        pattern_provider = CouplingDataProvider(light_signal,
                                                light_signal_time, None, None)
        add_clients(param["num clients"] - param["num reject clients"],
                    pattern_provider, sampling_period_coupling, clients,
                    groundtruth_accept_clients)

        # reject client: noise with the same mean, standard deviation and
        # length as the real light signal.
        noise = numpy.random.normal(light_signal.mean(), light_signal.std(),
                                    len(light_signal))
        noise_provider = CouplingDataProvider(noise, light_signal_time, None,
                                              None)
        add_clients(param["num reject clients"], noise_provider,
                    sampling_period_coupling, clients,
                    groundtruth_reject_clients)

        for clf in Classifier:
            for sampling_period in sampling_periods:
                print("Classifier: ", clf)
                print("Sampling period: ", sampling_period)
                tsfresh_features = TsFreshFeatures()
                train_data = combined_raw_feature_data[sampling_period][0]
                X_tsfresh = train_data.X_tsfresh
                y_tsfresh = train_data.y_tsfresh
                print("X: ", X_tsfresh.shape)
                print("X samples: ", len(X_tsfresh.id.unique()))
                print("y: ", y_tsfresh.shape)

                print("Extract features ...")
                X_selected_features = tsfresh_features.extract_selected_features(
                    X_tsfresh, tsfresh_features_to_extract_selected)
                print("X selected: ", X_selected_features.shape)
                print("y: ", y_tsfresh.shape)

                print("Coupling simulation ...")
                ml_model = Classifier.get_clf(clf)
                print("Class 1: ", len(y_tsfresh[y_tsfresh == 1]))
                print("Class 0: ", len(y_tsfresh[y_tsfresh == 0]))
                ml_model = ml_model.fit(X_selected_features, y_tsfresh)

                accepted = set()
                rejected = set()
                for client_mac, client in clients.items():
                    feature = tsfresh_features.extract_selected_features(
                        client.light_signal,
                        tsfresh_features_to_extract_selected, True)
                    print("Feature shape: ", feature.shape)
                    prediction = ml_model.predict(feature)
                    if prediction == 1.0:
                        accepted.add(client_mac)
                    else:
                        rejected.add(client_mac)
                accept_clients = list(accepted)
                reject_clients = list(rejected)

                # Running index -> MAC, in the dict's iteration order.
                mac_mapping = dict(enumerate(clients))
                result = StaticCouplingResult(accept_clients, reject_clients,
                                              groundtruth_accept_clients,
                                              groundtruth_reject_clients,
                                              None, mac_mapping)
                per_pattern = results[param["num clients"]][
                    param["num reject clients"]][param["len light pattern"]]
                per_pattern[clf.name][sampling_period].append(result)

                print("accept:")
                print("result:", accept_clients)
                print("ground truth: ", groundtruth_accept_clients)
                print(result.accuracy_accept)
                print("reject:")
                print("result: ", reject_clients)
                print("ground truth: ", groundtruth_reject_clients)
                print(result.accuracy_reject)

                print("ML cross validation ...")
                ml_model = Classifier.get_clf(clf)
                scores = cross_val_score(ml_model,
                                         X_selected_features,
                                         y_tsfresh,
                                         cv=10,
                                         n_jobs=-1)
                print("Scores: ", scores)
                print("------------------------------------------------------")

    DillSerializer(path_ml_offline_evaluation).serialize(results)
def analysis_runtime_tsfresh_selected_features(evaluate):
    """Measure or plot the runtime of extracting selected tsfresh features.

    Args:
        evaluate: When true, time ``extract_selected_features`` for each
            light-pattern length and each serialized feature set found
            below ``raw-results/feature-selection``, then serialize the
            timings. When false, load the serialized timings and save a
            PDF plotting the median runtime per number of features.

    Raises:
        AssertionError: If the number of extracted feature columns differs
            from the count encoded in the feature-set file name.
    """
    # NOTE(review): the original indentation was lost; plotting is assumed
    # to belong to the load (else) branch only -- confirm against history.
    data_path = os.path.join(__location__, "raw-results", "feature-selection",
                             "tsfresh-selected-features-runtime")
    if evaluate:
        # Feature-set files end in "-<count>"; process them by that count.
        features_path = glob.glob(
            os.path.join(__location__, "raw-results", "feature-selection",
                         "tsfresh-*-to-be-extracted-*"))
        features_path = sorted(
            features_path,
            key=lambda entry: int(os.path.basename(entry).split("-")[-1]))
        tsfresh_features = TsFreshFeatures()
        runtime = nested_dict(2, dict)
        # Does not depend on the pattern length, so look it up once.
        sampling_period_coupling = get_pattern_max_sampling_period()
        for len_light_pattern in [2, 4, 6, 8, 10]:
            light_signal, light_signal_time = light_analysis.load_light_pattern(
                len_light_pattern)
            coupling_data_provider = CouplingDataProvider(
                light_signal, light_signal_time, None, None)
            light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            print("len light pattern: ", len_light_pattern)
            print("sampling period: ", sampling_period_coupling)
            print("len sample: ", len(light_signal))
            for feature_path in features_path:
                num_features = int(
                    os.path.basename(feature_path).split("-")[-1])
                print("num features: ", num_features)
                features_to_extract = DillSerializer(
                    feature_path).deserialize()
                # Only the extraction itself is timed.
                start = time.time()
                X = tsfresh_features.extract_selected_features(
                    light_signal, features_to_extract, True)
                end = time.time()
                print("feature shape: ", X.shape)
                assert num_features == X.shape[1]
                runtime[len_light_pattern][num_features] = end - start
                print("duration: ", end - start)
        DillSerializer(data_path).serialize(runtime)
    else:
        runtime = DillSerializer(data_path).deserialize()
        # Regroup: number of features -> runtimes over all pattern lengths.
        runtime_per_num_feature = defaultdict(list)
        len_light_patterns, num_features = get_all_keys(runtime)
        for len_light_pattern, num_feature in itertools.product(
                len_light_patterns, num_features):
            runtime_per_num_feature[num_feature].append(
                runtime[len_light_pattern][num_feature])

        fig, ax = plt.subplots()
        num_features = sorted(runtime_per_num_feature.keys())
        median_runtime = [
            numpy.median(runtime_per_num_feature[num_feature])
            for num_feature in num_features
        ]

        # Highlight the runtime at `nth_feature` features. The value is
        # looked up by feature count (the x-axis unit) rather than by list
        # position, so the label matches the point under the marker line,
        # and the highlight is skipped when no such measurement exists.
        nth_feature = 10
        if nth_feature in runtime_per_num_feature:
            nth_runtime = median_runtime[num_features.index(nth_feature)]
            ax.text(nth_feature + 0.3, nth_runtime + 0.015,
                    round(nth_runtime, 3))
            ax.axvline(nth_feature, linestyle="--", color="black")

        ax.plot(num_features,
                median_runtime,
                label="Virtual Machine",
                marker="o",
                color="#1f77b4")
        ax.set_ylabel("Runtime (s)")
        ax.set_xlabel("Number of features")
        # Every fourth tick plus the last one.
        ax.set_xticks(num_features[::4] + [num_features[-1]])
        ax.grid()
        ax.set_ylim(bottom=0, top=0.3)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                  loc=3,
                  ncol=1,
                  mode="expand",
                  borderaxespad=0.)
        filepath = os.path.join(__location__, "results", "feature-selection",
                                "vm", "tsfresh-features-selected-runtime.pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
def __init__(self,
             num_users,
             num_rooms,
             simulation_duration,
             data_period_ml_train,
             path_ml_train_data,
             coupling_ml_classifier,
             path_localization_data,
             localization_room_to_pos,
             intra_room_distance=2,
             inter_room_distance=3,
             room_step_size=2,
             len_light_patterns=range(2, 11, 2),
             check_similarity_runtime=False):
    """Build the per-room and per-client data used by the simulation.

    The simulation duration is split randomly (multinomial, equal room
    probabilities) into per-room stay durations; rooms, room distances and
    user routes are derived via the instance's own helper methods.
    Bluetooth and Wi-Fi fingerprints are loaded from
    ``path_localization_data``, light data and coupling classifiers are
    created, and finally ``self.room_data`` / ``self.client_data`` are
    filled for every room. Light-pattern lengths and fingerprint rooms are
    assigned to rooms round-robin in sorted order.
    """
    equal_room_probability = numpy.ones(num_rooms) / num_rooms
    self.stay_durations = numpy.random.multinomial(simulation_duration,
                                                   equal_room_probability)
    self.rooms, self.room_distribution = self.select_rooms(num_rooms)
    self.room_distances = self.calculate_room_distances(
        self.rooms, inter_room_distance, intra_room_distance, room_step_size)
    self.user_routes = self.create_user_routes(num_users,
                                               self.stay_durations,
                                               self.room_distribution)

    # Localization fingerprints: Bluetooth first, then Wi-Fi.
    ble_scans, ble_features = LocalizationFeatures.from_multiple_rooms(
        os.path.join(path_localization_data, "bluetooth-fingerprints"),
        localization_room_to_pos)
    wifi_scans, wifi_features = LocalizationFeatures.from_multiple_rooms(
        os.path.join(path_localization_data, "wifi-fingerprints"),
        localization_room_to_pos)

    light_data = self.create_light_data(len_light_patterns,
                                        get_pattern_max_sampling_period(),
                                        check_similarity_runtime)
    tsfresh_selected_features, coupling_classifiers = self.create_ml_coupling(
        coupling_ml_classifier, path_ml_train_data, data_period_ml_train,
        len_light_patterns)

    self.room_data = dict()
    self.client_data = dict()

    # Endless round-robin over the available pattern lengths and over the
    # rooms for which fingerprints exist.
    pattern_len_cycle = itertools.cycle(sorted(light_data.keys()))
    fingerprint_room_cycle = itertools.cycle(sorted(ble_scans.keys()))

    for room_id in self.rooms:
        loc_room_id = next(fingerprint_room_cycle)
        pattern_len = next(pattern_len_cycle)

        # Light data entry layout: raw signal, raw signal time, signal,
        # pattern, pattern duration.
        light_entry = light_data[pattern_len]
        raw_light_signal = light_entry[0]
        raw_light_signal_time = light_entry[1]
        light_signal = light_entry[2]
        light_pattern = light_entry[3]
        light_pattern_duration = light_entry[4]

        # Classifier entry layout: basic/all features, basic/selected
        # features, tsfresh/selected features.
        classifiers = coupling_classifiers[pattern_len]
        classifier_basic_all = classifiers[0]
        classifier_basic_selected = classifiers[1]
        classifier_tsfresh_selected = classifiers[2]

        room_wifi_scans = wifi_scans[loc_room_id]
        room_ble_scans = ble_scans[loc_room_id]

        self.room_data[room_id] = RoomData(
            light_signal, light_pattern, light_pattern_duration,
            classifier_basic_all, classifier_basic_selected,
            classifier_tsfresh_selected, tsfresh_selected_features,
            room_wifi_scans, room_ble_scans, wifi_features[loc_room_id],
            ble_features[loc_room_id])
        self.client_data[room_id] = ClientData(raw_light_signal,
                                               raw_light_signal_time,
                                               room_wifi_scans,
                                               room_ble_scans)