Beispiel #1
0
def tsfresh_performance_evaluation(single_light_pattern=False,
                                   range_len_light_pattern=range(2, 11, 2)):
    """Run the tsfresh performance evaluation and serialize its result.

    The light data is always created with the sampling period returned by
    ``get_pattern_max_sampling_period()``.

    single_light_pattern: when truthy, one evaluation is run per entry of
        ``range_len_light_pattern`` and a dict mapping each pattern length
        to its result is stored; otherwise a single evaluation over the
        default ``LightData`` is run and its result is stored directly.
    range_len_light_pattern: pattern lengths used in single-pattern mode.
    """
    def evaluate(light_data):
        # Relevance analysis comes first: its two outputs are the inputs of
        # the performance evaluation.
        extractor = TsFreshFeatures()
        extracted, relevance = extractor.relevance(light_data.X_tsfresh,
                                                   light_data.y_tsfresh)
        return extractor.performance_evaluation(extracted, relevance,
                                                light_data.X_tsfresh)

    sampling_period = get_pattern_max_sampling_period()
    if not single_light_pattern:  # combined light patterns
        outcome = evaluate(LightData(sampling_period))
        target = "combined-light-patterns-only-runtime-tsfresh"
    else:  # single light patterns
        outcome = {
            pattern_len: evaluate(LightData(sampling_period, [pattern_len]))
            for pattern_len in range_len_light_pattern
        }
        target = "single-light-patterns-only-runtime-tsfresh"
    filename = os.path.join(__location__, "raw-results", "feature-selection",
                            target)
    DillSerializer(filename).serialize(outcome)
Beispiel #2
0
def test():
    """Run one simulation with a fixed, hard-coded parameter set.

    The values are handed to ``SimulationData`` positionally, so the order
    of the tuple below is significant.
    """
    testbed = "vm"  # server, vm
    # NOTE(review): the sibling configurations in this file use port 10026;
    # confirm that 1026 is intended here.
    simulation_args = (
        "localhost",  # server ip
        1026,  # server port
        get_pattern_max_sampling_period(),  # data period coupling
        "pearson",  # coupling compare method
        0.7,  # coupling similarity threshold
        "dtw",  # equalize method
        0.05,  # data period ml train
        os.path.join(__location__, "..", "ml-train-data",
                     testbed),  # path ml train data
        "Random Forest",  # coupling ml classifier
        os.path.join(__location__, "..", "..", "localization",
                     "data"),  # path localization data
        str(coupling_simulator.localization_room_to_pos),
        5,  # data period localization
        30,  # frequency coupling
        1,  # num users
        10,  # num rooms
        60,  # simulation duration
    )
    run(SimulationData(*simulation_args))
 def __init__(self):
     """Populate the hard-coded parameter ranges of this configuration.

     Every attribute holds the candidate values for one parameter (server
     address, client counts, light pattern lengths, sampling periods,
     compare/equalize methods, classifiers and data paths).
     """
     target_testbed = "vm"  # server, vm
     max_clients = 10
     # Both data directories live two levels above this module.
     base_dir = os.path.join(__location__, "..", "..")

     self.server_ip = "localhost"
     self.server_port = 10026
     self.num_clients = range(2, max_clients + 1)
     self.num_reject_clients = range(max_clients - 1)
     self.len_light_patterns = list(range(2, 11, 2))
     self.sampling_period_couplings = [get_pattern_max_sampling_period()]
     self.coupling_compare_methods = coupling_compare_methods.keys()
     self.coupling_similarity_thresholds = [0.7]
     self.sampling_period_localizations = [5]
     self.localization_pos_in_area = localization_pos_in_area

     fingerprints = os.path.join(base_dir, "localization", "data")
     self.path_wifi_scans = os.path.join(fingerprints, "wifi-fingerprints")
     self.path_ble_scans = os.path.join(fingerprints,
                                        "bluetooth-fingerprints")
     self.path_ml_train_data = os.path.join(base_dir, "simulator",
                                            "ml-train-data", target_testbed)

     self.sampling_period_ml_trains = [0.05]
     self.coupling_ml_classifiers = coupling_ml_classifiers.keys()
     self.equalize_methods = equalize_methods.keys()
 def __init__(self):
     """Populate the hard-coded parameter ranges of this configuration.

     Every attribute holds the candidate values for one parameter (server
     address, sampling periods, compare/equalize methods, classifiers, data
     paths, coupling frequencies, user/room counts and simulation duration).
     """
     target_testbed = "vm"  # server, vm
     # Both data directories live two levels above this module.
     base_dir = os.path.join(__location__, "..", "..")

     self.server_ip = "localhost"
     self.server_port = 10026
     self.sampling_period_couplings = [get_pattern_max_sampling_period()]
     self.coupling_compare_methods = coupling_compare_methods.keys()
     self.coupling_similarity_thresholds = [0.7]
     self.sampling_period_localizations = [5]
     self.path_ml_train_data = os.path.join(base_dir, "simulator",
                                            "ml-train-data", target_testbed)
     self.path_localization_data = os.path.join(base_dir, "localization",
                                                "data")
     self.localization_room_to_pos = localization_room_to_pos
     self.sampling_period_ml_trains = [0.05]
     self.coupling_ml_classifiers = coupling_ml_classifiers.keys()
     self.equalize_methods = equalize_methods.keys()
     self.coupling_frequency = [10, 20, 30]
     self.num_users = [3, 5, 10]
     self.num_rooms = range(1, 11)
     self.simulation_duration = [180]
Beispiel #5
0
def offline_test_ml_model(path_ml_offline_evaluation):
    """Evaluate the ML-based coupling offline on generated client groups.

    For every admissible combination of "num clients", "num reject clients"
    and "len light pattern", a group of clients is built: accepted clients
    get light data derived from the loaded light pattern, rejected clients
    get light data derived from Gaussian noise with the mean and standard
    deviation of that pattern. For each classifier in ``Classifier`` and each
    sampling period of the stored training data, a model is fitted, every
    client is classified, and a ``StaticCouplingResult`` is appended to the
    nested result structure. A 10-fold cross validation is additionally run
    and its scores are printed (they are not stored).

    path_ml_offline_evaluation: file the nested results are serialized to;
        it is rewritten after every parameter combination.
    """
    def filter_params(param_grid):
        # Keep only combinations in which at least two clients remain after
        # subtracting the rejected ones.
        filtered_params = list()
        for param in param_grid:
            if param["num clients"] - param["num reject clients"] >= 2:
                filtered_params.append(param)
        return filtered_params

    testbed = "vm"
    path_ml_train_data = os.path.join(__location__, "..", "online",
                                      "ml-train-data", testbed)
    # Take the first file matching the pattern.
    # NOTE(review): raises IndexError when no such file exists.
    combined_raw_feature_data = glob.glob(
        os.path.join(path_ml_train_data, "combined-*-raw-feature-data"))[0]
    combined_raw_feature_data = DillSerializer(
        combined_raw_feature_data).deserialize()
    tsfresh_features_to_extract_selected = os.path.join(
        __location__, "..", "online", "tsfresh-features-to-be-extracted")
    tsfresh_features_to_extract_selected = DillSerializer(
        tsfresh_features_to_extract_selected).deserialize()
    # The training data is keyed by sampling period.
    sampling_periods = sorted(combined_raw_feature_data.keys())

    # num_clients is first the upper bound and then rebound to the range of
    # group sizes 2..10; rejected clients range over 0..8.
    num_clients = 10
    num_reject_clients = range(num_clients - 1)
    num_clients = range(2, num_clients + 1)
    len_light_patterns = range(2, 11, 2)
    param_grid = ParameterGrid({
        "num clients": num_clients,
        "num reject clients": num_reject_clients,
        "len light pattern": len_light_patterns
    })
    sampling_period_coupling = get_pattern_max_sampling_period()
    filtered_params = filter_params(param_grid)
    # Indexed as [num clients][num reject clients][len light pattern]
    # [classifier name][sampling period] -> list of results (see the append
    # further below).
    results = nested_dict(5, list)
    for i, param in enumerate(filtered_params):
        print("Param: {0}/{1}".format(i + 1, len(filtered_params)))
        clients = dict()
        groundtruth_accept_clients = list()
        groundtruth_reject_clients = list()
        light_signal, light_signal_time = light_analysis.load_light_pattern(
            param["len light pattern"])
        coupling_data_provider = CouplingDataProvider(light_signal,
                                                      light_signal_time, None,
                                                      None)
        # Clients that should be accepted receive data from the real pattern.
        for _ in range(param["num clients"] -
                       param["num reject clients"]):  # accept client
            mac = create_random_mac()
            client = Client()
            client.light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            clients[mac] = client
            groundtruth_accept_clients.append(mac)

        # Disabled alternative: use a recorded random light signal for the
        # rejected clients instead of generated noise.
        #light_signal_random, light_signal_random_time = light_analysis.load_random_light_signal()
        #coupling_data_provider = CouplingDataProvider(light_signal_random, light_signal_random_time, None, None)

        # Rejected clients receive normally distributed noise that matches
        # the pattern's length, mean and standard deviation.
        datalen = len(light_signal)
        mean = light_signal.mean()
        std = light_signal.std()
        noise = numpy.random.normal(mean, std, datalen)
        coupling_data_provider = CouplingDataProvider(noise, light_signal_time,
                                                      None, None)
        for _ in range(param["num reject clients"]):  # reject client
            mac = create_random_mac()
            client = Client()
            client.light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            clients[mac] = client
            groundtruth_reject_clients.append(mac)

        for clf in Classifier:
            for sampling_period in sampling_periods:
                print("Classifier: ", clf)
                print("Sampling period: ", sampling_period)
                tsfresh_features = TsFreshFeatures()
                # NOTE(review): the training data depends only on the sampling
                # period, yet its features are re-extracted for every
                # parameter combination and classifier - consider caching.
                X_tsfresh = combined_raw_feature_data[sampling_period][
                    0].X_tsfresh
                y_tsfresh = combined_raw_feature_data[sampling_period][
                    0].y_tsfresh
                print("X: ", X_tsfresh.shape)
                print("X samples: ", len(X_tsfresh.id.unique()))
                print("y: ", y_tsfresh.shape)
                print("Extract features ...")
                X_selected_features = tsfresh_features.extract_selected_features(
                    X_tsfresh, tsfresh_features_to_extract_selected)
                print("X selected: ", X_selected_features.shape)
                print("y: ", y_tsfresh.shape)

                print("Coupling simulation ...")
                ml_model = Classifier.get_clf(clf)
                print("Class 1: ", len(y_tsfresh[y_tsfresh == 1]))
                print("Class 0: ", len(y_tsfresh[y_tsfresh == 0]))
                ml_model = ml_model.fit(X_selected_features, y_tsfresh)
                accept_clients = set()
                reject_clients = set()
                # Classify each client individually; a prediction of 1.0
                # means "accept", anything else "reject".
                for client_mac in clients.keys():
                    client_light_data = clients[client_mac].light_signal
                    feature = tsfresh_features.extract_selected_features(
                        client_light_data,
                        tsfresh_features_to_extract_selected, True)
                    print("Feature shape: ", feature.shape)
                    result = ml_model.predict(feature)
                    if result == 1.0:
                        accept_clients.add(client_mac)
                    else:
                        reject_clients.add(client_mac)
                accept_clients = list(accept_clients)
                reject_clients = list(reject_clients)
                # Maps a running index (0..len(clients)-1) to each client MAC.
                mac_mapping = {
                    key: value
                    for key, value in zip(range(len(clients)), clients.keys())
                }
                result = StaticCouplingResult(accept_clients, reject_clients,
                                              groundtruth_accept_clients,
                                              groundtruth_reject_clients, None,
                                              mac_mapping)
                results[param["num clients"]][param["num reject clients"]] \
                    [param["len light pattern"]][clf.name][sampling_period].append(result)
                print("accept:")
                print("result:", accept_clients)
                print("ground truth: ", groundtruth_accept_clients)
                print(result.accuracy_accept)
                print("reject:")
                print("result: ", reject_clients)
                print("ground truth: ", groundtruth_reject_clients)
                print(result.accuracy_reject)
                print("ML cross validation ...")
                # Fresh, unfitted model for the cross validation; the scores
                # are only printed.
                ml_model = Classifier.get_clf(clf)
                scores = cross_val_score(ml_model,
                                         X_selected_features,
                                         y_tsfresh,
                                         cv=10,
                                         n_jobs=-1)
                print("Scores: ", scores)
                print("------------------------------------------------------")
        # Written inside the parameter loop, so the results collected so far
        # are persisted after every parameter combination.
        DillSerializer(path_ml_offline_evaluation).serialize(results)
Beispiel #6
0
def analysis_runtime_tsfresh_selected_features(evaluate):
    """Measure or plot the runtime of extracting selected tsfresh features.

    evaluate: when truthy, every feature set stored under
        ``raw-results/feature-selection/tsfresh-*-to-be-extracted-*`` is
        extracted from the light signal of each pattern length
        (2, 4, 6, 8, 10). The elapsed wall-clock time is recorded per
        (pattern length, number of features) and serialized to
        ``raw-results/feature-selection/tsfresh-selected-features-runtime``.
        When falsy, those stored results are loaded instead and the median
        runtime per number of features is plotted to
        ``results/feature-selection/vm/tsfresh-features-selected-runtime.pdf``.
    """
    def count_from_path(path):
        # The number of features is the trailing "-<n>" of the file name.
        return int(os.path.basename(path).split("-")[-1])

    data_path = os.path.join(__location__, "raw-results", "feature-selection",
                             "tsfresh-selected-features-runtime")
    if evaluate:
        features_path = sorted(
            glob.glob(
                os.path.join(__location__, "raw-results", "feature-selection",
                             "tsfresh-*-to-be-extracted-*")),
            key=count_from_path)
        tsfresh_features = TsFreshFeatures()
        runtime = nested_dict(2, dict)
        for len_light_pattern in [2, 4, 6, 8, 10]:
            light_signal, light_signal_time = light_analysis.load_light_pattern(
                len_light_pattern)
            coupling_data_provider = CouplingDataProvider(
                light_signal, light_signal_time, None, None)
            sampling_period_coupling = get_pattern_max_sampling_period()
            light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            print("len light pattern: ", len_light_pattern)
            print("sampling period: ", sampling_period_coupling)
            print("len sample: ", len(light_signal))
            for feature_path in features_path:
                num_features = count_from_path(feature_path)
                print("num features: ", num_features)
                features_to_extract = DillSerializer(
                    feature_path).deserialize()
                # Only the extraction itself is timed, not the loading.
                start = time.time()
                X = tsfresh_features.extract_selected_features(
                    light_signal, features_to_extract, True)
                end = time.time()
                print("feature shape: ", X.shape)
                assert num_features == X.shape[1]
                runtime[len_light_pattern][num_features] = end - start
                print("duration: ", end - start)
            # Persist after every pattern length so partial results survive
            # an interrupted run.
            DillSerializer(data_path).serialize(runtime)
    else:
        runtime = DillSerializer(data_path).deserialize()
        runtime_per_num_feature = defaultdict(list)
        len_light_patterns, num_features = get_all_keys(runtime)
        for len_light_pattern, num_feature in itertools.product(
                len_light_patterns, num_features):
            runtime_per_num_feature[num_feature].append(
                runtime[len_light_pattern][num_feature])
        fig, ax = plt.subplots()
        num_features = sorted(runtime_per_num_feature.keys())
        median_runtime = [
            numpy.median(runtime_per_num_feature[num_feature])
            for num_feature in num_features
        ]
        nth_feature = 10
        # Annotate the runtime that belongs to ``nth_feature`` features.
        # The value is looked up by feature count rather than by list
        # position: ``median_runtime[nth_feature]`` would pick the 11th entry,
        # which is a different feature count unless the counts start at 0,
        # while the marker line below is drawn at x == nth_feature.
        if nth_feature in runtime_per_num_feature:
            nth_runtime = median_runtime[num_features.index(nth_feature)]
            ax.text(nth_feature + 0.3, nth_runtime + 0.015,
                    round(nth_runtime, 3))
        ax.axvline(nth_feature, linestyle="--", color="black")
        ax.plot(num_features,
                median_runtime,
                label="Virtual Machine",
                marker="o",
                color="#1f77b4")
        ax.set_ylabel("Runtime (s)")
        ax.set_xlabel("Number of features")
        # Every fourth feature count plus the largest one.
        ax.set_xticks(num_features[::4] + [num_features[-1]])
        ax.grid()
        ax.set_ylim(bottom=0, top=0.3)
        # Legend placed in a strip above the axes, stretched to its width.
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                  loc=3,
                  ncol=1,
                  mode="expand",
                  borderaxespad=0.)
        filepath = os.path.join(__location__, "results", "feature-selection",
                                "vm", "tsfresh-features-selected-runtime.pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
Beispiel #7
0
    def __init__(self,
                 num_users,
                 num_rooms,
                 simulation_duration,
                 data_period_ml_train,
                 path_ml_train_data,
                 coupling_ml_classifier,
                 path_localization_data,
                 localization_room_to_pos,
                 intra_room_distance=2,
                 inter_room_distance=3,
                 room_step_size=2,
                 len_light_patterns=range(2, 11, 2),
                 check_similarity_runtime=False):
        """Build the rooms, user routes and per-room data of a simulation.

        num_users: number of users for which routes are created.
        num_rooms: number of rooms to select; also the number of stay
            durations drawn.
        simulation_duration: total that is split over the stay durations.
        data_period_ml_train, path_ml_train_data, coupling_ml_classifier:
            forwarded to ``create_ml_coupling``.
        path_localization_data: directory containing the
            "bluetooth-fingerprints" and "wifi-fingerprints" sub-directories.
        localization_room_to_pos: forwarded to
            ``LocalizationFeatures.from_multiple_rooms``.
        intra_room_distance, inter_room_distance, room_step_size: forwarded
            to ``calculate_room_distances``.
        len_light_patterns: light pattern lengths for which light data and
            coupling classifiers are created.
        check_similarity_runtime: forwarded to ``create_light_data``.
        """

        # Randomly split simulation_duration into num_rooms integer parts,
        # each room having the same probability.
        self.stay_durations = numpy.random.multinomial(
            simulation_duration,
            numpy.ones(num_rooms) / num_rooms)
        self.rooms, self.room_distribution = self.select_rooms(num_rooms)
        self.room_distances = self.calculate_room_distances(
            self.rooms, inter_room_distance, intra_room_distance,
            room_step_size)
        self.user_routes = self.create_user_routes(num_users,
                                                   self.stay_durations,
                                                   self.room_distribution)

        path_ble_scans = os.path.join(path_localization_data,
                                      "bluetooth-fingerprints")
        ble_scans, ble_features = LocalizationFeatures.from_multiple_rooms(
            path_ble_scans, localization_room_to_pos)

        path_wifi_scans = os.path.join(path_localization_data,
                                       "wifi-fingerprints")
        wifi_scans, wifi_features = LocalizationFeatures.from_multiple_rooms(
            path_wifi_scans, localization_room_to_pos)

        light_data = self.create_light_data(len_light_patterns,
                                            get_pattern_max_sampling_period(),
                                            check_similarity_runtime)
        tsfresh_selected_features, coupling_classifiers = self.create_ml_coupling(
            coupling_ml_classifier, path_ml_train_data, data_period_ml_train,
            len_light_patterns)

        self.room_data = dict()
        self.client_data = dict()
        # From here on len_light_patterns is rebound to the pattern lengths
        # actually present in light_data.
        len_light_patterns = sorted(light_data.keys())
        # Pattern lengths and localization rooms are handed out round-robin,
        # so they repeat when there are more simulated rooms than entries.
        iter_len_light_patterns = itertools.cycle(len_light_patterns)
        # NOTE(review): room ids are taken from the BLE scans only; assumes
        # the wifi scans/features use the same keys - confirm.
        loc_rooms = sorted(ble_scans.keys())
        iter_loc_rooms = itertools.cycle(loc_rooms)

        for room_id in self.rooms:
            loc_room_id = next(iter_loc_rooms)
            len_light_pattern = next(iter_len_light_patterns)

            # Each light_data entry is indexed positionally (0..4).
            raw_light_signal = light_data[len_light_pattern][0]
            raw_light_signal_time = light_data[len_light_pattern][1]
            light_signal = light_data[len_light_pattern][2]
            light_pattern = light_data[len_light_pattern][3]
            light_pattern_duration = light_data[len_light_pattern][4]

            # Each coupling_classifiers entry holds three classifiers (0..2).
            coupling_classifier_basic_all_features = coupling_classifiers[
                len_light_pattern][0]
            coupling_classifier_basic_selected_features = coupling_classifiers[
                len_light_pattern][1]
            coupling_classifier_tsfresh_selected_features = coupling_classifiers[
                len_light_pattern][2]

            self.room_data[room_id] = RoomData(
                light_signal, light_pattern, light_pattern_duration,
                coupling_classifier_basic_all_features,
                coupling_classifier_basic_selected_features,
                coupling_classifier_tsfresh_selected_features,
                tsfresh_selected_features, wifi_scans[loc_room_id],
                ble_scans[loc_room_id], wifi_features[loc_room_id],
                ble_features[loc_room_id])

            # Clients get the raw light signal with its time axis plus the
            # same room's wifi and BLE scans.
            self.client_data[room_id] = ClientData(raw_light_signal,
                                                   raw_light_signal_time,
                                                   wifi_scans[loc_room_id],
                                                   ble_scans[loc_room_id])