Ejemplo n.º 1
0
 def print_output(results, simulation_parameter):
     """Print mean performance per key, ranked best-first, followed by the
     pairwise relative improvement (%) between successively ranked keys."""
     means = {key: numpy.mean(samples) for key, samples in results.items()}
     ranked = sorted(means.items(), key=lambda item: item[1], reverse=True)
     summary = [(key, round(mean, 2)) for key, mean in ranked]
     print("Performance " + simulation_parameter + ":", summary)
     # ranked is descending, so better_mean / worse_mean - 1 is non-negative
     for (better_key, better_mean), (worse_key, worse_mean) in misc.pairwise(ranked):
         improvement = round(100 * ((better_mean / worse_mean) - 1), 2)
         print(simulation_parameter + ":", better_key, " better (%) than ", worse_key, improvement)
Ejemplo n.º 2
0
def analysis_basic_features(testbeds):
    """Analyze the relative importance of basic (hand-crafted) features.

    For each feature type ("combined"/"single") the deserialized
    feature-selection results of every testbed are merged into a single
    DataFrame, plotted as a horizontal boxplot, and the pairwise ratios of
    the median-ranked feature importances are printed.

    :param testbeds: iterable of testbed directory names under
        raw-results/feature-selection to include in the analysis.
    """
    def plot_feature_importance(importances, filename):
        # Order columns by median importance so the boxplot reads bottom-up.
        feature_order = importances.median().sort_values().index.values
        importances = importances[feature_order]
        fig, ax = plt.subplots()
        importances.boxplot(ax=ax, vert=False, showfliers=False)
        ax.set_xlabel("Relative importance")
        filename = os.path.join(__location__, "results", "machine-learning",
                                filename + ".pdf")
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)
        plt.savefig(filename, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)

    print("importance basic features")
    for feature_type in ["combined", "single"]:
        feature_importances = list()
        for testbed in testbeds:
            file_regex = "*" + feature_type + "*basic*"
            path_basic_data = glob.glob(
                os.path.join(__location__, "raw-results", "feature-selection",
                             testbed, file_regex))
            if len(path_basic_data) == 0:  # raw data not available
                continue
            assert len(path_basic_data) == 1

            path_basic_data = path_basic_data[0]
            light_data_type = os.path.basename(path_basic_data).split("-")[0]
            basic_features_selection = DillSerializer(
                path_basic_data).deserialize()
            # keys() must be materialized via list() before indexing with
            # [0] — dict views are not subscriptable under Python 3.
            if "single" in light_data_type:
                # nesting: len_light_pattern -> sampling_period -> classifier
                len_light_patterns = list(basic_features_selection.keys())
                sampling_periods = list(basic_features_selection[
                    len_light_patterns[0]].keys())
                classifiers = list(basic_features_selection[
                    len_light_patterns[0]][sampling_periods[0]].keys())
                features = get_features(light_data_type,
                                        basic_features_selection,
                                        sampling_periods, classifiers,
                                        len_light_patterns)
            elif "combined" in light_data_type:
                # nesting: sampling_period -> classifier
                sampling_periods = list(basic_features_selection.keys())
                classifiers = list(basic_features_selection[
                    sampling_periods[0]].keys())
                features = get_features(light_data_type,
                                        basic_features_selection,
                                        sampling_periods, classifiers)

            # Flatten every parameter combination into one row per result.
            row = 0
            importances = pandas.DataFrame(columns=features)
            if "single" in light_data_type:
                for len_light_pattern, sampling_period, classifier in itertools.product(
                        len_light_patterns, sampling_periods, classifiers):
                    feature_importance = basic_features_selection[
                        len_light_pattern][sampling_period][classifier]
                    row, importances = add_data(importances,
                                                feature_importance, row)
            elif "combined" in light_data_type:
                for sampling_period, classifier in itertools.product(
                        sampling_periods, classifiers):
                    feature_importance = basic_features_selection[
                        sampling_period][classifier]
                    row, importances = add_data(importances,
                                                feature_importance, row)

            feature_importances.append(importances)

        df = pandas.concat(feature_importances)
        importance_median = df.median().sort_values(ascending=False)
        # Materialize: zip() is a one-shot iterator in Python 3 and is
        # consumed by misc.pairwise below.
        importance_median = list(zip(importance_median.index,
                                     importance_median.values))
        filename = "basic-features-importance-" + light_data_type
        plot_feature_importance(df, filename)

        print(light_data_type)
        print(classifiers)
        for (feature1_name, feature1_importance), (
                feature2_name,
                feature2_importance) in misc.pairwise(importance_median):
            print(feature1_name, "importance:", round(feature1_importance, 2))
            print(feature2_name, "importance:", round(feature2_importance, 2))
            print("ratio importance:",
                  round(feature2_importance / feature1_importance, 2))
        print("---")
Ejemplo n.º 3
0
def offline_analysis_ml_model(path_ml_offline_evaluation):
    """Summarize offline ML evaluation results as one heatmap per classifier.

    The serialized evaluation data is a 5-level nested mapping
    (num_client -> num_reject_client -> len_light_pattern -> classifier ->
    sampling_period) of result lists. Results are pooled over the first three
    dimensions, then mean accuracy/precision/recall/F1 per sampling period is
    rendered as an annotated image and saved under
    results/machine-learning/vm/ as one PDF per classifier.

    :param path_ml_offline_evaluation: path to the dill-serialized
        evaluation data.
    """
    evaluation_data = DillSerializer(path_ml_offline_evaluation).deserialize()
    num_clients, num_reject_clients, len_light_patterns, \
        classifiers, sampling_periods = misc.get_all_keys(evaluation_data)
    # classifier -> sampling_period -> flat list of result entries,
    # pooled across clients, reject clients and light-pattern lengths.
    analysis_result = nested_dict(2, list)
    for num_client, num_reject_client, len_light_pattern, classifier, sampling_period in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, classifiers,
            sampling_periods):
        results = evaluation_data[num_client][num_reject_client][
            len_light_pattern][classifier][sampling_period]
        if len(results) > 0:
            analysis_result[classifier][sampling_period].extend(results)

    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Classifiers: ", classifiers)
    print("Sampling periods: ", sampling_periods)

    for classifier in classifiers:
        results = analysis_result[classifier]
        sub_results = list()  # one [acc, prec, rec, f1] row per sampling period
        for sampling_period in sampling_periods:
            # Each metric pools the "accept" and "reject" class scores so the
            # mean weights both classes equally.
            accuracy = [entry.accuracy_accept for entry in results[sampling_period]] + \
                [entry.accuracy_reject for entry in results[sampling_period]]
            precision = [entry.precision_accept for entry in results[sampling_period]] + \
                [entry.precision_reject for entry in results[sampling_period]]
            recall = [entry.recall_accept for entry in results[sampling_period]] + \
                [entry.recall_reject for entry in results[sampling_period]]
            f1 = [entry.f1_accept for entry in results[sampling_period]] + \
                [entry.f1_reject for entry in results[sampling_period]]

            entry = [
                numpy.mean(accuracy),
                numpy.mean(precision),
                numpy.mean(recall),
                numpy.mean(f1)
            ]
            entry = [round(value, 2) for value in entry]
            sub_results.append(entry)

        fig, ax = plt.subplots()
        # vmax > 1 keeps the green colormap light enough for the
        # black cell annotations added below to stay readable.
        ax.imshow(sub_results,
                  cmap="Greens",
                  aspect="auto",
                  interpolation="nearest",
                  vmin=0,
                  vmax=1.4)
        ax.set_ylabel("Sampling period (ms)")
        ytickpos = numpy.arange(len(sampling_periods))
        ax.set_yticks(ytickpos)
        # sampling periods are stored in seconds; label in milliseconds
        ax.set_yticklabels([
            int(sampling_period * 1e3) for sampling_period in sampling_periods
        ])
        xticks = ["Accuracy", "Precision", "Recall", "F1-score"]
        xtickpos = range(len(xticks))
        ax.set_xticks(xtickpos)
        ax.set_xticklabels(xticks, rotation=20, ha="right")
        # Annotate each cell with its rounded metric value.
        for i in range(len(sub_results)):
            for j in range(len(sub_results[0])):
                ax.text(j, i, sub_results[i][j], ha="center", va="center")
        # Minor ticks halfway between major ticks let the grid draw
        # cell borders instead of lines through the cell centers.
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(xtickpos)
        ]
        ax.set_xticks(ticks, minor=True)
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(ytickpos)
        ]
        ax.set_yticks(ticks, minor=True)
        ax.grid(which='minor', color="black")
        filepath = os.path.join(__location__, "results", "machine-learning",
                                "vm",
                                "ml-param-" + classifier.lower() + ".pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
Ejemplo n.º 4
0
def analysis_runtime_tsfresh(testbeds):
    """Analyze tsfresh feature-computation runtime across testbeds.

    For every runtime type ("patterns-runtime"/"only-runtime") and feature
    type ("combined"/"single"), per-testbed runtime data is merged, the
    median runtime is plotted over the number of features, and the relative
    runtimes (ratios) between testbeds are printed.

    :param testbeds: iterable of testbed directory names under
        raw-results/feature-selection to include in the analysis.
    """
    print("runtime tsfresh")
    for runtime_type in ["patterns-runtime", "only-runtime"]:
        for feature_type in ["combined", "single"]:
            runtimes = dict()
            plot_data = list()
            fileregex = "*" + feature_type + "*" + runtime_type + "*"
            labels = {
                "bbb": "IoT Board",
                "server": "Virtual Machine",
                "vm": "Server"
            }
            for testbed in testbeds:
                filepath = glob.glob(
                    os.path.join(__location__, "raw-results",
                                 "feature-selection", testbed, fileregex))
                if len(filepath) == 0:  # raw data not available
                    continue
                assert len(filepath) == 1
                path_runtime_tsfresh = filepath[0]

                # Get runtimes
                filename = os.path.basename(path_runtime_tsfresh)
                light_data_type = filename.split("-")[0]
                runtime_tsfresh = DillSerializer(
                    path_runtime_tsfresh).deserialize()
                # keys() are materialized via list() before indexing with
                # [0] — dict views are not subscriptable under Python 3.
                if "only" in filename:
                    if "single" in filename:
                        # Merge runtime-only data over the light-pattern
                        # lengths. pandas.concat replaces the per-pattern
                        # DataFrame.append loop (removed in pandas 2.0).
                        len_light_patterns = list(runtime_tsfresh.keys())
                        runtime_tsfresh = pandas.concat(
                            [runtime_tsfresh[len_light_pattern]
                             for len_light_pattern in len_light_patterns],
                            ignore_index=True)
                else:
                    if "combined" in filename:
                        # One row per entry, merged over sampling periods.
                        sampling_periods = list(runtime_tsfresh.keys())
                        tmp_runtime_tsfresh = pandas.DataFrame(
                            columns=runtime_tsfresh[
                                sampling_periods[0]][0].columns)
                        row = 0
                        for sampling_period in sampling_periods:
                            runtime_data = runtime_tsfresh[sampling_period]
                            for entry in runtime_data:
                                tmp_runtime_tsfresh.loc[row] = entry.loc[0]
                                row += 1
                        runtime_tsfresh = tmp_runtime_tsfresh
                    elif "single" in filename:
                        # One row per entry, merged over light-pattern
                        # lengths and sampling periods.
                        len_light_patterns = list(runtime_tsfresh.keys())
                        sampling_periods = list(runtime_tsfresh[
                            len_light_patterns[0]].keys())
                        tmp_runtime_tsfresh = pandas.DataFrame(
                            columns=runtime_tsfresh[len_light_patterns[0]][
                                sampling_periods[0]][0].columns)
                        row = 0
                        for len_light_pattern in len_light_patterns:
                            for sampling_period in sampling_periods:
                                runtime_data = runtime_tsfresh[
                                    len_light_pattern][sampling_period]
                                for entry in runtime_data:
                                    assert len(entry) == 1
                                    tmp_runtime_tsfresh.loc[row] = entry.loc[0]
                                    row += 1
                        runtime_tsfresh = tmp_runtime_tsfresh

                # remove outlier, more than 3 three times std
                #runtime_tsfresh[numpy.abs(runtime_tsfresh - runtime_tsfresh.mean()) > 3 * runtime_tsfresh.std()] = numpy.nan
                median = runtime_tsfresh.median()
                # column index encodes the number of features per run
                feature_len = median.index.values
                relative_runtime = median.values / feature_len
                runtimes[labels[testbed]] = numpy.mean(relative_runtime)
                plot_data.append((labels[testbed], feature_len, median))

            nth_label = 5
            fig, ax = plt.subplots()
            markers = itertools.cycle(misc.markers)
            # Truncate all curves at the first sharp runtime drop (> 0.8 s),
            # which marks the end of comparable measurement ranges.
            datalen = [
                numpy.where(numpy.diff(median) < -0.8)[0]
                for _, _, median in plot_data
            ]
            datalen = [array[0] + 1 for array in datalen if len(array) > 0]
            datalen = min(datalen) if len(datalen) > 0 else min(
                [len(feature_len) for _, feature_len, _ in plot_data])
            #datalen = min([len(feature_len) for _, feature_len, _ in plot_data])

            for label, feature_len, median in plot_data:
                # next(markers) instead of Python-2-only markers.next()
                ax.plot(feature_len[:datalen],
                        median[:datalen],
                        label=label,
                        marker=next(markers),
                        markevery=nth_label)
            ax.grid()
            ax.set_ylabel("Runtime (s)")
            ax.set_xlabel("Number of features")
            feature_len = feature_len[:datalen]
            xticks = feature_len[::nth_label]
            xticks = numpy.concatenate([xticks, [feature_len[-1]]])
            ax.set_xticks(xticks)
            ax.set_ylim(bottom=0)
            ax.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                      loc=3,
                      ncol=3,
                      mode="expand",
                      borderaxespad=0.)
            fig.set_figwidth(fig.get_figwidth() * 1.6)
            #plt.show()

            filepath = os.path.join(__location__, "results",
                                    "feature-selection")
            filename = "tsfresh-features-only-runtime-" if "only" in filename else "tsfresh-features-runtime-"
            filename = filename + light_data_type + ".pdf"
            save_path = os.path.join(filepath, filename)
            directory = os.path.dirname(save_path)
            if not os.path.exists(directory):
                os.makedirs(directory)
            plt.savefig(save_path, format="pdf", bbox_inches="tight")
            plt.close(fig)

            runtimes_ms = {key: value * 1e3 for key, value in runtimes.items()}
            runtimes_ms = sorted(runtimes_ms.items(), key=lambda kv: kv[1])
            print("runtime type:", runtime_type, "feature type:", feature_type)
            for (testbed1_name, testbed1_runtime), (
                    testbed2_name,
                    testbed2_runtime) in misc.pairwise(runtimes_ms):
                print(testbed1_name, "relative runtime (ms):",
                      round(testbed1_runtime, 2))
                print(testbed2_name, "relative runtime (ms):",
                      round(testbed2_runtime, 2))
                print("ratio faster:",
                      round(testbed2_runtime / testbed1_runtime, 2))
            print("---")