Esempio n. 1
0
    def _execute(self):
        """Run the classification experiment once per real-world graph filter.

        Builds a feature DataFrame from ``self.features``, defines several
        boolean masks over the rows whose ``Model`` is ``"real-world"``, and
        for every mask (visited in alphabetical order of its name) runs
        ``classification_experiment`` on the matching graphs. The resulting
        accuracies are written to
        ``<self._stagepath>accuracies/<filter name>.csv``.
        """
        feature_df = dicts_to_df(self.features)
        format_feature_df(feature_df)

        real_world = feature_df[feature_df["Model"] == "real-world"]
        # Print how many rows each model contributes.
        print(collections.Counter(feature_df["Model"]))

        degree_at_most_30 = real_world[
            "Centrality.Degree.Location.Arithmetic Mean"] <= 30

        # Filter name -> boolean mask over the real-world rows.
        masks_by_name = {
            "all": [True] * len(real_world),
            "avg-degree-le-30": degree_at_most_30,
            "avg-degree-gt-30": ~degree_at_most_30,
            "socfb": real_world["Type"] == "socfb",
            "not-socfb": real_world["Type"] != "socfb"
        }

        row_template = "{:20}{:>5}"

        # Every model except "real-world" and the "-second" variants.
        network_models = sorted(
            name for name in set(feature_df["Model"])
            if not name.endswith("-second") and name != "real-world")

        for mask_name in sorted(masks_by_name):
            mask = masks_by_name[mask_name]
            graph_names = sorted(real_world[mask]["Graph"])
            print(row_template.format(mask_name, len(graph_names)))

            feature_sets = get_all_feature_sets_self_check(
                feature_df, graph_names)
            # Keep only rows whose second index level is one of the selected
            # graphs (presumably the graph name -- confirm against
            # dicts_to_df / format_feature_df).
            selected_rows = feature_df.loc(axis=0)[:, graph_names, :]
            accuracies = classification_experiment(
                selected_rows,
                network_models,
                feature_sets,
                self.cores)

            csv_path = self._stagepath + "accuracies/" + mask_name + ".csv"
            accuracies.to_csv(csv_path,
                              header=True,
                              index_label="features")
Esempio n. 2
0
    def _execute(self):
        """Replace float features by the difference between paired models.

        For every network model (all ``Model`` values except ``"real-world"``
        and those ending in ``"-second"``), the float-typed columns of the
        model's rows become ``model - model-second`` and those of the
        ``"-second"`` rows become the opposite difference. Every row of the
        resulting frame is then passed to ``self._save_as_csv`` as a dict.
        """
        feature_df = dicts_to_df(self.features)
        feature_df.sort_index(axis=1, inplace=True)
        format_feature_df(feature_df)

        network_models = sorted(
            name for name in set(feature_df["Model"])
            if not name.endswith("-second") and name != "real-world")

        # Only columns whose dtype is float take part in the subtraction.
        float_columns = feature_df.columns[feature_df.dtypes == float].values
        print("Calculating difference for {} features for {} models...".format(
            len(float_columns), len(network_models)))

        idx = pandas.IndexSlice
        for model in network_models:
            # Row selectors on the third index level for the model and its
            # "-second" counterpart.
            first_rows = idx[:, :, model]
            second_rows = idx[:, :, model + "-second"]

            # NOTE(review): the raw arrays are subtracted positionally, which
            # assumes both selections list the graphs in the same order --
            # confirm.
            first_values = feature_df.loc[first_rows, float_columns].values
            second_values = feature_df.loc[second_rows, float_columns].values

            feature_df.loc[first_rows, float_columns] = (
                first_values - second_values)
            feature_df.loc[second_rows, float_columns] = (
                second_values - first_values)
            print("Done with model {}".format(model))

        for record in feature_df.to_dict("records"):
            self._save_as_csv(record)