def _execute(self):
    """Run the classification experiment on several subsets of real-world graphs.

    Builds a feature frame from ``self.features``, defines a handful of
    boolean row filters over the ``"real-world"`` rows, and for every
    filter calls ``classification_experiment`` on the rows belonging to
    the selected graphs. The resulting accuracies are written to
    ``<self._stagepath>accuracies/<filter name>.csv``.

    Progress information (model counts, and the number of graphs per
    filter) is printed to stdout.
    """
    df = dicts_to_df(self.features)
    # NOTE(review): the return value is ignored, so this presumably
    # modifies ``df`` in place -- confirm against format_feature_df.
    format_feature_df(df)

    print(collections.Counter(df["Model"]))
    real_rows = df[df["Model"] == "real-world"]

    # Named boolean masks over the real-world rows.
    low_degree = (
        real_rows["Centrality.Degree.Location.Arithmetic Mean"] <= 30
    )
    is_socfb = real_rows["Type"] == "socfb"
    filters = {
        "all": [True] * len(real_rows),
        "avg-degree-le-30": low_degree,
        "avg-degree-gt-30": ~low_degree,
        "socfb": is_socfb,
        "not-socfb": real_rows["Type"] != "socfb",
    }

    # Every model except the real-world one and the "-second" variants.
    primary_models = {
        name for name in df["Model"] if not name.endswith("-second")
    }
    network_models = sorted(primary_models - {"real-world"})

    summary_line = "{:20}{:>5}"
    for filtername in sorted(filters):
        mask = filters[filtername]
        graphs = sorted(real_rows[mask]["Graph"])
        print(summary_line.format(filtername, len(graphs)))

        features_collection = get_all_feature_sets_self_check(df, graphs)
        # Select on the second row-index level using the graph names;
        # the other two levels are left unrestricted.
        sub_df = df.loc(axis=0)[:, graphs, :]
        accuracies = classification_experiment(
            sub_df, network_models, features_collection, self.cores)

        target = self._stagepath + "accuracies/" + filtername + ".csv"
        accuracies.to_csv(target, header=True, index_label="features")
def _execute(self):
    """Replace float features by the difference between paired models.

    For every model that has a ``"<model>-second"`` counterpart, the
    float-typed columns of the model's rows become ``first - second``
    and those of the counterpart's rows become ``second - first``.
    Each row of the resulting frame is then passed, as a dict, to
    ``self._save_as_csv``.

    Progress messages are printed to stdout.
    """
    df = dicts_to_df(self.features)
    df.sort_index(axis=1, inplace=True)
    # NOTE(review): the return value is ignored, so this presumably
    # modifies ``df`` in place -- confirm against format_feature_df.
    format_feature_df(df)

    # Every model except the real-world one and the "-second" variants.
    primary_models = {
        name for name in df["Model"] if not name.endswith("-second")
    }
    network_models = sorted(primary_models - {"real-world"})

    # Only float-typed columns take part in the subtraction.
    diff_features = df.columns[df.dtypes == float].values

    print("Calculating difference for {} features for {} models...".format(
        len(diff_features), len(network_models)))

    idx = pandas.IndexSlice
    for model in network_models:
        # Row selectors on the third index level (the model name).
        first_rows = idx[:, :, model]
        second_rows = idx[:, :, model + "-second"]

        # Plain arrays, so the subtraction is positional rather than
        # label-aligned.
        # NOTE(review): relies on both selections having the same number
        # of rows in matching order -- confirm.
        val_1 = df.loc[first_rows, diff_features].values
        val_2 = df.loc[second_rows, diff_features].values

        df.loc[first_rows, diff_features] = val_1 - val_2
        df.loc[second_rows, diff_features] = val_2 - val_1
        print("Done with model {}".format(model))

    for record in df.to_dict("records"):
        self._save_as_csv(record)