def setUp(self):
    """Load this module's options from its configuration section.

    The section is named after the concrete class. Each option is evaluated
    with ``getConfig().eval`` and stored on the instance.
    """
    section = self.__class__.__name__

    def option(name):
        # One config lookup per option, in the section of the concrete class.
        return getConfig().eval(section, name)

    self.method = option("method")
    self.corr_threshold = option("corr_threshold")
    # The option is called "random_walks" but is stored as ``walk``.
    self.walk = option("random_walks")
    self.continue_from_log = option("continue_from_log")
def __init__(self, db):
    """Initialise the controller and index the CSV input directory.

    Args:
        db: Database handle; passed to ``AbstractController.__init__`` and
            also kept on ``self.db``.
    """
    AbstractController.__init__(self, db)
    self.db = db

    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    self.continue_from_log = option("continue_from_log")
    self.csv_data_path = option("csv_data_path")
    # The option is called "corr_function" but is stored as ``corr_method``.
    self.corr_method = option("corr_function")
    self.corr_threshold = option("corr_threshold")

    # Keep only regular files found directly inside the CSV directory.
    regular_files = []
    for entry in listdir(self.csv_data_path):
        if isfile(join(self.csv_data_path, entry)):
            regular_files.append(entry)
    self.data_files = regular_files

    self.corr_mat = {}
    self.targets = {"dataset_name": [], "target_feature": []}
def setup(self):
    """Read the random-walk settings and reset the working graph lists.

    All options come from the config section named after the concrete class.
    ``randomwalk_args`` and ``doc2vec_args`` are decoded with ``json.loads``;
    ``rw_extensions`` is split on commas.
    """
    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    # Walk parameters.
    self.randomWalk_length = option('random_walk_length')
    self.random_walk_num = option('random_walk_number')

    # Input / output locations.
    self.subGraphs_directory_path = option('subGraphs_directory_path')
    self.random_walk_directory_path_output = option(
        'random_walk_directory_path_output')
    self.classifier_files_directory = option('classifier_files_directory')
    self.statistics_output_path = option('statistics_output_path')

    # Containers start empty on every setup.
    self.subGraphs_list = []
    self.rw_list_of_graphs_train = []
    self.rw_list_of_graphs_train_positive = []
    self.rw_list_of_graphs_train_negative = []
    self.rw_list_of_graphs_test = []

    # Structured options.
    raw_rw_args = option('randomwalk_args')
    self.rw_args = json.loads(raw_rw_args)
    raw_extensions = option('rw_extensions')
    self.rw_extensions = raw_extensions.split(",")
    raw_doc2vec_args = option('doc2vec_args')
    self.doc2vec_args = json.loads(raw_doc2vec_args)
def __init__(self, db):
    """Initialise the controller and declare the output field names.

    Args:
        db: Database handle; passed to ``AbstractController.__init__`` and
            also kept on ``self.db``.
    """
    AbstractController.__init__(self, db)

    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    # The option is called "target_attr" but is stored as ``target``.
    self.target = option("target_attr")
    self.append_new_graphs = option("append_new_graphs")

    # Field names are runtime identifiers; spellings such as 'Median_wights'
    # are kept exactly as they are.
    self.fields = [
        'graph_name', 'dataset_name',
        'connected', 'density', 'Avg_CC',
        'Median_deg', 'Variance_deg', 'Avg_degree',
        'Median_wights', 'Variance_wights', 'Avg_weight', 'Avg_weight_abs',
        'edges', 'nodes', 'self_loops', 'edge_to_node_ratio',
        'Num_of_zero_weights', 'negative_edges', 'min_vc',
        'target',
    ]
    self.db = db
def __init__(self, db):
    """Initialise the controller and declare the output field names.

    Args:
        db: Database handle; passed to ``AbstractController.__init__`` and
            also kept on ``self.db``.
    """
    AbstractController.__init__(self, db)

    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    # The option is called "target_attr" but is stored as ``target``.
    self.target = option("target_attr")
    self.append_new_graphs = option("append_new_graphs")

    # Field names are runtime identifiers; spellings such as 'Median_wights'
    # are kept exactly as they are.
    self.fields = [
        'graph_name', 'dataset_name',
        'global_avg_betweenness', 'global_var_betweenness',
        'global_average_edge_weight', 'global_var_average_edge_weight',
        'global_avg_degree', 'global_var_degree',
        'global_avg_authority', 'global_var_authority',
        'global_avg_hub', 'global_var_hub',
        'Avg_degree',
        'Median_wights', 'Variance_wights', 'Avg_weight', 'Avg_weight_abs',
        'edges', 'nodes', 'edge_to_node_ratio',
        'Num_of_zero_weights', 'negative_edges',
        'target',
    ]
    self.db = db
def setUp(self):
    """Load config, the pickled regression model and a random population.

    Reads ``dataset`` and ``target`` from the config section named after the
    concrete class, unpickles the model stored in
    ``data/RF_regression_model.dat`` and builds ``self.population``: one
    entry per dataset column, each entry holding one random sample of column
    names per dataset column. Every sample has ``len(columns) // 10`` names
    drawn without replacement from ``self.source_inds``.
    """
    print('setting up test_module')
    section = self.__class__.__name__
    self.dataset = getConfig().eval(section, "dataset")
    self.target = getConfig().eval(section, "target")

    # NOTE: unpickling can execute arbitrary code; the model file must come
    # from a trusted source. The context manager closes the file handle,
    # which the previous bare ``open`` call left dangling.
    with open('data/RF_regression_model.dat', "rb") as model_file:
        self.xgb_model_loaded = pickle.load(model_file)

    population = pd.read_csv(self.dataset).columns
    self.source_inds = list(population)

    # The sample size depends only on the column count, so compute it once.
    sample_size = len(population) // 10

    self.population = list()
    for _ in population:
        individual = [
            random.sample(self.source_inds, sample_size)
            for _ in population
        ]
        self.population.append(individual)
def setUp(self):
    """Read the configured ``start_date`` into ``self._date``.

    The value comes from the config section named after the concrete class.
    """
    # NOTE: a database bootstrap used to follow here and is disabled. It
    # built the DB path from DB_path + DB_name_prefix + DB_name_suffix,
    # optionally deleted the DB ("remove_on_setup"), created a SQLite engine
    # and session, loaded a SQLite extension on connect, and optionally
    # dropped ("dropall_on_setup") and recreated all tables. None of it runs.
    section = self.__class__.__name__
    self._date = getConfig().eval(section, "start_date")
def setUp(self):
    """Load config and every dataset in ``data/dataset_in`` with its target.

    For each file in ``data/dataset_in`` the target feature is looked up in
    ``data/dataset_out/target_features.csv`` under the key
    ``<file stem>_corr_graph`` (first column = graph name, second column =
    target feature). The dataset is read with pandas and stored in
    ``self.datasets_dfs[<file stem>]`` as ``(dataframe, target_feature)``.
    A target column whose dtype is neither ``float64`` nor ``int64`` is
    replaced by its integer category codes.

    Raises:
        ValueError: If a dataset has no entry in ``target_features.csv``.
    """
    section = self.__class__.__name__
    self.dataset = getConfig().eval(section, "dataset")
    self.exclude_log = getConfig().eval(section, "exclude_log")
    self.exclude_table_list = getConfig().eval(section, "exclude_table_list")
    self.labels = []
    self.datasets_dfs = dict()

    file_names = listdir(join('data', 'dataset_in'))

    # Read the graph-name -> target-feature table once instead of re-opening
    # it for every dataset. Later rows override earlier ones, which matches
    # the previous "last matching row wins" behaviour.
    target_by_graph = {}
    if file_names:
        with open('data/dataset_out/target_features.csv', newline='') as csv_file:
            for row in csv.reader(csv_file, delimiter=','):
                if len(row) >= 2:
                    target_by_graph[row[0]] = row[1]

    for file_name in file_names:
        print('loading ', file_name)
        dataset_name = file_name.split('.')[0]
        graph_name = str(dataset_name) + "_corr_graph"

        # Fail loudly: the old code silently reused the previous dataset's
        # target (or hit an unbound variable) when no row matched.
        if graph_name not in target_by_graph:
            raise ValueError(
                "no target feature listed for '{}' in "
                "data/dataset_out/target_features.csv".format(graph_name))
        target_feature = target_by_graph[graph_name]

        df = pd.read_csv(join('data', 'dataset_in', file_name))

        # Compare the column's dtype. The old check compared ``type(...)``
        # with a string, which is always unequal, so numeric targets were
        # category-encoded as well.
        target_dtype = df[target_feature].dtype
        if target_dtype != 'float64' and target_dtype != 'int64':
            df[target_feature] = df[target_feature].astype('category').cat.codes

        self.datasets_dfs[dataset_name] = (df, target_feature)
def setUp(self):
    """Load the booster hyper-parameters and dataset options from config.

    All options come from the config section named after the concrete class.
    ``self.labels`` starts as an empty list.
    """
    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    # Model hyper-parameters.
    self.max_depth = option("max_depth")
    self.eta = option("eta")
    self.silent = option("silent")
    self.objective_multi = option("objective_multi")
    self.objective_binary = option("objective_binary")
    self.nthread = option("nthread")
    self.epochs = option("epochs")

    # Data selection.
    self.dataset = option("dataset")
    self.exclude_table_list = option("exclude_table_list")

    self.labels = []
def setUp(self):
    """Load config, data and model, then build the thresholded feature graph.

    Steps:
      1. Read the options from the config section named after the class.
      2. Resolve the configured correlation function by name on ``corr_calc``.
      3. Read the dataset and drop the target column. If ``target_att`` is
         not a column, the last column is used as the target instead and
         ``self.target_att`` is updated to it.
      4. Compute the correlation matrix of the remaining features and build
         ``self.full_graph`` from it as an adjacency matrix.
      5. Remove every edge whose absolute weight exceeds ``corr_threshold``.
    """
    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    self.corr_threshold = option("corr_threshold")
    self.target = option("target")
    self.early_stop = option("early_stop")
    self.dataset = option("dataset")
    corr_method_name = option("corr_method")
    self.model_path = option("model_path")
    self.target_att = option("target_att")
    # The config holds the function's name; keep only the resolved callable.
    self.corr_method = getattr(corr_calc, corr_method_name)

    df = pd.read_csv(self.dataset)
    self.data = df.copy()
    if self.target_att in df.columns:
        features_df = df.drop(self.target_att, axis=1)
    else:
        # Fall back to the last column as the target.
        features_df = df.drop(df.columns[-1], axis=1)
        self.target_att = df.columns[-1]

    self.corr_mat = self.corr_method(features_df)
    self.features = list(features_df.columns)

    # NOTE: unpickling can execute arbitrary code; the model file must come
    # from a trusted source. The context manager closes the file handle,
    # which the previous bare ``open`` call left dangling.
    with open(self.model_path, 'rb') as model_file:
        self.model = pickle.load(model_file)

    # Label the rows with the column names so the matrix reads as an
    # adjacency matrix.
    adjacency_df = self.corr_mat.set_index(self.corr_mat.columns)
    self.full_graph = nx.from_pandas_adjacency(adjacency_df)

    print('calculating invalid edges...')
    edges_to_remove = [
        (u, v)
        for u, v, weight in self.full_graph.edges(data='weight')
        if abs(weight) > self.corr_threshold
    ]
    print('removing edges...')
    self.full_graph.remove_edges_from(edges_to_remove)
def setUp(self):
    """Copy this module's options from config onto the instance.

    Each option is read from the config section named after the concrete
    class and stored under an attribute with the same name as the option.
    """
    section = self.__class__.__name__
    option_names = (
        "dataset",
        "feature_set_in",
        "results_out",
        "model_path",
        "path_to_truth",
        "eval_only",
        "corr_threshold",
    )
    for name in option_names:
        setattr(self, name, getConfig().eval(section, name))
def randomWalk(self):
    """Load the main graph and feed the training sub-graphs to the walker.

    The main graph is read from the GML file at the configured
    ``main_graph_path`` (resolved through ``self.getPath``). Each entry of
    ``self.subGraphs_list`` is indexed as ``(name, graph)``; the graph is
    tagged with its name, and graphs whose ``type`` is ``"trainset"`` are
    inserted into ``self.rw_list_of_graphs_train``.
    """
    main_graph_path = getConfig().eval(self.__class__.__name__,
                                       'main_graph_path')
    resolved_path = self.getPath(relative_path=main_graph_path)
    self.main_graph = nx.read_gml(resolved_path)

    walker = rw.RandomWalk(
        threshold=self.randomWalk_length,
        number_of_graphs=self.random_walk_num,
        args=self.rw_args,
        extensions=self.rw_extensions,
        main_graph=self.main_graph,
    )

    for entry in self.subGraphs_list:
        sub_graph = entry[1]
        sub_graph.graph["name"] = entry[0]
        if sub_graph.graph['type'] == "trainset":
            updated_train_set = walker.insertGraphToSet(
                list_of_graphs=self.rw_list_of_graphs_train,
                graph=sub_graph,
            )
            self.rw_list_of_graphs_train = updated_train_set
def __init__(self, db):
    """Initialise the controller and load the embedding options.

    Args:
        db: Database handle; passed to ``AbstractController.__init__`` and
            also kept on ``self.db``.
    """
    AbstractController.__init__(self, db)
    self.db = db

    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    self.iterations = option("iterations")
    self.dimensions = option("dimensions")
    self.windowSize = option("windowSize")
    self.dm = option("dm")
    self.walkLength = option("walkLength")
    self.embedding_type = option("embedding_type")
    # The option is called "attribute" but is stored as ``att``.
    self.att = option("attribute")
def setUp(self):
    """Load the resume flags, threshold and folder options from config.

    All options come from the config section named after the concrete class.
    """
    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    # Resume / cleanup flags.
    self.continue_from_log = option("continue_from_log")
    self.clear_existing_subgraphs = option("clear_existing_subgraphs")

    self.save_path = option("save_path")
    self.corr_threshold = option("corr_threshold")

    # Input / output locations.
    self.input_folder = option("input_folder")
    self.output_folder = option("output_folder")
def __init__(self, db):
    """Store the database handle and read the data and output paths.

    Args:
        db: Database handle, kept on ``self.db``.
    """
    self.db = db

    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    # The options are called "data" and "out"; the attributes carry the
    # longer ``*_path`` names.
    self.data_path = option("data")
    self.out_path = option("out")
def setUp(self):
    """Copy this module's options from config onto the instance.

    Each option is read from the config section named after the concrete
    class and stored under an attribute with the same name as the option.
    """
    section = self.__class__.__name__
    for name in ("dataset", "target_att", "out", "test_att"):
        setattr(self, name, getConfig().eval(section, name))
def setUp(self):
    """Read the ``data`` and ``model`` options from config.

    Both come from the config section named after the concrete class.
    """
    section = self.__class__.__name__

    def option(name):
        return getConfig().eval(section, name)

    # The option is called "data" but is stored as ``data_path``.
    self.data_path = option("data")
    self.model = option("model")
def setUp(self):
    """Read ``corr_threshold`` from the config section named after the class."""
    section = self.__class__.__name__
    config = getConfig()
    self.corr_threshold = config.eval(section, "corr_threshold")
def setUp(self):
    """Read ``path`` from the config section named after the class."""
    section = self.__class__.__name__
    config = getConfig()
    self.path = config.eval(section, "path")
def __init__(self):
    """Read the CSV options and expose a query helper on ``self.pysqldf``.

    ``self.pysqldf(q)`` forwards the query ``q`` to ``sqldf`` together with
    the current value of ``self.data_path``.
    """
    section = self.__class__.__name__
    self.data_path = getConfig().eval(section, "csv_path")

    def run_query(q):
        # ``self.data_path`` is read at call time, not captured here.
        # NOTE(review): sqldf gets the CSV path as its second argument;
        # confirm that is what the imported ``sqldf`` expects.
        return sqldf(q, self.data_path)

    self.pysqldf = run_query
    self.is_csv = getConfig().eval(section, "is_csv")
modules_dict['random_walk'] = random_walk modules_dict['structural_feature_extraction'] = structural_feature_extraction modules_dict['xgboost_generator'] = xgboost_generator modules_dict['Decision_Tree'] = Decision_Tree modules_dict['global_local_fs'] = global_local_fs modules_dict['full_graph_fs'] = full_graph_fs modules_dict['sub2vec'] = sub2vec modules_dict['RandomForestReg'] = RandomForestReg modules_dict['XgboostRegression'] = XgboostRegression modules_dict['GA_Feature_Selection'] = GA_Feature_Selection modules_dict['test_dataset_cross_validation'] = test_dataset_cross_validation modules_dict['challenge_prediction'] = challenge_prediction modules_dict['simulated_annealing_feature_selection'] = simulated_annealing_feature_selection modules_dict['benchmark'] = benchmark window_start = getConfig().eval("DEFAULT", "start_date") disable_prints = getConfig().eval("DEFAULT", "disable_prints") if disable_prints: sys.stdout = open(os.devnull, 'w') newbmrk = os.path.isfile("benchmark.csv") bmrk_file = open("benchmark.csv", 'a', newline='') bmrk_results = csv.DictWriter(bmrk_file, ["time", "jobnumber", "config", "window_size", "window_start", "dones", "posts", "authors"] + list(modules_dict.keys()), dialect="excel", lineterminator="\n") if not newbmrk: bmrk_results.writeheader() modules_dict["DB"] = lambda x: x pipeline = [] for module in getConfig().sections():
def setUp(self):
    """Read ``dataset_table`` from the config section named after the class."""
    section = self.__class__.__name__
    config = getConfig()
    self.dataset_table = config.eval(section, "dataset_table")