def list_clustering(dataset_name):
    '''
    List the names of clustering results stored for a dataset.

    :param dataset_name: name of a dataset
    :return: list of run names (one per sub-directory of the result path)
    '''
    result_dir = config.get_result_file_path(dataset_name)
    # A trailing slash in the glob pattern matches directories only; each
    # match therefore ends with the separator, which is dropped ([:-1])
    # before taking the basename.
    run_dirs = glob.glob(result_dir + "/*/")
    return [os.path.basename(entry[:-1]) for entry in run_dirs]
def remove_local(name, rm_graph_data=True, rm_clustering_result=True):
    '''
    remove a local dataset

    :param name: name of a dataset
    :param rm_graph_data: remove local graph data
    :param rm_clustering_result: remove clustering results associated with the graph.
    '''
    # Graph data and clustering results live under two separate roots;
    # each is removed independently according to its flag.
    if rm_graph_data:
        path = config.get_data_file_path(name)
        utils.remove_if_file_exit(path, is_dir=True)
    if rm_clustering_result:
        path = config.get_result_file_path(name)
        utils.remove_if_file_exit(path, is_dir=True)
def save_result(result):
    '''
    Persist a clustering result.

    A ``Result`` object saves itself; a plain dict is serialized as JSON to
    ``result.txt`` inside the result directory derived from
    ``result['dataname']`` and ``result['runname']``.

    :param result: a ``Result`` instance, or a dict with at least the keys
        ``'dataname'``, ``'runname'`` and the payload to store
    :raises: re-raises any error from serialization after deleting the
        partially written file
    '''
    if isinstance(result, Result):
        result.save()
        return
    filepath = config.get_result_file_path(
        result['dataname'], result['runname'], create=True)
    # Compute the target path outside the try: only the write itself needs
    # the cleanup-on-failure protection.
    fpath = os.path.join(filepath, 'result.txt')
    try:
        with open(fpath, 'wt') as f:
            json.dump(result, f)
    except BaseException:
        # A failed or interrupted dump can leave a truncated, corrupt JSON
        # file behind; remove it before propagating. BaseException (not
        # Exception) so KeyboardInterrupt also triggers cleanup, matching
        # the original bare ``except:``.
        utils.remove_if_file_exit(fpath, is_dir=False)
        raise
def load_result(dataname, runname):
    '''
    Load a stored clustering result and attach unclustered nodes.

    Nodes that appear in the dataset's edge list but in none of the stored
    clusters are collected under the sentinel cluster id ``'-9999'``.

    :param dataname: name of the dataset
    :param runname: name of the clustering run
    :return: a ``Result`` built from the stored JSON
    '''
    result_file = os.path.join(
        config.get_result_file_path(dataname, runname), 'result.txt')
    with open(result_file, 'rt') as fh:
        payload = json.load(fh)

    # Collect every node the graph actually contains.
    graph = dataset.load_local(dataname)
    edges = graph.get_edges()
    graph_nodes = set(edges['src']) | set(edges['dest'])

    # Collect every node some cluster claims.
    clustered_nodes = set()
    for members in payload['clusters'].values():
        clustered_nodes.update(members)

    # Whatever the clustering missed goes into the sentinel cluster.
    payload['clusters']['-9999'] = list(graph_nodes - clustered_nodes)
    return Result(payload)
def has_run(self, runame, dsname):
    '''
    Tell whether a clustering run has a saved result on disk.

    :param runame: name of the run (spelling kept for API compatibility)
    :param dsname: name of the dataset
    :return: True if ``result.txt`` exists for this dataset/run pair
    '''
    result_dir = config.get_result_file_path(dsname=dsname, runname=runame)
    return utils.file_exists(os.path.join(result_dir, 'result.txt'))
def __init__(self, name=None, description="", groundtruthObj=None,
             edgesObj=None, directed=False, weighted=False, overide=False,
             additional_meta=None, is_edge_mirrored=False):
    '''
    Build a dataset from an edge list and (optionally) ground truth.

    When ``name`` is given, the dataset is tied to on-disk locations under
    the data directory resolved by ``config.get_data_file_path`` and is
    persisted at the end of construction if not already persistent.

    :param name: dataset name; when None the dataset stays in memory only
    :param description: free-text description of the dataset
    :param groundtruthObj: ground-truth clustering object, may be None
    :param edgesObj: edge data; required (asserted non-None)
    :param directed: whether the graph is directed
    :param weighted: whether the graph is weighted
    :param overide: when True, an existing on-disk dataset with the same
        name is removed and rebuilt (spelling kept for API compatibility)
    :param additional_meta: extra metadata stored with the dataset
    :param is_edge_mirrored: whether each edge is stored in both directions
    '''
    assert edgesObj is not None
    self.name = name
    self.description = description
    self.additional_meta = additional_meta
    self.logger = utils.get_logger("{}:{}".format(
        type(self).__name__, self.name))
    self.directed = directed
    self.weighted = weighted
    self.is_edge_mirrored = is_edge_mirrored
    self.parq_edges = None

    if name:
        assert name  # redundant with the enclosing `if name:` guard
        # Pre-compute the on-disk paths for every file format this dataset
        # may be exported to (edge list, pajek, hig, scan, anyscan, snap,
        # mcl, topgc, mirrored edges).
        self.file_edges = config.get_data_file_path(self.name, 'edges.txt')
        self.file_pajek = config.get_data_file_path(self.name, 'pajek.txt')
        self.file_hig = config.get_data_file_path(self.name, 'pajek.hig')
        self.file_scanbin = config.get_data_file_path(self.name, 'scanbin')
        self.file_anyscan = config.get_data_file_path(
            self.name, 'anyscan.txt')
        self.file_snap = config.get_data_file_path(self.name, 'snap.bin')
        self.file_mcl_mci = config.get_data_file_path(self.name, 'mcl.mci')
        self.file_mcl_tab = config.get_data_file_path(self.name, 'mcl.tab')
        self.file_topgc = config.get_data_file_path(self.name, 'topgc.txt')
        self.file_mirror_edges = config.get_data_file_path(
            self.name, 'edges_mirror.txt')
        # For weighted graphs the plain edge file already serves as the
        # unweighted variant; otherwise a separate file is used.
        if self.is_weighted():
            self.file_unweighted_edges = self.file_edges
        else:
            self.file_unweighted_edges = config.get_data_file_path(
                self.name, 'unweighted_edges.txt')

    self.set_ground_truth(groundtruthObj)
    self.set_edges(edgesObj)

    if name:
        is_persistent = self.is_edges_persistent(
        ) and self.is_ground_truth_persistent()
        self.home = config.get_data_file_path(name, create=False)
        if utils.file_exists(self.home):
            if overide:
                # Caller asked to rebuild: wipe the existing dataset dir.
                utils.remove_if_file_exit(self.home, is_dir=True)
            elif is_persistent:
                # Already fully persisted: nothing to do, reuse as-is.
                pass
            else:
                raise Exception(
                    "Dataset {} exists at {}. Use overide=True or load it locally."
                    .format(name, self.home))
        # NOTE(review): the following four statements are read as one
        # block guarded by `if not is_persistent:` — confirm against the
        # original formatting.
        if not is_persistent:
            # Stale clustering results refer to the old data; drop them,
            # then write the new dataset and its metadata to disk.
            utils.remove_if_file_exit(config.get_result_file_path(
                self.name), is_dir=True)
            utils.create_dir_if_not_exists(self.home)
            self.persistent()
            self.update_meta()
def has_result(dataname, runname):
    '''
    Check whether a saved clustering result exists for a dataset/run pair.

    :param dataname: name of the dataset
    :param runname: name of the clustering run
    :return: True if ``result.txt`` is present in the run's result directory
    '''
    fpath = os.path.join(
        config.get_result_file_path(dataname, runname),
        'result.txt',
    )
    return utils.file_exists(fpath)