Beispiel #1
0
def get_download_file_path(dsname, fname="", create=False):
    """Return the download location for dataset *dsname*.

    The dataset directory is ``DOWNLOAD_PATH/<dsname>``.  When *create* is
    truthy that directory is created if it does not exist yet.

    Returns the path of *fname* inside the dataset directory when *fname* is
    non-empty, otherwise the dataset directory itself.
    """
    dataset_dir = os.path.join(DOWNLOAD_PATH, dsname)
    if create:
        utils.create_dir_if_not_exists(dataset_dir)
    return os.path.join(dataset_dir, fname) if fname else dataset_dir
Beispiel #2
0
def get_result_file_path(dsname, runname="", create=False):
    """Return the result directory for dataset *dsname* (and run *runname*).

    Without *runname* the result is ``RESULT_PATH/<dsname>``; with it, the
    result is the ``<runname>`` sub-directory of that path.  When *create* is
    truthy every directory on the returned path below ``RESULT_PATH`` is
    created if it does not exist yet.
    """
    target = os.path.join(RESULT_PATH, dsname)
    if create:
        utils.create_dir_if_not_exists(target)
    if not runname:
        return target

    target = os.path.join(target, runname)
    if create:
        utils.create_dir_if_not_exists(target)
    return target
 def to_scanbin(self, filepath=None):
     """Convert the edge list into the two-file binary "scanbin" layout.

     Runs ``config.SCAN_CONVERT_PROG`` on ``self.file_edges`` so that it
     writes ``b_degree.bin`` and ``b_adj.bin`` for directory *filepath*.

     Args:
         filepath: Output directory.  When ``None`` the dataset's own
             ``self.file_scanbin`` directory is used; in that case an
             existing conversion (both ``.bin`` files present) is reused
             and the directory is created if missing.  An explicitly given
             directory is neither checked for cached output nor created.

     Returns:
         The directory containing the converted files.

     Raises:
         Exception: If the converter exits with a non-zero status.
     """
     if filepath is None:
         filepath = self.file_scanbin
         degree_file = os.path.join(filepath, "b_degree.bin")
         adj_file = os.path.join(filepath, "b_adj.bin")
         # Both outputs already present: nothing to do.
         if utils.file_exists(degree_file) and utils.file_exists(adj_file):
             return filepath
         utils.create_dir_if_not_exists(filepath)

     # The converter reads the plain edge list, so make sure it exists.
     if not utils.file_exists(self.file_edges):
         self.to_edgelist()

     # Output names are relative; *filepath* is handed to the runner
     # (presumably as the working directory - confirm in utils).
     cmd = "{} {} {} {}".format(config.SCAN_CONVERT_PROG, self.file_edges,
                                "b_degree.bin", "b_adj.bin")
     self.logger.info("running " + cmd)
     status = utils.shell_run_and_wait(cmd, filepath)
     if status != 0:
         raise Exception("run command failed: " + str(status))
     return filepath
Beispiel #4
0
    def run(self, data, **kwargs):
        """Run the GANXiSw jar on *data* and record the resulting clusters.

        The dataset's edge list is linked into a temporary directory as
        ``edges.txt``, the jar at ``config.GANXISW_PROG`` is executed there,
        and the first ``output/SLPAw*.icpm`` file it produces is parsed: each
        non-empty line is one cluster given as whitespace-separated integer
        node ids.

        Args:
            data: Dataset object; must provide ``is_directed()``,
                ``file_edges``, ``to_edgelist()`` and ``name``.
            **kwargs: Command-line options forwarded as ``-<key> <value>``.
                Options whose value is ``None`` are dropped.  ``Sym`` is
                forced to 1 for undirected data, ``d`` is always set to
                ``"output"`` and ``r`` defaults to 0.1.

        Returns:
            ``self``, with the result dictionary stored in ``self.result``
            (and passed to ``save_result``).

        Raises:
            Exception: If the program exits with a non-zero status or does
                not produce an output file.
        """
        # Both directed and undirected inputs are accepted; undirected data
        # is flagged to the program through the ``Sym`` option below.
        params = {k: v for k, v in kwargs.items() if v is not None}
        if not data.is_directed():
            params['Sym'] = 1
        params['d'] = "output"
        if "r" not in params:
            params['r'] = 0.1

        if not utils.file_exists(data.file_edges):
            data.to_edgelist()

        txt_params = " ".join("-{} {}".format(k, v) for k, v in params.items())
        cmd = "{} -jar {} -i {} {} ".format(utils.get_java_command(), config.GANXISW_PROG, "edges.txt", txt_params)
        with utils.TempDir() as tmp_dir:
            output_dir = os.path.join(tmp_dir, "output")
            # Start from an empty output directory.
            utils.remove_if_file_exit(output_dir, True)
            utils.create_dir_if_not_exists(output_dir)
            self.logger.info("Running " + cmd)
            utils.link_file(data.file_edges, tmp_dir, "edges.txt")
            timecost, status = utils.timeit(lambda: utils.shell_run_and_wait(cmd, tmp_dir))
            if status != 0:
                raise Exception("Run command with error status code {}".format(status))

            matches = glob.glob(os.path.join(tmp_dir, "output/SLPAw*.icpm"))
            if not matches:
                raise Exception("No output file matching output/SLPAw*.icpm was produced")

            # glob already returns paths prefixed with tmp_dir, so open the
            # match directly instead of joining tmp_dir a second time.
            clusters = []
            with open(matches[0], "r") as f:
                for line in f:
                    members = line.split()
                    if members:  # skip blank lines
                        clusters.append([int(u) for u in members])
            clusters = dict(enumerate(clusters))

        self.logger.info("Made %d clusters in %f seconds" % (len(clusters), timecost))

        result = {
            'runname': self.name,
            'params': params,
            'dataname': data.name,
            'meta': self.get_meta(),
            'timecost': timecost,
            'clusters': clusters,
        }

        save_result(result)
        self.result = result
        return self
Beispiel #5
0
    def run(self, data, cutoff_r=0.01, inflation_in=2, NBDisimilarity_q=0.3, seed=None):
        """Run the LabelRank program on *data* and record the clusters.

        The dataset's edge list is linked into a temporary directory as
        ``edges.txt``, ``config.LABLE_RANK_PROG`` is executed there, and the
        first ``output/LabelRank*.icpm`` file it produces is parsed: each
        non-empty line is one cluster given as whitespace-separated integer
        node ids.

        Args:
            data: Dataset object; must provide ``file_edges``,
                ``to_edgelist()`` and ``name``.
            cutoff_r: Forwarded verbatim as the 2nd program argument.
            inflation_in: Forwarded verbatim as the 3rd program argument.
            NBDisimilarity_q: Forwarded verbatim as the 4th program argument.
            seed: Not used by the program; a notice is logged when given.

        Returns:
            ``self``, with the result dictionary stored in ``self.result``
            (and passed to ``save_result``).

        Raises:
            Exception: If the program exits with a non-zero status or does
                not produce an output file.
        """
        if seed is not None:
            self.logger.info("seed ignored")

        # Built explicitly rather than from locals(): same keys and order,
        # but independent of how the interpreter snapshots local variables.
        params = {
            'cutoff_r': cutoff_r,
            'inflation_in': inflation_in,
            'NBDisimilarity_q': NBDisimilarity_q,
            'seed': seed,
        }

        if not utils.file_exists(data.file_edges):
            data.to_edgelist()

        cmd = "{} {} {} {} {}".format(config.LABLE_RANK_PROG, "edges.txt", cutoff_r, inflation_in, NBDisimilarity_q)
        with utils.TempDir() as tmp_dir:
            output_dir = os.path.join(tmp_dir, "output")
            # Start from an empty output directory.
            utils.remove_if_file_exit(output_dir, True)
            utils.create_dir_if_not_exists(output_dir)
            self.logger.info("Running " + cmd)
            utils.link_file(data.file_edges, tmp_dir, "edges.txt")
            timecost, status = utils.timeit(lambda: utils.shell_run_and_wait(cmd, tmp_dir))
            if status != 0:
                raise Exception("Run command with error status code {}".format(status))

            matches = glob.glob(os.path.join(tmp_dir, "output/LabelRank*.icpm"))
            if not matches:
                raise Exception("No output file matching output/LabelRank*.icpm was produced")

            # glob already returns paths prefixed with tmp_dir, so open the
            # match directly instead of joining tmp_dir a second time.
            clusters = []
            with open(matches[0], "r") as f:
                for line in f:
                    members = line.split()
                    if members:  # skip blank lines
                        clusters.append([int(u) for u in members])
            clusters = dict(enumerate(clusters))

        self.logger.info("Made %d clusters in %f seconds" % (len(clusters), timecost))

        result = {
            'runname': self.name,
            'params': params,
            'dataname': data.name,
            'meta': self.get_meta(),
            'timecost': timecost,
            'clusters': clusters,
        }

        save_result(result)
        self.result = result
        return self
    def __init__(self,
                 name=None,
                 description="",
                 groundtruthObj=None,
                 edgesObj=None,
                 directed=False,
                 weighted=False,
                 overide=False,
                 additional_meta=None,
                 is_edge_mirrored=False):
        """Initialise the dataset and, for named datasets, persist it.

        Args:
            name: Dataset name.  When falsy the dataset stays in memory only:
                no file paths are derived and nothing is written.
            description: Free-text description.
            groundtruthObj: Passed to ``set_ground_truth``.
            edgesObj: Passed to ``set_edges``; must not be ``None``.
            directed: Stored as ``self.directed``.
            weighted: Stored as ``self.weighted``.
            overide: When true, an existing dataset directory of the same
                name is removed instead of raising.
            additional_meta: Stored as ``self.additional_meta``.
            is_edge_mirrored: Stored as ``self.is_edge_mirrored``.

        Raises:
            AssertionError: If *edgesObj* is ``None``.
            Exception: If the dataset directory already exists, *overide* is
                false and the supplied edges/ground truth are not already
                persistent.
        """
        assert edgesObj is not None
        self.name = name
        self.description = description
        self.additional_meta = additional_meta
        logger_name = "{}:{}".format(type(self).__name__, self.name)
        self.logger = utils.get_logger(logger_name)
        self.directed = directed
        self.weighted = weighted
        self.is_edge_mirrored = is_edge_mirrored

        self.parq_edges = None

        if name:

            def data_file(fname):
                # Path of *fname* inside this dataset's data directory.
                return config.get_data_file_path(self.name, fname)

            self.file_edges = data_file('edges.txt')
            self.file_pajek = data_file('pajek.txt')
            self.file_hig = data_file('pajek.hig')
            self.file_scanbin = data_file('scanbin')
            self.file_anyscan = data_file('anyscan.txt')
            self.file_snap = data_file('snap.bin')
            self.file_mcl_mci = data_file('mcl.mci')
            self.file_mcl_tab = data_file('mcl.tab')
            self.file_topgc = data_file('topgc.txt')
            self.file_mirror_edges = data_file('edges_mirror.txt')

            # NOTE(review): a weighted graph reuses edges.txt as its
            # "unweighted" file while an unweighted graph gets a separate
            # one - this looks inverted; confirm against the users of
            # file_unweighted_edges.
            if self.is_weighted():
                self.file_unweighted_edges = self.file_edges
            else:
                self.file_unweighted_edges = data_file('unweighted_edges.txt')

        self.set_ground_truth(groundtruthObj)
        self.set_edges(edgesObj)

        # Anonymous datasets are never written to disk.
        if not name:
            return

        is_persistent = (self.is_edges_persistent()
                         and self.is_ground_truth_persistent())
        self.home = config.get_data_file_path(name, create=False)
        if utils.file_exists(self.home):
            if overide:
                utils.remove_if_file_exit(self.home, is_dir=True)
            elif not is_persistent:
                raise Exception(
                    "Dataset {} exists at {}. Use overide=True or load it locally."
                    .format(name, self.home))

        if is_persistent:
            return

        # New data for this name: drop results computed for any previous
        # dataset with the same name, then write the dataset and its meta.
        stale_results = config.get_result_file_path(self.name)
        utils.remove_if_file_exit(stale_results, is_dir=True)
        utils.create_dir_if_not_exists(self.home)
        self.persistent()
        self.update_meta()
Beispiel #7
0
'''
Created on Oct 23, 2018

@author: lizhen
'''

import os
from gct import utils

# Root directory for all toolkit data: $GCT_DATA when set, otherwise ~/.gct
# (created if missing).
if 'GCT_DATA' not in os.environ:
    # expanduser('~') also works where the HOME variable is not defined
    # (e.g. on Windows), whereas os.environ['HOME'] would raise KeyError.
    DATA_HOME = os.path.join(os.path.expanduser('~'), '.gct')
    utils.create_dir_if_not_exists(DATA_HOME)
else:
    DATA_HOME = os.environ['GCT_DATA']

# An empty GCT_DATA value is rejected.
assert DATA_HOME

# Sub-directories of DATA_HOME for datasets, downloads and results.
DATA_PATH = os.path.join(DATA_HOME, "data")
DOWNLOAD_PATH = os.path.join(DATA_HOME, "download")
RESULT_PATH = os.path.join(DATA_HOME, "result")

# Make sure all three exist at import time (plain loop: the calls are made
# for their side effect only, so no throwaway list is built).
for _subdir in (DATA_PATH, DOWNLOAD_PATH, RESULT_PATH):
    utils.create_dir_if_not_exists(_subdir)
del _subdir

# GCT_HOME: taken from $GCT_HOME when set, otherwise
# ~/graph_clustering_toolkit.
if 'GCT_HOME' not in os.environ:
    # expanduser('~') also works where the HOME variable is not defined
    # (e.g. on Windows), whereas os.environ['HOME'] would raise KeyError.
    GCT_HOME = os.path.join(os.path.expanduser('~'),
                            'graph_clustering_toolkit')
else:
    GCT_HOME = os.environ['GCT_HOME']