def generate_Erdos_Renyi(name,
                         n_node,
                         n_edge,
                         directed=False,
                         seed=None,
                         overide=False):
    '''
    Generate an Erdos-Renyi random graph and wrap it in a ``Dataset``.

    Parameter

        ===========        ==========================================================
            name            name under which the resulting dataset is created
            n_node          forwarded to the generator as ``n_node``
                            (presumably the number of nodes)
            n_edge          forwarded to the generator as ``n_edge``
                            (presumably the number of edges)
            directed        forwarded to the generator and to the ``Dataset``
            seed            passed to ``RandomGenerator.generate``
            overide         when true, the "already exists" check is skipped
        ===========        ==========================================================

    Returns
        The ``Dataset`` built from the generated edges and ground truth; the
        generator options are attached as ``additional_meta['genopts']``.

    Raises
        Exception: if ``overide`` is false and ``local_exists(name)`` is true.
    '''
    # Only consult local storage when the caller did not ask to override.
    if not overide:
        if local_exists(name):
            raise Exception("graph {} exists".format(name))

    gen_options = dict([
        ("name", "Erdos-Renyi"),
        ('n_node', n_node),
        ("n_edge", n_edge),
        ("directed", directed),
    ])
    edge_data, ground_truth = RandomGenerator(params=gen_options).generate(seed)

    from .dataset import Dataset
    return Dataset(
        name,
        description="Erdos_Renyi random graph",
        groundtruthObj=ground_truth,
        edgesObj=edge_data,
        directed=directed,
        weighted=False,  # this generator always yields an unweighted graph
        overide=overide,
        additional_meta={'genopts': gen_options},
    )
def generate_undirected_unweighted_hier_random_graph_LFR(name, N, k=None, maxk=None, mu1=None, mu2=None, t1=None, t2=None, minc=None,
                maxc=None, on=None, om=None, minC=None, maxC=None, seed=None, overide=False):
    '''
    Lancichinetti-Fortunato-Radicchi Benchmark generator (hierarchical,
    undirected, unweighted). Original from https://sites.google.com/site/andrealancichinetti/files

    Parameter

        ===========        ==========================================================
            -N              [number of nodes]
            -k              [average degree]
            -maxk           [maximum degree]
            -t1             [minus exponent for the degree sequence]
            -t2             [minus exponent for the community size distribution]
            -minc           [minimum for the micro community sizes]
            -maxc           [maximum for the micro community sizes]
            -on             [number of overlapping nodes]
            -om             [number of memberships of the overlapping nodes]
            -minC           [minimum for the macro community size]
            -maxC           [maximum for the macro community size]
            -mu1            [mixing parameter for the macro communities (see Readme file)]
            -mu2            [mixing parameter for the micro communities (see Readme file)]
        ===========        ==========================================================

        ``name`` is the name of the dataset to create, ``seed`` is passed to
        ``RandomGenerator.generate`` and ``overide`` skips the "already
        exists" check when true.

        -------------------- Examples ---------------------------

        Example2:
        ./hbenchmark -f flags.dat

        ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1

    Returns
        The ``Dataset`` built from the generated edges and ground truth; the
        generator options are attached as ``additional_meta['genopts']``.

    Raises
        Exception: if ``overide`` is false and ``local_exists(name)`` is true.

    Reference
        Lancichinetti, Andrea, and Santo Fortunato. "Benchmarks for testing community detection algorithms on directed and weighted graphs with overlapping communities." Physical Review E 80.1 (2009): 016118.

    '''
    if not overide and local_exists(name):
        raise Exception("graph {} exists".format(name))

    # Snapshot the call arguments into a private dict. The mapping returned
    # by locals() must not be mutated directly: on CPython < 3.13 it is the
    # frame's shared locals dict, which a tracer (debugger, coverage) or a
    # later locals() call re-synchronises, undoing the edits below and
    # leaking unrelated local variables into the generator options.
    params = dict(locals())
    del params['overide']

    description = "LFR random graph"
    directed = False
    weighted = False
    params['directed'] = directed
    params['hier'] = True
    params['weighted'] = weighted
    # Replaces the dataset name captured above with the generator identifier.
    params['name'] = 'LFR'
    gen = RandomGenerator(params=params)
    edges, gt = gen.generate(seed)
    from .dataset import Dataset
    return Dataset(name,
                   description=description,
                   groundtruthObj=gt,
                   edgesObj=edges,
                   directed=directed,
                   weighted=weighted,
                   overide=overide,
                   additional_meta={'genopts': params})
Example #3
0
def load_sample_dataset(name, overide=False):
    '''
    Load one of the bundled sample datasets.

    When ``overide`` is false and ``local_exists(name)`` is true, the result
    of ``load_local(name)`` is returned. Otherwise the edge CSV registered
    for ``name`` in ``_DATASET_`` is read from ``<GCT_HOME>/data``, together
    with the ground-truth CSV whose path is obtained by replacing ``_edges``
    with ``_gt`` in the edge file path, and both are handed to
    ``convert.from_edgelist`` as an undirected graph with an empty
    description.
    '''
    if not overide:
        if local_exists(name):
            return load_local(name)

    edges_csv = os.path.join(config.GCT_HOME, 'data', _DATASET_[name])
    edge_frame = pd.read_csv(edges_csv)
    # The ground-truth file sits next to the edge file, with "_gt" in place
    # of "_edges" in its path.
    truth_frame = pd.read_csv(edges_csv.replace("_edges", '_gt'))
    return convert.from_edgelist(name,
                                 edge_frame,
                                 groundtruth=truth_frame,
                                 directed=False,
                                 description="",
                                 overide=overide)
def load_snap_dataset(name, overide=False):
    '''
    Load the dataset registered under ``name`` in ``_DATASET_``.

    When ``overide`` is false and ``local_exists(name)`` is true, the result
    of ``load_local(name)`` is returned. Otherwise the registry entry
    supplies the edges, the ground truth and the description / weighted /
    directed attributes used to construct a new ``Dataset``.
    '''
    if not overide:
        if local_exists(name):
            return load_local(name)

    entry = _DATASET_[name]
    edge_data = entry.get_edges()
    truth_data = entry.get_ground_truth()
    return Dataset(name,
                   edgesObj=edge_data,
                   groundtruthObj=truth_data,
                   description=entry.description,
                   weighted=entry.weighted,
                   directed=entry.directed,
                   overide=overide)
Example #5
0
    def setUp(self):
        '''
        Prepare four LFR test graphs covering every weighted/directed
        combination and collect them in ``self.graphs``.

        The two undirected graphs are obtained through ``self.get_dataset``
        with a factory callback; the two directed graphs are loaded with
        ``dataset.load_local`` when they already exist locally and generated
        with ``random_dataset.generate_ovp_LFR`` otherwise.
        '''
        overide = False

        def f(name):
            # Factory for the unweighted, undirected graph (a=0).
            return random_dataset.generate_ovp_LFR(name,
                                                   N=128,
                                                   k=16,
                                                   maxk=16,
                                                   muw=0.1,
                                                   minc=32,
                                                   beta=1,
                                                   a=0,
                                                   overide=overide)

        name = "test_LFR_unw_und"
        self.graph_unweighted_undirect = self.get_dataset(name, f)
        assert not self.graph_unweighted_undirect.is_directed()
        assert not self.graph_unweighted_undirect.is_weighted()

        def f2(name):
            # Factory for the weighted, undirected graph (weighted=True, a=0).
            return random_dataset.generate_ovp_LFR(name,
                                                   N=128,
                                                   k=16,
                                                   maxk=16,
                                                   muw=0.1,
                                                   minc=32,
                                                   beta=1,
                                                   weighted=True,
                                                   a=0,
                                                   overide=overide)

        name = "test_LFR_w_und"
        # Must use the weighted factory f2: passing f here would generate an
        # unweighted graph and trip the is_weighted() assertion below.
        self.graph_weighted_undirect = self.get_dataset(name, f2)
        assert not self.graph_weighted_undirect.is_directed()
        assert self.graph_weighted_undirect.is_weighted()

        name = "test_LFR_w_dir"
        if dataset.local_exists(name):
            self.graph_weighted_direct = dataset.load_local(name)
        else:
            self.graph_weighted_direct = random_dataset.generate_ovp_LFR(
                name,
                N=128,
                k=16,
                maxk=16,
                muw=0.1,
                minc=32,
                beta=1,
                weighted=True,
                a=1,
                overide=overide)
            assert self.graph_weighted_direct.is_directed()
            assert self.graph_weighted_direct.is_weighted()

        name = "test_LFR_unw_dir"
        if dataset.local_exists(name):
            self.graph_unweighted_direct = dataset.load_local(name)
        else:
            self.graph_unweighted_direct = random_dataset.generate_ovp_LFR(
                name,
                N=128,
                k=16,
                maxk=16,
                muw=0.1,
                minc=32,
                beta=1,
                weighted=False,
                a=1,
                overide=overide)
            assert self.graph_unweighted_direct.is_directed()
            assert not self.graph_unweighted_direct.is_weighted()

        self.graphs = [
            self.graph_unweighted_undirect, self.graph_weighted_undirect,
            self.graph_weighted_direct, self.graph_unweighted_direct
        ]
Example #6
0
 def get_dataset(self, name, fun):
     '''
     Return the dataset called ``name``: loaded with ``dataset.load_local``
     when ``dataset.local_exists(name)`` is true, otherwise produced by
     calling ``fun(name)``.
     '''
     if not dataset.local_exists(name):
         return fun(name)
     return dataset.load_local(name)
def generate_directed_weighted_random_graph_LFR(name, N, k=None, maxk=None, mut=None, muw=None, beta=None, t1=None, t2=None,
                minc=None, maxc=None, on=None, om=None, C=None, seed=None, overide=False):
    '''
    Lancichinetti-Fortunato-Radicchi Benchmark generator (directed, weighted).
    Original from https://sites.google.com/site/andrealancichinetti/files

    Parameter:

        ===========        ==========================================================
            -N              [number of nodes]
            -k              [average in-degree]
            -maxk           [maximum in-degree]
            -mut            [mixing parameter for the topology]
            -muw            [mixing parameter for the weights]
            -beta           [exponent for the weight distribution]
            -t1             [minus exponent for the degree sequence]
            -t2             [minus exponent for the community size distribution]
            -minc           [minimum for the community sizes]
            -maxc           [maximum for the community sizes]
            -on             [number of overlapping nodes]
            -om             [number of memberships of the overlapping nodes]
        ===========        ==========================================================

        ``name`` is the name of the dataset to create, ``seed`` is passed to
        ``RandomGenerator.generate`` and ``overide`` skips the "already
        exists" check when true. ``C`` is forwarded to the generator
        unchanged.

        -N, -k, -maxk, -muw have to be specified. For the others, the program can use default values:

        t1=2, t2=1, on=0, om=0, beta=1.5, mut=muw, minc and maxc will be chosen close to the degree sequence extremes.

        If you set a parameter twice, the latter one will be taken.

        -------------------- Other options ---------------------------

        To have a random network use:    -rand

        Using this option will set muw=0, mut=0, and minc=maxc=N, i.e. there will be one only community.

        Use option -sup (-inf) if you want to produce a benchmark whose distribution of the ratio of external in-degree/total in-degree is superiorly (inferiorly) bounded by the mixing parameter.

        -------------------- Examples ---------------------------

        Example1:    ./benchmark -N 1000 -k 15 -maxk 50 -muw 0.1 -minc 20 -maxc 50
        Example2:    ./benchmark -f flags.dat -t1 3

    Returns
        The ``Dataset`` built from the generated edges and ground truth; the
        generator options are attached as ``additional_meta['genopts']``.

    Raises
        Exception: if ``overide`` is false and ``local_exists(name)`` is true.

    Reference
        Lancichinetti, Andrea, and Santo Fortunato. "Benchmarks for testing community detection algorithms on directed and weighted graphs with overlapping communities." Physical Review E 80.1 (2009): 016118.

    '''
    if not overide and local_exists(name):
        raise Exception("graph {} exists".format(name))

    # Snapshot the call arguments into a private dict. The mapping returned
    # by locals() must not be mutated directly: on CPython < 3.13 it is the
    # frame's shared locals dict, which a tracer (debugger, coverage) or a
    # later locals() call re-synchronises, undoing the edits below and
    # leaking unrelated local variables into the generator options.
    params = dict(locals())
    del params['overide']

    description = "LFR random graph"
    directed = True
    weighted = True
    params['directed'] = directed
    params['weighted'] = weighted
    # Replaces the dataset name captured above with the generator identifier.
    params['name'] = 'LFR'
    gen = RandomGenerator(params=params)
    edges, gt = gen.generate(seed)
    from .dataset import Dataset
    # Record the generation options in the dataset metadata so the graph
    # can be traced back to the parameters that produced it.
    return Dataset(name,
                   description=description,
                   groundtruthObj=gt,
                   edgesObj=edges,
                   directed=directed,
                   weighted=weighted,
                   overide=overide,
                   additional_meta={'genopts': params})
def generate_ovp_LFR(name,
                     N,
                     k=None,
                     maxk=None,
                     mut=None,
                     muw=None,
                     beta=None,
                     t1=None,
                     t2=None,
                     minc=None,
                     maxc=None,
                     on=None,
                     om=None,
                     C=None,
                     a=0,
                     weighted=False,
                     seed=None,
                     overide=False):
    '''
    Extended version of the Lancichinetti-Fortunato-Radicchi Benchmark for Weighted Overlapping networks
    to evaluate clustering algorithms using generated ground-truth communities.

    Refer https://github.com/eXascaleInfolab/LFR-Benchmark_UndirWeightOvp

    Parameter

      ==========  ===========================================================================
        -N        [number of nodes]
        -k        [average degree]
        -maxk        [maximum degree]
        -mut        [mixing parameter for the topology]
        -muw        [mixing parameter for the weights]
        -beta        [exponent for the weight distribution]
        -t1        [minus exponent for the degree sequence]
        -t2        [minus exponent for the community size distribution]
        -minc        [minimum for the community sizes]
        -maxc        [maximum for the community sizes]
        -on        [number of overlapping nodes]
        -om        [number of memberships of the overlapping nodes]
        -C        [Average clustering coefficient]
        -cnl        [output communities as strings of nodes (input format for NMI evaluation)]
        -name        [base name for the output files]. It is used for the network, communities and statistics; files extensions are added automatically:
                    .nsa  - network, represented by space/tab separated arcs
                    .nse  - network, represented by space/tab separated edges
                    {.cnl, .nmc}  - communities, represented by nodes lists '.cnl' if '-cnl' is used, otherwise as a nodes membership in communities '.nmc')
                    .nst  - network statistics
        -seed        [file name of the random seed, default: seed.txt]
        -a        [{0, 1} yield directed network (1 - arcs) rather than undirected (0 - edges), default: 0 - edges]
      ==========  ===========================================================================

    In this wrapper the ``name`` argument is the name of the dataset to
    create (the ``name`` entry of the generator options is always set to
    ``'ovp_LFR'``), ``seed`` is passed to ``RandomGenerator.generate``,
    ``weighted`` is forwarded to both the generator options and the
    ``Dataset``, and ``overide`` skips the "already exists" check when true.
    The resulting dataset is marked directed when ``a > 0``.

    Returns
        The ``Dataset`` built from the generated edges and ground truth; the
        generator options are attached as ``additional_meta['genopts']``.

    Raises
        Exception: if ``overide`` is false and ``local_exists(name)`` is true.

    Reference
        Lancichinetti, Andrea, and Santo Fortunato. "Benchmarks for testing community detection algorithms on directed and weighted graphs with overlapping communities." Physical Review E 80.1 (2009): 016118.
    '''

    if not overide and local_exists(name):
        raise Exception("graph {} exists".format(name))

    # Snapshot the call arguments into a private dict. The mapping returned
    # by locals() must not be mutated directly: on CPython < 3.13 it is the
    # frame's shared locals dict, which a tracer (debugger, coverage) or a
    # later locals() call re-synchronises, undoing the edits below and
    # leaking unrelated local variables into the generator options.
    params = dict(locals())
    del params['overide']
    # Normalise the flag (e.g. a bool) to the integer form stored in the options.
    params['a'] = int(a)
    # Replaces the dataset name captured above with the generator identifier.
    params['name'] = 'ovp_LFR'
    description = "overlap LFR random graph"
    directed = (a > 0)
    params['directed'] = directed
    gen = RandomGenerator(params=params)
    edges, gt = gen.generate(seed)
    from .dataset import Dataset
    return Dataset(name,
                   description=description,
                   groundtruthObj=gt,
                   edgesObj=edges,
                   directed=directed,
                   weighted=weighted,
                   overide=overide,
                   additional_meta={'genopts': params})