def prep_environment(self) -> None:
        """
        Prepare the Python environment
        :return:
        """
        if check_file_exists('./envs/hrg'):
            return

        CP.print_blue('Making virtual environment for HRG')
        sub.run(
            'python2 -m pip install --user virtualenv; python2 -m virtualenv -p python2 ./envs/hrg;. ./envs/hrg/bin/activate; which python2;',
            shell=True,
            stdout=sub.DEVNULL)  # create and activate environment
        if 'Linux' not in platform.platform():  # non-Linux: point pip builds at GCC 9
            completed_process = sub.run(
                'export CC=gcc-9; export CXX=g++-9;. ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
                shell=True,
                stdout=sub.DEVNULL)  # install requirements for HRG
        else:
            completed_process = sub.run(
                '. ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
                shell=True,
                stdout=sub.DEVNULL)  # install requirements for HRG

        assert completed_process.returncode == 0, 'Error while creating environment for HRG'
        return
Example #2
def parallel_computation(input_path, dataset, model):

    path = os.path.join(input_path, dataset, model)
    input_filenames = [f for f in listdir(path) if isfile(join(path, f))]

    number_of_files = len(input_filenames)
    n_threads = 2

    pbar_inner = tqdm(total=number_of_files)  # pass total=, not a bare int, so the bar shows progress

    def pbar_update(result):
        pbar_inner.update()
        pbar_inner.set_postfix_str(result)

    # for idx in range(number_of_files):
    #     sublevel_parallel_computation(p_arg[0],p_arg[1],p_arg[2], idx)

    asyncResults = []
    with mp.Pool(n_threads) as innerPool:
        ColorPrint.print_green(
            f"Starting Pool with {n_threads} threads with {number_of_files} tasks."
        )
        for idx in range(number_of_files):
            r = innerPool.apply_async(sublevel_parallel_computation,
                                      [input_path, dataset, model, idx],
                                      callback=pbar_update)
            asyncResults.append(r)
        for r in asyncResults:
            try:
                r.wait()
            except Exception:  # keep waiting on the remaining tasks even if one fails
                continue

    return model, dataset
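For context, a minimal self-contained sketch of the same apply_async + callback progress pattern, with a hypothetical worker standing in for sublevel_parallel_computation:

import multiprocessing as mp
from tqdm import tqdm

def square(x):  # hypothetical stand-in for sublevel_parallel_computation
    return x * x

def demo():
    n_tasks = 8
    pbar = tqdm(total=n_tasks)

    def on_done(result):  # invoked in the parent process as each task finishes
        pbar.update()
        pbar.set_postfix_str(result)

    with mp.Pool(2) as pool:
        results = [pool.apply_async(square, [i], callback=on_done)
                   for i in range(n_tasks)]
        for r in results:
            r.wait()
    pbar.close()

if __name__ == '__main__':
    demo()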
Example #3
    def diameter(self) -> float:
        CP.print_none('Calculating Diameter')

        diam = nx.diameter(self.graph)
        self.stats['diameter'] = diam

        return diam
Example #4
    def k_hop_reach(self) -> np.ndarray:
        """
        Returns the average number of nodes reachable from any node in k hops
        Two levels of aggregation:
            1. _k_hop_reachability gives the absolute count of nodes reachable within k hops of a node
            2. overall_k_hop_dict aggregates the sum of all absolute counts over all nodes
        Normalizing factor: n ** 2 (once for each step)
        Then convert to a cumulative distribution
        :return:
        """
        CP.print_none('Calculating hop-plot')

        overall_k_hop_dict = Counter()

        for node in self.graph.nodes():
            k_hop_dict = self._k_hop_reachability_counter(node)
            overall_k_hop_dict += Counter(k_hop_dict)

        k_hop_vec = np.array([
            v
            for k, v in sorted(overall_k_hop_dict.items(), key=lambda x: x[0])
        ])
        k_hop_vec = k_hop_vec / (self.graph.order()**2)

        self.stats['k_hop_reach'] = np.cumsum(k_hop_vec)

        return self.stats['k_hop_reach']
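The private _k_hop_reachability_counter is not shown here; assuming it returns, per node, the count of nodes at exactly k hops, the aggregation can be reproduced on a toy graph with NetworkX shortest-path lengths:

from collections import Counter
import networkx as nx
import numpy as np

g = nx.path_graph(4)  # 0-1-2-3
overall = Counter()
for node in g.nodes():
    lengths = nx.single_source_shortest_path_length(g, node)
    overall += Counter(lengths.values())  # counts of nodes at exactly k hops

k_hop_vec = np.array([v for k, v in sorted(overall.items())]) / g.order() ** 2
print(np.cumsum(k_hop_vec))  # [0.25  0.625 0.875 1.   ] -- reaches 1 on a connected graph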
Example #5
    def clustering_coefficients_by_degree(self) -> Dict[int, float]:
        """
        Returns the average clustering coefficient by degree
        :return:
        """
        CP.print_none('Calculating Clustering Coefficients and CC by degree')

        clustering_coeffs = nx.clustering(self.graph)
        self.stats['clustering_coeffs'] = clustering_coeffs

        clustering_by_degree = {}  # clustering per degree

        # get the sums
        for node, cc in clustering_coeffs.items():
            deg = self.graph.degree[node]
            if deg not in clustering_by_degree:
                clustering_by_degree[deg] = []
            clustering_by_degree[deg].append(cc)

        avg_clustering_by_degree = {
            deg: np.mean(ccs)
            for deg, ccs in clustering_by_degree.items()
        }
        self.stats[
            'clustering_coefficients_by_degree'] = avg_clustering_by_degree

        return avg_clustering_by_degree
Example #6
 def __init__(self, input_graph: nx.Graph, trial: int, **kwargs) -> None:
     super().__init__(model_name='BUGGE',
                      input_graph=input_graph,
                      trial=trial)
     self.rule_min = 2
     self.rule_max = 5
     CP.print_blue(
         f'Rule sizes: min: {self.rule_min}, max: {self.rule_max}')
     return
Example #7
    def degree_centrality(self) -> Dict[int, float]:
        """
        Degree centrality
        """
        CP.print_none('Calculating Degree Centrality')

        degree_centrality = nx.degree_centrality(self.graph)
        self.stats['degree_centrality'] = degree_centrality

        return degree_centrality
Example #8
 def seq95d(a):
     a = a.values
     result = st.t.interval(0.95,
                            len(a) - 1,
                            loc=np.mean(a),
                            scale=st.sem(a))[0]
     if np.isnan(result):
         ColorPrint.print_red(f'CI failed on array {a}')
         return a[0]
     return result
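seq95d takes the lower endpoint of a Student-t confidence interval around the sample mean (abs95u, further down, takes index [1], the upper endpoint). The scipy call, shown standalone:

import numpy as np
import scipy.stats as st

a = np.array([2.1, 2.4, 1.9, 2.2, 2.0])
lo, hi = st.t.interval(0.95, len(a) - 1, loc=np.mean(a), scale=st.sem(a))
print(lo, hi)  # 95% CI around the sample mean; lo is what seq95d returns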
Example #9
    def closeness_centrality(self) -> Dict[int, float]:
        """
        Closeness centrality
        """
        CP.print_none('Calculating Closeness Centrality')

        closeness = nx.closeness_centrality(self.graph)
        self.stats['closeness_centrality'] = closeness

        return closeness
Example #10
    def adj_eigenvalues(self):
        """
        Returns the eigenvalues of the Adjacency matrix
        :return:
        """
        CP.print_none('Calculating eigenvalues of Adjacency Matrix')

        adj_eigenvalues = nx.adjacency_spectrum(self.graph)
        self.stats['adj_eigenvalues'] = adj_eigenvalues

        return adj_eigenvalues
Example #11
    def pagerank(self) -> Dict[int, float]:
        """
        PageRank centrality
        """
        CP.print_none('Calculating PageRank')

        pagerank = nx.pagerank(self.graph)  # nx.pagerank_scipy was removed in NetworkX 3.0
        pagerank = {int(k): v for k, v in pagerank.items()}
        self.stats['pagerank'] = pagerank

        return pagerank
Example #12
    def update(self, new_input_graph: nx.Graph) -> None:
        """
        Update the model to (a) update the input graph, (b) fit the parameters
        :return:
        """
        CP.print_none('Updating graph')

        self.input_graph = new_input_graph
        self._fit()  # re-fit the parameters

        return
Example #13
 def abs95u(a):
     a = a.values
     result = st.t.interval(0.95,
                            len(a) - 1,
                            loc=np.mean(a),
                            scale=st.sem(a))[1]
     if np.isnan(result):
         ColorPrint.print_red(
             f'CI failed on array {a} with type {type(a)}')
         return a[0]
     return result
Example #14
    def assortativity(self) -> float:
        """
        Returns the assortativity of the network
        :return:
        """
        CP.print_none('Calculating Degree Assortativity')

        assortativity = nx.degree_assortativity_coefficient(self.graph)
        self.stats['assortativity'] = assortativity

        return assortativity
Example #15
def main():
    df_path = './dataframes/'
    for subdir, dirs, files in os.walk(df_path):
        for filename in files:
            if filename.split('.')[-1] == 'csv':
                path = os.path.join(subdir, filename)  # join with subdir so CSVs in nested folders resolve
                print(filename)
                latex_printer(path)
            else:
                ColorPrint.print_red(f'CAUTION: Skipped {filename}')
    return
Example #16
 def write_stats_pickle(self, base_path: Union[str, Path]):
     """
     write the stats dictionary as a pickle
     :return:
     """
     filename = os.path.join(base_path, 'graph_stats', self.dataset,
                             self.model,
                             f'gs_{self.trial}_{self.iteration}.pkl.gz')
     CP.print_blue(f'Stats pickle stored at {filename}')
     save_pickle(self.stats, filename)
     return
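save_pickle comes from elsewhere in the repo; given the .pkl.gz extension, a minimal stand-in (an assumption, not the repo's actual helper) would gzip-compress the pickle and create the parent directories:

import gzip
import os
import pickle

def save_pickle(obj, filename):  # hypothetical stand-in for the repo's helper
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with gzip.open(filename, 'wb') as f:
        pickle.dump(obj, f)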
Example #17
def get_filenames(base_path, dataset, models):
    filenames = []
    for model in models:
        path = os.path.join(base_path, dataset, model)
        for subdir, dirs, files in os.walk(path):
            for filename in files:
                if 'seq' not in filename and 'rob' not in filename:
                    # print(f'loading {filename}')
                    filenames.append(os.path.join(subdir, filename))
                    # yield load_pickle(os.path.join(subdir, filename))
    ColorPrint.print_bold(f"Found {len(filenames)} graph files to be loaded.")
    return filenames
Example #18
    def _fit(self) -> None:
        from src.netgan.fit import fit
        sparse_adj = nx.to_scipy_sparse_matrix(self.input_graph)  # renamed to_scipy_sparse_array in NetworkX 3.0
        try:
            scores, tg_sum = fit(sparse_adj)
        except Exception as e:
            CP.print_orange(f'NetGAN fit failed\n{e}')
            scores, tg_sum = None, None

        self.params['scores'] = scores
        self.params['tg_sum'] = tg_sum

        return
Example #19
def main() -> None:
    args = parse_args()
    num_jobs, num_trials = int(args.cores[0]), int(args.trials[0])

    CP.print_green(
        f'Running infinity mirror on {num_jobs} cores for {num_trials} trials')
    # print(args)
    # exit(1)
    Parallel(n_jobs=num_jobs, backend='multiprocessing')(
        delayed(run_infinity_mirror)(trial=i + 1, args=args)
        for i in range(num_trials))

    return
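A minimal sketch of the same joblib Parallel/delayed pattern, with a hypothetical worker standing in for run_infinity_mirror:

from joblib import Parallel, delayed

def run_trial(trial):  # hypothetical stand-in for run_infinity_mirror
    return trial * trial

if __name__ == '__main__':
    results = Parallel(n_jobs=2, backend='multiprocessing')(
        delayed(run_trial)(trial=i + 1) for i in range(4))
    print(results)  # [1, 4, 9, 16]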
Example #20
def make_dirs(output_dir: str, gname: str, model: str) -> None:
    """
    Makes input and output directories if they do not exist already
    :return:
    """
    output_dir = Path(output_dir)
    for dirname in ('pickles', f'pickles/{gname}', f'pickles/{gname}/{model}',
                    'features', f'features/{gname}',
                    f'features/{gname}/{model}'):
        dir_ = output_dir / dirname
        if not dir_.exists():
            CP.print_blue(f'Making dir {dir_!r}')
            os.makedirs(dir_, exist_ok=True)
    return
Example #21
    def component_size_distribution(self) -> List[Tuple[int, float]]:
        """
        Returns the distribution of component sizes and fraction of nodes in each component, largest first
        :return:
        """
        CP.print_none('Calculating Component Size Distribution')

        component_size_ratio_list = [
            (len(c), len(c) / self.graph.order()) for c in sorted(
                nx.connected_components(self.graph), key=len, reverse=True)
        ]
        self.stats['component_size_distribution'] = component_size_ratio_list

        return component_size_ratio_list
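On a small disconnected graph the result reads, largest component first:

import networkx as nx

g = nx.Graph([(0, 1), (1, 2), (3, 4)])  # components {0, 1, 2} and {3, 4}
dist = [(len(c), len(c) / g.order())
        for c in sorted(nx.connected_components(g), key=len, reverse=True)]
print(dist)  # [(3, 0.6), (2, 0.4)]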
Example #22
 def _gen(self, gname: str, gen_id: int) -> nx.Graph:
     from src.netgan.netgan.utils import graph_from_scores
     assert 'scores' in self.params
     assert 'tg_sum' in self.params
     if self.params['scores'] is None or self.params['tg_sum'] is None:
         CP.print_orange('NetGAN gen failed')
         raise Exception('Generation failed!')
     else:
         gen_mat = graph_from_scores(self.params['scores'],
                                     self.params['tg_sum'])
         g = nx.from_numpy_array(gen_mat, create_using=nx.Graph())
         g.name = gname
     g.gen_id = gen_id
     return g
Example #23
    def prep_environment(self) -> None:
        proc = sub.run('conda init bash; . ~/.bashrc; conda activate netgan',
                       shell=True,
                       stdout=sub.DEVNULL)
        os.makedirs('./src/netgan/dumps',
                    exist_ok=True)  # make the directory to store the dumps
        if proc.returncode == 0:  # conda environment exists
            return

        CP.print_blue('Making conda environment for NetGAN')
        proc = sub.run('conda env create -f ./envs/netgan.yml',
                       shell=True,
                       stdout=sub.DEVNULL)  # create and activate environment

        assert proc.returncode == 0, 'Error while creating env for NetGAN'
        return
Example #24
    def laplacian_eigenvalues(self) -> np.ndarray:
        """
        Returns eigenvalues of the Laplacian
        :return:
        """
        CP.print_none('Calculating Laplacian Eigenvalues')
        if self.graph.order() == 0 or self.graph.size() == 0:
            CP.print_orange(
                f'Graph has {self.graph.order()} nodes and {self.graph.size()} edges!'
            )
            laplacian_eigs = []
        else:
            laplacian_eigs = nx.laplacian_spectrum(self.graph)
        self.stats['laplacian_eigenvalues'] = laplacian_eigs

        return laplacian_eigs
Example #25
    def _calculate_all_stats(self):
        """
        Calculate all stats
        """
        CP.print_orange('Calculating all stats')

        object_methods = [
            method_name for method_name in dir(self)
            if callable(getattr(self, method_name))
            and not method_name.startswith('_')
        ]

        for method_name in object_methods:
            method = getattr(self, method_name)
            try:
                method()
            except NotImplementedError:
                pass
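The dir()-based discovery picks up every public bound method; a toy class shows what the list comprehension yields:

class Toy:
    def diameter(self):
        raise NotImplementedError

    def _private(self):
        pass

t = Toy()
public = [m for m in dir(t)
          if callable(getattr(t, m)) and not m.startswith('_')]
print(public)  # ['diameter'] -- dunder and _private methods are filtered out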
Example #26
    def degree_dist(self, normalized=True) -> Dict[int, float]:
        """
        Returns the degrees counter - keys: degrees, values: #nodes with that degree
        :return:
        """
        CP.print_none('Calculating Degree Distribution')

        degree_seq = sorted(deg for _, deg in self.graph.degree())
        self.stats['degree_seq'] = degree_seq

        degree_counts = Counter(degree_seq)

        if normalized:
            for deg, count in degree_counts.items():
                degree_counts[deg] /= self.graph.order()

        self.stats['degree_dist'] = dict(degree_counts)
        return dict(degree_counts)
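For example, on a star graph the normalized distribution works out to:

from collections import Counter
import networkx as nx

g = nx.star_graph(3)  # hub of degree 3 plus three leaves of degree 1
counts = Counter(sorted(deg for _, deg in g.degree()))
dist = {deg: cnt / g.order() for deg, cnt in counts.items()}
print(dist)  # {1: 0.75, 3: 0.25}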
Example #27
    def __getitem__(self, item):
        """
        Allows square bracket indexing for stats - allow for some fuzzy matching
        """
        if item in self.stats:  # the stat has already been calculated
            return self.stats[item]

        # try some fuzzy matching to figure out the function to call based on the item
        object_methods = [
            method_name for method_name in dir(self)
            if callable(getattr(self, method_name))
            and not method_name.startswith('_')
        ]

        best_match_func = ''
        best_match_score = float('inf')

        for method in object_methods:
            dist = ed.eval(method, item)
            if dist == 0:
                best_match_score = dist
                best_match_func = method
                break

            if dist < best_match_score:
                best_match_score = dist
                best_match_func = method

        assert best_match_func != '', 'edit distance did not work'
        if best_match_score != 0:
            CP.print_orange(
                f'Best matching function found for "{item}": "{best_match_func}()", edit distance: {best_match_score}'
            )
        item = best_match_func  # reassign only after logging, so the message shows the requested name

        if best_match_func not in self.stats:
            best_match_func = getattr(
                self, best_match_func
            )  # translates best_match_fun from string to a function object
            best_match_func()  # call the best match function

        assert item in self.stats, f'stat: {item} is not updated after function call'
        return self.stats[item]
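Here `ed` is assumed to be the editdistance package; its eval function returns the Levenshtein distance that drives the fuzzy match above:

import editdistance as ed  # assumed: the `ed` used by __getitem__ above

print(ed.eval('degree_dist', 'degree distribution'))  # smaller distance -> better match
print(ed.eval('diameter', 'degree distribution'))     # larger distance -> worse match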
Example #28
def stats_computation(dataset, model, trial, filename, stats):
    path = Path(
        get_imt_output_directory()) / 'pickles' / dataset / model / filename
    graph_list = load_pickle(path)
    assert isinstance(
        graph_list,
        list), f'Expected type "list" and got type {type(graph_list)}.'
    assert all(isinstance(g, nx.Graph) for g in graph_list
               ), 'Expected a list of nx.Graph and got disappointed instead.'

    ColorPrint.print_orange(f'{filename} has length {len(graph_list)}')

    for idx, G in enumerate(graph_list):
        gs_obj = GraphStats(graph=G,
                            dataset=dataset,
                            model=model,
                            trial=trial,
                            iteration=idx)
        gs_obj.write_stats_jsons(stats=stats)

    return None
Example #29
    def _gen(self, gname: str, gen_id: int) -> nx.Graph:
        """
        call KronGen
        """
        orig_n = self.input_graph.order()
        kron_iters = int(
            math.log2(orig_n)
        )  # floor of log2 gives a bound on kronecker iteration count
        if math.fabs(2**kron_iters - orig_n) > math.fabs(2**(kron_iters + 1) -
                                                         orig_n):
            kron_iters += 1

        assert 'initiator_matrix' in self.params, 'Initiator matrix not found'
        matrix = self.params['initiator_matrix']

        output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}_kron.txt'

        if len(matrix) == 0:  # KronFit failed
            CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            bash_code = f'cd src/kronecker; ./{self.krongen_exec} -o:{self.initial_gname}_{self.trial}_kron.txt -m:"{matrix}" -i:{kron_iters}'
            completed_process = sub.run(bash_code, shell=True, stdout=sub.PIPE)

            if completed_process.returncode != 0 or not check_file_exists(
                    output_file):
                CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
                raise Exception('Generation failed!')
            else:
                graph = nx.read_edgelist(output_file,
                                         nodetype=int,
                                         create_using=nx.Graph())
                graph.name = gname

                delete_files(output_file)
        graph.gen_id = gen_id
        return graph
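The iteration count is log2 of the node count rounded to the nearest power of two; isolated, the rounding logic looks like this:

import math

def nearest_kron_iters(n: int) -> int:
    k = int(math.log2(n))  # floor of log2(n)
    if abs(2 ** k - n) > abs(2 ** (k + 1) - n):
        k += 1  # the next power of 2 is closer to n
    return k

print(nearest_kron_iters(1000))  # 10, since 1024 is closer to 1000 than 512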
Example #30
    def generate(self, num_graphs: int,
                 gen_id: int) -> Union[List[nx.Graph], None]:
        edgelist_path = f'./src/hrg/{self.initial_gname}_{self.trial}.g'
        nx.write_edgelist(self.input_graph, edgelist_path, data=False)
        output_pickle_path = f'./src/hrg/Results/{self.initial_gname}_{self.trial}_hstars.pickle'

        completed_process = sub.run(
            f'. ./envs/hrg/bin/activate; cd src/hrg; python2 exact_phrg.py --orig {self.initial_gname}_{self.trial}.g --trials {num_graphs}; deactivate;',
            shell=True,
            stdout=sub.DEVNULL)

        if completed_process.returncode != 0 or not check_file_exists(
                output_pickle_path):
            CP.print_blue(f'Error in HRG: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            generated_graphs = []
            gen_graphs = load_pickle(output_pickle_path)
            if not isinstance(gen_graphs,
                              list) or len(gen_graphs) != num_graphs:
                raise Exception('Generation failed!')

            for i, gen_graph in enumerate(gen_graphs):
                gen_graph = self._make_graph(gen_graph)
                gen_graph.name = f'{self.input_graph.name}_{self.trial}_{i + 1}'  # adding the number of graph
                gen_graph.gen_id = gen_id

                generated_graphs.append(gen_graph)

            if not isinstance(generated_graphs,
                              list) or len(generated_graphs) != num_graphs:
                print('HRG failed')
                raise Exception('Generation failed!')

        # delete_files(edgelist_path, output_pickle_path)
        return generated_graphs