def prep_environment(self) -> None:
        """
        Prepare the Python environment
        :return:
        """
        if check_file_exists('./envs/hrg'):
            return

        CP.print_blue('Making virtual environment for HRG')
        sub.run(
            'python2 -m pip install --user virtualenv; python2 -m virtualenv -p python2 ./envs/hrg;. ./envs/hrg/bin/activate; which python2;',
            shell=True,
            stdout=sub.DEVNULL)  # create and activate environment
        if 'Linux' not in platform.platform():  # non-Linux (e.g., macOS): pin the GCC toolchain
            completed_process = sub.run(
                'export CC=gcc-9; export CXX=g++-9;. ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
                shell=True,
                stdout=sub.DEVNULL)  # install requirements for HRG

        else:
            completed_process = sub.run(
                '. ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
                shell=True,
                stdout=sub.DEVNULL)  # install requirements for HRG

        assert completed_process.returncode == 0, 'Error while creating environment for HRG'
        return
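Several snippets on this page guard expensive setup behind check_file_exists. The helper itself is not shown here; a minimal sketch of what it presumably does (a plain path-existence test) is:

# Hypothetical reconstruction of the helper used above; the project's real
# version lives in its utility module and may differ.
from pathlib import Path
from typing import Union

def check_file_exists(path: Union[str, Path]) -> bool:
    return Path(path).exists()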
    def __init__(self, input_graph: nx.Graph, trial: int, **kwargs) -> None:
        super().__init__(model_name='BUGGE',
                         input_graph=input_graph,
                         trial=trial)
        self.rule_min = 2
        self.rule_max = 5
        CP.print_blue(
            f'Rule sizes: min: {self.rule_min}, max: {self.rule_max}')
        return
Example #3
    def write_stats_pickle(self, base_path: Union[str, Path]) -> None:
        """
        Write the stats dictionary as a compressed pickle
        :param base_path: root directory under which graph_stats/ lives
        :return: None
        """
        filename = os.path.join(base_path, 'graph_stats', self.dataset,
                                self.model,
                                f'gs_{self.trial}_{self.iteration}.pkl.gz')
        CP.print_blue(f'Stats pickle stored at {filename}')
        save_pickle(self.stats, filename)
        return
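Since the pickle lands at a predictable path, a matching reader is a one-liner. A sketch, assuming a load_pickle counterpart to the project's save_pickle helper:

import os

def read_stats_pickle(base_path, dataset, model, trial, iteration):
    # hypothetical reader mirroring write_stats_pickle above
    filename = os.path.join(base_path, 'graph_stats', dataset, model,
                            f'gs_{trial}_{iteration}.pkl.gz')
    return load_pickle(filename)  # assumed counterpart of save_pickle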
Example #4
def make_dirs(output_dir: str, gname: str, model: str) -> None:
    """
    Makes input and output directories if they do not exist already
    :return: None
    """
    output_dir = Path(output_dir)
    for dirname in ('pickles', f'pickles/{gname}', f'pickles/{gname}/{model}',
                    'features', f'features/{gname}',
                    f'features/{gname}/{model}'):
        dir_ = output_dir / dirname
        if not dir_.exists():
            CP.print_blue(f'Making dir {dir_!r}')
            os.makedirs(dir_, exist_ok=True)
    return
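For illustration, a call with invented arguments creates two parallel directory trees:

make_dirs('./output', gname='karate', model='BTER')
# creates, if missing:
#   ./output/pickles/karate/BTER
#   ./output/features/karate/BTER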
    def prep_environment(self) -> None:
        """
        Prepare the conda environment for NetGAN; no-op if it already exists
        :return: None
        """
        proc = sub.run('conda init bash; . ~/.bashrc; conda activate netgan',
                       shell=True,
                       stdout=sub.DEVNULL)
        os.makedirs('./src/netgan/dumps',
                    exist_ok=True)  # make the directory to store the dumps
        if proc.returncode == 0:  # conda environment exists
            return

        CP.print_blue('Making conda environment for NetGAN')
        proc = sub.run('conda env create -f ./envs/netgan.yml',
                       shell=True,
                       stdout=sub.DEVNULL)  # create and activate environment

        assert proc.returncode == 0, 'Error while creating env for NetGAN'
        return
Example #6
    def write_stats_jsons(self,
                          stats: Union[str, list],
                          overwrite: bool = False) -> None:
        """
        write the stats dictionary as a compressed json
        :return:
        """
        # standardize incoming type
        if isinstance(stats, str):
            stats = [stats]

        for statistic in stats:
            assert statistic in [
                method_name for method_name in dir(self)
                if callable(getattr(self, method_name))
                and not method_name.startswith('_')
            ], f'Unknown statistic: {statistic!r}'
            output_directory = get_imt_output_directory()

            file_output_directory = os.path.join(output_directory,
                                                 'graph_stats', self.dataset,
                                                 self.model, statistic)
            ensure_dir(file_output_directory, recursive=True)

            filename = os.path.join(
                output_directory, 'graph_stats', self.dataset, self.model,
                statistic, f'gs_{self.trial}_{self.iteration}.json.gz')

            # if the file already exists and the overwrite flag is not set, don't redo the work
            if not overwrite and verify_file(filename):
                CP.print_green(
                    f'Statistic: {statistic} output file for {self.model}-{self.dataset}-{self.trial} already exists. Skipping.'
                )
                continue  # skip just this statistic; keep processing the rest

            data = None
            try:
                data = self[statistic]  # todo : maybe there's a better way?!
                save_zipped_json(data, filename)
                CP.print_blue(f'Stats json stored at {filename}')
            except Exception as e:
                CP.print_red(f'Exception occurred on {filename}!')
                CP.print_red(str(e))
                if statistic == 'netlsd' and data is not None:  # guard: the lookup itself may have raised
                    save_zipped_json(data, filename + '.failed')
        return
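A usage sketch follows; 'degree_dist' is an invented statistic name ('netlsd' appears above), and the assert requires each name to be a public method of the stats object:

gs.write_stats_jsons(stats=['degree_dist', 'netlsd'], overwrite=False)  # gs: a hypothetical stats instance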
    def _gen(self, gname: str, gen_id: int) -> nx.Graph:
        """
        call KronGen
        """
        orig_n = self.input_graph.order()
        kron_iters = int(
            math.log2(orig_n)
        )  # floor of log2 gives a bound on kronecker iteration count
        if math.fabs(2**kron_iters - orig_n) > math.fabs(2**(kron_iters + 1) -
                                                         orig_n):
            kron_iters += 1  # the next power of two is closer to the original size

        assert 'initiator_matrix' in self.params, 'Initiator matrix not found'
        matrix = self.params['initiator_matrix']

        output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}_kron.txt'

        if len(matrix) == 0:  # KronFit failed
            CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            bash_code = f'cd src/kronecker; ./{self.krongen_exec} -o:{self.initial_gname}_{self.trial}_kron.txt -m:"{matrix}" -i:{kron_iters}'
            completed_process = sub.run(bash_code, shell=True, stdout=sub.PIPE)

            if completed_process.returncode != 0 or not check_file_exists(
                    output_file):
                CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
                raise Exception('Generation failed!')
            else:
                graph = nx.read_edgelist(output_file,
                                         nodetype=int,
                                         create_using=nx.Graph())
                graph.name = gname

                delete_files(output_file)
        graph.gen_id = gen_id
        return graph
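A worked example of the rounding above, assuming a hypothetical 300-node input:

import math

n = 300                # invented input size
k = int(math.log2(n))  # floor(log2(300)) = 8
if abs(2**k - n) > abs(2**(k + 1) - n):
    k += 1
print(k, 2**k)         # -> 8 256: 2**8 is nearer to 300 than 2**9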
    def generate(self, num_graphs: int,
                 gen_id: int) -> Union[List[nx.Graph], None]:
        edgelist_path = f'./src/hrg/{self.initial_gname}_{self.trial}.g'
        nx.write_edgelist(self.input_graph, edgelist_path, data=False)
        output_pickle_path = f'./src/hrg/Results/{self.initial_gname}_{self.trial}_hstars.pickle'

        completed_process = sub.run(
            f'. ./envs/hrg/bin/activate; cd src/hrg; python2 exact_phrg.py --orig {self.initial_gname}_{self.trial}.g --trials {num_graphs}; deactivate;',
            shell=True,
            stdout=sub.DEVNULL)

        if completed_process.returncode != 0 or not check_file_exists(
                output_pickle_path):
            CP.print_blue(f'Error in HRG: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            generated_graphs = []
            gen_graphs = load_pickle(output_pickle_path)
            if not isinstance(gen_graphs,
                              list) or len(gen_graphs) != num_graphs:
                raise Exception('Generation failed!')

            for i, gen_graph in enumerate(gen_graphs):
                gen_graph = self._make_graph(gen_graph)
                gen_graph.name = f'{self.input_graph.name}_{self.trial}_{i + 1}'  # append the graph's index to its name
                gen_graph.gen_id = gen_id

                generated_graphs.append(gen_graph)

            if not isinstance(generated_graphs,
                              list) or len(generated_graphs) != num_graphs:
                print('HRG failed')
                raise Exception('Generation failed!')

        # delete_files(edgelist_path, output_pickle_path)
        return generated_graphs
    def _fit(self) -> None:
        """
        call KronFit
        """
        output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}-fit'

        # write the edgelist; KronFit expects node labels starting from 1
        g = nx.convert_node_labels_to_integers(self.input_graph,
                                               first_label=1,
                                               label_attribute='old_label')
        directed_g = g.to_directed()  # kronecker expects a directed graph

        edgelist_path = f'./src/kronecker/{self.initial_gname}_{self.trial}.txt'
        nx.write_edgelist(directed_g, edgelist_path, data=False)

        bash_code = f'cd src/kronecker; {self.kronfit_exec} -i:{self.initial_gname}_{self.trial}.txt -o:{self.initial_gname}_{self.trial}-fit -s:50000'
        completed_process = sub.run(bash_code,
                                    shell=True)  # , stdout=sub.PIPE)

        if completed_process.returncode != 0 or not check_file_exists(
                output_file):
            CP.print_blue(f'Error in KronFit: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            with open(output_file) as f:
                last_line = f.readlines()[-1]  # the fitted initiator matrix sits on the last line
                last_line = last_line.replace(']', '')
                matrix = last_line[last_line.find('[') + 1:]  # keep only the text inside the brackets
            # CP.print_blue('Initiator matrix:', matrix)

        self.params['initiator_matrix'] = matrix
        return
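The string surgery above keeps only the text inside the square brackets of the fit file's last line. An illustration with an assumed KronFit-style line (the exact output format is an assumption, not verified against a real run):

last_line = 'Estimated initiator  [0.9, 0.6; 0.5, 0.1]\n'  # invented sample line
last_line = last_line.replace(']', '')
matrix = last_line[last_line.find('[') + 1:]
print(matrix)  # -> 0.9, 0.6; 0.5, 0.1 (trailing newline preserved)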
Example #10
    def pgd_graphlet_counts(self, n_threads=4) -> Dict:
        """
        Return the dictionary of graphlets and their counts - based on Neville's PGD
        :return:
        """
        pgd_path = Path(get_imt_input_directory()).parent / 'src' / 'PGD'
        graphlet_counts = {}

        if 'Linux' in platform.platform() and (pgd_path / 'pgd_0').exists():
            edgelist = '\n'.join(nx.generate_edgelist(self.graph, data=False))
            edgelist += '\nX'  # append the 'X' sentinel that marks the end of the edge list
            dummy_path = f'{pgd_path}/dummy.txt'

            try:
                bash_script = f'{pgd_path}/pgd_0 -w {n_threads} -f {dummy_path} -c {dummy_path}'

                #pipe = sub.run(bash_script, shell=True, capture_output=True, input=edgelist.encode(), check=True, timeout=30000)
                pipe = sub.run(bash_script,
                               shell=True,
                               capture_output=True,
                               input=edgelist.encode(),
                               check=True)

                output_data = pipe.stdout.decode()

            except sub.TimeoutExpired as e:
                CP.print_blue(f'PGD timeout!{e.stderr}')
                graphlet_counts = {}

            except sub.CalledProcessError as e:
                CP.print_blue(f'PGD error {e.stderr}')
                graphlet_counts = {}
            except Exception as e:
                CP.print_blue(str(e))
                graphlet_counts = {}
            else:  # pgd is successfully run
                for line in output_data.split('\n')[:-1]:  # last line blank
                    graphlet_name, count = map(lambda st: st.strip(),
                                               line.split('='))
                    graphlet_counts[graphlet_name] = int(count)
        else:
            CP.print_red(f'PGD executable not found at {pgd_path}/pgd_0')
            graphlet_counts = {}
        self.stats['pgd_graphlet_counts'] = graphlet_counts

        return graphlet_counts
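PGD prints one 'name = count' pair per line; an illustration of the parsing loop with invented output:

output_data = 'total_3_tris = 45\ntotal_2_star = 198\n'  # invented sample output
graphlet_counts = {}
for line in output_data.split('\n')[:-1]:  # last element is empty
    name, count = map(str.strip, line.split('='))
    graphlet_counts[name] = int(count)
print(graphlet_counts)  # {'total_3_tris': 45, 'total_2_star': 198}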
    def _gen(self, gname: str, gen_id: int) -> nx.Graph:
        g = self.input_graph

        # fix BTER to use the directory..
        CP.print_blue('Starting BTER...')

        graph_path = f'./src/bter/{g.name}_{self.trial}.mat'
        np.savetxt(graph_path, nx.to_numpy_matrix(g), fmt='%d')

        matlab_code = [
            'mex -largeArrayDims tricnt_mex.c;',
            'mex -largeArrayDims ccperdegest_mex.c;',
            f"G = dlmread('{g.name}_{self.trial}.mat');", 'G = sparse(G);',
            f"graphname = '{g.name}_{self.trial}';", '',
            'nnodes = size(G, 1);', 'nedges = nnz(G) / 2;',
            r"fprintf('nodes: %d edges: %d\n', nnodes, nedges);", '',
            'nd = accumarray(nonzeros(sum(G,2)),1);',
            "maxdegree = find(nd>0,1,'last');",
            r"fprintf('Maximum degree: %d\n', maxdegree);", '',
            '[ccd,gcc] = ccperdeg(G);',
            r"fprintf('Global clustering coefficient: %.2f\n', gcc);", '',
            r"fprintf('Running BTER...\n');", 't1=tic;',
            '[E1,E2] = bter(nd,ccd);', 'toc(t1);',
            r"fprintf('Number of edges created by BTER: %d\n', size(E1,1) + size(E2,1));",
            '',
            "fprintf('Turning edge list into adjacency matrix (including dedup)...');",
            't2=tic;', 'G_bter = bter_edges2graph(E1,E2);', 'toc(t2);',
            r"fprintf('Number of edges in dedup''d graph: %d\n', nnz(G)/2);",
            '', 'G_bter = full(G_bter);',
            r"dlmwrite('{}_{}_bter.mat', G_bter, ' ');".format(
                g.name, self.trial), 'quit;'
        ]

        matlab_code_filename = f'{g.name}_{self.trial}_code.m'
        matlab_code_path = f'./src/bter/{matlab_code_filename}'

        with open(matlab_code_path, 'w') as f:  # write the MATLAB driver script
            print('\n'.join(matlab_code), file=f)

        output_path = f'./src/bter/{g.name}_{self.trial}_bter.mat'

        start_time = time()
        completed_process = sub.run(
            f'cd src/bter; cat {matlab_code_filename} | matlab -nosplash -nodesktop',
            shell=True,
            stdout=sub.DEVNULL,
            stderr=sub.DEVNULL)
        CP.print_blue(f'BTER ran in {round(time() - start_time, 3)} secs')

        if completed_process.returncode != 0 or not check_file_exists(
                output_path):
            CP.print_blue('BTER failed!')
            raise Exception('Generation failed!')

        else:
            bter_mat = np.loadtxt(output_path, dtype=int)
            g_bter = nx.from_numpy_matrix(bter_mat, create_using=nx.Graph())
            g_bter.name = gname

        g_bter.gen_id = gen_id
        delete_files(graph_path, output_path, matlab_code_path)

        return g_bter
    work_pool = mp.Pool(num)

    with mp.Pool(num) as read_pool:
        while filenames or graphs_list:
            if active_reads + pending_work + active_work <= num:
                if filenames:
                    filename = filenames.pop(0)  # take the first item
                    active_reads += 1
                    read_pool.apply_async(load_graph, [filename], callback=read_update)
                    # graphs_list.append(read_update(load_graph(filename)))
                while graphs_list:  # drain the list; popping inside enumerate skips items
                    graph = graphs_list.pop(0)
                    active_work += 1
                    # work_update(parallel_thing(graph))
                    work_pool.apply_async(parallel_thing, [graph], callback=work_update)
                    pending_work -= 1
            else:
                while graphs_list:  # drain the list; popping inside enumerate skips items
                    graph = graphs_list.pop(0)
                    active_work += 1
                    # work_update(parallel_thing(graph))
                    work_pool.apply_async(parallel_thing, [graph], callback=work_update)
                    pending_work -= 1
                ColorPrint.print_blue(f'Sleeping {active_reads}, {pending_work}, {active_work}')
                time.sleep(10)
    # wait until everything is off of the queue
    while active_work > 0:
        time.sleep(num)

    work_pool.close()
    work_pool.join()
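The fragment above leans on counters and callbacks defined elsewhere (active_reads, read_update, work_update, ...). A minimal self-contained sketch of the same apply_async-plus-callback pattern, with a stand-in work function:

import multiprocessing as mp

def square(x):  # stand-in for load_graph / parallel_thing
    return x * x

if __name__ == '__main__':
    results = []
    with mp.Pool(4) as pool:
        for item in range(10):
            # the callback runs in the parent process as each task completes
            pool.apply_async(square, [item], callback=results.append)
        pool.close()
        pool.join()  # block until every queued task has finished
    print(sorted(results))  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]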
    def run(self, use_pickle: bool) -> None:
        """
        New runner - uses list of graphs
        :param use_pickle:
        :return:
        """
        pickle_ext = '.pkl.gz'
        self.graphs = []

        if use_pickle:
            if check_file_exists(self.graphs_pickle_path +
                                 pickle_ext):  # the whole pickle exists
                graphs = load_pickle(self.graphs_pickle_path + pickle_ext)
                assert len(
                    graphs
                ) == self.num_generations + 1, f'Expected {self.num_generations + 1} graphs, found {len(graphs)}'
                CP.print_green(
                    f'Using completed pickle at {self.graphs_pickle_path + pickle_ext!r}. Loaded {len(graphs)} graphs'
                )
                return  # the trial is already complete; nothing left to do
            else:
                temp_file_pattern = re.compile(
                    rf'list_(\d+)_{self.trial}_temp_(\d+)\.pkl\.gz')
                dir_name = '/'.join(self.graphs_pickle_path.split('/')[:-1])

                input_files = [
                    f for f in os.listdir(dir_name)
                    if re.match(temp_file_pattern, f)
                ]
                if len(input_files) > 0:
                    assert len(
                        input_files
                    ) == 1, f'More than one matches found: {input_files}'

                    input_file = input_files[0]
                    total_generations, progress = map(
                        int,
                        temp_file_pattern.fullmatch(input_file).groups())
                    graphs = load_pickle(join(dir_name, input_file))
                    assert len(
                        graphs
                    ) == progress + 1, f'Found {len(graphs)}, expected: {progress + 1}'
                    CP.print_blue(
                        f'Partial pickle found at {input_file!r} trial: {self.trial} progress: {progress}/{total_generations}'
                    )
                    self.graphs = graphs

        remaining_generations = self.num_generations - len(self.graphs)

        tqdm.write(
            f'Running Infinity Mirror on {self.initial_graph.name!r} {self.initial_graph.order(), self.initial_graph.size()} {self.model.model_name!r} {remaining_generations} generations'
        )
        pbar = tqdm(total=remaining_generations,
                    bar_format='{l_bar}{bar}|[{elapsed}<{remaining}]',
                    ncols=50)

        if len(self.graphs) == 0:
            self.initial_graph.level = 0
            self.graphs = [self.initial_graph]
            self.features = [None]

        completed_trial = False
        for i in range(len(self.graphs) - 1, self.num_generations):
            if i == len(self.graphs) - 1:
                curr_graph = self.graphs[-1]  # seed the first iteration with the last stored graph

            level = i + 1
            try:
                fit_time_start = time.perf_counter()
                self.model.update(
                    new_input_graph=curr_graph)  # update the model
                fit_time = time.perf_counter() - fit_time_start
            except Exception as e:
                fit_time = np.nan
                print(f'Model fit failed {e}')
                break

            try:
                gen_time_start = time.perf_counter()
                generated_graphs = self.model.generate(
                    num_graphs=self.num_graphs,
                    gen_id=level)  # generate a new set of graphs
                gen_time = time.perf_counter() - gen_time_start
            except Exception as e:
                gen_time = np.nan
                print(f'Generation failed {e}')
                break

            if self.features:
                self.features.append(self.model.params)
            curr_graph = generated_graphs[
                0]  # we are only generating one graph
            curr_graph.name = f'{self.initial_graph.name}_{level}_{self.trial}'
            curr_graph.gen = level
            self.graphs.append(curr_graph)

            temp_pickle_path = self.graphs_pickle_path + f'_temp_{level}{pickle_ext}'
            prev_temp_pickle_path = self.graphs_pickle_path + f'_temp_{level-1}{pickle_ext}'

            temp_features_path = self.graphs_features_path + f'_temp_{level}{pickle_ext}'
            prev_temp_features_path = self.graphs_features_path + f'_temp_{level-1}{pickle_ext}'

            save_pickle(obj=self.graphs, path=temp_pickle_path)
            save_pickle(obj=self.features, path=temp_features_path)

            delete_files(prev_temp_pickle_path)
            delete_files(prev_temp_features_path)

            self.write_timing_csv(iter_=level,
                                  fit_time=fit_time,
                                  gen_time=gen_time)

            if level == self.num_generations:
                completed_trial = True
            pbar.update(1)
        pbar.close()

        if completed_trial:  # only delete the temp pickles if the trial finished successfully
            delete_files(temp_pickle_path)
            delete_files(temp_features_path)
            CP.print_green(
                f'List of {len(self.graphs)} Graphs is pickled at "{self.graphs_pickle_path + pickle_ext}"'
            )
            save_pickle(obj=self.graphs,
                        path=self.graphs_pickle_path + pickle_ext)
            save_pickle(obj=self.features,
                        path=self.graphs_features_path + pickle_ext)
        return
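The resume logic hinges on the two captured groups of the temp-file pattern; an illustration with an invented filename and trial number:

import re

trial = 3  # invented
pattern = re.compile(rf'list_(\d+)_{trial}_temp_(\d+)\.pkl\.gz')
total_generations, progress = map(
    int, pattern.fullmatch('list_20_3_temp_7.pkl.gz').groups())
print(total_generations, progress)  # -> 20 7, i.e. 7 of 20 generations saved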