Example #1
    def prep_environment(self) -> None:
        """
        Prepare the Python environment
        :return:
        """
        if check_file_exists('./envs/hrg'):
            return

        CP.print_blue('Making virtual environment for HRG')
        sub.run(
            'python2 -m pip install --user virtualenv; python2 -m virtualenv -p python2 ./envs/hrg; . ./envs/hrg/bin/activate; which python2;',
            shell=True,
            stdout=sub.DEVNULL)  # create and activate the environment
        if 'Linux' not in platform.platform():  # non-Linux (macOS): force GCC 9 as the compiler
            completed_process = sub.run(
                'export CC=gcc-9; export CXX=g++-9; . ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
                shell=True,
                stdout=sub.DEVNULL)  # install requirements for HRG
        else:
            completed_process = sub.run(
                '. ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
                shell=True,
                stdout=sub.DEVNULL)  # install requirements for HRG

        assert completed_process.returncode == 0, 'Error while creating environment for HRG'
        return

    def write_fail_stats(self, level) -> None:
        """
        Write fail stats into a csv
        :return:
        """
        fieldnames = ['trial', 'gname', 'model', 'sel', 'gens', 'level']

        fail_file = f'{get_imt_output_directory()}/fail_stats.csv'
        if not check_file_exists(fail_file):  # initialize the file with headers
            with open(fail_file, 'w') as fp:
                writer = csv.DictWriter(fp, fieldnames=fieldnames)
                writer.writeheader()

        with open(fail_file, 'a') as fp:
            writer = csv.DictWriter(fp, fieldnames=fieldnames)
            writer.writerow({
                'trial': self.trial,
                'gname': self.initial_graph.name,
                'model': self.model.model_name,
                'gens': self.num_generations,
                'level': level
            })

        return

    def write_timing_stats(self, time_taken) -> None:
        """
        Write timing stats into a csv
        Write model info and timing info
        :return:
        """
        fieldnames = ['trial', 'gname', 'model', 'sel', 'gens', 'time']

        stats_file = f'{get_imt_output_directory()}/timing_stats.csv'
        if not check_file_exists(stats_file):  # initialize the file with headers
            with open(stats_file, 'w') as fp:
                writer = csv.DictWriter(fp, fieldnames=fieldnames)
                writer.writeheader()

        with open(stats_file, 'a') as fp:
            writer = csv.DictWriter(fp, fieldnames=fieldnames)
            writer.writerow({
                'trial': self.trial,
                'gname': self.initial_graph.name,
                'model': self.model.model_name,
                'gens': self.num_generations,
                'time': time_taken
            })

        return
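Both writers above repeat the same append-to-CSV pattern: write the header only if the file does not exist yet, then append one row. A minimal self-contained sketch of that pattern; the helper name append_csv_row and the sample row are illustrative, not part of the original code:

import csv
import os


def append_csv_row(path: str, fieldnames: list, row: dict) -> None:
    """Append one row to a CSV file, writing the header first if the file is new."""
    is_new = not os.path.exists(path)
    with open(path, 'a', newline='') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        if is_new:
            writer.writeheader()  # initialize the file with headers
        writer.writerow(row)


# missing keys (here 'sel') are filled with DictWriter's default restval ''
append_csv_row('fail_stats.csv',
               ['trial', 'gname', 'model', 'sel', 'gens', 'level'],
               {'trial': 1, 'gname': 'karate', 'model': 'HRG', 'gens': 20, 'level': 3})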
Example #4
    def _fit(self) -> None:
        dump = './src/netgan/dumps'
        gname = f'{self.input_graph.name}_{self.trial}'
        path = f'{dump}/{gname}.g'
        nx.write_edgelist(self.input_graph, path, data=False)

        proc = sub.run(
            f'conda init bash; . ~/.bashrc; conda activate netgan; python src/netgan/fit.py {gname} {path}; conda deactivate',
            shell=True)  # stdout/stderr left visible for debugging
        assert proc.returncode == 0, 'NetGAN fit did not work'
        assert check_file_exists(
            f'{dump}/{gname}.pkl.gz'
        ), f'pickle not found at {dump}/{gname}.pkl.gz'
        return
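One caveat with a chained command under shell=True: proc.returncode reflects only the last command in the chain, so a failed conda activate or fit.py run can be masked by a successful conda deactivate. A minimal sketch of making the chain fail fast instead; the graph name and path here are illustrative:

import subprocess as sub

cmd = ('conda init bash; . ~/.bashrc; conda activate netgan; '
       'python src/netgan/fit.py mygraph_1 ./src/netgan/dumps/mygraph_1.g; '
       'conda deactivate')
# bash -e aborts on the first failing command, so the return code is trustworthy
proc = sub.run(['bash', '-ec', cmd])
assert proc.returncode == 0, 'NetGAN fit did not work'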
Example #5
def external_orca(g: nx.Graph, gname: str):
    g = nx.Graph(g)  # simple undirected copy: drops edge directions and parallel edges
    g.remove_edges_from(list(nx.selfloop_edges(g)))  # ORCA does not allow self-loops

    if nx.number_connected_components(g) > 1:
        # keep only the largest connected component; copy() because subgraph() returns a frozen view
        g = g.subgraph(max(nx.connected_components(g), key=len)).copy()

    g = nx.convert_node_labels_to_integers(g, first_label=0)

    file_dir = 'src/scratch'
    input_path = f'./{file_dir}/{gname}.in'
    with open(input_path, 'w') as f:
        f.write(f'{g.order()} {g.size()}\n')
        for u, v in g.edges():
            f.write(f'{u} {v}\n')

    args = ['', '4', f'./{file_dir}/{gname}.in', f'./{file_dir}/{gname}.out']

    if 'Windows' in platform.platform():
        args[0] = './src/orca/orca.exe'
    elif 'Linux' in platform.platform():
        args[0] = './src/orca/orca_linux'
    else:
        args[0] = './src/orca/orca_mac'

    process = subprocess.run(' '.join(args),
                             shell=True,
                             stdout=subprocess.DEVNULL)
    if process.returncode != 0:
        print('Error in ORCA')

    output_path = f'./{file_dir}/{gname}.out'
    assert check_file_exists(
        output_path), f'output file @ {output_path} not found in GCD'
    df = pd.read_csv(output_path, sep=' ', header=None)

    # delete both the input and output files
    delete_files(input_path, output_path)

    return df
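A usage sketch, assuming the ORCA binary for your platform and the src/scratch directory exist as described above. With graphlet size 4 (the '4' argument), ORCA outputs one row per node and one column per graphlet orbit (orbits 0-14):

import networkx as nx

g = nx.karate_club_graph()
orbit_counts = external_orca(g, gname='karate_test')
print(orbit_counts.shape)  # expected (34, 15): 34 nodes, 15 graphlet orbits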
Example #6
    def _gen(self, gname: str, gen_id: int) -> nx.Graph:
        """
        call KronGen
        """
        orig_n = self.input_graph.order()
        kron_iters = int(math.log2(orig_n))  # floor of log2 lower-bounds the Kronecker iteration count
        if math.fabs(2 ** kron_iters - orig_n) > math.fabs(2 ** (kron_iters + 1) - orig_n):
            kron_iters += 1  # round up when the next power of 2 is closer to the original order

        assert 'initiator_matrix' in self.params, 'Initiator matrix not found'
        matrix = self.params['initiator_matrix']

        output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}_kron.txt'

        if len(matrix) == 0:  # KronFit failed
            CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            bash_code = f'cd src/kronecker; ./{self.krongen_exec} -o:{self.initial_gname}_{self.trial}_kron.txt -m:"{matrix}" -i:{kron_iters}'
            completed_process = sub.run(bash_code, shell=True, stdout=sub.PIPE)

            if completed_process.returncode != 0 or not check_file_exists(
                    output_file):
                CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
                raise Exception('Generation failed!')
            else:
                graph = nx.read_edgelist(output_file,
                                         nodetype=int,
                                         create_using=nx.Graph())
                graph.name = gname

                delete_files(output_file)
        graph.gen_id = gen_id
        return graph
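The iteration count picks the power of 2 closest to the input order, since i Kronecker multiplications of a 2x2 initiator yield a graph on 2 ** i nodes. A standalone check of that rounding rule (the helper name closest_kron_iters is illustrative):

import math


def closest_kron_iters(n: int) -> int:
    """Return i such that 2 ** i is the power of two closest to n."""
    i = int(math.log2(n))  # floor of log2
    if abs(2 ** i - n) > abs(2 ** (i + 1) - n):
        i += 1
    return i


assert closest_kron_iters(34) == 5  # 32 is closer to 34 than 64 is
assert closest_kron_iters(50) == 6  # 64 is closer to 50 than 32 is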
Example #7
    def generate(self, num_graphs: int,
                 gen_id: int) -> Union[List[nx.Graph], None]:
        edgelist_path = f'./src/hrg/{self.initial_gname}_{self.trial}.g'
        nx.write_edgelist(self.input_graph, edgelist_path, data=False)
        output_pickle_path = f'./src/hrg/Results/{self.initial_gname}_{self.trial}_hstars.pickle'

        completed_process = sub.run(
            f'. ./envs/hrg/bin/activate; cd src/hrg; python2 exact_phrg.py --orig {self.initial_gname}_{self.trial}.g --trials {num_graphs}; deactivate;',
            shell=True,
            stdout=sub.DEVNULL)

        if completed_process.returncode != 0 or not check_file_exists(
                output_pickle_path):
            CP.print_blue(f'Error in HRG: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            generated_graphs = []
            gen_graphs = load_pickle(output_pickle_path)
            if not isinstance(gen_graphs, list) or len(gen_graphs) != num_graphs:
                raise Exception('Generation failed!')

            for i, gen_graph in enumerate(gen_graphs):
                gen_graph = self._make_graph(gen_graph)
                gen_graph.name = f'{self.input_graph.name}_{self.trial}_{i + 1}'  # append the graph index to the name
                gen_graph.gen_id = gen_id

                generated_graphs.append(gen_graph)

            # redundant sanity check: generated_graphs mirrors gen_graphs one-to-one
            if not isinstance(generated_graphs, list) or len(generated_graphs) != num_graphs:
                print('HRG failed')
                raise Exception('Generation failed!')

        # delete_files(edgelist_path, output_pickle_path)
        return generated_graphs
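The two list-shape checks above could be folded into a single helper; a minimal sketch under that assumption (validate_generated is an illustrative name, not part of the original code):

import networkx as nx


def validate_generated(graphs, expected: int) -> None:
    """Raise if the generator did not return exactly `expected` graphs in a list."""
    if not isinstance(graphs, list) or len(graphs) != expected:
        raise Exception('Generation failed!')


validate_generated([nx.path_graph(5) for _ in range(10)], expected=10)  # passes silently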
Example #8
    def _fit(self) -> None:
        """
        call KronFit
        """
        output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}-fit'

        # write edgelist to the path, but graph needs to start from 1
        g = nx.convert_node_labels_to_integers(self.input_graph,
                                               first_label=1,
                                               label_attribute='old_label')
        directed_g = g.to_directed()  # kronecker expects a directed graph

        edgelist_path = f'./src/kronecker/{self.initial_gname}_{self.trial}.txt'
        nx.write_edgelist(directed_g, edgelist_path, data=False)

        bash_code = f'cd src/kronecker; {self.kronfit_exec} -i:{self.initial_gname}_{self.trial}.txt -o:{self.initial_gname}_{self.trial}-fit -s:50000'
        completed_process = sub.run(bash_code,
                                    shell=True)  # , stdout=sub.PIPE)

        if completed_process.returncode != 0 or not check_file_exists(output_file):
            CP.print_blue(f'Error in KronFit: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            with open(output_file) as f:
                last_line = f.readlines()[-1]
                last_line = last_line.replace(']', '')
                matrix = last_line[last_line.find('[') + 1:]
            # CP.print_blue('Initiator matrix:', matrix)

        self.params['initiator_matrix'] = matrix
        return
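The initiator matrix is recovered by slicing the bracketed part out of KronFit's last output line. A standalone sketch of that parsing; the sample line mimics the format the code above expects, not verbatim KronFit output:

last_line = 'Estimated initiator [0.9, 0.6; 0.6, 0.2]'
last_line = last_line.replace(']', '')
matrix = last_line[last_line.find('[') + 1:]
print(matrix)  # '0.9, 0.6; 0.6, 0.2' -- later passed to KronGen via -m:"..."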
Example #9
    def _gen(self, gname: str, gen_id: int) -> nx.Graph:
        g = self.input_graph

        # fix BTER to use the directory..
        CP.print_blue('Starting BTER...')

        graph_path = f'./src/bter/{g.name}_{self.trial}.mat'
        np.savetxt(graph_path, nx.to_numpy_matrix(g), fmt='%d')

        matlab_code = [
            'mex -largeArrayDims tricnt_mex.c;',
            'mex -largeArrayDims ccperdegest_mex.c;',
            f"G = dlmread('{g.name}_{self.trial}.mat');", 'G = sparse(G);',
            f"graphname = '{g.name}_{self.trial}';", '',
            'nnodes = size(G, 1);', 'nedges = nnz(G) / 2;',
            r"fprintf('nodes: %d edges: %d\n', nnodes, nedges);", '',
            'nd = accumarray(nonzeros(sum(G,2)),1);',
            "maxdegree = find(nd>0,1,'last');",
            r"fprintf('Maximum degree: %d\n', maxdegree);", '',
            '[ccd,gcc] = ccperdeg(G);',
            r"fprintf('Global clustering coefficient: %.2f\n', gcc);", '',
            r"fprintf('Running BTER...\n');", 't1=tic;',
            '[E1,E2] = bter(nd,ccd);', 'toc(t1);',
            r"fprintf('Number of edges created by BTER: %d\n', size(E1,1) + size(E2,1));",
            '',
            "fprintf('Turning edge list into adjacency matrix (including dedup)...');",
            't2=tic;', 'G_bter = bter_edges2graph(E1,E2);', 'toc(t2);',
            r"fprintf('Number of edges in dedup''d graph: %d\n', nnz(G)/2);",
            '', 'G_bter = full(G_bter);',
            r"dlmwrite('{}_{}_bter.mat', G_bter, ' ');".format(
                g.name, self.trial), 'quit;'
        ]

        matlab_code_filename = f'{g.name}_{self.trial}_code.m'
        matlab_code_path = f'./src/bter/{matlab_code_filename}'

        with open(matlab_code_path, 'w') as f:
            f.write('\n'.join(matlab_code) + '\n')

        output_path = f'./src/bter/{g.name}_{self.trial}_bter.mat'

        start_time = time()
        completed_process = sub.run(
            f'cd src/bter; cat {matlab_code_filename} | matlab -nosplash -nodesktop',
            shell=True,
            stdout=sub.DEVNULL,
            stderr=sub.DEVNULL)
        CP.print_blue(f'BTER ran in {round(time() - start_time, 3)} secs')

        if completed_process.returncode != 0 or not check_file_exists(
                output_path):
            CP.print_blue('BTER failed!')
            raise Exception('Generation failed!')

        else:
            bter_mat = np.loadtxt(output_path, dtype=int)
            g_bter = nx.from_numpy_matrix(bter_mat, create_using=nx.Graph())
            g_bter.name = gname

        g_bter.gen_id = gen_id
        delete_files(graph_path, output_path, matlab_code_path)

        return g_bter

    def run(self, use_pickle: bool) -> None:
        """
        New runner - uses list of graphs
        :param use_pickle:
        :return:
        """
        pickle_ext = '.pkl.gz'
        self.graphs = []

        if use_pickle:
            if check_file_exists(self.graphs_pickle_path + pickle_ext):  # the whole pickle exists
                graphs = load_pickle(self.graphs_pickle_path + pickle_ext)
                assert len(graphs) == self.num_generations + 1, \
                    f'Expected {self.num_generations + 1} graphs, found {len(graphs)}'
                CP.print_green(
                    f'Using completed pickle at {self.graphs_pickle_path + pickle_ext!r}. Loaded {len(graphs)} graphs'
                )
                return
            else:
                temp_file_pattern = re.compile(
                    rf'list_(\d+)_{self.trial}_temp_(\d+)\.pkl\.gz')
                dir_name = os.path.dirname(self.graphs_pickle_path)

                input_files = [
                    f for f in os.listdir(dir_name)
                    if re.match(temp_file_pattern, f)
                ]
                if len(input_files) > 0:
                    assert len(input_files) == 1, f'More than one match found: {input_files}'

                    input_file = input_files[0]
                    total_generations, progress = map(
                        int, temp_file_pattern.fullmatch(input_file).groups())
                    graphs = load_pickle(join(dir_name, input_file))
                    assert len(graphs) == progress + 1, \
                        f'Found {len(graphs)} graphs, expected: {progress + 1}'
                    CP.print_blue(
                        f'Partial pickle found at {input_file!r} trial: {self.trial} progress: {progress}/{total_generations}'
                    )
                    self.graphs = graphs

        remaining_generations = self.num_generations - len(self.graphs)

        tqdm.write(
            f'Running Infinity Mirror on {self.initial_graph.name!r} {self.initial_graph.order(), self.initial_graph.size()} {self.model.model_name!r} {remaining_generations} generations'
        )
        pbar = tqdm(total=remaining_generations,
                    bar_format='{l_bar}{bar}|[{elapsed}<{remaining}]',
                    ncols=50)

        if len(self.graphs) == 0:
            self.initial_graph.level = 0
            self.graphs = [self.initial_graph]
            self.features = [None]

        completed_trial = False
        for i in range(len(self.graphs) - 1, self.num_generations):
            if i == len(self.graphs) - 1:
                curr_graph = self.graphs[-1]  # use the last graph

            level = i + 1
            try:
                fit_time_start = time.perf_counter()
                self.model.update(
                    new_input_graph=curr_graph)  # update the model
                fit_time = time.perf_counter() - fit_time_start
            except Exception as e:
                fit_time = np.nan
                print(f'Model fit failed {e}')
                break

            try:
                gen_time_start = time.perf_counter()
                generated_graphs = self.model.generate(
                    num_graphs=self.num_graphs,
                    gen_id=level)  # generate a new set of graphs
                gen_time = time.perf_counter() - gen_time_start
            except Exception as e:
                gen_time = np.nan
                print(f'Generation failed {e}')
                break

            if self.features:
                self.features.append(self.model.params)
            curr_graph = generated_graphs[0]  # we are only generating one graph
            curr_graph.name = f'{self.initial_graph.name}_{level}_{self.trial}'
            curr_graph.gen = level
            self.graphs.append(curr_graph)

            temp_pickle_path = self.graphs_pickle_path + f'_temp_{level}{pickle_ext}'
            prev_temp_pickle_path = self.graphs_pickle_path + f'_temp_{level-1}{pickle_ext}'

            temp_features_path = self.graphs_features_path + f'_temp_{level}{pickle_ext}'
            prev_temp_features_path = self.graphs_features_path + f'_temp_{level-1}{pickle_ext}'

            save_pickle(obj=self.graphs, path=temp_pickle_path)
            save_pickle(obj=self.features, path=temp_features_path)

            delete_files(prev_temp_pickle_path)
            delete_files(prev_temp_features_path)

            self.write_timing_csv(iter_=level,
                                  fit_time=fit_time,
                                  gen_time=gen_time)

            if level == self.num_generations:
                completed_trial = True
            pbar.update(1)
        pbar.close()

        if completed_trial:  # delete the temp pickles only if the trial finished successfully
            delete_files(temp_pickle_path)
            delete_files(temp_features_path)
            CP.print_green(
                f'List of {len(self.graphs)} Graphs is pickled at "{self.graphs_pickle_path + pickle_ext}"'
            )
            save_pickle(obj=self.graphs,
                        path=self.graphs_pickle_path + pickle_ext)
            save_pickle(obj=self.features,
                        path=self.graphs_features_path + pickle_ext)
        return
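run keeps exactly one rolling checkpoint per trial: after each generation it saves a temp pickle for level k and deletes the one for level k - 1, so a crashed trial can resume from the newest file. A minimal self-contained sketch of that rolling-checkpoint idea; the file naming here is illustrative:

import os
import pickle


def save_checkpoint(obj, base: str, level: int) -> None:
    """Write the checkpoint for `level`, then drop the one for `level - 1`."""
    with open(f'{base}_temp_{level}.pkl', 'wb') as fp:
        pickle.dump(obj, fp)
    prev = f'{base}_temp_{level - 1}.pkl'
    if os.path.exists(prev):
        os.remove(prev)


for level in range(1, 4):
    save_checkpoint(list(range(level + 1)), base='list_20_1', level=level)
# only list_20_1_temp_3.pkl remains on disk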