def prep_environment(self) -> None:
    """
    Prepare the Python 2 virtual environment needed by HRG
    :return:
    """
    if check_file_exists('./envs/hrg'):  # environment already exists
        return

    CP.print_blue('Making virtual environment for HRG')
    sub.run(
        'python2 -m pip install --user virtualenv; python2 -m virtualenv -p python2 ./envs/hrg; . ./envs/hrg/bin/activate; which python2;',
        shell=True,
        stdout=sub.DEVNULL)  # create and activate the environment

    if 'Linux' not in platform.platform():  # non-Linux: pin gcc-9/g++-9 as the compilers for the native builds
        completed_process = sub.run(
            'export CC=gcc-9; export CXX=g++-9; . ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
            shell=True,
            stdout=sub.DEVNULL)  # install requirements for HRG
    else:
        completed_process = sub.run(
            '. ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt',
            shell=True,
            stdout=sub.DEVNULL)  # install requirements for HRG

    assert completed_process.returncode == 0, 'Error while creating environment for HRG'
    return
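# Usage sketch (hypothetical; `generator` stands in for whatever wrapper object
# carries prep_environment). The call is idempotent: it returns early if
# ./envs/hrg already exists, so it is safe to call before every HRG run.
def _example_prep_hrg_env(generator) -> None:
    generator.prep_environment()  # creates ./envs/hrg on the first call only
    assert check_file_exists('./envs/hrg'), 'virtualenv was not created'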
def write_fail_stats(self, level) -> None:
    """
    Write failure stats into a CSV
    :param level: generation level at which the failure occurred
    :return:
    """
    fieldnames = ['trial', 'gname', 'model', 'sel', 'gens', 'level']

    fail_file = f'{get_imt_output_directory()}/fail_stats.csv'
    if not check_file_exists(fail_file):  # initialize the file with headers
        with open(fail_file, 'w') as fp:
            writer = csv.DictWriter(fp, fieldnames=fieldnames)
            writer.writeheader()

    with open(fail_file, 'a') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        writer.writerow({
            'trial': self.trial,
            'gname': self.initial_graph.name,
            'model': self.model.model_name,
            'gens': self.num_generations,
            'level': level
        })  # the 'sel' column is not populated here
    return
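# Usage sketch (hypothetical): record a failure observed at generation level 3.
# Assumes `runner` carries the trial / initial_graph / model / num_generations
# attributes that write_fail_stats reads.
def _example_record_failure(runner) -> None:
    runner.write_fail_stats(level=3)  # appends one row to fail_stats.csv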
def write_timing_stats(self, time_taken) -> None:
    """
    Write timing stats into a CSV, one row of model info and timing info per call
    :param time_taken: elapsed time in seconds
    :return:
    """
    fieldnames = ['trial', 'gname', 'model', 'sel', 'gens', 'time']

    stats_file = f'{get_imt_output_directory()}/timing_stats.csv'
    if not check_file_exists(stats_file):  # initialize the file with headers
        with open(stats_file, 'w') as fp:
            writer = csv.DictWriter(fp, fieldnames=fieldnames)
            writer.writeheader()

    with open(stats_file, 'a') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        writer.writerow({
            'trial': self.trial,
            'gname': self.initial_graph.name,
            'model': self.model.model_name,
            'gens': self.num_generations,
            'time': time_taken
        })  # the 'sel' column is not populated here
    return
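# Usage sketch (hypothetical): log how long a fit/generate cycle took. Assumes
# `runner` exposes write_timing_stats as defined above; the duration value is
# illustrative.
def _example_record_timing(runner) -> None:
    runner.write_timing_stats(time_taken=12.345)  # appends one row to timing_stats.csv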
def _fit(self) -> None:
    dump = './src/netgan/dumps'
    gname = f'{self.input_graph.name}_{self.trial}'
    path = f'{dump}/{gname}.g'
    nx.write_edgelist(self.input_graph, path, data=False)

    proc = sub.run(
        f'conda init bash; . ~/.bashrc; conda activate netgan; python src/netgan/fit.py {gname} {path}; conda deactivate',
        shell=True)  # stderr / stdout deliberately not suppressed

    assert proc.returncode == 0, 'NetGAN fit did not work'
    assert check_file_exists(f'{dump}/{gname}.pkl.gz'), f'pickle not found at {dump}/{gname}.pkl.gz'
    return
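# Usage sketch (hypothetical): a NetGAN-style wrapper with input_graph and
# trial already set is fitted like this; the fitted dump lands under
# ./src/netgan/dumps. Assumes the `netgan` conda environment sourced above exists.
def _example_fit_netgan(model) -> None:
    model._fit()  # writes <name>_<trial>.g, then expects <name>_<trial>.pkl.gz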
def external_orca(g: nx.Graph, gname: str):
    if not isinstance(g, nx.Graph):
        g = nx.Graph(g)  # convert it into a simple undirected graph

    self_loop_edges = list(nx.selfloop_edges(g))
    if len(self_loop_edges) > 0:
        g.remove_edges_from(self_loop_edges)  # ORCA does not handle self-loops

    if nx.number_connected_components(g) > 1:
        g = g.subgraph(max(nx.connected_components(g), key=len))  # keep the largest connected component

    g = nx.convert_node_labels_to_integers(g, first_label=0)

    file_dir = 'src/scratch'
    input_path = f'./{file_dir}/{gname}.in'
    with open(input_path, 'w') as f:
        f.write(f'{g.order()} {g.size()}\n')  # ORCA's header line: node count, edge count
        for u, v in g.edges():
            f.write(f'{u} {v}\n')

    args = ['', '4', f'./{file_dir}/{gname}.in', f'./{file_dir}/{gname}.out']
    if 'Windows' in platform.platform():
        args[0] = './src/orca/orca.exe'
    elif 'Linux' in platform.platform():
        args[0] = './src/orca/orca_linux'
    else:
        args[0] = './src/orca/orca_mac'

    process = subprocess.run(' '.join(args), shell=True, stdout=subprocess.DEVNULL)
    if process.returncode != 0:
        print('Error in ORCA')

    output_path = f'./{file_dir}/{gname}.out'
    assert check_file_exists(output_path), f'output file @ {output_path} not found in GCD'
    df = pd.read_csv(output_path, sep=' ', header=None)

    delete_files(input_path, output_path)  # delete both the input and output files
    return df
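# Usage sketch: ORCA counts graphlet orbits (up to size 4 here, per the '4'
# argument above) for each node. Assumes the ORCA binaries under ./src/orca and
# the src/scratch directory are in place; 'karate_demo' is a made-up name.
def _example_orca() -> None:
    df = external_orca(nx.karate_club_graph(), gname='karate_demo')
    print(df.shape)  # one row per node, one column per orbit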
def _gen(self, gname: str, gen_id: int) -> nx.Graph:
    """
    Call KronGen
    """
    orig_n = self.input_graph.order()
    kron_iters = int(math.log2(orig_n))  # floor of log2 gives a lower bound on the Kronecker iteration count
    # pick whichever power of 2 is closer to the original number of nodes
    if math.fabs(2 ** kron_iters - orig_n) > math.fabs(2 ** (kron_iters + 1) - orig_n):
        kron_iters += 1

    assert 'initiator_matrix' in self.params, 'Initiator matrix not found'
    matrix = self.params['initiator_matrix']
    output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}_kron.txt'

    if len(matrix) == 0:  # KronFit failed
        CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    bash_code = f'cd src/kronecker; ./{self.krongen_exec} -o:{self.initial_gname}_{self.trial}_kron.txt -m:"{matrix}" -i:{kron_iters}'
    completed_process = sub.run(bash_code, shell=True, stdout=sub.PIPE)

    if completed_process.returncode != 0 or not check_file_exists(output_file):
        CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    graph = nx.read_edgelist(output_file, nodetype=int, create_using=nx.Graph())
    graph.name = gname
    graph.gen_id = gen_id
    delete_files(output_file)
    return graph
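# Worked restatement of the iteration-count rule above: k multiplications of a
# 2x2 initiator yield 2**k nodes, so k is chosen to make 2**k closest to the
# original order. A standalone version for checking:
def _closest_kron_iters(orig_n: int) -> int:
    k = int(math.log2(orig_n))
    if abs(2 ** k - orig_n) > abs(2 ** (k + 1) - orig_n):
        k += 1
    return k
# e.g. _closest_kron_iters(700) == 9 (512 is closer), _closest_kron_iters(900) == 10 (1024 is closer)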
def generate(self, num_graphs: int, gen_id: int) -> Union[List[nx.Graph], None]:
    edgelist_path = f'./src/hrg/{self.initial_gname}_{self.trial}.g'
    nx.write_edgelist(self.input_graph, edgelist_path, data=False)

    output_pickle_path = f'./src/hrg/Results/{self.initial_gname}_{self.trial}_hstars.pickle'
    completed_process = sub.run(
        f'. ./envs/hrg/bin/activate; cd src/hrg; python2 exact_phrg.py --orig {self.initial_gname}_{self.trial}.g --trials {num_graphs}; deactivate;',
        shell=True,
        stdout=sub.DEVNULL)

    if completed_process.returncode != 0 or not check_file_exists(output_pickle_path):
        CP.print_blue(f'Error in HRG: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    gen_graphs = load_pickle(output_pickle_path)
    if not isinstance(gen_graphs, list) or len(gen_graphs) != num_graphs:
        raise Exception('Generation failed!')

    generated_graphs = []
    for i, gen_graph in enumerate(gen_graphs):
        gen_graph = self._make_graph(gen_graph)
        gen_graph.name = f'{self.input_graph.name}_{self.trial}_{i + 1}'  # append the index of the graph
        gen_graph.gen_id = gen_id
        generated_graphs.append(gen_graph)

    # delete_files(edgelist_path, output_pickle_path)
    return generated_graphs
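# Usage sketch (hypothetical): generate 5 graphs from a fitted HRG wrapper.
# Assumes `model` has initial_gname / trial / input_graph set and that the hrg
# virtualenv from prep_environment exists.
def _example_hrg_generate(model) -> None:
    graphs = model.generate(num_graphs=5, gen_id=1)
    assert len(graphs) == 5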
def _fit(self) -> None:
    """
    Call KronFit
    """
    output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}-fit'

    # write the edgelist to disk; KronFit expects a directed graph with nodes labeled from 1
    g = nx.convert_node_labels_to_integers(self.input_graph, first_label=1, label_attribute='old_label')
    directed_g = g.to_directed()
    edgelist_path = f'./src/kronecker/{self.initial_gname}_{self.trial}.txt'
    nx.write_edgelist(directed_g, edgelist_path, data=False)

    bash_code = f'cd src/kronecker; {self.kronfit_exec} -i:{self.initial_gname}_{self.trial}.txt -o:{self.initial_gname}_{self.trial}-fit -s:50000'
    completed_process = sub.run(bash_code, shell=True)  # stdout not captured

    if completed_process.returncode != 0 or not check_file_exists(output_file):
        CP.print_blue(f'Error in KronFit: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    with open(output_file) as f:
        last_line = f.readlines()[-1]
    last_line = last_line.replace(']', '')
    matrix = last_line[last_line.find('[') + 1:]  # the initiator matrix sits inside [ ] on the last line
    # CP.print_blue('Initiator matrix:', matrix)
    self.params['initiator_matrix'] = matrix
    return
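# Usage sketch (hypothetical): a full Kronecker round trip -- KronFit estimates
# the 2x2 initiator matrix, KronGen (_gen above) expands it. Assumes `model`
# carries kronfit_exec / krongen_exec and the other attributes used above;
# 'kron_demo' is a made-up name.
def _example_kronecker_round_trip(model) -> None:
    model._fit()  # stores the initiator matrix in model.params
    g = model._gen(gname='kron_demo', gen_id=1)
    print(g.order(), g.size())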
def _gen(self, gname: str, gen_id: int) -> nx.Graph:
    g = self.input_graph

    # TODO: fix BTER to use the directory
    CP.print_blue('Starting BTER...')

    graph_path = f'./src/bter/{g.name}_{self.trial}.mat'
    np.savetxt(graph_path, nx.to_numpy_matrix(g), fmt='%d')

    matlab_code = [
        'mex -largeArrayDims tricnt_mex.c;',
        'mex -largeArrayDims ccperdegest_mex.c;',
        f"G = dlmread('{g.name}_{self.trial}.mat');",
        'G = sparse(G);',
        f"graphname = '{g.name}_{self.trial}';",
        '',
        'nnodes = size(G, 1);',
        'nedges = nnz(G) / 2;',
        r"fprintf('nodes: %d edges: %d\n', nnodes, nedges);",
        '',
        'nd = accumarray(nonzeros(sum(G,2)),1);',
        "maxdegree = find(nd>0,1,'last');",
        r"fprintf('Maximum degree: %d\n', maxdegree);",
        '',
        '[ccd,gcc] = ccperdeg(G);',
        r"fprintf('Global clustering coefficient: %.2f\n', gcc);",
        '',
        r"fprintf('Running BTER...\n');",
        't1=tic;',
        '[E1,E2] = bter(nd,ccd);',
        'toc(t1);',
        r"fprintf('Number of edges created by BTER: %d\n', size(E1,1) + size(E2,1));",
        '',
        "fprintf('Turning edge list into adjacency matrix (including dedup)...');",
        't2=tic;',
        'G_bter = bter_edges2graph(E1,E2);',
        'toc(t2);',
        r"fprintf('Number of edges in dedup''d graph: %d\n', nnz(G_bter)/2);",
        '',
        'G_bter = full(G_bter);',
        f"dlmwrite('{g.name}_{self.trial}_bter.mat', G_bter, ' ');",
        'quit;'
    ]

    matlab_code_filename = f'{g.name}_{self.trial}_code.m'
    matlab_code_path = f'./src/bter/{matlab_code_filename}'
    with open(matlab_code_path, 'w') as fp:
        print('\n'.join(matlab_code), file=fp)

    output_path = f'./src/bter/{g.name}_{self.trial}_bter.mat'

    start_time = time()
    completed_process = sub.run(
        f'cd src/bter; cat {matlab_code_filename} | matlab -nosplash -nodesktop',
        shell=True,
        stdout=sub.DEVNULL,
        stderr=sub.DEVNULL)
    CP.print_blue(f'BTER ran in {round(time() - start_time, 3)} secs')

    if completed_process.returncode != 0 or not check_file_exists(output_path):
        CP.print_blue('BTER failed!')
        raise Exception('Generation failed!')

    bter_mat = np.loadtxt(output_path, dtype=int)
    g_bter = nx.from_numpy_matrix(bter_mat, create_using=nx.Graph())
    g_bter.name = gname
    g_bter.gen_id = gen_id
    delete_files(graph_path, output_path, matlab_code_path)
    return g_bter
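# Usage sketch (hypothetical): BTER matches the degree distribution and
# per-degree clustering of the input. Assumes MATLAB plus the BTER sources
# under ./src/bter, and a wrapper `model` with input_graph and trial set;
# 'bter_demo' is a made-up name.
def _example_bter(model) -> None:
    g_bter = model._gen(gname='bter_demo', gen_id=1)
    print(g_bter.order(), g_bter.size())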
def run(self, use_pickle: bool) -> None:
    """
    New runner - uses a list of graphs
    :param use_pickle: load a completed or partial pickle if one exists
    :return:
    """
    pickle_ext = '.pkl.gz'
    self.graphs = []
    self.features = [None]  # ensure features exists even when resuming from a partial pickle

    if use_pickle:
        if check_file_exists(self.graphs_pickle_path + pickle_ext):  # the whole pickle exists
            graphs = load_pickle(self.graphs_pickle_path + pickle_ext)
            assert len(graphs) == self.num_generations + 1, \
                f'Expected {self.num_generations + 1} graphs, found {len(graphs)}'
            CP.print_green(
                f'Using completed pickle at {self.graphs_pickle_path + pickle_ext!r}. Loaded {len(graphs)} graphs')
            return
        else:  # look for a partial pickle left behind by an interrupted run
            temp_file_pattern = re.compile(rf'list_(\d+)_{self.trial}_temp_(\d+)\.pkl\.gz')
            dir_name = '/'.join(self.graphs_pickle_path.split('/')[:-1])
            input_files = [f for f in os.listdir(dir_name) if re.match(temp_file_pattern, f)]
            if len(input_files) > 0:
                assert len(input_files) == 1, f'More than one match found: {input_files}'
                input_file = input_files[0]
                total_generations, progress = map(int, temp_file_pattern.fullmatch(input_file).groups())
                graphs = load_pickle(join(dir_name, input_file))
                assert len(graphs) == progress + 1, f'Found {len(graphs)} graphs, expected: {progress + 1}'
                CP.print_blue(
                    f'Partial pickle found at {input_file!r} trial: {self.trial} progress: {progress}/{total_generations}')
                self.graphs = graphs

    remaining_generations = self.num_generations - len(self.graphs)

    tqdm.write(
        f'Running Infinity Mirror on {self.initial_graph.name!r} {self.initial_graph.order(), self.initial_graph.size()} '
        f'{self.model.model_name!r} {remaining_generations} generations')
    pbar = tqdm(total=remaining_generations,
                bar_format='{l_bar}{bar}|[{elapsed}<{remaining}]',
                ncols=50)

    if len(self.graphs) == 0:  # start a fresh chain from the input graph
        self.initial_graph.level = 0
        self.graphs = [self.initial_graph]
        self.features = [None]

    completed_trial = False
    for i in range(len(self.graphs) - 1, self.num_generations):
        curr_graph = self.graphs[-1]  # fit the model to the latest graph in the chain
        level = i + 1

        try:
            fit_time_start = time.perf_counter()
            self.model.update(new_input_graph=curr_graph)  # update the model
            fit_time = time.perf_counter() - fit_time_start
        except Exception as e:
            fit_time = np.nan
            print(f'Model fit failed: {e}')
            break

        try:
            gen_time_start = time.perf_counter()
            generated_graphs = self.model.generate(num_graphs=self.num_graphs, gen_id=level)  # generate a new set of graphs
            gen_time = time.perf_counter() - gen_time_start
        except Exception as e:
            gen_time = np.nan
            print(f'Generation failed: {e}')
            break

        if self.features:
            self.features.append(self.model.params)

        curr_graph = generated_graphs[0]  # we are only generating one graph
        curr_graph.name = f'{self.initial_graph.name}_{level}_{self.trial}'
        curr_graph.gen = level
        self.graphs.append(curr_graph)

        temp_pickle_path = self.graphs_pickle_path + f'_temp_{level}{pickle_ext}'
        prev_temp_pickle_path = self.graphs_pickle_path + f'_temp_{level - 1}{pickle_ext}'
        temp_features_path = self.graphs_features_path + f'_temp_{level}{pickle_ext}'
        prev_temp_features_path = self.graphs_features_path + f'_temp_{level - 1}{pickle_ext}'

        save_pickle(obj=self.graphs, path=temp_pickle_path)
        save_pickle(obj=self.features, path=temp_features_path)
        delete_files(prev_temp_pickle_path)
        delete_files(prev_temp_features_path)

        self.write_timing_csv(iter_=level, fit_time=fit_time, gen_time=gen_time)

        if level == self.num_generations:
            completed_trial = True
        pbar.update(1)
    pbar.close()

    if completed_trial:  # only delete the temp pickles if the trial finished successfully
        delete_files(temp_pickle_path)
        delete_files(temp_features_path)

    CP.print_green(f'List of {len(self.graphs)} graphs is pickled at "{self.graphs_pickle_path + pickle_ext}"')
    save_pickle(obj=self.graphs, path=self.graphs_pickle_path + pickle_ext)
    save_pickle(obj=self.features, path=self.graphs_features_path + pickle_ext)
    return
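# Usage sketch (hypothetical driver): run one infinity-mirror chain, resuming
# from a partial pickle when available. Assumes an InfinityMirror-style class
# whose constructor sets the attributes run() reads; names are illustrative.
def _example_run_trial(runner) -> None:
    runner.run(use_pickle=True)  # resumes from list_*_temp_*.pkl.gz if present
    print(f'finished with {len(runner.graphs)} graphs')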