def convert_huffner():
    """
    Preprocess every non-blacklisted Huffner graph in data/original/huffner/.

    Each graph is passed through the OCT and VC reduction routines again and
    again until a complete round leaves it unchanged.  The reduced graph,
    its summary row, the accumulated OCT set and the name lookup are then
    written beneath data/preprocessed/.
    """
    # Directories of interest
    data_root = Path('.') / 'data'
    original_dir = data_root / 'original'
    preprocessed_dir = data_root / 'preprocessed'
    huffner_dir = original_dir / 'huffner'

    # Huffner files we don't preprocess
    blacklist = ['aa12', 'j12', 'j27']

    # Collect the Huffner datasets that are not blacklisted
    data_names = sorted(
        [name
         for name in names_in_dir(huffner_dir, '.graph')
         if name not in blacklist])
    print('Identified {} Huffner files'.format(len(data_names)))

    for dataset in data_names:
        print('Processing', dataset)
        began = time.time()

        graph = read_huffner(huffner_dir, dataset)
        oct_set = set()

        # Alternate OCT and VC reductions until neither changes the graph
        while True:
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            oct_changed, graph, oct_set = oct_reductions(graph, oct_set)
            if oct_changed:
                print("-- OCT reduced graph")

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # The snap file is refreshed right before the VC reductions;
            # presumably vc_reductions reads it back -- TODO confirm.
            write_snap(graph, preprocessed_dir / 'snap')
            vc_changed, graph, oct_set = vc_reductions(graph, oct_set)
            if vc_changed:
                print("-- VC reduced graph")

            if not (oct_changed or vc_changed):
                break

        elapsed = time.time() - began
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(elapsed, 1)))

        # Write the results
        graph = reset_labels(graph)
        write_summary(graph, preprocessed_dir / 'summary', 'huffner.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, preprocessed_dir / 'snap')

    print('Preprocessed Huffner data')
def convert_select_gka(data_names):
    """
    Preprocess the named GKA graphs found in data/original/gka/.

    Any existing data/preprocessed/summary/gka.csv is deleted first.  Each
    graph in ``data_names`` is then reduced with the OCT and VC reduction
    routines until a complete round leaves it unchanged, and the results
    are written beneath data/preprocessed/.
    """
    # Directories of interest
    data_root = Path('.') / 'data'
    original_dir = data_root / 'original'
    preprocessed_dir = data_root / 'preprocessed'

    # Remove the old statistics CSV
    stale_summary = preprocessed_dir / 'summary' / 'gka.csv'
    if stale_summary.is_file():
        stale_summary.unlink()

    for dataset in data_names:
        print('Processing', dataset)
        began = time.time()

        graph = read_beasley(original_dir / 'gka', dataset)
        oct_set = set()

        # Alternate OCT and VC reductions until neither changes the graph
        while True:
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            oct_changed, graph, oct_set = oct_reductions(graph, oct_set)
            if oct_changed:
                print("-- OCT reduced graph")

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # The snap file is refreshed right before the VC reductions;
            # presumably vc_reductions reads it back -- TODO confirm.
            write_snap(graph, preprocessed_dir / 'snap')
            vc_changed, graph, oct_set = vc_reductions(graph, oct_set)
            if vc_changed:
                print("-- VC reduced graph")

            if not (oct_changed or vc_changed):
                break

        # Report timing and write the results
        elapsed = time.time() - began
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(elapsed, 1)))
        graph = reset_labels(graph)
        write_summary(graph, preprocessed_dir / 'summary', 'gka.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, preprocessed_dir / 'snap')

    print('Preprocessed GKA data')
def _generate_to(qubo, seed, oct_upper_bound, bias=0.5):
    """
    Generate a random graph with a planted OCT set, modelled on a QUBO graph.

    An Erdos-Renyi graph is sampled with the same number of vertices as
    ``qubo`` and, in expectation, the same number of edges.  The first
    ``oct_upper_bound`` vertices are left untouched and form the OCT set.
    Every remaining vertex is placed in the left partite set with
    probability ``bias`` and in the right partite set otherwise.  Edges
    whose two endpoints lie in the same partite set are then removed, so
    the graph without the OCT vertices is bipartite.

    Parameters
    ----------
    qubo : graph providing ``order()``, ``size()`` and ``graph['name']``
    seed : seed used both for the Erdos-Renyi sample and for the partition
    oct_upper_bound : number of leading vertices kept out of the partition
    bias : probability that a partitioned vertex lands in the left set

    Returns
    -------
    The generated graph after ``reset_labels``; it is named
    ``<qubo name>-to-<seed>``.
    """
    # Compute parameters needed for ER
    n = qubo.order()
    p = qubo.size() / scipy.special.binom(n, 2)

    # Generate graph
    graph = nx.erdos_renyi_graph(n=n, p=p, seed=seed)

    # Seed the module-level generator so the partition is reproducible
    random.seed(seed)

    # Split the vertices after the first oct_upper_bound into partite sets
    partite1 = set()
    partite2 = set()
    for node in list(graph.nodes())[oct_upper_bound:]:
        if random.random() < bias:
            partite1.add(node)
        else:
            partite2.add(node)

    # Remove edges within a partite set.  Scanning the existing edges avoids
    # enumerating every vertex pair; the list is built first because the
    # graph must not be modified while its edges are being iterated.
    within_partite = [
        (u, v) for u, v in graph.edges()
        if (u in partite1 and v in partite1)
        or (u in partite2 and v in partite2)
    ]
    graph.remove_edges_from(within_partite)

    # Name the graph
    graph.graph['name'] = '{}-{}-{}'.format(qubo.graph['name'], 'to', seed)

    # Sanitize the graph and return
    graph = reset_labels(graph)
    return graph
def _sanitize_select_gka(original_dir, sanitized_dir, data_names):
    """
    Sanitize select graphs in the original/gka/ directory.

    For every name in ``data_names`` the file ``<name>.txt`` is read from
    ``original_dir / 'gka'`` with ``read_beasley``, relabelled with
    ``reset_labels``, and written out in edgelist, huffner and snap form
    beneath ``sanitized_dir``.
    """
    for dataset in data_names:
        # Sanitize the graph and write
        print('Sanitizing', dataset)
        graph = read_beasley(original_dir / 'gka', dataset + '.txt')
        graph = reset_labels(graph)
        write_edgelist(graph, sanitized_dir / 'edgelist')
        write_huffner(graph, sanitized_dir / 'huffner')
        write_snap(graph, sanitized_dir / 'snap')
    # This routine sanitizes rather than preprocesses; the message now
    # matches the one printed by _sanitize_huffner.
    print('Sanitized GKA data')
def _sanitize_select_beasley(original_dir, sanitized_dir, data_names):
    """
    Sanitize select graphs in the original/beasley/ directory.

    For every name in ``data_names`` the file ``<name>.txt`` is read from
    ``original_dir / 'beasley'`` with ``read_beasley``, relabelled with
    ``reset_labels``, and written out in edgelist, huffner and snap form
    beneath ``sanitized_dir``.
    """
    for dataset in data_names:
        # Sanitize the graph and write
        print('Sanitizing', dataset)
        graph = read_beasley(original_dir / 'beasley', dataset + '.txt')
        graph = reset_labels(graph)
        write_edgelist(graph, sanitized_dir / 'edgelist')
        write_huffner(graph, sanitized_dir / 'huffner')
        write_snap(graph, sanitized_dir / 'snap')
    # This routine sanitizes rather than preprocesses; the message now
    # matches the one printed by _sanitize_huffner.
    print('Sanitized Beasley data')
def _generate_cl(qubo, seed):
    """
    Generate a Chung-Lu graph that matches a graph's degree distribution.

    The sorted degree sequence of ``qubo`` is used as the expected-degree
    weights.  The result is named ``<qubo name>-cl-<seed>`` and passed
    through ``reset_labels`` before being returned.
    """
    # Expected degrees: the input graph's degrees in ascending order
    expected_degrees = [qubo.degree(vertex) for vertex in qubo.nodes()]
    expected_degrees.sort()

    # Sample the graph without self-loops
    synthetic = nx.expected_degree_graph(
        w=expected_degrees, selfloops=False, seed=seed)

    # Name, sanitize and return
    synthetic.graph['name'] = '{}-{}-{}'.format(
        qubo.graph['name'], 'cl', seed)
    return reset_labels(synthetic)
def _generate_ba(qubo, seed):
    """
    Generate a Barabasi-Albert graph modelled on a graph.

    The graph has as many vertices as ``qubo`` and is built with the
    attachment parameter ``m = ceil(edges / vertices)``.  The result is
    named ``<qubo name>-ba-<seed>`` and passed through ``reset_labels``
    before being returned.
    """
    # Model parameters: vertex count and rounded-up edges-per-vertex
    vertex_count = qubo.order()
    attachments = math.ceil(qubo.size() / vertex_count)

    # Sample the graph
    synthetic = nx.barabasi_albert_graph(
        n=vertex_count, m=attachments, seed=seed)

    # Name, sanitize and return
    synthetic.graph['name'] = '{}-{}-{}'.format(
        qubo.graph['name'], 'ba', seed)
    return reset_labels(synthetic)
def _generate_er(qubo, seed):
    """
    Generate an Erdos-Renyi graph modelled on a QUBO graph.

    The graph has as many vertices as ``qubo`` and an edge probability of
    ``edges / C(vertices, 2)``, so the edge count matches in expectation.
    The result is named ``<qubo name>-er-<seed>`` and passed through
    ``reset_labels`` before being returned.
    """
    # Model parameters: vertex count and edge probability
    vertex_count = qubo.order()
    possible_edges = scipy.special.binom(vertex_count, 2)
    edge_probability = qubo.size() / possible_edges

    # Sample the graph
    synthetic = nx.erdos_renyi_graph(
        n=vertex_count, p=edge_probability, seed=seed)

    # Name, sanitize and return
    synthetic.graph['name'] = '{}-{}-{}'.format(
        qubo.graph['name'], 'er', seed)
    return reset_labels(synthetic)
def _sanitize_huffner(original_dir, sanitized_dir):
    """
    Sanitize all graphs in the original/huffner/ directory.

    Every name reported by ``names_in_dir`` for the '.graph' extension is
    read with ``read_huffner``, relabelled with ``reset_labels``, and
    written out in edgelist, huffner and snap form beneath
    ``sanitized_dir``.
    """
    huffner_dir = original_dir / 'huffner'

    # Identify the Huffner data
    data_names = names_in_dir(huffner_dir, '.graph')
    data_names = sorted(data_names)
    print('Identified {} Huffner files'.format(len(data_names)))

    # Sanitize and write each dataset in turn
    for dataset in data_names:
        print('Sanitizing', dataset)
        sanitized = reset_labels(
            read_huffner(huffner_dir, dataset + '.graph'))
        write_edgelist(sanitized, sanitized_dir / 'edgelist')
        write_huffner(sanitized, sanitized_dir / 'huffner')
        write_snap(sanitized, sanitized_dir / 'snap')

    print('Sanitized Huffner data')
def _convert_quantum(data_names):
    """
    Preprocess the named sanitized graphs in data/sanitized/edgelist/.

    The summary file data/preprocessed/summary/quantum.csv is started afresh
    with a header.  Each graph in ``data_names`` has its original vertex and
    edge counts recorded, is reduced with the OCT and VC reduction routines
    until a complete round leaves it unchanged, and is then written beneath
    data/preprocessed/.
    """
    # Directories of interest
    data_root = Path('.') / 'data'
    input_dir = data_root / 'sanitized'
    output_dir = data_root / 'preprocessed'

    # Drop the old statistics CSV (or create its directory), then start a
    # new file with the header
    summary_dir = output_dir / 'summary'
    summary_filename = summary_dir / 'quantum.csv'
    if not summary_filename.is_file():
        summary_dir.mkdir(exist_ok=True, parents=True)
    else:
        summary_filename.unlink()
    _write_summary_header(summary_filename)

    for dataset in data_names:
        now = datetime.datetime.fromtimestamp(time.time())
        timestamp = now.strftime('%Y/%m/%d-%H:%M:%S:')
        print('{} Processing {}'.format(timestamp, dataset))

        # Load the graph and remember its size before any reduction
        graph = reset_labels(read_edgelist(input_dir / 'edgelist', dataset))
        graph.graph['original_vertices'] = graph.order()
        graph.graph['original_edges'] = graph.size()
        oct_set = set()

        # Alternate OCT and VC reductions until neither changes the graph
        while True:
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            oct_changed, graph, oct_set = oct_reductions(graph, oct_set)
            if oct_changed:
                print("-- OCT reduced graph")

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # The snap file is refreshed right before the VC reductions;
            # presumably vc_reductions reads it back -- TODO confirm.
            write_snap(graph, output_dir / 'snap')
            vc_changed, graph, oct_set = vc_reductions(graph, oct_set)
            if vc_changed:
                print("-- VC reduced graph")

            if not (oct_changed or vc_changed):
                break

        # Write the results
        graph = reset_labels(graph)
        _write_summary(graph, summary_dir, 'quantum.csv')
        _write_oct_set(graph, oct_set, output_dir / 'oct')
        _write_name_lookup(graph, output_dir / 'lookup')
        write_edgelist(graph, output_dir / 'edgelist')
        write_huffner(graph, output_dir / 'huffner')
        write_snap(graph, output_dir / 'snap')

    print('Finished preprocessing quantum data')
# Obtain the names of the quantum graphs already sanitized datasets = names_in_dir(input_dir, '.edgelist') # Keep only the non-synthetic data datasets = sorted(list(filter(lambda x: '-' not in x, datasets))) # Read in the pre-computed optimal OCT sizes oct_upper_bound = _populate_oct_upper_bound_lookup() # For every dataset and seed, generate a synthetic graph with each model for dataset, seed in product(datasets, args.seeds): print('For {} and seed {}'.format(dataset, seed)) # Generate the sanitized ER random graph print('- Generating Erdos-Renyi') graph = read_edgelist(input_dir, dataset + '.edgelist') er_graph = _generate_er(graph, seed) reset_labels(er_graph) # Write the graph write_edgelist(er_graph, sanitized_dir / 'edgelist') write_huffner(er_graph, sanitized_dir / 'huffner') write_snap(er_graph, sanitized_dir / 'snap') # Generate the sanitized CL random graph print('- Generating Chung-Lu') graph = read_edgelist(input_dir, dataset + '.edgelist') cl_graph = _generate_cl(graph, seed) reset_labels(cl_graph) # Write the graph write_edgelist(cl_graph, sanitized_dir / 'edgelist') write_huffner(cl_graph, sanitized_dir / 'huffner') write_snap(cl_graph, sanitized_dir / 'snap')