def extraction_helper(A, links, g_labels, split_env):
    """Extract, label, and persist one subgraph per link into an LMDB sub-db.

    Fans the per-link extraction out over a process pool and, for each
    resulting datum, updates module-level accumulators and writes the
    serialized datum into ``split_env`` keyed by its string id.

    Relies on module-level state (not visible in this chunk):
    ``env`` (LMDB environment), ``params``, ``max_label_value``,
    ``max_n_label``, ``subgraph_sizes``, ``enc_ratios``,
    ``num_pruned_nodes``, ``intialize_worker``, ``extract_save_subgraph``,
    ``serialize`` — presumably shared accumulators/helpers; verify against
    the enclosing module.

    Args:
        A: adjacency structure passed through to the pool workers.
        links: sequence of links; its length is stored under ``num_graphs``.
        g_labels: per-link graph labels, zipped with ``links`` for workers.
        split_env: LMDB named database handle for this data split.
    """
    with env.begin(write=True, db=split_env) as txn:
        num_links = len(links)
        # BUGFIX: the original passed int.bit_length(num_links) to to_bytes(),
        # i.e. a *bit* count where to_bytes expects a *byte* count — it wrote
        # ~8x more bytes than needed and degenerated for num_links == 0.
        # Minimal byte width is ceil(bit_length / 8); max(..., 1) guarantees at
        # least one byte so that 0 round-trips through int.from_bytes.
        # Readers using int.from_bytes(value, 'little') are width-agnostic,
        # so this change is backward-compatible.
        byte_width = max((num_links.bit_length() + 7) // 8, 1)
        txn.put('num_graphs'.encode(), num_links.to_bytes(byte_width, byteorder='little'))

    # processes=None lets the pool use os.cpu_count() workers; each worker is
    # seeded with (A, params, max_label_value) via the initializer.
    with mp.Pool(processes=None, initializer=intialize_worker, initargs=(A, params, max_label_value)) as p:
        args_ = zip(range(len(links)), links, g_labels)
        for (str_id, datum) in tqdm(p.imap(extract_save_subgraph, args_), total=len(links)):
            # Track the element-wise maximum node label seen so far.
            max_n_label['value'] = np.maximum(np.max(datum['n_labels'], axis=0), max_n_label['value'])
            subgraph_sizes.append(datum['subgraph_size'])
            enc_ratios.append(datum['enc_ratio'])
            num_pruned_nodes.append(datum['num_pruned_nodes'])

            # NOTE(review): one write transaction per datum mirrors the
            # original behavior; batching puts into fewer transactions would
            # be faster but is left unchanged here.
            with env.begin(write=True, db=split_env) as txn:
                txn.put(str_id, serialize(datum))
def get_average_subgraph_size(sample_size, links, A, params):
    """Estimate the mean serialized size (in bytes) of a subgraph datum.

    Draws ``sample_size`` links uniformly at random (with replacement),
    runs the full extraction + labeling pipeline on each, and averages the
    byte length of the serialized datum. Depends on module-level
    ``subgraph_extraction_labeling`` and ``serialize`` (defined elsewhere).

    Args:
        sample_size: number of links to sample.
        links: array-like of (head, tail, relation-label) triples.
        A: adjacency structure forwarded to the extractor.
        params: config object read for ``hop``, ``enclosing_sub_graph``,
            and ``max_nodes_per_hop``.

    Returns:
        float: total serialized bytes divided by ``sample_size``.
    """
    sampled = links[np.random.choice(len(links), sample_size)]
    accumulated_bytes = 0
    for (head, tail, rel) in sampled:
        extraction = subgraph_extraction_labeling(
            (head, tail), rel, A, params.hop,
            params.enclosing_sub_graph, params.max_nodes_per_hop)
        nodes, node_labels, sg_size, enc_ratio, pruned = extraction
        # g_label is fixed at 0 here: only the serialized size matters.
        datum = {
            'nodes': nodes,
            'r_label': rel,
            'g_label': 0,
            'n_labels': node_labels,
            'subgraph_size': sg_size,
            'enc_ratio': enc_ratio,
            'num_pruned_nodes': pruned,
        }
        accumulated_bytes += len(serialize(datum))
    return accumulated_bytes / sample_size