def partial_k_edge_augmentation(G, k, avail, weight=None):
    """Finds augmentation that k-edge-connects as much of the graph as possible.

    When a k-edge-augmentation is not possible, we can still try to find a
    small set of edges that partially k-edge-connects as much of the graph as
    possible. All possible edges are generated between remaining parts.
    This minimizes the number of k-edge-connected subgraphs in the resulting
    graph and maximizes the edge connectivity between those subgraphs.

    Parameters
    ----------
    G : NetworkX graph
       An undirected graph.

    k : integer
        Desired edge connectivity

    avail : dict or a set of 2 or 3 tuples
        For more details, see :func:`k_edge_augmentation`.

    weight : string
        key to use to find weights if ``avail`` is a set of 3-tuples.
        For more details, see :func:`k_edge_augmentation`.

    Yields
    ------
    edge : tuple
        Edges in the partial augmentation of G. These edges k-edge-connect any
        part of G where it is possible, and maximally connect the remaining
        parts. In other words, all edges from avail are generated except for
        those within subgraphs that have already become k-edge-connected.

    Notes
    -----
    Construct H that augments G with all edges in avail.
    Find the k-edge-subgraphs of H.
    For each k-edge-subgraph, if the number of nodes is more than k, then find
    the k-edge-augmentation of that graph and add it to the solution. Then add
    all edges in avail between k-edge subgraphs to the solution.

    See Also
    --------
    :func:`k_edge_augmentation`

    Examples
    --------
    >>> G = nx.path_graph((1, 2, 3, 4, 5, 6, 7))
    >>> G.add_node(8)
    >>> avail = [(1, 3), (1, 4), (1, 5), (2, 4), (2, 5), (3, 5), (1, 8)]
    >>> sorted(partial_k_edge_augmentation(G, k=2, avail=avail))
    [(1, 5), (1, 8)]
    """

    def _edges_between_disjoint(H, only1, only2):
        """ finds edges between disjoint nodes """
        only1_adj = {u: set(H.adj[u]) for u in only1}
        for u, neighbs in only1_adj.items():
            # Find the neighbors of u in only1 that are also in only2
            neighbs12 = neighbs.intersection(only2)
            for v in neighbs12:
                yield (u, v)

    avail_uv, avail_w = _unpack_available_edges(avail, weight=weight, G=G)

    # Find which parts of the graph can be k-edge-connected.
    # Use the normalized (u, v) pairs so that 3-tuple avail also works.
    H = G.copy()
    H.add_edges_from(
        (
            (u, v, {"weight": w, "generator": (u, v)})
            for (u, v), w in zip(avail_uv, avail_w)
        )
    )
    k_edge_subgraphs = list(nx.k_edge_subgraphs(H, k=k))

    # Generate edges to k-edge-connect internal subgraphs
    for nodes in k_edge_subgraphs:
        if len(nodes) > 1:
            # Get the k-edge-connected subgraph
            C = H.subgraph(nodes).copy()
            # Find the internal edges that were available
            sub_avail = {
                d["generator"]: d["weight"]
                for (u, v, d) in C.edges(data=True)
                if "generator" in d
            }
            # Remove potential augmenting edges
            C.remove_edges_from(sub_avail.keys())
            # Find a subset of these edges that makes the component
            # k-edge-connected and ignore the rest
            yield from nx.k_edge_augmentation(C, k=k, avail=sub_avail)

    # Generate all edges between CCs that could not be k-edge-connected
    for cc1, cc2 in it.combinations(k_edge_subgraphs, 2):
        for (u, v) in _edges_between_disjoint(H, cc1, cc2):
            d = H.get_edge_data(u, v)
            edge = d.get("generator", None)
            if edge is not None:
                yield edge
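# Usage sketch for the partial-augmentation behavior above, via the public
# API (assuming networkx is imported as nx): node 8 can only ever reach the
# rest of the graph through the single candidate edge (1, 8), so a strict
# 2-edge-augmentation is infeasible, while partial=True still yields the
# useful edges. This mirrors the docstring example.
import networkx as nx

G = nx.path_graph((1, 2, 3, 4, 5, 6, 7))
G.add_node(8)
avail = [(1, 3), (1, 4), (1, 5), (2, 4), (2, 5), (3, 5), (1, 8)]
try:
    list(nx.k_edge_augmentation(G, k=2, avail=avail))
except nx.NetworkXUnfeasible:
    pass  # expected: no subset of avail can 2-edge-connect node 8
# The partial solution 2-edge-connects nodes 1-7 and maximally attaches 8.
print(sorted(nx.k_edge_augmentation(G, k=2, avail=avail, partial=True)))
# [(1, 5), (1, 8)]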
def benchmark_diffnet(sij_generator, ntimes=100,
                      optimalities=['D', 'A', 'Etree'],
                      constant_relative_error=False,
                      epsilon=1e-2):
    '''
    For each optimality criterion, compute the reduction of the covariance of
    the D-, A-, and E-optimal networks relative to the minimum spanning tree.

    Args:

    sij_generator: function - sij_generator() generates a symmetric matrix
    of sij.

    Returns:

    ( stats, avg, topo ): tuple -
    stats['D'|'A'|'E'][o] is a numpy array of the covariance ratio
    ('D': ln(det(C)), 'A': tr(C), 'E': max(eig(C)))
    avg['D'|'A'|'E'][o] is the corresponding mean.
    topo[o][0] is the histogram of n_{ii}/s_{ii}.
    topo[o][1] is the histogram of n_{ij}/s_{ij} for j!=i.
    topo[o][2] is the list of connectivities of the measurement networks.
    topo[o][3] is the list containing the numbers of edges that need to be
    added to the measurement networks to make the graphs 2-edge-connected
    (which ensures a cycle between any two nodes).
    o can be 'D', 'A', 'Etree', 'MSTn', 'MSTs', 'MSTv', 'cstn', 'cstv',
    'csts'.
    '''
    stats = dict(D=dict(), A=dict(), E=dict())
    for s in stats:
        for o in optimalities + ['MSTn', 'MSTs', 'MSTv'] + \
                ['cstn', 'csts', 'cstv']:
            stats[s][o] = np.zeros(ntimes)
    emin = -5
    emax = 2
    nbins = 2 * (emax + 1 - emin)
    bins = np.concatenate([[0], np.logspace(emin, emax, nbins)])
    # topo records the topology of the optimal measurement networks
    topo = dict([(o, [np.zeros(nbins, dtype=float),
                      np.zeros(nbins, dtype=float), [], []])
                 for o in optimalities])
    nfails = 0
    for t in range(ntimes):
        if constant_relative_error:
            results = dict()
            si, sij = sij_generator()
            for o in optimalities:
                if o == 'A':
                    results[o] = A_optimize_const_relative_error(si)
                elif o == 'D':
                    results[o] = D_optimize_const_relative_error(si)
                else:
                    results.update(optimize(sij, [o]))
        else:
            sij = sij_generator()
            results = optimize(sij, optimalities)
        ssum = np.sum(np.triu(sij))
        if None in results.values():
            nfails += 1
            continue
        for o in optimalities:
            n = np.array(results[o])
            n[n < 0] = 0
            nos = ssum * n / sij
            d = np.diag(nos)
            u = [nos[i, j] for i in range(n.shape[0])
                 for j in range(i + 1, n.shape[0])]
            hd, _ = np.histogram(d, bins, density=False)
            hu, _ = np.histogram(u, bins, density=False)
            topo[o][0] += hd
            topo[o][1] += hu
            nos[nos < epsilon] = 0
            gdn = nx.from_numpy_array(nos)
            topo[o][2].append(nx.edge_connectivity(gdn))
            topo[o][3].append(len(list(nx.k_edge_augmentation(gdn, 2))))
        results.update(
            dict(MSTn=MST_optimize(sij, 'n'),
                 MSTs=MST_optimize(sij, 'std'),
                 MSTv=MST_optimize(sij, 'var')))
        results.update(
            dict(cstn=const_allocation(sij, 'n'),
                 csts=const_allocation(sij, 'std'),
                 cstv=const_allocation(sij, 'var')))
        CMSTn = covariance(cvxopt.div(results['MSTn'], sij**2))
        DMSTn = np.log(linalg.det(CMSTn))
        AMSTn = np.trace(CMSTn)
        EMSTn = np.max(linalg.eig(CMSTn)[0]).real
        for o in results:
            n = results[o]
            C = covariance(cvxopt.div(n, sij**2))
            D = np.log(linalg.det(C))
            A = np.trace(C)
            E = np.max(linalg.eig(C)[0]).real
            stats['D'][o][t - nfails] = D - DMSTn
            stats['A'][o][t - nfails] = A / AMSTn
            stats['E'][o][t - nfails] = E / EMSTn
    avg = dict()
    for s in stats:
        avg[s] = dict()
        for o in stats[s]:
            stats[s][o] = stats[s][o][:ntimes - nfails]
            avg[s][o] = np.mean(stats[s][o])
    for o in optimalities:
        topo[o][0] /= (ntimes - nfails)
        topo[o][1] /= (ntimes - nfails)
    return stats, avg, topo
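# A hypothetical driver for benchmark_diffnet, sketched under the assumption
# that the surrounding module's optimize()/covariance() helpers take a cvxopt
# matrix of fluctuations; random_sij_generator is illustrative and not part
# of the module.
import cvxopt
import numpy as np

def random_sij_generator(K=8):
    # Random symmetric positive fluctuations s_ij = s_ji > 0.
    s = np.random.uniform(0.5, 2.0, size=(K, K))
    return cvxopt.matrix(0.5 * (s + s.T))

stats, avg, topo = benchmark_diffnet(random_sij_generator, ntimes=10)
# avg['A']['D']: mean tr(C) of the D-optimal network relative to the MST.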
def sparse_A_optimal_network(sij, nadd=1., nsofar=None, n_measure=0,
                             connectivity=2, sparse_by_fluctuation=True):
    r'''
    Construct a sparse A-optimal network, so that (approximately) only
    n_measure different measurements will receive resource allocations,
    while guaranteeing the given degree of connectivity.

    Args:

    sij: KxK symmetric matrix, where the measurement variance of the
    difference between i and j is proportional to s[i][j]^2 = s[j][i]^2,
    and the measurement variance of i is proportional to s[i][i]^2.

    nadd: float, nadd gives the additional number of samples to be
    collected in the next iteration.

    nsofar: KxK symmetric matrix, where nsofar[i,j] is the number of
    samples that have already been collected for the (i,j) pair.

    n_measure: int, the number of measurements to receive allocations.
    The actual number of measurements with non-zero allocation might
    exceed this number in order to guarantee the connectivity. If it is
    zero, the number of measurements will be determined by the
    connectivity requirement.

    connectivity: int, ensure that the resulting difference network is
    k-edge-connected.

    sparse_by_fluctuation: bool, if True, generate the sparse network by
    minimizing \sum_e s_e in the k-connected spanning subgraph.

    Return:

    KxK symmetric matrix of float, the (i,j) element of which gives the
    number of samples to be allocated to the measurement of the (i,j)
    difference in the next iteration.
    '''
    K = sij.size[0]
    if nsofar is None:
        nsofar = np.zeros((K, K), dtype=float)
    if not sparse_by_fluctuation:
        # First, get the dense optimal network
        nij = update_A_optimal_sdp(sij, nadd, nsofar)

        def weight(i, j, epsilon=1e-10):
            n = nij[i, j]
            large = 1 / epsilon
            if n > epsilon:
                return 1. / n
            else:
                return large
    else:
        def weight(i, j):
            return sij[i, j]

    # Next, get the k-connected graph that approximately minimizes the sum
    # of the edge weights (1/n_{ij}, or s_{ij} if sparse_by_fluctuation).
    # The weighted candidate edges must be passed as `avail`, otherwise the
    # augmentation ignores the weights entirely.
    G = nx.Graph()
    G.add_nodes_from(range(K))
    G.add_node('O')
    candidates = []
    for i in range(K):
        candidates.append(('O', i, weight(i, i)))
        for j in range(i + 1, K):
            candidates.append((i, j, weight(i, j)))
    edges = list(nx.k_edge_augmentation(G, k=connectivity,
                                        avail=candidates, partial=True))

    # Include only the edges that guarantee k-connectivity and nothing else
    only_include_measurements = set()
    for i, j in edges:
        if 'O' == i:
            only_include_measurements.add((j, j))
        elif 'O' == j:
            only_include_measurements.add((i, i))
        else:
            if i < j:
                only_include_measurements.add((i, j))
            else:
                only_include_measurements.add((j, i))

    # If there is additional allowance for the number of measurements, add
    # the remaining measurements with the smallest weights -- i.e. the
    # largest allocations in the dense network, or the smallest
    # fluctuations when sparse_by_fluctuation is True.
    if (len(only_include_measurements) < n_measure):
        indices = []
        for i in range(K):
            for j in range(i, K):
                if (i, j) in only_include_measurements:
                    continue
                heapq.heappush(indices, (weight(i, j), (i, j)))
        addition = []
        for m in range(n_measure - len(only_include_measurements)):
            _w, (i, j) = heapq.heappop(indices)
            addition.append((i, j))
        only_include_measurements.update(addition)

    nij = update_A_optimal_sdp(sij, nadd, nsofar, only_include_measurements)
    return nij
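# Usage sketch (assumes the module's update_A_optimal_sdp is available and
# that sij is a KxK cvxopt matrix, as implied by sij.size[0] above): allocate
# 100 new samples over a sparse, 2-edge-connected A-optimal network.
import cvxopt
import numpy as np

K = 6
s = np.random.uniform(0.5, 2.0, size=(K, K))
sij = cvxopt.matrix(0.5 * (s + s.T))
nij = sparse_A_optimal_network(sij, nadd=100., connectivity=2)
# nij[i, j] is the number of samples for the (i, j) difference measurement;
# nij[i, i] is for the individual measurement of i.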
def k_edge_augmentation(G, k, avail=None, partial=False):
    """Thin wrapper around nx.k_edge_augmentation that applies the module's
    e_ helper to each yielded (u, v) pair."""
    return it.starmap(e_, nx.k_edge_augmentation(G, k, avail=avail,
                                                 partial=partial))
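# For reference, a plausible definition of the e_ helper used above -- the
# real e_ lives elsewhere in this codebase, so this sorted-endpoint version
# is only an assumption -- followed by a round trip through the wrapper:
import itertools as it
import networkx as nx

def e_(u, v):
    # Hypothetical canonical form: order the endpoints of an undirected edge.
    return (u, v) if u <= v else (v, u)

G = nx.path_graph(4)
print(list(k_edge_augmentation(G, k=2)))  # e.g. [(0, 3)], closing the path into a cycle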
def _augment_and_check(G, k, avail=None, weight=None, verbose=False, orig_k=None, max_aug_k=None): """ Does one specific augmentation and checks for properties of the result """ if orig_k is None: try: orig_k = nx.edge_connectivity(G) except nx.NetworkXPointlessConcept: orig_k = 0 info = {} try: if avail is not None: # ensure avail is in dict form avail_dict = dict( zip(*_unpack_available_edges(avail, weight=weight))) else: avail_dict = None try: # Find the augmentation if possible generator = nx.k_edge_augmentation(G, k=k, weight=weight, avail=avail) assert not isinstance(generator, list), 'should always return an iter' aug_edges = [] for edge in generator: aug_edges.append(edge) except nx.NetworkXUnfeasible: infeasible = True info['infeasible'] = True assert len( aug_edges) == 0, 'should not generate anything if unfeasible' if avail is None: n_nodes = G.number_of_nodes() assert n_nodes <= k, ( 'unconstrained cases are only unfeasible if |V| <= k. ' f'Got |V|={n_nodes} and k={k}') else: if max_aug_k is None: G_aug_all = G.copy() G_aug_all.add_edges_from(avail_dict.keys()) try: max_aug_k = nx.edge_connectivity(G_aug_all) except nx.NetworkXPointlessConcept: max_aug_k = 0 assert max_aug_k < k, ( 'avail should only be unfeasible if using all edges ' 'does not achieve k-edge-connectivity') # Test for a partial solution partial_edges = list( nx.k_edge_augmentation(G, k=k, weight=weight, partial=True, avail=avail)) info['n_partial_edges'] = len(partial_edges) if avail_dict is None: assert set(partial_edges) == set(complement_edges(G)), ( 'unweighted partial solutions should be the complement') elif len(avail_dict) > 0: H = G.copy() # Find the partial / full augmented connectivity H.add_edges_from(partial_edges) partial_conn = nx.edge_connectivity(H) H.add_edges_from(set(avail_dict.keys())) full_conn = nx.edge_connectivity(H) # Full connectivity should be no better than our partial # solution. assert partial_conn == full_conn, 'adding more edges should not increase k-conn' # Find the new edge-connectivity after adding the augmenting edges aug_edges = partial_edges else: infeasible = False # Find the weight of the augmentation num_edges = len(aug_edges) if avail is not None: total_weight = sum([avail_dict[e] for e in aug_edges]) else: total_weight = num_edges info['total_weight'] = total_weight info['num_edges'] = num_edges # Find the new edge-connectivity after adding the augmenting edges G_aug = G.copy() G_aug.add_edges_from(aug_edges) try: aug_k = nx.edge_connectivity(G_aug) except nx.NetworkXPointlessConcept: aug_k = 0 info['aug_k'] = aug_k # Do checks if not infeasible and orig_k < k: assert info['aug_k'] >= k, ( f'connectivity should increase to k={k} or more') assert info['aug_k'] >= orig_k, ( 'augmenting should never reduce connectivity') _assert_solution_properties(G, aug_edges, avail_dict) except Exception: info['failed'] = True print(f"edges = {list(G.edges())}") print(f"nodes = {list(G.nodes())}") print(f"aug_edges = {list(aug_edges)}") print(f"info = {info}") raise else: if verbose: print(f'info = {info}') if infeasible: aug_edges = None return aug_edges, info
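# A sketch of how this helper is typically driven from a test; the graph and
# parameters are illustrative, and the assertions follow directly from the
# contract documented in the helper itself.
import networkx as nx

def test_augment_path_to_biconnected():
    G = nx.path_graph(5)
    aug_edges, info = _augment_and_check(G, k=2)
    assert aug_edges is not None  # unconstrained case is feasible here
    assert info['aug_k'] >= 2
    assert info['num_edges'] == len(aug_edges)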
def scale(input_file, output_file, scale_factor, bridges=0.1,
          sampling_factor=0.5, precision=0.95, connect=False,
          stitching_type="all-to-all", merge_nfs=False, verbose=True):
    if verbose and mpi.rank == 0:
        print("""
 ______  ______  _______  _____  _     _  _______ _______ _______         _______  ______
|  ____ |_____/ |_____|  |_____] |_____|  |______ |       |_____| |       |______ |_____/
|_____| |    \\_ |     |  |       |     |  ______| |_____  |     | |_____  |______ |    \\_
                                                                                     |0.1|
        """)
        print(
            "▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬"
        )
        print(
            "Scale factor: {}. Bridges: {}%. Precision: {}%. Sampling factor: {}. "
            "Connect: {}. Stitching: {}. NFS: {}".format(
                scale_factor, bridges * 100, precision * 100,
                sampling_factor, connect, stitching_type, merge_nfs))
        print(
            "▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬"
        )
        print()

    total_t = time.time()

    # Step 1: Parse stitching type and check that it is valid
    stitching_type = StitchType.parse_type(stitching_type)

    # Step 2: Read and distribute edges, then load the graph
    loading_t = time.time()
    edges, partition_map, total_nodes = distributor.distribute_edges(
        input_file)
    graph = util.load_graph_from_edges(edges)
    edges = None  # Free memory
    if verbose and mpi.rank == 0:
        print("=================================")
        print("Loading time:", round(time.time() - loading_t, 2), "seconds")
        print("=================================")

    # Step 3: Calculate weights (what % of the nodes to sample) for each rank
    nodes_amount = mpi.comm.alltoall([graph.number_of_nodes()] * mpi.size)
    weights = list(
        map(lambda a: ceil(a / sum(nodes_amount) * 100) / 100.0,
            nodes_amount))

    # Step 4: Split the scale factor into sampling rounds
    sampling_factor = min(sampling_factor, scale_factor)
    factors = [
        sampling_factor for _ in range(int(scale_factor / sampling_factor))
    ]
    remaining_factor = scale_factor - round(sum(factors), 2)
    if remaining_factor:
        factors.append(remaining_factor)

    # Step 5: Run distributed sampling
    samples = []
    for i, factor in enumerate(factors):
        sampling_t = time.time()
        samples.append(
            sampler.sample(graph, int(total_nodes * factor),
                           weights[mpi.rank], partition_map, precision))
        if verbose and mpi.rank == 0:
            print("Sampling time {}/{}:".format(i + 1, len(factors)),
                  round(time.time() - sampling_t, 2), "seconds")
    if verbose and mpi.rank == 0:
        print("=================================")

    # Step 6: Connect each sample if requested
    if connect:
        connecting_t = time.time()
        for sample in samples:
            sample.add_edges_from(nx.k_edge_augmentation(nx.Graph(sample), 1))
        if verbose and mpi.rank == 0:
            print("Connecting time:", round(time.time() - connecting_t, 2),
                  "seconds")
            print("=================================")

    # Step 7: Rename vertices
    relabeling_t = time.time()
    util.relabel_samples(samples)
    if verbose and mpi.rank == 0:
        print("Relabeling time:", round(time.time() - relabeling_t, 2),
              "seconds")
        print("=================================")

    # Step 8: Stitch samples locally and distributively
    stitching_t = time.time()
    stitcher.stitch_samples(samples, bridges, stitching_type)
    if verbose and mpi.rank == 0:
        print("Stitching time:", round(time.time() - stitching_t, 2),
              "seconds")
        print("=================================")

    # Step 9: Merge distributed samples into the master file
    dumping_t = time.time()
    merger.merge_samples(samples, output_file, merge_nfs)
    if verbose and mpi.rank == 0:
        print("Dumping time:", round(time.time() - dumping_t, 2), "seconds")
        print("=================================")
        print()
        print("▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬")
        print("▬▬▬", "Total time:", round(time.time() - total_t, 2),
              "seconds", "▬▬▬▬")
        print("▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬")
def build_graph_from_strings(args):
    if args.graph_degree >= args.graph_size:
        raise ValueError(
            "Requested graph degree %s larger than graph size %s" %
            (args.graph_degree, args.graph_size))
    if not os.path.isfile(args.input_data):
        raise ValueError("Input file %s doesn't exist" % args.input_data)

    tiling_grammar = grammar.TilingGrammar([])
    if os.path.isfile(args.grammar):
        tiling_grammar.load(args.grammar)
    else:
        raise ValueError("Grammar file %s doesn't exist" % args.grammar)
    if TREE_GRAMMAR:
        tiling_grammar.convert_to_tree_grammar()

    data = pandas.read_hdf(args.input_data, 'table')
    print("Number of SMILES strings: ", len(data))
    if args.graph_size <= len(data):
        data = data.sample(n=args.graph_size)
    words = data["structure"]
    tmp_ids = data.index.tolist()
    selected_ids = [int(x) for x in tmp_ids]

    # setup toolbar
    sys.stdout.write("Inserting graph nodes [%s]" % (" " * 10))
    sys.stdout.flush()
    sys.stdout.write("\b" * (10 + 1))  # return to start of line, after '['

    search_graph = nx.Graph()
    # graph nodes
    search_graph.add_nodes_from(selected_ids)
    # graph edges
    progress_step = max(1, len(selected_ids) // 10)
    for i, idx in enumerate(selected_ids):
        if i % progress_step == progress_step - 1:
            sys.stdout.write("#")
            sys.stdout.flush()
        # add an edge to each similar word
        for j, idy in enumerate(selected_ids):
            if tiling_grammar.similar_words(words[idx], words[idy]):
                search_graph.add_edge(idx, idy, weight=0.0)
        # connect to k-nearest points in "string" space
        dist_id_pairs = []
        for j in range(len(selected_ids)):
            idy = selected_ids[j]
            if idx == idy:
                continue
            dist = tiling_grammar.word_similarity(words[idx], words[idy])
            dist_id_pairs.append((dist, idy))
            # periodically prune the candidate list to bound its size
            if len(dist_id_pairs) % args.graph_degree == 0:
                dist_id_pairs = sorted(dist_id_pairs)
                dist_id_pairs = dist_id_pairs[:args.graph_degree]
        dist_id_pairs = sorted(dist_id_pairs)
        dist_id_pairs = dist_id_pairs[:args.graph_degree]
        for d, idy in dist_id_pairs:
            similarity = tiling_grammar.word_similarity(words[idx],
                                                        words[idy])
            search_graph.add_edge(idx, idy, weight=similarity)
    sys.stdout.write("\n")

    print("number of connected components before augmentation: ",
          nx.number_connected_components(search_graph))
    complement = list(nx.k_edge_augmentation(search_graph, k=1, partial=True))
    for (n_i, n_j) in complement:
        similarity = tiling_grammar.word_similarity(words[int(n_i)],
                                                    words[int(n_j)])
        search_graph.add_edge(n_i, n_j, weight=similarity)

    nx.write_graphml(search_graph, args.latent_graph)
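# The augmentation step above, in isolation: with k=1 and partial=True,
# networkx yields exactly the edges needed to join the connected components
# of the search graph. A small self-contained demo, not part of the script:
import networkx as nx

demo = nx.Graph([(0, 1), (2, 3)])
demo.add_node(4)
print(list(nx.k_edge_augmentation(demo, k=1, partial=True)))
# e.g. [(1, 2), (3, 4)] -- one new edge per component boundary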
def build_latent_graph(args):
    if args.graph_degree >= args.graph_size:
        raise ValueError(
            "Requested graph degree %s larger than graph size %s" %
            (args.graph_degree, args.graph_size))
    if not os.path.isfile(args.input_data):
        raise ValueError("Input file %s doesn't exist" % args.input_data)

    model, tiling_grammar, latent_data, charset = load_input(args)

    permuted_ids = np.random.permutation(len(latent_data))
    selected_ids = []
    words = []

    # setup toolbar
    sys.stdout.write("Decoding samples [%s]" % (" " * 10))
    sys.stdout.flush()
    sys.stdout.write("\b" * (10 + 1))  # return to start of line, after '['
    decode_step = max(1, len(permuted_ids) // 10)
    for i, idx in enumerate(permuted_ids):
        if i % decode_step == decode_step - 1:
            sys.stdout.write("#")
            sys.stdout.flush()
        decoded_data = model.decoder.predict(latent_data[idx].reshape(
            1, args.latent_dim)).argmax(axis=2)[0]
        word = decode_smiles_from_indexes(decoded_data, charset)
        if not tiling_grammar.check_word(word):
            continue
        selected_ids.append(idx)
        words.append(word)
        if len(selected_ids) >= args.graph_size:
            break
    sys.stdout.write("\n")

    # setup toolbar
    sys.stdout.write("Inserting graph nodes [%s]" % (" " * 10))
    sys.stdout.flush()
    sys.stdout.write("\b" * (10 + 1))  # return to start of line, after '['

    search_graph = nx.Graph()
    # graph nodes
    search_graph.add_nodes_from(selected_ids)
    # graph edges
    progress_step = max(1, len(selected_ids) // 10)
    for i, idx in enumerate(selected_ids):
        if i % progress_step == progress_step - 1:
            sys.stdout.write("#")
            sys.stdout.flush()
        # add an edge to each similar word
        for j, idy in enumerate(selected_ids):
            if tiling_grammar.similar_words(words[i], words[j]):
                search_graph.add_edge(idx, idy, weight=0.0)
        # connect to k-nearest points in latent space (skipping the point
        # itself, whose distance is trivially zero)
        dist_id_pairs = []
        for j in range(len(selected_ids)):
            if j == i:
                continue
            dist = np.linalg.norm(latent_data[selected_ids[i]] -
                                  latent_data[selected_ids[j]])
            dist_id_pairs.append((dist, j))
            # periodically prune the candidate list to bound its size
            if len(dist_id_pairs) % args.graph_degree == 0:
                dist_id_pairs = sorted(dist_id_pairs)
                dist_id_pairs = dist_id_pairs[:args.graph_degree]
        dist_id_pairs = sorted(dist_id_pairs)
        dist_id_pairs = dist_id_pairs[:args.graph_degree]
        for d, j in dist_id_pairs:
            similarity = tiling_grammar.word_similarity(words[i], words[j])
            idy = selected_ids[j]
            search_graph.add_edge(idx, idy, weight=similarity)
    sys.stdout.write("\n")

    print("number of connected components before augmentation: ",
          nx.number_connected_components(search_graph))
    complement = list(nx.k_edge_augmentation(search_graph, k=1, partial=True))
    for (n_i, n_j) in complement:
        decoded_data_i = model.decoder.predict(
            latent_data[int(n_i)].reshape(
                1, args.latent_dim)).argmax(axis=2)[0]
        word_i = decode_smiles_from_indexes(decoded_data_i, charset)
        decoded_data_j = model.decoder.predict(
            latent_data[int(n_j)].reshape(
                1, args.latent_dim)).argmax(axis=2)[0]
        word_j = decode_smiles_from_indexes(decoded_data_j, charset)
        similarity = tiling_grammar.word_similarity(word_i, word_j)
        search_graph.add_edge(n_i, n_j, weight=similarity)

    nx.write_graphml(search_graph, args.latent_graph)
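# Downstream usage sketch: the GraphML written by either builder can be
# reloaded and searched; "latent_graph.graphml" stands in for whatever path
# args.latent_graph held.
import networkx as nx

g = nx.read_graphml("latent_graph.graphml")
src, dst = list(g.nodes())[:2]
path = nx.shortest_path(g, src, dst, weight="weight")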
def graph_creator():
    results = xlwt.Workbook(encoding="utf-8")
    sheet1 = results.add_sheet('Community_Robustness')
    sheet2 = results.add_sheet('Experience_Coverage')
    sheet3 = results.add_sheet('Degree_Coverage')
    col = 0
    row = 0
    for i in range(0, len(Sheet_first_row)):
        sheet1.write(row, col, str(Sheet_first_row[i]))
        col += 1
    col = 0
    for i in range(0, len(perc_exp_sheet)):
        sheet2.write(row, col, str(perc_exp_sheet[i]))
        sheet3.write(row, col, str(perc_exp_sheet[i]))
        col += 1

    for db in db_list:
        com = 0
        isu = 0
        row += 1
        try:
            count = db.count()
            if count > 0:
                main_entry = db.find()[count - 1]
                repo_name = main_entry.get('name')
                repo_owner = main_entry.get('owner')
                repo_owner_fc = main_entry.get('owner_followers_count')
                repo_issues_count = main_entry.get('statistics').get('total_issues')
                repo_issues_comments_count = main_entry.get('statistics').get('total_issues_comments')
                repo_stars = main_entry.get('popularity').get('stars')
                repo_contributors = main_entry.get('contributors_count')

                if repo_contributors:
                    comments_list = []
                    # main loop: add a node for every opened issue
                    for e in db.find():
                        entry_type = e.get('type')
                        if not e.get('contributors_count') and entry_type != 'Commit':
                            issue_number = e.get('issue_number')
                            author = e.get('author')
                            followers = e.get('author_followers_count')
                            if entry_type == 'IssueOpened':
                                isu += 1
                                comments = e.get('comments_count')
                                graph.add_node(issue_number,
                                               name=issue_number,
                                               n_type=entry_type,
                                               author=author,
                                               afc=followers,
                                               comments_count=comments)

                    # second loop: add commenters and weighted edges to issues
                    for e in db.find():
                        entry_type = e.get('type')
                        if not e.get('contributors_count') and entry_type != 'Commit':
                            issue_number = e.get('issue_number')
                            author = e.get('author')
                            followers = e.get('author_followers_count')
                            if entry_type == 'Comment':
                                if author not in comments_list:
                                    com += 1
                                    comments_list.append(author)
                                graph.add_node(author, name=author,
                                               n_type=entry_type,
                                               author=author, afc=followers)
                                if graph.has_edge(issue_number, author):
                                    graph[issue_number][author]['weight'] += 1
                                else:
                                    graph.add_edge(issue_number, author,
                                                   weight=1)

                    popularities = []
                    degrees = []
                    people = []
                    connections = []
                    for n in graph.nodes():
                        node = graph.nodes[n]
                        node_type = node['n_type']
                        if node_type != 'IssueOpened':
                            if graph.degree[n] > 1:
                                people.append(node['afc'])
                                connections.append(graph.degree[n])
                            popularities.append(node['afc'])
                            degrees.append(graph.degree(weight='weight')[n])
                    r = coeff(popularities, degrees)
                    r_val = r[2]
                    p_val = r[3]

                    # K-Aug connectivity: how many edges are needed to join
                    # the issue-commenter graph into a single component
                    needed_edges_to_connect = len(list(nx.k_edge_augmentation(graph, k=1)))
                    edges_to_connect_full_graph = isu - 1
                    edges = len(graph.edges())
                    community_score = 1.00 - (needed_edges_to_connect / float(edges_to_connect_full_graph))
                    # community_score = 1.00 - (needed_edges_to_connect/float(edges))

                    info = [
                        repo_name, repo_owner, repo_owner_fc,
                        repo_issues_count, repo_issues_comments_count,
                        repo_stars, repo_contributors, r_val, p_val,
                        community_score
                    ]
                    col = 0
                    for i in range(0, len(Sheet_first_row)):
                        sheet1.write(row, col, str(info[i]))
                        col += 1

                    sum_connection = sum(connections)
                    sum_people = sum(people)
                    sorted_people_connections = sorted(zip(people, connections))
                    sorted_connections_people = sorted(zip(connections, people))
                    experience_coverage = []
                    degree_coverage = []
                    for kk in range(0, 10):
                        indexx = int(len(people) * (1 - perc_exp[kk]))
                        sum_exp = 0
                        sum_deg = 0
                        if indexx > 0:
                            indexx -= 1
                        for jj in range(indexx, len(people)):
                            sum_exp += sorted_people_connections[jj][1]
                            sum_deg += sorted_connections_people[jj][1]
                        experience_coverage.append(sum_exp / float(sum_connection))
                        degree_coverage.append(sum_deg / float(sum_people))

                    experience_coverage_sheet = [repo_name, repo_issues_count,
                                                 repo_stars, repo_contributors]
                    degree_coverage_sheet = [repo_name, repo_issues_count,
                                             repo_stars, repo_contributors]
                    for i in range(0, len(experience_coverage)):
                        experience_coverage_sheet.append(experience_coverage[i])
                        degree_coverage_sheet.append(degree_coverage[i])
                    col = 0
                    for i in range(0, len(perc_exp_sheet)):
                        sheet2.write(row, col, str(experience_coverage_sheet[i]))
                        sheet3.write(row, col, str(degree_coverage_sheet[i]))
                        col += 1
                    print(repo_name)
                    nx.write_graphml(graph, "/Users/Abduljaleel/Desktop/net1.graphml")
                    graph.clear()
                    break
        except Exception as er:
            graph.clear()
            print(er)
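# The community_score computation above, in isolation (a sketch with toy
# data): with isu opened issues, joining all components of the
# issue-commenter graph needs at most isu - 1 edges, so the score is the
# fraction of that joining work the community has already done.
import networkx as nx

g = nx.Graph()
g.add_edges_from([("issue-1", "alice"), ("issue-2", "alice"),
                  ("issue-3", "bob")])
isu = 3
needed = len(list(nx.k_edge_augmentation(g, k=1)))
community_score = 1.0 - needed / float(isu - 1)
print(community_score)  # 0.5 here: one of the two possible joins is missing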
def make_network(graph_name="Network", total_size=100, max_neighbors=4,
                 hub_depth=5, k=1):
    G = nx.Graph(name=graph_name + "_" +
                 str(datetime.timestamp(datetime.now())))

    # Generate hubs to serve as the backbone of the network
    G, index = add_hub(G, index=0, x_range=(0, total_size),
                       y_range=(0, total_size), hub_depth=hub_depth)

    # Create nodes with random x and y coordinate attributes
    for n in range(0, total_size):
        if str(n) not in G.nodes():
            G.add_node(str(n),
                       coordinates=(np.random.randint(0, total_size),
                                    np.random.randint(0, total_size)),
                       hub=0,
                       perfsonar=False)

    # Ensure nodes are sorted before distances are calculated, so that
    # source-destination pairs can be indexed
    node_coordinates = sorted(nx.get_node_attributes(G, 'coordinates').items(),
                              key=lambda x: int(x[0]))

    # https://stackoverflow.com/questions/54732086/finding-euclidean-distance-between-all-pair-of-points
    distances = np.array(euclidean_distances([c[1] for c in node_coordinates]))
    distances_dict = {}
    for i in range(len(node_coordinates)):
        temp_dict = {}
        for j in range(len(node_coordinates)):
            temp_dict[node_coordinates[j][0]] = distances[i][j]
        distances_dict[node_coordinates[i][0]] = temp_dict.copy()

    # https://stackoverflow.com/questions/16817948/i-have-need-the-n-minimum-index-values-in-a-numpy-array
    # This only works because we are numbering nodes sequentially from 0
    # (i.e., their key and their index are the same)
    closest_neighbors = [
        list(arr.argsort()[1:max_neighbors + 1]) for arr in distances
    ]
    closest_neighbors_dict = {}
    for i in range(len(node_coordinates)):
        closest_neighbors_dict[node_coordinates[i][0]] = [
            str(n) for n in closest_neighbors[i]
        ]

    # Connect each node to a random number of its nearest neighbors
    for node in list(G.nodes()):
        n_neighbors = np.random.randint(1, max_neighbors + 1)
        for neighbor in closest_neighbors_dict[node][:n_neighbors]:
            add_detailed_edge(G, (node, neighbor))

    # Add whatever edges are needed to make the network k-edge-connected
    for edge in nx.k_edge_augmentation(G, k):
        add_detailed_edge(G, (edge[0], edge[1]))

    G = save_network(G)
    return G
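# Usage sketch (add_hub, add_detailed_edge, and save_network are assumed to
# come from this module, with save_network returning the graph it saves):
# build a 100-node network and check that the k=1 augmentation step left it
# connected.
import networkx as nx

G = make_network(graph_name="TestNet", total_size=100, max_neighbors=4, k=1)
assert nx.is_connected(G)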