Example #1
def partial_k_edge_augmentation(G, k, avail, weight=None):
    """Finds augmentation that k-edge-connects as much of the graph as possible.

    When a k-edge-augmentation is not possible, we can still try to find a
    small set of edges that partially k-edge-connects as much of the graph as
    possible. All possible edges are generated between remaining parts.
    This minimizes the number of k-edge-connected subgraphs in the resulting
    graph and maximizes the edge connectivity between those subgraphs.

    Parameters
    ----------
    G : NetworkX graph
       An undirected graph.

    k : integer
        Desired edge connectivity

    avail : dict or a set of 2 or 3 tuples
        For more details, see :func:`k_edge_augmentation`.

    weight : string
        key to use to find weights if ``avail`` is a set of 3-tuples.
        For more details, see :func:`k_edge_augmentation`.

    Yields
    ------
    edge : tuple
        Edges in the partial augmentation of G. These edges k-edge-connect any
        part of G where it is possible, and maximally connect the remaining
        parts. In other words, all edges from avail are generated except for
        those within subgraphs that have already become k-edge-connected.

    Notes
    -----
    Construct H that augments G with all edges in avail.
    Find the k-edge-subgraphs of H.
    For each k-edge-subgraph, if the number of nodes is more than k, then find
    the k-edge-augmentation of that graph and add it to the solution. Then add
    all edges in avail between k-edge subgraphs to the solution.

    See Also
    --------
    :func:`k_edge_augmentation`

    Example
    -------
    >>> G = nx.path_graph((1, 2, 3, 4, 5, 6, 7))
    >>> G.add_node(8)
    >>> avail = [(1, 3), (1, 4), (1, 5), (2, 4), (2, 5), (3, 5), (1, 8)]
    >>> sorted(partial_k_edge_augmentation(G, k=2, avail=avail))
    [(1, 5), (1, 8)]
    """

    def _edges_between_disjoint(H, only1, only2):
        """ finds edges between disjoint nodes """
        only1_adj = {u: set(H.adj[u]) for u in only1}
        for u, neighbs in only1_adj.items():
            # Find the neighbors of u that are also in only2
            neighbs12 = neighbs.intersection(only2)
            for v in neighbs12:
                yield (u, v)

    avail_uv, avail_w = _unpack_available_edges(avail, weight=weight, G=G)

    # Find which parts of the graph can be k-edge-connected
    H = G.copy()
    H.add_edges_from(
        (
            (u, v, {"weight": w, "generator": (u, v)})
            for (u, v), w in zip(avail_uv, avail_w)
        )
    )
    k_edge_subgraphs = list(nx.k_edge_subgraphs(H, k=k))

    # Generate edges to k-edge-connect internal subgraphs
    for nodes in k_edge_subgraphs:
        if len(nodes) > 1:
            # Get the k-edge-connected subgraph
            C = H.subgraph(nodes).copy()
            # Find the internal edges that were available
            sub_avail = {
                d["generator"]: d["weight"]
                for (u, v, d) in C.edges(data=True)
                if "generator" in d
            }
            # Remove potential augmenting edges
            C.remove_edges_from(sub_avail.keys())
            # Find a subset of these edges that makes the component
            # k-edge-connected and ignore the rest
            yield from nx.k_edge_augmentation(C, k=k, avail=sub_avail)

    # Generate all edges between CCs that could not be k-edge-connected
    for cc1, cc2 in it.combinations(k_edge_subgraphs, 2):
        for (u, v) in _edges_between_disjoint(H, cc1, cc2):
            d = H.get_edge_data(u, v)
            edge = d.get("generator", None)
            if edge is not None:
                yield edge
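
Usage sketch (networkx assumed): the public entry point nx.k_edge_augmentation
with partial=True falls back to the partial augmentation above when a full
k-edge-augmentation is infeasible, so this call is expected to reproduce the
doctest.

import networkx as nx

G = nx.path_graph((1, 2, 3, 4, 5, 6, 7))
G.add_node(8)
avail = [(1, 3), (1, 4), (1, 5), (2, 4), (2, 5), (3, 5), (1, 8)]
# Node 8 has only one candidate edge, so full 2-edge-connectivity is
# infeasible and the partial augmentation is returned instead.
print(sorted(nx.k_edge_augmentation(G, k=2, avail=avail, partial=True)))
# expected: [(1, 5), (1, 8)], matching the doctest above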
Example #2
def benchmark_diffnet(sij_generator,
                      ntimes=100,
                      optimalities=['D', 'A', 'Etree'],
                      constant_relative_error=False,
                      epsilon=1e-2):
    '''
    For each optimality, compute the reduction of covariance
    of the D-, A-, and E-optimal networks relative to the minimum
    spanning tree.

    Args:
    sij_generator: function - sij_generator() generates a symmetric
    matrix of sij (or, when constant_relative_error is True, a tuple
    (si, sij)).

    Returns:
    ( stats, avg, topo ): tuple - stats['D'|'A'|'E'][o] is a numpy array
    of the covariance metric relative to the MST ('D': ln(det(C)),
    'A': tr(C), 'E': max(eig(C))).

    avg['D'|'A'|'E'][o] is the corresponding mean.

    topo[o][0] is the histogram of n_{ii}/s_{ii}.
    topo[o][1] is the histogram of n_{ij}/s_{ij} for j != i.
    topo[o][2] is the list of edge connectivities of the measurement networks.
    topo[o][3] is the list of the numbers of edges that need to be
       added to the measurement networks to make the graphs 2-edge-connected
       (which ensures a cycle through any two nodes).

    o can be 'D', 'A', 'Etree', 'MSTn', 'MSTs', 'MSTv', 'cstn', 'cstv', 'csts'.
    '''
    stats = dict(D=dict(), A=dict(), E=dict())
    for s in stats:
        for o in optimalities + [ 'MSTn', 'MSTs', 'MSTv' ] + \
            [ 'cstn', 'csts', 'cstv' ]:
            stats[s][o] = np.zeros(ntimes)
    emin = -5
    emax = 2
    nbins = 2 * (emax + 1 - emin)
    bins = np.concatenate([[0], np.logspace(emin, emax, nbins)])

    # topo records the topology of the optimal measurement networks
    topo = dict([
        (o,
         [np.zeros(nbins, dtype=float),
          np.zeros(nbins, dtype=float), [], []]) for o in optimalities
    ])
    nfails = 0
    for t in xrange(ntimes):
        if constant_relative_error:
            results = dict()
            si, sij = sij_generator()
            for o in optimalities:
                if o == 'A':
                    results[o] = A_optimize_const_relative_error(si)
                elif o == 'D':
                    results[o] = D_optimize_const_relative_error(si)
                else:
                    results.update(optimize(sij, [o]))
        else:
            sij = sij_generator()
            results = optimize(sij, optimalities)
        ssum = np.sum(np.triu(sij))
        if None in results.values():
            nfails += 1
            continue
        for o in optimalities:
            n = np.array(results[o])
            n[n < 0] = 0
            nos = ssum * n / sij
            d = np.diag(nos)
            u = [
                nos[i, j] for i in xrange(n.shape[0])
                for j in xrange(i + 1, n.shape[0])
            ]
            hd, _ = np.histogram(d, bins, density=False)
            hu, _ = np.histogram(u, bins, density=False)
            topo[o][0] += hd
            topo[o][1] += hu
            nos[nos < epsilon] = 0
            gdn = nx.from_numpy_matrix(nos)
            topo[o][2].append(nx.edge_connectivity(gdn))
            topo[o][3].append(len(sorted(nx.k_edge_augmentation(gdn, 2))))

        results.update(
            dict(MSTn=MST_optimize(sij, 'n'),
                 MSTs=MST_optimize(sij, 'std'),
                 MSTv=MST_optimize(sij, 'var')))
        results.update(
            dict(cstn=const_allocation(sij, 'n'),
                 csts=const_allocation(sij, 'std'),
                 cstv=const_allocation(sij, 'var')))
        CMSTn = covariance(cvxopt.div(results['MSTn'], sij**2))
        DMSTn = np.log(linalg.det(CMSTn))
        AMSTn = np.trace(CMSTn)
        EMSTn = np.max(linalg.eig(CMSTn)[0]).real
        for o in results:
            n = results[o]
            C = covariance(cvxopt.div(n, sij**2))
            D = np.log(linalg.det(C))
            A = np.trace(C)
            E = np.max(linalg.eig(C)[0]).real
            stats['D'][o][t - nfails] = D - DMSTn
            stats['A'][o][t - nfails] = A / AMSTn
            stats['E'][o][t - nfails] = E / EMSTn

    avg = dict()
    for s in stats:
        avg[s] = dict()
        for o in stats[s]:
            stats[s][o] = stats[s][o][:ntimes - nfails]
            avg[s][o] = np.mean(stats[s][o])

    for o in optimalities:
        topo[o][0] /= (ntimes - nfails)
        topo[o][1] /= (ntimes - nfails)
    return stats, avg, topo
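
A small illustration of the two topology statistics recorded per network above:
the edge connectivity, and the number of edges a 2-edge-augmentation would add.
This sketch uses nx.from_numpy_array; the code above calls the older
nx.from_numpy_matrix API.

import networkx as nx
import numpy as np

# A 4-node path network: edge connectivity 1, and one extra edge
# closes it into a cycle (2-edge-connected).
nos = np.array([[0., 1., 0., 0.],
                [1., 0., 1., 0.],
                [0., 1., 0., 1.],
                [0., 0., 1., 0.]])
gdn = nx.from_numpy_array(nos)
print(nx.edge_connectivity(gdn))                  # 1
print(len(list(nx.k_edge_augmentation(gdn, 2))))  # 1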
Example #3
def sparse_A_optimal_network(sij,
                             nadd=1.,
                             nsofar=None,
                             n_measure=0,
                             connectivity=2,
                             sparse_by_fluctuation=True):
    r'''
    Construct a sparse A-optimal network, so that (approximately) only
    n_measure different measurements will receive resource
    allocations, while guaranteeing the given degree of connectivity.

    Args:

    sij: KxK symmetric matrix, where the measurement variance of the
    difference between i and j is proportional to s[i][j]^2 =
    s[j][i]^2, and the measurement variance of i is proportional to
    s[i][i]^2.
    nadd: float, the additional number of samples to be collected in
    the next iteration.
    nsofar: KxK symmetric matrix, where nsofar[i,j] is the number of samples
    that have already been collected for the (i,j) pair.
    n_measure: int, the number of measurements to receive allocations.
    The actual number of measurements with non-zero allocation might exceed
    this number in order to guarantee the connectivity. If it is zero, the
    number of measurements is determined by the connectivity requirement.
    connectivity: int, ensure that the resulting difference network is
    k-edge-connected.
    sparse_by_fluctuation: bool, if True, generate the sparse network by
    minimizing \sum_e s_e in the k-edge-connected spanning subgraph.

    Returns:

    KxK symmetric matrix of float, the (i,j) element of which gives the
    number of samples to be allocated to the measurement of the (i,j)
    difference in the next iteration.
    '''
    K = sij.size[0]

    if nsofar is None:
        nsofar = np.zeros((K, K), dtype=float)
    if not sparse_by_fluctuation:
        # First, get the dense optimal network
        nij = update_A_optimal_sdp(sij, nadd, nsofar)

        def weight(i, j, epsilon=1e-10):
            n = nij[i, j]
            large = 1 / epsilon
            if n > epsilon:
                return 1. / n
            else:
                return large
    else:

        def weight(i, j):
            return sij[i, j]

    # Next, find a k-edge-connected edge set that approximately minimizes
    # the total edge weight (1/n_{ij}, or s_{ij} when sparsifying by
    # fluctuation).
    G = nx.Graph()
    G.add_nodes_from(range(K))
    G.add_node('O')
    avail = []

    for i in xrange(K):
        avail.append(('O', i, weight(i, i)))
        for j in xrange(i + 1, K):
            avail.append((i, j, weight(i, j)))
    edges = list(
        nx.k_edge_augmentation(G, k=connectivity, avail=avail, partial=True))

    # Include only the edges that guarantee k-connectivity and nothing else
    only_include_measurements = set([])
    for i, j in edges:
        if 'O' == i:
            only_include_measurements.add((j, j))
        elif 'O' == j:
            only_include_measurements.add((i, i))
        else:
            if i < j:
                only_include_measurements.add((i, j))
            else:
                only_include_measurements.add((j, i))

    # If there is additional allowance for the number of measurements,
    # add the remaining ones with the largest allocations from the dense
    # network.
    if (len(only_include_measurements) < n_measure):
        indices = []
        for i in xrange(K):
            for j in xrange(i, K):
                if (i, j) in only_include_measurements:
                    continue
                heapq.heappush(indices, (weight(i, j), (i, j)))
        addition = []
        for m in xrange(n_measure - len(only_include_measurements)):
            _w, (i, j) = heapq.heappop(indices)
            addition.append((i, j))
        only_include_measurements.update(addition)

    nij = update_A_optimal_sdp(sij, nadd, nsofar, only_include_measurements)

    return nij
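
A toy sketch of the virtual-origin construction above (networkx assumed, with
illustrative weights): diagonal measurements (i, i) are modeled as edges
('O', i) to a virtual node, so one k-edge-augmentation call selects a
connectivity-guaranteeing subset of both relative and absolute measurements.

import networkx as nx

K = 3
G = nx.Graph()
G.add_nodes_from(range(K))
G.add_node('O')
# Candidate edges with weights: ('O', i) for self-measurements,
# (i, j) for difference measurements.
avail = [('O', 0, 1.0), ('O', 1, 2.0), ('O', 2, 2.0),
         (0, 1, 1.0), (0, 2, 1.0), (1, 2, 3.0)]
chosen = list(nx.k_edge_augmentation(G, k=2, avail=avail, partial=True))
print(chosen)  # edges making {'O', 0, 1, 2} 2-edge-connected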
Example #4
def k_edge_augmentation(G, k, avail=None, partial=False):
    return it.starmap(e_, nx.k_edge_augmentation(G, k, avail=avail,
                                                 partial=partial))
def partial_k_edge_augmentation(G, k, avail, weight=None):
    """Finds augmentation that k-edge-connects as much of the graph as possible.

    When a k-edge-augmentation is not possible, we can still try to find a
    small set of edges that partially k-edge-connects as much of the graph as
    possible. All possible edges are generated between remaining parts.
    This minimizes the number of k-edge-connected subgraphs in the resulting
    graph and maximizes the edge connectivity between those subgraphs.

    Parameters
    ----------
    G : NetworkX graph
       An undirected graph.

    k : integer
        Desired edge connectivity

    avail : dict or a set of 2 or 3 tuples
        For more details, see :func:`k_edge_augmentation`.

    weight : string
        key to use to find weights if ``avail`` is a set of 3-tuples.
        For more details, see :func:`k_edge_augmentation`.

    Yields
    ------
    edge : tuple
        Edges in the partial augmentation of G. These edges k-edge-connect any
        part of G where it is possible, and maximally connect the remaining
        parts. In other words, all edges from avail are generated except for
        those within subgraphs that have already become k-edge-connected.

    Notes
    -----
    Construct H that augments G with all edges in avail.
    Find the k-edge-subgraphs of H.
    For each k-edge-subgraph, if the number of nodes is more than k, then find
    the k-edge-augmentation of that graph and add it to the solution. Then add
    all edges in avail between k-edge subgraphs to the solution.

    See Also
    --------
    :func:`k_edge_augmentation`

    Example
    -------
    >>> G = nx.path_graph((1, 2, 3, 4, 5, 6, 7))
    >>> G.add_node(8)
    >>> avail = [(1, 3), (1, 4), (1, 5), (2, 4), (2, 5), (3, 5), (1, 8)]
    >>> sorted(partial_k_edge_augmentation(G, k=2, avail=avail))
    [(1, 5), (1, 8)]
    """
    def _edges_between_disjoint(H, only1, only2):
        """ finds edges between disjoint nodes """
        only1_adj = {u: set(H.adj[u]) for u in only1}
        for u, neighbs in only1_adj.items():
            # Find the neighbors of u that are also in only2
            neighbs12 = neighbs.intersection(only2)
            for v in neighbs12:
                yield (u, v)

    avail_uv, avail_w = _unpack_available_edges(avail, weight=weight, G=G)

    # Find which parts of the graph can be k-edge-connected
    H = G.copy()
    H.add_edges_from(
        ((u, v, {'weight': w, 'generator': (u, v)})
         for (u, v), w in zip(avail_uv, avail_w)))
    k_edge_subgraphs = list(nx.k_edge_subgraphs(H, k=k))

    # Generate edges to k-edge-connect internal subgraphs
    for nodes in k_edge_subgraphs:
        if len(nodes) > 1:
            # Get the k-edge-connected subgraph
            C = H.subgraph(nodes).copy()
            # Find the internal edges that were available
            sub_avail = {
                d['generator']: d['weight']
                for (u, v, d) in C.edges(data=True)
                if 'generator' in d
            }
            # Remove potential augmenting edges
            C.remove_edges_from(sub_avail.keys())
            # Find a subset of these edges that makes the component
            # k-edge-connected and ignore the rest
            for edge in nx.k_edge_augmentation(C, k=k, avail=sub_avail):
                yield edge

    # Generate all edges between CCs that could not be k-edge-connected
    for cc1, cc2 in it.combinations(k_edge_subgraphs, 2):
        for (u, v) in _edges_between_disjoint(H, cc1, cc2):
            d = H.get_edge_data(u, v)
            edge = d.get('generator', None)
            if edge is not None:
                yield edge
def _augment_and_check(G, k, avail=None, weight=None, verbose=False,
                       orig_k=None, max_aug_k=None):
    """
    Does one specific augmentation and checks for properties of the result
    """
    if orig_k is None:
        try:
            orig_k = nx.edge_connectivity(G)
        except nx.NetworkXPointlessConcept:
            orig_k = 0
    info = {}
    try:
        if avail is not None:
            # ensure avail is in dict form
            avail_dict = dict(zip(*_unpack_available_edges(avail,
                                                           weight=weight)))
        else:
            avail_dict = None
        try:
            # Find the augmentation if possible
            generator = nx.k_edge_augmentation(G, k=k, weight=weight,
                                               avail=avail)
            assert_false(isinstance(generator, list),
                         'should always return an iter')
            aug_edges = []
            for edge in generator:
                aug_edges.append(edge)
        except nx.NetworkXUnfeasible:
            infeasible = True
            info['infeasible'] = True
            assert_equal(len(aug_edges), 0,
                         'should not generate anything if unfeasible')

            if avail is None:
                n_nodes = G.number_of_nodes()
                assert_less_equal(n_nodes, k, (
                    'unconstrained cases are only unfeasible if |V| <= k. '
                    'Got |V|={} and k={}'.format(n_nodes, k)
                ))
            else:
                if max_aug_k is None:
                    G_aug_all = G.copy()
                    G_aug_all.add_edges_from(avail_dict.keys())
                    try:
                        max_aug_k = nx.edge_connectivity(G_aug_all)
                    except nx.NetworkXPointlessConcept:
                        max_aug_k = 0

                assert_less(max_aug_k, k, (
                    'avail should only be unfeasible if using all edges '
                    'does not achieve k-edge-connectivity'))

            # Test for a partial solution
            partial_edges = list(nx.k_edge_augmentation(
                G, k=k, weight=weight, partial=True, avail=avail))

            info['n_partial_edges'] = len(partial_edges)

            if avail_dict is None:
                assert_equal(set(partial_edges), set(complement_edges(G)), (
                    'unweighted partial solutions should be the complement'))
            elif len(avail_dict) > 0:
                H = G.copy()

                # Find the partial / full augmented connectivity
                H.add_edges_from(partial_edges)
                partial_conn = nx.edge_connectivity(H)

                H.add_edges_from(set(avail_dict.keys()))
                full_conn = nx.edge_connectivity(H)

                # Full connectivity should be no better than our partial
                # solution.
                assert_equal(partial_conn, full_conn,
                             'adding more edges should not increase k-conn')

            # Find the new edge-connectivity after adding the augmenting edges
            aug_edges = partial_edges
        else:
            infeasible = False

        # Find the weight of the augmentation
        num_edges = len(aug_edges)
        if avail is not None:
            total_weight = sum([avail_dict[e] for e in aug_edges])
        else:
            total_weight = num_edges

        info['total_weight'] = total_weight
        info['num_edges'] = num_edges

        # Find the new edge-connectivity after adding the augmenting edges
        G_aug = G.copy()
        G_aug.add_edges_from(aug_edges)
        try:
            aug_k = nx.edge_connectivity(G_aug)
        except nx.NetworkXPointlessConcept:
            aug_k = 0
        info['aug_k'] = aug_k

        # Do checks
        if not infeasible and orig_k < k:
            assert_greater_equal(info['aug_k'], k, (
                'connectivity should increase to k={} or more'.format(k)))

        assert_greater_equal(info['aug_k'], orig_k, (
            'augmenting should never reduce connectivity'))

        _assert_solution_properties(G, aug_edges, avail_dict)

    except Exception:
        info['failed'] = True
        print('edges = {}'.format(list(G.edges())))
        print('nodes = {}'.format(list(G.nodes())))
        print('aug_edges = {}'.format(list(aug_edges)))
        print('info  = {}'.format(info))
        raise
    else:
        if verbose:
            print('info  = {}'.format(info))

    if infeasible:
        aug_edges = None
    return aug_edges, info
Example #7
def _augment_and_check(G,
                       k,
                       avail=None,
                       weight=None,
                       verbose=False,
                       orig_k=None,
                       max_aug_k=None):
    """
    Does one specific augmentation and checks for properties of the result
    """
    if orig_k is None:
        try:
            orig_k = nx.edge_connectivity(G)
        except nx.NetworkXPointlessConcept:
            orig_k = 0
    info = {}
    try:
        if avail is not None:
            # ensure avail is in dict form
            avail_dict = dict(
                zip(*_unpack_available_edges(avail, weight=weight)))
        else:
            avail_dict = None
        try:
            # Find the augmentation if possible
            generator = nx.k_edge_augmentation(G,
                                               k=k,
                                               weight=weight,
                                               avail=avail)
            assert not isinstance(generator,
                                  list), 'should always return an iter'
            aug_edges = []
            for edge in generator:
                aug_edges.append(edge)
        except nx.NetworkXUnfeasible:
            infeasible = True
            info['infeasible'] = True
            assert len(
                aug_edges) == 0, 'should not generate anything if unfeasible'

            if avail is None:
                n_nodes = G.number_of_nodes()
                assert n_nodes <= k, (
                    'unconstrained cases are only unfeasible if |V| <= k. '
                    f'Got |V|={n_nodes} and k={k}')
            else:
                if max_aug_k is None:
                    G_aug_all = G.copy()
                    G_aug_all.add_edges_from(avail_dict.keys())
                    try:
                        max_aug_k = nx.edge_connectivity(G_aug_all)
                    except nx.NetworkXPointlessConcept:
                        max_aug_k = 0

                assert max_aug_k < k, (
                    'avail should only be unfeasible if using all edges '
                    'does not achieve k-edge-connectivity')

            # Test for a partial solution
            partial_edges = list(
                nx.k_edge_augmentation(G,
                                       k=k,
                                       weight=weight,
                                       partial=True,
                                       avail=avail))

            info['n_partial_edges'] = len(partial_edges)

            if avail_dict is None:
                assert set(partial_edges) == set(complement_edges(G)), (
                    'unweighted partial solutions should be the complement')
            elif len(avail_dict) > 0:
                H = G.copy()

                # Find the partial / full augmented connectivity
                H.add_edges_from(partial_edges)
                partial_conn = nx.edge_connectivity(H)

                H.add_edges_from(set(avail_dict.keys()))
                full_conn = nx.edge_connectivity(H)

                # Full connectivity should be no better than our partial
                # solution.
                assert partial_conn == full_conn, (
                    'adding more edges should not increase k-conn')

            # Find the new edge-connectivity after adding the augmenting edges
            aug_edges = partial_edges
        else:
            infeasible = False

        # Find the weight of the augmentation
        num_edges = len(aug_edges)
        if avail is not None:
            total_weight = sum([avail_dict[e] for e in aug_edges])
        else:
            total_weight = num_edges

        info['total_weight'] = total_weight
        info['num_edges'] = num_edges

        # Find the new edge-connectivity after adding the augmenting edges
        G_aug = G.copy()
        G_aug.add_edges_from(aug_edges)
        try:
            aug_k = nx.edge_connectivity(G_aug)
        except nx.NetworkXPointlessConcept:
            aug_k = 0
        info['aug_k'] = aug_k

        # Do checks
        if not infeasible and orig_k < k:
            assert info['aug_k'] >= k, (
                f'connectivity should increase to k={k} or more')

        assert info['aug_k'] >= orig_k, (
            'augmenting should never reduce connectivity')

        _assert_solution_properties(G, aug_edges, avail_dict)

    except Exception:
        info['failed'] = True
        print(f"edges = {list(G.edges())}")
        print(f"nodes = {list(G.nodes())}")
        print(f"aug_edges = {list(aug_edges)}")
        print(f"info  = {info}")
        raise
    else:
        if verbose:
            print(f'info  = {info}')

    if infeasible:
        aug_edges = None
    return aug_edges, info
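
A quick standalone check (networkx assumed) of the invariant both versions
assert: when a full k-augmentation is infeasible, the partial solution already
reaches the same connectivity as using every available edge.

import networkx as nx

G = nx.path_graph(4)               # nodes 0-1-2-3
G.add_node(4)                      # isolated node
avail = [(0, 2), (1, 3), (0, 4)]   # node 4 can get at most one edge

partial_edges = list(nx.k_edge_augmentation(G, k=2, avail=avail, partial=True))

H = G.copy()
H.add_edges_from(partial_edges)
partial_conn = nx.edge_connectivity(H)
H.add_edges_from(avail)
full_conn = nx.edge_connectivity(H)
assert partial_conn == full_conn  # extra edges cannot do better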
Example #8
def scale(input_file,
          output_file,
          scale_factor,
          bridges=0.1,
          sampling_factor=0.5,
          precision=0.95,
          connect=False,
          stitching_type="all-to-all",
          merge_nfs=False,
          verbose=True):
    if verbose and mpi.rank == 0:
        print("""
  ______  ______ _______  _____  _     _      _______ _______ _______        _______  ______
 |  ____ |_____/ |_____| |_____] |_____|      |______ |       |_____| |      |______ |_____/
 |_____| |    \_ |     | |       |     |      ______| |_____  |     | |_____ |______ |    \_ |0.1|
""")
        print(
            "▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬"
        )
        print(
            "Scale factor: {}. Bridges: {}%. Precision: {}%. Sampling factor: {}. Connect: {}. Stitching:{}. NFS: {}"
            .format(scale_factor, bridges * 100, precision * 100,
                    sampling_factor, connect, stitching_type, merge_nfs), )
        print(
            "▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬"
        )
        print()

    total_t = time.time()
    # Step X: Parse stitching type and check if valid
    stitching_type = StitchType.parse_type(stitching_type)

    # Step X: Read distribute edges and load graph
    loading_t = time.time()
    edges, partition_map, total_nodes = distributor.distribute_edges(
        input_file)
    graph = util.load_graph_from_edges(edges)
    edges = None  # Free memory
    if verbose and mpi.rank == 0:
        print("=================================")
        print("Loading time:", round(time.time() - loading_t, 2), "seconds")
        print("=================================")
    # Step X: Calculate weights (how many % of the nodes to sample) for each node
    nodes_amount = mpi.comm.alltoall([graph.number_of_nodes()] * mpi.size)
    weights = list(
        map(lambda a: ceil(a / sum(nodes_amount) * 100) / 100.0, nodes_amount))
    # Step X: Split factor into sample rounds
    sampling_factor = min(sampling_factor, scale_factor)
    factors = [
        sampling_factor for _ in range(int(scale_factor / sampling_factor))
    ]
    remaining_factor = scale_factor - round(sum(factors), 2)
    if remaining_factor:
        factors.append(remaining_factor)
    # Step X: Run distributed sampling
    samples = []
    for i, factor in enumerate(factors):
        sampling_t = time.time()
        samples.append(
            sampler.sample(graph, int(total_nodes * factor), weights[mpi.rank],
                           partition_map, precision))
        if verbose and mpi.rank == 0:
            print("Sampling time {}/{}:".format(i + 1, len(factors)),
                  round(time.time() - sampling_t, 2), "seconds")
    if verbose and mpi.rank == 0:
        print("=================================")
    # Step X: Connect the graph
    if connect:
        connecting_t = time.time()
        for sample in samples:
            sample.add_edges_from(nx.k_edge_augmentation(nx.Graph(sample), 1))
        if verbose and mpi.rank == 0:
            print("Connecting time:", round(time.time() - connecting_t, 2),
                  "seconds")
            print("=================================")
    # Step X: Rename vertices
    relabeling_t = time.time()
    util.relabel_samples(samples)
    if verbose and mpi.rank == 0:
        print("Relabeling time:", round(time.time() - relabeling_t, 2),
              "seconds")
        print("=================================")
    # Step X: Stitch samples locally and distributively
    stitching_t = time.time()
    stitcher.stitch_samples(samples, bridges, stitching_type)
    if verbose and mpi.rank == 0:
        print("Stiching time:", round(time.time() - stitching_t, 2), "seconds")
        print("=================================")
    # Step X: Merge distributed samples into master file
    dumping_t = time.time()
    merger.merge_samples(samples, output_file, merge_nfs)
    if verbose and mpi.rank == 0:
        print("Dumping time:", round(time.time() - dumping_t, 2), "seconds")
        print("=================================")
        print()
        print("▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬")

    if verbose and mpi.rank == 0:
        print("▬▬▬", "Total time:", round(time.time() - total_t, 2), "seconds",
              "▬▬▬▬")
        print("▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬▬")
Example #9
def build_graph_from_strings(args):

    if args.graph_degree >= args.graph_size:
        raise ValueError(
            "Requested graph degree %s larger than graph size %s" %
            (args.graph_degree, args.graph_size))

    if not os.path.isfile(args.input_data):
        raise ValueError("Input file %s doesn't exist" % args.input_data)

    tiling_grammar = grammar.TilingGrammar([])
    if os.path.isfile(args.grammar):
        tiling_grammar.load(args.grammar)
    else:
        raise ValueError("Grammar file %s doesn't exist" % args.grammar)

    if TREE_GRAMMAR:
        tiling_grammar.convert_to_tree_grammar()

    data = pandas.read_hdf(args.input_data, 'table')
    print("Number of SMILES strings: ", len(data))

    if args.graph_size <= len(data):
        data = data.sample(n=args.graph_size)

    words = data["structure"]

    tmp_ids = data.index.tolist()
    selected_ids = [int(x) for x in tmp_ids]

    # setup toolbar
    sys.stdout.write("Inserting graph nodes [%s]" % (" " * 10))
    sys.stdout.flush()
    sys.stdout.write("\b" * (10 + 1))  # return to start of line, after '['

    search_graph = nx.Graph()
    #graph nodes
    search_graph.add_nodes_from(selected_ids)
    #graph edges
    for i, idx in enumerate(selected_ids):
        if i % (len(selected_ids) // 10) == len(selected_ids) // 10 - 1:
            sys.stdout.write("#")
            sys.stdout.flush()
        #add an edge to each similar word
        for j, idy in enumerate(selected_ids):
            if tiling_grammar.similar_words(words[idx], words[idy]):
                search_graph.add_edge(idx, idy, weight=0.0)

        #connect to k-nearest points in "string" space
        dist_id_pairs = []
        for j in range(len(selected_ids)):
            idy = selected_ids[j]
            if idx == idy:
                continue
            dist = tiling_grammar.word_similarity(words[idx], words[idy])
            dist_id_pairs.append((dist, idy))
            if len(dist_id_pairs) % args.graph_degree == 0:
                dist_id_pairs = sorted(dist_id_pairs)
                dist_id_pairs = dist_id_pairs[:args.graph_degree]

        dist_id_pairs = sorted(dist_id_pairs)
        dist_id_pairs = dist_id_pairs[:args.graph_degree]

        for d, idy in dist_id_pairs:
            similarity = tiling_grammar.word_similarity(words[idx], words[idy])
            search_graph.add_edge(idx, idy, weight=similarity)

    sys.stdout.write("\n")

    print("number of connected components before augmentation: ",
          nx.number_connected_components(search_graph))

    complement = list(nx.k_edge_augmentation(search_graph, k=1, partial=True))
    for (n_i, n_j) in complement:
        similarity = tiling_grammar.word_similarity(words[int(n_i)],
                                                    words[int(n_j)])
        search_graph.add_edge(n_i, n_j, weight=similarity)

    nx.write_graphml(search_graph, args.latent_graph)
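
A minimal sketch of the final augmentation step above (networkx assumed): k=1
with partial=True yields edges joining the connected components, which the
code then re-weights by word similarity before inserting.

import networkx as nx

g = nx.Graph()
g.add_edges_from([(0, 1), (2, 3)])  # two components
bridges = list(nx.k_edge_augmentation(g, k=1, partial=True))
g.add_edges_from(bridges)
assert nx.number_connected_components(g) == 1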
Example #10
def build_latent_graph(args):

    if args.graph_degree >= args.graph_size:
        raise ValueError(
            "Requested graph degree %s larger than graph size %s" %
            (args.graph_degree, args.graph_size))

    if not os.path.isfile(args.input_data):
        raise ValueError("Input file %s doesn't exist" % args.input_data)

    model, tiling_grammar, latent_data, charset = load_input(args)

    permuted_ids = np.random.permutation(len(latent_data))
    selected_ids = []
    words = []

    # setup toolbar
    sys.stdout.write("Decoding samples [%s]" % (" " * 10))
    sys.stdout.flush()
    sys.stdout.write("\b" * (10 + 1))  # return to start of line, after '['

    for i, idx in enumerate(permuted_ids):
        if i % (len(permuted_ids) // 10) == len(permuted_ids) // 10 - 1:
            sys.stdout.write("#")
            sys.stdout.flush()
        decoded_data = model.decoder.predict(latent_data[idx].reshape(
            1, args.latent_dim)).argmax(axis=2)[0]
        word = decode_smiles_from_indexes(decoded_data, charset)

        if not tiling_grammar.check_word(word):
            continue

        selected_ids.append(idx)
        words.append(word)
        if len(selected_ids) >= args.graph_size:
            break

    sys.stdout.write("\n")

    # setup toolbar
    sys.stdout.write("Inserting graph nodes [%s]" % (" " * 10))
    sys.stdout.flush()
    sys.stdout.write("\b" * (10 + 1))  # return to start of line, after '['

    search_graph = nx.Graph()
    #graph nodes
    search_graph.add_nodes_from(selected_ids)
    #graph edges
    for i, idx in enumerate(selected_ids):
        if i % (len(selected_ids) // 10) == len(selected_ids) // 10 - 1:
            sys.stdout.write("#")
            sys.stdout.flush()
        #add an edge to each similar word
        for j, idy in enumerate(selected_ids):
            if tiling_grammar.similar_words(words[i], words[j]):
                search_graph.add_edge(idx, idy, weight=0.0)

        #connect to k-nearest points in latent space
        dist_id_pairs = []
        for j in range(len(selected_ids)):
            dist = np.linalg.norm(latent_data[selected_ids[i]] -
                                  latent_data[selected_ids[j]])
            dist_id_pairs.append((dist, j))
            if len(dist_id_pairs) % args.graph_degree == 0:
                dist_id_pairs = sorted(dist_id_pairs)
                dist_id_pairs = dist_id_pairs[:args.graph_degree]

        dist_id_pairs = sorted(dist_id_pairs)
        dist_id_pairs = dist_id_pairs[:args.graph_degree]

        for d, j in dist_id_pairs:
            similarity = tiling_grammar.word_similarity(words[i], words[j])
            idy = selected_ids[j]
            search_graph.add_edge(idx, idy, weight=similarity)

    sys.stdout.write("\n")

    print("number of connected components before augmentation: ",
          nx.number_connected_components(search_graph))

    complement = list(nx.k_edge_augmentation(search_graph, k=1, partial=True))
    if len(complement) >= 2:
        for (n_i, n_j) in complement:
            decoded_data_i = model.decoder.predict(
                latent_data[int(n_i)].reshape(
                    1, args.latent_dim)).argmax(axis=2)[0]
            word_i = decode_smiles_from_indexes(decoded_data_i, charset)

            decoded_data_j = model.decoder.predict(
                latent_data[int(n_j)].reshape(
                    1, args.latent_dim)).argmax(axis=2)[0]
            word_j = decode_smiles_from_indexes(decoded_data_j, charset)

            similarity = tiling_grammar.word_similarity(word_i, word_j)
            search_graph.add_edge(n_i, n_j, weight=similarity)

    nx.write_graphml(search_graph, args.latent_graph)
Example #11
def graph_creator():
    results = xlwt.Workbook(encoding="utf-8")
    sheet1 = results.add_sheet('Community_Robustness')
    sheet2 = results.add_sheet('Experience_Coverage')
    sheet3 = results.add_sheet('Degree_Coverage')

    col = 0
    row = 0
    for i in range(0, len(Sheet_first_row)):
        sheet1.write(row, col, str(Sheet_first_row[i]))
        col += 1

    col = 0
    for i in range(0, len(perc_exp_sheet)):
        sheet2.write(row, col, str(perc_exp_sheet[i]))
        sheet3.write(row, col, str(perc_exp_sheet[i]))
        col += 1

    for db in db_list:
        com = 0
        isu = 0
        row += 1
        try:
            count = db.count()
            if count > 0:
                main_entry = db.find()[count - 1]
                repo_name = main_entry.get('name')
                repo_owner = main_entry.get('owner')
                repo_owner_fc = main_entry.get('owner_followers_count')
                repo_issues_count = main_entry.get('statistics').get('total_issues')
                repo_issues_comments_count = main_entry.get('statistics').get('total_issues_comments')
                repo_stars = main_entry.get('popularity').get('stars')
                repo_contributors = main_entry.get('contributors_count')

                if repo_contributors:
                    comments_list = []
                    # main loop
                    i = 0
                    for e in db.find():
                        type = e.get('type')
                        if not e.get('contributors_count') and type != 'Commit':
                            issue_number = e.get('issue_number')
                            author = e.get('author')
                            followers = e.get('author_followers_count')

                            if type == 'IssueOpened':
                                isu += 1
                                comments = e.get('comments_count')
                                graph.add_node(issue_number, name=issue_number, n_type=type, author=author, afc=followers, comments_count=comments)
                        # i += 1
                        # if i>10:
                        #     break

                    #2ndloop
                    i = 0
                    for e in db.find():
                        type = e.get('type')
                        if not e.get('contributors_count') and type != 'Commit':
                            issue_number = e.get('issue_number')
                            author = e.get('author')
                            followers = e.get('author_followers_count')
                            if type == 'Comment':
                                if author not in comments_list:
                                    com += 1
                                    comments_list.append(author)
                                    graph.add_node(author, name=author, n_type=type, author=author, afc=followers)
                                if graph.has_edge(issue_number, author):
                                    graph[issue_number][author]['weight'] += 1
                                else:
                                    graph.add_edge(issue_number, author, weight=1)
                        # i += 1
                        # if i>10:
                        #     break

                    popularities = []
                    degrees = []
                    people = []
                    connections = []
                    for n in graph.nodes():
                        node = graph.nodes[n]
                        node_type = node['n_type']

                        if node_type != 'IssueOpened':

                            if graph.degree[n] > 1:
                                people.append(node['afc'])
                                connections.append(graph.degree[n])
                            popularities.append(node['afc'])
                            degrees.append(graph.degree(weight='weight')[n])

                    r = coeff(popularities, degrees)
                    r_val = r[2]
                    p_val = r[3]

                    # K-Aug connectivity
                    needed_edges_to_connect = len(sorted(nx.k_edge_augmentation(graph, k=1)))
                    edges_to_connect_full_graph = isu - 1
                    edges = len(graph.edges())
                    community_score = 1.00 - (needed_edges_to_connect/float(edges_to_connect_full_graph))
                    # community_score = 1.00 - (needed_edges_to_connect/float(edges))

                    info = [
                        repo_name,
                        repo_owner,
                        repo_owner_fc,
                        repo_issues_count,
                        repo_issues_comments_count,
                        repo_stars,
                        repo_contributors,
                        r_val,
                        p_val,
                        community_score
                    ]

                    col = 0
                    for i in range(0, len(Sheet_first_row)):
                        sheet1.write(row, col, str(info[i]))
                        col += 1

                    sum_connection = sum(connections)
                    sum_people = sum(people)

                    sorted_people_connections = sorted(zip(people, connections))
                    # print sorted_people_connections

                    sorted_connections_people = sorted(zip(connections, people))
                    # print sorted_connections_people

                    experience_coverage = []
                    degree_coverage = []

                    for kk in range(0, 10):
                        indexx = int(len(people) * (1-perc_exp[kk]))
                        sum_exp = 0
                        sum_deg = 0
                        if indexx > 0:
                            indexx -= 1
                        for jj in range(indexx, len(people)):
                            sum_exp += sorted_people_connections[jj][1]
                            sum_deg += sorted_connections_people[jj][1]

                        experience_coverage.append(sum_exp / float(sum_connection))
                        degree_coverage.append(sum_deg / float(sum_people))

                    experience_coverage_sheet = [repo_name,repo_issues_count,repo_stars,repo_contributors]
                    degree_coverage_sheet = [repo_name,repo_issues_count,repo_stars,repo_contributors]

                    for i in range(0, len(experience_coverage)):
                        experience_coverage_sheet.append(experience_coverage[i])
                        degree_coverage_sheet.append(degree_coverage[i])

                    col = 0
                    for i in range(0, len(perc_exp_sheet)):
                        sheet2.write(row, col, str(experience_coverage_sheet[i]))
                        sheet3.write(row, col, str(degree_coverage_sheet[i]))
                        col += 1

                    print repo_name
                    nx.write_graphml(graph, "/Users/Abduljaleel/Desktop/net1.graphml")
                    graph.clear()
                    break
        except Exception as er:
            graph.clear()
            print er.message
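
A worked sketch of the community score arithmetic above, with illustrative
numbers: with isu opened issues, a fully connected issue graph needs at most
isu - 1 joining edges, so the score is the fraction of that budget the
augmentation did not need.

isu = 11                     # illustrative issue count
needed_edges_to_connect = 2  # illustrative augmentation size
community_score = 1.00 - (needed_edges_to_connect / float(isu - 1))
print(community_score)       # 0.8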
Example #12
def make_network(graph_name="Network",
                 total_size=100,
                 max_neighbors=4,
                 hub_depth=5,
                 k=1):
    G = nx.Graph(name=graph_name + "_" +
                 str(datetime.timestamp(datetime.now())))

    # Generate hubs to serve as the backbone of the network
    G, index = add_hub(G,
                       index=0,
                       x_range=(0, total_size),
                       y_range=(0, total_size),
                       hub_depth=hub_depth)

    # Create nodes with random x and y coordinate attributes
    for n in range(0, total_size):
        if str(n) not in G.nodes():
            G.add_node(str(n),
                       coordinates=(np.random.randint(0, total_size),
                                    np.random.randint(0, total_size)),
                       hub=0,
                       perfsonar=False)

    # Ensure nodes are sorted before distances are calculated, to allow indexing of source-destination pairs
    node_coordinates = sorted(nx.get_node_attributes(G, 'coordinates').items(),
                              key=lambda x: int(x[0]))

    # https://stackoverflow.com/questions/54732086/finding-euclidean-distance-between-all-pair-of-points
    distances = np.array(euclidean_distances([c[1] for c in node_coordinates]))

    distances_dict = {}
    for i in range(len(node_coordinates)):
        temp_dict = {}
        for j in range(len(node_coordinates)):
            temp_dict[node_coordinates[j][0]] = distances[i][j]
        distances_dict[node_coordinates[i][0]] = temp_dict.copy()

    # https://stackoverflow.com/questions/16817948/i-have-need-the-n-minimum-index-values-in-a-numpy-array
    # This only works because we are numbering nodes sequentially from 0 (i.e., their key and their index is the same)
    closest_neighbors = [
        list(arr.argsort()[1:max_neighbors + 1]) for arr in distances
    ]

    closest_neighbors_dict = {}
    for i in range(len(node_coordinates)):
        closest_neighbors_dict[node_coordinates[i][0]] = [
            str(n) for n in closest_neighbors[i]
        ]

    for node in list(G.nodes()):
        num_neighbors = np.random.randint(1, max_neighbors + 1)
        neighbors = closest_neighbors_dict[node][:num_neighbors]
        for neighbor in neighbors:
            add_detailed_edge(G, (node, neighbor))

    for edge in nx.k_edge_augmentation(G, k):
        add_detailed_edge(G, (edge[0], edge[1]))

    G = save_network(G)
    return G
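
A standalone sketch of the augment-then-verify pattern used above (networkx
assumed): after adding the edges from nx.k_edge_augmentation(G, k), the graph
is at least k-edge-connected.

import networkx as nx

G = nx.random_geometric_graph(30, 0.2, seed=1)
for u, v in nx.k_edge_augmentation(G, k=2):
    G.add_edge(u, v)
assert nx.edge_connectivity(G) >= 2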