Example #1
def split_subslice_into_putative_modules(G_optimized, improvement_delta, modularity_score_objective, best_modularity):
    cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)]
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity >= modularity_score_objective:
        return True, best_modularity

    for n_nodes in cur_components:  # drop components too small to form a module
        if len(n_nodes) < 4:
            G_optimized.remove_nodes_from(list(n_nodes))

    cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)]
    if len(cur_components) == 0:
        return True, best_modularity

    optimized_connected_components = girvan_newman(G_optimized)
    cur_components = sorted(next(optimized_connected_components))
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity <= best_modularity + improvement_delta:
        return True, best_modularity

    else:
        optimal_components = cur_components

        edges_to_remove = []
        for cur_edge in G_optimized.edges:
            included = False
            for n_nodes in optimal_components:
                if cur_edge[0] in n_nodes and cur_edge[1] in n_nodes:
                    included = True
            if not included:
                edges_to_remove.append(cur_edge)

        G_optimized.remove_edges_from(edges_to_remove)

        return False, cur_modularity
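A minimal driver sketch for this function; the same loop appears in Example #10 below. `G_optimized` and the thresholds are assumed to be prepared by the caller:

best_modularity = -1
break_loop = False
while not break_loop:
    # keep splitting until the modularity objective is met or the
    # improvement falls below the threshold
    break_loop, best_modularity = split_subslice_into_putative_modules(
        G_optimized, improvement_delta, modularity_score_objective,
        best_modularity)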
Example #2
def compute_molecule(universe):
    '''
    Cluster atoms into molecules.

    The algorithm is to create a network graph containing every atom (in every
    frame) as nodes and bonds as edges. Using this connectivity information,
    one can perform a (breadth first) traversal of the network graph to cluster
    all nodes (whose indices correspond to physical atoms).

    Args:
        universe (:class:`~exatomic.universe.Universe`): Atomic universe

    Returns:
        objs (tuple): Molecule indices (for atom dataframe(s)) and molecule dataframe

    Warning:
        This function will modify (in place) a few tables of the universe!
    '''
    if 'bond_count' not in universe.atom:    # The bond count is used to find single atoms;
        universe.compute_bond_count()        # single atoms are treated as molecules.
    b0 = None
    b1 = None
    bonded = universe.two[universe.two['bond'] == True]
    if universe.is_periodic:
        mapper = universe.projected_atom['atom']
        b0 = bonded['prjd_atom0'].map(mapper)
        b1 = bonded['prjd_atom1'].map(mapper)
    else:
        b0 = bonded['atom0']
        b1 = bonded['atom1']
    graph = Graph()
    graph.add_edges_from(zip(b0.values, b1.values))
    mapper = {}
    for i, molecule in enumerate(connected_components(graph)):
        for atom in molecule:
            mapper[atom] = i
    n = 1
    if len(mapper.values()) > 0:
        n += max(mapper.values())
    else:
        n -= 1
    idxs = universe.atom[universe.atom['bond_count'] == 0].index
    for i, index in enumerate(idxs):
        mapper[index] = i + n
    # Set the molecule indices
    universe.atom['molecule'] = universe.atom.index.map(lambda idx: mapper[idx])
    # Now compute molecule table
    universe.atom['mass'] = universe.atom['symbol'].map(symbol_to_element_mass)
    # The coordinates of visual_atom represent grouped molecules for
    # periodic calculations and absolute coordinates for free boundary conditions.
    molecules = universe.atom.groupby('molecule')
    molecule = molecules['symbol'].value_counts().unstack().fillna(0).astype(np.int64)
    molecule.columns.name = None
    molecule['mass'] = molecules['mass'].sum()
    del universe.atom['mass']
    frame = universe.atom[['molecule', 'frame']].drop_duplicates('molecule')
    frame = frame.set_index('molecule')['frame'].astype(np.int64)
    molecule['frame'] = frame.astype('category')
    return Molecule(molecule)
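The core pattern above, labeling each node with the index of its connected component, can be exercised on a plain graph; a self-contained sketch, independent of exatomic:

import networkx as nx
from networkx.algorithms.components import connected_components

g = nx.Graph([(0, 1), (1, 2), (3, 4)])
g.add_node(5)  # an isolated node, analogous to a single-atom molecule
mapper = {}
for i, component in enumerate(connected_components(g)):
    for node in component:
        mapper[node] = i
print(mapper)  # e.g. {0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 2}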
Example #3
    def helper2(G):
        T = nx.minimum_spanning_tree(G)
        curr_lowest = average_pairwise_distance(T)
        curr_lowest_tree = T

        S = min_weighted_dominating_set(T)

        newG = nx.subgraph(T, S)

        ccs = list(connected_components(newG))

        for i in range(len(ccs) - 1):
            # pop()/add() is a "peek": take an arbitrary node from each
            # component without actually removing it
            curr_node = ccs[i].pop()
            ccs[i].add(curr_node)
            next_node = ccs[i + 1].pop()
            ccs[i + 1].add(next_node)
            # stitch consecutive components together along a shortest path
            path = nx.dijkstra_path(G, curr_node, next_node)

            for n in path:
                if (n not in list(newG.nodes)):
                    S.add(n)

            newG = nx.subgraph(G, S)
            newT = nx.minimum_spanning_tree(newG)
            if (is_valid_network(G, newT)):
                apd = average_pairwise_distance(newT)
                if (apd < curr_lowest):
                    curr_lowest = apd
                    curr_lowest_tree = newT

        return curr_lowest_tree
Example #4
def is_bipartite_node_set(G, nodes):
    """Returns True if nodes and G/nodes are a bipartition of G.

    Parameters
    ----------
    G : NetworkX graph

    nodes: list or container
      Check if nodes are a one of a bipartite set.

    Examples
    --------
    >>> from networkx.algorithms import bipartite
    >>> G = nx.path_graph(4)
    >>> X = set([1, 3])
    >>> bipartite.is_bipartite_node_set(G, X)
    True

    Notes
    -----
    For connected graphs the bipartite sets are unique.  This function handles
    disconnected graphs.
    """
    S = set(nodes)
    for CC in (G.subgraph(c).copy() for c in connected_components(G)):
        X, Y = sets(CC)
        if not ((X.issubset(S) and Y.isdisjoint(S)) or
                (Y.issubset(S) and X.isdisjoint(S))):
            return False
    return True
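The disconnected case is the interesting one; a quick check using only the public networkx API:

>>> import networkx as nx
>>> from networkx.algorithms import bipartite
>>> G = nx.Graph([(0, 1), (2, 3)])  # two disconnected edges
>>> bipartite.is_bipartite_node_set(G, {0, 2})
True
>>> bipartite.is_bipartite_node_set(G, {0, 1})
False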
Example #5
def approximate_steiner(graph, terminals):
    steiner_tree = nx.Graph()
    num_terminals = len(terminals)
    all_terminal_paths = list()
    for i in range(0, num_terminals):
        for j in range(i + 1, num_terminals):
            paths = get_paths(graph, terminals[i], terminals[j])
            least_cost_path, least_cost = get_least_cost_path(graph, paths)
            path = dict()
            path['cost'] = least_cost
            path['path'] = least_cost_path
            print "Path" + str(path['path'])
            all_terminal_paths.append(path)

    all_terminal_paths.sort(key=lambda x: x['cost'])
    for t_path in all_terminal_paths:
        nx.add_path(steiner_tree, t_path['path'])
        if check_terminals_connected(steiner_tree, terminals):
            break
    conn_components = list(comp.connected_components(steiner_tree))
    while len(conn_components) > 1:
        comp1 = list(conn_components[0])  # connected_components yields sets,
        comp2 = list(conn_components[1])  # which do not support indexing
        for j in range(0, len(comp1)):
            for k in range(0, len(comp2)):
                if graph.has_edge(comp1[j], comp2[k]):
                    steiner_tree.add_edge(comp1[j], comp2[k])
                    break
        conn_components = list(comp.connected_components(steiner_tree))

    while True:
        try:
            cycle = nx.find_cycle(steiner_tree)
            print('Cycle found')
            edge = cycle[0]
            steiner_tree.remove_edge(edge[0], edge[1])
        except nx.NetworkXNoCycle:
            break
    weights = nx.get_node_attributes(graph, 'weight')
    steiner_cost = 0
    for node in list(steiner_tree.nodes):
        steiner_cost = steiner_cost + weights[node]
    return steiner_tree, steiner_cost
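get_paths, get_least_cost_path, and check_terminals_connected are helpers defined elsewhere in that project; a plausible sketch of the last one, consistent with how it is used above:

def check_terminals_connected(tree, terminals):
    # Hypothetical helper: every terminal must be present in the partial
    # tree and all of them must share one connected component.
    if not all(tree.has_node(t) for t in terminals):
        return False
    return any(set(terminals) <= c for c in comp.connected_components(tree))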
Example #6
def retain_relevant_slices(G_original, module_sig_th):
    global G_modularity

    pertubed_nodes = []
    for cur_node in G_modularity.nodes():
        if G_modularity.nodes[cur_node]["pertubed_node"]:
            pertubed_nodes.append(cur_node)

    ccs = [
        G_modularity.subgraph(c) for c in connected_components(G_modularity)
    ]
    params = []
    p = multiprocessing.Pool(constants.N_OF_THREADS)
    n_G_original = len(G_original)
    n_pertubed_nodes = len(pertubed_nodes)
    pertubed_nodes_in_ccs = []
    print(f"number of slices: {len(list(ccs))}")
    for i_cur_cc, cur_cc in enumerate(ccs):
        pertubed_nodes_in_ccs.append(
            len([
                cur_node for cur_node in cur_cc
                if G_modularity.nodes[cur_node]["pertubed_node"]
            ]))
    perturbation_factor = min(0.7, (float(n_pertubed_nodes) / n_G_original) *
                              (1 + 100 / n_G_original**0.5))

    for i_cur_cc, cur_cc in enumerate(ccs):
        params.append([
            n_G_original, cur_cc, i_cur_cc, n_pertubed_nodes,
            perturbation_factor
        ])

    res = [a for a in p.map(pf_filter, params) if a is not None]
    print(f'# of slices after perturbation TH: {len(res)}/{len(params)}')
    p.close()
    if len(res) == 0:
        return nx.Graph(), [], []
    large_modules, sig_scores = zip(*res)
    fdr_bh_results = fdrcorrection0(sig_scores,
                                    alpha=module_sig_th,
                                    method='indep',
                                    is_sorted=False)

    # print(fdr_bh_results)
    # print(f'min: {min(list(fdr_bh_results[1]))}')
    passed_modules = [
        cur_cc
        for cur_cc, is_passed_th in zip(large_modules, fdr_bh_results[0])
        if is_passed_th
    ]
    passed_G = (nx.algorithms.operators.union_all(passed_modules)
                if len(passed_modules) > 0 else nx.Graph())
    return passed_G, [list(m.nodes) for m in passed_modules], fdr_bh_results[1]
Example #7
def get_diameter(graph):
    networkx_graph = to_networkx(graph).to_undirected()

    sub_graph_list = [
        networkx_graph.subgraph(c)
        for c in connected_components(networkx_graph)
    ]
    sub_graph_diam = []
    for sub_g in sub_graph_list:
        sub_graph_diam.append(diameter(sub_g))
    return max(sub_graph_diam)
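A hedged usage sketch, assuming `to_networkx` comes from torch_geometric.utils and `diameter` from networkx, as the imports above suggest:

import torch
from torch_geometric.data import Data

# two components: a path 0-1-2 and an edge 3-4 (undirected pairs listed both ways)
edge_index = torch.tensor([[0, 1, 1, 2, 3, 4],
                           [1, 0, 2, 1, 4, 3]])
data = Data(edge_index=edge_index, num_nodes=5)
print(get_diameter(data))  # 2, the diameter of the 0-1-2 path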
Example #8
def kdconnect(root_nodes, trees=None, tol=0.75):
    from networkx.algorithms.components import connected_components
    import networkx as nx

    cnt1, cnt2 = 0, 0
    # make trees if they were not added before
    if trees is None:
        trees = []
        for node in root_nodes:
            tr = gp.KDTreeIndex(fwd=True, bkwd=True)(node)
            trees.append(tr)

    # get a node count for validation
    for node in root_nodes:
        cnt1 += len(list(node.__iter__(fwd=True, bkwd=True)))

    gg = nx.Graph()
    for i in range(len(trees)):
        ti = trees[i]
        for j in range(i+1, len(trees)):
            tj = trees[j]
            res_ij = ti.query_ball_tree(tj, tol)
            adj = [(di, x) for di, x in enumerate(res_ij) if len(x) > 0]
            if any(adj):
                # unique indices from tree_i and tree_j
                un_tix = np.unique([i for i, j in adj])
                un_tjx = np.unique([j for i, j in adj])
                # closest points between unique positions
                dists = distance.cdist(ti.data[un_tix], tj.data[un_tjx])
                armi = np.unravel_index(np.argmin(dists, axis=None), dists.shape)

                # retrieve node ids from respective trees, and connect
                node_id1 = ti[un_tix[armi[0]]]
                node_id2 = tj[un_tjx[armi[1]]]
                ndi = gutil.node_with_id(root_nodes[i], node_id1)
                ndj = gutil.node_with_id(root_nodes[j], node_id2)
                ndi.connect_to(ndj, is_pipe=True)
                # add root indexes to component graph
                gg.add_edge(i, j)

    final_roots = []
    # gather new root nodes using connected_component algorithm
    for component in connected_components(gg):
        # pick a random index from the set ...
        ix = list(component)[0]
        a_root = root_nodes[ix]
        cnt2 += len(list(a_root.__iter__(fwd=True, bkwd=True)))
        final_roots.append(a_root)

    # sanity check - number of nodes should not have changed
    assert cnt1 == cnt2, 'unequal number of nodes before and after merge'
    return final_roots
Example #9
def get_communities_fluid(G):
    connected_components = components.connected_components(G)
    modules = []
    min_size = 50
    coef = 1. / min_size
    for component in connected_components:
        if len(component) < min_size:
            modules = modules + [component]
            continue
        k = int(np.ceil(coef * len(G.nodes)))
        modules = modules + list(
            community.asyn_fluidc(G.subgraph(component), k, seed=123))
    return modules
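A usage sketch under the aliases the snippet assumes (`components` and `community` from networkx.algorithms, `np` for numpy):

import numpy as np
import networkx as nx
from networkx.algorithms import community, components

G = nx.planted_partition_graph(2, 60, 0.5, 0.01, seed=123)
modules = get_communities_fluid(G)
print(len(modules))  # number of fluid communities plus small components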
Example #10
def get_putative_modules(G,
                         full_G=None,
                         improvement_delta=0,
                         modularity_score_objective=1,
                         module_threshold=0.05,
                         n_cc=1.0):
    """"""

    if full_G is None:
        full_G = G
    G_optimized = G.copy()

    # clean subslice from cycles and isolated nodes
    G_optimized.remove_edges_from(list(nx.selfloop_edges(G_optimized)))
    G_optimized.remove_nodes_from(list(nx.isolates(G_optimized)))

    # check subslice enrichment for active nodes
    pertubed_nodes = [
        cur_node for cur_node in full_G.nodes
        if full_G.nodes[cur_node]["pertubed_node"]
    ]
    pertubed_nodes_in_cc = [
        n for n in G_optimized.nodes if G_optimized.nodes[n]["pertubed_node"]
    ]
    n_nodes = list(G_optimized.nodes)
    sig_score = hypergeom.sf(len(pertubed_nodes_in_cc), len(full_G.nodes), len(pertubed_nodes),
                             len(n_nodes)) \
                + hypergeom.pmf(len(pertubed_nodes_in_cc), len(full_G.nodes), len(pertubed_nodes),
                                len(n_nodes))

    sig_score = sig_score / n_cc

    # if the subslice is not enriched for active nodes, split it into putative
    # modules; otherwise, report it as a single putative module
    is_enriched_subslice = (len(G_optimized.nodes) < 100) or len(
        G_optimized.nodes) == 0

    break_loop = is_enriched_subslice
    best_modularity = -1
    while not break_loop:
        break_loop, best_modularity = split_subslice_into_putative_modules(
            G_optimized, improvement_delta, modularity_score_objective,
            best_modularity)

    G_optimized.remove_nodes_from(list(nx.isolates(G_optimized)))

    cc_optimized = [] if len(G_optimized.nodes) == 0 else [
        G_optimized.subgraph(c) for c in connected_components(G_optimized)
    ]

    return G_optimized, cc_optimized
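The expected input is a graph whose nodes carry a boolean "pertubed_node" attribute (the spelling is the one the code uses). A toy run, assuming networkx and scipy.stats.hypergeom are imported as the snippet requires:

G = nx.karate_club_graph()
for n in G.nodes:
    G.nodes[n]["pertubed_node"] = (n % 5 == 0)  # toy perturbation flags
G_opt, modules = get_putative_modules(G)
print(len(modules))  # 1: the graph has fewer than 100 nodes, so it is kept whole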
Example #11
def process_graph(graph):
    """
    Process information in graph, returning a table (table data + column names) as a result. Each row of the table
    represents a side-chain, and each column records one property of the side-chain, such as the number of heavy atoms
    it contains, whether it is a hydrogen bond donor or acceptor, ect.
    """
    scaffold_nodes = []
    for node_id, node in graph.nodes.items():
        if 'is_scaffold' in node:
            scaffold_nodes.append(node_id)

    # Remove scaffold
    graph.remove_nodes_from(scaffold_nodes)

    # Initialize data
    graph_info = []

    # Iterate through disconnected subgraphs
    for subgraph in connected_components(graph):
        attached_atom_id = None
        num_heavy_atoms = 0
        is_hbd = False
        is_hba = False

        for node_id in subgraph:
            node = graph.nodes[node_id]
            if attached_atom_id is None and 'anchor' in node:
                attached_atom_id = node['anchor']

            if not is_hba and 'is_hba' in node:
                is_hba = True

            if not is_hbd and 'is_hbd' in node:
                is_hbd = True
            num_heavy_atoms += 1

        if attached_atom_id is None:
            raise ValueError

        is_hbd_and_hba = is_hbd and is_hba
        graph_info.append([attached_atom_id, num_heavy_atoms, int(is_hbd), int(is_hba), int(is_hbd_and_hba)])

    # Convert graph_info to dataframe
    if not graph_info:
        raise NoSubstitutionException()

    graph_info = np.array(graph_info, dtype=np.int32)
    col_names = ['attached_atom_id', 'num_heavy_atoms', 'is_hbd', 'is_hba', 'is_hbd_and_hba']
    col_names = {key: val for val, key in enumerate(col_names)}
    return graph_info, col_names
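A toy annotated graph shows the node attributes the snippet reads (`is_scaffold`, `anchor`, `is_hba`/`is_hbd`); networkx/numpy imports and NoSubstitutionException are assumed from the surrounding module:

g = nx.Graph()
g.add_node(0, is_scaffold=True)       # scaffold atom, removed below
g.add_node(1, anchor=0)               # side-chain atom attached at atom 0
g.add_node(2, anchor=0, is_hba=True)  # side-chain atom, H-bond acceptor
g.add_edges_from([(0, 1), (1, 2)])
info, cols = process_graph(g)
print(info)  # [[0 2 0 1 0]]: anchor 0, 2 heavy atoms, HBA but not HBD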
Example #12
def get_graph_diameter(data):
    networkx_graph = to_networkx(data).to_undirected()

    sub_graph_list = [
        networkx_graph.subgraph(c)
        for c in connected_components(networkx_graph)
    ]
    sub_graph_diam = []
    for sub_g in sub_graph_list:
        sub_graph_diam.append(diameter(sub_g))
    data.diameter = max(sub_graph_diam)

    if data.x is None:
        data.x = torch.ones(data.num_nodes, 1)

    return data
Example #13
    def build_giant_component(self, return_component=False):
        '''
        Stores the giant component in self.giant_component. If return_component is True, returns the nx subgraph.

        :param return_component: Bool
        :return: nx.Graph instance
        '''

        from networkx.algorithms.components import connected_components

        giant_component_nodes = max(connected_components(self.graph), key=len)

        self.giant_component = self.graph.subgraph(
            giant_component_nodes).copy()

        if (return_component):
            return self.giant_component
Example #14
    def post_process(self):
        trackings = self.parents['irit_harmo_tracking'].results['irit_harmo_tracking'].data_object.value

        graph = Graph()

        for t, h in [(track, track.harmo_link(trackings)) for track in trackings]:

            graph.add_node(t)

            if len(h) > 0:

                graph.add_edges_from([(t, o) for o in h])

        res = self.new_result(time_mode='global')
        res.data_object.value = [c2 for c in connected_components(graph) for c2 in Cluster(c).harmo_sub()]
        self.add_result(res)

        return
Example #15
def compute_molecule(universe):
    """
    Cluster atoms into molecules and create the :class:`~exatomic.molecule.Molecule`
    table.

    Args:
        universe: Atomic universe

    Returns:
        molecule: Molecule table

    Warning:
        This function modifies the universe's atom (:class:`~exatomic.atom.Atom`)
        table in place!
    """
    nodes = universe.atom.index.values
    bonded = universe.atom_two.loc[universe.atom_two['bond'] == True,
                                   ['atom0', 'atom1']]
    edges = zip(bonded['atom0'].astype(np.int64),
                bonded['atom1'].astype(np.int64))
    g = nx.Graph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)
    # generate molecule indices for the atom table
    mapper = {}
    i = 0
    for k, v in g.degree():  # First handle single atom "molecules"
        if v == 0:
            mapper[k] = i
            i += 1
    for seht in connected_components(g):  # Second handle multi atom molecules
        for adx in seht:
            mapper[adx] = i
        i += 1
    universe.atom['molecule'] = universe.atom.index.map(lambda x: mapper[x])
    universe.atom['mass'] = universe.atom['symbol'].map(sym2mass).astype(float)
    grps = universe.atom.groupby('molecule')
    molecule = grps['symbol'].value_counts().unstack().fillna(0).astype(
        np.int64)
    molecule.columns.name = None
    molecule['mass'] = grps['mass'].sum()
    universe.atom['molecule'] = universe.atom['molecule'].astype('category')
    del universe.atom['mass']
    return molecule
Example #16
def compute_molecule(universe):
    """
    Cluster atoms into molecules and create the :class:`~exatomic.molecule.Molecule`
    table.

    Args:
        universe: Atomic universe

    Returns:
        molecule: Molecule table

    Warning:
        This function modifies the universe's atom (:class:`~exatomic.atom.Atom`)
        table in place!
    """
    nodes = universe.atom.index.values
    bonded = universe.atom_two.loc[universe.atom_two["bond"] == True, ["atom0", "atom1"]]
    edges = zip(bonded["atom0"].astype(np.int64), bonded["atom1"].astype(np.int64))
    g = nx.Graph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)
    # generate molecule indices for the atom table
    mapper = {}
    i = 0
    for k, v in dict(g.degree()).items():  # First handle single atom "molecules"
        if v == 0:
            mapper[k] = i
            i += 1
    for seht in connected_components(g):  # Second handle multi atom molecules
        for adx in seht:
            mapper[adx] = i
        i += 1
    universe.atom["molecule"] = universe.atom.index.map(lambda x: mapper[x])
    sym2mass = symbol_to_element_mass()
    universe.atom["mass"] = universe.atom["symbol"].map(sym2mass)
    grps = universe.atom.groupby("molecule")
    molecule = grps["symbol"].value_counts().unstack().fillna(0).astype(np.int64)
    molecule.columns.name = None
    molecule["mass"] = grps["mass"].sum()
    universe.atom["molecule"] = universe.atom["molecule"].astype("category")
    del universe.atom["mass"]
    return molecule
Example #17
def get_graph_diameter(data):
    '''
    Compute the graph diameter and add it as an attribute to the data object.
    :param data: the graph
    :return: the graph representation augmented with a diameter attribute
    '''
    networkx_graph = to_networkx(data).to_undirected()

    sub_graph_list = [
        networkx_graph.subgraph(c)
        for c in connected_components(networkx_graph)
    ]
    sub_graph_diam = []
    for sub_g in sub_graph_list:
        sub_graph_diam.append(diameter(sub_g))
    data.diameter = max(sub_graph_diam)

    if data.x is None:
        data.x = torch.ones(data.num_nodes, 1)

    return data
Example #18
def chordal_graph_cliques(G):
    """Returns the set of maximal cliques of a chordal graph.

    The algorithm breaks the graph in connected components and performs a
    maximum cardinality search in each component to get the cliques.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    Returns
    -------
    cliques : A set containing the maximal cliques in G.

    Raises
    ------
    NetworkXError
        The algorithm does not support DiGraph, MultiGraph and MultiDiGraph.
        If the input graph is an instance of one of these classes, a
        :exc:`NetworkXError` is raised.
        The algorithm can only be applied to chordal graphs. If the input
        graph is found to be non-chordal, a :exc:`NetworkXError` is raised.

    Examples
    --------
    >>> import networkx as nx
    >>> e = [(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (3, 5), (3, 6), (4, 5), (4, 6), (5, 6), (7, 8)]
    >>> G = nx.Graph(e)
    >>> G.add_node(9)
    >>> setlist = nx.chordal_graph_cliques(G)
    """
    if not is_chordal(G):
        raise nx.NetworkXError("Input graph is not chordal.")

    cliques = set()
    for C in (G.subgraph(c).copy() for c in connected_components(G)):
        cliques |= _connected_chordal_graph_cliques(C)

    return cliques
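Continuing the docstring example, the result covers one maximal clique per dense block plus the trivial components; since set ordering is arbitrary, compare sizes:

>>> sorted(len(c) for c in setlist)
[1, 2, 3, 3, 4]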
Example #19
def resolve_duplicate_clusters(clusters, pairs):
    if not clusters:
        summary.add('Duplicate components', 0)
        summary.add('Clusters truly duplicate', 0)
        return dict(), dict()

    G = nx.Graph()

    G.add_nodes_from(clusters)
    G.add_edges_from(pairs)
    #
    # print('Nodes:', G.number_of_nodes())
    # print('Edges:', G.number_of_edges())

    components_list = []
    if not components.is_connected(G):
        # verbosity('Graph not connected, components = ' + str(components.number_connected_components(G)), args.quiet)
        for component in components.connected_components(G):
            components_list.append(component)
    else:
        components_list.append(clusters)

    summary.add('Duplicate components', components.number_connected_components(G))

    translation_dict = {}
    duplicate_weights = {}
    for component in components_list:
        main_node = min(component)
        component.remove(main_node)
        try:
            duplicate_weights[len(component)] += 1
        except KeyError:
            duplicate_weights[len(component)] = 1

        for node in component:
            translation_dict[node] = main_node
            summary.add('Clusters truly duplicate')

    return translation_dict, duplicate_weights
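`summary` is a module-level collector defined elsewhere; with a no-op stub in its place, the mapping behaviour looks like this (hypothetical run):

class _SummaryStub:  # stand-in for the real collector
    def add(self, key, value=None):
        pass

summary = _SummaryStub()
translation, weights = resolve_duplicate_clusters({1, 2, 3, 4}, [(1, 2), (3, 4)])
print(translation)  # {2: 1, 4: 3}: each duplicate maps to its component's minimum
print(weights)      # {1: 2}: two components, each contributing one duplicate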
Example #20
    def post_process(self):
        trackings = self.parents['irit_harmo_tracking'].results[
            'irit_harmo_tracking'].data_object.value

        graph = Graph()

        for t, h in [(track, track.harmo_link(trackings))
                     for track in trackings]:

            graph.add_node(t)

            if len(h) > 0:

                graph.add_edges_from([(t, o) for o in h])

        res = self.new_result(time_mode='global')
        res.data_object.value = [
            c2 for c in connected_components(graph)
            for c2 in Cluster(c).harmo_sub()
        ]
        self.add_result(res)

        return
Example #21
def report_connectedness(G, save_img_path=None):
    """
        Checks if the graph is connected
        and returns the connected components
        if the graph is disconnected
        
        G (nx.Graph): graph for which the 
                top nodes must be determined.
                
        save_img_path (str): path to save visualisation
                of the components detected.
        
        Returns:
            True: if the given graph is connected.
            
            False, connected_components: if the graph
                is disconnected along with list of sets of
                nodes representing the components.
    """
    # aggregate connectedness metrics
    is_connected = components.is_connected(G)
    # materialize the components: the generator cannot be sized or iterated twice
    connected_components = list(components.connected_components(G))

    # save the disconnected components visualisation if a path is given
    if save_img_path:
        colors = np.linspace(0, 1, len(connected_components))
        com_color_map = dict()
        for idx, com in enumerate(connected_components):
            for node in com:
                com_color_map[node] = colors[idx]

        pos = nx.spring_layout(G)  # layout for drawing (assumed; any networkx layout works)
        nx.draw_networkx_labels(G, pos=pos)
        nx.draw(G, pos, node_color=[com_color_map[n] for n in G.nodes()])
        plt.savefig(save_img_path, format="PNG")

    return (is_connected, connected_components)
Example #22
                        for j, pp in enumerate(kept_i2pp):
                            seg_j = segments[pp2i[pp]]
                            if isinstance(seg_j, Nphthong) and len(seg_j) == 2 and kept_dist_mat[i, j] == insert_cost:
                                g.add_edge(i, j)
                    elif isinstance(seg, Nphthong):
                        for j, pp in enumerate(kept_i2pp):
                            seg_j = segments[pp2i[pp]]
                            if isinstance(seg_j, Segment) and seg_j.is_vowel() and kept_dist_mat[i, j] == insert_cost:
                                g.add_edge(i, j)
                            elif isinstance(seg_j, Nphthong) and abs(len(seg) - len(seg_j)) == 1 and kept_dist_mat[i, j] == insert_cost:
                                g.add_edge(i, j)

                query_sound = st.selectbox('Query sound', sorted(kept_i2pp))
                st.write(get_connected_sounds(query_sound, g, kept_dist_mat, kept_i2pp, kept_pp2i))

                cc = list(connected_components(g))
                assert len(cc) == 1

                # Compute average number of connected sounds.
                cnt = dict()
                for i in kept_ids:
                    cnt[i2pp[i]] = len(g.edges(i))
                st.write(f'Average number of connected sounds: {(sum(cnt.values()) / len(kept_ids)):.3f}')

                if should_proceed('about_to_save'):
                    proto_ph_map = dict()
                    for i in kept_ids:
                        ph = i2pp[i]
                        proto_ph_map[ph] = ph

                    lengths = [len(pp) for pp in kept_i2pp]
Example #23
def _chordal_graph_cliques(G):
    """Returns all maximal cliques of a chordal graph.

    The algorithm breaks the graph in connected components and performs a
    maximum cardinality search in each component to get the cliques.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    Returns
    -------
    iterator
        An iterator over maximal cliques, each of which is a frozenset of
        nodes in `G`. The order of cliques is arbitrary.

    Raises
    ------
    NetworkXError
        The algorithm does not support DiGraph, MultiGraph and MultiDiGraph.
        If the input graph is an instance of one of these classes, a
        :exc:`NetworkXError` is raised.
        The algorithm can only be applied to chordal graphs. If the input
        graph is found to be non-chordal, a :exc:`NetworkXError` is raised.

    Examples
    --------
    >>> e = [
    ...     (1, 2),
    ...     (1, 3),
    ...     (2, 3),
    ...     (2, 4),
    ...     (3, 4),
    ...     (3, 5),
    ...     (3, 6),
    ...     (4, 5),
    ...     (4, 6),
    ...     (5, 6),
    ...     (7, 8),
    ... ]
    >>> G = nx.Graph(e)
    >>> G.add_node(9)
    >>> cliques = [c for c in _chordal_graph_cliques(G)]
    >>> cliques[0]
    frozenset({1, 2, 3})
    """
    if not is_chordal(G):
        raise nx.NetworkXError("Input graph is not chordal.")

    for C in (G.subgraph(c).copy() for c in connected_components(G)):
        if C.number_of_nodes() == 1:
            yield frozenset(C.nodes())
        else:
            unnumbered = set(C.nodes())
            v = arbitrary_element(C)
            unnumbered.remove(v)
            numbered = {v}
            clique_wanna_be = {v}
            while unnumbered:
                v = _max_cardinality_node(C, unnumbered, numbered)
                unnumbered.remove(v)
                numbered.add(v)
                new_clique_wanna_be = set(C.neighbors(v)) & numbered
                sg = C.subgraph(clique_wanna_be)
                if _is_complete_graph(sg):
                    new_clique_wanna_be.add(v)
                    if not new_clique_wanna_be >= clique_wanna_be:
                        yield frozenset(clique_wanna_be)
                    clique_wanna_be = new_clique_wanna_be
                else:
                    raise nx.NetworkXError("Input graph is not chordal.")
            yield frozenset(clique_wanna_be)
Example #24
    # Handle cliques
    try:
        signal.signal(signal.SIGALRM, percolate.clique_handler)
        if CCid in ['03ae', '03b0', '03b2', '03b5', '03b7',
                    '0893']:  # Skip histones and other complex OGs
            raise percolate.CliqueError
        signal.alarm(90)
        cliques = list(find_cliques(G))
        signal.alarm(0)
    except percolate.CliqueError:
        print(f'CliqueError: {CCid}')
        for k in ks:
            subOGs = set()
            core = k_core(G, k)
            for component in connected_components(core):
                subOGs.add(
                    frozenset(
                        [frozenset(edge) for edge in core.edges(component)]))
            OGs_ks[k].append(subOGs)
            classify_CC(CCtypes_ks[k], subOGs)
        continue  # Continue to next OG

    # Handle percolation
    for k in ks:
        try:
            signal.signal(signal.SIGALRM, percolate.percolate_handler)
            signal.alarm(90)
            subOGs = list(
                percolate.k_clique_communities_progressive(G, k, cliques))
            signal.alarm(0)
Example #25
def partition_reads(tint, maximum_ilp_size):
    reads = tint['reads']
    read_reps = tint['read_reps']
    I = tint['ilp_data']['I']
    FL = tint['ilp_data']['FL']
    tint['partitions'] = list()

    rids = sorted(I.keys())
    unique_data = dict()
    edges = list()
    for i in rids:
        d = (tuple(I[i]), (FL[i][0], FL[i][1], reads[read_reps[i][0]]['poly_tail_category']))
        if d in unique_data:
            unique_data[d].append(i)
        else:
            unique_data[d] = [i]
    unique_data = list(unique_data.items())
    N = len(unique_data)
    for i in range(N):
        for j in range(i+1, N):
            d1, (f1, l1, t1) = unique_data[i][0]
            d2, (f2, l2, t2) = unique_data[j][0]
            f = max(f1, f2)
            l = min(l1, l2)
            o = l-f+1
            d = sum(x != y for x, y in zip(d1[f:l+1], d2[f:l+1]))
            w = sum(x == y == 1 for x, y in zip(d1[f:l+1], d2[f:l+1]))
            if t1 != 'N' and t2 != 'N' and t1 != t2:
                continue
            if w < 1:
                continue
            if (o > 3 and d < 3) or (1 <= o <= 3 and d == 0):
                edges.append((i, j))
    G = Graph()
    G.add_nodes_from(range(N))
    G.add_edges_from(edges)
    while True:
        edges_to_remove = list()
        for i, j in G.edges:
            n1 = set(G.neighbors(i))
            n2 = set(G.neighbors(j))
            if len(n1) == 1 or len(n2) == 1 or len(n1 & n2) > 0:
                continue
            edges_to_remove.append((i, j))
        G.remove_edges_from(edges_to_remove)
        if len(edges_to_remove) == 0:
            break
    for cc in components.connected_components(G):
        rids = list()
        incomp = list()
        for chunk in split_list_evenly(list(cc), maximum_ilp_size):
            for idx, i in enumerate(chunk):
                rids.extend(unique_data[i][1])
                for j in chunk[idx + 1:]:
                    # canonical order without clobbering the loop variable i
                    a, b = min(i, j), max(i, j)
                    assert a < b
                    if G.has_edge(a, b):
                        continue
                    for rid_1 in unique_data[a][1]:
                        for rid_2 in unique_data[b][1]:
                            incomp.append((rid_1, rid_2))
            tint['partitions'].append((rids, incomp))
Example #26
    def compute_volumes(self, queries=None, evidence=None, cache=True):
        """Computes the unnormalized probabilities of univariate and
        bivariate literals in 'queries' associated to univariate
        literals and a list of uni/bivariate clauses representing the
        'evidence'.

        Returns (Z given evidence, list[volumes of queries given evidence]).

        Raises NotImplementedError if the literals are not uni/bivariate.

        Parameters
        ----------
        queries : list of pysmt.FNode instances (optional)
            Uni/bivariate literals
        evidence : iterable of pysmt.FNode instances (optional)
            Uni/bivariate clauses, default: None
        cache : bool (optional)
            If True, integrals are cached, default: True
        """

        if not nx.is_forest(self.primal.G):
            raise NotImplementedError("MP requires a forest-shaped primal graph")

        if queries is None:
            queries = []
        else:
            queries = [flip_negated_literals_cnf(q) for q in queries]

        if cache is True and self.cache is None:
            self.cache = dict()
            self.cache_hit = [0, 0]

        elif cache is False:
            self.cache = None

        # send around messages, possibly accounting for 'evidence'
        self._compute_marginals(evidence=evidence)

        # compute the partition function as the product of the marginals of any node
        # for each connected component in the primal graph
        components = list(connected_components(self.primal.G))
        Z_components = []
        for comp_vars in components:
            x = list(comp_vars)[0]
            full_marginal = self._get_full_marginal(x)
            comp_Z = self.piecewise_symbolic_integral(full_marginal, x)
            Z_components.append(comp_Z)

        query_volumes = []
        for q in queries:
            q_vars = list(q.get_free_variables())
            if not all([qv.symbol_type() == REAL for qv in q_vars]):
                raise NotImplementedError("Supporting lra queries only")

            x = q_vars[0].symbol_name()
            if len(q_vars) == 1:

                # univariate query
                l, u = domains_to_intervals(q)[0]
                q_msg = [(l, u, 1)]

                # intersecting with the node symbolic marginal
                q_marginal = self._get_msgs_intersection(
                    [self._get_full_marginal(x), q_msg]
                )
                q_vol = self.piecewise_symbolic_integral(q_marginal, x)

                # account for the volume of unconnected variables
                for i, comp_vars in enumerate(components):
                    if x not in comp_vars:
                        q_vol *= Z_components[i]

                query_volumes.append(q_vol)

            elif len(q_vars) == 2:

                # bivariate query
                y = q_vars[1].symbol_name()

                # creates a new message using the query 'q' as evidence
                q_marginal = self._compute_message(x, y, evidence=[q])
                q_marginal = self._get_msgs_intersection([q_marginal] +
                                                          [self.marginals[y][z]
                                                           for z in
                                                           self.marginals[y]
                                                           if z != x])

                y_potentials = self.primal.nodes()[y]['potentials']
                if len(y_potentials) > 0:
                    potential_msgs = self._parse_potentials(
                        y_potentials, self.primal.nodes()[y]['var']
                    )
                    q_marginal = self._get_msgs_intersection(
                        potential_msgs + [q_marginal]
                    )

                q_vol = self.piecewise_symbolic_integral(q_marginal, y)

                # account for the volume of unconnected variables
                for i, comp_vars in enumerate(components):
                    if x not in comp_vars:
                        q_vol *= Z_components[i]

                query_volumes.append(q_vol)

            else:
                raise NotImplementedError(
                    "Queries of arity > 2 aren't supported")

        Z = 1.0
        for Z_comp in Z_components:
            Z *= Z_comp

        if self.cache_hit is not None:
            # TODO: check if cache_hit index should be True or False
            print("\tHITS: {}/{} (ratio {})".format(self.cache_hit[True],
                                                    sum(self.cache_hit),
                                                    self.cache_hit[True] /
                                                    sum(self.cache_hit)))

        Z = float(Z.as_expr())
        query_volumes = [float(qv.as_expr()) for qv in query_volumes]
        return Z, query_volumes
Example #27
def compute_undirected_graph_metrics(G):
    assert type(G) is nx.Graph

    # degrees stats
    degrees = np.array([i for _, i in G.degree])
    degrees_k_freq = np.unique(degrees, return_counts=True)[1]
    degrees_corr = numeric_attribute_correlation(G, dict(G.degree),
                                                 dict(G.degree))

    # clustering
    global_clustering = transitivity(G)
    local_clustering_mean = average_clustering(G)

    # fraction of connected node pairs (any path len)
    f_connected_node_pairs = fraction_of_connected_node_pairs(G)

    # centralization
    cent_metrics = centralization_metrics(G, prefix="_ud")

    # modularity
    modularity_metrics = compute_modularity_metrics(G)

    # largest CC
    CC1_nodes = max(connected_components(G), key=len)
    CC1 = G.subgraph(CC1_nodes).copy()
    f_CC1_nodes = len(CC1) / len(G)

    # algebraic_connectivity of the largest CC
    algebraic_connectivity_CC1 = None
    if len(CC1) > 2:
        try:
            algebraic_connectivity_CC1 = algebraic_connectivity(CC1, seed=0)
        except Exception:  # avoid a bare except; the solver may fail to converge
            algebraic_connectivity_CC1 = None

    # connected components
    CC = connected_components(G)
    CC_sizes = np.array([len(cc_i) for cc_i in CC])

    CC_metrics = {}
    for k in CC_k_thresholds:
        CC_metrics[f"n_CC_{k}"] = np.sum(CC_sizes >= k)

    # k-core
    k_core_metrics = {}
    G_core_number = core_number(G)

    for k in k_core_ks:
        k_core_subgraph = k_core(G, k=k, core_number=G_core_number)
        k_core_metrics[f"core_{k}_n_nodes"] = len(k_core_subgraph.nodes)
        k_core_metrics[f"core_{k}_n_edges"] = len(k_core_subgraph.edges)
        k_core_metrics[f"core_{k}_density"] = density(k_core_subgraph)
        k_core_metrics[f"core_{k}_n_CC"] = len(
            list(connected_components(k_core_subgraph)))

    # k-truss
    k_truss_metrics = {}

    for k in k_truss_ks:
        k_truss_subgraph = k_truss(G, k=k)
        k_truss_metrics[f"truss_{k}_n_nodes"] = len(k_truss_subgraph.nodes)
        k_truss_metrics[f"truss_{k}_n_edges"] = len(k_truss_subgraph.edges)
        k_truss_metrics[f"truss_{k}_density"] = density(k_truss_subgraph)
        k_truss_metrics[f"truss_{k}_n_CC"] = len(
            list(connected_components(k_truss_subgraph)))

    metrics = {
        "n_edges_ud":
        len(G.edges()),
        "density_ud":
        density(G),
        # degree stats
        "degrees_mean":
        safe(np.mean, degrees),
        "degrees_var":
        safe(np.var, degrees),
        "degrees_hidx":
        safe(h_index, degrees),
        "degrees_gini":
        safe(gini, degrees + eps),
        "degrees_f0":
        safe(np.mean, (degrees == 0)),
        "degrees_corr":
        degrees_corr,
        "degrees_pk_ent":
        entropy(degrees_k_freq),
        "degrees_pk_gini":
        gini(degrees_k_freq),
        # fraction of connected node pairs with path of any length
        "f_connected_node_pairs_ud":
        f_connected_node_pairs,
        # clustering coefficients
        "global_clustering_ud":
        global_clustering,
        "local_clustering_mean_ud":
        local_clustering_mean,
        # centralization
        **cent_metrics,
        # modularity
        **modularity_metrics,
        # fraction of nodes in the largest CC
        "f_CC1_nodes":
        f_CC1_nodes,
        # algebraic connectivity of the largest CC
        "algebraic_connectivity_CC1":
        algebraic_connectivity_CC1,
        # connected components
        **CC_metrics,
        # k-core
        **k_core_metrics,
        # k-truss
        **k_truss_metrics
    }

    return metrics
Example #28
    def compute_volumes(self, queries=None, evidence=None, cache=True):
        """Computes the unnormalized probabilities of univariate and
        bivariate literals in 'queries' associated to univariate
        literals and a list of uni/bivariate clauses representing the
        'evidence'.

        Returns (Z given evidence, list[volumes of queries given evidence]).

        Raises NotImplementedError if the literals are not uni/bivariate.

        Parameters
        ----------
        queries : list of pysmt.FNode instances (optional)
            Uni/bivariate literals
        evidence : iterable of pysmt.FNode instances (optional)
            Uni/bivariate clauses, default: None
        cache : bool (optional)
            If True, integrals are cached, default: True
        """

        if not nx.is_forest(self.primal.G):
            raise NotImplementedError(
                "MP requires a forest-shaped primal graph")

        if queries is None:
            queries = []
        else:
            queries = [flip_negated_literals_cnf(q) for q in queries]

        if cache is False:
            self.cache = None
        elif cache is True and self.cache is None:
            self.cache = Manager().dict()
            self.cache_hit = [0, 0]
        else:
            self.cache = Manager().dict(self.cache)  # needed?

        # compute the partition function as the product of the marginals of any node
        # for each connected component in the primal graph
        components = list(connected_components(self.primal.G))
        subproblems = []
        pysmt_env = get_env()
        for comp_vars in components:
            subprimal = PrimalGraph.from_graph(
                self.primal.G.subgraph(comp_vars))
            subvars = {subprimal.nodes()[n]['var'] for n in subprimal.nodes()}

            submarginals = {
                k: v
                for k, v in self.marginals.items() if k in comp_vars
            }
            """
            if self.cache is not None:
                subcache = self.cache
            else:
                subcache = None
            """

            if evidence is None:
                subevidence = None
            else:
                subevidence = [
                    e for e in evidence
                    if set(e.get_free_symbols()).issubset(subvars)
                ]
            subproblems.append(
                (subprimal, submarginals, self.smt_solver, self.cache,
                 self.tolerance, self.rand_gen, pysmt_env, subevidence))

        with Pool(processes=self.n_processes) as pool:
            results = pool.starmap(MP2WMI._compute_marginals, subproblems)

        for submarginals, ch in results:
            self.marginals.update(submarginals)
            if self.cache is not None:
                self.cache_hit[True] += ch[True]
                self.cache_hit[False] += ch[False]

        Z_components = []
        for comp_vars in components:
            x = list(comp_vars)[0]
            full_marginal = MP2WMI._get_full_marginal(self.primal,
                                                      self.marginals,
                                                      self.tolerance, x)
            comp_Z, ch = MP2WMI._piecewise_symbolic_integral(
                self.cache, full_marginal, x)
            if self.cache is not None:
                self.cache_hit[True] += ch[True]
                self.cache_hit[False] += ch[False]

            Z_components.append(comp_Z)

        query_volumes = []
        for q in queries:
            q_vars = list(q.get_free_variables())
            if not all([qv.symbol_type() == REAL for qv in q_vars]):
                raise NotImplementedError("Supporting lra queries only")

            x = q_vars[0].symbol_name()
            if len(q_vars) == 1:

                # univariate query
                l, u = domains_to_intervals(q)[0]
                q_msg = [(l, u, 1)]

                # intersecting with the node symbolic marginal
                q_marginal = MP2WMI._get_msgs_intersection([
                    MP2WMI._get_full_marginal(self.primal, self.marginals,
                                              self.tolerance, x), q_msg
                ], self.tolerance)
                q_vol, ch = MP2WMI._piecewise_symbolic_integral(
                    self.cache, q_marginal, x)
                if self.cache is not None:
                    self.cache_hit[True] += ch[True]
                    self.cache_hit[False] += ch[False]

                # account for the volume of unconnected variables
                for i, comp_vars in enumerate(components):
                    if x not in comp_vars:
                        q_vol *= Z_components[i]

                query_volumes.append(q_vol)

            elif len(q_vars) == 2:

                # bivariate query
                y = q_vars[1].symbol_name()

                # creates a new message using the query 'q' as evidence
                q_marginal, ch = MP2WMI._compute_message(self.primal,
                                                         self.marginals,
                                                         self.smt_solver,
                                                         self.cache,
                                                         self.tolerance,
                                                         x,
                                                         y,
                                                         evidence=[q])

                if self.cache is not None:
                    self.cache_hit[True] += ch[True]
                    self.cache_hit[False] += ch[False]

                marg_not_x = [
                    self.marginals[y][z] for z in self.marginals[y] if z != x
                ]
                q_marginal = MP2WMI._get_msgs_intersection(
                    [q_marginal] + marg_not_x, self.tolerance)

                y_potentials = self.primal.nodes()[y]['potentials']
                if len(y_potentials) > 0:
                    potential_msgs = MP2WMI._parse_potentials(
                        y_potentials,
                        self.primal.nodes()[y]['var'])
                    q_marginal = self._get_msgs_intersection(
                        potential_msgs + [q_marginal], self.tolerance)

                q_vol, ch = MP2WMI._piecewise_symbolic_integral(
                    self.cache, q_marginal, y)

                if self.cache is not None:
                    self.cache_hit[True] += ch[True]
                    self.cache_hit[False] += ch[False]

                # account for the volume of unconnected variables
                for i, comp_vars in enumerate(components):
                    if x not in comp_vars:
                        q_vol *= Z_components[i]

                query_volumes.append(q_vol)

            else:
                raise NotImplementedError(
                    "Queries of arity > 2 aren't supported")

        Z = 1.0
        for Z_comp in Z_components:
            Z *= Z_comp

        if self.cache is not None:
            # TODO: check if cache_hit index should be True or False
            print("\tHITS: {}/{} (ratio {})".format(
                self.cache_hit[True], sum(self.cache_hit),
                self.cache_hit[True] / sum(self.cache_hit)))

        Z = float(Z.as_expr())
        query_volumes = [float(qv.as_expr()) for qv in query_volumes]
        return Z, query_volumes
Example #29
    def to_bayesian_model(self):
        """
        Creates a Bayesian Model which is a minimum I-Map for this Markov model.

        The ordering of parents may not remain constant, since it depends on
        the ordering of variables in the junction tree (which is itself not
        constant). Also, if the model is not connected, the connected
        components are treated as separate models, converted, and then joined
        together.

        Examples
        --------
        >>> from ProbabilityModel.models import MarkovModel
        >>> from ProbabilityModel.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> bm = mm.to_bayesian_model()
        """
        from ProbabilityModel.models import BayesianModel

        # If the graph is not connected, treat them as separate models and join them together in the end.
        bms = []
        for node_set in connected_components(self):
            bm = BayesianModel()
            var_clique_dict = defaultdict(tuple)
            var_order = []

            subgraph = self.subgraph(node_set)

            # Create a junction tree from the markov model.
            # Creation of clique tree involves triangulation, finding maximal cliques
            # and creating a tree from these cliques
            junction_tree = MarkovModel(subgraph.edges()).to_junction_tree()

            # create an ordering of the nodes based on the ordering of the clique
            # in which it appeared first
            root_node = next(iter(junction_tree.nodes()))
            bfs_edges = nx.bfs_edges(junction_tree, root_node)
            for node in root_node:
                var_clique_dict[node] = root_node
                var_order.append(node)
            for edge in bfs_edges:
                clique_node = edge[1]
                for node in clique_node:
                    if not var_clique_dict[node]:
                        var_clique_dict[node] = clique_node
                        var_order.append(node)

            # create a bayesian model by adding edges from parent of node to node as
            # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}}
            for node_index in range(len(var_order)):
                node = var_order[node_index]
                node_parents = (set(var_clique_dict[node]) -
                                set([node])).intersection(
                                    set(var_order[:node_index]))
                bm.add_edges_from([(parent, node) for parent in node_parents])
                # TODO : Convert factor into CPDs
            bms.append(bm)

        # Join the bms in a single model.
        final_bm = BayesianModel()
        for bm in bms:
            final_bm.add_edges_from(bm.edges())
            final_bm.add_nodes_from(bm.nodes())
        return final_bm