Beispiel #1
0
def decompose_paired_neighborhoods(graph_component,
                                   radius=None,
                                   distance=None,
                                   min_radius=0,
                                   max_radius=1,
                                   min_distance=0,
                                   max_distance=0):
    """Decompose each subgraph into the node components found by ``get_pairs``.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        radius: If given, overrides both ``min_radius`` and ``max_radius``.
        distance: If given, overrides both ``min_distance`` and
            ``max_distance``.
        min_radius, max_radius: Radius range forwarded to ``get_pairs``.
        min_distance, max_distance: Distance range forwarded to
            ``get_pairs``.

    Returns:
        A new ``GraphComponent`` over the same graph, holding one subgraph
        per extracted node component, each tagged with a signature derived
        from its parent's signature.
    """
    # A scalar argument pins both ends of the corresponding range.
    if radius is not None:
        min_radius, max_radius = radius, radius
    if distance is not None:
        min_distance, max_distance = distance, distance

    # NOTE(review): the signature below is built from the raw ``radius`` and
    # ``distance`` arguments, not from the min/max ranges, so calls that only
    # set the ranges all serialize ``None`` values -- confirm this is intended.
    plain_neighborhood = distance == 0

    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        node_components = get_pairs(parent, min_radius, max_radius,
                                    min_distance, max_distance)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    node_components)
        if plain_neighborhood:
            tag = ['neighborhood', radius]
        else:
            tag = ['paired_neighborhoods', radius, distance]
        piece_signature = serialize(tag, parent_signature)
        # Every piece extracted from this parent shares the same signature.
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #2
0
def decompose_node_join(graph_component, min_size=1, max_size=1):
    """Join pairs of subgraphs whose node overlap has an admissible size.

    Every unordered pair of distinct subgraphs (by position) is examined.
    When the number of shared nodes lies in ``[min_size, max_size]`` the
    union of the two node sets becomes a new component.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are paired up.
        min_size: Minimum number of shared nodes required.
        max_size: Maximum number of shared nodes allowed, or ``None`` for
            no upper bound. A value smaller than ``min_size`` is raised to
            ``min_size``.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        accepted pair.
    """
    # ``None`` means "unbounded"; comparing it with an int would raise
    # TypeError, so only clamp when a real upper bound was supplied.
    if max_size is not None and max_size < min_size:
        max_size = min_size

    new_subgraphs_list = []
    new_signatures_list = []

    # Materialise the node sets once instead of rebuilding them per pair.
    members = [(set(subgraph.nodes()), signature)
               for subgraph, signature in zip(graph_component.subgraphs,
                                              graph_component.signatures)]

    for i, (g_node_set, signature_i) in enumerate(members):
        # Only look at later members so each unordered pair is seen once.
        for m_node_set, signature_j in members[i + 1:]:
            intr_size = len(g_node_set & m_node_set)
            if intr_size < min_size:
                continue
            if max_size is not None and intr_size > max_size:
                continue
            component = g_node_set | m_node_set
            new_subgraphs = get_subgraphs_from_node_components(
                graph_component.graph, [component])
            new_signature = serialize(['node_join', min_size, max_size],
                                      signature_i, signature_j)
            new_subgraphs_list += new_subgraphs
            new_signatures_list += [new_signature]

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
Beispiel #3
0
def decompose_break(graph_component, min_size=1, max_size=None, n_edges=1):
    """Split subgraphs by removing every combination of ``n_edges`` edges.

    For each subgraph, each combination of ``n_edges`` edges is removed from
    a copy. If the copy then has at least two connected components, those
    whose node count lies between the effective minimum and maximum sizes
    are kept. Duplicate node sets found for the same subgraph are collapsed.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        min_size: Lower size bound, resolved by ``compute_effective_size``.
        max_size: Upper size bound, resolved by ``compute_effective_size``
            (how ``None`` is resolved is up to that helper).
        n_edges: Number of edges removed simultaneously.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        distinct retained component.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        found = []
        lower = compute_effective_size(parent, min_size)
        upper = compute_effective_size(parent, max_size)
        for removed_edges in combinations(parent.edges(), n_edges):
            pruned = parent.copy()
            for u, v in removed_edges:
                pruned.remove_edge(u, v)
            # Removing the edges must actually disconnect the subgraph.
            if nx.number_connected_components(pruned) < 2:
                continue
            for nodes in nx.connected_components(pruned):
                if lower <= len(nodes) <= upper:
                    # Sorted tuples make equal node sets hash identically.
                    found.append(tuple(sorted(nodes)))
        for nodes in set(found):
            pieces = get_subgraphs_from_node_components(
                graph_component.graph, [nodes])
            piece_signature = serialize(['nbreak', min_size, n_edges],
                                        parent_signature)
            out_subgraphs.extend(pieces)
            out_signatures.append(piece_signature)

    return GraphComponent(
        graph=graph_component.graph,
        subgraphs=out_subgraphs,
        signatures=out_signatures)
Beispiel #4
0
def decompose_frequency(graph_component,
                        min_size=1,
                        max_size=None,
                        disjoint=True):
    """Decompose the graph by how many subgraphs cover each node.

    Counts, for every node, the number of subgraphs that contain it and
    hands these counts to ``_frequency_decomposition`` together with the
    ``min_size``/``max_size`` bounds and the ``disjoint`` flag.

    Args:
        graph_component: Component whose subgraphs provide the coverage.
        min_size: Lower count bound forwarded to the helper.
        max_size: Upper count bound forwarded to the helper.
        disjoint: Forwarded unchanged to the helper.

    Returns:
        A new ``GraphComponent`` over the same graph. All resulting
        subgraphs share one signature built from the sorted, de-duplicated
        input signatures joined with ``'*'``.
    """
    merged_signature = '*'.join(sorted(set(graph_component.signatures)))

    # Coverage count: how many subgraphs each node appears in.
    node_counts = dict()
    for member in graph_component.subgraphs:
        for node in member.nodes():
            node_counts[node] = node_counts.get(node, 0) + 1

    pieces = _frequency_decomposition(graph_component.graph, node_counts,
                                      min_size, max_size, disjoint)
    piece_signature = serialize(['frequency', min_size, max_size],
                                merged_signature)
    piece_signatures = [piece_signature] * len(pieces)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=list(pieces),
                          signatures=piece_signatures)
Beispiel #5
0
def decompose_intersection(graph_component_A, graph_component_B):
    """Build components from the node overlap of subgraphs in A and B.

    Every subgraph of ``graph_component_A`` is compared with every subgraph
    of ``graph_component_B``. A pair contributes its shared nodes as a new
    component when the overlap is non-empty and the two node sets are not
    identical.

    Returns:
        A new ``GraphComponent`` over A's graph with one entry per
        contributing pair.
    """
    out_subgraphs = []
    out_signatures = []

    for g, signature_A in zip(graph_component_A.subgraphs,
                              graph_component_A.signatures):
        nodes_a = set(g.nodes())
        for m, signature_B in zip(graph_component_B.subgraphs,
                                  graph_component_B.signatures):
            nodes_b = set(m.nodes())
            shared = nodes_a & nodes_b
            # Skip disjoint pairs and pairs covering exactly the same nodes.
            if not shared or nodes_a == nodes_b:
                continue
            out_subgraphs.extend(
                get_subgraphs_from_node_components(graph_component_A.graph,
                                                   [shared]))
            out_signatures.append(
                serialize(['intersection'], signature_A, signature_B))

    return GraphComponent(graph=graph_component_A.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #6
0
def decompose_pair(graph_component, distance=1):
    """Collect the distinct components returned by ``pair_decomposition``.

    Args:
        graph_component: Component whose graph and subgraphs are passed to
            ``pair_decomposition``.
        distance: Forwarded to ``pair_decomposition`` and recorded in the
            signature.

    Returns:
        A new ``GraphComponent`` over the same graph containing each
        distinct component once, tagged with the signature of the subgraph
        being visited when the component was first seen.
    """
    out_subgraphs = []
    out_signatures = []
    seen = set()
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for _parent, parent_signature in parents:
        # NOTE(review): this call takes the full subgraph list and never
        # uses the loop's current subgraph, so every iteration asks for the
        # same decomposition; only the ``seen`` filter stops duplicates.
        # Confirm whether a per-subgraph call was intended.
        candidates = pair_decomposition(graph_component.graph,
                                        graph_component.subgraphs,
                                        distance=distance)
        fresh = []
        for candidate in candidates:
            key = tuple(candidate)
            if key in seen:
                continue
            seen.add(key)
            fresh.append(candidate)
        if not fresh:
            continue
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    fresh)
        piece_signature = serialize(['pair', distance], parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #7
0
def decompose_pair_binary(graph_component_first,
                          graph_component_second,
                          distance=1,
                          keep_second_component=True):
    """Pair subgraphs of two components lying at a given distance.

    ``preprocess_distances`` supplies two per-node lookups, ``near_sets``
    and ``dist_sets``. According to the original author's notes these hold,
    for each node, the nodes closer than ``distance`` and the nodes at
    ``distance`` respectively -- verify against that helper.

    A subgraph ``g`` of the first component is paired with a subgraph ``m``
    of the second when ``m`` contains no node from ``g``'s near set and at
    least one node from ``g``'s distance set (after removing near nodes).

    Args:
        graph_component_first: Source of the ``g`` subgraphs; its graph is
            used for distances and for extracting the result subgraphs.
        graph_component_second: Source of the ``m`` subgraphs.
        distance: Forwarded to ``preprocess_distances`` and recorded in the
            signature.
        keep_second_component: If true the new component is the union of
            both node sets, otherwise only ``g``'s nodes.

    Returns:
        A new ``GraphComponent`` over the first component's graph with one
        subgraph and one signature per accepted pair.
    """
    new_subgraphs_list = []
    new_signatures_list = []

    near_sets, dist_sets = preprocess_distances(graph_component_first.graph,
                                                distance)

    for g, signature_g in zip(graph_component_first.subgraphs,
                              graph_component_first.signatures):
        g_node_set = set(g.nodes())
        g_near_set = set()
        g_dist_set = set()
        for u in g_node_set:
            g_near_set.update(near_sets[u])
            g_dist_set.update(dist_sets[u])
        # A node that is near some member of g does not count as distant.
        g_dist_set -= g_near_set
        if not g_dist_set:
            continue

        for m, signature_m in zip(graph_component_second.subgraphs,
                                  graph_component_second.signatures):
            m_node_set = set(m.nodes())
            # m must stay clear of the near set and touch the distant set.
            if not g_near_set.isdisjoint(m_node_set):
                continue
            if g_dist_set.isdisjoint(m_node_set):
                continue

            if keep_second_component:
                component = g_node_set.union(m_node_set)
            else:
                component = g_node_set
            new_subgraph = get_subgraphs_from_node_components(
                graph_component_first.graph, [component])[0]
            new_subgraphs_list.append(new_subgraph)
            new_signature = serialize(
                ['pair_binary', distance, keep_second_component],
                signature_g, signature_m)
            # append, not +=: extending a list with a string would add one
            # entry per character and misalign signatures with subgraphs.
            new_signatures_list.append(new_signature)

    return GraphComponent(graph=graph_component_first.graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
Beispiel #8
0
def decompose_connected_component(graph_component):
    """Split the component's subgraphs into their connected components.

    The subgraphs are obtained via ``get_subgraphs_from_graph_component``;
    when there are none, the whole graph is used instead.

    Returns:
        A new ``GraphComponent`` over the same graph whose
        ``node_components`` are the node sets of all connected components,
        with no edge components and a ``'connected'`` signature derived
        from the input signature.
    """
    parents = get_subgraphs_from_graph_component(graph_component)
    if len(parents) == 0:
        # Nothing decomposed yet: fall back to the full graph.
        parents = [graph_component.graph]

    node_components = []
    for parent in parents:
        node_components.extend(
            set(nodes) for nodes in nx.connected_components(parent))

    return GraphComponent(graph=graph_component.graph,
                          node_components=node_components,
                          edge_components=[],
                          signature=serialize(['connected'],
                                              graph_component.signature))
Beispiel #9
0
def decompose_abstract_and_non_abstract(graph_component,
                                        node_label=None,
                                        edge_label=None,
                                        isa_label='isa',
                                        mode='intersection'):
    """Augment the graph with one extra node per subgraph.

    A new ``nx.Graph`` is built from the original graph. For every subgraph
    an additional node is added that carries the subgraph's signature as
    ``label`` and the subgraph's nodes as ``isa``; it is connected to each
    of those nodes by an edge labelled ``isa_label`` with ``nesting=True``.
    Pairs of these additional nodes are then connected when their subgraphs
    overlap, where "overlap" depends on ``mode``.

    Args:
        graph_component: Component providing the graph, subgraphs and
            signatures.
        node_label: If truthy, passed to ``nx.set_node_attributes`` for the
            ``'label'`` attribute after the graph is built.
        edge_label: If truthy, passed to ``nx.set_edge_attributes`` for the
            ``'label'`` attribute after the graph is built.
        isa_label: Label put on the edges linking an additional node to the
            nodes of its subgraph.
        mode: ``'intersection'`` links two additional nodes when their
            subgraphs share nodes; ``'edge'`` links them when the results of
            ``_get_edges`` for the two node sets share elements. Any other
            value adds no such links.

    Returns:
        A ``GraphComponent`` whose graph is the augmented graph, with that
        graph as its single subgraph and a single signature.
    """
    graph = nx.Graph(graph_component.graph)
    n = len(graph_component.graph)
    for ii, (subgraph_i, signature_i) in enumerate(
            zip(graph_component.subgraphs, graph_component.signatures)):
        # Id of the additional node for subgraph ii.
        # NOTE(review): assumes ii + n does not collide with an existing
        # node id (e.g. original nodes are 0..n-1) -- TODO confirm.
        i = ii + n
        graph.add_node(i, label=signature_i, isa=list(subgraph_i.nodes()))
        for v in subgraph_i.nodes():
            graph.add_edge(i, v, label=isa_label, nesting=True)
        # Compare with every later subgraph; signature_j is not used.
        for jj, (subgraph_j, signature_j) in enumerate(
                zip(graph_component.subgraphs, graph_component.signatures)):
            if jj > ii:
                j = jj + n
                if mode == 'intersection':
                    intersect = set(subgraph_i.nodes()) & set(
                        subgraph_j.nodes())
                    if intersect:  # Not empty
                        # The edge may reference node j before its own
                        # add_node call; that later call sets j's attributes.
                        graph.add_edge(i,
                                       j,
                                       label=len(intersect),
                                       isa=list(intersect))
                if mode == 'edge':
                    subgraph_i_all_edges = _get_edges(graph_component.graph,
                                                      subgraph_i.nodes())
                    subgraph_j_all_edges = _get_edges(graph_component.graph,
                                                      subgraph_j.nodes())
                    intersect = set(subgraph_i_all_edges) & set(
                        subgraph_j_all_edges)
                    if intersect:  # Not empty
                        graph.add_edge(i,
                                       j,
                                       label=len(intersect),
                                       isa=list(intersect))

    # Optional overrides of the 'label' attribute on nodes / edges.
    if node_label:
        nx.set_node_attributes(graph, node_label, 'label')
    if edge_label:
        nx.set_edge_attributes(graph, edge_label, 'label')
    # One combined signature: sorted unique input signatures joined by '_'.
    signature = '_'.join(sorted(set(graph_component.signatures)))
    new_signature = serialize(['abstract_non_abstract'], signature)

    gc = GraphComponent(graph=graph,
                        subgraphs=[graph],
                        signatures=[new_signature])
    return gc
Beispiel #10
0
def decompose_discriminative(graph_component,
                             min_size=2,
                             max_size=8,
                             node_scoring_func=None,
                             threshold=0):
    """Apply ``discriminative_decomposition`` and tag the result.

    Args:
        graph_component: Component to decompose.
        min_size, max_size, threshold: Forwarded to
            ``discriminative_decomposition`` and recorded in the signature.
        node_scoring_func: Scoring callable forwarded to
            ``discriminative_decomposition``. Its name is recorded in the
            signature.

    Returns:
        The ``GraphComponent`` produced by ``decompose``, with its
        ``signature`` replaced by a ``'discriminative'`` signature derived
        from the input signature.
    """
    func = discriminative_decomposition(min_size=min_size,
                                        max_size=max_size,
                                        node_scoring_func=node_scoring_func,
                                        threshold=threshold)
    gc = decompose(graph_component, func)

    # Not every callable has ``__name__`` (e.g. functools.partial objects or
    # the default ``None``); fall back to the type name rather than raising
    # AttributeError after the decomposition has already been computed.
    scoring_name = getattr(node_scoring_func, '__name__', None)
    if scoring_name is None and node_scoring_func is not None:
        scoring_name = type(node_scoring_func).__name__

    gc.signature = serialize(
        ['discriminative', min_size, max_size, scoring_name, threshold],
        graph_component.signature)
    return gc
Beispiel #11
0
def decompose_central_and_non_central(graph_component, k_top=2):
    """Decompose each subgraph with ``central_and_non_central_decomposition``.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        k_top: Forwarded to the decomposition helper and recorded in the
            signature.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        extracted node component.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        node_components = central_and_non_central_decomposition(parent, k_top)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    node_components)
        piece_signature = serialize(['central_and_non_central', k_top],
                                    parent_signature)
        # All pieces cut from the same parent share one signature.
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(
        graph=graph_component.graph,
        subgraphs=out_subgraphs,
        signatures=out_signatures)
Beispiel #12
0
def decompose_context(graph_component, radius=1):
    """Replace each subgraph with its context edge component.

    For every subgraph, ``context_component_decomposition`` is called on the
    full graph, the subgraph and ``radius``; its result is treated as a
    single edge component.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        radius: Forwarded to the helper and recorded in the signature.

    Returns:
        A new ``GraphComponent`` over the same graph.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        context_edges = context_component_decomposition(
            graph_component.graph, parent, radius)
        pieces = get_subgraphs_from_edge_components(graph_component.graph,
                                                    [context_edges])
        piece_signature = serialize(['context', radius], parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(
        graph=graph_component.graph,
        subgraphs=out_subgraphs,
        signatures=out_signatures)
Beispiel #13
0
def decompose_communities(graph_component):
    """Decompose each subgraph with ``greedy_modularity_communities``.

    Returns:
        A new ``GraphComponent`` over the same graph with one subgraph per
        community returned by the helper, each tagged with a
        ``'communities'`` signature derived from its parent's signature.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        communities = greedy_modularity_communities(parent)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    communities)
        piece_signature = serialize(['communities'], parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #14
0
def decompose_dilatate(graph_component, radius=1):
    """Replace each subgraph with its neighborhood component.

    For every subgraph, ``get_component_neighborhood_component`` is called
    on the full graph, the subgraph and ``radius``; its result is used as a
    single node component.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        radius: Forwarded to the helper and recorded in the signature.

    Returns:
        A new ``GraphComponent`` over the same graph. Exactly one signature
        is recorded per input subgraph.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        grown = get_component_neighborhood_component(graph_component.graph,
                                                     parent, radius)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    [grown])
        piece_signature = serialize(['dilatate', radius], parent_signature)
        out_subgraphs.extend(pieces)
        out_signatures.append(piece_signature)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #15
0
def decompose_graphlet(graph_component, size=5):
    """Decompose each subgraph with ``graphlet_decomposition``.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        size: Forwarded to the helper and recorded in the signature.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        node component returned by the helper.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        node_components = graphlet_decomposition(parent, size=size)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    node_components)
        piece_signature = serialize(['graphlet', size], parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #16
0
def decompose_cycles_and_non_cycles(graph_component):
    """Decompose each subgraph with ``cycle_and_non_cycle_decomposition``.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        node component returned by the helper, each tagged with a
        ``'cycles_and_non_cycles'`` signature derived from its parent's
        signature.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        node_components = cycle_and_non_cycle_decomposition(parent)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    node_components)
        piece_signature = serialize(['cycles_and_non_cycles'],
                                    parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #17
0
def decompose_relation(relation_graph_component,
                       graph_component,
                       type_of='single'):
    """Join pairs of subgraphs that are related through a relation subgraph.

    For each subgraph ``g`` of ``relation_graph_component``, the subgraphs
    of ``graph_component`` that overlap ``g`` in the way selected by
    ``type_of`` are collected. Every unordered pair of collected subgraphs
    yields the union of their node sets as a new component.

    Args:
        relation_graph_component: Provides the relation subgraphs and the
            graph used to extract the result subgraphs.
        graph_component: Provides the candidate subgraphs.
        type_of: ``'single'`` requires exactly one shared node,
            ``'partial'`` at least one, ``'total'`` as many shared nodes as
            ``g`` has nodes. Any other value selects nothing.

    Returns:
        A new ``GraphComponent`` over the relation component's graph.
    """
    out_subgraphs = []
    out_signatures = []

    for g, signature_C in zip(relation_graph_component.subgraphs,
                              relation_graph_component.signatures):
        g_node_set = set(g.nodes())
        # (node set, signature) of every candidate related to g.
        related = []
        for m, signature in zip(graph_component.subgraphs,
                                graph_component.signatures):
            m_node_set = set(m.nodes())
            overlap = len(g_node_set & m_node_set)
            if type_of == 'single':
                accepted = overlap == 1
            elif type_of == 'partial':
                accepted = overlap >= 1
            elif type_of == 'total':
                accepted = overlap == len(g_node_set)
            else:
                accepted = False
            if accepted:
                related.append((m_node_set, signature))

        # combinations() yields nothing for fewer than two candidates.
        for first, second in itertools.combinations(related, 2):
            component_A, signature_A = first
            component_B, signature_B = second
            joined = component_A.union(component_B)
            out_subgraphs.extend(
                get_subgraphs_from_node_components(
                    relation_graph_component.graph, [joined]))
            out_signatures.append(
                serialize(['relation', type_of], signature_C, signature_A,
                          signature_B))

    return GraphComponent(graph=relation_graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #18
0
def decompose_iterated_clique(
        graph_component, n_iter=1, min_n_iter=0, min_size=2, max_size=None):
    """Apply ``iterated_clique_decomposition`` to the component.

    The seed components are the input's node components followed by its
    edge components converted via ``edge_to_node_components``.

    Args:
        graph_component: Component providing the graph, ``node_components``,
            ``edge_components`` and ``signature``.
        n_iter, min_n_iter, min_size, max_size: Forwarded to
            ``iterated_clique_decomposition`` and recorded in the signature.

    Returns:
        A new ``GraphComponent`` over the same graph whose node components
        are the helper's result, with no edge components.
    """
    # Work on a copy: extending ``graph_component.node_components`` itself
    # would silently grow the caller's component on every call.
    components = list(graph_component.node_components)
    components.extend(
        edge_to_node_components(graph_component.edge_components))
    nc = iterated_clique_decomposition(
        graph_component.graph,
        components=components,
        n_iter=n_iter,
        min_n_iter=min_n_iter,
        min_size=min_size,
        max_size=max_size)
    return GraphComponent(
        graph=graph_component.graph,
        node_components=nc,
        edge_components=[],
        signature=serialize(
            ['iterated_clique', n_iter, min_n_iter, min_size, max_size],
            graph_component.signature))
Beispiel #19
0
def decompose_degree_and_non_degree(graph_component, min_size=2, max_size=None):
    """Decompose each subgraph with ``degree_non_degree_decomposition``.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        min_size, max_size: Forwarded to the helper and recorded in the
            signature.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        node component returned by the helper.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        node_components = degree_non_degree_decomposition(
            parent, min_size=min_size, max_size=max_size)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    node_components)
        piece_signature = serialize(
            ['degree_and_non_degree', min_size, max_size], parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(
        graph=graph_component.graph,
        subgraphs=out_subgraphs,
        signatures=out_signatures)
Beispiel #20
0
def decompose_relation_binary(relation_graph_component,
                              graph_component_first,
                              graph_component_second,
                              type_of='single',
                              keep_second_component=True):
    """Join subgraphs of two components related through a relation subgraph.

    For each subgraph ``g`` of ``relation_graph_component``,
    ``get_intersecting_subgraphs`` selects the related node sets (and their
    signatures) from the first and from the second component. Every
    (first, second) combination with differing node sets yields a new
    component.

    Args:
        relation_graph_component: Provides the relation subgraphs and the
            graph used to extract the result subgraphs.
        graph_component_first: Source of the first element of each pair.
        graph_component_second: Source of the second element of each pair.
        type_of: Forwarded to ``get_intersecting_subgraphs`` and recorded
            in the signature.
        keep_second_component: If true the new component is the union of
            both node sets, otherwise only the first node set.

    Returns:
        A new ``GraphComponent`` over the relation component's graph with
        one signature per produced component.
    """
    new_subgraphs_list = []
    new_signatures_list = []

    for g, signature_C in zip(relation_graph_component.subgraphs,
                              relation_graph_component.signatures):
        g_node_set = set(g.nodes())
        subcomponents_first, signatures_first = get_intersecting_subgraphs(
            g_node_set, graph_component_first, type_of)
        subcomponents_second, signatures_second = get_intersecting_subgraphs(
            g_node_set, graph_component_second, type_of)
        if len(subcomponents_first) < 1 or len(subcomponents_second) < 1:
            continue

        # Both products enumerate index pairs in the same order, so zipping
        # them keeps each component pair aligned with its signature pair.
        component_pairs = itertools.product(subcomponents_first,
                                            subcomponents_second)
        signatures_pairs = itertools.product(signatures_first,
                                             signatures_second)
        for component_pair, signatures_pair in zip(component_pairs,
                                                   signatures_pairs):
            component_A, component_B = component_pair
            signature_A, signature_B = signatures_pair
            if component_A == component_B:
                continue
            if keep_second_component:
                component = component_A.union(component_B)
            else:
                component = component_A
            new_subgraph = get_subgraphs_from_node_components(
                relation_graph_component.graph, [component])
            new_subgraphs_list += new_subgraph
            new_signature = serialize(
                ['relation_binary', type_of, keep_second_component],
                signature_C, signature_A, signature_B)
            # append, not +=: extending a list with a string would add one
            # entry per character and misalign signatures with subgraphs.
            new_signatures_list.append(new_signature)

    return GraphComponent(graph=relation_graph_component.graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
Beispiel #21
0
def decompose_path(graph_component, length=None, min_len=1, max_len=None):
    """Decompose each subgraph with ``path_decomposition``.

    The helper's results are treated as edge components.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        length, min_len, max_len: Forwarded to ``path_decomposition`` and
            recorded in the signature.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        edge component returned by the helper.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        edge_components = path_decomposition(parent,
                                             length=length,
                                             min_len=min_len,
                                             max_len=max_len)
        pieces = get_subgraphs_from_edge_components(graph_component.graph,
                                                    edge_components)
        piece_signature = serialize(['path', length, min_len, max_len],
                                    parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #22
0
def decompose_partition_frequency(graph_component,
                                  step_size=1,
                                  num_intervals=None,
                                  disjoint=True):
    """Run the frequency decomposition over consecutive count intervals.

    Counts, for every node, the number of subgraphs that contain it, then
    sweeps the observed count range in steps of ``step_size`` and calls
    ``_frequency_decomposition`` once per interval
    ``[min_size, min_size + step_size]``.

    Args:
        graph_component: Component whose subgraphs provide the coverage.
        step_size: Width of each count interval. Ignored when
            ``num_intervals`` is given.
        num_intervals: If given, the step is derived by integer-dividing
            the count range by this number (at least 1).
        disjoint: Forwarded to ``_frequency_decomposition``; when true the
            upper end of the swept range is extended by one.

    Returns:
        A new ``GraphComponent`` over the same graph. It is empty when the
        input has no subgraph nodes.
    """
    new_subgraphs_list = []
    new_signatures_list = []

    signature = '*'.join(sorted(set(graph_component.signatures)))

    # Coverage count: how many subgraphs each node appears in.
    node_counts = dict()
    for subgraph in graph_component.subgraphs:
        for node in subgraph.nodes():
            node_counts[node] = node_counts.get(node, 0) + 1

    # Without any counted node there is no range to sweep; min()/max() on
    # an empty sequence would raise ValueError.
    if node_counts:
        min_count_value = min(node_counts.values())
        max_count_value = max(node_counts.values())
        if disjoint:
            max_count_value = max_count_value + 1
        if num_intervals is not None:
            # Integer division can yield 0 when the range is narrower than
            # num_intervals; range() rejects a zero step, so floor at 1.
            step_size = max(
                1, (max_count_value - min_count_value) // num_intervals)

        for min_size in range(min_count_value, max_count_value, step_size):
            max_size = min_size + step_size
            new_subgraphs = _frequency_decomposition(graph_component.graph,
                                                     node_counts, min_size,
                                                     max_size, disjoint)
            new_signature = serialize(['frequency', min_size, max_size],
                                      signature)
            new_signatures = [new_signature] * len(new_subgraphs)
            new_subgraphs_list += new_subgraphs
            new_signatures_list += new_signatures

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
def decompose_negative(graph_component,
                       ktop=0,
                       part_importance_estimator=None):
    """Decompose each subgraph with ``negative_decomposition``.

    Args:
        graph_component: Component whose ``subgraphs`` and ``signatures``
            are processed in lockstep.
        ktop: Forwarded to the helper and recorded in the signature.
        part_importance_estimator: Forwarded to the helper; not recorded
            in the signature.

    Returns:
        A new ``GraphComponent`` over the same graph with one entry per
        node component returned by the helper.
    """
    out_subgraphs = []
    out_signatures = []
    parents = zip(graph_component.subgraphs, graph_component.signatures)
    for parent, parent_signature in parents:
        node_components = negative_decomposition(
            parent,
            ktop=ktop,
            part_importance_estimator=part_importance_estimator)
        pieces = get_subgraphs_from_node_components(graph_component.graph,
                                                    node_components)
        piece_signature = serialize(['negative', ktop], parent_signature)
        piece_signatures = [piece_signature] * len(pieces)
        out_subgraphs.extend(pieces)
        out_signatures.extend(piece_signatures)

    return GraphComponent(graph=graph_component.graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
Beispiel #24
0
def decompose_all_union(graph_component):
    """Merge all subgraphs and split the result into connected components.

    The subgraphs are composed into a single graph with ``nx.compose``; its
    connected components become the new subgraphs. Because the origin of
    each merged piece is not tracked, every result carries the same
    signature: a ``'union'`` signature derived from the sorted,
    de-duplicated input signatures joined with ``'_'``.

    Returns:
        A new ``GraphComponent`` over the same graph; it has no subgraphs
        when the input has none.
    """
    members = graph_component.subgraphs
    merged_signature = '_'.join(sorted(set(graph_component.signatures)))

    united_subgraphs = []
    united_signatures = []
    if len(members) > 0:
        # Fold all members into one graph, starting from the first.
        union_graph = members[0]
        for member in members[1:]:
            union_graph = nx.compose(union_graph, member)
        united_subgraphs = get_subgraphs_from_node_components(
            graph_component.graph, nx.connected_components(union_graph))
        union_signature = serialize(['union'], merged_signature)
        united_signatures = [union_signature] * len(united_subgraphs)

    return GraphComponent(
        graph=graph_component.graph,
        subgraphs=united_subgraphs,
        signatures=united_signatures)