def decompose_pair(graph_component, distance=1):
    """Collect pair components, keeping each distinct node component once.

    On every iteration over the (subgraph, signature) entries the node
    components produced by ``pair_decomposition`` are filtered against the
    ones already emitted (compared as ``tuple(component)``). The surviving
    components are materialised as subgraphs and tagged with a signature
    built from ``['pair', distance]`` and the entry's signature.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        distance: Forwarded to ``pair_decomposition`` and recorded in the
            signature.

    Returns:
        A ``GraphComponent`` over the same graph holding the new subgraphs
        and their signatures.
    """
    graph = graph_component.graph
    already_emitted = set()
    out_subgraphs = []
    out_signatures = []
    for _, signature in zip(graph_component.subgraphs,
                            graph_component.signatures):
        # NOTE(review): the call receives the whole list of subgraphs, not
        # the subgraph of the current iteration, so only components that
        # were not seen in an earlier iteration survive the filter below
        # -- confirm this is intended.
        candidates = pair_decomposition(
            graph, graph_component.subgraphs, distance=distance)
        fresh = []
        for candidate in candidates:
            key = tuple(candidate)
            if key in already_emitted:
                continue
            already_emitted.add(key)
            fresh.append(candidate)
        if not fresh:
            continue
        extracted = get_subgraphs_from_node_components(graph, fresh)
        tag = serialize(['pair', distance], signature)
        out_subgraphs.extend(extracted)
        out_signatures.extend([tag] * len(extracted))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_paired_neighborhoods(graph_component, radius=None,
                                   distance=None, min_radius=0,
                                   max_radius=1, min_distance=0,
                                   max_distance=0):
    """Decompose each subgraph into (paired) neighborhood components.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        radius: When not None, overrides both ``min_radius`` and
            ``max_radius``.
        distance: When not None, overrides both ``min_distance`` and
            ``max_distance``.
        min_radius, max_radius, min_distance, max_distance: Ranges
            forwarded to ``get_pairs``.

    Returns:
        A ``GraphComponent`` over the same graph. Signatures are tagged
        ``'neighborhood'`` when ``distance == 0`` and
        ``'paired_neighborhoods'`` otherwise.

    Note:
        The signature records the raw ``radius``/``distance`` arguments
        (possibly None), not the effective min/max ranges.
    """
    if radius is not None:
        min_radius, max_radius = radius, radius
    if distance is not None:
        min_distance, max_distance = distance, distance

    graph = graph_component.graph
    single_neighborhood = distance == 0
    out_subgraphs = []
    out_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        node_components = get_pairs(
            subgraph, min_radius, max_radius, min_distance, max_distance)
        extracted = get_subgraphs_from_node_components(graph, node_components)
        if single_neighborhood:
            tag = serialize(['neighborhood', radius], signature)
        else:
            tag = serialize(
                ['paired_neighborhoods', radius, distance], signature)
        out_subgraphs.extend(extracted)
        out_signatures.extend([tag] * len(extracted))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_node_join(graph_component, min_size=1, max_size=1):
    """Join every pair of subgraphs whose shared node count is admissible.

    For each unordered pair of distinct subgraphs the size of the node-set
    intersection is computed; when it lies in ``[min_size, max_size]`` the
    union of the two node sets is turned into a new subgraph.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        min_size: Minimum number of shared nodes.
        max_size: Maximum number of shared nodes; ``None`` means no upper
            bound. A value smaller than ``min_size`` is raised to
            ``min_size``.

    Returns:
        A ``GraphComponent`` over the same graph whose signatures are built
        from ``['node_join', min_size, max_size]`` and the signatures of
        the two joined subgraphs.
    """
    # Only clamp a real bound: comparing None with an int raises TypeError,
    # which previously made the "no upper bound" branch unreachable.
    if max_size is not None and max_size < min_size:
        max_size = min_size

    graph = graph_component.graph
    # Node sets are computed once per subgraph instead of once per pair.
    entries = [
        (set(subgraph.nodes()), signature)
        for subgraph, signature in zip(graph_component.subgraphs,
                                       graph_component.signatures)
    ]
    out_subgraphs = []
    out_signatures = []
    for i, (nodes_i, signature_i) in enumerate(entries):
        # Visit each distinct pair exactly once (j > i).
        for nodes_j, signature_j in entries[i + 1:]:
            overlap = len(nodes_i & nodes_j)
            if overlap < min_size:
                continue
            if max_size is not None and overlap > max_size:
                continue
            joined = nodes_i | nodes_j
            out_subgraphs += get_subgraphs_from_node_components(
                graph, [joined])
            out_signatures.append(
                serialize(['node_join', min_size, max_size],
                          signature_i, signature_j))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_frequency(graph_component, min_size=1, max_size=None,
                        disjoint=True):
    """Decompose by how many subgraphs each node belongs to.

    Counts, for every node, the number of subgraphs of ``graph_component``
    that contain it and hands the counts to ``_frequency_decomposition``
    together with ``min_size``, ``max_size`` and ``disjoint``. According to
    the original inline comment, a resulting component is a connected
    component whose nodes all have a count between ``min_size`` and
    ``max_size``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        min_size: Lower bound forwarded to ``_frequency_decomposition``.
        max_size: Upper bound forwarded to ``_frequency_decomposition``.
        disjoint: Forwarded to ``_frequency_decomposition``.

    Returns:
        A ``GraphComponent`` over the same graph; every subgraph carries
        the same signature, built from ``['frequency', min_size, max_size]``
        and the ``'*'``-joined sorted distinct input signatures.
    """
    graph = graph_component.graph
    merged_signature = '*'.join(sorted(set(graph_component.signatures)))

    # Number of subgraphs each node appears in.
    node_counts = dict()
    for subgraph in graph_component.subgraphs:
        for node in subgraph.nodes():
            node_counts[node] = node_counts.get(node, 0) + 1

    found = _frequency_decomposition(
        graph, node_counts, min_size, max_size, disjoint)
    tag = serialize(['frequency', min_size, max_size], merged_signature)

    out_subgraphs = []
    out_subgraphs.extend(found)
    out_signatures = [tag] * len(found)
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_break(graph_component, min_size=1, max_size=None, n_edges=1):
    """Split each subgraph by removing every combination of ``n_edges`` edges.

    For each subgraph, every combination of ``n_edges`` edges is removed
    from a copy. When the copy falls apart into at least two connected
    components, those whose node count lies between the effective minimum
    and maximum size are kept (deduplicated per subgraph).

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        min_size: Passed to ``compute_effective_size`` to get the lower
            bound on the fragment size.
        max_size: Passed to ``compute_effective_size`` to get the upper
            bound (presumably None is mapped to a usable bound there --
            TODO confirm).
        n_edges: Number of edges removed simultaneously.

    Returns:
        A ``GraphComponent`` over the same graph. Signatures are built from
        ``['nbreak', min_size, n_edges]`` (``max_size`` is not recorded)
        and the originating subgraph's signature.
    """
    graph = graph_component.graph
    new_subgraphs_list = []
    new_signatures_list = []
    # Each subgraph is processed independently of the others.
    for g, signature in zip(graph_component.subgraphs,
                            graph_component.signatures):
        effective_min_size = compute_effective_size(g, min_size)
        effective_max_size = compute_effective_size(g, max_size)
        components = []
        for edges in combinations(g.edges(), n_edges):
            gp = g.copy()
            # Distinct names: the endpoints used to shadow the outer index.
            for u, v in edges:
                gp.remove_edge(u, v)
            # Compute the connected components once and reuse them.
            parts = list(nx.connected_components(gp))
            if len(parts) < 2:
                continue
            for part in parts:
                if effective_min_size <= len(part) <= effective_max_size:
                    components.append(tuple(sorted(part)))
        # Different edge combinations can yield the same fragment.
        for component in set(components):
            new_subgraphs_list += get_subgraphs_from_node_components(
                graph, [component])
            new_signatures_list.append(
                serialize(['nbreak', min_size, n_edges], signature))
    return GraphComponent(graph=graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
def decompose_intersection(graph_component_A, graph_component_B):
    """Emit the node-set intersection of every overlapping A/B subgraph pair.

    A pair contributes only when the two node sets share at least one node
    and are not identical.

    Args:
        graph_component_A: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``; its graph is used for the result.
        graph_component_B: Object exposing ``subgraphs`` and ``signatures``.

    Returns:
        A ``GraphComponent`` over A's graph whose signatures are built from
        ``['intersection']`` and the two contributing signatures.
    """
    graph = graph_component_A.graph
    out_subgraphs = []
    out_signatures = []
    for g, signature_A in zip(graph_component_A.subgraphs,
                              graph_component_A.signatures):
        nodes_a = set(g.nodes())
        for m, signature_B in zip(graph_component_B.subgraphs,
                                  graph_component_B.signatures):
            nodes_b = set(m.nodes())
            shared = nodes_a.intersection(nodes_b)
            # Skip disjoint pairs and pairs covering exactly the same nodes.
            if not shared or nodes_a == nodes_b:
                continue
            out_subgraphs += get_subgraphs_from_node_components(
                graph, [shared])
            out_signatures.append(
                serialize(['intersection'], signature_A, signature_B))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_pair_binary(graph_component_first, graph_component_second,
                          distance=1, keep_second_component=True):
    """Pair subgraphs of two components that lie at the requested distance.

    ``preprocess_distances`` provides, per node, a "near" set and a "dist"
    set (per the original comments: nodes closer than ``distance`` and
    nodes exactly at ``distance``). A subgraph ``g`` of the first component
    is paired with a subgraph ``m`` of the second one when no node of ``m``
    is near ``g`` and at least one node of ``m`` is in ``g``'s dist set.

    Args:
        graph_component_first: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``; its graph is used for the result.
        graph_component_second: Object exposing ``subgraphs`` and
            ``signatures``.
        distance: Forwarded to ``preprocess_distances`` and recorded in the
            signature.
        keep_second_component: When true the new component is the union of
            both node sets, otherwise only the nodes of ``g``.

    Returns:
        A ``GraphComponent`` over the first component's graph with exactly
        one signature per emitted subgraph.
    """
    graph = graph_component_first.graph
    new_subgraphs_list = []
    new_signatures_list = []
    near_sets, dist_sets = preprocess_distances(graph, distance)
    for g, signature_g in zip(graph_component_first.subgraphs,
                              graph_component_first.signatures):
        g_node_set = set(g.nodes())
        g_near_set = set()
        g_dist_set = set()
        for u in g_node_set:
            g_near_set.update(near_sets[u])
            g_dist_set.update(dist_sets[u])
        # A node that is near some node of g does not count as "at distance".
        g_dist_set = g_dist_set.difference(g_near_set)
        if not g_dist_set:
            continue
        for m, signature_m in zip(graph_component_second.subgraphs,
                                  graph_component_second.signatures):
            m_node_set = set(m.nodes())
            # m must have no node closer than the target distance and at
            # least one node exactly at the target distance.
            has_near_node = not g_near_set.isdisjoint(m_node_set)
            has_dist_node = not g_dist_set.isdisjoint(m_node_set)
            if has_near_node or not has_dist_node:
                continue
            if keep_second_component:
                component = g_node_set.union(m_node_set)
            else:
                component = g_node_set
            new_subgraph = get_subgraphs_from_node_components(
                graph, [component])[0]
            new_signature = serialize(
                ['pair_binary', distance, keep_second_component],
                signature_g, signature_m)
            new_subgraphs_list.append(new_subgraph)
            # append, not +=: extending a list with a string adds one entry
            # per character and desynchronises subgraphs and signatures.
            new_signatures_list.append(new_signature)
    return GraphComponent(graph=graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
def decompose(graph_component, func):
    """Apply ``func`` to the subgraphs of ``graph_component``.

    The subgraphs are obtained via ``get_subgraphs_from_graph_component``;
    when that yields nothing the whole graph is used as the only subgraph.
    ``accumulate(func, subgraphs)`` produces the resulting node components.

    Args:
        graph_component: Object exposing ``graph``.
        func: Decomposition callable forwarded to ``accumulate``.

    Returns:
        A ``GraphComponent`` over the same graph with the new node
        components, no edge components and the signature ``'decompose'``.
    """
    # Fall back to the full graph when there are no subgraphs.
    targets = (get_subgraphs_from_graph_component(graph_component)
               or [graph_component.graph])
    node_components = accumulate(func, targets)
    return GraphComponent(graph=graph_component.graph,
                          node_components=node_components,
                          edge_components=[],
                          signature='decompose')
def decompose_concatenate(*graph_components):
    """Concatenate the subgraphs and signatures of several components.

    Args:
        *graph_components: One or more objects exposing ``graph``,
            ``subgraphs`` and ``signatures``.

    Returns:
        A ``GraphComponent`` holding all (subgraph, signature) entries in
        argument order. The graph of the last argument is used for the
        result.

    Raises:
        ValueError: If no graph component is given (there would be no
            graph to attach the result to).
    """
    if not graph_components:
        raise ValueError(
            'decompose_concatenate requires at least one graph component')
    new_subgraphs_list = []
    new_signatures_list = []
    for graph_component in graph_components:
        for subgraph, signature in zip(graph_component.subgraphs,
                                       graph_component.signatures):
            new_subgraphs_list.append(subgraph)
            new_signatures_list.append(signature)
    return GraphComponent(graph=graph_components[-1].graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
def combined_decomposition_function(in_graph_component):
    """Run every decomposition function and pool the results.

    ``decomposition_functions`` is a free name resolved outside this
    function (not visible here). Each function is applied to
    ``in_graph_component``; the subgraphs and signatures it returns are
    appended, unchanged, in call order.

    Args:
        in_graph_component: Object exposing ``graph``; passed to every
            decomposition function.

    Returns:
        A ``GraphComponent`` over the input graph with the pooled subgraphs
        and their original signatures.
    """
    pooled_subgraphs = []
    pooled_signatures = []
    for decomposition_function in decomposition_functions:
        result = decomposition_function(in_graph_component)
        pooled_subgraphs.extend(result.subgraphs)
        pooled_signatures.extend(result.signatures)
    return GraphComponent(graph=in_graph_component.graph,
                          subgraphs=pooled_subgraphs,
                          signatures=pooled_signatures)
def decompose_relabel_max_node_degree(graph_component):
    """Append the maximum node degree of each subgraph to its signature.

    Subgraphs are passed through unchanged; each signature gets the suffix
    ``'_max_node_degree_<d>'`` where ``<d>`` is the largest degree found in
    the subgraph. ``max`` raises ``ValueError`` for a subgraph without
    nodes.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.

    Returns:
        A ``GraphComponent`` over the same graph with relabeled signatures.
    """
    relabeled = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        degrees = list(dict(subgraph.degree()).values())
        relabeled.append(
            (subgraph, signature + '_max_node_degree_%d' % max(degrees)))
    return GraphComponent(
        graph=graph_component.graph,
        subgraphs=[subgraph for subgraph, _ in relabeled],
        signatures=[signature for _, signature in relabeled])
def decompose_relabel_node_degree_frequency(graph_component):
    """Append the node-degree histogram of each subgraph to its signature.

    Subgraphs are passed through unchanged; each signature gets the suffix
    ``'_node_degree_frequency_'`` followed by ``'<degree>:<count>'`` items
    joined by ``'_'``, in ``Counter.most_common()`` order.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.

    Returns:
        A ``GraphComponent`` over the same graph with relabeled signatures.
    """
    kept_subgraphs = []
    relabeled_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        degree_histogram = Counter(dict(subgraph.degree()).values())
        items = ['%s:%s' % (degree, count)
                 for degree, count in degree_histogram.most_common()]
        suffix = '_'.join(items)
        kept_subgraphs.append(subgraph)
        relabeled_signatures.append(
            signature + '_node_degree_frequency_' + suffix)
    return GraphComponent(graph=graph_component.graph,
                          subgraphs=kept_subgraphs,
                          signatures=relabeled_signatures)
def decompose_relabel_estimator(graph_component, graph_estimator=None):
    """Append an estimator's prediction for each subgraph to its signature.

    Subgraphs are passed through unchanged; each signature gets the suffix
    ``'_estimator_<prediction>'``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        graph_estimator: Object with a ``predict(subgraphs)`` method that
            returns one prediction per subgraph. Required despite the
            ``None`` default.

    Returns:
        A ``GraphComponent`` over the same graph with relabeled signatures.

    Raises:
        ValueError: If ``graph_estimator`` is None.
    """
    # Fail with a clear message instead of an AttributeError on None.
    if graph_estimator is None:
        raise ValueError(
            'decompose_relabel_estimator requires a graph_estimator')
    new_subgraphs_list = []
    new_signatures_list = []
    preds = graph_estimator.predict(graph_component.subgraphs)
    for subgraph, signature, pred in zip(graph_component.subgraphs,
                                         graph_component.signatures,
                                         preds):
        new_subgraphs_list.append(subgraph)
        new_signatures_list.append(signature + '_estimator_%s' % pred)
    return GraphComponent(graph=graph_component.graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
def decompose_relabel_distinct_node_labels(graph_component):
    """Append the number of distinct node labels to each signature.

    Subgraphs are passed through unchanged; each signature gets the suffix
    ``'_distinct_node_labels_<k>'`` where ``<k>`` counts the different
    values of the ``'label'`` node attribute in the subgraph.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.

    Returns:
        A ``GraphComponent`` over the same graph with relabeled signatures.
    """
    kept_subgraphs = []
    relabeled_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        distinct_labels = {subgraph.nodes[u]['label']
                           for u in subgraph.nodes()}
        suffix = '_distinct_node_labels_%d' % len(distinct_labels)
        kept_subgraphs.append(subgraph)
        relabeled_signatures.append(signature + suffix)
    return GraphComponent(graph=graph_component.graph,
                          subgraphs=kept_subgraphs,
                          signatures=relabeled_signatures)
def decompose_relabel_node_size(graph_component):
    """Append the node count of each subgraph to its signature.

    Subgraphs are passed through unchanged; each signature gets the suffix
    ``'_node_size_<n>'`` where ``<n>`` is ``subgraph.number_of_nodes()``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.

    Returns:
        A ``GraphComponent`` over the same graph with relabeled signatures.
    """
    relabeled = [
        (subgraph,
         signature + '_node_size_' + '%d' % subgraph.number_of_nodes())
        for subgraph, signature in zip(graph_component.subgraphs,
                                       graph_component.signatures)
    ]
    return GraphComponent(
        graph=graph_component.graph,
        subgraphs=[subgraph for subgraph, _ in relabeled],
        signatures=[signature for _, signature in relabeled])
def combined_decomposition_function(in_graph_component):
    """Run every decomposition function and pool results under one signature.

    ``decomposition_functions`` is a free name resolved outside this
    function (not visible here). The subgraphs returned by all functions
    are pooled in call order; every pooled subgraph receives the same
    signature: ``'disjunction'`` followed by the ``'*'``-joined sorted
    distinct signatures returned by the functions.

    Args:
        in_graph_component: Object exposing ``graph``; passed to every
            decomposition function.

    Returns:
        A ``GraphComponent`` over the input graph with the pooled subgraphs
        and one shared signature per subgraph.
    """
    pooled_subgraphs = []
    seen_signatures = []
    for decomposition_function in decomposition_functions:
        result = decomposition_function(in_graph_component)
        pooled_subgraphs.extend(result.subgraphs)
        seen_signatures.extend(result.signatures)
    shared_signature = 'disjunction' + '*'.join(sorted(set(seen_signatures)))
    return GraphComponent(
        graph=in_graph_component.graph,
        subgraphs=pooled_subgraphs,
        signatures=[shared_signature] * len(pooled_subgraphs))
def decompose_connected_component(graph_component):
    """Split the subgraphs of a component into their connected components.

    When ``get_subgraphs_from_graph_component`` returns nothing, the whole
    graph is used as the only subgraph.

    Args:
        graph_component: Object exposing ``graph`` and ``signature``.

    Returns:
        A ``GraphComponent`` over the same graph whose node components are
        the node sets of all connected components, with no edge components
        and a signature built from ``['connected']`` and the input
        signature.
    """
    subgraphs = get_subgraphs_from_graph_component(graph_component)
    if len(subgraphs) == 0:
        subgraphs = [graph_component.graph]
    node_components = [
        set(nodes)
        for subgraph in subgraphs
        for nodes in nx.connected_components(subgraph)
    ]
    return GraphComponent(
        graph=graph_component.graph,
        node_components=node_components,
        edge_components=[],
        signature=serialize(['connected'], graph_component.signature))
def decompose_abstract_and_non_abstract(graph_component, node_label=None,
                                        edge_label=None, isa_label='isa',
                                        mode='intersection'):
    """Build one graph holding the original nodes plus one node per subgraph.

    A copy of the original graph is extended with an "abstract" node for
    every subgraph (id = subgraph position + number of original nodes,
    labeled with the subgraph's signature). Each abstract node is linked to
    the nodes of its subgraph by edges labeled ``isa_label`` and flagged
    ``nesting=True``. Two abstract nodes are linked when their subgraphs
    overlap; the edge is labeled with the overlap size and stores the
    overlap in ``isa``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        node_label: When truthy, written to the ``'label'`` attribute of
            the nodes via ``nx.set_node_attributes``.
        edge_label: When truthy, written to the ``'label'`` attribute of
            the edges via ``nx.set_edge_attributes``.
        isa_label: Label of the abstract-node-to-member edges.
        mode: ``'intersection'`` compares node sets; ``'edge'`` compares
            the results of ``_get_edges`` for both subgraphs. Any other
            value adds no edges between abstract nodes.

    Returns:
        A ``GraphComponent`` whose graph and only subgraph is the extended
        graph, with a single ``'abstract_non_abstract'`` signature.

    Note:
        The abstract node ids assume that no original node already uses an
        integer id >= ``len(graph)`` -- TODO confirm with callers.
    """
    original = graph_component.graph
    graph = nx.Graph(original)
    offset = len(original)
    entries = list(zip(graph_component.subgraphs,
                       graph_component.signatures))
    for ii, (subgraph_i, signature_i) in enumerate(entries):
        i = ii + offset
        graph.add_node(i, label=signature_i, isa=list(subgraph_i.nodes()))
        for v in subgraph_i.nodes():
            graph.add_edge(i, v, label=isa_label, nesting=True)
        # Only later entries: every unordered pair is handled once.
        for jj in range(ii + 1, len(entries)):
            subgraph_j = entries[jj][0]
            j = jj + offset
            if mode == 'intersection':
                shared = set(subgraph_i.nodes()) & set(subgraph_j.nodes())
            elif mode == 'edge':
                edges_i = _get_edges(original, subgraph_i.nodes())
                edges_j = _get_edges(original, subgraph_j.nodes())
                shared = set(edges_i) & set(edges_j)
            else:
                continue
            if shared:
                graph.add_edge(i, j, label=len(shared), isa=list(shared))
    if node_label:
        nx.set_node_attributes(graph, node_label, 'label')
    if edge_label:
        nx.set_edge_attributes(graph, edge_label, 'label')
    merged_signature = '_'.join(sorted(set(graph_component.signatures)))
    tag = serialize(['abstract_non_abstract'], merged_signature)
    return GraphComponent(graph=graph, subgraphs=[graph], signatures=[tag])
def decompose_context(graph_component, radius=1):
    """Replace each subgraph by its context within the given radius.

    For every subgraph, ``context_component_decomposition`` is asked for
    the context edges in the full graph; these are materialised through
    ``get_subgraphs_from_edge_components``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        radius: Forwarded to ``context_component_decomposition`` and
            recorded in the signature.

    Returns:
        A ``GraphComponent`` over the same graph with signatures built from
        ``['context', radius]`` and the originating signature.
    """
    graph = graph_component.graph
    out_subgraphs = []
    out_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        edge_component = context_component_decomposition(
            graph, subgraph, radius)
        extracted = get_subgraphs_from_edge_components(
            graph, [edge_component])
        tag = serialize(['context', radius], signature)
        out_subgraphs.extend(extracted)
        out_signatures.extend([tag] * len(extracted))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_central_and_non_central(graph_component, k_top=2):
    """Decompose each subgraph with ``central_and_non_central_decomposition``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        k_top: Forwarded to ``central_and_non_central_decomposition`` and
            recorded in the signature.

    Returns:
        A ``GraphComponent`` over the same graph with signatures built from
        ``['central_and_non_central', k_top]`` and the originating
        signature.
    """
    graph = graph_component.graph
    per_subgraph = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        node_components = central_and_non_central_decomposition(
            subgraph, k_top)
        extracted = get_subgraphs_from_node_components(graph, node_components)
        tag = serialize(['central_and_non_central', k_top], signature)
        per_subgraph.append((extracted, [tag] * len(extracted)))

    out_subgraphs = []
    out_signatures = []
    for extracted, tags in per_subgraph:
        out_subgraphs += extracted
        out_signatures += tags
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_graphlet(graph_component, size=5):
    """Decompose each subgraph with ``graphlet_decomposition``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        size: Forwarded as ``size=`` to ``graphlet_decomposition`` and
            recorded in the signature.

    Returns:
        A ``GraphComponent`` over the same graph with signatures built from
        ``['graphlet', size]`` and the originating signature.
    """
    graph = graph_component.graph
    graphlets = []
    tags = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        node_components = graphlet_decomposition(subgraph, size=size)
        extracted = get_subgraphs_from_node_components(graph, node_components)
        tag = serialize(['graphlet', size], signature)
        graphlets.extend(extracted)
        # One copy of the signature per extracted subgraph.
        tags.extend([tag] * len(extracted))
    return GraphComponent(graph=graph, subgraphs=graphlets, signatures=tags)
def decompose_nodes_relabel_degree(graph_component):
    """Return a copy of the graph with nodes labeled by their degree.

    Every node's ``'label'`` attribute is set to its degree and every
    edge's ``'label'`` attribute is set to ``'::'``. The input graph is not
    modified; the work is done on a copy.

    Args:
        graph_component: Object exposing ``graph`` and ``subgraphs``. The
            first subgraph must have as many nodes as the graph.

    Returns:
        A ``GraphComponent`` whose graph and only subgraph is the relabeled
        copy, with the single signature ``'node_relabel_degree'``.

    Raises:
        AssertionError: If the first subgraph does not span the graph
            (only checked when assertions are enabled).
    """
    first_subgraph_size = len(graph_component.subgraphs[0])
    assert (
        first_subgraph_size == len(graph_component.graph)
    ), 'Node relabeling is allowed only on the original graph, not on components'
    relabeled = graph_component.graph.copy()
    for node, degree in relabeled.degree():
        relabeled.nodes[node]['label'] = degree
    # data=True yields the live attribute dict of each edge.
    for _, _, attributes in relabeled.edges(data=True):
        attributes['label'] = '::'
    return GraphComponent(graph=relabeled,
                          subgraphs=[relabeled],
                          signatures=['node_relabel_degree'])
def decompose_dilatate(graph_component, radius=1):
    """Grow each subgraph to its neighborhood within the given radius.

    For every subgraph, ``get_component_neighborhood_component`` returns
    one node component in the full graph, which is materialised as a
    subgraph.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        radius: Forwarded to ``get_component_neighborhood_component`` and
            recorded in the signature.

    Returns:
        A ``GraphComponent`` over the same graph. Exactly one signature,
        built from ``['dilatate', radius]`` and the originating signature,
        is added per input subgraph.
    """
    graph = graph_component.graph
    grown_subgraphs = []
    grown_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        neighborhood = get_component_neighborhood_component(
            graph, subgraph, radius)
        grown_subgraphs += get_subgraphs_from_node_components(
            graph, [neighborhood])
        grown_signatures.append(serialize(['dilatate', radius], signature))
    return GraphComponent(graph=graph,
                          subgraphs=grown_subgraphs,
                          signatures=grown_signatures)
def decompose_communities(graph_component):
    """Decompose each subgraph with ``greedy_modularity_communities``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.

    Returns:
        A ``GraphComponent`` over the same graph with signatures built from
        ``['communities']`` and the originating signature.
    """
    graph = graph_component.graph
    community_subgraphs = []
    community_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        communities = greedy_modularity_communities(subgraph)
        extracted = get_subgraphs_from_node_components(graph, communities)
        tag = serialize(['communities'], signature)
        community_subgraphs.extend(extracted)
        community_signatures.extend([tag] * len(extracted))
    return GraphComponent(graph=graph,
                          subgraphs=community_subgraphs,
                          signatures=community_signatures)
def decompose_cycles_and_non_cycles(graph_component):
    """Decompose each subgraph with ``cycle_and_non_cycle_decomposition``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.

    Returns:
        A ``GraphComponent`` over the same graph with signatures built from
        ``['cycles_and_non_cycles']`` and the originating signature.
    """
    graph = graph_component.graph
    out_subgraphs, out_signatures = [], []
    entries = zip(graph_component.subgraphs, graph_component.signatures)
    for subgraph, signature in entries:
        node_components = cycle_and_non_cycle_decomposition(subgraph)
        extracted = get_subgraphs_from_node_components(graph, node_components)
        tag = serialize(['cycles_and_non_cycles'], signature)
        out_subgraphs.extend(extracted)
        out_signatures.extend([tag] * len(extracted))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_relation(relation_graph_component, graph_component,
                       type_of='single'):
    """Join pairs of subgraphs that are related through a relation subgraph.

    For each relation subgraph, the subgraphs of ``graph_component`` that
    intersect it in the required way are collected; every unordered pair
    of them yields a new component made of the union of their node sets.

    Args:
        relation_graph_component: Object exposing ``graph``, ``subgraphs``
            and ``signatures``; its graph is used for the result.
        graph_component: Object exposing ``subgraphs`` and ``signatures``.
        type_of: How a subgraph must intersect the relation subgraph:
            ``'single'`` - exactly one shared node;
            ``'partial'`` - at least one shared node;
            ``'total'`` - as many shared nodes as the relation subgraph
            has nodes. Any other value matches nothing.

    Returns:
        A ``GraphComponent`` over the relation component's graph with
        signatures built from ``['relation', type_of]``, the relation
        signature and the two joined signatures.
    """
    graph = relation_graph_component.graph
    out_subgraphs = []
    out_signatures = []
    for g, signature_C in zip(relation_graph_component.subgraphs,
                              relation_graph_component.signatures):
        relation_nodes = set(g.nodes())
        matched = []
        for m, signature in zip(graph_component.subgraphs,
                                graph_component.signatures):
            member_nodes = set(m.nodes())
            overlap = len(relation_nodes.intersection(member_nodes))
            if type_of == 'single':
                accepted = overlap == 1
            elif type_of == 'partial':
                accepted = overlap >= 1
            elif type_of == 'total':
                accepted = overlap == len(relation_nodes)
            else:
                accepted = False
            if accepted:
                matched.append((member_nodes, signature))
        # combinations yields nothing for fewer than two matches.
        for first, second in itertools.combinations(matched, 2):
            component_A, signature_A = first
            component_B, signature_B = second
            joined = component_A.union(component_B)
            out_subgraphs += get_subgraphs_from_node_components(
                graph, [joined])
            out_signatures.append(
                serialize(['relation', type_of],
                          signature_C, signature_A, signature_B))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_iterated_clique(graph_component, n_iter=1, min_n_iter=0,
                              min_size=2, max_size=None):
    """Run ``iterated_clique_decomposition`` seeded with existing components.

    The seed is the component's node components followed by its edge
    components converted through ``edge_to_node_components``.

    Args:
        graph_component: Object exposing ``graph``, ``node_components``,
            ``edge_components`` and ``signature``. It is not modified.
        n_iter, min_n_iter, min_size, max_size: Forwarded to
            ``iterated_clique_decomposition`` and recorded in the
            signature.

    Returns:
        A ``GraphComponent`` over the same graph with the resulting node
        components, no edge components and a signature built from
        ``['iterated_clique', n_iter, min_n_iter, min_size, max_size]`` and
        the input signature.
    """
    # Work on a copy: an in-place += on the attribute would silently grow
    # the caller's node_components list.
    components = list(graph_component.node_components)
    components.extend(
        edge_to_node_components(graph_component.edge_components))
    nc = iterated_clique_decomposition(
        graph_component.graph,
        components=components,
        n_iter=n_iter,
        min_n_iter=min_n_iter,
        min_size=min_size,
        max_size=max_size)
    return GraphComponent(
        graph=graph_component.graph,
        node_components=nc,
        edge_components=[],
        signature=serialize(
            ['iterated_clique', n_iter, min_n_iter, min_size, max_size],
            graph_component.signature))
def decompose_degree_and_non_degree(graph_component, min_size=2,
                                    max_size=None):
    """Decompose each subgraph with ``degree_non_degree_decomposition``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        min_size: Forwarded as ``min_size=`` and recorded in the signature.
        max_size: Forwarded as ``max_size=`` and recorded in the signature.

    Returns:
        A ``GraphComponent`` over the same graph with signatures built from
        ``['degree_and_non_degree', min_size, max_size]`` and the
        originating signature.
    """
    graph = graph_component.graph
    out_subgraphs = []
    out_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        node_components = degree_non_degree_decomposition(
            subgraph, min_size=min_size, max_size=max_size)
        extracted = get_subgraphs_from_node_components(graph, node_components)
        tag = serialize(
            ['degree_and_non_degree', min_size, max_size], signature)
        out_subgraphs.extend(extracted)
        out_signatures.extend([tag] * len(extracted))
    return GraphComponent(graph=graph,
                          subgraphs=out_subgraphs,
                          signatures=out_signatures)
def decompose_relation_binary(relation_graph_component,
                              graph_component_first,
                              graph_component_second, type_of='single',
                              keep_second_component=True):
    """Join subgraphs of two components related through a relation subgraph.

    For each relation subgraph, ``get_intersecting_subgraphs`` selects the
    node sets (and signatures) of both components that intersect it
    according to ``type_of``. Every (first, second) pair with different
    node sets yields a new component.

    Args:
        relation_graph_component: Object exposing ``graph``, ``subgraphs``
            and ``signatures``; its graph is used for the result.
        graph_component_first: Source of the first element of each pair.
        graph_component_second: Source of the second element of each pair.
        type_of: Forwarded to ``get_intersecting_subgraphs`` and recorded
            in the signature.
        keep_second_component: When true the new component is the union of
            both node sets, otherwise only the first node set.

    Returns:
        A ``GraphComponent`` over the relation component's graph with
        signatures built from
        ``['relation_binary', type_of, keep_second_component]``, the
        relation signature and the two paired signatures.
    """
    graph = relation_graph_component.graph
    new_subgraphs_list = []
    new_signatures_list = []
    for g, signature_C in zip(relation_graph_component.subgraphs,
                              relation_graph_component.signatures):
        g_node_set = set(g.nodes())
        subcomponents_first, signatures_first = get_intersecting_subgraphs(
            g_node_set, graph_component_first, type_of)
        subcomponents_second, signatures_second = get_intersecting_subgraphs(
            g_node_set, graph_component_second, type_of)
        if not (len(subcomponents_first) >= 1
                and len(subcomponents_second) >= 1):
            continue
        # The two products advance in lockstep, so each component pair is
        # matched with the signature pair of the same positions.
        component_pairs = itertools.product(
            subcomponents_first, subcomponents_second)
        signatures_pairs = itertools.product(
            signatures_first, signatures_second)
        for component_pair, signatures_pair in zip(component_pairs,
                                                   signatures_pairs):
            component_A, component_B = component_pair
            signature_A, signature_B = signatures_pair
            if component_A == component_B:
                continue
            if keep_second_component:
                component = component_A.union(component_B)
            else:
                component = component_A
            new_subgraphs_list += get_subgraphs_from_node_components(
                graph, [component])
            new_signature = serialize(
                ['relation_binary', type_of, keep_second_component],
                signature_C, signature_A, signature_B)
            # append, not +=: extending a list with a string adds one entry
            # per character and desynchronises subgraphs and signatures.
            new_signatures_list.append(new_signature)
    return GraphComponent(graph=graph,
                          subgraphs=new_subgraphs_list,
                          signatures=new_signatures_list)
def decompose_path(graph_component, length=None, min_len=1, max_len=None):
    """Decompose each subgraph with ``path_decomposition``.

    The edge components returned by ``path_decomposition`` are materialised
    through ``get_subgraphs_from_edge_components``.

    Args:
        graph_component: Object exposing ``graph``, ``subgraphs`` and
            ``signatures``.
        length, min_len, max_len: Forwarded to ``path_decomposition`` and
            recorded in the signature.

    Returns:
        A ``GraphComponent`` over the same graph with signatures built from
        ``['path', length, min_len, max_len]`` and the originating
        signature.
    """
    graph = graph_component.graph
    path_subgraphs = []
    path_signatures = []
    for subgraph, signature in zip(graph_component.subgraphs,
                                   graph_component.signatures):
        edge_components = path_decomposition(
            subgraph, length=length, min_len=min_len, max_len=max_len)
        extracted = get_subgraphs_from_edge_components(graph, edge_components)
        tag = serialize(['path', length, min_len, max_len], signature)
        path_subgraphs.extend(extracted)
        path_signatures.extend([tag] * len(extracted))
    return GraphComponent(graph=graph,
                          subgraphs=path_subgraphs,
                          signatures=path_signatures)