def enhance_base_cip(base_cip, abstract_cip, mergeids, base_graph, hash_bitmask, mod_dict, core_hash):
    '''Finish a base-level CIP by merging in information from the abstract CIP.

    Rebuilds the base CIP's graph so it contains the real (uncollapsed) core
    nodes, combines the interface hashes of both CIPs and copies the
    radius/thickness bookkeeping over from the abstract CIP.

    Parameters
    ----------
    base_cip: CIP extracted from the base graph
    abstract_cip: CIP extracted from the abstract graph
    mergeids: base-graph node ids that form the core of the abstract CIP
    base_graph: the underlying base graph
    hash_bitmask: int bitmask used by the hash functions
    mod_dict: dict {base-graph node id: interface-hash modifier}
    core_hash: precomputed hash for the real core

    Returns
    -------
    the enhanced base_cip
    '''
    # we cheated a little with the core, so we need to undo our cheating:
    # rebuild the cip graph from the base graph, including the merged core nodes
    whatever = base_cip.graph.copy()
    base_cip.graph = base_graph.subgraph(base_cip.graph.nodes() + mergeids).copy()
    # mark the merged (abstract-core) nodes as core in the rebuilt graph
    for n in mergeids:
        base_cip.graph.node[n]['core'] = True
    for n, d in base_cip.graph.nodes(data=True):
        if 'core' not in d:
            d['interface'] = True
            # restore the label that was computed on the original cip graph
            d['distance_dependent_label'] = whatever.node[n][
                'distance_dependent_label']
    base_cip.core_hash = core_hash
    # merging cip info with the abstract graph
    base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash,
                                               abstract_cip.interface_hash,
                                               get_mods(mod_dict, mergeids), 0,
                                               hash_bitmask)
    base_cip.core_nodes_count = abstract_cip.core_nodes_count
    base_cip.radius = abstract_cip.radius
    base_cip.abstract_thickness = abstract_cip.thickness
    # i want to see what they look like :)
    base_cip.abstract_view = abstract_cip.graph
    base_cip.distance_dict = abstract_cip.distance_dict
    return base_cip
def _compute_vertex_based_features(self, seq):
    '''Build one normalized feature row per position of *seq*.

    For every position, pairs of kmer neighborhood hashes up to radius r
    and distance d are hashed into feature codes; each position's counts
    are normalized separately and the result is returned as a sparse matrix.
    '''
    if seq is None or len(seq) == 0:
        raise Exception("ERROR: something went wrong, empty instance.")
    feature_dict = {}
    n = len(seq)
    # hash codes for the neighborhoods (kmers) rooted at every position
    hashes = [self._compute_neighborhood_hash(seq, p) for p in range(n)]
    for p in range(n):
        # features for this position: pairs of kmers up to distance d,
        # for all radii up to r
        pos_features = defaultdict(lambda: defaultdict(float))
        for r in range(self.min_r, self.r + 1):
            if r >= len(hashes[p]):
                continue
            for d in range(self.min_d, self.d + 1):
                if p + d + r >= n:
                    continue
                code = fast_hash_4(hashes[p][r], r, d,
                                   hashes[p + d][r], self.bitmask)
                key = fast_hash_2(r, d, self.bitmask)
                pos_features[key][code] += 1
        feature_dict.update(self._normalization(pos_features, p))
    return self._convert_dict_to_sparse_matrix(feature_dict)
def _transform(self, instance_id, seq):
    '''Return normalized pair-of-kmer features for a single sequence.

    The instance may be a (header, sequence) pair, in which case only the
    sequence part is used.
    '''
    if seq is None or len(seq) == 0:
        raise Exception("ERROR: something went wrong, empty instance # %d." % instance_id)
    if len(seq) == 2 and len(seq[1]) > 0:
        # the instance is a (header, seq) pair: keep only the sequence
        seq = seq[1]
    n = len(seq)
    # kmer hash codes for all radii at every position
    hashes = [self._compute_neighborhood_hash(seq, p) for p in range(n)]
    features = defaultdict(lambda: defaultdict(float))
    for p in range(n):
        for r in range(self.min_r, self.r + 1):
            if r >= len(hashes[p]):
                continue
            for d in range(self.min_d, self.d + 1):
                q = p + d
                if q + r >= n:
                    continue
                code = fast_hash_4(hashes[p][r], r, d,
                                   hashes[q][r], self.bitmask)
                key = fast_hash_2(r, d, self.bitmask)
                features[key][code] += 1
    return self._normalization(features, instance_id)
def _transform_vertex_pair_base(self, graph, vertex_v, vertex_u, distance,
                                feature_list, connection_weight=1):
    '''Add pair-of-neighborhood features for one vertex pair at one distance.

    For every (even) radius and every label layer, the two endpoint
    neighborhood hashes are combined into a single canonical feature and
    counted (or weighted, for weighted graphs) in feature_list.
    '''
    weighted = graph.graph.get('weighted', False) is not False
    # for all radii
    for radius in range(self.min_r, self.r + 2, 2):
        for label_index in range(graph.graph['label_size']):
            hashes_v = graph.node[vertex_v]['neighborhood_graph_hash'][label_index]
            hashes_u = graph.node[vertex_u]['neighborhood_graph_hash'][label_index]
            if radius >= len(hashes_v) or radius >= len(hashes_u):
                continue
            # canonicalization: order the pair of neighborhood hashes
            h_v = hashes_v[radius]
            h_u = hashes_u[radius]
            first_hash, second_hash = (h_v, h_u) if h_v < h_u else (h_u, h_v)
            feature = fast_hash_4(first_hash, second_hash,
                                  radius, distance, self.bitmask)
            key = fast_hash_2(radius, distance, self.bitmask)
            if not weighted:
                feature_list[key][feature] += 1
            else:
                w_v = graph.node[vertex_v]['neighborhood_graph_weight'][radius]
                w_u = graph.node[vertex_u]['neighborhood_graph_weight'][radius]
                feature_list[key][feature] += connection_weight * (w_v + w_u)
def _transform_distance(self, feature_list=None, pos=None, radius=None,
                        seq_len=None, neigh_hash_cache=None,
                        neighborhood_weight_cache=None):
    '''Accumulate pair features for one (pos, radius) over all distances.

    Both downstream (positive) and upstream (negative) offsets are
    considered; only the magnitude of the offset enters the feature.
    '''
    shifts = list(range(self.min_d, self.d + 1))
    shifts += list(range(-self.d, -self.min_d))
    for shift in shifts:
        end = pos + shift
        # only the magnitude of the offset is encoded in the feature
        dist = abs(shift)
        skip = (self.weights_dict is not None and
                self.weights_dict.get((radius, dist), 0) == 0)
        if skip:
            continue
        if end < 0 or end + radius >= seq_len:
            continue
        pfeat = neigh_hash_cache[pos][radius]
        efeat = neigh_hash_cache[end][radius]
        code = fast_hash_4(pfeat, efeat, radius, dist, self.bitmask)
        key = fast_hash_2(radius, dist, self.bitmask)
        if neighborhood_weight_cache:
            feature_list[key][code] += neighborhood_weight_cache[pos][radius]
            feature_list[key][code] += neighborhood_weight_cache[end][radius]
        else:
            feature_list[key][code] += 1
def _transform(self, instance_id, seq):
    '''Extract normalized pair-of-kmer features from one sequence instance.'''
    if seq is None or len(seq) == 0:
        raise Exception('ERROR: something went wrong, empty instance # %d.' % instance_id)
    if len(seq) == 2 and len(seq[1]) > 0:
        # the instance is a (header, sequence) pair: keep the sequence only
        seq = seq[1]
    seq_len = len(seq)
    # kmer hash codes for all kmers up to r in all positions in seq
    kmer_hashes = [self._compute_neighborhood_hash(seq, i)
                   for i in range(seq_len)]
    # features are pairs of kmers up to distance d for all radii up to r
    feats = defaultdict(lambda: defaultdict(float))
    for i in range(seq_len):
        for rad in range(self.min_r, self.r + 1):
            if rad < len(kmer_hashes[i]):
                for dist in range(self.min_d, self.d + 1):
                    j = i + dist
                    if j + rad < seq_len:
                        code = fast_hash_4(kmer_hashes[i][rad], rad, dist,
                                           kmer_hashes[j][rad], self.bitmask)
                        feats[fast_hash_2(rad, dist, self.bitmask)][code] += 1
    return self._normalization(feats, instance_id)
def _transform_distance(self, feature_list=None, pos=None, radius=None,
                        seq_len=None, neigh_hash_cache=None,
                        neighborhood_weight_cache=None):
    '''Accumulate pair features for one (pos, radius) over all distances.

    When self.use_only_context is set, the central kmer's identity is
    replaced by a fixed placeholder so only the context contributes.
    '''
    offsets = list(range(self.min_d, self.d + 1))
    offsets += list(range(-self.d, -self.min_d))
    for offset in offsets:
        end = pos + offset
        # only the magnitude of the offset enters the feature encoding
        dist = abs(offset)
        if (self.weights_dict is not None and
                self.weights_dict.get((radius, dist), 0) == 0):
            continue
        if end < 0 or end + radius >= seq_len:
            continue
        if self.use_only_context:
            # fixed placeholder: ignore the identity of the central kmer
            pfeat = 42
        else:
            pfeat = neigh_hash_cache[pos][radius]
        efeat = neigh_hash_cache[end][radius]
        code = fast_hash_4(pfeat, efeat, radius, dist, self.bitmask)
        key = fast_hash_2(radius, dist, self.bitmask)
        if neighborhood_weight_cache:
            feature_list[key][code] += neighborhood_weight_cache[pos][radius]
            feature_list[key][code] += neighborhood_weight_cache[end][radius]
        else:
            feature_list[key][code] += 1
def enhance_base_cip(base_cip, abstract_cip, mergeids, base_graph, hash_bitmask, mod_dict, core_hash): ''' Parameters ---------- base_cip: cip a cip that was extracted from the base graph abstract_cip: cip a cip that was extracted from the abstract graph mergeids: list of int nodes in the base cip that are in the core of the abstract cip base_graph: graph the base graph hash_bitmask: int n/c mod_dict: dict {id in base_graph: modification to interface hash} if there is an exceptionaly important nodetype in thebase graph it makes sure that every substitution will preserve this nodetype Oo used eg to mark the beginning/end of rna sequences. endnode can only be replaced by endnode :) core_hash: hash for the core that will be used in the finished CIP Returns ------- a finished? CIP ''' # we cheated a little with the core, so we need to undo our cheating whatever = base_cip.graph.copy() base_cip.graph = base_graph.subgraph(base_cip.graph.nodes() + mergeids).copy() for n in mergeids: base_cip.graph.node[n]['core'] = True for n, d in base_cip.graph.nodes(data=True): if 'core' not in d: d['interface'] = True d['distance_dependent_label'] = whatever.node[n]['distance_dependent_label'] base_cip.core_hash = core_hash # merging cip info with the abstract graph base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash, abstract_cip.interface_hash, get_mods(mod_dict, mergeids), 0, hash_bitmask) base_cip.core_nodes_count = abstract_cip.core_nodes_count base_cip.radius = abstract_cip.radius base_cip.abstract_thickness = abstract_cip.thickness # i want to see what they look like :) base_cip.abstract_view = abstract_cip.graph base_cip.distance_dict = abstract_cip.distance_dict return base_cip
def _transform_vertex_pair_valid(self, graph, vertex_v, vertex_u, radius,
                                 distance, feature_list, connection_weight=1):
    '''Accumulate pair and half (context-only) features for a vertex pair.

    When self.use_only_context is True only the half features (which
    ignore the identity of the central vertex v) are added.
    '''
    cw = connection_weight
    # we need to revert to r/2 and d/2 (radius and distance arrive doubled)
    key = (radius / 2, distance / 2)
    hashes_v = graph.nodes[vertex_v]['neigh_graph_hash']
    hashes_u = graph.nodes[vertex_u]['neigh_graph_hash']
    if radius >= len(hashes_v) or radius >= len(hashes_u):
        return
    h_v = hashes_v[radius]
    h_u = hashes_u[radius]
    # canonicalization of the pair of neighborhood hashes
    first_hash, second_hash = (h_v, h_u) if h_v < h_u else (h_u, h_v)
    feature = fast_hash_4(first_hash, second_hash,
                          radius, distance, self.bitmask)
    # half features ignore the central vertex v; they help model the
    # context independently from the identity of the vertex itself
    half_feature = fast_hash_3(h_u, radius, distance, self.bitmask)
    if graph.graph.get('weighted', False) is False:
        if self.use_only_context is False:
            feature_list[key][feature] += cw
        feature_list[key][half_feature] += cw
    else:
        w_v = graph.nodes[vertex_v]['neigh_graph_weight'][radius]
        w_u = graph.nodes[vertex_u]['neigh_graph_weight'][radius]
        val = cw * (w_v + w_u)
        # add features only when the value is non-zero
        if val != 0:
            if self.use_only_context is False:
                feature_list[key][feature] += val
            feature_list[key][half_feature] += cw * w_u
def _transform_vertex_pair_valid(self, graph, vertex_v, vertex_u, radius,
                                 distance, feature_list, connection_weight=1):
    '''Accumulate pair and half (context-only) features for a vertex pair.'''
    cw = connection_weight
    # we need to revert to r/2 and d/2 (radius and distance arrive doubled)
    key = (radius / 2, distance / 2)
    hashes_v = graph.nodes[vertex_v]['neigh_graph_hash']
    hashes_u = graph.nodes[vertex_u]['neigh_graph_hash']
    if radius >= len(hashes_v) or radius >= len(hashes_u):
        return
    h_v = hashes_v[radius]
    h_u = hashes_u[radius]
    # canonicalization of the pair of neighborhood hashes
    first_hash, second_hash = (h_v, h_u) if h_v < h_u else (h_u, h_v)
    feature = fast_hash_4(first_hash, second_hash,
                          radius, distance, self.bitmask)
    # half features ignore the central vertex v; they help model the
    # context independently from the identity of the vertex itself
    half_feature = fast_hash_3(h_u, radius, distance, self.bitmask)
    if graph.graph.get('weighted', False) is False:
        feature_list[key][feature] += cw
        feature_list[key][half_feature] += cw
    else:
        w_v = graph.nodes[vertex_v]['neigh_graph_weight'][radius]
        w_u = graph.nodes[vertex_u]['neigh_graph_weight'][radius]
        val = cw * (w_v + w_u)
        # add features only when the value is non-zero
        if val != 0:
            feature_list[key][feature] += val
            feature_list[key][half_feature] += cw * w_u
def _transform_vertex_pair(self, graph, vertex_v, vertex_u, distance,
                           feature_list, connection_weight=1):
    '''Accumulate pair and half (context-only) features for one vertex pair.'''
    weighted = graph.graph.get('weighted', False) is not False
    # for all (even) radii
    for radius in range(self.min_r, self.r + 2, 2):
        for label_index in range(graph.graph['label_size']):
            hashes_v = graph.node[vertex_v]['neigh_graph_hash'][label_index]
            hashes_u = graph.node[vertex_u]['neigh_graph_hash'][label_index]
            if radius >= len(hashes_v) or radius >= len(hashes_u):
                continue
            # canonicalization of the pair of neighborhood hashes
            h_v = hashes_v[radius]
            h_u = hashes_u[radius]
            first_hash, second_hash = (h_v, h_u) if h_v < h_u else (h_u, h_v)
            feature = fast_hash_4(first_hash, second_hash,
                                  radius, distance, self.bitmask)
            # half features ignore the central vertex v so the context can
            # be modeled independently from the vertex identity
            half_feature = fast_hash_3(h_u, radius, distance, self.bitmask)
            # NOTE(review): fast_hash_2 is called without self.bitmask here,
            # unlike the other call sites in this file -- confirm the
            # function's signature provides a default bitmask.
            key = fast_hash_2(radius, distance)
            if not weighted:
                feature_list[key][feature] += 1
                feature_list[key][half_feature] += 1
            else:
                w_v = graph.node[vertex_v]['neigh_graph_weight'][radius]
                w_u = graph.node[vertex_u]['neigh_graph_weight'][radius]
                feature_list[key][feature] += connection_weight * (w_v + w_u)
                feature_list[key][half_feature] += connection_weight * w_u
def _transform_vertex_pair(self, graph, vertex_v, vertex_u, distance,
                           feature_list, connection_weight=1):
    '''Accumulate pair and half (context-only) features for one vertex pair.'''
    for r in range(self.min_r, self.r + 2, 2):
        for li in range(graph.graph['label_size']):
            nh_v = graph.node[vertex_v]['neigh_graph_hash'][li]
            nh_u = graph.node[vertex_u]['neigh_graph_hash'][li]
            if r < len(nh_v) and r < len(nh_u):
                # canonicalization of the pair of neighborhood hashes
                first_hash = min(nh_v[r], nh_u[r])
                second_hash = max(nh_v[r], nh_u[r])
                feature = fast_hash_4(first_hash, second_hash,
                                      r, distance, self.bitmask)
                # half features ignore the central vertex v to model the
                # context independently from the vertex identity
                half_feature = fast_hash_3(nh_u[r], r, distance, self.bitmask)
                # NOTE(review): no bitmask argument here, unlike the other
                # fast_hash_2 call sites -- confirm the default is intended.
                key = fast_hash_2(r, distance)
                if graph.graph.get('weighted', False) is False:
                    feature_list[key][feature] += 1
                    feature_list[key][half_feature] += 1
                else:
                    val = connection_weight * (
                        graph.node[vertex_v]['neigh_graph_weight'][r] +
                        graph.node[vertex_u]['neigh_graph_weight'][r])
                    feature_list[key][feature] += val
                    half_val = connection_weight * \
                        graph.node[vertex_u]['neigh_graph_weight'][r]
                    feature_list[key][half_feature] += half_val
def _compute_vertex_based_features(self, seq, weights=None):
    '''Build one normalized feature row per position, optionally weighted.

    When *weights* is given it must be the same length as *seq*; feature
    counts are then replaced by neighborhood-weight contributions.
    '''
    if seq is None or len(seq) == 0:
        raise Exception('ERROR: something went wrong, empty instance.')
    vertex_based_features = []
    n = len(seq)
    if weights:
        if len(weights) != n:
            raise Exception('ERROR: sequence and weights \
must be same length.')
        weight_cache = [self._compute_neighborhood_weight(weights, p)
                        for p in range(n)]
    # kmer hash codes for all radii at every position
    hash_cache = [self._compute_neighborhood_hash(seq, p) for p in range(n)]
    for p in range(n):
        # pairs of kmers up to distance d for all radii up to r
        local_features = defaultdict(lambda: defaultdict(float))
        for r in range(self.min_r, self.r + 1):
            if r >= len(hash_cache[p]):
                continue
            for d in range(self.min_d, self.d + 1):
                q = p + d
                if q + r >= n:
                    continue
                code = fast_hash_4(hash_cache[p][r], hash_cache[q][r],
                                   r, d, self.bitmask)
                key = fast_hash_2(r, d, self.bitmask)
                if weights:
                    local_features[key][code] += weight_cache[p][r]
                    local_features[key][code] += weight_cache[q][r]
                else:
                    local_features[key][code] += 1
        vertex_based_features.append(
            self._normalization(local_features,
                                inner_normalization=False,
                                normalization=self.normalization))
    return self._convert_dict_to_sparse_matrix(vertex_based_features)
def _transform(self, orig_seq):
    '''Return normalized (optionally weighted) pair-of-kmer features.'''
    seq, weights = self._get_sequence_and_weights(orig_seq)
    n = len(seq)
    # kmer hash codes for all radii at every position
    hash_cache = [self._compute_neighborhood_hash(seq, p) for p in range(n)]
    if weights:
        if len(weights) != n:
            raise Exception('ERROR: sequence and weights \
must be same length.')
        weight_cache = [self._compute_neighborhood_weight(weights, p)
                        for p in range(n)]
    # pairs of kmers up to distance d for all radii up to r
    features = defaultdict(lambda: defaultdict(float))
    for p in range(n):
        for r in range(self.min_r, self.r + 1):
            if r >= len(hash_cache[p]):
                continue
            for d in range(self.min_d, self.d + 1):
                q = p + d
                if q + r >= n:
                    continue
                code = fast_hash_4(hash_cache[p][r], hash_cache[q][r],
                                   r, d, self.bitmask)
                key = fast_hash_2(r, d, self.bitmask)
                if weights:
                    features[key][code] += weight_cache[p][r]
                    features[key][code] += weight_cache[q][r]
                else:
                    features[key][code] += 1
    return self._normalization(features,
                               inner_normalization=self.inner_normalization,
                               normalization=self.normalization)
def _compute_vertex_based_features(self, seq):
    '''Build one normalized feature row per position of *seq* and return
    them as a sparse matrix.'''
    if seq is None or len(seq) == 0:
        raise Exception('ERROR: something went wrong, empty instance.')
    feature_dict = {}
    seq_len = len(seq)
    # kmer hash codes for all kmers up to r at every position
    kmer_hashes = [self._compute_neighborhood_hash(seq, i)
                   for i in range(seq_len)]
    for i in range(seq_len):
        # pairs of kmers up to distance d for all radii up to r
        per_pos = defaultdict(lambda: defaultdict(float))
        for rad in range(self.min_r, self.r + 1):
            if rad < len(kmer_hashes[i]):
                for dist in range(self.min_d, self.d + 1):
                    j = i + dist
                    if j + rad < seq_len:
                        code = fast_hash_4(kmer_hashes[i][rad], rad, dist,
                                           kmer_hashes[j][rad], self.bitmask)
                        per_pos[fast_hash_2(rad, dist, self.bitmask)][code] += 1
        feature_dict.update(self._normalization(per_pos, i))
    return self._convert_dict_to_sparse_matrix(feature_dict)
def extract_cips_base(node, graphmanager, base_thickness_list=None,
                      hash_bitmask=None, mod_dict=None,
                      radius_list=None, thickness_list=None,
                      node_filter=lambda x, y: True):
    '''
    Parameters
    ----------
    node: int
        id of a node
    graphmanager: graph-wrapper
        the wrapper that contains the graph
    base_thickness_list: [int]
        thickness of SOMETHING
    hash_bitmask: int
        bitmask for the hash functions
    mod_dict: dict
        {id in base_graph: modification to interface hash}; defaults to {}
    radius_list: [int]
        radii used for the base-level extraction; defaults to []
    thickness_list: [int]
        thickness list used for the abstract-level extraction
    node_filter: callable
        filter passed to the abstract-level extraction

    Returns
    -------
    [CIP]
        a list of core_interface_pairs
    '''
    # avoid mutable default arguments: a shared dict/list default would
    # leak state between calls
    if mod_dict is None:
        mod_dict = {}
    if radius_list is None:
        radius_list = []
    # PREPARE
    abstract_graph = graphmanager.abstract_graph()
    base_graph = graphmanager.base_graph()
    if 'hlabel' not in abstract_graph.node[abstract_graph.nodes()[0]]:
        edengraphtools._label_preprocessing(abstract_graph)
    if 'hlabel' not in base_graph.node[base_graph.nodes()[0]]:
        edengraphtools._label_preprocessing(base_graph)
    # LOOK UP ABSTRACT GRAPHS NODE AND
    # EXTRACT CIPS NORMALY ON ABSTRACT GRAPH
    for n, d in abstract_graph.nodes(data=True):
        if node in d['contracted']:
            abs_node = n
            break
    else:
        raise Exception("IMPOSSIBLE NODE")
    abstract_cips = graphtools.extract_core_and_interface(root_node=abs_node,
                                                          graph=abstract_graph,
                                                          radius_list=[0],
                                                          thickness_list=thickness_list,
                                                          hash_bitmask=hash_bitmask,
                                                          node_filter=node_filter)
    # FOR EVERY ABSTRACT CIP: EXTRACT BASE CIP
    cips = []
    for abstract_cip in abstract_cips:
        base_level_cips = graphtools.extract_core_and_interface(node, base_graph,
                                                                radius_list=radius_list,
                                                                thickness_list=base_thickness_list,
                                                                hash_bitmask=hash_bitmask)
        # FOR EVERY BASE CIP: hash interfaces and save the abstract view
        for base_cip in base_level_cips:
            cores = [n for n, d in base_cip.graph.nodes(data=True)
                     if 'interface' not in d]
            base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash,
                                                       abstract_cip.interface_hash,
                                                       get_mods(mod_dict, cores),
                                                       1337,
                                                       hash_bitmask)
            base_cip.abstract_view = abstract_cip.graph
            cips.append(base_cip)
    return cips
def extract_cips_base(node, graphmanager, base_thickness_list=None,
                      hash_bitmask=None, mod_dict=None, **argz):
    '''
    :param node: node in the BASE graph
    :param graphmanager: wrapper providing abstract graph, base graph and vectorizer
    :param base_thickness_list: thickness list for the base-level extraction
    :param hash_bitmask: bitmask for the hash functions
    :param mod_dict: {base node id: interface-hash modifier}; defaults to {}
    :param argz: forwarded to graphtools.extract_core_and_interface
    :return: a list of cips
    '''
    # avoid a mutable default argument: a shared dict default would leak
    # state between calls
    if mod_dict is None:
        mod_dict = {}
    # PREPARE
    abstract_graph = graphmanager.abstract_graph()
    base_graph = graphmanager.base_graph()
    vectorizer = graphmanager.vectorizer
    if 'hlabel' not in abstract_graph.node[abstract_graph.nodes()[0]]:
        vectorizer._label_preprocessing(abstract_graph)
    if 'hlabel' not in base_graph.node[base_graph.nodes()[0]]:
        vectorizer._label_preprocessing(base_graph)
    # LOOK UP ABSTRACT GRAPHS NODE AND
    # EXTRACT CIPS NORMALY ON ABSTRACT GRAPH
    for n, d in abstract_graph.nodes(data=True):
        if node in d['contracted']:
            abs_node = n
            break
    else:
        raise Exception("IMPOSSIBLE NODE")
    # abstract-level extraction always uses radius 0; restore afterwards
    radiuslist_backup = argz['radius_list']
    argz['radius_list'] = [0]
    abstract_cips = graphtools.extract_core_and_interface(
        abs_node, abstract_graph, vectorizer=vectorizer,
        hash_bitmask=hash_bitmask, **argz)
    # FOR EVERY ABSTRACT CIP: EXTRACT BASE CIP
    cips = []
    argz['radius_list'] = radiuslist_backup
    for abstract_cip in abstract_cips:
        argz['thickness_list'] = base_thickness_list
        base_level_cips = graphtools.extract_core_and_interface(
            node, base_graph, vectorizer=vectorizer,
            hash_bitmask=hash_bitmask, **argz)
        # FOR EVERY BASE CIP: hash interfaces and save the abstract view
        for base_cip in base_level_cips:
            cores = [n for n, d in base_cip.graph.nodes(data=True)
                     if 'interface' not in d]
            base_cip.interface_hash = eden.fast_hash_4(
                base_cip.interface_hash,
                abstract_cip.interface_hash,
                get_mods(mod_dict, cores), 1337,
                hash_bitmask)
            base_cip.abstract_view = abstract_cip.graph
            cips.append(base_cip)
    return cips
def extract_cips(node, abstract_graph, base_graph, abstract_radius_list=None,
                 abstract_thickness_list=None, base_thickness_list=None,
                 vectorizer=None, hash_bitmask=None, mod_dict=None, **argz):
    '''
    :param node: node in the abstract graph
    :param abstract_graph: the abstract graph expanded
    :param base_graph: the underlying real graph
    :param abstract_radius_list: radii in abstract graph
    :param abstract_thickness_list: thickness in abstract graph
    :param base_thickness_list: thickness for the base graph
    :param vectorizer: vectorizer used for label preprocessing
    :param hash_bitmask: bitmask for the hash functions
    :param mod_dict: {base node id: interface-hash modifier}; defaults to {}
    :return: a list of cips
    '''
    # avoid a mutable default argument: a shared dict default would leak
    # state between calls
    if mod_dict is None:
        mod_dict = {}
    if 'hlabel' not in abstract_graph.node[abstract_graph.nodes()[0]]:
        vectorizer._label_preprocessing(abstract_graph)
    if 'hlabel' not in base_graph.node[base_graph.nodes()[0]]:
        vectorizer._label_preprocessing(base_graph)
    # on the abstract graph we use the normal extract cip stuff:
    abstract_cips = graphtools.extract_core_and_interface(node, abstract_graph,
                                                          radius_list=abstract_radius_list,
                                                          thickness_list=abstract_thickness_list,
                                                          vectorizer=vectorizer,
                                                          hash_bitmask=hash_bitmask,
                                                          **argz)
    cips = []
    for acip in abstract_cips:
        # now we need to calculate the real cips:
        # the trick is to also use the normal extractor, but in order to do
        # that we need to collapse the 'core'
        # MERGE THE CORE OF THE ABSTRACT GRAPH IN THE BASE GRAPH
        mergeids = [base_graph_id
                    for radius in range(acip.radius + 1)
                    for abstract_node_id in acip.distance_dict.get(radius)
                    for base_graph_id in abstract_graph.node[abstract_node_id]['contracted']]
        base_copy = base_graph.copy()
        # fix: the original loop variable shadowed the 'node' parameter;
        # use a distinct name for the nodes being merged
        for merge_node in mergeids[1:]:
            graphtools.merge(base_copy, mergeids[0], merge_node)
        # do cip extraction and calculate the real core hash
        base_level_cips = graphtools.extract_core_and_interface(mergeids[0], base_copy,
                                                                radius_list=[0],
                                                                thickness_list=base_thickness_list,
                                                                vectorizer=vectorizer,
                                                                hash_bitmask=hash_bitmask,
                                                                **argz)
        core_hash = graphtools.graph_hash(base_graph.subgraph(mergeids),
                                          hash_bitmask=hash_bitmask)
        # now we have a bunch of base_level_cips and need to attach info
        # from the abstract cip.
        for base_cip in base_level_cips:
            # we cheated a little with the core, so we need to undo our cheating
            whatever = base_cip.graph.copy()
            base_cip.graph = base_graph.subgraph(base_cip.graph.nodes() + mergeids).copy()
            for n in mergeids:
                base_cip.graph.node[n]['core'] = True
            for n, d in base_cip.graph.nodes(data=True):
                if 'core' not in d:
                    d['interface'] = True
                    d['distance_dependent_label'] = whatever.node[n]['distance_dependent_label']
            base_cip.core_hash = core_hash
            # merging cip info with the abstract graph
            base_cip.interface_hash = eden.fast_hash_4(base_cip.interface_hash,
                                                       acip.interface_hash,
                                                       get_mods(mod_dict, mergeids), 0,
                                                       hash_bitmask)
            base_cip.core_nodes_count = acip.core_nodes_count
            base_cip.radius = acip.radius
            base_cip.abstract_thickness = acip.thickness
            # i want to see what they look like :)
            base_cip.abstract_view = acip.graph
            cips.append(base_cip)
    return cips