def transform(self, graph_list):
    """
    Computes the explicit feature map of a list of graphs, summing the
    approximated features extracted for every radius r in [1, k].

    @type graph_list: list
    @param graph_list: list of networkx graphs to convert

    @rtype: scipy.sparse.csr_matrix
    @return: the instance-features matrix (one row per graph)
    """
    n = len(graph_list)  # number of graphs
    # Dictionary representing the phi vector of each graph:
    # phi[(i, feature)] = value, where each row i is a graph and each
    # column is a feature.
    phi = {}
    for i in range(n):  # for each graph
        total_feats = {}
        for r in range(1, self.k + 1):  # for each radius
            current_feats = {(i, key): value
                             for (key, value) in self.getFeaturesApproximated(
                                 graph_list[i], r, self.h).items()}
            for (key, value) in current_feats.items():
                if key not in total_feats:
                    total_feats[key] = value
                else:
                    total_feats[key] += value
        phi.update(total_feats)
    ve = convert_to_sparse_matrix(phi)
    if self.normalization:
        ve = pp.normalize(ve, norm='l2', axis=1)
    return ve
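# Every transform above and below relies on convert_to_sparse_matrix, which
# is defined elsewhere in this module. The sketch below is an illustrative
# assumption of its contract, not the module's actual implementation: it maps
# a dictionary keyed by (graph_index, feature) pairs to a CSR matrix with one
# row per graph and one column per distinct feature.
from scipy.sparse import csr_matrix

def _convert_to_sparse_matrix_sketch(feature_dict):
    # Collect the distinct graph indices and features seen in the dictionary.
    rows = sorted({i for (i, _) in feature_dict})
    cols = sorted({f for (_, f) in feature_dict})
    col_index = {f: c for c, f in enumerate(cols)}
    data, r_idx, c_idx = [], [], []
    for (i, f), v in feature_dict.items():
        r_idx.append(i)
        c_idx.append(col_index[f])
        data.append(v)
    return csr_matrix((data, (r_idx, c_idx)),
                      shape=(max(rows) + 1, len(cols)))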
def __transform_serial(self, G_list, approximated=True, keepdictionary=False):
    """
    Private method that converts a networkx graph list into an
    instance-features matrix.

    @type G_list: networkx graph generator
    @param G_list: list of the graphs to convert

    @type approximated: boolean
    @param approximated: True to use a hash function with probable collisions
    during feature decomposition; False guarantees no collisions

    @type keepdictionary: boolean
    @param keepdictionary: True if the instance-features matrix is kept as a
    dictionary, else it is a csr_matrix

    @rtype: scipy.sparse.csr_matrix
    @return: the instance-features matrix
    """
    feature_dict = {}
    MapEncToId = None
    if not keepdictionary:
        MapEncToId = self.UniqueMap()
    for instance_id, G in enumerate(G_list):
        feature_dict.update(
            self.__transform(instance_id, G, approximated, MapEncToId))
    ve = convert_to_sparse_matrix(feature_dict)
    if self.normalization:
        ve = normalize(ve, norm='l2', axis=1)
    return ve
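# The transform methods in this module read graph_list[i].nodes[j]['label'],
# so every node of every input graph must carry a 'label' attribute. A
# minimal sketch of how such an input list might be built with networkx
# (string labels are an assumption; any hashable label works):
import networkx as nx

def _make_example_graphs():
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'label': 'A'}), (1, {'label': 'B'}),
                       (2, {'label': 'A'})])
    g1.add_edges_from([(0, 1), (1, 2)])
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'label': 'B'}), (1, {'label': 'B'})])
    g2.add_edge(0, 1)
    return [g1, g2]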
def transform_serial_explicit(self, G_list, approximated=True):
    """
    Converts a networkx graph list into an explicit instance-features
    matrix, optionally drawing each graph.

    @type G_list: networkx graph generator
    @param G_list: list of the graphs to convert

    @type approximated: boolean
    @param approximated: True to use a hash function with probable collisions
    during feature decomposition; False guarantees no collisions

    @rtype: scipy.sparse.csr_matrix
    @return: the instance-features matrix
    """
    list_dict = {}
    for instance_id, G in enumerate(G_list):
        if self.show:
            drawGraph(G)
        list_dict.update(
            self.__transform_explicit(instance_id, G, approximated))
    return convert_to_sparse_matrix(list_dict)
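# A usage sketch, assuming these methods belong to a kernel class (here
# called GraphKernel purely for illustration) that takes h and normalization
# as constructor parameters:
#
#     kernel = GraphKernel(h=2, normalization=True)
#     X = kernel.transform_serial_explicit(_make_example_graphs())
#     print(X.shape)   # (n_graphs, n_features)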
def transform(self, graph_list):
    """
    Computes one explicit feature map per iteration of the
    Weisfeiler-Lehman (WL) relabelling: phis[t] collects the features
    generated at iteration t.

    @type graph_list: list
    @param graph_list: list of networkx graphs to convert

    @rtype: list of scipy.sparse.csr_matrix
    @return: one instance-features matrix per WL iteration (h + 1 in total)
    """
    n = len(graph_list)  # number of graphs
    # List of the orthogonalized phi: phis[t] is the phi of the t-th
    # iteration of the WL test.
    phis = [{} for _ in range(self.h + 1)]
    # NodeIdToLabelId[i][j] is the label id of node j in graph i.
    NodeIdToLabelId = [{} for _ in range(n)]
    label_lookup = {}  # map from features to the corresponding label id
    label_counter = 0  # incremental value for label ids

    for i in range(n):  # for each graph
        for j in graph_list[i].nodes():  # for each node
            # Update label_lookup and label ids in the first iteration,
            # which considers the nodes' original labels.
            # (graph_list[i].nodes[j] is the networkx >= 2.0 syntax; older
            # versions used graph_list[i].node[j].)
            label = graph_list[i].nodes[j]['label']
            if label not in label_lookup:
                label_lookup[label] = label_counter
                NodeIdToLabelId[i][j] = label_counter
                label_counter += 1
            else:
                NodeIdToLabelId[i][j] = label_lookup[label]

            if self.__version == 0:  # consider old FS features
                feature = self.__fsfeatsymbol + str(label_lookup[label])
                phis[0][(i, feature)] = phis[0].get((i, feature), 0.0) + 1.0

    ### MAIN LOOP
    it = 0
    # Label ids of the next iteration.
    NewNodeIdToLabelId = copy.deepcopy(NodeIdToLabelId)
    # Each iteration computes the next labelling, whose labels are
    # contexts of the previous one.
    while it <= self.h:
        label_lookup = {}
        for i in range(n):  # for each graph
            for j in graph_list[i].nodes():  # consider each node's neighbourhood
                neighbors = sorted(NodeIdToLabelId[i][u]
                                   for u in graph_list[i].neighbors(j))
                # Compute the new label id from the node's current label
                # followed by the sorted labels of its neighbours.
                long_label_string = str(NodeIdToLabelId[i][j]) + self.__startsymbol
                for u in neighbors:
                    long_label_string += str(u) + self.__conjsymbol
                long_label_string = long_label_string[:-1] + self.__endsymbol
                if long_label_string not in label_lookup:
                    label_lookup[long_label_string] = label_counter
                    NewNodeIdToLabelId[i][j] = label_counter
                    label_counter += 1
                else:
                    NewNodeIdToLabelId[i][j] = label_lookup[long_label_string]

                if self.__version == 0 and it < self.h:  # consider FS features
                    feature = self.__fsfeatsymbol + str(NewNodeIdToLabelId[i][j])
                    phis[it][(i, feature)] = phis[it].get((i, feature), 0.0) + 1.0

                # Adding the feature with its context.
                if it < self.h:
                    feature = (str(NodeIdToLabelId[i][j]) +
                               self.__contextsymbol +
                               str(NewNodeIdToLabelId[i][j]))  # with context
                else:
                    feature = str(NodeIdToLabelId[i][j])  # null context
                phis[it][(i, feature)] = phis[it].get((i, feature), 0.0) + 1.0

        NodeIdToLabelId = copy.deepcopy(NewNodeIdToLabelId)  # update current label ids
        it += 1

    ves = [convert_to_sparse_matrix(phi) for phi in phis]
    if self.normalization:
        ves = [pp.normalize(ve, norm='l2', axis=1) for ve in ves]
    return ves
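# A worked illustration of the relabelling string built in the main loop
# above: for a node with current label id 3 and sorted neighbour label ids
# [1, 5], and assuming single-character symbols startsymbol='(',
# conjsymbol=',' and endsymbol=')' (the actual symbols are instance
# attributes), the string grows as '3(' -> '3(1,' -> '3(1,5,' and the
# trailing separator is replaced by the end symbol, giving '3(1,5)'.
# Identical (label, sorted-neighbourhood) pairs therefore always map to the
# same new label id via label_lookup.
def _wl_label_string(label_id, neighbor_ids,
                     startsymbol='(', conjsymbol=',', endsymbol=')'):
    s = str(label_id) + startsymbol
    for u in sorted(neighbor_ids):
        s += str(u) + conjsymbol
    return s[:-1] + endsymbol

assert _wl_label_string(3, [5, 1]) == '3(1,5)'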
def transform(self, graph_list):
    """
    Computes the explicit feature map of a list of graphs, accumulating
    the FS features produced by every iteration of the Weisfeiler-Lehman
    relabelling into a single phi vector.

    @type graph_list: list
    @param graph_list: list of networkx graphs to convert

    @rtype: scipy.sparse.csr_matrix
    @return: the instance-features matrix (one row per graph)
    """
    n = len(graph_list)  # number of graphs
    # Dictionary representing the phi vector of each graph:
    # phi[(i, feature)] = value, where each row i is a graph and each
    # column is a feature.
    phi = {}
    # NodeIdToLabelId[i][j] is the label id of node j in graph i.
    NodeIdToLabelId = [{} for _ in range(n)]
    label_lookup = {}  # map from features to the corresponding label id
    label_counter = 0  # incremental value for label ids

    for i in range(n):  # for each graph
        for j in graph_list[i].nodes():  # for each node
            # Update label_lookup and label ids in the first iteration,
            # which considers the nodes' original labels.
            label = graph_list[i].nodes[j]['label']
            if label not in label_lookup:
                label_lookup[label] = label_counter
                NodeIdToLabelId[i][j] = label_counter
                label_counter += 1
            else:
                NodeIdToLabelId[i][j] = label_lookup[label]

            feature = self.__fsfeatsymbol + str(label_lookup[label])
            phi[(i, feature)] = phi.get((i, feature), 0.0) + 1.0

    ### MAIN LOOP
    it = 0
    # Label ids of the next iteration.
    NewNodeIdToLabelId = copy.deepcopy(NodeIdToLabelId)
    # Each iteration computes the next labelling, whose labels are
    # contexts of the previous one.
    while it < self.h:
        label_lookup = {}
        for i in range(n):  # for each graph
            for j in graph_list[i].nodes():  # consider each node's neighbourhood
                neighbors = sorted(NodeIdToLabelId[i][u]
                                   for u in graph_list[i].neighbors(j))
                # Compute the new label id from the node's current label
                # followed by the sorted labels of its neighbours.
                long_label_string = str(NodeIdToLabelId[i][j]) + self.__startsymbol
                for u in neighbors:
                    long_label_string += str(u) + self.__conjsymbol
                long_label_string = long_label_string[:-1] + self.__endsymbol
                if long_label_string not in label_lookup:
                    label_lookup[long_label_string] = label_counter
                    NewNodeIdToLabelId[i][j] = label_counter
                    label_counter += 1
                else:
                    NewNodeIdToLabelId[i][j] = label_lookup[long_label_string]

                feature = self.__fsfeatsymbol + str(NewNodeIdToLabelId[i][j])
                phi[(i, feature)] = phi.get((i, feature), 0.0) + 1.0

        NodeIdToLabelId = copy.deepcopy(NewNodeIdToLabelId)  # update current label ids
        it += 1

    ve = convert_to_sparse_matrix(phi)
    if self.normalization:
        ve = pp.normalize(ve, norm='l2', axis=1)
    return ve
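# Once an explicit feature map is available, the kernel (Gram) matrix
# between all graphs follows directly from the dot product of the CSR
# matrix with its transpose. A minimal usage sketch, reusing the
# illustrative kernel object and example graphs introduced above:
#
#     ve = kernel.transform(_make_example_graphs())
#     K = (ve @ ve.T).toarray()   # K[i, j] is the kernel between graphs i and j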