def normalize_graph(self, subgraph, vertex):
    """Return the receptive field of ``vertex`` as a normalized graph.

    ``subgraph`` is first ranked relative to ``vertex`` with
    ``compute_subgraph_ranking``, using as reference order the
    ``'labeling'`` attribute of the graph produced by
    ``labeling_procedure``.  The ranked graph is then brought to exactly
    ``self.k`` nodes: when it has more, only the ``k`` nodes with the
    smallest ``'labeling'`` values are kept and ranked again; when it has
    fewer, dummy nodes are appended.  The result is passed through
    ``canonicalizes``.

    :param subgraph: graph (set of vertices U) assembled around ``vertex``
    :param vertex: node the ranking is computed from
    :return: whatever ``self.canonicalizes`` returns for the sized graph
    """

    def reference_order():
        # 'labeling' attribute the labeling procedure assigns on `subgraph`.
        procedure_output = self.labeling_procedure(subgraph)
        return nx.get_node_attributes(
            procedure_output['labeled_graph'], 'labeling')

    # Order the nodes w.r.t. the labeling procedure.
    subgraph_U = self.compute_subgraph_ranking(
        subgraph, vertex, reference_order())
    node_count = len(subgraph_U.nodes())

    if node_count > self.k:
        # Too many nodes: keep the k lowest-ranked ones, then rank again.
        labels = dict(nx.get_node_attributes(subgraph_U, 'labeling'))
        kept_nodes = sorted(labels, key=labels.get)[:self.k]
        subgraph_N = subgraph_U.subgraph(kept_nodes)
        # NOTE(review): the labeling procedure is run a second time on the
        # same `subgraph`; kept as-is because the helper is opaque here.
        subgraph_ranked_N = self.compute_subgraph_ranking(
            subgraph_N, vertex, reference_order())
    elif node_count < self.k:
        # Too few nodes: pad up to k.
        subgraph_ranked_N = self.add_dummy_nodes_at_the_end(subgraph_U)
    else:
        subgraph_ranked_N = subgraph_U

    return self.canonicalizes(subgraph_ranked_N)
def make_(self):
    """Build the nested list of node attributes used as 1D CNN input.

    Every graph returned by ``select_node_sequence`` is relabeled so that
    its nodes are named after their ``'labeling'`` attribute; the relabeled
    graph is appended to ``self.all_subgraph``.  Its ``'attr_name'`` values,
    sorted by node name, form one row of the result.  When
    ``self.one_hot > 0`` each value is first converted with
    ``utils.indices_to_one_hot``.

    :return: list of rows, expected to have shape ``(self.w, self.k)`` in
        its first two dimensions
    :raises BadShapeError: when the first two dimensions of the result are
        not ``(self.w, self.k)``
    """

    def sorted_attributes(relabeled):
        # 'attr_name' values ordered by (relabeled) node name.
        pairs = nx.get_node_attributes(relabeled, 'attr_name').items()
        return [value for _, value in sorted(pairs, key=lambda pair: pair[0])]

    forcnn = []
    self.all_subgraph = []
    for graph in self.select_node_sequence():
        # Rename the nodes w.r.t. the labeling.
        frelabel = nx.relabel_nodes(
            graph, nx.get_node_attributes(graph, 'labeling'))
        self.all_subgraph.append(frelabel)

        if self.one_hot > 0:
            row = [
                utils.indices_to_one_hot(value, self.one_hot)
                for value in sorted_attributes(frelabel)
            ]
        else:
            row = sorted_attributes(frelabel)
        forcnn.append(row)

    shape = np.array(forcnn).shape
    if shape[0] != self.w or shape[1] != self.k:
        raise BadShapeError(
            'Shapes do not match : {0} instead of {1}'.format(
                shape, (self.w, self.k)))
    return forcnn
def measure_MI(g):
    """Print neighbourhood and assortativity statistics for attribute 'attr'.

    For every node whose ``'attr'`` value equals 0, the neighbours whose
    ``'attr'`` is greater than 0.5 are counted; the node is tallied when at
    least half of its neighbours are in that set.  The function then prints
    the tally, the attribute assortativity coefficient for ``'attr'`` and
    the degree Pearson correlation coefficient.

    Changes with respect to the previous version:

    * ``print`` is called with a single pre-formatted string, which produces
      the same text under Python 2 and Python 3;
    * the "at least half" test is written without a division, so it no
      longer depends on Python 2 integer division (which rounded the
      threshold down for nodes of odd degree);
    * ``g.nodes[...]`` replaces ``g.node[...]``, which networkx 2.4 removed;
    * the unused ``degree_assortativity_coefficient`` computation is gone.

    :param g: networkx graph whose nodes all carry an ``'attr'`` attribute
    :return: None; results are printed
    """
    att = nx.get_node_attributes(g, 'attr')
    fraction = 0
    # Neighbour count of the last attr == 0 node visited; stays 0 when the
    # graph has no such node (previously this case raised NameError).
    totalNeighbors = 0
    for node, value in att.items():
        if value != 0:
            continue
        neighbors = list(g.neighbors(node))
        totalNeighbors = len(neighbors)
        totalActive = sum(1 for nbr in neighbors if g.nodes[nbr]['attr'] > 0.5)
        # totalActive >= totalNeighbors / 2, without dividing.
        if 2 * totalActive >= totalNeighbors:
            fraction += 1

    # NOTE(review): the placement of these two prints after the loop is a
    # reconstruction; confirm against the original layout.
    print("Total Number of Neighbors:  %s" % totalNeighbors)
    print("Fraction:  %s" % fraction)

    print("Attribute Assortativity Coeff: ")
    print(nx.attribute_assortativity_coefficient(g, 'attr'))

    r = nx.degree_pearson_correlation_coefficient(g)
    print("Degree Pearson Correlation Coefficient: ")
    print("%3.1f" % r)
def canonicalizes(self, subgraph):
    """Re-rank the ``'labeling'`` of ``subgraph`` using a nauty canonical labeling.

    The nodes are renamed to consecutive integers, the adjacency is handed
    to a nauty ``Graph``, and the canonical labeling it yields is combined
    with the existing ``'labeling'`` attribute through
    ``rank_label_wrt_dict``.  The elapsed wall-clock time is appended to
    ``self.all_times['canonicalizes']``.

    ``canonical_labeling`` is now evaluated once per call; it used to be
    re-run for every node while building the lookup dictionary.

    :param subgraph: graph whose nodes carry a ``'labeling'`` attribute
    :return: integer-labeled copy of ``subgraph`` with the new ``'labeling'``
    """
    st = time.time()

    g_relabel = convert_node_labels_to_integers(subgraph)
    labeled_graph = nx.Graph(g_relabel)
    node_count = len(g_relabel.nodes())

    nauty_graph = Graph(node_count, directed=False)
    nauty_graph.set_adjacency_dict(
        {n: list(nbrdict) for n, nbrdict in g_relabel.adjacency()})

    labels_dict = nx.get_node_attributes(g_relabel, 'labeling')

    # One nauty run per call. An empty graph never triggered a nauty call
    # before, so that case is still skipped.
    canonical_order = canonical_labeling(nauty_graph) if node_count else []
    canonical_labeling_dict = {k: canonical_order[k] for k in range(node_count)}

    new_ordered_dict = self.rank_label_wrt_dict(
        g_relabel, labels_dict, canonical_labeling_dict)
    nx.set_node_attributes(labeled_graph, new_ordered_dict, 'labeling')

    ed = time.time()
    self.all_times['canonicalizes'].append(ed - st)
    return labeled_graph
def all_its_precessor_is_not(node, circuit):
    """Return True when no predecessor of ``node`` has ``"level"`` equal to -1.

    :param node: node of ``circuit`` whose predecessors are inspected
    :param circuit: directed networkx graph whose nodes carry a ``"level"``
        attribute
    :return: False as soon as a predecessor with level -1 is found,
        True otherwise (including when there is no predecessor)
    """
    level = nx.get_node_attributes(circuit, "level")
    return not any(level[pre] == -1 for pre in circuit.predecessors(node))
def add_dummy_nodes_at_the_end(self, nx_graph):
    """Return a copy of ``nx_graph`` padded with dummy nodes up to ``self.k`` nodes.

    Sets ``self.exists_dummies`` to True.  Each added node gets a name and
    a ``'labeling'`` value placed after the current maxima, and
    ``self.dummy_value`` as its ``'attr_name'``.

    :param nx_graph: graph whose nodes carry a ``'labeling'`` attribute
    :return: new ``nx.Graph`` with at least ``self.k`` nodes
    """
    # Open question carried over from the original author: "why 0 ??"
    self.exists_dummies = True

    padded = nx.Graph(nx_graph)
    node_names = list(nx_graph.nodes())
    label_values = list(nx.get_node_attributes(nx_graph, 'labeling').values())

    offset = 0
    while padded.number_of_nodes() < self.k:
        offset += 1
        padded.add_node(
            max(node_names) + offset,
            attr_name=self.dummy_value,
            labeling=max(label_values) + offset,
        )
    return padded
def create_all_rfs(self):
    """
    Turn the receptive fields of the object into nested lists for the CNN.

    Each receptive field returned by ``node_sequence_selection`` is
    relabeled so that its nodes are named after their ``'labeling'``
    attribute; its ``'attr_name'`` values, ordered by node name, form one
    row of the result.

    :return: list with one row of attribute values per receptive field
    """

    def attribute_row(field):
        relabeled = nx.relabel_nodes(
            field, nx.get_node_attributes(field, 'labeling'))
        attributes = nx.get_node_attributes(relabeled, 'attr_name')
        return [attributes[name] for name in sorted(attributes)]

    # node_sequence_selection returns the full list of receptive fields.
    return [attribute_row(field) for field in self.node_sequence_selection()]
def receptive_field_normalization(self, graph: nx.Graph, vertex: int):
    """
    Normalize a graph into a receptive field of exactly ``self.rf_size`` nodes.

    ``graph`` is ranked relative to ``vertex`` with
    ``compute_graph_ranking``, using as reference order the ``'labeling'``
    attribute of the graph produced by ``labeling_function``.  When the
    ranked graph has more than ``rf_size`` nodes, the ``rf_size`` nodes with
    the smallest ``'labeling'`` values are kept and ranked again; when it
    has fewer, it is padded with ``receptive_field_padding``.  The result is
    passed through ``nauty_graph_automorphism``.

    The ``graph`` annotation now names the ``nx.Graph`` class (it used to
    read ``nx.graph``, which is not the graph type).

    :param graph: provenance graph to be normalized
    :param vertex: starting vertex of the neighbourhood assembly, from
        which the ranking is computed
    :return: normalized (and canonicalized) graph
    """

    def reference_order():
        # 'labeling' attribute the labeling function assigns on `graph`.
        labeled = self.labeling_function(graph)['labeled_graph']
        return nx.get_node_attributes(labeled, 'labeling')

    graph_subset = self.compute_graph_ranking(graph, vertex, reference_order())
    node_count = len(graph_subset.nodes())

    if node_count > self.rf_size:
        # Too many nodes: keep the rf_size lowest-ranked ones, rank again.
        labels = dict(nx.get_node_attributes(graph_subset, 'labeling'))
        k_first_nodes = sorted(labels, key=labels.get)[:self.rf_size]
        full_graph = graph_subset.subgraph(k_first_nodes)
        # NOTE(review): labeling_function is evaluated a second time on the
        # same `graph`; left unchanged because the helper is opaque here.
        full_ranked_graph = self.compute_graph_ranking(
            full_graph, vertex, reference_order())
    elif node_count < self.rf_size:
        full_ranked_graph = self.receptive_field_padding(graph_subset)
    else:
        full_ranked_graph = graph_subset

    return self.nauty_graph_automorphism(full_ranked_graph)
def compute_subgraph_ranking(self, subgraph, vertex, original_order_to_respect):
    """Return a copy of ``subgraph`` carrying a new ``'labeling'`` attribute.

    ``labeling_to_root`` labels ``subgraph`` relative to ``vertex``; those
    labels are then combined with ``original_order_to_respect`` by
    ``rank_label_wrt_dict`` and stored as ``'labeling'`` on the copy.  The
    elapsed wall-clock time is appended to
    ``self.all_times['compute_subgraph_ranking']``.

    :param subgraph: graph to rank
    :param vertex: node the ranking is computed from
    :param original_order_to_respect: reference labeling handed to
        ``rank_label_wrt_dict``
    :return: new ``nx.Graph`` with the updated ``'labeling'`` attribute
    """
    started = time.time()

    labeled_graph = nx.Graph(subgraph)
    rooted = self.labeling_to_root(subgraph, vertex)
    new_ordered_dict = self.rank_label_wrt_dict(
        rooted,
        nx.get_node_attributes(rooted, 'labeling'),
        original_order_to_respect,
    )
    nx.set_node_attributes(labeled_graph, new_ordered_dict, 'labeling')

    elapsed = time.time() - started
    self.all_times['compute_subgraph_ranking'].append(elapsed)
    return labeled_graph
def compute_graph_ranking(graph: nx.Graph, vertex: int, original_node_order: dict):
    """
    Return a copy of ``graph`` whose ``'labeling'`` attribute is re-ranked.

    ``compute_ranking_distance`` labels the graph relative to ``vertex``
    (presumably by distance to it, going by its name); ``relabel_graph``
    then combines those labels with ``original_node_order``.

    :param graph: subgraph to rank
    :param vertex: landmark vertex for the ranking
    :param original_node_order: original ranking
    :return: new ``nx.Graph`` labeled by the new ranking
    """
    result = nx.Graph(graph)
    distance_ranked = compute_ranking_distance(graph, vertex)
    merged_order = relabel_graph(
        graph=distance_ranked,
        original_labeling=nx.get_node_attributes(distance_ranked, 'labeling'),
        new_labeling=original_node_order,
    )
    nx.set_node_attributes(result, merged_order, 'labeling')
    return result
def receptive_field_padding(self, normalized_graph):
    """
    Return a copy of ``normalized_graph`` padded up to ``self.rf_size`` nodes.

    Each added dummy node gets a name and a ``'labeling'`` value placed
    after the current maxima, and ``self.dummy_value`` as its
    ``'attr_name'``.

    :param normalized_graph: graph to which dummy nodes are added
    :return: new ``nx.Graph`` with at least ``self.rf_size`` nodes
    """
    padded = nx.Graph(normalized_graph)
    node_names = list(normalized_graph.nodes())
    label_values = list(
        nx.get_node_attributes(normalized_graph, 'labeling').values())

    # Add dummy nodes until rf_size is reached.
    offset = 0
    while padded.number_of_nodes() < self.rf_size:
        offset += 1
        padded.add_node(
            max(node_names) + offset,
            attr_name=self.dummy_value,
            labeling=max(label_values) + offset,
        )
    return padded
def nauty_graph_automorphism(graph: nx.Graph):
    """
    Graph canonicalization function, meant to break the ties left by a
    non-injective ranking function.

    The nodes are renamed to consecutive integers, the adjacency is handed
    to a nauty ``Graph``, and the canonical labeling it yields is combined
    with the existing ``'labeling'`` attribute through ``relabel_graph``.

    ``canonical_labeling`` is now evaluated once per call; it used to be
    re-run for every node while building the lookup dictionary.

    :param graph: subgraph to be canonicalized; nodes carry ``'labeling'``
    :return: integer-labeled copy of ``graph`` with the new ``'labeling'``
    """
    # Integer node names are what the nauty graph is indexed by.
    graph_int_labeled = convert_node_labels_to_integers(graph)
    canonicalized_graph = nx.Graph(graph_int_labeled)
    node_count = len(graph_int_labeled.nodes())

    nauty = Graph(node_count, directed=False)
    nauty.set_adjacency_dict(
        {node: list(nbr) for node, nbr in graph_int_labeled.adjacency()})

    labels_dict = nx.get_node_attributes(graph_int_labeled, 'labeling')

    # One nauty run per call. An empty graph never triggered a nauty call
    # before, so that case is still skipped.
    nauty_order = canonical_labeling(nauty) if node_count else []
    canonical_labeling_order = {k: nauty_order[k] for k in range(node_count)}

    canonical_order = relabel_graph(
        graph_int_labeled, labels_dict, canonical_labeling_order)
    nx.set_node_attributes(canonicalized_graph, canonical_order, 'labeling')
    return canonicalized_graph
def activate_nodes(fileN, p, beginRange, endRange, outFileName):
    """Activate a degree-ranked slice of nodes, write the graph, print statistics.

    The graph is read from the edge list ``fileN`` and every node gets
    ``'attr' = 0``.  Nodes are ranked from highest to lowest degree and the
    ones whose rank falls between ``beginRange`` percent and an end
    percentage derived from ``endRange`` get ``'attr' = 1``.  The end
    percentage is ``endRange`` reduced by 0, 10, 20 or 25 points for ``p``
    equal to 0.3, 0.2, 0.1 or 0.05 respectively; for any other ``p`` no
    node is activated.  The graph is then written to
    ``outFileName + ".elist.txt"`` and ``measure_MI`` is called on it.

    Changes with respect to the previous version:

    * the degree ranking is actually used -- the result of ``sorted`` was
      discarded before, so positions referred to plain node order;
    * positions use floor division and list slicing, replacing
      ``att.keys()[i]`` and ``/``, which only worked on Python 2;
    * the inclusive end position is clamped to the node list, so an end
      percentage of 100 no longer indexes past the last node;
    * the four copy-pasted ``p`` branches are driven by one table.

    :param fileN: path of the edge list to read
    :param p: one of 0.3, 0.2, 0.1, 0.05 (compared by equality)
    :param beginRange: start of the rank window, in percent
    :param endRange: end of the rank window for ``p == 0.3``, in percent
    :param outFileName: output path prefix
    :return: None
    """
    # Read graph from file.
    g = nx.read_edgelist(fileN)

    # Sort by degree, highest first; ties keep the graph's node order.
    ranked_nodes = [
        node
        for node, _ in sorted(g.degree, key=lambda pair: pair[1], reverse=True)
    ]

    # Set attribute 0 on all nodes.
    nx.set_node_attributes(g, 0, 'attr')
    att = nx.get_node_attributes(g, 'attr')
    numNodes = len(att)

    # Points removed from endRange for each supported p.
    end_reduction = {0.3: 0, 0.2: 10, 0.1: 20, 0.05: 25}
    if p in end_reduction:
        # Range adjust.
        a = 100 - beginRange
        b = 100 - (endRange - end_reduction[p])
        # Final positions of the range in the ranked node list.
        beginAtt = numNodes - int((a * numNodes) // 100)
        endAtt = numNodes - int((b * numNodes) // 100)
        # endAtt is inclusive; slicing clamps it to the end of the list,
        # and max() keeps negative positions from wrapping around.
        for node in ranked_nodes[max(beginAtt, 0):max(endAtt + 1, 0)]:
            att[node] = 1

    nx.set_node_attributes(g, att, 'attr')
    # NOTE(review): per the networkx documentation, `data` lists *edge*
    # attribute keys, so the node attribute 'attr' is presumably not written
    # to the file -- confirm this is the intended output.
    nx.write_edgelist(g, outFileName + ".elist.txt", data=['attr'])

    measure_MI(g)
def wl_normalization(self, graph):
    """Label the nodes of ``graph`` with one Weisfeiler-Lehman-style relabeling step.

    The ``'attr_name'`` values are first compressed to integers in order of
    first appearance.  Each node then gets a signature made of its own
    label followed by the sorted labels of its neighbours; signatures are
    numbered in order of first appearance and that number becomes the
    node's ``'labeling'`` attribute.

    Signatures are now keyed by tuple.  They used to be keyed by
    ``str(ndarray)``, which numpy abbreviates with ``...`` for arrays of
    more than 1000 elements (the default print threshold), so distinct
    large neighbourhoods could share a label.

    :param graph: networkx graph whose nodes carry an ``'attr_name'``
        attribute
    :return: dict with ``'labeled_graph'`` (copy of ``graph`` with the
        ``'labeling'`` attribute set) and ``'ordered_nodes'`` (original
        node names sorted by that label)
    """
    result = {}
    labeled_graph = nx.Graph(graph)

    # Work on a copy whose nodes are named by their position, and remember
    # how to map positions back to the original names.
    relabel_dict_ = {node: index for index, node in enumerate(graph.nodes())}
    inv_relabel_dict_ = {index: node for node, index in relabel_dict_.items()}
    graph_relabel = nx.relabel_nodes(graph, relabel_dict_)

    l_aux = list(nx.get_node_attributes(graph_relabel, 'attr_name').values())
    # Old-style adjacency list, as in networkx 1.0.
    adjency_list = [
        list(neighbors.keys()) for _, neighbors in graph_relabel.adjacency()
    ]

    # Compress attribute values to natural numbers starting at 0.
    label_lookup = {}
    labels = np.zeros(len(l_aux), dtype=np.int32)
    for position, value in enumerate(l_aux):
        labels[position] = label_lookup.setdefault(value, len(label_lookup))

    # Number each distinct (own label, sorted neighbour labels) signature
    # in order of first appearance.
    new_labels = labels.copy()
    label_lookup = {}
    for v, neighbors in enumerate(adjency_list):
        long_label = np.concatenate(
            (np.array([labels[v]]), np.sort(labels[neighbors])))
        signature = tuple(long_label.tolist())
        new_labels[v] = label_lookup.setdefault(signature, len(label_lookup))
    labels = new_labels

    dict_ = {inv_relabel_dict_[i]: labels[i] for i in range(len(labels))}
    nx.set_node_attributes(labeled_graph, dict_, 'labeling')

    result['labeled_graph'] = labeled_graph
    result['ordered_nodes'] = [
        node for node, _ in sorted(dict_.items(), key=lambda item: item[1])
    ]
    return result
print("generating blog graph..") blogGraph = nx.DiGraph() with open('nodeslist.csv', 'r') as nodelist: nodelistreader = csv.DictReader(nodelist) for row in nodelistreader: blogGraph.add_node(row['\ufeffId'], URL=row['URL'], Label=row['Label']) with open('edgelist.csv', 'r') as edgelist: edgelistreader = csv.DictReader(edgelist) for row in edgelistreader: blogGraph.add_edge(row['Source'], row['Target']) print("predicting labels") accuracy = dict() number_of_nodes = blogGraph.number_of_nodes() attributes = nx.get_node_attributes(blogGraph, 'Label') nodes = blogGraph.nodes for trial in range(10): for i in range(1, 11): random.seed() nodeSample = random.sample(nodes, math.ceil(len(nodes) * (i / 10))) correct = 0 for node in blogGraph: neighbors = list(blogGraph.neighbors(node)) liberalCout = 0 consertiveCount = 0 for neighbor in neighbors: if neighbor in nodeSample: if attributes[neighbor] == '0': liberalCout += 1 else: