def prepare_data_for_pr(topic_id, date, window_size, topicname, real_topic_id):
    """Load a topic's stored graphs and dump them to temp files for PageRank.

    Two GEXF files are read from ``GRAPH_PATH``; their names are built from
    ``real_topic_id``, ``date`` and ``window_size``.  Each graph is then
    passed to ``write_tmp_file`` together with a fresh temporary file.

    Parameters
    ----------
    topic_id : unused by this function (kept for interface compatibility).
    date, window_size : used only to build the graph file key.
    topicname : topic name; a falsy value aborts the call.
    real_topic_id : id used as the first component of the graph file key.

    Returns
    -------
    None when ``topicname`` is falsy or either graph is empty; otherwise the
    tuple ``(tmp_file, N, ds_tmp_file, ds_N)`` where ``N``/``ds_N`` are the
    node counts and the file entries are whatever ``write_tmp_file`` returns.
    """
    topic = topicname
    if not topic:
        return None

    key = str(real_topic_id) + '_' + str(date) + '_' + str(window_size)
    g = nx.read_gexf(str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
    ds_dg = nx.read_gexf(str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
    if not g or not ds_dg:
        return None

    N = len(g)
    print('topic source network size %s' % N)
    ds_N = len(ds_dg)
    print('topic direct superior network size %s' % ds_N)
    if not N or not ds_N:
        return None

    # The temporary files are created only now, after every early exit:
    # they use delete=False, so creating them up front left orphaned files
    # on disk each time the function bailed out with None.
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    ds_tmp_file = tempfile.NamedTemporaryFile(delete=False)

    # Store the network structure in the temporary files; the actual writing
    # is factored out into write_tmp_file.
    print('start PageRank tmp_file, ds_tmp_file')
    tmp_file = write_tmp_file(tmp_file, g, N)
    ds_tmp_file = write_tmp_file(ds_tmp_file, ds_dg, ds_N)
    print('end PageRank tmp_file, ds_tmp_file')
    return tmp_file, N, ds_tmp_file, ds_N
def occurenceCounter(charList, graphFile, bookNetworksPath):
    """Report which characters occur in a chapter graph.

    Parameters
    ----------
    charList : iterable of node names, or a falsy value.
    graphFile : path of the chapter GEXF file (only read when ``charList``
        is non-empty).
    bookNetworksPath : prefix to which ``"overall.gexf"`` is appended.

    Returns
    -------
    If ``charList`` is falsy: the sorted node names of ``overall.gexf``.
    Otherwise: a list of ``(name, occurrence)`` tuples in ``charList`` order,
    where ``occurrence`` is 1 if the name is a node of the chapter graph and
    0 if it is not.
    """
    if not charList:
        # Get characters from overall.gexf graph, sorted by name.
        overall_g = nx.read_gexf(bookNetworksPath + "overall.gexf")
        return sorted(nx.nodes(overall_g))

    g = nx.read_gexf(graphFile)
    # Graph membership is a hash lookup, replacing the former scan over all
    # nodes per character.  It also handles falsy node names correctly: the
    # old "if not currentChar" check reset such a match back to 0.
    return [(item, 1 if item in g else 0) for item in charList]
def analyze_Reff_chains():
    """Build Reff transmission chains for the Flute and EpiFast graphs.

    Each "match20" GEXF file is loaded and handed to
    ``tv.build_Reff_txm_chain``.  Nothing is returned from the code visible
    here.
    """
    flute_graph = nx.read_gexf("Flute_vs_EpiFast/Flute_match20.gexf")
    flute_chain = tv.build_Reff_txm_chain(flute_graph)

    epifast_graph = nx.read_gexf("Flute_vs_EpiFast/Epifast_match20.gexf")
    epifast_chain = tv.build_Reff_txm_chain(epifast_graph)

    # NOTE(review): presumably an upper bound on generations - confirm.
    generation_limit = 31
def add_LPU(self, data_file, gexf_file=None, LPU=None, win=None, is_input=False):
    '''
    Add data associated with a specific LPU to a visualization.

    To add a plot containing neurons from a particular LPU, the LPU needs
    to be added to the visualization using this function. Note that outputs
    from multiple neurons can be visualized using the same visualizer
    object.

    Parameters
    ----------
    data_file: str
        Location of the h5 file generated by neurokernel containing the
        output of the LPU.
    gexf_file: str
        Location of the gexf file describing the LPU. If not specified, it
        will be assumed that the h5 file contains input.
    LPU: str
        Name of the LPU. Will be used as identifier to add plots. For input
        signals, the name of the LPU will be prepended with 'input_'.
        For example::

            V.add_LPU('vision_in.h5', LPU='vision')

        will create the LPU identifier 'input_vision'. Therefore, adding a
        plot depicting this input can be done by::

            V.add_plot({'type': 'image', 'imlim': [-0.5, 0.5]},
                       LPU='input_vision')

        When omitted, the current number of stored data sets is used
        (prefixed with 'input_' for inputs).
    win: slice/list
        Can be used to limit the visualization to a specific time window.
    is_input: bool
        Treat the data as input even if a gexf file is given.
    '''
    if gexf_file and not is_input:
        # Resolve the default identifier before it is used as a key.  It
        # used to be resolved only after the graph and id map had been
        # stored, so an unnamed LPU had them filed under None while its
        # data was filed under len(self._data).
        if not LPU:
            LPU = len(self._data)
        graph = nx.read_gexf(gexf_file)
        self._graph[LPU] = graph

        # Map neuron ids to index into output data array:
        spiking_ids = sorted(int(n) for n, k in graph.nodes(data=True)
                             if k['spiking'])
        self._id_to_data_idx[LPU] = {m: i for i, m in enumerate(spiking_ids)}
    else:
        LPU = 'input_' + str(LPU if LPU else len(self._data))
        if gexf_file:
            self._graph[LPU] = nx.read_gexf(gexf_file)

    data = np.transpose(sio.read_array(data_file))
    if win is not None:
        data = data[:, win]
    self._data[LPU] = data

    # Track the shortest second-axis length over all data sets added so far.
    if self._maxt:
        self._maxt = min(self._maxt, data.shape[1])
    else:
        self._maxt = data.shape[1]
def betweenValue(charList, graphFile, bookNetworksPath):
    """Return betweenness-centrality information for characters.

    Parameters
    ----------
    charList : iterable of node names, or a falsy value.
    graphFile : path of the chapter GEXF file (only read when ``charList``
        is non-empty).
    bookNetworksPath : prefix to which ``"overall.gexf"`` is appended.

    Returns
    -------
    If ``charList`` is falsy: every node name of ``overall.gexf`` ordered by
    decreasing betweenness centrality.
    Otherwise: a list of tuples in ``charList`` order.  A character present
    in the chapter graph yields ``(name, str_value)`` where ``str_value`` is
    its min-max normalised betweenness as a Decimal string (``"0"`` for the
    minimum).  A character that is absent yields ``(name, 0)`` with an
    integer zero, as before.
    """
    if not charList:
        # Rank all characters of the overall graph.  The chapter graph is
        # not needed here, so it is no longer loaded and analysed.
        overall_g = nx.read_gexf(bookNetworksPath + "overall.gexf")
        overallBetweenCent = nx.betweenness_centrality(
            overall_g, k=None, normalized=True, weight="Weight",
            endpoints=False, seed=None
        )
        ranked = sorted(overallBetweenCent.items(), key=itemgetter(1),
                        reverse=True)
        return [name for name, _ in ranked]

    # Compute betweenness for all characters in the current chapter graph.
    g = nx.read_gexf(graphFile)
    betCentrality = nx.betweenness_centrality(
        g, k=None, normalized=True, weight="Weight", endpoints=False,
        seed=None
    )

    d = decimal.Decimal
    if betCentrality:
        minBetween = d(min(betCentrality.values()))
        maxMinusMin = d(max(betCentrality.values())) - minBetween
    else:
        # Empty chapter graph: nobody is present, nothing to normalise.
        minBetween = maxMinusMin = d(0)

    result = []
    for item in charList:
        try:
            value = betCentrality[item]
        except (KeyError, TypeError):
            # Absent (or unhashable) name: assign 0 influence.
            result.append((item, 0))
            continue
        numerator = d(value) - minBetween
        if numerator == 0:
            result.append((item, str(0)))
        else:
            # numerator != 0 implies max > min, so the divisor is non-zero.
            result.append((item, str(numerator / maxMinusMin)))
    return result
def eigValue(charList, graphFile, bookNetworksPath):
    """Return eigenvector-centrality information for characters.

    Parameters
    ----------
    charList : iterable of node names, or a falsy value.
    graphFile : path of the chapter GEXF file (only read when ``charList``
        is non-empty).
    bookNetworksPath : prefix to which ``"overall.gexf"`` is appended.

    Returns
    -------
    If ``charList`` is falsy: every node name of ``overall.gexf`` ordered by
    decreasing eigenvector centrality.
    Otherwise: a list of tuples in ``charList`` order.  A character present
    in the chapter graph yields ``(name, str(centrality))``, using the
    unnormalised value.  A character that is absent yields ``(name, 0)``
    with an integer zero.
    """
    if not charList:
        # Rank all characters of the overall graph.  The chapter graph is
        # not needed here, so it is no longer loaded and analysed.
        overall_g = nx.read_gexf(bookNetworksPath + "overall.gexf")
        overallEigCent = nx.eigenvector_centrality(
            overall_g, max_iter=100, tol=1.0e-6, nstart=None, weight="Weight"
        )
        ranked = sorted(overallEigCent.items(), key=itemgetter(1),
                        reverse=True)
        return [name for name, _ in ranked]

    # Compute eigenvector centrality for the current chapter graph.
    g = nx.read_gexf(graphFile)
    eigCentrality = nx.eigenvector_centrality(
        g, max_iter=100, tol=1.0e-6, nstart=None, weight="Weight"
    )

    result = []
    for item in charList:
        try:
            value = eigCentrality[item]
        except (KeyError, TypeError):
            # Absent (or unhashable) name: assign 0 influence.
            result.append((item, 0))
        else:
            result.append((item, str(value)))
    return result
def read_from_json_gexf(fname=None,label_field_name='APIs',conv_undir = False):
    '''
    Load a graph file (only node-link .json and .gexf are supported).

    The file is first tried as node-link JSON; if that fails for any reason
    it is read as GEXF.  The result is copied into a fresh ``nx.DiGraph``
    whose nodes carry a single ``label`` attribute: the value of
    ``label_field_name`` with newlines replaced by ``-``.

    :param fname: graph file name
    :param label_field_name: node attribute holding the node label
    :param conv_undir: convert to an undirected graph or not
    :return: graph in networkx format, or None if ``fname`` is empty or the
        file could not be loaded
    '''
    if not fname:
        logging.error('no valid path or file name')
        return None

    try:
        try:
            with open(fname, 'rb') as File:
                org_dep_g = json_graph.node_link_graph(json.load(File))
        except Exception:
            # Not a readable node-link JSON file: fall back to GEXF.
            org_dep_g = nx.read_gexf(path=fname)

        g = nx.DiGraph()
        for n, d in org_dep_g.nodes(data=True):
            # Keyword form stores 'label' as a node attribute on every
            # networkx version; attr_dict= only did so on 1.x.
            g.add_node(n, label='-'.join(d[label_field_name].split('\n')))
        g.add_edges_from(org_dep_g.edges())
    except Exception:
        logging.error("unable to load graph from file: {}".format(fname))
        # Previously execution fell through to the debug call below, which
        # crashed because g was never bound.
        return None

    logging.debug('loaded {} a graph with {} nodes and {} egdes'.format(fname, g.number_of_nodes(),g.number_of_edges()))
    if conv_undir:
        g = nx.Graph (g)
        logging.debug('converted {} as undirected graph'.format (g))
    return g
def read_params_file(paramsDir, fName):
    """Parse an XML parameters file and load the graph it refers to.

    The parameters are returned as a list of 5-tuples
    ``(entityType, entityName, dataType, dataName, dataValue)``.  According
    to the original notes, ``entityType`` is simulator, economy or bank,
    ``dataType`` is parameter or attribute, and ``dataValue`` is a string.

    @param paramsDir: prefix prepended to both the XML and the graph file name
    @param fName: name of the XML parameters file
    @return: ``(pList, basicGraph)``, the parameter list and the GEXF graph
    @raise ParameterError: if no ``graphFile`` entry has a value
    """
    xml_root = ET.parse(paramsDir + fName).getroot()
    pList = read_params_from_xml(xml_root, fName)

    # The first entry named "graphFile" decides which graph is loaded.
    graph_name = next(
        (value for _etype, _ename, _dtype, name, value in pList
         if name == "graphFile"),
        None,
    )
    if graph_name is None:
        msg = "No graph file specified in %s" % fName
        logger.error(msg)
        raise ParameterError(msg)

    # This gives us a graph of possible lending
    basicGraph = nx.read_gexf(paramsDir + graph_name)
    return pList, basicGraph
def importGexf(self, url ):
    """Load a GEXF file as a new layer graph and append it to the network.

    ``url`` is a file name relative to a hard-coded, platform-dependent
    parameters directory.  Nodes are relabelled to integers starting at 0;
    every node and edge receives a ``guid`` attribute, and nodes with a
    ``wkt`` attribute also get a ``geometry`` from ``self.WKTtoGeoJSON``.

    Always returns True.
    """
    # TODO once files are stored in a standard upload directory this will need to be changed
    import platform

    on_windows = platform.system() == 'Windows'
    if on_windows:
        base_dir = 'c:\\inetpub\\wwwroot\\pydev\\systemshock\\modellingengine\\fincat\\parameters\\'
    else:
        base_dir = '/var/lib/geonode/src/GeoNodePy/geonode/modellingengine/fincat/parameters/'

    # ensure the nodes are labelled with integers starting from 0
    # TODO might need to start from current number of nodes in G
    layer = nx.convert_node_labels_to_integers(
        nx.read_gexf(base_dir + url), first_label=0)

    for node_id, node_attrs in layer.nodes(data=True):
        node_attrs['guid'] = node_id
        if 'wkt' in node_attrs:
            node_attrs['geometry'] = self.WKTtoGeoJSON(node_attrs['wkt'])

    for source, target, edge_attrs in layer.edges(data=True):
        edge_attrs['guid'] = unicode(source) + '-' + unicode(target)

    # add the new layer graph to the overall network
    self.layergraphs.append(layer)
    return True
def classify(request, pk):
    """Classify an uploaded graph and render the result page.

    The ``Document`` with primary key ``pk`` is read as GML (extension
    ``.gml``) or GEXF (anything else), dumped to ``graph.json`` under
    ``MEDIA_ROOT``, and described by six metrics: node count, edge count,
    density, degree assortativity, average clustering and clique number.
    The metrics are fed to the pickled classifier ``rf_classifier.pkl``.

    Returns the rendered ``classification/classify.html`` response with the
    context keys ``graph`` (a ``GraphPasser``) and ``prediction``.
    """
    # gets object based on id given (404 if it does not exist)
    graph_file = get_object_or_404(Document, pk=pk)

    # reads file into networkx graph based on extension; the upload is
    # closed even if parsing fails so that it can be deleted afterwards
    try:
        if graph_file.extension() == ".gml":
            G = nx.read_gml(graph_file.uploadfile)
        else:
            G = nx.read_gexf(graph_file.uploadfile)
    finally:
        graph_file.uploadfile.close()

    # save graph into json file
    g_json = json_graph.node_link_data(G)
    with open(os.path.join(settings.MEDIA_ROOT, 'graph.json'), 'w') as graph:
        json.dump(g_json, graph)

    # NOTE(security): pickle.load runs arbitrary code from the file; this is
    # only acceptable while rf_classifier.pkl is a trusted, server-side file.
    with open(os.path.join(settings.MEDIA_ROOT, 'rf_classifier.pkl'), 'rb') as malgo:
        algo_loaded = pickle.load(malgo, encoding="latin1")

    # Compute each metric exactly once; they used to be evaluated a second
    # time for GraphPasser.
    features = (
        G.number_of_nodes(),
        G.number_of_edges(),
        nx.density(G),
        nx.degree_assortativity_coefficient(G),
        nx.average_clustering(G),
        nx.graph_clique_number(G),
    )
    dataset = np.array(features)
    print (dataset)

    # One sample with six features: predict() expects a 2-D array.
    X = dataset.reshape(1, -1)
    prediction = algo_loaded.predict(X)
    graph_type = check_prediction(prediction)

    graph = GraphPasser(*features)

    # gives certain variables to the view
    return render(
        request,
        'classification/classify.html',
        {'graph': graph, 'prediction': graph_type}
    )
def test_read_simple_directed_graphml(self):
    """Reading the directed GEXF fixture must reproduce the reference graph."""
    expected = self.simple_directed_graph
    actual = nx.read_gexf(self.simple_directed_fh)

    assert_equal(sorted(expected.nodes()), sorted(actual.nodes()))
    # Compare the edges first without, then with, their attribute dicts.
    for with_data in (False, True):
        assert_equal(sorted(expected.edges(data=with_data)),
                     sorted(actual.edges(data=with_data)))

    # Rewind the fixture's file handle after reading from it.
    self.simple_directed_fh.seek(0)
def GET(self):
    """Return PageRank scores for the nodes scheduled in a time window.

    Query parameters ``start`` and ``end`` (defaults: 2012-3-03 16:00:00 and
    21:00:00) are localised to US/Eastern and shifted back by 10 hours.
    Schedule rows lying entirely inside that window mark their node (third
    column of the row) as "on"; every other node listed in ``allnodes.json``
    is removed from the graph in ``finished_network3.gexf`` before PageRank
    is computed.

    Returns a JSON string with ``nodes`` (list of ``[node, score * 1e6]``)
    and ``input_params``.
    """
    web.header('Access-Control-Allow-Origin', '*')
    output = dict()
    getInput = web.input(start='2012-3-03 16:00:00', end='2012-3-03 21:00:00')
    start_time = pd.to_datetime(getInput.start).tz_localize('US/Eastern') - pd.DateOffset(hours=10)
    end_time = pd.to_datetime(getInput.end).tz_localize('US/Eastern') - pd.DateOffset(hours=10)

    all_schedules = pd.read_json('all_schedules.json')
    allnodes = pd.read_json('allnodes.json')
    nodes = set(allnodes.nodes)

    # Stored timestamps are divided by 1e9 before conversion.
    # NOTE(review): presumably nanoseconds since the epoch - confirm.
    all_schedules['end'] = all_schedules['end'].map(lambda x: datetime.datetime.fromtimestamp(x/1000000000))
    all_schedules['start'] = all_schedules['start'].map(lambda x: datetime.datetime.fromtimestamp(x/1000000000))
    night_sched = all_schedules[(all_schedules.start >= start_time) & (all_schedules.end <= end_time)]

    on_nodes = set()
    for idx, show in night_sched.iterrows():
        on_nodes.add(show[2])
    off_nodes = nodes.difference(on_nodes)

    imported_graph = nx.read_gexf('./finished_network3.gexf')
    # remove_nodes_from silently skips nodes that are not in the graph,
    # which is what the former "try: remove_node / except: continue" loop
    # was for - without also hiding every other kind of error.
    imported_graph.remove_nodes_from(off_nodes)

    pr = nx.pagerank(imported_graph, alpha=0.9, weight='newweight', tol=.01, max_iter=200)
    output['nodes'] = [(i, v*1000000) for i, v in pr.items()]
    output['input_params'] = getInput
    return json.dumps(output)
def dump_melodic_phrases_in_network(network_file, output_dir, myDatabase, base_name):
    """
    Dump an audio clip for every pattern in the 'network' (gexf file).

    Each node's ``label`` attribute is interpreted as an integer pattern id.
    For that id the file name, start time and end time are fetched from the
    database and passed to ``clipAudio``.

    Parameters
    ----------
    network_file : path of the GEXF network.
    output_dir : passed through to ``clipAudio``.
    myDatabase : database name handed to ``psy.connect``.
    base_name : directory joined in front of each file name from the database.

    On a database error the message is printed, the transaction is rolled
    back and the process exits with status 1.
    """
    cmd1 = "select file.filename, pattern.start_time, pattern.end_time from pattern join file on (pattern.file_id = file.id) where pattern.id = %d"

    # reading the network
    full_net = nx.read_gexf(network_file)
    labels = nx.get_node_attributes(full_net, 'label')
    patterns = full_net.nodes()

    # Bound before the try so the handlers can test it even when connect()
    # itself fails (it used to be an unbound name in that case).
    con = None
    try:
        con = psy.connect(database=myDatabase, user='******')
        cur = con.cursor()
        for pattern in patterns:
            pattern_id = int(labels[pattern])
            # int() above guarantees only a number is interpolated.
            cur.execute(cmd1 % pattern_id)
            filename, start, end = cur.fetchone()
            clipAudio(output_dir, os.path.join(base_name, filename), start, end, pattern_id)
    except psy.DatabaseError as e:
        print('Error %s' % e)
        if con:
            con.rollback()
        sys.exit(1)
    finally:
        # Close on every path; the success path used to leak the connection.
        if con:
            con.close()
def create_genealogy(graph_id = 'deusto.aitoralmeida'):
    """Extract the ancestor tree of one person from the merged genealogy.

    Starting at ``graph_id``, the edge index from
    ``load_merged_edge_index()`` is followed breadth-first to collect every
    reachable person.  A directed graph containing those people (with their
    ``name`` attribute) and the edges between them is written to
    ``created_genealogy.gexf``.

    Raises KeyError if a collected person has no ``name`` in
    ``merged_genealogy.gexf``.
    """
    from collections import deque

    print('Loading graph')
    merged = nx.read_gexf('merged_genealogy.gexf', node_type = None)

    print('Loading edge index')
    dict_edges = load_merged_edge_index()

    print('Building genealogy')
    to_process = deque([graph_id])
    tree = set()
    # get all the ancestors in tree
    while to_process:
        current = to_process.popleft()
        if current in tree:
            # Already expanded: skipping it avoids repeated work and an
            # endless loop if the edge index contains a cycle.
            continue
        tree.add(current)
        try:
            to_process.extend(dict_edges[current])
        except (KeyError, TypeError):
            # No (usable) entry in the index: this person is a leaf.
            pass

    print('Creating graph')
    names = nx.get_node_attributes(merged, 'name')
    G = nx.DiGraph()
    for person in tree:
        print(person)
        G.add_node(person, name = names[person])
        for target in merged[person]:
            # add edges with the ancestors only
            if target in tree:
                G.add_edge(person, target)

    print('Writing file')
    nx.write_gexf(G, 'created_genealogy.gexf')
def write_estrada(path_to_file):
    """Classify a graph from its adjacency spectrum and store the result.

    Reads ``path_to_file + ".gexf"``, collapses multigraphs to simple
    graphs, and eigen-decomposes the unweighted adjacency matrix.  The
    squared components of the eigenvector at ``argmax`` of the eigenvalues
    are compared with an "ideal" value built from ``sinh`` of the
    eigenvalues.  The RMS of the positive and of the negative log10
    deviations (``eplus``, ``eminus``) determines the class:
    1, +1 if ``eminus > 0.01``, +2 if ``eplus > 0.01``.

    The outcome is passed to ``add_results_estrada``; nothing is returned.
    """
    graph = nx.read_gexf(path_to_file + ".gexf")

    # Collapse parallel edges, directed variant first.
    for multi_cls, simple_cls in ((nx.MultiDiGraph, nx.DiGraph),
                                  (nx.MultiGraph, nx.Graph)):
        if type(graph) == multi_cls:
            print('has_multiple_edges')
            graph = simple_cls(graph)

    adjacency = nx.to_numpy_matrix(graph, weight=None).tolist()
    eigenvalues, eigenvectors = eig(adjacency)

    principal = np.argmax(eigenvalues)
    sinh_values = np.sinh(eigenvalues)
    squared = np.square(eigenvectors)

    gamma = squared[:, principal]
    gammaideal = np.dot(squared, sinh_values) / sinh_values[principal]
    delta = 0.5 * np.log10(gamma / gammaideal)

    deltaplus = delta[delta > 0]
    deltaminus = delta[delta < 0]
    eplus = math.sqrt(mean(deltaplus ** 2))
    eminus = math.sqrt(mean(deltaminus ** 2))

    # Dump the intermediate arrays when either deviation is NaN or +inf.
    infinity = float('inf')
    if math.isnan(eplus) or math.isnan(eminus) or eplus == infinity or eminus == infinity:
        for diagnostic in (gamma, gammaideal, deltaplus, deltaminus):
            print(diagnostic)

    classe = 1 + (1 if eminus > 0.01 else 0) + (2 if eplus > 0.01 else 0)
    add_results_estrada(eplus, eminus, classe, path_to_file)
def abrirred():
    """Ask the user for a GEXF file and load it as the current network.

    Side effects on success: sets the globals ``G`` (the graph),
    ``file_path`` and ``fpname`` (last ``/``-separated path component),
    enables menu entries 1, 9 and 10 of ``editmenu``, copies the file to
    ``tmp/tmpfile.gexf`` and writes the trimmed file name to
    ``tmp/enamgexf.nme``.

    Always returns False.
    """
    global G
    global fpname
    global file_path

    file_path = tkFileDialog.askopenfilename(title="Open file", filetypes=[("GEXF files", ".gexf")])
    # A cancelled dialog yields an empty value; depending on the Tk version
    # that can be "" or an empty tuple, so test truthiness instead of != "".
    if not file_path:
        print("You have to choose a file")
        return False

    G = nx.read_gexf(file_path, relabel=False)
    fpname = file_path.split("/")[-1]
    print("Loaded network: %s" % fpname)
    editmenu.entryconfig(1, state=NORMAL)   ## Estadisticas
    editmenu.entryconfig(9, state=NORMAL)   ## Sigma-JS
    editmenu.entryconfig(10, state=NORMAL)  ## D3 HTML Graph

    tmpdir = "tmp/"
    shutil.copyfile(file_path, tmpdir + "tmpfile.gexf")

    # NOTE(review): [0:-4] drops "gexf" but keeps the dot ("net.gexf" ->
    # "net."); left unchanged in case readers of the .nme file rely on it.
    with open("tmp/enamgexf.nme", "w") as fname:
        fname.write(fpname[0:-4] + "\n")
    return False
def run(out_name):
    """Render the sensory input and public-neuron rasters of both LPUs.

    Adds a waveform plot of the input and, for generic LPU 0 and 1, a raster
    plot of the neurons whose ``public`` attribute is True.  The video is
    written to ``'<out_name>.avi'``.
    """
    V = vis.visualizer()

    # Assumes that generic_lpu_0_input.h5 and generic_lpu_1_input.h5
    # contain the same data:
    V.add_LPU('./data/generic_lpu_0_input.h5', LPU='Sensory')
    V.add_plot({'type': 'waveform', 'ids': [[0]]}, 'input_Sensory')

    for i in [0, 1]:
        G = nx.read_gexf('./data/generic_lpu_%s.gexf.gz' % i)

        # Integer ids of the public neurons, in ascending order.
        public_ids = []
        for n, d in G.nodes_iter(True):
            if d['public'] == True:
                public_ids.append(int(n))
        public_ids.sort()

        V.add_LPU('generic_lpu_%s_%s_output_spike.h5' % (i, out_name),
                  './data/generic_lpu_%s.gexf.gz' % i,
                  'Generic LPU %s' % i)
        raster_config = {'type': 'raster', 'ids': {0: public_ids}}
        V.add_plot(raster_config, 'Generic LPU %s' % i, 'Output')

    V._update_interval = 50
    V.rows = 3
    V.cols = 1
    V.fontsize = 18
    V.out_filename = '%s.avi' % out_name
    V.codec = 'libtheora'
    V.dt = 0.0001
    V.xlim = [0, 1.0]
    V.run()
def cyclePlot(gexFile):
    """Draw a directed GEXF graph, then highlight one strongly connected part.

    Node coordinates are taken from each node's ``viz`` -> ``position``
    attribute (``x`` and ``y``).  The whole graph is drawn in grey and
    shown; afterwards the first strongly connected component returned by
    networkx is drawn in cyan with red edges.
    """
    DG = nx.DiGraph(nx.read_gexf(gexFile))

    # generate networkx friendly position format:
    # dictionary keyed by node label with values being an (x, y) ndarray.
    # Iterating over the actual nodes (rather than assuming labels "1".."N")
    # gives the same result for such files and also works for other labels.
    pos = dict()
    for node, data in DG.nodes(data=True):
        position = data['viz']['position']
        pos[node] = np.array([position['x'], position['y']])

    nx.draw_networkx_nodes(DG, pos, nodelist=list(DG.nodes()), node_size=30,
                           node_color='grey', alpha=0.4)
    nx.draw_networkx_edges(DG, pos, alpha=0.4, arrows=True, edge_color='k')
    plt.show()

    # Depending on the networkx version the components come back as a list
    # or as a generator; next(iter(...)) takes the first one in both cases.
    scc = nx.strongly_connected_component_subgraphs(DG)
    CG = next(iter(scc))

    # show example
    nx.draw_networkx_nodes(CG, pos, nodelist=list(CG.nodes()), node_size=30,
                           node_color='c')
    nx.draw_networkx_edges(CG, pos, alpha=0.5, arrows=True, edge_color='r')
def __prepare__(data_dir):
    """
    Convert every ``*.gexf`` file in ``data_dir`` to igraph-readable GraphML.

    The output is written next to the input with the extension replaced by
    ``.graphml``; files whose output already exists are skipped.  Node
    attributes whose value is a dict are replaced by 0 before writing.
    """
    for f in glob.glob(os.path.join(data_dir, "*.gexf")):
        print(f)
        newFileName = os.path.splitext(f)[0] + ".graphml"
        if os.path.exists(newFileName):
            continue

        G = nx.read_gexf(f)
        for _node, attribs in G.nodes(data=True):
            for attrib, value in list(attribs.items()):
                if isinstance(value, dict):
                    # graphML doesn't play nice with dictionaries as
                    # attributes; this just deletes positional information.
                    attribs[attrib] = 0

        nx.write_graphml(G, newFileName)
def restore_path(meta_id, start_index, count, k):
    """Reconstruct a test path on the visual map and score the best candidate.

    A ``TestPath`` is built from ``meta_id``, ``start_index`` and ``count``.
    Candidate paths between its first and last ``path_time`` entries are
    searched on the top-4000 visual map with parameter ``k``.
    NOTE(review): the original comment describes ``k`` as the weight of the
    estimation parameter - confirm.

    If nothing is found, ``[meta_id, 0]`` is appended to ``print_cache`` and
    the function returns.  Otherwise all candidates and the best one are
    written to text files, ``[meta_id]`` is appended to ``print_cache`` and
    ``similarity`` is called on the best path.
    """
    test_path = TestPath(meta_id, start_index, count)
    visual_map4000 = nx.read_gexf('/home/elvis/map/analize/analizeTime/countXEntTime/visualMapTop4000.gexf')

    candidates = find_paths(test_path.path_time[0], test_path.path_time[-1], visual_map4000, k)
    if len(candidates) == 0:
        print_cache.append(['{}'.format(meta_id), 0,])
        return

    # Elapsed seconds between the first and the last observation.
    elapsed = test_path.path_time[-1][1] - test_path.path_time[0][1]
    delta_time = elapsed.total_seconds()

    map_paths = visual_to_map(candidates, delta_time)
    best_path = best_alternative(map_paths, delta_time)

    all_paths_file = '/home/elvis/map/analize/path_restore/restore_path/{k}/{meta_id}/{meta_id}-{s}-{c}'.format(
        meta_id=meta_id, s=start_index, c=count, k=k)
    to_path_txt(map_paths, all_paths_file, mode='all')

    best_path_file = '/home/elvis/map/analize/path_restore/restore_path/{k}/{meta_id}/best'.format(meta_id=meta_id, k=k)
    to_path_txt(best_path, best_path_file, mode='best')

    print_cache.append(['{}'.format(meta_id),])
    similarity(best_path, test_path.path_time)
def convertNetToGefx(input_file):
    """Load a network file, converting tab-separated edge lists to GEXF.

    * A file ending in ``Constants.GEXF_FORMAT`` is read directly
      (relabelled, default node type).
    * A file ending in ``Constants.NET_FORMAT`` is parsed as one edge per
      line (first two tab-separated columns).  Every node gets ``id`` and
      ``gname`` attributes equal to its name, and the graph is also written
      next to the input with the GEXF extension.
    * Any other file prints "Unsupported Format" and exits the process.

    Returns the loaded ``networkx`` graph.
    """
    G = None
    if input_file.endswith(Constants.GEXF_FORMAT):
        G = nx.read_gexf(input_file, node_type=None, relabel=True)
    elif input_file.endswith(Constants.NET_FORMAT):
        G = nx.Graph()
        with open(input_file, 'r') as f:
            # iterate over the lines in the file
            for line in f:
                # split into column values and strip surrounding whitespace
                columns = [col.strip() for col in line.split('\t')]
                if len(columns) < 2:
                    # Blank or single-column line: no edge to add.  The old
                    # "if columns:" test was always true, so such lines
                    # raised IndexError.
                    continue
                G.add_edge(columns[0], columns[1])

        # write to a gexf file, so that GHOST can read it as well
        gexf_path = input_file[:-len(Constants.NET_FORMAT)] + Constants.GEXF_FORMAT
        # add attributes to nodes in the gexf file
        for n, d in G.nodes(data=True):
            d["id"] = n
            d["gname"] = n
        nx.write_gexf(G, gexf_path)
    else:
        print("Unsupported Format")
        exit(0)

    print("For "+input_file+" Number of Nodes =", G.number_of_nodes(), "No of edges = ", G.number_of_edges())
    return G
def main():
    """Build and save a graph of overlapping subreddit communities.

    Communities come from ``get_coalesced_communities`` applied to the
    description-link graph.  Each community becomes a node whose ``size`` is
    its member count.  Two communities are linked when they share members,
    weighted by ``overlap / len(smaller community)``.  Edges below the
    average weight are dropped and the result is written to
    ``test_coalesce.gexf``.
    """
    # the description link graph
    g = nx.read_gexf('data/subreddits_edged_by_description_links.gexf')

    # an empty graph for showing communities
    g1 = nx.Graph()
    communities = get_coalesced_communities(g)
    for c in communities:
        g1.add_node(c.name, size=len(c.members))

    count = 0
    ratio_weight = 0.0
    for c1, c2 in product(communities, communities):
        if c1.id == c2.id or g1.has_edge(c1.name, c2.name) or len(c1.members) > len(c2.members):
            continue
        overlap = len(c1.members & c2.members)
        if overlap > 0:
            # float() forces true division; int / int floors to 0 or 1
            # under Python 2 unless __future__.division is active.
            ratio = float(overlap) / len(c1.members)
            g1.add_edge(c1.name, c2.name, weight=ratio)
            ratio_weight += ratio
            count += 1

    # With no overlapping pair there are no edges and nothing to average.
    average_weight_ratio = ratio_weight / count if count else 0.0
    print("average weight ratio: %s" % str(average_weight_ratio))

    # Materialise the list first: edges must not be removed while the edge
    # view is still being iterated.
    weak_edges = [(u, v) for u, v, data in g1.edges(data=True)
                  if data['weight'] < average_weight_ratio]
    g1.remove_edges_from(weak_edges)

    included = set().union(*(c.members for c in communities))
    print("%d subreddits included" % len(included))
    nx.write_gexf(g1, 'test_coalesce.gexf')
def __update_structure(self):
    """Reload ``self.structure`` from the GEXF file at ``self.file_path``.

    The graph must be undirected; its nodes are relabelled to consecutive
    integers before it is stored.

    Raises
    ------
    Exception
        If the imported graph is directed.
    """
    imported_graph = nx.read_gexf(self.file_path)
    # nx.DiGraph is a subclass of nx.Graph, so the former
    # "isinstance(..., nx.Graph)" test accepted directed graphs as well and
    # this guard could never trigger.
    if imported_graph.is_directed():
        raise Exception("Imported graph is not undirected")
    self.structure = nx.convert_node_labels_to_integers(imported_graph)
def lpu_parser(filename):
    """
    Parse a GEXF LPU specification.

    The GEXF file is read into a graph and converted with
    ``LPU.graph_to_dicts``.  All nodes are assumed to be neuron model
    instances and all edges synapse model instances.

    Parameters
    ----------
    filename : str
        GEXF filename.

    Returns
    -------
    n_dict : dict of dict of list
        Each key of `n_dict` is the name of a neuron model; the values are
        dicts that map each attribute name to a list that contains the
        attribute values for each neuron class.
    s_dict : dict of dict of list
        Each key of `s_dict` is the name of a synapse model; the values are
        dicts that map each attribute name to a list that contains the
        attribute values for each neuron.
    """
    return LPU.graph_to_dicts(nx.read_gexf(filename))
def plot_gexf_string(string):
    """Parse GEXF markup held in ``string`` and plot the resulting graph.

    The text is wrapped in an in-memory stream, read with ``relabel=False``
    and handed to ``plot_networkx_graph``.
    """
    from io import StringIO
    import networkx as nx

    gexf_stream = StringIO(u"%s" % string)
    plot_networkx_graph(nx.read_gexf(gexf_stream, relabel=False))
def get_sna(path):
    """Compute basic social-network-analysis measures for a GEXF graph.

    Returns a dict mapping every node to
    ``{'degree', 'betweenness', 'closeness', 'eigenvector'}``.
    Progress messages are printed along the way.
    """
    sna_data = {}
    print('Building relations graph')
    G = nx.read_gexf(path)
    print('Nodes: %s' % len(G.nodes()))
    print('Edges: %s' % len(G.edges()))

    print('Calculating centralities:')
    print(' -degrees')
    # dict() makes this work whether degree() returns a plain dict or a
    # view that iterates as (node, degree) pairs.
    degrees = dict(G.degree())
    for c in degrees:
        sna_data[c] = {'degree': degrees[c],
                       'betweenness': 0,
                       'closeness': 0,
                       'eigenvector': 0}

    measures = (
        ('betweenness', nx.betweenness_centrality),
        ('closeness', nx.closeness_centrality),
        ('eigenvector', nx.eigenvector_centrality_numpy),
    )
    for measure, compute in measures:
        print(' -' + measure)
        scores = compute(G)
        for c in scores:
            sna_data[c][measure] = scores[c]

    return sna_data
def run(out_name):
    """Render the sensory input and the output-neuron rasters of both LPUs.

    Adds a waveform plot of the input and, for generic LPU 0 and 1, a raster
    plot with one row per neuron whose name starts with ``'out'``.  The
    video is written to ``'generic_output_<out_name>.avi'``.
    """
    V = vis.visualizer()

    V.add_LPU('./data/generic_input_0.h5', LPU='Sensory')
    V.add_plot({'type': 'waveform', 'ids': [[0]]}, 'input_Sensory')

    for i in [0, 1]:
        G = nx.read_gexf('./data/generic_lpu_%s.gexf.gz' % i)

        # Keys of all nodes whose name begins with 'out'.
        neu_out = []
        for k, n in G.node.items():
            if n['name'][:3] == 'out':
                neu_out.append(k)

        V.add_LPU('generic_output_%s_%s_spike.h5' % (i, out_name),
                  './data/generic_lpu_%s.gexf.gz' % i,
                  'Generic LPU %s' % i)
        raster_config = {'type': 'raster', 'ids': {0: range(len(neu_out))}}
        V.add_plot(raster_config, 'Generic LPU %s' % i, 'Output')

    V._update_interval = 50
    V.rows = 3
    V.cols = 1
    V.fontsize = 18
    V.out_filename = 'generic_output_%s.avi' % out_name
    V.codec = 'libtheora'
    V.dt = 0.0001
    V.xlim = [0, 1.0]
    V.run()
def read_any(fname):
    """Attempt to read file in any format.

    - First, file name can be specified as a schema prefix, as in
      'gml:FILE_NAME'.
    - Then, it can be specified via filename extension.
    - Then, it can be specified via filename extension after .bz2 or .gz.
      Note: We do not decompress it, but rely on networkx to do
      decompression.
    - Then, look at the first 512 bytes to see if the file is in pajek, gml,
      edgelist, gexf or graphml format.
    - If read is unsuccessful, raise UnknownTypeError.  If read is
      successful, return a networkx graph object.

    A list of all readers in networkx:
    import networkx
    [ x for x in dir(networkx) if x.startswith('read_') ]
    """
    # Try a schema prefix based match:
    m = re.match(r'^(\w+?):(.*)$', fname)
    if m:
        # NOTE(review): the prefix is stripped from fname even when no
        # reader exists for it (e.g. a Windows drive letter) - kept as is,
        # confirm this is intended.
        schema, fname = m.group(1), m.group(2)
        # Look for networkx.read_schema.  If it exists, use that to
        # read and return.
        reader = _get_reader(schema)
        if reader is not None:
            return reader(fname)

    # Try known file suffix based reading:
    base, ext = os.path.splitext(fname)
    # look for any reader in networkx with this extension.
    reader = _get_reader(ext[1:])
    if reader is not None:
        return reader(fname)

    # If file is compressed, look at true extension, see if it has a
    # reader.
    if ext in ('.gz', '.bz2'):
        # splitext returns (root, ext): take the extension and drop its
        # dot.  The previous code sliced the tuple itself and so handed a
        # tuple to _get_reader.
        inner_ext = os.path.splitext(base)[1]
        reader = _get_reader(inner_ext[1:])
        if reader is not None:
            return reader(fname)

    # Look inside for the schema:
    with open(fname) as handle:
        data = handle.read(512)
    if _test_pajek(data):
        return networkx.read_pajek(fname)
    if _test_gml(data):
        return networkx.read_gml(fname)
    if _test_edgelist(data):
        return networkx.read_edgelist(fname, data=[('weight', float)])
    if _test_gexf(data):
        return networkx.read_gexf(fname)
    if _test_graphml(data):
        return networkx.read_graphml(fname)

    # Nothing matched: raise exception
    raise UnknownTypeError("Can't open %s"%fname)
def test_bool(self):
    """A boolean node attribute must survive a GEXF write/read round trip."""
    written = nx.Graph()
    written.add_node(1, testattr=True)

    # Serialise into memory, rewind, and read back with integer node ids.
    stream = io.BytesIO()
    nx.write_gexf(written, stream)
    stream.seek(0)
    restored = nx.read_gexf(stream, node_type=int)

    assert_equal(restored.nodes[1]['testattr'], True)
def test_read_simple_undirected_graphml(self):
    """Reading the undirected GEXF fixture must reproduce the reference graph."""
    expected = self.simple_undirected_graph
    actual = nx.read_gexf(self.simple_undirected_fh)

    def normalized_edges(graph):
        # Endpoint order is irrelevant for undirected edges, so sort each
        # edge before sorting the edge list.
        return sorted(sorted(e) for e in graph.edges())

    assert_equal(sorted(expected.nodes()), sorted(actual.nodes()))
    assert_equal(normalized_edges(expected), normalized_edges(actual))

    # Rewind the fixture's file handle after reading from it.
    self.simple_undirected_fh.seek(0)
def gen_avg_cited_net():
    """Run ``gen_avg_cited_year`` on every node of the timeline graph.

    The graph is read from and written back to the same GEXF file.
    """
    gexf_path = "data\\timeline_new_g.gexf"
    timeline = nx.read_gexf(gexf_path)
    for node in timeline:
        gen_avg_cited_year(timeline, node)
    nx.write_gexf(timeline, gexf_path)
#!/usr/bin/python
import networkx as nx
import matplotlib.pyplot as plt

# Load the graph and compute the eigenvector centrality of every node.
G = nx.read_gexf("graph.gexf")
print("Graph has loaded")
nodes = nx.eigenvector_centrality(G)

# One "<node> <centrality>" line per node.  The with-block closes the file
# even if a write fails.
with open('eigenvector_centrality.txt', 'w') as f:
    for i, score in nodes.items():
        f.write(str(i) + ' ' + str(score) + '\n')
print(' ', boardString[3], ' | ', boardString[4], ' | ', boardString[5], ' ') print(' -----+-----+----- ') print(' ', boardString[6], ' | ', boardString[7], ' | ', boardString[8], ' ') print('It is your turn. Enter your move by indexing into the board.') print( "Obey zero-indexing. for example, entering in '1 1' will choose the center tile." ) return 0 print("Hello, I am the Tic-Tac-Toe AI") print("Type 'X' or 'O' to choose what you will play.") G = nx.read_gexf("TicTacToeStates.gexf") player = input() boardString = '---------' turnOrder = 3 while player != 'X' and player != 'O': print("Input invalid. Please type 'X' or 'O' to choose what you play:") player = input() if player is 'X': turnOrder = 0 players_turn(boardString) moveX = int(input('horizontal index: ')) moveY = int(input('vertical index: ')) while ((moveX > 2) or (moveX < 0) or (moveY > 2) or (moveY < 0) or (boardString[moveX + moveY * 3] != '-')): print(
from collections import Counter

import networkx as nx
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Plot the in-degree distribution of the graph stored in torGraph.dat.
    G = nx.read_gexf("./torGraph.dat")

    # calculate the in-degree for each node
    numbers = [G.in_degree(n) for n in G.nodes()]

    # Count the nodes per in-degree in a single pass; this replaces the
    # repeated list.count() / "not in list" scans.
    degree_counts = Counter(numbers)
    rangeOfNumbers = sorted(degree_counts)
    # number of nodes for each in-degree, aligned with rangeOfNumbers
    x = [degree_counts[degree] for degree in rangeOfNumbers]

    # scatter plot of the in-degree distribution
    plt.scatter(rangeOfNumbers, x, label=' in-degree', color='k')
    plt.xlim(0, 30)
    plt.ylim(0, 10000)
    plt.yticks([0, 1000, 10000, 50000])
    plt.xticks([0, 10, 20, 30])
year.append({ 'year': window, 'post_location': '', 'comment_location': str(ent) }) def WriteDictToCSV(csv_file, csv_columns, dict_data): try: with open(csv_file, 'w') as csvfile: writer = csv.DictWriter(csvfile, fieldnames=csv_columns) writer.writeheader() for data in dict_data: writer.writerow(data) except IOError as (errno, strerror): print("I/O error({0}): {1}".format(errno, strerror)) return if __name__ == '__main__': for filename in os.listdir("LIWC_DATA/utility_graphs/"): if not filename.startswith("%s_di" % sys.argv[1]): continue print filename g = nx.read_gexf("LIWC_DATA/utility_graphs/" + filename) file = str(ntpath.basename(filename)).split('.')[0] windowyear = file[len(file) - 21:len(file) - 11] getLocation(g, windowyear) with open('LIWC_DATA/LOCATION_FILES/onlyLocation.txt', 'w') as outfile: json.dump(year, outfile)
ranking_file = "%s/all-%s.csv" % (ranking_folder, tp) network_file = "%s/all-%s.gexf" % (network_folder, tp) positions_file = "%s/comwith_all-%s.csv" % (positions_folder, tp) output_file = {'graph': "%s/all-%s-network.js" % (output_folder, tp), 'ring': "%s/all-%s-ring.js" % (output_folder, tp)} with open(ranking_file, 'r') as inf: reader = csv.DictReader(inf) ranks = {row.pop("node"): row for row in reader} with open(positions_file, 'r') as inf: reader = csv.DictReader(inf) pos = {row["node"]: eval(row['positions']) for row in reader} # MERGE AND REDUCE DATA # H is for ring view H = nx.read_gexf(network_file) id_mapping = {} # needed to replace numeric id's in json object for idx, (node, data) in enumerate(H.nodes(data=True)): id_mapping[idx] = node data["occurrence"] = data.get("occurrence", 0) # G is for graph view G = giant(H) # remove edges representing one-time collaboration having weight < 1 for sourc, tar, data in G.edges(data=True): if data["weight"] < 1 and len(data["journal"].split(';')) == 1: G.remove_edge(sourc, tar) G = nx.convert_node_labels_to_integers(G, label_attribute="name") # add groups, scaled positions and text for node, data in G.nodes(data=True): name = data['name'] # Groups (compressed)
if __name__ == "__main__":
    # Entry point: relabel every GEXF graph in args.indir and dump the
    # result for each input file.
    args = parse_args()
    np.random.seed(args.seed)

    # --
    # IO
    print("main.py: preprocessing start", file=sys.stderr)
    graph_files = sorted(glob(os.path.join(args.indir, '*.gexf')))

    # Load everything first, then apply the initial relabelling with one
    # lookup table shared by all graphs.
    graphs = list(map(nx.read_gexf, graph_files))
    label_lookup = {}
    relabelled = []
    for g in graphs:
        relabelled.append(initial_relabel(g, args.label_field, label_lookup))
    graphs = relabelled

    # One relabelling pass per height, each with a fresh lookup table.
    for height in range(1, args.wl_height + 1):
        label_lookup = {}
        next_round = []
        for graph in graphs:
            next_round.append(wl_relabel(graph, height, label_lookup))
        graphs = next_round

    for graph_file, graph in zip(graph_files, graphs):
        dump_sg2vec_str(graph_file, args.wl_height, graph)

    wlk_pattern = os.path.join(args.indir, '*.g2v' + str(args.wl_height))
    wlk_files = sorted(glob(wlk_pattern))
    print("main.py: preprocessing done", file=sys.stderr)
#
# Check command line arguments
#
if len(sys.argv) < 2:
    print("Error: Must supply a GraphML input file")
    sys.exit(-1)

filename = sys.argv[1]
output_filename = ""
if re.search(r'graphml', filename):
    # Cut the name at the format keyword.  str.strip("graphml") removed
    # *characters* from both ends (e.g. "graph.graphml" -> "."), not the
    # extension.
    output_filename = filename[:filename.rindex("graphml")]
    G = nx.read_graphml(filename)
elif re.search(r'gexf', filename):
    output_filename = filename[:filename.rindex("gexf")]
    G = nx.read_gexf(filename)
else:
    print("Error: Unknown file extension  %s" % filename)
    sys.exit(-1)
output_filename = output_filename + "png"

#
# Positions for all nodes
#
# Only the last of the four layouts that used to be computed here was kept
# in pos (spring, random and graphviz 'dot' were overwritten immediately),
# so just that one is computed.
pos = nx.graphviz_layout(G, prog='twopi')
#print G.graph
import numpy as np import matplotlib as mpl mpl.use('agg') import neurokernel.LPU.utils.visualizer as vis import networkx as nx nx.readwrite.gexf.GEXF.convert_bool = { 'false': False, 'False': False, 'true': True, 'True': True } # Select IDs of spiking projection neurons: G = nx.read_gexf('./data/generic_lpu.gexf.gz') neu_proj = sorted([int(k) for k, n in G.node.items() if \ n['name'][:4] == 'proj' and \ n['spiking']]) V = vis.visualizer() V.add_LPU('./data/generic_input.h5', LPU='Sensory') V.add_plot({'type': 'waveform', 'ids': [[0]]}, 'input_Sensory') V.add_LPU('generic_output_spike.h5', './data/generic_lpu.gexf.gz', 'Generic LPU') V.add_plot( { 'type': 'raster', 'ids': { 0: neu_proj
def main():
    """Lay out a GEXF graph with ForceAtlas2 and plot its top nodes.

    Reads the graph named by ``sys.argv[1]``, keeps its largest weakly
    connected component (undirected), computes a layout, then draws the
    ``k`` nodes selected by ``filter_graph`` and labels the ``num_labels``
    largest of them.  The figure is saved as ``1.png`` and shown.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: %s < input gexf' % sys.argv[0])

    # --- parameters -------------------------------------------------------
    colormap = {
        'null': 'lightgray',
        'partisan_2012_conservative': 'r',
        'partisan_2012_liberal': 'b',
        'partisan_2012_libertarian': 'y'
    }
    color_field = "partisan_code"
    size_field = 'inlink_count'
    filter_field = "inlink_count"
    label_field = "label"
    num_labels = 20  # number of labels to visualize
    k = 100  # number of nodes to visualize

    # --- input ------------------------------------------------------------
    infile = sys.argv[1]
    G = nx.read_gexf(infile)

    # fa2l needs an undirected graph; work on the largest weak component.
    components = nx.weakly_connected_component_subgraphs(G)
    G = max(components, key=len).to_undirected()

    # Graphs above 1000 nodes are cut down to 1000 nodes before the layout.
    if len(G.nodes()) > 1000:
        G = filter_graph(G, filter_by=filter_field, top=1000).to_undirected()

    # --- layout -----------------------------------------------------------
    pos = force_atlas2_layout(G,
                              iterations=50,
                              pos_list=None,
                              node_masses=None,
                              outbound_attraction_distribution=True,
                              lin_log_mode=True,
                              prevent_overlapping=True,
                              edge_weight_influence=1.0,
                              jitter_tolerance=1.0,
                              barnes_hut_optimize=True,
                              barnes_hut_theta=0.5,
                              scaling_ratio=38,
                              strong_gravity_mode=False,
                              multithread=False,
                              gravity=1.0)
    print("Extracted the positions")
    print(pos)

    # --- subgraph that is actually drawn (top ``k`` nodes) ------------------
    top_k_subgraph = filter_graph(G, filter_by=filter_field,
                                  top=k).to_undirected()

    node_colors = set_node_color(top_k_subgraph,
                                 color_by=color_field,
                                 colormap=colormap)
    node_sizes = set_node_size(top_k_subgraph,
                               size_field=size_field,
                               min_size=0.1,
                               max_size=800)
    node_labels = set_node_label(top_k_subgraph, label=label_field)
    subgraph_pos = get_subgraph_pos(top_k_subgraph, pos)
    edge_colors = edgecolor_by_source(top_k_subgraph, node_colors)

    print("Drawing the visualization")

    # Only the ``num_labels`` biggest nodes get a text label.
    sized_nodes = zip(top_k_subgraph.nodes(), node_sizes)
    by_size_desc = sorted(sized_nodes, key=lambda pair: pair[1], reverse=True)
    subset_label_nodes = by_size_desc[0:num_labels]
    subset_labels = {}
    for node_and_size in subset_label_nodes:
        subset_labels[node_and_size[0]] = node_labels[node_and_size[0]]

    # --- plot -------------------------------------------------------------
    fig = plt.figure(figsize=(10, 10), dpi=100)
    ax = fig.add_subplot(111)

    # Nodes, edges and labels are drawn separately.
    nodes = nx.draw_networkx_nodes(top_k_subgraph,
                                   pos=subgraph_pos,
                                   node_size=node_sizes,
                                   node_color=node_colors,
                                   alpha=.7)
    edges = nx.draw_networkx_edges(top_k_subgraph,
                                   pos=subgraph_pos,
                                   edge_color=edge_colors,
                                   alpha=0.01)
    labels = nx.draw_networkx_labels(top_k_subgraph,
                                     pos=subgraph_pos,
                                     labels=subset_labels,
                                     font_size=8)

    # Nudge labels apart so they do not overlap.
    x_pos = [coords[0] for coords in subgraph_pos.values()]
    y_pos = [coords[1] for coords in subgraph_pos.values()]
    adjust_text(texts=list(labels.values()),
                x=x_pos,
                y=y_pos,
                arrowprops=dict(arrowstyle='->', color='lightgray'))

    plt.savefig("1.png")
    plt.show()
import networkx as nx
from modularity_maximization import partition
from modularity_maximization.utils import get_modularity

# Detect communities in the input network and write them back as a
# 'community' node attribute in a new GEXF file.
networkfile = "Feb2018toNov2018_new.gexf"
G = nx.read_gexf(networkfile)

comm_dict = partition(G)

# Single-argument print(...) prints the same text on Python 2 and Python 3.
print(get_modularity(G, comm_dict))
print(comm_dict)
for node in G:
    print(node)

nx.set_node_attributes(G, values=comm_dict, name='community')

nx.write_gexf(G, "Feb2018toNov2018_communities_new.gexf")
parser.add_argument('lpu_file_name', nargs='?', default='generic_lpu.gexf.gz', help='LPU file name') parser.add_argument('in_file_name', nargs='?', default='generic_input.h5', help='Input file name') parser.add_argument('-s', type=int, help='Seed random number generator') parser.add_argument('-l', '--lpu', type=str, default='gen', help='LPU name') args = parser.parse_args() if args.s is not None: np.random.seed(args.s) dt = 1e-4 dur = 1.0 start = 0.3 stop = 0.6 I_max = 0.6 neu_num = [np.random.randint(31, 40) for i in range(3)] create_lpu(args.lpu_file_name, args.lpu, *neu_num) g = nx.read_gexf(args.lpu_file_name) create_input(args.in_file_name, neu_num[0], dt, dur, start, stop, I_max) create_lpu(args.lpu_file_name, args.lpu, *neu_num)
def main():
    """Solve seven synthetic DAG scheduling instances with the ILP solver.

    For DAG ids 4..10 of the 50-node setting, loads the workflow with
    ``load_dag`` and the platform from ``platform.gexf``, optionally pins a
    sample of tasks (both sampling rates are currently 0, so nothing is
    pinned), runs ``hybrid.ilp`` and records the result, the wall-clock time
    and the relative gap ``(result - lb) / result``.  The collected lists are
    printed at the end.
    """
    num_nodes, first_dag = 50, 4
    system_path = './SyntheticSettings/default_dags/platform.gexf'
    nooff_rate, localoff_rate = 0, 0
    tl = 3600  # forwarded to the solver as its ``tl`` argument

    result_list = []
    time_list = []
    gap_list = []

    for dag_id in range(first_dag, first_dag + 7):
        workflow_path = './SyntheticSettings/default_dags/%d_nodes/dag_%d.gexf' % (
            num_nodes, dag_id)
        dag = load_dag(workflow_path)
        platform = nx.read_gexf(system_path)

        # random.sample() needs a real sequence (a node view is rejected on
        # Python >= 3.11), so materialise the nodes once, in graph order.
        all_nodes = list(dag.nodes())
        noff_nodes = random.Random(401).sample(
            all_nodes, round(dag.number_of_nodes() * nooff_rate))
        pinned = set(noff_nodes)  # O(1) membership for the filter below
        localoff_nodes = random.Random(500).sample(
            [n for n in all_nodes if n not in pinned],
            round(dag.number_of_nodes() * localoff_rate))

        # "No offloading" tasks get cost 10000 on every platform node except
        # 'Core1'.
        for t in noff_nodes:
            for r in platform.nodes():
                if r != 'Core1':
                    dag.node[t][r] = 10000
        # "Local offloading" tasks get cost 10000 on 'Core6'.
        for t in localoff_nodes:
            dag.node[t]['Core6'] = 10000

        ts = time.time()
        result, lb = hybrid.ilp(dag, platform, tl=tl)
        duration = time.time() - ts

        result_list.append(int(result))
        time_list.append(int(duration))
        gap = (result - lb) / result
        gap_list.append(round(gap, 5))

    print("makespan = ", result_list)
    print("time_cost = ", time_list)
    print("gap = ", gap_list)
    print(result_list)
def __init__(self):
    """Load the timeline graph from its GEXF file into ``self.g``."""
    # Function-scope import so this block does not depend on the file's
    # import section.
    import os

    # os.path.join yields "data\timeline_new_g.gexf" on Windows (same as the
    # old hard-coded literal) and a valid path on every other platform.
    self.g = nx.read_gexf(os.path.join("data", "timeline_new_g.gexf"))
:param time_string: a time string :param target_time_zone: the target time zone :param convert_utc_time: whether transfer the datetime object to utc first :return: """ datetime_object = datetime.strptime(time_string, '%a %b %d %H:%M:%S %z %Y') if convert_utc_time: final_time_object = datetime_object.replace(tzinfo=pytz.utc).astimezone(target_time_zone) else: final_time_object = datetime_object.astimezone(target_time_zone) return final_time_object def combine_candidate_ids(dataframe: pd.DataFrame) -> set: """ Get the Weibo id set, considering original post and repost :param dataframe: a Weibo dataframe :return: a Weibo id set """ author_int_set = set(dataframe['weibo_id']) retweeter_list = list(dataframe['retweets_id']) retweeter_int_set = set([np.int64(str(retweet_id[1:-1])) for retweet_id in retweeter_list if retweet_id != "['no retweets']"]) # combine the retweet id and author id together combine_set = {*author_int_set, *retweeter_int_set} return combine_set if __name__ == '__main__': graph = nx.read_gexf(os.path.join(data_paths.data_path, 'graph_structure', 'user_location_graph.gexf')) print(get_network_statistics(graph))
import networkx as nx def agrees(nnodes1, nnodes2): if nnodes1 == nnodes2: return True else: return False print("----- COUNT # OF NODES -----") print("\nreading z = 0") z0_1 = nx.read_gexf('halos/10000/z0/halo-radius-6.0.gexf') z0_2 = nx.read_gexf('halos/10000/z0/halo-radius-6.1.gexf') if agrees(z0_1.number_of_nodes(), z0_2.number_of_nodes()): print("z = 0 has", z0_1.number_of_nodes(), "nodes") else: print("z = 0 has disagreeing # of nodes.") print("\nreading z = 0.5") zp5_1 = nx.read_gexf('halos/10000/zp5/halo-radius-6.0.gexf') zp5_2 = nx.read_gexf('halos/10000/zp5/halo-radius-6.1.gexf') if agrees(zp5_1.number_of_nodes(), zp5_2.number_of_nodes()): print("z = 0.5 has", zp5_1.number_of_nodes(), "nodes") else: print("z = 0.5 has disagreeing # of nodes.") print("\nreading z = 1") z1_1 = nx.read_gexf('halos/10000/z1/halo-radius-6.0.gexf') z1_2 = nx.read_gexf('halos/10000/z1/halo-radius-6.1.gexf') if agrees(z1_1.number_of_nodes(), z1_2.number_of_nodes()):
def importGexfWithLabels(gexffilepath, depth = 0):
    '''
    Read a GEXF network file and create or update the matching 'Website'
    nodes and 'LINKS_TO' relationships through the module-level ``graph``.

    Every node attribute is stored on the node as ``D<depth>_<attribute>``;
    every link's ``count`` is stored on the relationship as
    ``count_D<depth>``.  Link endpoints are looked up by ``D<depth>_id``.
    Prints "." for each 100 nodes/links imported and a running total for
    each 1000.

    :param gexffilepath: path of the GEXF file to import
    :param depth: value used to prefix the new properties (any value that
        ``str()`` accepts; the previous version only supported 0 to 3)
    '''
    G = nx.read_gexf(gexffilepath, node_type=None, relabel=False, version='1.1draft')
    data = nx.json_graph.node_link_data(G)
    prefix = "D" + str(depth) + "_"

    # ---- nodes -----------------------------------------------------------
    totnbnodes = len(data['nodes'])
    print(totnbnodes, " nodes found in gexf")
    i = 0  # nodes actually imported (used to start at 1, over-counting by one)
    for node in data['nodes']:
        nodematch = graph.nodes.match(site_name=node['label']).first()
        if nodematch is None:
            try:
                nodematch = Node('Website', site_name=node['label'])
                nodematch.__primarylabel__ = 'Website'
                nodematch.__primarykey__ = 'site_name'
                graph.merge(nodematch)
            except Exception as err:
                # Best effort: report the node and move on to the next one.
                print("could not import ", node['label'], err)
                continue
        for key in node.keys():
            nodematch[prefix + key] = node[key]
        graph.push(nodematch)
        i = i + 1
        if i % 100 == 0:
            print(".", end=" ")
        if i % 1000 == 0:
            print(i, "/", totnbnodes)
    print(i, " nodes imported")
    print(len(graph.nodes.match("Website")), "nodes in db after import")

    # ---- links -----------------------------------------------------------
    totnblinks = len(data['links'])
    print(totnblinks, " links found in gexf")
    id_property = prefix + "id"  # property written by the node loop above
    j = 0        # links imported
    skipped = 0  # links that could not be imported
    for link in data['links']:
        source_n = graph.nodes.match("Website", **{id_property: link['source']}).first()
        target_n = graph.nodes.match("Website", **{id_property: link['target']}).first()
        if source_n is None or target_n is None:
            # A missing endpoint would turn the relationship match below
            # into a wildcard and update an unrelated relationship.
            skipped = skipped + 1
            continue
        try:
            relmatch = graph.relationships.match((source_n, target_n), r_type="LINKS_TO").first()
            if relmatch is None:
                rel = Relationship(source_n, "LINKS_TO", target_n)
                rel["count_D" + str(depth)] = link['count']
                graph.merge(rel)
            else:
                relmatch["count_D" + str(depth)] = link['count']
                graph.push(relmatch)
        except Exception:
            # Still best effort, but failures are now counted and reported.
            skipped = skipped + 1
            continue
        j = j + 1
        if j % 100 == 0:
            print(".", end=" ")
        if j % 1000 == 0:
            print(j, "/", totnblinks)
    print(j, " links imported")
    if skipped:
        print(skipped, " links skipped")
    print(len(graph.relationships.match()), "links in db after import")
output_file = open(data_dir + "/graphs.bss", 'w') if has_dict == False: hashing = {} typeCnt = 0 else: dictionary = open('dict.pkl', 'rb') hashing = pickle.load(dictionary) typeCnt = len(hashing.keys()) count = 0 err_cnt = 0 for g_file in glob(data_dir + '/*.gexf'): gid = int(os.path.basename(g_file).split('.')[0]) try: g = nx.read_gexf(g_file) except xml.etree.ElementTree.ParseError: continue g.graph['gid'] = gid label2node = {} output_file.write('%d\n' % gid) count = count + 1 output_file.write("{} {}\n".format(len(g.nodes()), len(g.edges()))) for i, n in enumerate(g.nodes(data=True)): # print(n) if constant: output_file.write('1\n') else:
def process(self):
    """Convert the raw ``.gexf`` graphs and GED pickle into saved tensors.

    For each (raw path, processed path) pair, every ``<id>.gexf`` file is
    turned into a ``Data`` object and the collated list is saved to the
    processed path.  Afterwards the pairwise values from ``ged.pickle`` are
    written into a symmetric matrix (``inf`` where no value is given) and
    saved both as-is and divided by the mean node count of each pair.
    """
    import networkx as nx

    ids, Ns = [], []
    for raw_dir, out_path in zip(self.raw_paths, self.processed_paths):
        gexf_files = glob.glob(osp.join(raw_dir, '*.gexf'))
        # The graph ID is the file name without its '.gexf' suffix.
        split_ids = sorted(
            int(fname.split(os.sep)[-1][:-5]) for fname in gexf_files)
        ids.append(split_ids)

        # Graphs of any later split are numbered after those of the first.
        offset = 0 if len(ids) == 1 else len(ids[0])

        data_list = []
        for position, graph_id in enumerate(split_ids):
            G = nx.read_gexf(osp.join(raw_dir, f'{graph_id}.gexf'))
            # Relabel nodes to consecutive integers in iteration order.
            relabel = {}
            for new_label, old_label in enumerate(G.nodes()):
                relabel[old_label] = new_label
            G = nx.relabel_nodes(G, relabel)

            num_nodes = G.number_of_nodes()
            Ns.append(num_nodes)

            edge_index = torch.tensor(list(G.edges)).t().contiguous()
            if edge_index.numel() == 0:
                # No edges: use an explicit empty (2, 0) index tensor.
                edge_index = torch.empty((2, 0), dtype=torch.long)
            edge_index = to_undirected(edge_index, num_nodes=num_nodes)

            data = Data(edge_index=edge_index, i=position + offset)
            data.num_nodes = num_nodes

            # AIDS700nef only: one-hot node features built from each node's
            # 'type' attribute, indexed through ``self.types``.
            if self.name == 'AIDS700nef':
                type_ids = torch.zeros(data.num_nodes, dtype=torch.long)
                for node, attrs in G.nodes(data=True):
                    type_ids[int(node)] = self.types.index(attrs['type'])
                one_hot = F.one_hot(type_ids, num_classes=len(self.types))
                data.x = one_hot.to(torch.float)

            if self.pre_filter is not None and not self.pre_filter(data):
                continue

            if self.pre_transform is not None:
                data = self.pre_transform(data)

            data_list.append(data)

        torch.save(self.collate(data_list), out_path)

    # Map every graph ID to its row/column in the GED matrix.
    first_count = len(ids[0])
    assoc = {}
    for row, graph_id in enumerate(ids[0]):
        assoc[graph_id] = row
    for row, graph_id in enumerate(ids[1]):
        assoc[graph_id] = row + first_count

    path = osp.join(self.raw_dir, self.name, 'ged.pickle')
    mat = torch.full((len(assoc), len(assoc)), float('inf'))
    with open(path, 'rb') as f:
        obj = pickle.load(f)
        rows, cols, values = [], [], []
        for (id_a, id_b), ged in obj.items():
            rows.append(assoc[id_a])
            cols.append(assoc[id_b])
            values.append(ged)
        row_idx, col_idx = torch.tensor(rows), torch.tensor(cols)
        ged_values = torch.tensor(values, dtype=torch.float)
        # Fill both triangles so the matrix is symmetric.
        mat[row_idx, col_idx] = ged_values
        mat[col_idx, row_idx] = ged_values

    path = osp.join(self.processed_dir, f'{self.name}_ged.pt')
    torch.save(mat, path)

    # Normalized GED: divide by the mean node count of the two graphs.
    N = torch.tensor(Ns, dtype=torch.float)
    mean_sizes = 0.5 * (N.view(-1, 1) + N.view(1, -1))
    norm_mat = mat / mean_sizes

    path = osp.join(self.processed_dir, f'{self.name}_norm_ged.pt')
    torch.save(norm_mat, path)
# nx.write_gexf(H, 'data/freq_greater_than_5_facebook_network_level_[4,5].gexf')
# nx.write_gexf(G, 'data/facebook_network_level_[4,5].gexf')
# finance_index = nx.get_node_attributes(G, 'betweenness').items()
# food_index = nx.get_node_attributes(G, 'betweenness').items()
#
# print (simple_resonance(finance_index, food_index))
# print (standardized_sr(finance_index, food_index))
#
# finance_iscore = nx.get_edge_attributes(G, 'pair_i')
# food_iscore = nx.get_edge_attributes(G, 'pair_i')
#
# print(pair_resonance(finance_iscore, food_iscore))
#
# print(standardized_pr(finance_iscore, food_iscore))

# Read the experiment description and print the simple resonance of every
# unordered pair of networks it lists.
exp_config = configparser.ConfigParser()
exp_config.read("experiments/visualizacao_post_exclusivo.ini")

experiment = exp_config['EXPERIMENT']
experiment_name = experiment['name']
# NOTE(review): eval() runs whatever expression the .ini file contains; only
# use this with configuration files you trust.
networks = eval(experiment['networks'])
folder = experiment['folder']

networks_keys = list(networks.keys())

# Every network file is named <folder><name>/<name>_<key>.gexf
file_prefix = folder + experiment_name + "/" + experiment_name + "_"

for i, first_key in enumerate(networks_keys):
    net_1 = nx.read_gexf(file_prefix + str(first_key) + ".gexf")
    # Pair the i-th network with each network that comes after it.
    for second_key in networks_keys[i + 1:]:
        net_2 = nx.read_gexf(file_prefix + str(second_key) + ".gexf")
        print(simple_resonance(net_1, net_2))
def lpu_parser(filename):
    """
    GEXF LPU specification parser.

    Extract LPU specification data from a GEXF file and store it in
    dictionaries of lists. All nodes in the GEXF file are assumed to
    correspond to neuron model instances while all edges are assumed to
    correspond to synapse model instances.

    Parameters
    ----------
    filename : str
        GEXF filename.

    Returns
    -------
    n_dict : dict of dict of list, or None
        Each key of `n_dict` is the name of a neuron model; the values are
        dicts that map each attribute name (plus 'id') to a list that
        contains the attribute values for each neuron. None if the file
        contains no neurons.
    s_dict : dict of dict of list
        Each key of `s_dict` is the name of a synapse model; the values are
        dicts that map each attribute name (plus 'pre' and 'post') to a list
        that contains the attribute values for each synapse. Empty if the
        file contains no synapses.

    Raises
    ------
    AssertionError
        If an input port or a public neuron has no 'selector' attribute, or
        if two neurons (or synapses) of the same model do not carry the same
        set of attributes.

    Example
    -------
    >>> n_dict = {'LeakyIAF': {'Vr': [0.5, 0.6], 'Vt': [0.3, 0.2]},
    ...           'MorrisLecar': {'V1': [0.15, 0.16], 'Vt': [0.13, 0.27]}}

    Notes
    -----
    All neurons must have the following attributes; any additional
    attributes for a specific neuron model must be provided for all neurons
    of that model type:

    1. spiking - True if the neuron emits spikes, False if it emits graded
       potentials.
    2. model - model identifier string, e.g., 'LeakyIAF', 'MorrisLecar'
    3. public - True if the neuron emits output exposed to other LPUS.
    4. extern - True if the neuron can receive external input from a file.

    All synapses must have the following attributes:

    1. class - int indicating connection class of synapse; it may assume
       the following values:

       0. spike to spike synapse
       1. spike to graded potential synapse
       2. graded potential to spike synapse
       3. graded potential to graded potential synapse
    2. model - model identifier string, e.g., 'AlphaSynapse'
    3. conductance - True if the synapse emits conductance values, False if
       it emits current values.
    4. reverse - If the `conductance` attribute is True, this attribute
       should be set to the reverse potential.

    The attribute dictionaries of the parsed graph are modified in place
    (defaults are filled in and synapse ids are converted to int).

    TODO
    ----
    Input data should be validated.
    """
    # Function-scope import: cmp_to_key lets the existing comparison
    # functions be used with sorted() on both Python 2.7 and Python 3.
    from functools import cmp_to_key

    # parse the GEXF file using networkX
    graph = nx.read_gexf(filename)

    # parse neuron data
    n_dict = {}

    # Sort with neuron_cmp so that consecutive neurons of the same type in
    # the constructed LPU keep the order expected by neurokernel.
    # nodes(data=True) yields (id, attribute dict) pairs.
    neurons = sorted(graph.nodes(data=True), key=cmp_to_key(neuron_cmp))
    for neu_id, neu in neurons:
        model = neu['model']

        # if an input_port, make sure selector is specified
        if model == PORT_IN_GPOT or model == PORT_IN_SPK:
            assert 'selector' in neu
            neu['spiking'] = (model != PORT_IN_GPOT)
            neu['public'] = False

        # if an output_port, make sure selector is specified
        if 'public' in neu:
            if neu['public']:
                assert 'selector' in neu
        else:
            neu['public'] = False
        if 'selector' not in neu:
            neu['selector'] = ''

        attribute_names = list(neu.keys()) + ['id']

        # if the neuron model does not appear before, add it into n_dict
        if model not in n_dict:
            n_dict[model] = {k: [] for k in attribute_names}

        # neurons of the same model should have the same attributes
        assert set(n_dict[model].keys()) == set(attribute_names)

        # add neuron data into the subdictionary of n_dict
        for key in neu:
            n_dict[model][key].append(neu[key])
        n_dict[model]['id'].append(int(neu_id))

    # remove duplicate model information
    for val in n_dict.values():
        val.pop('model')
    if not n_dict:
        n_dict = None

    # parse synapse data
    s_dict = {}
    # Sorting happens before the ids are converted to int below, exactly as
    # before, so synapse_cmp sees the attribute values as parsed.
    synapses = sorted(graph.edges(data=True), key=cmp_to_key(synapse_cmp))
    for syn in synapses:
        # syn[0/1]: pre-/post-neu id; syn[2]: dict of synaptic data
        pre, post, syn_data = syn[0], syn[1], syn[2]
        model = syn_data['model']
        syn_data['id'] = int(syn_data['id'])

        attribute_names = list(syn_data.keys()) + ['pre', 'post']

        # if the synapse model does not appear before, add it into s_dict
        if model not in s_dict:
            s_dict[model] = {k: [] for k in attribute_names}

        # synapses of the same model should have the same attributes
        assert set(s_dict[model].keys()) == set(attribute_names)

        # add synaptic data into the subdictionary of s_dict
        for key in syn_data:
            s_dict[model][key].append(syn_data[key])
        s_dict[model]['pre'].append(pre)
        s_dict[model]['post'].append(post)

    for val in s_dict.values():
        val.pop('model')

    return n_dict, s_dict
def update_output(clicks, clicks2, input_value):
    """Build the recommendation page after one of the submit buttons fires.

    Writes ``input_value`` to ``test.txt``, runs the graph/recommendation
    pipeline and returns a 5-tuple:
    ``(layout, style, citation_figure, embedding_figure, embedding_style)``.

    * 'submit-button': tables and citation graph are shown; the embedding
      figure is empty and its style is ``{'display': 'none'}``.
    * 'submit-button-2': same, plus the embedding scatter plot, shown.
    * no click yet, or any other trigger: everything empty and hidden.

    Side effect: rebinds the module-level ``title_to_pmid`` mapping.
    """
    # Declared up front; the assignment below is shared by both buttons.
    global title_to_pmid

    ctx = dash.callback_context
    if clicks is None and clicks2 is None:
        return _hidden_outputs()

    # The context manager closes the file even if the write fails.
    with open("test.txt", "w+") as my_file:
        my_file.write(input_value)

    button_id = ctx.triggered[0]['prop_id'].split('.')[0]
    print(button_id)
    if button_id == 'submit-button':
        with_embedding = False
        print('selected first submit button')
    elif button_id == 'submit-button-2':
        with_embedding = True
        print('selected second submit button')
    else:
        return _hidden_outputs()

    ################
    #COMPUTATIONS
    ################
    print('getting graph')
    graph, descriptors, dict_pmid_count_mesh = run_get_graph_2()
    print('obtained graph')
    # The graph handed to the recommender is the one read back from disk.
    graph = nx.read_gexf("test_graph.gexf")
    (top_k_papers, top_k_papers_pmids, top_k_people, top_k_people_ids,
     authors_to_affiliation, papers_to_author, citation_dict,
     number_papers_dict, affiliation_paper_count, pmid_to_title, graph,
     pagerank_ordered) = graph_to_recommend(graph, dict_pmid_count_mesh,
                                            host, port, dbname, user,
                                            password)
    title_to_pmid = {title: pmid for pmid, title in pmid_to_title.items()}
    sentences = articles_to_summary(top_k_papers_pmids, host, port, dbname,
                                    user, password, summarizer)

    fig = _citation_figure(graph, pmid_to_title)
    style = {'display': 'block'}

    if with_embedding:
        fig_emb = _embedding_figure(descriptors, pagerank_ordered)
        style_emb = {'display': 'block'}
    else:
        fig_emb = _empty_figure()
        style_emb = {'display': 'none'}

    layout = _recommendation_layout(top_k_papers, top_k_people,
                                    top_k_people_ids, title_to_pmid,
                                    papers_to_author, sentences,
                                    authors_to_affiliation, citation_dict,
                                    number_papers_dict)
    return layout, style, fig, fig_emb, style_emb


def _empty_figure():
    """Return a fresh placeholder figure with no data."""
    return go.Figure(data=[], layout=go.Layout(title='', titlefont_size=16))


def _hidden_outputs():
    """Return the callback outputs used when there is nothing to show."""
    return ([], {'display': 'none'}, _empty_figure(), _empty_figure(),
            {'display': 'none'})


def _citation_figure(graph, pmid_to_title):
    """Build the plotly figure of the citation network."""
    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

    # x, y position for each node
    pos = nx.layout.spring_layout(graph)

    # NOTE(review): positions are keyed by node id but looked up through the
    # node's 'label' attribute, as before; presumably label == id in
    # test_graph.gexf - confirm.
    # Coordinates are collected in plain lists and handed to plotly once,
    # instead of re-assigning the growing trace tuples on every iteration.
    edge_x, edge_y = [], []
    for edge in graph.edges():
        x0, y0 = pos[graph.nodes[edge[0]]['label']]
        x1, y1 = pos[graph.nodes[edge[1]]['label']]
        edge_x += [x0, x1, None]  # None separates consecutive segments
        edge_y += [y0, y1, None]
    edge_trace = go.Scatter(x=edge_x,
                            y=edge_y,
                            line=dict(width=0.5, color='#888'),
                            hoverinfo='none',
                            mode='lines')

    node_x, node_y = [], []
    for node in graph.nodes():
        x, y = pos[graph.nodes[node]['label']]
        node_x.append(x)
        node_y.append(y)

    # Colour = number of neighbours; hover text = title (when known) plus
    # the same count.
    node_color, node_text = [], []
    for node_id, neighbours in graph.adjacency():
        key = str(node_id)
        name = pmid_to_title[key] if key in pmid_to_title else key
        node_color.append(len(neighbours))
        node_text.append(name + '<br># of connections: ' +
                         str(len(neighbours)))

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        text=node_text,
        mode='markers',
        hoverinfo='text',
        marker=dict(showscale=True,
                    colorscale='YlGnBu',
                    reversescale=True,
                    color=node_color,
                    size=20,
                    colorbar=dict(thickness=15,
                                  title='Node Connections',
                                  xanchor='left',
                                  titleside='right'),
                    line=dict(width=2)))

    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        title='',
                        titlefont_size=16,
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        annotations=[
                            dict(showarrow=False,
                                 xref="paper",
                                 yref="paper",
                                 x=0.005,
                                 y=-0.002)
                        ],
                        xaxis=hidden_axis,
                        yaxis=hidden_axis))
    fig.update_layout(hoverlabel=dict(
        bgcolor="white", font_size=16, font_family="Roboto"))
    return fig


def _embedding_figure(descriptors, pagerank_ordered):
    """Build the scatter plot of the embeddings and print the time taken."""
    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

    t0 = time.time()
    emb = get_embeddings_to_visualize(descriptors, pagerank_ordered)
    t1 = time.time()
    print('got embeddings: {} secs'.format(t1 - t0))

    fig_emb = px.scatter(emb,
                         x='x',
                         y='y',
                         color='Node type',
                         opacity=0.8,
                         hover_data={
                             'x': False,
                             'y': False,
                             'Name': True
                         })
    fig_emb.update_layout(hoverlabel=dict(bgcolor="white",
                                          font_size=16,
                                          font_family="Roboto"),
                          xaxis=hidden_axis,
                          yaxis=hidden_axis)
    fig_emb.update_traces(marker=dict(size=30),
                          selector=dict(mode='markers'))
    return fig_emb


def _recommendation_layout(top_k_papers, top_k_people, top_k_people_ids,
                           title_to_pmid, papers_to_author, sentences,
                           authors_to_affiliation, citation_dict,
                           number_papers_dict):
    """Build the page children: the two recommendation tables and heading."""
    # NOTE(review): the row keys below must stay identical to the column ids
    # listed in df1['columns'] / df2['columns'].
    papers_table = dash_table.DataTable(
        id='table1',
        columns=[{"name": i, "id": i} for i in df1['columns']],
        data=[{
            'Recommended Papers 📄': x[0:-6],
            'PMID': title_to_pmid[x],
            'Last Author': papers_to_author[x],
            'Article Summary': sentences[title_to_pmid[x]]
        } for x in top_k_papers],
        style_header={
            'backgroundColor': '#f2f2f2',
            'whiteSpace': 'normal',
            'height': 'auto'
        },
        style_cell={'textAlign': 'left'},
        style_data={'whiteSpace': 'pre-wrap', 'height': 'auto'},
        style_table={
            "margin-top": "25px",
            'whiteSpace': 'normal',
            'height': 'auto'
        },
        style_cell_conditional=[
            {'if': {'column_id': 'Recommended Papers 📄'}, 'width': '50%'},
            {'if': {'column_id': 'PMID'}, 'width': '5%'},
            {'if': {'column_id': 'Last Author'}, 'width': '10%'},
            {'if': {'column_id': 'Article Summary'}, 'width': '35%'},
        ])

    people_table = dash_table.DataTable(
        id='table2',
        columns=[{"name": i, "id": i} for i in df2['columns']],
        data=[{
            'Recommended People 👩🔬👨🔬': x,
            'Latest Affiliation 🏫': authors_to_affiliation[x],
            'Citations': citation_dict[top_k_people_ids[idx]],
            'Number of Papers': number_papers_dict[top_k_people_ids[idx]]
        } for idx, x in enumerate(top_k_people)],
        style_header={
            'backgroundColor': '#f2f2f2',
            'textColor': 'pink',
            'whiteSpace': 'normal',
            'height': 'auto'
        },
        style_cell={'textAlign': 'left'},
        style_data={'whiteSpace': 'normal', 'height': 'auto'},
        style_table={
            "margin-top": "40px",
            'whiteSpace': 'normal',
            'height': 'auto',
            'align': 'center'
        },
        style_cell_conditional=[
            {'if': {'column_id': 'Recommended People 👩🔬👨🔬'}, 'width': '25%'},
            {'if': {'column_id': 'Latest Affiliation 🏫'}, 'width': '55%'},
            {'if': {'column_id': 'Citations'}, 'width': '8%'},
            {'if': {'column_id': 'Number of Papers'}, 'width': '12%'},
        ])

    return [
        html.Div(children=[
            html.H2(children='Recommendations', id='title_rec'),
            papers_table,
            people_table,
        ],
                 id='div_table_analytics'),
        html.H2(children='Citation graph of related papers',
                id='title_graph_div')
    ]
# if prediction is no attribute values, e.g. [] and so is the groundtruth # May happen if not p_value and not groundtruth[p_key]: true_positive_prediction += 1 # counts the number of good prediction for node p_key # here len(p_value)=1 but we could have tried to predict more values if p_value in groundtruth[p_key]: true_positive_prediction += 1 #len([c for c in p_value if c in groundtruth[p_key]]) # no else, should not happen: train and test datasets are consistent return true_positive_prediction * 100 / total_predictions # load the graph G = nx.read_gexf("mediumLinkedin.gexf") print("Nb of users in our graph: %d" % len(G)) # load the profiles. 3 files for each type of attribute # Some nodes in G have no attributes # Some nodes may have 1 attribute 'location' # Some nodes may have 1 or more 'colleges' or 'employers', so we # use dictionaries to store the attributes college = {} location = {} employer = {} # The dictionaries are loaded as dictionaries from the disk (see pickle in Python doc) with open('mediumCollege_60percent_of_empty_profile.pickle', 'rb') as handle: college = pickle.load(handle) with open('mediumLocation_60percent_of_empty_profile.pickle', 'rb') as handle: location = pickle.load(handle)
__author__ = 'Amir'

import logging

# Logging is configured before the project modules below are imported.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(process)d\t%(asctime)s:%(levelname)s: %(message)s',
    datefmt='%H:%M:%S')

from pre_process import crawl_to_graph

DS_DIR = '/home/amir/pyproj/amazon-review-spam/io/same_cat_v2'

# Build the graph from the crawl and keep an untouched copy of it.
graph, membs, prods = crawl_to_graph(ds_dir=DS_DIR)
graph_orig = graph.copy()

import networkx as nx
from os import path

mgraph_file = path.join(DS_DIR, '%s.gexf' % 'em_unlabeled_mgraph')
mgraph = nx.read_gexf(mgraph_file)

# For every node of mgraph, list its neighbours in the crawled graph.
author_product_mapping = {a: list(graph[a]) for a in mgraph}

from hardEM_gurobi import HardEM

nparts = 4
ll, partition = HardEM.run_EM(author_graph=mgraph,
                              author_product_map=author_product_mapping,
                              nparts=nparts * 5,
                              parallel=True,
                              nprocs=4)

# Store the part assigned to each node as its integer 'cLabel' attribute.
for a in mgraph:
    mgraph.node[a]['cLabel'] = int(partition[a])
def _wl_graph_structure(gexf_path):
    """Read one GEXF graph and return ``(degrees, neighbors)`` lists.

    Nodes are relabelled to ``0 .. n-1`` so both lists are indexed by node.
    ``G.degree(i)`` and ``list(G.neighbors(i))`` are used because they behave
    the same on the networkx 1.x and 2.x+ APIs.
    """
    G = nx.read_gexf(gexf_path, node_type=None, relabel=True,
                     version='1.1draft')
    G = nx.convert_node_labels_to_integers(G)
    order = G.order()
    degrees = [G.degree(node) for node in range(order)]
    neighbors = [list(G.neighbors(node)) for node in range(order)]
    return degrees, neighbors


def orig_graph_map_WL(graph_list_1, graph_list_2, h, graph_dir='/your_dir/'):
    """
    Computes orig_graph_map for classes 1 and 2, for a given height h.

    Args:
        graph_list_1: graph names (file stems, without ``.gexf``) of class 1.
        graph_list_2: graph names (file stems, without ``.gexf``) of class 2.
        h: number of Weisfeiler-Lehman relabelling iterations.
        graph_dir: prefix prepended to every graph name to build its path.

    Returns:
        ``{it: {gidx: {label: count}}}`` for ``it`` in ``-1 .. h-1``.
        ``gidx`` indexes the graphs of class 1 followed by those of class 2.
        Level ``-1`` counts node degrees; level ``it >= 0`` counts the
        node-plus-sorted-neighbour label signatures of that iteration.
    """
    graph_names = list(graph_list_1) + list(graph_list_2)
    graph_idx = range(len(graph_names))
    orig_graph_map = {
        it: {gidx: defaultdict(int) for gidx in graph_idx}
        for it in range(-1, h)
    }

    labels = {}
    adjacency = {}
    label_lookup = {}
    label_counter = 0

    # Initial labeling: each node is labelled by its degree. Every file is
    # read exactly once; the adjacency is kept for the WL iterations below.
    for gidx in graph_idx:
        degrees, adjacency[gidx] = _wl_graph_structure(
            graph_dir + graph_names[gidx] + '.gexf')
        labels[gidx] = np.zeros(len(degrees), dtype=np.int32)
        for node, label in enumerate(degrees):
            if label not in label_lookup:
                label_lookup[label] = label_counter
                label_counter += 1
            labels[gidx][node] = label_lookup[label]
            orig_graph_map[-1][gidx][label] += 1

    compressed_labels = deepcopy(labels)

    # WL iterations
    for it in range(h):
        # The compression table is restarted at every iteration.
        label_lookup = {}
        label_counter = 0
        for gidx in graph_idx:
            for node, neighbors in enumerate(adjacency[gidx]):
                node_label = tuple([labels[gidx][node]])
                if len(neighbors) > 0:
                    # A distinct comprehension variable is used on purpose:
                    # reusing the node index here leaks on Python 2 and makes
                    # the write below hit the wrong node.
                    neighbors_label = tuple(
                        [labels[gidx][nb] for nb in neighbors])
                    node_label = str(node_label) + "-" + str(
                        sorted(neighbors_label))
                if node_label not in label_lookup:
                    label_lookup[node_label] = label_counter
                    label_counter += 1
                compressed_labels[gidx][node] = label_lookup[node_label]
                orig_graph_map[it][gidx][node_label] += 1
        print("Number of compressed labels at iteration %s: %s" %
              (it, len(label_lookup)))
        labels = deepcopy(compressed_labels)

    return orig_graph_map
def read_network_data(self, path):
    """Load the GEXF file at ``path`` and store the graph as ``self.G``."""
    graph = nx.read_gexf(path)
    self.G = graph
def _mle_graph_structure(gexf_path):
    """Read one GEXF graph and return ``(degrees, neighbors)`` lists.

    Nodes are relabelled to ``0 .. n-1`` so both lists are indexed by node.
    ``G.degree(i)`` and ``list(G.neighbors(i))`` are used because they behave
    the same on the networkx 1.x and 2.x+ APIs.
    """
    G = nx.read_gexf(gexf_path, node_type=None, relabel=True,
                     version='1.1draft')
    G = nx.convert_node_labels_to_integers(G)
    order = G.order()
    degrees = [G.degree(node) for node in range(order)]
    neighbors = [list(G.neighbors(node)) for node in range(order)]
    return degrees, neighbors


def compute_mle_wl_kernel(
        graph_list, h,
        graph_dir='/Users/Syzygy/workspace/Stage_Shanghai/'
                  'gexf_simplified_france/'):
    """
    Computes original WL kernel for a given height h.

    Args:
        graph_list: graph names (file stems, without ``.gexf``).
        h: number of Weisfeiler-Lehman relabelling iterations.
        graph_dir: prefix prepended to every graph name to build its path.

    Returns:
        A ``(len(graph_list), len(graph_list))`` float array ``K``.
        ``K[i][j]`` is the sum, over levels ``-1 .. h-1``, of the products of
        the counts of the labels that graphs ``i`` and ``j`` share.
    """
    start_time_mle = time.time()
    graph_idx = range(len(graph_list))
    num_graphs = len(graph_idx)
    orig_graph_map = {
        it: {gidx: defaultdict(int) for gidx in graph_idx}
        for it in range(-1, h)
    }

    labels = {}
    adjacency = {}
    label_lookup = {}
    label_counter = 0

    # Initial labeling: each node is labelled by its degree. Every file is
    # read exactly once; the adjacency is kept for the WL iterations below.
    for gidx in graph_idx:
        degrees, adjacency[gidx] = _mle_graph_structure(
            graph_dir + graph_list[gidx] + '.gexf')
        labels[gidx] = np.zeros(len(degrees), dtype=np.int32)
        for node, label in enumerate(degrees):
            if label not in label_lookup:
                label_lookup[label] = label_counter
                label_counter += 1
            labels[gidx][node] = label_lookup[label]
            orig_graph_map[-1][gidx][label] += 1

    compressed_labels = copy.deepcopy(labels)

    # WL iterations
    for it in range(h):
        # The compression table is restarted at every iteration.
        label_lookup = {}
        label_counter = 0
        for gidx in graph_idx:
            for node, neighbors in enumerate(adjacency[gidx]):
                node_label = tuple([labels[gidx][node]])
                if len(neighbors) > 0:
                    # A distinct comprehension variable is used on purpose:
                    # reusing the node index here leaks on Python 2 and makes
                    # the write below hit the wrong node.
                    neighbors_label = tuple(
                        [labels[gidx][nb] for nb in neighbors])
                    node_label = str(node_label) + "-" + str(
                        sorted(neighbors_label))
                if node_label not in label_lookup:
                    label_lookup[node_label] = label_counter
                    label_counter += 1
                compressed_labels[gidx][node] = label_lookup[node_label]
                orig_graph_map[it][gidx][node_label] += 1
        print("Number of compressed labels at iteration %s: %s" %
              (it, len(label_lookup)))
        labels = copy.deepcopy(compressed_labels)

    # Kernel: the contribution of a pair (i, j) is symmetric, so it is
    # computed once and mirrored.
    K = np.zeros((num_graphs, num_graphs))
    for it in range(-1, h):
        level = orig_graph_map[it]
        for i in range(num_graphs):
            for j in range(i, num_graphs):
                common_keys = set(level[i]) & set(level[j])
                value = sum(level[i][k] * level[j][k] for k in common_keys)
                K[i][j] += value
                if i != j:
                    K[j][i] += value

    end_time_mle_kernel = time.time()
    print(
        "Total time for MLE computation for WL kernel (with kernel computation) %g"
        % (end_time_mle_kernel - start_time_mle))
    return K
def process(self):
    """Convert the raw ``*.gexf`` graphs and the GED pickle to processed files.

    For every (raw path, processed path) pair, each ``<id>.gexf`` graph is
    turned into a ``Data`` object and the collated list is saved to the
    processed path. Afterwards the ground-truth GED matrix and its
    size-normalized variant are saved under ``self.processed_dir``.
    """
    import networkx as nx

    # `ids` holds one sorted list of graph IDs per split; `Ns` holds the node
    # count of every graph read, in reading order.
    ids, Ns = [], []
    # Iterating over paths for raw and processed data (train + test):
    for r_path, p_path in zip(self.raw_paths, self.processed_paths):
        # Find the paths of all raw graphs:
        names = glob.glob(osp.join(r_path, '*.gexf'))
        # Get sorted graph IDs given filename: 123.gexf -> 123
        ids.append(sorted([int(i.split(os.sep)[-1][:-5]) for i in names]))

        data_list = []
        # Convert graphs in .gexf format to a NetworkX Graph:
        for i, idx in enumerate(ids[-1]):
            # Graphs of the second split are numbered after those of the
            # first split.
            i = i if len(ids) == 1 else i + len(ids[0])

            # Reading the raw `*.gexf` graph:
            G = nx.read_gexf(osp.join(r_path, f'{idx}.gexf'))
            # Mapping of nodes in `G` to a contiguous number:
            mapping = {name: j for j, name in enumerate(G.nodes())}
            G = nx.relabel_nodes(G, mapping)
            # Recorded before `pre_filter` runs, so filtered-out graphs still
            # contribute their size to `Ns`.
            Ns.append(G.number_of_nodes())

            edge_index = torch.tensor(list(G.edges)).t().contiguous()
            # A graph without edges yields an empty tensor; give it the
            # expected (2, 0) shape.
            if edge_index.numel() == 0:
                edge_index = torch.empty((2, 0), dtype=torch.long)
            edge_index = to_undirected(edge_index, num_nodes=Ns[-1])

            data = Data(edge_index=edge_index, i=i)
            data.num_nodes = Ns[-1]

            # Create a one-hot encoded feature matrix denoting the atom
            # type (for the `AIDS700nef` dataset):
            if self.name == 'AIDS700nef':
                x = torch.zeros(data.num_nodes, dtype=torch.long)
                for node, info in G.nodes(data=True):
                    x[int(node)] = self.types.index(info['type'])
                data.x = F.one_hot(x, num_classes=len(self.types)).to(
                    torch.float)

            if self.pre_filter is not None and not self.pre_filter(data):
                continue

            if self.pre_transform is not None:
                data = self.pre_transform(data)

            data_list.append(data)

        torch.save(self.collate(data_list), p_path)

    # Map each graph ID to its global index (first split, then second split).
    # NOTE(review): indexing `ids[1]` assumes exactly two raw paths - confirm.
    assoc = {idx: i for i, idx in enumerate(ids[0])}
    assoc.update({idx: i + len(ids[0]) for i, idx in enumerate(ids[1])})

    # Extracting ground-truth GEDs from the GED pickle file
    path = osp.join(self.raw_dir, self.name, 'ged.pickle')
    # Initialize GEDs as float('inf'):
    mat = torch.full((len(assoc), len(assoc)), float('inf'))
    with open(path, 'rb') as f:
        # NOTE(review): `pickle.load` executes arbitrary code from the file;
        # presumably this file comes from a trusted download - confirm.
        obj = pickle.load(f)
        xs, ys, gs = [], [], []
        for (x, y), g in obj.items():
            xs += [assoc[x]]
            ys += [assoc[y]]
            gs += [g]
        # The pickle file does not contain GEDs for test graph pairs, i.e.
        # GEDs for (test_graph, test_graph) pairs are still float('inf'):
        x, y = torch.tensor(xs), torch.tensor(ys)
        ged = torch.tensor(gs, dtype=torch.float)
        # Fill both (x, y) and (y, x) so the matrix is symmetric.
        mat[x, y], mat[y, x] = ged, ged

    path = osp.join(self.processed_dir, f'{self.name}_ged.pt')
    torch.save(mat, path)

    # Calculate the normalized GEDs:
    # each entry is divided by the mean node count of the two graphs.
    N = torch.tensor(Ns, dtype=torch.float)
    norm_mat = mat / (0.5 * (N.view(-1, 1) + N.view(1, -1)))

    path = osp.join(self.processed_dir, f'{self.name}_norm_ged.pt')
    torch.save(norm_mat, path)
import networkx as nx
import matplotlib.pyplot as plt
import random

G = nx.read_gexf('../../dataset/graph-small/graph.gexf')

# Store every node's PageRank score, rounded to 5 decimals, on the node.
pagerank = nx.pagerank(G)
for node_id, score in pagerank.items():
    G.nodes[node_id]['pagerank'] = round(score, 5)

# Lay the graph out once and reuse the positions for nodes and labels.
pos = nx.spring_layout(G)
node_labels = nx.get_node_attributes(G, 'pagerank')

nx.draw(G, pos)
nx.draw_networkx_labels(G, pos, labels=node_labels)
plt.show()
# %% A Initialisation import networkx as nx from py2neo import Graph, Node, Relationship import pandas as pd from neo4j import GraphDatabase, basic_auth graph = Graph("bolt://localhost:7687", auth=("neo4j", "Password")) driver = GraphDatabase.driver('bolt://localhost',auth=basic_auth("neo4j", "Password")) db = driver.session() # results = db.run("MATCH (w:Website) WHERE 'IN' in w.D1_status RETURN w.site_name, w.D1_homepage") # df = pd.DataFrame([r["w.D1_homepage"] for r in results]) # %% Load gexf gexfD0DISCO="C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\202007WebsitesRS_D0\\202007WebsiteRSD0.gexf" G= nx.read_gexf(gexfD0DISCO, node_type=None, relabel=False, version='1.1draft') data = nx.json_graph.node_link_data(G) totnbnodes=len(data['nodes']) print(totnbnodes," nodes found in gexf") # %% A Lance les firstpages : import des procedures import sys dataPath = 'C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data' if dataPath not in sys.path: sys.path.insert(0, dataPath) from FromNotebook import getFirstPageNode, getFirstPageRSNode, myUrlParse # %% This downloads all firstpages if not there and put results in firstPageRS_df medialistD0path = "C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\202007WebsitesRS_D0\\20200709_medialistForD0.txt"