def copy_layout(from_fname, to_fname):
    # fix: the original appended '.gml' to undefined names (from_name/to_name)
    if not from_fname[-4:] == '.gml': from_fname += '.gml'
    if not to_fname[-4:] == '.gml': to_fname += '.gml'

    print 'reading A=', from_fname, '..',
    g1 = NX.read_gml(from_fname)
    labels1 = NX.get_node_attributes(g1, 'label')
    n1 = set(labels1.values())
    print len(n1), 'nodes'

    print 'reading B=', to_fname, '..',
    g2 = NX.read_gml(to_fname)
    labels2 = NX.get_node_attributes(g2, 'label')
    n2 = set(labels2.values())
    print len(n2), 'nodes'

    intersection = len(n2.intersection(n1))
    percent = 100. * intersection / len(n2)
    print 'B.intersect(A)=', intersection, '(%.1f%%)' % percent

    print 'copying layout..',
    mapping = {}
    for L1 in labels1:
        for L2 in labels2:
            if labels1[L1] == labels2[L2]:
                mapping[L1] = L2
                break
    layout = NX.get_node_attributes(g1, 'graphics')
    attr = dict([(mapping[ID], {'x': layout[ID]['x'], 'y': layout[ID]['y']}) for ID in mapping])
    NX.set_node_attributes(g2, 'graphics', attr)
    NX.write_gml(g2, to_fname)
    print 'done.'
def test_modularity_measure(function): def print_info(graph, name): print name, "N:", len(graph), "M:", graph.size() print "Q:", round(function(graph)[0], 3) graph = nx.read_gml(archive.extractfile("networks/karate-newman-1977.gml")) print_info(graph, "Karate") graph = nx.read_gml(archive.extractfile("networks/yeast_protein_interaction-barabasi-2001.tsv")) print_info(graph, "Protein Interaction") graph = pp.read_pajek(zipfile.ZipFile("networks/jazz.zip", "r").open("jazz.net")) print_info(graph, "Jazz musicians") graph = pp.read_pajek(zipfile.ZipFile("networks/celegans_metabolic.zip", "r").open("celegans_metabolic.net")) print_info(graph, "Metabolic") graph = nx.read_edgelist(zipfile.ZipFile("networks/email.zip", "r").open("email.txt"), data=False) print_info(graph, "E-mail") graph = pp.read_pajek(zipfile.ZipFile("networks/PGP.zip", "r").open("PGPgiantcompo.net")) print_info(graph, "Key signing") graph = nx.read_gml(zipfile.ZipFile("networks/cond-mat-2003.zip", "r").open("cond-mat-2003.gml")) print_info(graph, "Physicists")
def load_3(gml1,gml2,name): g1 = nx.read_gml(gml1) g2 = nx.read_gml(gml2) q1 = qucik_hash(g1) q2 = qucik_hash(g2) if not q1: return 0 if not q2: return 0 v1 = q1[1] v2 = q2[1] s1 = q1[0] s2 = q2[0] if v1 == v2: #print "skip" return 0 #print s1 #print s2 to_write = [] to_write.append(name) with open("result_openssl.txt", "a") as myfile: for item in to_write: myfile.write(item) myfile.write("\n") return 1
def generateGraph(self, ticket, bnglContents, graphtype): print ticket pointer = tempfile.mkstemp(suffix='.bngl', text=True) with open(pointer[1], 'w') as f: f.write(bnglContents) try: if graphtype in ['regulatory', 'contactmap']: consoleCommands.setBngExecutable(bngDistro) consoleCommands.generateGraph(pointer[1], graphtype) name = pointer[1].split('.')[0].split('/')[-1] with open('{0}_{1}.gml'.format(name, graphtype), 'r') as f: graphContent = f.read() gml = networkx.read_gml('{0}_{1}.gml'.format(name, graphtype)) result = gml2cyjson(gml, graphtype=graphtype) jsonStr = json.dumps(result, indent=1, separators=(',', ': ')) result = {'jsonStr': jsonStr, 'gmlStr': graphContent} self.addToDict(ticket, result) os.remove('{0}_{1}.gml'.format(name, graphtype)) print 'success', ticket elif graphtype in ['sbgn_er']: consoleCommands.setBngExecutable(bngDistro) consoleCommands.generateGraph(pointer[1], 'contactmap') name = pointer[1].split('.')[0].split('/')[-1] # with open('{0}_{1}.gml'.format(name,'contactmap'),'r') as f: # graphContent = f.read() graphContent = networkx.read_gml( '{0}_{1}.gml'.format(name, 'contactmap')) sbgn = libsbgn.createSBNG_ER_gml(graphContent) self.addToDict(ticket, sbgn) os.remove('{0}_{1}.gml'.format(name, 'contactmap')) print 'success', ticket elif graphtype in ['std']: consoleCommands.setBngExecutable(bngDistro) consoleCommands.bngl2xml(pointer[1]) xmlFileName = pointer[1].split('.')[0] + '.xml' xmlFileName = xmlFileName.split(os.sep)[-1] graph = stdgraph.generateSTDGML(xmlFileName) gmlGraph = networkx.generate_gml(graph) #os.remove('{0}.gml'.format(xmlFileName)) result = gml2cyjson(graph, graphtype=graphtype) jsonStr = json.dumps(result, indent=1, separators=(',', ': ')) result = {'jsonStr': jsonStr, 'gmlStr': ''.join(gmlGraph)} #self.addToDict(ticket, ''.join(gmlGraph)) self.addToDict(ticket, result) print 'success', ticket except: import traceback traceback.print_exc() self.addToDict(ticket,-5) print 'failure',ticket finally: task.deferLater(reactor, 600, freeQueue, ticket)
def ato_write_gml(graph, fileName, labelGraphics): def writeDict(gml, key, label, contents, space, labelGraphics=None): gml.write('{1}{0} [\n'.format(key, space)) for subKey in contents: if type(contents[subKey]) in [str]: gml.write('{2}\t{0} "{1}"\n'.format(subKey, contents[subKey], space)) elif type(contents[subKey]) in [int]: gml.write('{2}\t{0} {1}\n'.format(subKey, contents[subKey], space)) elif type(contents[subKey]) in [dict]: writeDict(gml, subKey, subKey, contents[subKey], space + '\t') if labelGraphics and label in labelGraphics: for labelInstance in labelGraphics[label]: writeDict(gml, 'LabelGraphics', 'LabelGraphics', labelInstance, space + '\t') gml.write('{0}]\n'.format(space)) gml = StringIO.StringIO() gml.write('graph [\n') gml.write('\tdirected 1\n') for node in graph.node: writeDict(gml, 'node', node, graph.node[node], '\t', labelGraphics) flag = False for x in nx.generate_gml(graph): if 'edge' in x and not flag: flag = True if flag: gml.write(x + '\n') #gml.write(']\n') with open(fileName, 'w') as f: f.write(gml.getvalue()) nx.read_gml(fileName)
def __init__(self, ppi1filename, ppi2filename, matchfilename, outmatchfilename): self.ppi1 = nx.read_gml(ppi1filename, relabel = True) self.ppi2 = nx.read_gml(ppi2filename, relabel = True) in_to_node1 = {} in_to_node2 = {} for n1 in self.ppi1.nodes(): in_to_node1[self.ppi1.node[n1]['index']] = n1 for n2 in self.ppi2.nodes(): in_to_node2[self.ppi2.node[n2]['index']] = n2 matchfile = open(matchfilename) outmatchfile = open(outmatchfilename, "w") self.I = [] for line in matchfile: if line[0] == "!": outmatchfile.write(line) continue cols = line.split() outmatchfile.write(in_to_node1[int(cols[0])] + " ") outmatchfile.write(in_to_node2[int(cols[1])] + "\n") self.I.append([in_to_node1[int(cols[0])], in_to_node2[int(cols[1])]]) #check if it's a legal match for (i, j) in itertools.product(self.I, self.I): if i!= j: if (i[0] == j[0]) or (i[1] == j[1]): print "not a legal match: ", (i,j) return print "Legal match" matchfile.close() outmatchfile.close()
def main(): #extract_intra_function_cfg("C:\\Users\\Xu Zhengzi\\Desktop\\oh\\") cfg1 = nx.read_gml("C:\\Users\\Xu Zhengzi\\Desktop\\og\\dtls1_reassemble_fragment.gml") cfg2 = nx.read_gml("C:\\Users\\Xu Zhengzi\\Desktop\\oh\\dtls1_reassemble_fragment.gml") nodes1 = ['0x80c0b14', '0x80c0b9a', '0x80c0c3c', '0x80c0c57', '0x80c0c5d', '0x80c0c8c', '0x80c0ccc', '0x80c0d0a', '0x80c0d2c', '0x80c0e83', '0x80c0fb4', '0x80c0eb6', '0x80c0f53', '0x80c0b97', '0x80c0d88', '0x80c0de1', '0x80c0db5', '0x80c0fac', '0x80c0f73', '0x80c0dd9'] extract_trace(cfg1, 3, nodes1) print "Finish"
def main():
    favorites_graph = nx.read_gml(FAVORITES_GML_OUTPUT_PATH)
    output_metrics(favorites_graph)
    favorites_graph = None

    comments_graph = nx.read_gml(COMMENTS_GML_OUTPUT_PATH)
    output_metrics(comments_graph)
    comments_graph = None
def read_graph(path):
    if path.endswith('.txt'):
        A = np.loadtxt(path)
        G = nx.from_numpy_matrix(A)
        return G
    if path.endswith('.gml'):
        # read once and return that graph (the original read the file twice)
        G = nx.read_gml(path, 'label')
        return G
def dolphins():
    """ Loads the dolphin social graph """
    try:
        gml_graph = nx.read_gml(DATA_PATH_1 + DOLPHINS)
    except:
        gml_graph = nx.read_gml(DATA_PATH_2 + DOLPHINS)
    dgraph = nx.Graph()
    dgraph.add_nodes_from(gml_graph.nodes(), size=1.)
    edges = gml_graph.edges()
    edges = [(u, v, {'weight': 1.}) for (u, v) in edges]
    dgraph.add_edges_from(edges)
    return dgraph
def test_output(self): """ test the output management function. should only output to file if an output format is given. otherwise output to console in adj list text. :return: """ custom_filename = 'custom.out' try: yt_script.generate_output(self.MOCK_GRAPH, None, self.MOCK_FILE_OUTPUT) self.assertFalse(os.path.exists(self.MOCK_FILE_OUTPUT)) except AttributeError: self.fail() try: yt_script.generate_output(self.MOCK_GRAPH, 'gml', self.MOCK_FILE_OUTPUT) result_graph = nx.read_gml(self.MOCK_FILE_OUTPUT) for node in self.MOCK_GRAPH.nodes(): self.assertIn(node, result_graph.nodes()) for edge in self.MOCK_GRAPH.edges(): try: self.assertIn(edge, result_graph.edges()) except AssertionError: edge = (edge[1], edge[0]) self.assertIn(edge, result_graph.edges()) continue except AttributeError: self.fail() try: yt_script.generate_output(self.MOCK_GRAPH, 'gml', custom_filename) result_graph = nx.read_gml(custom_filename) for node in self.MOCK_GRAPH.nodes(): self.assertIn(node, result_graph.nodes()) for edge in self.MOCK_GRAPH.edges(): try: self.assertIn(edge, result_graph.edges()) except AssertionError: edge = (edge[1], edge[0]) self.assertIn(edge, result_graph.edges()) continue except AttributeError: self.fail() self.assertRaises(RuntimeError, yt_script.generate_output, self.MOCK_GRAPH, 'fake_format', custom_filename) if os.path.exists(custom_filename): os.remove(custom_filename)
def load(gml1,gml2,name): g1 = nx.read_gml(gml1) g2 = nx.read_gml(gml2) s1 = t(g1) s2 = t(g2) #print s1 #print s2 with open("result.txt", "a") as myfile: myfile.write(name) myfile.write("\n") m = lcs(s1,s2) index = find_index(m) match1 = [] match2 = [] for item in index: myfile.write(hex(s1[item[0]][0]) + " " + hex(s2[item[1]][0])) myfile.write("\n") #print hex(s1[item[0]][0]) + " " + hex(s2[item[1]][0]) match1.append(s1[item[0]][0]) match2.append(s2[item[1]][0]) myfile.write("o") myfile.write("\n") for item in s1: if item[0] not in match1: #print hex(item[0]) myfile.write(hex(item[0])) myfile.write("\n") myfile.write("p") myfile.write("\n") for item in s2: if item[0] not in match2: #print hex(item[0]) myfile.write(hex(item[0])) myfile.write("\n") #print return 0
def graph_product(G_file): #TODO: take in a graph (eg when called from graphml) rather than re-reading the graph again LOG.info("Applying graph product to %s" % G_file) H_graphs = {} try: G = nx.read_graphml(G_file).to_undirected() except IOError: G = nx.read_gml(G_file).to_undirected() return G = remove_yed_edge_id(G) G = remove_gml_node_id(G) #Note: copy=True causes problems if relabelling with same node name -> loses node data G = nx.relabel_nodes(G, dict((n, data.get('label', n)) for n, data in G.nodes(data=True))) G_path = os.path.split(G_file)[0] H_labels = defaultdict(list) for n, data in G.nodes(data=True): H_labels[data.get("H")].append(n) for label in H_labels.keys(): try: H_file = os.path.join(G_path, "%s.graphml" % label) H = nx.read_graphml(H_file).to_undirected() except IOError: try: H_file = os.path.join(G_path, "%s.gml" % label) H = nx.read_gml(H_file).to_undirected() except IOError: LOG.warn("Unable to read H_graph %s, used on nodes %s" % (H_file, ", ".join(H_labels[label]))) return root_nodes = [n for n in H if H.node[n].get("root")] if len(root_nodes): # some nodes have root set non_root_nodes = set(H.nodes()) - set(root_nodes) H.add_nodes_from( (n, dict(root=False)) for n in non_root_nodes) H = remove_yed_edge_id(H) H = remove_gml_node_id(H) nx.relabel_nodes(H, dict((n, data.get('label', n)) for n, data in H.nodes(data=True)), copy=False) H_graphs[label] = H G_out = nx.Graph() G_out.add_nodes_from(node_list(G, H_graphs)) G_out.add_nodes_from(propagate_node_attributes(G, H_graphs, G_out.nodes())) G_out.add_edges_from(intra_pop_links(G, H_graphs)) G_out.add_edges_from(inter_pop_links(G, H_graphs)) G_out.add_edges_from(propagate_edge_attributes(G, H_graphs, G_out.edges())) #TODO: need to set default ASN, etc? return G_out
def polblogs(relabel=True): """Network of political blogs. A directed network of hyperlinks between weblogs on US politics, recorded in 2005 by Adamic and Glance. Please cite L. A. Adamic and N. Glance, 'The political blogosphere and the 2004 US Election', in Proceedings of the WWW-2005 Workshop on the Weblogging Ecosystem (2005). Thanks to Lada Adamic for permission to post these data on this web site. http://www-personal.umich.edu/~mejn/netdata/polblogs.zip """ fname = os.path.join(os.path.dirname(__file__), "data/polblogs.gml") g = networkx.read_gml(fname, relabel=True) g = networkx.Graph(g) g.graph["Creator"] = g.graph["Creator"].split('"')[1] # node_map = { } for n, data in g.nodes_iter(data=True): dict_values_to_str(data) # data['label'] = str(data['label']) data["cmty"] = str(data["value"]) # node_map[n] = data['label'] del data["value"], data["id"], data["label"] # print data # if relabel: # g = networkx.relabel_nodes(g, node_map) return g
def polbooks(relabel=True): """Network of political books. A network of books about US politics published around the time of the 2004 presidential election and sold by the online bookseller Amazon.com. Edges between books represent frequent copurchasing of books by the same buyers. The network was compiled by V. Krebs and is unpublished, but can found on Krebs' web site. Thanks to Valdis Krebs for permission to post these data on this web site. Communities stored in g.node[n]['cmty'], node names are the titles of books. http://www-personal.umich.edu/~mejn/netdata/polbooks.zip""" fname = os.path.join(os.path.dirname(__file__), "data/polbooks.gml") g = networkx.read_gml(fname, relabel=True) g = networkx.Graph(g) g.graph["Creator"] = g.graph["Creator"].split('"')[1] # node_map = { } for n, data in g.nodes_iter(data=True): dict_values_to_str(data) data["cmty"] = str(data["value"]) # node_map[n] = str(data['label']) # del data['value'], data['id'], data['label'] # if relabel: # g = networkx.relabel_nodes(g, node_map) return g
def save_commuting_graph(): # print G.nodes() # print G.edges(data=True) # # nx.write_gml(G, "/home/sscepano/D4D res/allstuff/User movements graphs/communting patterns/1/total_commuting_G.gml") # # print GA.nodes() # print GA.edges(data=True) # # nx.write_gml(G, "/home/sscepano/D4D res/allstuff/User movements graphs/communting patterns/1/total_commuting_GA.gml") #v.map_commuting_all(G) #map_communities_and_commutes(G) # G = nx.read_gml("/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G.gml") # # G1 = process_weights(G) # nx.write_gml(G1, "/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G_scaled_weights.gml") # # print G1.edges(data=True) G1 = nx.read_gml("/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G_scaled_weights.gml") # print G1.nodes(data=True) # # print G1.nodes(data=True)[1][1]['label'] map_communities_and_commutes(G1) return G1
def main(): graph = nx.read_gml("../data/network.gml") nodes = [{ "id": data['id'], "name": data.get("name") or "(Null)", "group": data['group'], "x": data["graphics"]["x"], "y": data["graphics"]["y"], "w": data["graphics"]["w"], "h": data["graphics"]["h"], "weight": data["weight"], "fixed": True, } for _, data in graph.nodes(data=True)] links = [{ "source": get_node_index(nodes, source), "target": get_node_index(nodes, target), "name": data["label"], "value": data["value"] } for source, target, data in graph.edges(data=True)] json.dump({ "nodes": nodes, "links": links }, open("../data/network.json", "w"), indent=4)
def main():
    G = nx.read_gml(filename, relabel=False)
    power_law_est(G)
    #power_law_est_igraph(filename)
    max_degrees(G)
    max_bcentrality(G)
    max_pagerank(G)
def loadNetwork(f, ext):
    if ext == "gml":
        try:
            return nx.read_gml(f)
        except Exception, e:
            print("Couldn't load " + f + " as gml.")
            return False
def _read_celltype_graph(self, celltypes_file, format="gml"): """ Read celltype-celltype connectivity graph from file. celltypes_file -- the path of the file containing the graph. format -- format of the file. allowed values: gml, graphml, edgelist, pickle, yaml. """ start = datetime.now() celltype_graph = None try: if format == "gml": celltype_graph = nx.read_gml(celltypes_file) elif format == "edgelist": celltype_graph = nx.read_edgelist(celltypes_file) elif format == "graphml": celltype_graph = nx.read_graphml(celltypes_file) elif format == "pickle": celltype_graph = nx.read_gpickle(celltypes_file) elif format == "yaml": celltype_graph = nx.read_yaml(celltypes_file) else: print "Unrecognized format %s" % (format) except Exception, e: print e
def _read_cell_graph(self, filename, format): """Load the cell-to-cell connectivity graph from a file. Returns None if any error happens. """ cell_graph = None if filename: try: start = datetime.now() if format == "gml": cell_graph = nx.read_gml(filename) elif format == "pickle": cell_graph = nx.read_gpickle(filename) elif format == "edgelist": cell_graph = nx.read_edgelist(filename) elif format == "yaml": cell_graph = nx.read_yaml(filename) elif format == "graphml": cell_graph = cell_graph = nx.read_graphml(filename) else: print "Unrecognized format:", format end = datetime.now() delta = end - start config.BENCHMARK_LOGGER.info( "Read cell_graph from file %s of format %s in %g s" % (filename, format, delta.seconds + 1e-6 * delta.microseconds) ) except Exception, e: print e
def parse_input(folder_name): ''' Parses an input and returns the corresponding graph and parameters Inputs: folder_name - a string representing the path to the input folder Outputs: (graph, num_buses, size_bus, constraints) graph - the graph as a NetworkX object num_buses - an integer representing the number of buses you can allocate to size_buses - an integer representing the number of students that can fit on a bus constraints - a list where each element is a list vertices which represents a single rowdy group ''' graph = nx.read_gml(folder_name + "/graph.gml") parameters = open(folder_name + "/parameters.txt") num_buses = int(parameters.readline()) size_bus = int(parameters.readline()) constraints = [] for line in parameters: line = line[1: -2] curr_constraint = [num.replace("'", "") for num in line.split(", ")] constraints.append(curr_constraint) return graph, num_buses, size_bus, constraints
def copy_layout_GML2NX(Fname, Graph, verbose=1): if not Fname[-4:]=='.gml': Fname+='.gml' print 'Copying layout from', Fname+'..' g1 = NX.read_gml( Fname ) labels1 = NX.get_node_attributes(g1, 'label') n1 = set(labels1.values()) nodes = set( Graph.nodes() ) if not n1: print ' empty layout graph' return if not nodes: print ' empty target graph' return mapping = {} for L1 in labels1: for name in nodes: if labels1[L1]==name: mapping[L1] = name break intersection = len(nodes.intersection(n1)) percent=100.*intersection/len(nodes) print ' %.1f%%'%percent,'(%i positions)'%intersection layout = NX.get_node_attributes(g1, 'graphics') attr = dict([ ( mapping[ID], {'x':layout[ID]['x'],'y':layout[ID]['y']} ) for ID in mapping]) NX.set_node_attributes( Graph, 'graphics', attr)
def import_layout(from_fname, to_graph): if not from_fname[-4:] =='.gml': from_fname +='.gml' print 'importing layout from', from_fname+'..' g1 = NX.read_gml(from_fname) labels1 = NX.get_node_attributes(g1, 'label') n1 = set(labels1.values()) g2 = to_graph n2 = set(g2.nodes()) if not n1: print ' empty target graph' return if not n2: print ' empty layout graph' return mapping = {} for L1 in labels1: for name in n2: if labels1[L1]==name: mapping[L1] = name break intersection = len(n2.intersection(n1)) percent=100.*intersection/len(n2) print ' %.1f%%'%percent,'(%i positions)'%intersection layout = NX.get_node_attributes(g1, 'graphics') attr = dict([ ( mapping[ID], {'x':layout[ID]['x'],'y':layout[ID]['y']} ) for ID in mapping]) NX.set_node_attributes(g2, 'graphics', attr)
def q1():
    lada = nx.read_gml("../../data/network_analysis/LadaFacebookAnon.gml")
    print_stats(lada, "LadaFacebookAnon")
    p = compute_edge_creation_probability(
        lada.number_of_nodes(), lada.number_of_edges())
    erg = nx.erdos_renyi_graph(lada.number_of_nodes(), p)
    print_stats(erg, "Erdos-Renyi Random")
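# Hedged sketch of the helper referenced above but not shown in this snippet.
# For an Erdos-Renyi G(n, p) graph to have the same expected edge count m as the
# loaded graph, p = m / (n * (n - 1) / 2). Only the name and call signature come
# from the code above; this body is an assumption.
def compute_edge_creation_probability(num_nodes, num_edges):
    possible_edges = num_nodes * (num_nodes - 1) / 2.0
    return num_edges / possible_edges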
def gml2jie(file_name): G = nx.read_gml(file_name) num_of_nodes = G.number_of_nodes() num_of_edges = G.number_of_edges() #debug for duplicated edges ##print "num of edges: %d \n" % num_of_edges for e in G.edges_iter(): num_edges = G.number_of_edges(e[0],e[1]) if num_edges > 1: print "find one: %d,%d" % (e[0],e[1]) G.remove_edge(*e) fnames = file_name.strip().split('.') base_name = fnames[0] ofname = base_name + '.jie' with open(ofname,'w') as ofile: ### write Node Node Edges ofile.write(str(G.number_of_nodes()) + " " \ + str(G.number_of_nodes()) + " "\ + str(G.number_of_edges())+'\n') for e in G.edges_iter(): """ index of Jie Chen's file start from 1. but networkx starts from 0 """ ofile.write(str(e[0]+1) + " " + str(e[1]+1) + "\n") return 0;
def classify(request, pk): #gets object based on id given graph_file = get_object_or_404(Document, pk=pk) #reads file into networkx graph based on extension if graph_file.extension() == ".gml": G = nx.read_gml(graph_file.uploadfile) else: G = nx.read_gexf(graph_file.uploadfile) #closes file so we can delete it graph_file.uploadfile.close() #loads the algorithm and tests the algorithm against the graph g_json = json_graph.node_link_data(G) #save graph into json file with open(os.path.join(settings.MEDIA_ROOT, 'graph.json'), 'w') as graph: json.dump(g_json, graph) with open(os.path.join(settings.MEDIA_ROOT, 'rf_classifier.pkl'), 'rb') as malgo: algo_loaded = pickle.load(malgo, encoding="latin1") dataset = np.array([G.number_of_nodes(), G.number_of_edges(), nx.density(G), nx.degree_assortativity_coefficient(G), nx.average_clustering(G), nx.graph_clique_number(G)]) print (dataset) #creates X to test against X = dataset prediction = algo_loaded.predict(X) graph_type = check_prediction(prediction) graph = GraphPasser(G.number_of_nodes(), G.number_of_edges(), nx.density(G), nx.degree_assortativity_coefficient(G), nx.average_clustering(G), nx.graph_clique_number(G)) #gives certain variables to the view return render( request, 'classification/classify.html', {'graph': graph, 'prediction': graph_type} )
def usufyToGmlExport(d, fPath): ''' Workaround to export to a gml file. :param d: Data to export. :param fPath: File path. ''' # Reading the previous gml file try: oldData=nx.read_gml(fPath) except UnicodeDecodeError as e: print "UnicodeDecodeError:\t" + str(e) print "Something went wrong when reading the .gml file relating to the decoding of UNICODE." import time as time fPath+="_" +str(time.time()) print "To avoid losing data, the output file will be renamed to use the timestamp as:\n" + fPath + "_" + str(time.time()) print # No information has been recovered oldData = nx.Graph() except Exception as e: # No information has been recovered oldData = nx.Graph() newGraph = _generateGraphData(d, oldData) # Writing the gml file nx.write_gml(newGraph,fPath)
def run_graph(graph_file, output_file, neighborhood, flag = None): print "==== New graph ====" print "Input:", graph_file print "Output:", output_file print "Neighborhood:", neighborhood print "" print "Loading graph" print "Start time: %s" % (datetime.now()) g=nx.read_gml(graph_file) print "End time: %s" % (datetime.now()) gflag = None if flag == None: print "Creating flag complex" print "Start time: %s" % (datetime.now()) gedgelist = map(list,g.edges(data=False)) gflag = sh.flag(gedgelist,4) print "End time: %s" % (datetime.now()) else: print "Using passed in flag complex" gflag = flag for n in range(neighborhood+1): print "Finding the local homology (n = %d)" % (n) print "Start time: %s" % (datetime.now()) graph = graph_file.split("/")[-1] print output_file ofile = open(output_file, 'w') ofile.write("Local Homology (neighborhood=%d) of flag complex generated by %s\n" % (n, graph)) locHomTable(gflag, {}, n, ofile) print "End time: %s" % (datetime.now()) print "\n" return gflag
def __init__(self, fileName = GML_FILE):
    if os.path.exists(fileName):
        print "Read From File.\n"
        self.G = nx.read_gml(fileName)
    else:
        print "File Not Found. Creating It..."
        self.G = nx.DiGraph()
def get_row_exhaustive(general_path, pattern_result, pattern_path): row = {} print "Pattern exhaustive ", pattern_result print "Pattern path: ", pattern_path pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml')) nr_randvar_values = man.count_nr_randvars_in_graph(pattern) cycles = man.is_there_cycle_in_graph(pattern) max_degree = man.get_maximum_node_degree(pattern) average_degree = man.get_average_node_degree(pattern) n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern) parent_id = get_parent_id(os.path.join(pattern_path)) #get nr embeddings of exhaustive nr_emb = None time = None print general_path.split('/') pattern_name = general_path.split('/')[-1] if pattern_name == "": pattern_name = general_path.split('/')[-2] nr_obs = None print "Exists? ", os.path.join( general_path, 'exhaustive_approach', 'results_' + pattern_name + '.res'), os.path.exists( os.path.join(general_path, 'exhaustive_approach', 'results_' + pattern_name + '.res')) if os.path.exists( os.path.join(general_path, 'exhaustive_approach', 'results_' + pattern_name + '.res')): nr_emb, time, nr_obs = extract_nr_embeddings( os.path.join(general_path, 'exhaustive_approach', 'results_' + pattern_name + '.res')) #get the results if os.path.exists(os.path.join(pattern_result, 'monitoring')): embeddings, stdev, klds = get_stat( os.path.join(pattern_result, 'monitoring'), 'exhaustive') else: embeddings = [None] * 120 klds = [None] * 120 is_timeout = False if os.path.exists( os.path.join(general_path, 'exhaustive_approach', 'timeout.info')): is_timeout = True print "Nr of records for embeddings: ", len(embeddings) nodes, edges = man.get_readable_text_format(pattern) row['pattern_name'] = pattern_result row['parent_id'] = parent_id row['nr_randvar_values'] = int(nr_randvar_values) row['nodes'] = nodes row['edges'] = edges row['has_cycles'] = cycles row['density'] = nx.density(pattern) row['max_degree'] = float(max_degree) row['avg_deg'] = float(average_degree) row['nr_targets'] = int(n_target_nodes) if nr_emb: row['exh_emb'] = float(nr_emb) else: row['exh_emb'] = nr_emb row['time'] = time row['timeout'] = is_timeout row['nr_observations'] = nr_obs for i in xrange(1, len(embeddings) + 1): if embeddings[i - 1] == None: row["emb_" + str(i)] = None else: row["emb_" + str(i)] = float(embeddings[i - 1]) return row
def correlation_edge_centrality(name):
    current_path = "../../data"
    network = nx.read_gml(current_path + "/" + name + "/" + name + ".gml")
    edge_centrality(network)
import numpy as np
import math as ma
import networkx as nx
from matplotlib import pyplot as plt

# Open the first data set; keep the original and a copy, the copy is the one to be modified
data_dolphins = nx.read_gml('dolphins.gml')
dd = data_dolphins

# Open the dolphin gender info
archivo2 = open('dolphinsGender.txt', 'r').readlines()

# Empty arrays to fill with the extra information for the nodes
nombres = []
generos = []
gender_dict = {}
numeros = {}
colores = []
machos = 0
hembras = 0
incognita = 0

# Add info to the arrays created above, including the node color map that follows the dolphins' gender
k = 0
for i in archivo2:
    j = i.split("\t")
def basic_operation(): # Create a graph. G = nx.Graph() # Nodes. G.add_node(1) G.add_nodes_from([2, 3]) H = nx.path_graph(10) # Creates a graph. G.add_nodes_from(H) G.add_node(H) #print('G.nodes =', G.nodes) print('G.nodes =', list(G.nodes)) # Edges. G.add_edge(1, 2) e = (2, 3) G.add_edge(*e) # Unpack edge tuple. G.add_edges_from([(1, 2), (1, 3)]) G.add_edges_from(H.edges) #print('G.edges =', G.edges) print('G.edges =', list(G.edges)) # Remove all nodes and edges. G.clear() #-------------------- G.add_edges_from([(1, 2), (1, 3)]) G.add_node(1) G.add_edge(1, 2) G.add_node('spam') # Adds node 'spam'. G.add_nodes_from('spam') # Adds 4 nodes: 's', 'p', 'a', 'm'. G.add_edge(3, 'm') print('G.number_of_nodes() =', G.number_of_nodes()) print('G.number_of_edges() =', G.number_of_edges()) # Set-like views of the nodes, edges, neighbors (adjacencies), and degrees of nodes in a graph. print('G.adj[1] =', list(G.adj[1])) # or G.neighbors(1). print('G.degree[1] =', G.degree[1]) # The number of edges incident to 1. # Report the edges and degree from a subset of all nodes using an nbunch. # An nbunch is any of: None (meaning all nodes), a node, or an iterable container of nodes that is not itself a node in the graph. print("G.edges([2, 'm']) =", G.edges([2, 'm'])) print('G.degree([2, 3]) =', G.degree([2, 3])) # Remove nodes and edges from the graph in a similar fashion to adding. G.remove_node(2) G.remove_nodes_from('spam') print('G.nodes =', list(G.nodes)) G.remove_edge(1, 3) # When creating a graph structure by instantiating one of the graph classes you can specify data in several formats. G.add_edge(1, 2) H = nx.DiGraph(G) # Creates a DiGraph using the connections from G. print('H.edges() =', list(H.edges())) edgelist = [(0, 1), (1, 2), (2, 3)] H = nx.Graph(edgelist) #-------------------- # Access edges and neighbors. print('G[1] =', G[1]) # Same as G.adj[1]. print('G[1][2] =', G[1][2]) # Edge 1-2. print('G.edges[1, 2] =', G.edges[1, 2]) # Get/set the attributes of an edge using subscript notation if the edge already exists. G.add_edge(1, 3) G[1][3]['color'] = 'blue' G.edges[1, 2]['color'] = 'red' # Fast examination of all (node, adjacency) pairs is achieved using G.adjacency(), or G.adj.items(). # Note that for undirected graphs, adjacency iteration sees each edge twice. FG = nx.Graph() FG.add_weighted_edges_from([(1, 2, 0.125), (1, 3, 0.75), (2, 4, 1.2), (3, 4, 0.375)]) for n, nbrs in FG.adj.items(): for nbr, eattr in nbrs.items(): wt = eattr['weight'] if wt < 0.5: print(f'({n}, {nbr}, {wt:.3})') # Convenient access to all edges is achieved with the edges property. for (u, v, wt) in FG.edges.data('weight'): if wt < 0.5: print(f'({u}, {v}, {wt:.3})') #-------------------- # Attributes. # Graph attributes. G = nx.Graph(day='Friday') print('G.graph =', G.graph) G.graph['day'] = 'Monday' # Node attributes: add_node(), add_nodes_from(), or G.nodes. G.add_node(1, time='5pm') G.add_nodes_from([3], time='2pm') print('G.nodes[1] =', G.nodes[1]) G.nodes[1]['room'] = 714 print('G.nodes.data() =', G.nodes.data()) # Edge attributes: add_edge(), add_edges_from(), or subscript notation. G.add_edge(1, 2, weight=4.7) G.add_edges_from([(3, 4), (4, 5)], color='red') G.add_edges_from([(1, 2, {'color': 'blue'}), (2, 3, {'weight': 8})]) G[1][2]['weight'] = 4.7 G.edges[3, 4]['weight'] = 4.2 print('G.edges.data() =', G.edges.data()) #-------------------- # Directed graphs. 
DG = nx.DiGraph() DG.add_weighted_edges_from([(1, 2, 0.5), (3, 1, 0.75)]) print("DG.out_degree(1, weight='weight') =", DG.out_degree(1, weight='weight')) print("DG.degree(1, weight='weight') =", DG.degree(1, weight='weight')) # The sum of in_degree() and out_degree(). print('DG.successors(1) =', list(DG.successors(1))) print('DG.neighbors(1) =', list(DG.neighbors(1))) # Convert G to undirected graph. #H = DG.to_undirected() H = nx.Graph(DG) #-------------------- # Multigraphs: Graphs which allow multiple edges between any pair of nodes. MG = nx.MultiGraph() #MDG = nx.MultiDiGraph() MG.add_weighted_edges_from([(1, 2, 0.5), (1, 2, 0.75), (2, 3, 0.5)]) print("MG.degree(weight='weight') =", dict(MG.degree(weight='weight'))) GG = nx.Graph() for n, nbrs in MG.adjacency(): for nbr, edict in nbrs.items(): minvalue = min([d['weight'] for d in edict.values()]) GG.add_edge(n, nbr, weight = minvalue) print('nx.shortest_path(GG, 1, 3) =', nx.shortest_path(GG, 1, 3)) #-------------------- # Classic graph operations: """ subgraph(G, nbunch): induced subgraph view of G on nodes in nbunch union(G1,G2): graph union disjoint_union(G1,G2): graph union assuming all nodes are different cartesian_product(G1,G2): return Cartesian product graph compose(G1,G2): combine graphs identifying nodes common to both complement(G): graph complement create_empty_copy(G): return an empty copy of the same graph class to_undirected(G): return an undirected representation of G to_directed(G): return a directed representation of G """ #-------------------- # Graph generators. # Use a call to one of the classic small graphs: petersen = nx.petersen_graph() tutte = nx.tutte_graph() maze = nx.sedgewick_maze_graph() tet = nx.tetrahedral_graph() # Use a (constructive) generator for a classic graph: K_5 = nx.complete_graph(5) K_3_5 = nx.complete_bipartite_graph(3, 5) barbell = nx.barbell_graph(10, 10) lollipop = nx.lollipop_graph(10, 20) # Use a stochastic graph generator: er = nx.erdos_renyi_graph(100, 0.15) ws = nx.watts_strogatz_graph(30, 3, 0.1) ba = nx.barabasi_albert_graph(100, 5) red = nx.random_lobster(100, 0.9, 0.9) #-------------------- # Read a graph stored in a file using common graph formats, such as edge lists, adjacency lists, GML, GraphML, pickle, LEDA and others. nx.write_gml(red, 'path.to.file') mygraph = nx.read_gml('path.to.file')
import networkx as nx, community as comm, pylab

g = nx.read_gml("lesmis/lesmis.gml")
bp = comm.best_partition(g)
# color each node by its community, following the node order that nx.draw uses
nx.draw(g, node_color=[bp[n] for n in g.nodes()])
pylab.show()
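# Hedged follow-up (assumes `comm` above is the python-louvain package):
# community.modularity scores the partition returned by best_partition.
print(comm.modularity(bp, g))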
import numpy as np
import networkx as nx

F = np.load("./numpy_files/citeseer_poisson_v2_son.npy")
#F = np.load("../poisson/numpy_files/citeseer_gaussian_iter_250_T.npy")
g = nx.read_gml("../datasets/citeseer.gml")
n = g.number_of_nodes()

with open("citeseer_poisson_v2_son.embedding", 'w') as f:
    f.write("{} {}\n".format(n, F.shape[1]))
    for node in g.nodes():
        line = [str(val) for val in F[int(node), :]]
        f.write("{} {}\n".format(node, " ".join(line)))
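# Hedged companion sketch: read the .embedding file written above back into a
# dict mapping node id -> vector. The format (header line "<n> <dim>", then one
# "node v1 v2 ..." line per node) follows the writer above; this reader itself
# is an assumption, not part of the original script.
embeddings = {}
with open("citeseer_poisson_v2_son.embedding") as fin:
    fin.readline()  # skip the "<n> <dim>" header
    for line in fin:
        parts = line.strip().split()
        embeddings[parts[0]] = [float(v) for v in parts[1:]]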
def carrega_grafo(nome_rede):
    """Loads the .gml graph"""
    grafo = nx.read_gml(nome_rede + ".gml")
    return grafo
def load_igraph(filepath):
    # use the path passed in (the original ignored it and read a hard-coded "example.gml")
    return nx.read_gml(filepath)
true_sbm = true adj = nx.adjacency_matrix(G).todense() np.savetxt('file.csv', adj, delimiter='\t') dists_gt = test_clustering_structure() print("GT : {0}, {1}".format(np.mean(dists_gt), np.std(dists_gt))) dists_gt = ensemble_density_huge("file.csv", "\\t") dist_dense = pd.read_csv("./matrix.csv", delimiter="\t", header=None).values dist_dense = dist_dense[:, :-1] dist.append(dist_dense) #################### #################### print("--Football--") G = nx.read_gml("../data/football.gml") true = [] adj = nx.adjacency_matrix(G).todense() with open("../data/football.gml") as f: for line in f: values = line.split(" ") if (len(values) >= 5): if (values[4] == "value"): true.append(values[5]) encoder = LabelEncoder() true = encoder.fit_transform(true) true_football = true model_hac = hac(n_clusters=len(set(true)), affinity="precomputed", linkage="average")
def carrega_grafo(self):
    """Loads a graph and generates a list of vertices"""
    rede = nx.read_gml(self.nome_rede)
    self.lista_nos = rede.nodes()
def _validate_(self, level):
    try:
        _ = nx.read_gml(str(self.path))
    except nx.NetworkXError:
        raise ValidationError('Not a valid GML file')
sp_emds.append(sp_emd) sgl2s.append(sgl2) return sp_emds, sgl2s if __name__ == "__main__": graph_name = sys.argv[1] RW_x = np.loadtxt( 'plots/lesmis_walk_wa/trainingIteration_3200_expectedGraph.txt'.format( graph_name)) edge_x = np.loadtxt( 'plots/lesmis_edge_wa/trainingIteration_3200_expectedGraph.txt'.format( graph_name)) #RW_c = np.loadtxt('plots/{}_rw_expected_correct.txt'.format(graph_name)) G = nx.read_gml('../data/{}.gml'.format(graph_name)) _A_obs = nx.adjacency_matrix(G) A = _A_obs.todense() N = A.shape[0] L = nx.normalized_laplacian_matrix(G).todense() eig_vals, eig_vecs = linalg.eig(L) eig_list = zip(eig_vals, np.transpose(eig_vecs)) eig_list.sort(key=lambda x: x[0]) u = np.asarray([u_i.real for u_i in eig_list[-2][1]])[0][0] x, y = expected_against_fiedler(RW_x, u, N) fig, ax = plt.subplots() plt.scatter(x, y, color='r', s=10, label='Degree Random Walk From Uniform') #x_c,y = expected_against_fiedler(RW_c,u,N)
            G.node[i]['pi'] = U

    # Build the MST from the predecessor table
    MST = nx.Graph()
    for i in range(0, len(Q)):
        MST.add_node(i, label=G.node[i]['label'])
    for i in range(0, len(Q)):
        if K == 1:
            if (i != Raizes[0]):
                MST.add_edge(i, G.node[i]['pi'])
        elif K == 2:
            if (i != Raizes[0]) and (i != Raizes[1]):
                MST.add_edge(i, G.node[i]['pi'])
        elif K == 3:
            if (i != Raizes[0]):
                if (i != Raizes[1]):
                    if (i != Raizes[2]):
                        MST.add_edge(i, G.node[i]['pi'])
    return MST

G = nx.read_gml("football.gml")
K = 3  # Number of roots
MST = Dijkstra(G, K)

# Plot the MST
pos = nx.spring_layout(MST, k=0.10, iterations=100)
nx.draw(MST, pos, with_labels=True)
plt.savefig("1")
from math import pi,sin,cos,sqrt import pylab from matplotlib.patches import Wedge, Polygon import matplotlib.ticker as ticker from matplotlib.widgets import Slider, Button #import nx_pylab2 as nx2 from matplotlib.patches import FancyArrowPatch, Circle #from colorsnew import cmap_discretize import matplotlib as m graph_name1 = "method2_50/networks/method2_50_adherent.gml" ### Method3 G = nx.read_gml(graph_name1) G = nx.connected_component_subgraphs(G)[0] listsort1 = [[u'38481', u'40842', u'46549', u'17427', u'41063', u'7264', u'35755', u'16584', u'42825', u'39816', u'28627', u'44291', u'36915', u'38164', u'2573', u'30898', u'43373', u'45655', u'38629', u'46451', u'46254', u'45064', u'39167', u'39953', u'39600', u'43282', u'3753', u'30300', u'38798', u'45392', u'36268', u'45758', u'32683', u'34064', u'32242', u'46601', u'4094', u'32108', u'46118', u'38479', u'35766'], [u'18956', u'39714', u'27171', u'46480', u'4655', u'41720', u'41906', u'3869', u'27995', u'45364', u'38641', u'14142', u'29881', u'39252', u'28743', u'41480', u'5842', u'33422', u'36783', u'35561', u'20494', u'6137', u'43821', u'29802', u'42270', u'39823', u'23012', u'20972', u'22783', u'29540', u'28308', u'46643', u'30050', u'4597', u'28945', u'7330', u'18531', u'45676', u'33143', u'27847', u'3217', u'43334', u'29250', u'27127', u'32340', u'46600', u'15479', u'4455', u'11729', u'35309', u'44916'], [u'42877', u'5820', u'42380', u'42510', u'3144', u'35059', u'14867', u'29053', u'33282', u'24629', u'44334', u'41561', u'46162', u'5536', u'46271', u'44757', u'31246', u'42348', u'39055', u'41851', u'8034', u'46721', u'41225', u'41165', u'29376', u'39770', u'32508', u'18656', u'44449', u'38029', u'40653', u'34754', u'41261', u'5613', u'17086', u'45916', u'41728', u'32395', u'27851', u'41745', u'40296', u'42000', u'42401', u'42087', u'34450', u'46663', u'32892', u'24642', u'28118', u'20377', u'45922', u'32637', u'45190', u'4800', u'43617', u'17097', u'46389', u'29314', u'41999', u'29480', u'44188', u'40476', u'36088', u'36343', u'45154'], [u'44121', u'3557', u'41794', u'24308', u'967', u'43573', u'47061', u'43981', u'24575', u'41830', u'21994', u'44866', u'46798', u'9486', u'15389', u'42637', u'46315', u'45116', u'41049', u'20553', u'41656', u'37258', u'36157', u'42467', u'7073', u'46612', u'45795', u'38044', u'9831', u'42765', u'6322', u'46998', u'12252', u'36591', u'41509', u'17484', u'80', u'19515', u'46496', u'2832', u'14722', u'40854', u'29840', u'46782', u'42807', u'8371', u'45303', u'17755', u'15311', u'43437', u'45374', u'45771', u'32214', u'33340', u'30166', u'43252', u'21010', u'12048', u'12484', u'3719', u'37781', u'46026', u'36865', u'998', u'39130', u'44265', u'310', u'44263', u'23266', u'26102', u'45298', u'10127', u'833', u'13662', u'41669', u'18106', u'19501', u'10542', u'29018', u'29930', u'44884', u'36255', u'36701', u'25991', u'13790', u'3864', u'42886', u'36411', u'35493', u'43435', u'28845', u'46582', u'29070', u'18647', u'33242', u'17923', u'38945', u'9429', u'18790', u'43402', u'40232', u'8997', u'19311', u'8638', u'36351', u'24180', u'42911', u'35135', u'39485', u'45670', u'25619', u'17369', u'36917', u'32662', u'31021', u'38972', u'43442', u'13518', u'30372', u'29826', u'10924', u'29415', u'43150', u'42384', u'16541', u'45557', u'40582', u'21215', u'10378', u'40402', u'27756', u'3424'], [u'33322', u'28851', u'13739', u'39065', u'39898', u'33425', u'3284', u'32833', u'38955', u'41792', u'35531', u'44195', u'44199', u'42457', u'43572', u'37747', u'32167', u'3815', u'27301', u'42484', u'42585', u'40520', 
u'40443', u'34221', u'45437', u'5817', u'39690', u'10411', u'46941', u'34752', u'37000', u'38315', u'36925', u'41510', u'41160', u'38914', u'39155', u'6613', u'41194', u'45060', u'44166', u'41584', u'20312', u'40324', u'40803', u'27345', u'40484', u'31124', u'5068', u'6627', u'26718', u'38101', u'47043', u'27958', u'5580', u'41899', u'42608', u'31513', u'2158', u'45833', u'47038', u'41095', u'18967', u'35849', u'45148', u'40734', u'43192', u'45903', u'46121', u'40027', u'19708', u'12992', u'36237', u'42022', u'40284', u'45243', u'31547', u'39006', u'46785', u'45876', u'33441', u'46878', u'19', u'31425', u'42156', u'45009', u'29371', u'41292', u'18654', u'43953', u'14378', u'42471', u'27811', u'14376', u'19000', u'19943', u'29592', u'39342', u'40719', u'17258', u'28050', u'12798', u'41316', u'45220', u'45061', u'45784', u'46234', u'42612', u'42971', u'33717', u'32811', u'44214', u'43858', u'45760', u'33372', u'47067', u'30570', u'43848', u'31383', u'39017', u'40138', u'32171', u'43138', u'29399', u'46148', u'39978', u'33369', u'43177', u'30392', u'42714', u'28301', u'41553', u'36726', u'6197', u'45860', u'46689', u'9816', u'17598', u'37574', u'45261', u'35955', u'46326', u'18541', u'34955', u'42225', u'44352', u'19546', u'43401', u'4336', u'31076', u'29749', u'39948', u'36631', u'46329', u'45139', u'39478', u'28436', u'17857', u'11477', u'45513', u'46639', u'41435', u'39688', u'33062', u'1055', u'36529', u'41987', u'11926', u'34061', u'3944', u'13136', u'43446', u'10915', u'41187', u'12977', u'32349', u'20327', u'39383', u'42550', u'39971', u'41521', u'41970', u'28212', u'28688', u'36719', u'12115', u'45419', u'42319', u'42317', u'47052', u'31353', u'39809', u'27759', u'37143', u'37639', u'45527'], [u'35544', u'41015', u'12486', u'41371', u'41797', u'41559', u'39625', u'37188', u'45933', u'29453', u'46487', u'13731', u'46726', u'43599', u'43982', u'12013', u'40844', u'19450', u'46439', u'46269', u'39539', u'44169', u'39313', u'11613', u'10959', u'44791', u'32476', u'35274', u'35973', u'31391', u'40188', u'31954', u'20411', u'41586', u'19022', u'41769', u'47063', u'42018', u'43542', u'31826', u'46046', u'43541', u'46347', u'43543', u'614', u'41942', u'6390', u'39250', u'42263', u'42262', u'37648', u'42065', u'46533', u'26900', u'28182', u'15367', u'45175', u'38900', u'44614', u'41092', u'35847', u'44571', u'44570', u'32571', u'391', u'4046', u'46123', u'39003', u'40218', u'46613', u'18061', u'1432', u'33895', u'46153', u'27724', u'39022', u'26136', u'30067', u'45470', u'8592', u'31337', u'11313', u'26528', u'13803', u'46771', u'33161', u'18842', u'45379', u'2697', u'44539', u'44021', u'14977', u'31949', u'43318', u'22520', u'27101', u'39673', u'41131', u'40802', u'32915', u'37287', u'12681', u'42358', u'42574', u'37763', u'37600', u'30244', u'25486', u'36369', u'215', u'40656', u'4477', u'42419', u'29115', u'2464', u'41781', u'29522', u'10684', u'22711', u'30359', u'6568', u'46230', u'45506', u'43178', u'41401', u'13228', u'41310', u'28640', u'22903', u'44213', u'32741', u'29467', u'6933', u'39018', u'39019', u'44662', u'259', u'36222', u'7077', u'27511', u'32085', u'11427', u'42399', u'33985', u'39862', u'40666', u'46668', u'34775', u'5295', u'28719', u'20392', u'9590', u'34579', u'39207', u'28169', u'42146', u'44682', u'40161', u'43084', u'26751', u'39111', u'39059', u'44636', u'36176', u'41847', u'32700', u'42240', u'20206', u'28506', u'36330', u'11149', u'20527', u'40545', u'40782', u'18992', u'28342', u'34107', u'27575', u'30917', u'40434', u'3940', u'40539', u'45511', u'5564', 
u'39422', u'46819', u'47019', u'5286', u'39686', u'46812', u'30964', u'41242', u'44554', u'40052', u'6161', u'12449', u'36629', u'46062', u'45368', u'6883', u'33964', u'42418', u'13511', u'39438', u'727', u'29418', u'30264', u'43020', u'34065', u'17458', u'27973', u'45411', u'46772', u'42716', u'46850', u'47053', u'6083', u'25318', u'40811', u'32116', u'45528', u'34141', u'41203'], [u'36585', u'27490', u'40155', u'27284', u'27280', u'46820', u'39656', u'28854', u'43112', u'36092', u'32233', u'40929', u'41479', u'41373', u'37041', u'44855', u'19701', u'38954', u'30192', u'2915', u'41650', u'38959', u'41790', u'7129', u'28035', u'39624', u'46824', u'6651', u'41152', u'36611', u'43778', u'45545', u'29284', u'43807', u'21108', u'30776', u'39400', u'41724', u'39361', u'39364', u'42054', u'14694', u'43698', u'42053', u'43695', u'39567', u'28404', u'43053', u'40847', u'31778', u'29917', u'29031', u'29033', u'29527', u'43142', u'34110', u'45160', u'40037', u'27711', u'45702', u'33946', u'45565', u'42636', u'45569', u'36332', u'1930', u'45288', u'42227', u'45443', u'39693', u'35379', u'36819', u'5131', u'42226', u'18163', u'46891', u'28664', u'41331', u'39224', u'31487', u'44018', u'20418', u'29086', u'6758', u'29247', u'43307', u'7519', u'27132', u'27137', u'39032', u'43490', u'29962', u'43499', u'37755', u'43941', u'34270', u'45941', u'4030', u'38953', u'46042', u'27574', u'30154', u'41067', u'43394', u'27342', u'30741', u'37874', u'36376', u'27610', u'29662', u'28361', u'4916', u'40247', u'7189', u'27933', u'30736', u'2536', u'42063', u'27836', u'31461', u'47046', u'45627', u'30951', u'9813', u'39189', u'39811', u'36473', u'12375', u'4393', u'44835', u'6578', u'38214', u'46585', u'35201', u'41419', u'45050', u'44111', u'31067', u'33489', u'28601', u'45831', u'46029', u'19063', u'37424', u'39706', u'42116', u'33436', u'36115', u'2901', u'33549', u'40733', u'43865', u'46915', u'1067', u'44184', u'28335', u'43462', u'43461', u'27385', u'40146', u'37228', u'44527', u'43740', u'37617', u'32164', u'35930', u'43582', u'34341', u'28414', u'39411', u'41736', u'28793', u'43995', u'42029', u'42434', u'42432', u'44377', u'40853', u'43041', u'45173', u'28383', u'8316', u'45049', u'36991', u'40455', u'45573', u'36308', u'39301', u'34771', u'19476', u'2112', u'45476', u'39604', u'45656', u'41291', u'46655', u'4699', u'42527', u'46803', u'28775', u'39402', u'42150', u'30803', u'44385', u'7258', u'16058', u'40229', u'46809', u'13719', u'45099', u'39120', u'44158', u'40102', u'39044', u'3989', u'43890', u'29874', u'46180', u'32328', u'28516', u'45895', u'29476', u'46119', u'43009', u'41680', u'40777', u'40958', u'41689', u'30058', u'17578', u'46621', u'38893', u'29988', u'32039', u'43674', u'33960', u'45435', u'14475', u'37115', u'40252', u'29528', u'37111', u'46453', u'46020', u'31574', u'22923', u'44212', u'41929', u'42190', u'3445', u'45508', u'42610', u'40338', u'33493', u'3326', u'43210', u'28838', u'42993', u'36990', u'28890', u'42853', u'44663', u'41081', u'45265', u'37027', u'28387', u'33729', u'32337', u'46359', u'45342', u'32426', u'45610', u'45641', u'38932', u'44060', u'45279', u'44407', u'43434', u'27582', u'27854', u'40821', u'27396', u'40133', u'44103', u'30477', u'40613', u'29495', u'40597', u'44831', u'11864', u'16722', u'45966', u'42035', u'5960', u'43911', u'46412', u'31137', u'31044', u'40466', u'46365', u'46363', u'30000', u'45486', u'45497', u'34175', u'36312', u'35886', u'40501', u'42241', u'28305', u'25688', u'41895', u'28309', u'43812', u'45923', u'38121', u'31151', u'41369', u'45861', 
u'42947', u'29058', u'40235', u'27978', u'27976', u'27186', u'47060', u'5242', u'39201', u'44105', u'39220', u'42925', u'35812', u'44139', u'20625', u'14901', u'44521', u'41402', u'40603', u'43083', u'39768', u'37378', u'31384', u'43715', u'43717', u'44220', u'36071', u'29272', u'43969', u'2960', u'44353', u'29861', u'38052', u'45556', u'5470', u'42445', u'39442', u'42849', u'11285', u'43403', u'18799', u'44054', u'44037', u'44030', u'33235', u'41140', u'37976', u'4651', u'43813', u'45799', u'43762', u'30761', u'43581', u'40230', u'16468', u'30769', u'31261', u'43662', u'4331', u'40786', u'41807', u'42694', u'5513', u'25458', u'40627', u'2552', u'28022', u'27523', u'45422', u'35819', u'39573', u'30344', u'42590', u'39129', u'31566', u'46921', u'46507', u'7311', u'46881', u'43238', u'31585', u'33530', u'42678', u'6458', u'45077', u'26602', u'43295', u'42180', u'43612', u'38271', u'45330', u'42747', u'46186', u'18462', u'33460', u'43206', u'40999', u'43205', u'46959', u'37030', u'39152', u'32968', u'21754', u'32070', u'5705', u'42589', u'41248', u'39834', u'15726', u'47072', u'39836', u'41508', u'34217', u'38714', u'27798', u'38924', u'22593', u'34841', u'41021', u'42914', u'33665', u'40373', u'16735', u'2908', u'46478', u'41613', u'28473', u'2041', u'42559', u'42009', u'22594', u'27983', u'2362', u'43927', u'40680', u'27330', u'45902', u'30032', u'5369', u'44709', u'32397', u'39444', u'42259', u'42090', u'41693', u'42207', u'1440', u'6025', u'4044', u'37380', u'22613', u'35305', u'40400', u'5738', u'26179', u'41059', u'4849', u'12414', u'42179', u'30395', u'45150', u'35944']] #sorted(listcom,key=len) H=nx.Graph() wtchange = [] ynode = [] bminode = [] bminodep = [] bminodem = []
def get_row_NS(general_path, pattern_result, experiment_name): row = {} if not (os.path.exists(os.path.join(general_path, 'input_pattern.gml'))): row['pattern_name'] = pattern_result row['nr_randvar_values'] = "NC" row['nodes'] = "NC" row['edges'] = "NC" row['has_cycles'] = "NC" row['density'] = "NC" row['shape'] = "NC" row['max_degree'] = "NC" row['avg_deg'] = "NC" row['nr_targets'] = "NC" row['nr_emb'] = "NC" row['has_obd'] = "NC" row['unequal_size_warn'] = "NC" row['OBD'] = "NC" return row pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml')) nr_randvar_values = man.count_nr_randvars_in_graph(pattern) cycles = man.is_there_cycle_in_graph(pattern) max_degree = man.get_maximum_node_degree(pattern) average_degree = man.get_average_node_degree(pattern) n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern) nr_emb = None has_obd = True if os.path.exists(os.path.join(pattern_result, 'no_obdecomp.info')): has_obd = False if os.path.exists(os.path.join(general_path, 'not_selected.info')): nr_emb = extract_nr_embeddings_NS( os.path.join(general_path, 'not_selected.info')) nodes, edges = man.get_readable_text_format(pattern) unequal_size_warning = False if os.path.exists( os.path.join(general_path, 'results_furer', 'unequal_size.warning')): unequal_size_warning = True OBD = None if os.path.exists( os.path.join(general_path, 'results_furer', 'OBDDecomp.info')): OBD = getOBDecomp( os.path.join(general_path, 'results_furer', 'OBDDecomp.info')) row['pattern_name'] = pattern_result row['nr_randvar_values'] = nr_randvar_values row['nodes'] = nodes row['edges'] = edges row['has_cycles'] = cycles row['density'] = nx.density(pattern) row['shape'] = man.get_graph_shape(pattern) row['max_degree'] = max_degree row['avg_deg'] = average_degree row['nr_targets'] = n_target_nodes row['nr_emb'] = nr_emb #row['has_obd']=has_obd #row['unequal_size_warn']=unequal_size_warning row['OBD'] = OBD return row
def loadgraph(filepath): return nx.read_gml(filepath)
import networkx as nx from geopy.distance import great_circle def generateDistances(G, verbose=False): for edge in G.edges: if verbose: print(*edge, end=" ") distance = great_circle(reversed(G.nodes[edge[0]]["Position"]), reversed(G.nodes[edge[1]]["Position"])).km if verbose: print(distance) G.edges[edge]["Distance"] = distance nx.write_gml(G, "Graphs/FullFWithDistance.gml") if __name__ == '__main__': generateDistances(nx.read_gml("Graphs/FullFiltered.gml"), verbose=True)
def input(self, filename): self.GGG = networkx.read_gml(filename)
def main(result, data, redo, write, monitoring_reports): print "reporting furer" flag_version = 'my' common_result_path = result output_path = os.path.join(result, 'results_furer') detailed_result_path = os.path.join(output_path, "monitoring") if (not redo) and os.path.exists(detailed_result_path) and len( os.listdir(detailed_result_path)) >= 100: print "Results already post-processed" row = csv_report.get_row(result, output_path, "furer", result.replace("RESULTS", "PATTERNS")) with open(os.path.join(output_path, "furer_row.info"), 'w') as f: f.write(str(row)) sys.exit() exhaustive_approach_results_path = os.path.join(common_result_path, "exhaustive_approach") try: data_graph = nx.read_gpickle(data) except: data_graph = nx.read_gml(data) #data_graph=nx.read_gpickle(data) pattern = nx.read_gml(os.path.join(common_result_path, 'input_pattern.gml')) #load Plist pkl_file = open(os.path.join(output_path, 'Plist.pickle'), 'rb') Plist = pickle.load(pkl_file) #load monitoring marks pkl_file = open(os.path.join(output_path, 'monitoring_marks.pickle'), 'rb') monitoring_marks = pickle.load(pkl_file) #load monitoring_reports if os.path.exists(os.path.join(output_path, 'monitoring_reports.pickle')): pkl_file = open(os.path.join(output_path, 'monitoring_reports.pickle'), 'rb') monitoring_reports = pickle.load(pkl_file) print common_result_path, common_result_path.split("/") pattern_file_name = common_result_path.split("/")[-1] if pattern_file_name == "": pattern_file_name = common_result_path.split("/")[-2] print "Number of reports: ", len(monitoring_reports) print "pattern file name: ", pattern_file_name print "Do we need exhaustive dict: ", write fdict_exhaustive = None if write == True: picklename = os.path.join( exhaustive_approach_results_path, "fdict_exhaustive_%s.pickle" % pattern_file_name) pickin = open(picklename, 'rb') fdict_exhaustive = pickle.load(pickin) experiments.globals.output_path = output_path if pattern_file_name.startswith("dblp"): experiments.globals.experiment_name = "dblp" else: experiments.globals.experiment_name = "yeast" if (flag_version == 'my'): my_version_report(fdict_exhaustive, data_graph, pattern, monitoring_marks, output_path, detailed_result_path, monitoring_reports, exhaustive_approach_results_path, Plist, 1, pattern_file_name, write) #print monitoring_reports if (flag_version == 'martin'): martin_version_report(fdict_exhaustive, data_graph, pattern, monitoring_marks, output_path, detailed_result_path, monitoring_reports, exhaustive_approach_results_path, Plist, 1, pattern_file_name) #print monitoring_reports row = csv_report.get_row(result, output_path, "furer", result.replace("RESULTS", "PATTERNS")) with open(os.path.join(output_path, "furer_row.info"), 'w') as f: f.write(str(row))
preferences = build_preference_dict() place_preferences = build_place_preferences_table() edges = list() for k, v in place_preferences.to_dict().items(): edge = (preferences[k[0]], places[k[1]], v) edges.append(edge) g = nx.Graph() g.add_weighted_edges_from(edges) return g try: g = nx.read_gml("communities/community_graph.gml") print("Reading graph...") except: print("Building graph...") g = build_community_graph() nx.write_gml(g, "communities/community_graph.gml") print("Graph ready") #first compute the best partition partition = community.best_partition(g) #drawing plt.figure(figsize=(50, 50)) plt.title("eTurismo", fontsize=48)
# Initialize parameters E = np.random.normal(size=(N, dim)) nb_list = find_neighbors(g) #dist = find_distances(g) dist = [] for iter in range(num_of_iters): if iter % 50 == 0: draw_points(E, "Karate", g, base=True) for node in range(N): node_grad_E = grad(g, E, nb_list, node, dist) E[node, :] += eta * node_grad_E score = compute_score(g, E, nb_list, dist) print("Iter: {} Score {}".format(iter, score)) return E edges = example5 #g = nx.Graph() #g.add_edges_from(edges) g = nx.read_gml("../datasets/karate.gml") E = run(g, dim=2, num_of_iters=1000, eta=0.001) #np.save("./numpy_files/citeseer_gaussian_v5", E) draw_points(E, "Karate", g)
def main(): #Takes a single GFF input, generates a graph and merges with a pre-existing graph args = get_options() # create directory if it isn't present already if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) args.input_dir = os.path.join(args.input_dir, "") args.output_dir = os.path.join(args.output_dir, "") # Create temporary directory temp_dir = os.path.join(tempfile.mkdtemp(dir=args.output_dir), "") directories = [args.input_dir, temp_dir] gff_file = [args.input_gff] filename = os.path.basename(args.input_gff).split(".")[0] if not args.quiet: print("Processing input") process_prokka_input(gff_list=gff_file, output_dir=temp_dir, quiet=args.quiet, n_cpu=args.n_cpu) cd_hit_out = temp_dir + "combined_protein_cdhit_out.txt" run_cdhit(input_file=temp_dir + "combined_protein_CDS.fasta", output_file=cd_hit_out, id=args.id, quiet=args.quiet, n_cpu=args.n_cpu) if not args.quiet: print("Generating network") single_gml, centroid_contexts_single, seqid_to_centroid_single = generate_network( cluster_file=cd_hit_out + ".clstr", data_file=temp_dir + "gene_data.csv", prot_seq_file=temp_dir + "combined_protein_CDS.fasta", all_dna=args.all_seq_in_graph) if not args.quiet: print("Reformatting network") reformat_network(single_gml=single_gml, output_dir=temp_dir, isolateName=filename) merge_graphs(directories=directories, temp_dir=temp_dir, len_dif_percent=args.len_dif_percent, pid=args.id, family_threshold=args.family_threshold, length_outlier_support_proportion=args. length_outlier_support_proportion, merge_paralogs=args.merge_paralogs, output_dir=args.output_dir, min_edge_support_sv=args.min_edge_support_sv, aln=args.aln, alr=args.alr, core=args.core, merge_single=True, depths=[1], n_cpu=args.n_cpu, quiet=args.quiet) G = nx.read_gml(args.output_dir + "final_graph.gml") for index, name in enumerate( G.graph['isolateNames'] ): #Corrects isolate name for single gff being returned as list if name == 'x': G.graph['isolateNames'][index] = filename nx.write_gml(G, args.output_dir + "final_graph.gml") #remove temporary directory if dirty = True if not args.dirty: shutil.rmtree(temp_dir) sys.exit(0)
def master(struct_save_name="ProteinDict_ten_thousand", edge_type="ligands", edge_comm_num=3, property="processes", graph_filename="Protein-Protein_Graph_Default_Name", load_graph=False, print_dict_props=False, bipart_graph=False, bipartite_filename="Bipartite_Default_Name", show_plots=False, avg_clust=False, print_graph_props=False, degree_dist=False, k_clique=False, mod_max=False, fluid=False, louv=False, k_property=20, num_k_cliques=7, num_fluid_comms=100, std_val=-0.5, k_clique_opt=False, start_k_clique_opt=3, end_k_clique_opt=10, num_trials_k=3, opt_fluid=False, start_fluid_comms=100, end_fluid_comms=300, fluid_step_size=20, fluid_num_trials=3): Structure_Dict = {} Structure_Dict = hf.readDict(struct_save_name, Structure_Dict) #Here we print out some helpful information about the dataset we are using if print_dict_props == True: avg_ligands = hf.get_mean_property(Structure_Dict, "ligands") print("Average Number of Ligands:", avg_ligands) avg_subunits = hf.get_mean_property(Structure_Dict, "subunits") print("Average Number of Subunits:", avg_subunits) avg_functions = hf.get_mean_property(Structure_Dict, "functions") print("Average Number of Functions:", avg_functions) avg_processes = hf.get_mean_property(Structure_Dict, "processes") print("Average Number of Processes:", avg_processes) # Get Total Number of Ligands, Functions, Proccesses and Subunits num_ligands = len(hf.get_all_property(Structure_Dict, "ligands")) print("Number of Ligands:", num_ligands) num_subunits = len(hf.get_all_property(Structure_Dict, "subunits")) print("Number of Subunits:", num_subunits) num_functions = len(hf.get_all_property(Structure_Dict, "functions")) print("Number of Functions:", num_functions) num_processes = len(hf.get_all_property(Structure_Dict, "processes")) print("Number of Processes:", num_processes) #Here we create a bipartite graph of ligands and proteins, which can be analyzed on its own, or used to #to create a projected graph. 
if bipart_graph == True: Protein_Bipartite_Graph = nx.Graph() struct_name_set = set() # Create a bipartite graph in which there are structure nodes and ligand ndoes for (struct_name, struct) in Structure_Dict.items(): struct_name_set.add(struct_name) hf.create_Edge(struct, Protein_Bipartite_Graph, property) print('Bipartite Nodes:', len(Protein_Bipartite_Graph.nodes())) print('Bipartite Edges:', len(Protein_Bipartite_Graph.edges())) nx.write_gml(Protein_Bipartite_Graph, bipartite_filename) #Here we create a new projected graph if load_graph == False: # Create a projected graph from the bipartite Protein_Graph = hf.create_projected_graph(Structure_Dict, edge_comm_num, edge_type) # Get the Giant Component of graph Protein_Graph_GC = Protein_Graph.subgraph( sorted(nx.connected_components(Protein_Graph), key=len, reverse=True)[0]) nx.write_gml(Protein_Graph, graph_filename) #If the garph has already been created, load in the graph to save time if load_graph == True: Protein_Graph = nx.read_gml(graph_filename) Protein_Graph_GC = Protein_Graph.subgraph( sorted(nx.connected_components(Protein_Graph), key=len, reverse=True)[0]) #Print out some useful informatoion about the graph if print_graph_props == True: print('Protein_Graph Nodes:', len(Protein_Graph.nodes())) print('Protein_Graph Edges:', len(Protein_Graph.edges())) print('Protein_Graph Num connected Components:', nx.number_connected_components(Protein_Graph)) print('Protein_Graph Num edges in largest Components:', len(Protein_Graph_GC.edges())) print('Protein_Graph Num nodes in largest Components:', len(Protein_Graph_GC.nodes())) # K-Clique Implementation if k_clique == True: print('Begin K_Clique') #Create a copy of the graph, which will be used when we lable nodes by community k_clique_graph = Protein_Graph_GC.copy() #You can use a predetermined k, or optimize the k for the graph if k_clique_opt == False: k_clique_comms_pre_del = nx.algorithms.community.k_clique_communities( Protein_Graph_GC, num_k_cliques) k_clique_comms_pre_del = list(list(k_clique_comms_pre_del)) else: k_clique_comms_pre_del = hf.opt_k_clique(Protein_Graph_GC, start_k_clique_opt, end_k_clique_opt, num_trials_k) # Get the average size of found communities avg_comm_pre_del = sum([len(comm) for comm in k_clique_comms_pre_del ]) / len(k_clique_comms_pre_del) #Get the graph similiarty score K_clique_score_pre_del = hf.score_graph(k_clique_comms_pre_del, Structure_Dict, k_property, property, already_list=True) print(K_clique_score_pre_del, len(k_clique_comms_pre_del), avg_comm_pre_del, hf.num_nodes(k_clique_comms_pre_del)) #Create a plot of each community similiarity score vs. a random communities similarity score hf.plot_vs_random(k_clique_comms_pre_del, Structure_Dict, k_property, property, "K_Clique_" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges", show_plots=show_plots) #Delete some communities based off there modularity score, and the standard deviation of community scores in the graph k_clique_comms = hf.delete_comms(Protein_Graph_GC, k_clique_comms_pre_del, std_val) #Get the graph similiarty score after deletion k_clique_score = hf.score_graph(k_clique_comms, Structure_Dict, k_property, property, already_list=True) # Get the average size of found communities after deleting 'bad' communities avg_comm = sum([len(comm) for comm in k_clique_comms]) / len(k_clique_comms) print(k_clique_score, len(k_clique_comms), avg_comm, hf.num_nodes(k_clique_comms)) # Create a plot of each community similiarity score vs. 
a random communities similarity score hf.plot_vs_random(k_clique_comms, Structure_Dict, k_property, property, "K_Clique_" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges", show_plots=show_plots) # Label nodes by community nx.set_node_attributes(k_clique_graph, hf.list_to_dict(k_clique_comms_pre_del), "Community") #Save the graph with nodes labled by community nx.write_gml( k_clique_graph, "K_Clique_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml") print('End K_Clique') # Modularity Maximization Implementation if mod_max == True: print('Begin Modularity Maximization') # Create a copy of the graph, which will be used when we lable nodes by community mod_graph = Protein_Graph_GC.copy() #Find communities using modularity maximization mod_max_comms_pre_del = nx.algorithms.community.modularity_max.greedy_modularity_communities( Protein_Graph_GC) mod_max_comms_pre_del = list(list(mod_max_comms_pre_del)) # Get the average size of found communities avg_comm_pre_del = sum([len(comm) for comm in mod_max_comms_pre_del ]) / len(mod_max_comms_pre_del) # Get the graph similiarty score mod_max_score_pre_del = hf.score_graph(mod_max_comms_pre_del, Structure_Dict, k_property, property, already_list=True) print(mod_max_score_pre_del, len(mod_max_comms_pre_del), avg_comm_pre_del, hf.num_nodes(mod_max_comms_pre_del)) # Create a plot of each community similiarity score vs. a random communities similarity score hf.plot_vs_random(mod_max_comms_pre_del, Structure_Dict, k_property, property, "Mod_Max" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges", show_plots=show_plots) # Delete some communities based off there modularity score, and the standard deviation of community scores in the graph mod_max_comms = hf.delete_comms(Protein_Graph_GC, mod_max_comms_pre_del, std_val) # Get the graph similiarty score after deletion mod_max_score = hf.score_graph(mod_max_comms, Structure_Dict, k_property, property, already_list=True) # Get the average size of found communities after deleting 'bad' communities avg_comm = sum([len(comm) for comm in mod_max_comms]) / len(mod_max_comms) print(mod_max_score, len(mod_max_comms), avg_comm, hf.num_nodes(mod_max_comms)) # Create a plot of each community similiarity score vs. 
a random communities similarity score hf.plot_vs_random(mod_max_comms, Structure_Dict, k_property, property, "Mod_Max" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges", show_plots=show_plots) # Label nodes by community nx.set_node_attributes(mod_graph, hf.list_to_dict(mod_max_comms_pre_del), "Community") # Save the graph with nodes labled by community nx.write_gml( mod_graph, "Mod_Max_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml") print('End Modularity Maximization') # Fluid Implementation if fluid == True: print('Begin Fluid') # Create a copy of the graph, which will be used when we lable nodes by community fluid_graph = Protein_Graph_GC.copy() # You can use a predetermined number of communities, or optimize the number of communieis for the graph if opt_fluid == False: fluid_comms_pre_del = nx.algorithms.community.asyn_fluid.asyn_fluidc( Protein_Graph_GC, num_fluid_comms) fluid_comms_pre_del = list(list(fluid_comms_pre_del)) else: fluid_comms_pre_del = hf.opt_fluid(Protein_Graph_GC, start_fluid_comms, end_fluid_comms, fluid_step_size, fluid_num_trials) # Get the average size of found communities avg_comm_pre_del = sum([len(comm) for comm in fluid_comms_pre_del ]) / len(fluid_comms_pre_del) # Get the graph similiarty score fluid_score_pre_del = hf.score_graph(fluid_comms_pre_del, Structure_Dict, k_property, property, already_list=True) print(fluid_score_pre_del, len(fluid_comms_pre_del), avg_comm_pre_del, hf.num_nodes(fluid_comms_pre_del)) # Create a plot of each community similiarity score vs. a random communities similarity score hf.plot_vs_random(fluid_comms_pre_del, Structure_Dict, k_property, property, "Fluid" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges", show_plots=show_plots) # Delete some communities based off there modularity score, and the standard deviation of community scores in # the graph fluid_comms = hf.delete_comms(Protein_Graph_GC, fluid_comms_pre_del, std_val) # Get the graph similiarty score after deletion fluid_score = hf.score_graph(fluid_comms, Structure_Dict, k_property, property, already_list=True) # Get the average size of found communities after deleting 'bad' communities avg_comm = sum([len(comm) for comm in fluid_comms]) / len(fluid_comms) print(fluid_score, len(fluid_comms), avg_comm, hf.num_nodes(fluid_comms)) # Create a plot of each community similiarity score vs. 
a random communities similarity score hf.plot_vs_random(fluid_comms, Structure_Dict, k_property, property, "Fluid" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges", show_plots=show_plots) # Label nodes by community nx.set_node_attributes(fluid_graph, hf.list_to_dict(fluid_comms_pre_del), "Community") # Save the graph with nodes labled by community nx.write_gml( fluid_graph, "Fluid_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml") print('End Fluid') # louvian Implmentation if louv == True: print('Begin Louvain') # Create a copy of the graph, which will be used when we lable nodes by community louv_graph = Protein_Graph_GC.copy() #Create communities using the louvian opt_louv = hf.optimize_louv(Protein_Graph_GC, Structure_Dict, 100, 1, property, k_property) louv_comm_pre_del = hf.Get_Community(opt_louv[0]) # Get the average size of found communities avg_comm_pre_del = sum([len(comm) for comm in louv_comm_pre_del ]) / len(louv_comm_pre_del) # Get the graph similiarty score louv_score_pre_del = hf.score_graph(louv_comm_pre_del, Structure_Dict, k_property, property, already_list=True) print(louv_score_pre_del, len(louv_comm_pre_del), avg_comm_pre_del, hf.num_nodes(louv_comm_pre_del)) # Create a plot of each community similiarity score vs. a random communities similarity score hf.plot_vs_random(louv_comm_pre_del, Structure_Dict, k_property, property, "Louv" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges", show_plots=show_plots) # Delete some communities based off there modularity score, and the standard deviation of community scores in the graph louv_comms = hf.delete_comms(Protein_Graph_GC, louv_comm_pre_del, std_val) # Get the graph similiarty score after deletion louv_score = hf.score_graph(louv_comms, Structure_Dict, k_property, property, already_list=True) # Get the average size of found communities after deleting 'bad' communities avg_comm = sum([len(comm) for comm in louv_comms]) / len(louv_comms) print(louv_score, len(louv_comms), avg_comm, hf.num_nodes(louv_comms)) # Create a plot of each community similiarity score vs. a random communities similarity score hf.plot_vs_random(louv_comms, Structure_Dict, k_property, property, "Louv" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges", show_plots=show_plots) # Label nodes by community nx.set_node_attributes(louv_graph, hf.list_to_dict(louv_comm_pre_del), "Community") # Save the graph with nodes labled by community nx.write_gml( louv_graph, "Louv_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml") print('End Louvain') # Create Degree Distribution Plot and print out the expexted degree of the node if degree_dist == True: x, y, expected_degree = hf.degree_dist(Protein_Graph_GC) print("Expected Degree:", expected_degree) plt.figure() plt.loglog(x, y, 'bo') plt.title("Degree distribution") plt.xlabel("log(degree values)") plt.ylabel("log(degree frequencies)") plt.savefig('degree_dist_' + edge_type + '.png') plt.show() #Find the average clustering coefficient of the graph if avg_clust == True: average_clustering = nx.average_clustering(Protein_Graph_GC) print("Average Clustering Coefficient:", average_clustering)
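For readers who have not used the NetworkX community routines called throughout this function, a small self-contained illustration on the built-in karate-club graph shows the raw library calls without the hf helper module:

import networkx as nx
from networkx.algorithms.community import k_clique_communities, greedy_modularity_communities

G = nx.karate_club_graph()
cliques = [set(c) for c in k_clique_communities(G, 4)]        # overlapping k-clique communities
mod_max = [set(c) for c in greedy_modularity_communities(G)]  # disjoint modularity-maximizing communities
print(len(cliques), "k-clique communities;", len(mod_max), "modularity communities")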
def main (graph_name_GC): H1 = nx.read_gml(graph_name_GC) # just GC, but with Role info H1 = nx.connected_component_subgraphs(H1)[0] print len(H1.nodes()) list_R6_labels=[] dicc_label_node={} list_network_ids=[] for node in H1.nodes(): list_network_ids.append(int(H1.node[node]['label']))# this actually corresponds to the id from the users table in the DB dicc_label_node[int(H1.node[node]['label'])]=node if (H1.node[node]['role'] =="R6"): list_R6_labels.append(int(H1.node[node]['label']))# this actually corresponds to the id from the users table in the DB #print "# R6s:",len(list_R6_labels) print len(dicc_label_node) database = "calorie_king_social_networking_2010" server="tarraco.chem-eng.northwestern.edu" user="******" passwd="n1ckuDB!" db= Connection(server, database, user, passwd) query1="""select * from users""" result1 = db.query(query1) # is a list of dict. name1="GINI_coef_friendships_strenght_friendship_with_R6s.csv" file=open(name1, 'wt') print >> file,'label','ck_id','gini_friendships','gini_to_friends','gini_from_friends','sum_strength_with_R6s','sum_strength_to_R6s','sum_strength_from_R6s','tot_mess','tot_sent','tot_received', 'tot_public_mess','blog_posts','home_page','forum_posts','lesson_com','tot_act' list_blog_posts=[] list_home_page=[] list_forum_posts=[] list_lesson_com=[] list_tot_public_mess=[] dicc_ck_label={} for r1 in result1: #first i build a dicc ck_id vs. label ck_id=r1['ck_id'] label=int(r1['id']) # this corresponds to the 'label' in the gml files dicc_ck_label[ck_id]=label try: node=dicc_label_node[label] H1.node[node]['ck_id']=ck_id except KeyError: pass num_users=0. for r1 in result1: #loop over users num_users+=1. print int(num_users) ck_id=r1['ck_id'] label=int(r1['id']) # this corresponds to the 'label' in the gml files try: node=dicc_label_node[label] except KeyError: pass query2="select * from friends where (src ='"+str(ck_id)+"')or (dest ='"+str(ck_id)+"') " result2= db.query(query2) degree=len(result2) num_messg_friends=0. num_messg_to_friends=0. num_messg_from_friends=0. 
flag_sent=0 flag_received=0 list_weighted_to_friends=[] # one value per FRIEND of a given user list_weighted_from_friends=[] list_weighted_tot_messg_friends=[] list_weighted_to_friends_norm=[] # one value per FRIEND of a given user, normalized by the tot number of messages that user sent list_weighted_from_friends_norm=[] list_weighted_tot_messg_friends_norm=[] list_weighted_to_friends_R6s=[] list_weighted_from_friends_R6s=[] list_weighted_tot_messg_friends_R6s=[] list_weighted_to_friends_R6s_norm=[] list_weighted_from_friends_R6s_norm=[] list_weighted_tot_messg_friends_R6s_norm=[] # query3="select * from private_messages where (src_id ='"+str(ck_id)+"') " #result3= db.query(query3) #tot_sent=float(len(result3)) #query4="select * from private_messages where (dest_id ='"+str(ck_id)+"') " #result4= db.query(query4) #tot_received=float(len(result4)) query5="select * from private_messages where (src_id ='"+str(ck_id)+"')or (dest_id ='"+str(ck_id)+"') " # all messages result5= db.query(query5) num_tot_messg=float(len(result5)) tot_sent=0 tot_received=0 for r5 in result5: if r5['src_id']==ck_id: tot_sent+=1 elif r5['dest_id']==ck_id: tot_received+=1 query6="SELECT * FROM activity_combined where activity_flag != 'WI' and activity_flag != 'PM' and ck_id='"+str(ck_id)+"' " result6= db.query(query6) tot_public_mess=len(result6) query7="SELECT * FROM activity_combined where activity_flag != 'WI' and ck_id='"+str(ck_id)+"' " result7= db.query(query7) tot_activity=len(result7) blog_posts=0 home_page=0 forum_posts=0 lesson_com=0 for r6 in result6: # print r6 if r6['activity_flag']=="BC": blog_posts+=1 elif r6['activity_flag']=="HP": home_page+=1 elif r6['activity_flag']=="FP": forum_posts+=1 elif r6['activity_flag']=="LC": lesson_com+=1 list_blog_posts.append(blog_posts) list_home_page.append(home_page) list_forum_posts.append(forum_posts) list_lesson_com.append(lesson_com) list_tot_public_mess.append(tot_public_mess) #if tot_public_mess>0: print ck_id,"tot public:", tot_public_mess, "blogs:",blog_posts, "home page:",home_page, "forum:",forum_posts, "lessons",lesson_com, "tot private:", num_tot_messg,"tot sent:",tot_sent,"tot_received:",tot_received,"tot act:",tot_activity # if num_users <=500: # JUST TO TEST THE CODE if label in list_network_ids: # if the user is in the network, i check how many messages they send each other print "\n\nnode label",label,ck_id,"has degree:",H1.degree(node),"from DB",degree for f in H1.neighbors(node): messg_to_one_friend=0. #looking at a particular friend messg_from_one_friend=0. messg_one_friend=0. from_R6s=0. to_R6s=0. with_R6s=0. flag_R6_friend=0 flag_to_R6=0 flag_from_R6=0 for r5 in result5: if r5['src_id']== ck_id and r5['dest_id']== H1.node[f]['ck_id']: num_messg_to_friends+=1. num_messg_friends+=1. flag_sent=1 messg_to_one_friend+=1. messg_one_friend+=1. if H1.node[f]['role']=='R6': if H1.node[node]['R6_overlap'] >0: to_R6s+=1. with_R6s+=1. flag_R6_friend=1 flag_to_R6=1 elif r5['dest_id']== ck_id and r5['src_id']== H1.node[f]['ck_id']: num_messg_from_friends+=1. num_messg_friends+=1. flag_received=1 messg_from_one_friend+=1. messg_one_friend+=1. if H1.node[f]['role']=='R6': if H1.node[node]['R6_overlap'] >0: from_R6s+=1. with_R6s+=1. 
flag_R6_friend=1 flag_from_R6=1 list_weighted_to_friends.append(messg_to_one_friend) # weight of each friendship (not normalized) list_weighted_from_friends.append(messg_from_one_friend) list_weighted_tot_messg_friends.append(messg_one_friend) if flag_to_R6!=0: list_weighted_to_friends_R6s.append(to_R6s) if flag_from_R6!=0: list_weighted_from_friends_R6s.append(from_R6s) if flag_R6_friend !=0: list_weighted_tot_messg_friends_R6s.append(with_R6s) for item in list_weighted_tot_messg_friends: # normalization if sum(list_weighted_tot_messg_friends)>0: list_weighted_tot_messg_friends_norm.append(item/sum(list_weighted_tot_messg_friends)) for item in list_weighted_to_friends: if sum(list_weighted_to_friends)>0: list_weighted_to_friends_norm.append(item/sum(list_weighted_to_friends)) for item in list_weighted_from_friends: if sum(list_weighted_from_friends)>0: list_weighted_from_friends_norm.append(item/sum(list_weighted_from_friends)) for item in list_weighted_tot_messg_friends_R6s: # normalization if sum(list_weighted_tot_messg_friends)>0: list_weighted_tot_messg_friends_R6s_norm.append(item/sum(list_weighted_tot_messg_friends)) for item in list_weighted_to_friends_R6s: if sum(list_weighted_to_friends)>0: list_weighted_to_friends_R6s_norm.append(item/sum(list_weighted_to_friends)) for item in list_weighted_from_friends_R6s: if sum(list_weighted_from_friends)>0: list_weighted_from_friends_R6s_norm.append(item/sum(list_weighted_from_friends)) # i calculate how skewed friendships for a given user are: if len(list_weighted_to_friends) >0 and sum(list_weighted_to_friends)>0: Gini_to_friends=GINI_coef.calculate_GINI(list_weighted_to_friends) else: Gini_to_friends='NA' if len(list_weighted_from_friends) >0 and sum(list_weighted_from_friends)>0: Gini_from_friends=GINI_coef.calculate_GINI(list_weighted_from_friends) else: Gini_from_friends='NA' if len(list_weighted_tot_messg_friends) >0 and sum(list_weighted_tot_messg_friends)>0: Gini_friends=GINI_coef.calculate_GINI(list_weighted_tot_messg_friends) else: Gini_friends='NA' #'label','ck_id','gini_friendships','gini_to_friends','gini_from_friends','sum_strength_with_R6s','sum_strength_to_R6s','sum_strength_from_R6s','tot_mess','tot_sent','tot_received', 'tot_public_mess','blog_posts','home_page','forum_posts','lesson_com','tot_act' print >> file,label,ck_id,Gini_friends,Gini_to_friends,Gini_from_friends,sum(list_weighted_tot_messg_friends_R6s_norm),sum(list_weighted_to_friends_R6s_norm),sum(list_weighted_from_friends_R6s_norm),num_tot_messg,tot_sent,tot_received, tot_public_mess,blog_posts,home_page,forum_posts,lesson_com,tot_activity else : #if not networked node (or not GC) print >> file,label,ck_id,'NA','NA','NA','NA','NA','NA',num_tot_messg,tot_sent,tot_received, tot_public_mess,blog_posts,home_page,forum_posts,lesson_com,tot_activity
import numpy as np import pandas as pd import math import csv import networkx as nx from gurobipy import * #define a powerset function from itertools import chain, combinations def powerset(iterable): s = list(iterable) return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) #initialize graph from file Grid = nx.read_gml("Bus30WithData.gml") Grid = nx.convert_node_labels_to_integers(Grid) #declare needed constants SteadyStatePower = 255 #in MW--the PU Basis PlanningHorizon = 7 #this is measured in shifts ShiftLength = 12 #in Hours #Define sets to be used in optimization Nodes = list(range(0, len(Grid.nodes))) Time = list(range(0, PlanningHorizon)) RoadGrid = nx.Graph() RoadGrid.add_nodes_from(Grid.nodes) for i in Nodes: for j in Nodes: if Grid.has_edge(i, j, 1): RoadGrid.add_edge(i, j, weight=Grid[i][j][1]['length']) else:
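A quick sanity check of the powerset helper defined above (the output order follows itertools.combinations):

print(list(powerset([1, 2])))   # [(), (1,), (2,), (1, 2)]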
def toptw(startDestList, myGraph, bigPointDir): # keeps track of the nodes already inserted into a route existList = [] routeMaxDuration = myGraph.graph['RouteMaxDuration'] threeDayPath = [] threeDayProfitList = [0, 0, 0] threeDayBestRatioList = [-1, -1, -1] for i, val in enumerate(startDestList): s = val['s'] t = val['t'] existList.append(s) existList.append(t) tP = [{'ID': s}, {'ID': t}] duration = myGraph.edges[s, t]['duration'] startSlack = 0 destSlack = routeMaxDuration - duration tP[0]['slack'] = startSlack tP[0]['aTime'] = 0 tP[0]['dTime'] = 0 tP[0]['waitTime'] = 0 tP[1]['slack'] = destSlack tP[1]['aTime'] = duration tP[1]['dTime'] = duration # how should the waiting time at the destination be determined? tP[1]['waitTime'] = myGraph.nodes[t]['TimeWindows'][ i + 1]['opentime'] - duration if tP[1]['waitTime'] < 0: tP[1]['waitTime'] = 40 threeDayPath.append(tP) finish = False while finish == False: finish = True # top-k best paths # k = 5 # bestKPath = [] # bestKRatio = [] # bestKProfit = [] # bestKNodeId = [] # look for a suitable node to insert for data in myGraph.nodes.data(): node = data[1] if node['ID'] in existList: continue threeDayRatioList = [-1, -1, -1] threeDayTmpPathList = [[] for i in range(3)] threeDayTmpProfitList = [x for x in threeDayProfitList] # iterate over the three routes and pick the one best suited for inserting this node for day, travelPath in enumerate(threeDayPath): day = day + 1 # find the best-matching slack and insert the node right before it bestMatch = -1 tmpWaitTime = -1 gapTime = 10000000 for index, _ in enumerate(travelPath[1:]): # index refers to the position in the sliced list index = index + 1 preComponent = travelPath[index - 1] preToCandiNodeDuration = myGraph.edges[ preComponent['ID'], node['ID']]['duration'] arriveTimeToCandiNode = preComponent[ 'dTime'] + preToCandiNodeDuration if (arriveTimeToCandiNode > routeMaxDuration or max(arriveTimeToCandiNode, node['TimeWindows'][day]['opentime']) + node['ServiceTime'] > routeMaxDuration or node['TimeWindows'][day]['closetime'] - arriveTimeToCandiNode < node['ServiceTime']): break deparTimeOnCandiNode = max( arriveTimeToCandiNode, node['TimeWindows'][day] ['opentime']) + node['ServiceTime'] curComponent = travelPath[index] candiNodeToCurNodeDuration = myGraph.edges[ node['ID'], curComponent['ID']]['duration'] arriveTimeToCurNode = deparTimeOnCandiNode + candiNodeToCurNodeDuration # extra time incurred by inserting this node # t1 is the duration introduced by the inserted node, t2 is the original pre-to-cur duration t1 = arriveTimeToCurNode - preComponent['dTime'] t2 = curComponent['aTime'] - preComponent['dTime'] deltaTime = t1 - t2 # within the slack budget, insert before the node with the longest waiting time curGapTime = curComponent['slack'] - deltaTime if curGapTime < 0: continue if travelPath[index]['waitTime'] > tmpWaitTime: bestMatch = index tmpWaitTime = travelPath[index]['waitTime'] # # update the current gap value # if curGapTime < gapTime: # gapTime = curGapTime # bestMatch = index # no suitable insertion point found if bestMatch == -1: continue # compute the ratio of total profit to slack after the insertion tmpTotalProfit = threeDayProfitList[day - 1] + node['Profit'] tmpTravelPath = [copy.copy(x) for x in travelPath] timeParamDict = { 'arriveTimeToCandiNode': arriveTimeToCandiNode, 'deparTimeOnCandiNode': deparTimeOnCandiNode, 'arriveTimeToCurNode': arriveTimeToCurNode, 'day': day } tmpTotalSlack, tmpTravelPath = calcuSlack2( myGraph, tmpTravelPath, node, bestMatch, timeParamDict) if float(tmpTotalSlack) == 0: continue ratio = float(tmpTotalProfit) / float(tmpTotalSlack) threeDayRatioList[day - 1] = ratio threeDayTmpPathList[day - 1] = [copy.copy(x) for x in tmpTravelPath] threeDayTmpProfitList[day - 1] = tmpTotalProfit # randomly pick a day for the insertion accept = False dieTime = 0 while accept == False: randomDay = random.randrange(0, 3) if threeDayRatioList[randomDay] > threeDayBestRatioList[ randomDay]: threeDayBestRatioList[randomDay] = threeDayRatioList[ randomDay]
threeDayPath[randomDay] = [ copy.copy(x) for x in threeDayTmpPathList[randomDay] ] threeDayProfitList[randomDay] = threeDayTmpProfitList[ randomDay] accept = True dieTime = dieTime + 1 if dieTime > 15: break # alternative: take the route with the largest value in threeDayRatioList as the one to insert into # accept = False # for i in range(3): # if accept == False: # bestRatio = max(threeDayRatioList) # whichDay = threeDayRatioList.index(bestRatio) # if bestRatio > threeDayBestRatioList[whichDay]: # threeDayBestRatioList[whichDay] = bestRatio # threeDayPath[whichDay] = [copy.copy(x) for x in threeDayTmpPathList[whichDay]] # threeDayProfitList[whichDay] = threeDayTmpProfitList[whichDay] # accept = True # threeDayRatioList[whichDay] = -5 if accept == True: existList.append(node['ID']) finish = False shake(myGraph, threeDayPath, threeDayBestRatioList, threeDayProfitList, existList) profitSum = sum(threeDayProfitList) bigPointPaths = os.listdir(bigPointDir) for day, path in enumerate(threeDayPath): print(path) for component in path: if str(component['ID']) in bigPointPaths: smallG = nx.read_gml(bigPointDir + '\\' + str(component['ID'])) smallG = nx.convert_node_labels_to_integers(smallG) tmpP = BigPoint.dfsTraverse( smallG, component['aTime'] + component['waitTime'], component['dTime'], day + 1) profitSum = profitSum + tmpP profitSum = profitSum - myGraph.nodes[ component['ID']]['Profit'] print() print('total profit:', profitSum) return
def get_row(general_path, pattern_result, experiment_name, pattern_path): row = {} pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml')) parent_id = get_parent_id(os.path.join(pattern_path)) nr_randvar_values = man.count_nr_randvars_in_graph(pattern) cycles = man.is_there_cycle_in_graph(pattern) max_degree = man.get_maximum_node_degree(pattern) average_degree = man.get_average_node_degree(pattern) n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern) #get nr embeddings of exhaustive nr_emb = None sel_emb = None has_obd = True emb_stds = [] if os.path.exists(os.path.join(pattern_result, 'no_obdecomp.info')): has_obd = False if os.path.exists( os.path.join(os.path.dirname(pattern_result), "selected.info")): sel_emb = extract_nr_embeddings_NS( os.path.join(os.path.dirname(pattern_result), "selected.info")) print "General path: ", general_path print os.path.join( general_path, 'exhaustive_approach', 'results_' + general_path.split('/')[-1] + '.res'), "exists?", os.path.exists( os.path.join(general_path, 'exhaustive_approach', 'results_' + general_path.split('/')[-1] + '.res')) pattern_name = None print general_path.split('/') if general_path.split('/')[-1] == "": pattern_name = general_path.split('/')[-2] else: pattern_name = general_path.split('/')[-1] print pattern_name if os.path.exists( os.path.join(general_path, 'exhaustive_approach', 'results_' + pattern_name + '.res')): nr_emb, time, nr_obs = extract_nr_embeddings( os.path.join(general_path, 'exhaustive_approach', 'results_' + pattern_name + '.res')) #get the results if os.path.exists(os.path.join(pattern_result, 'monitoring')): embeddings, emb_stds, klds = get_stat( os.path.join(pattern_result, 'monitoring'), experiment_name) else: embeddings = [None] * 120 klds = [None] * 120 print "EMBEDDINGS: ", embeddings unequal_size_warning = False OBD = None if os.path.exists( os.path.join(general_path, 'results_furer', 'OBDDecomp.info')): OBD = getOBDecomp( os.path.join(general_path, 'results_furer', 'OBDDecomp.info')) nodes, edges = man.get_readable_text_format(pattern) print "PATTERN NAME: ", pattern_result row['pattern_name'] = pattern_result row['parent_id'] = parent_id row['nr_randvar_values'] = int(nr_randvar_values) row['nodes'] = nodes row['edges'] = edges row['has_cycles'] = cycles row['density'] = float(nx.density(pattern)) row['shape'] = man.get_graph_shape(pattern) row['max_degree'] = float(max_degree) row['avg_deg'] = float(average_degree) row['nr_targets'] = n_target_nodes if sel_emb: row['sel_emb'] = float(sel_emb) else: row['sel_emb'] = sel_emb if nr_emb: row['exh_emb'] = float(nr_emb) else: row['exh_emb'] = nr_emb row['has_obd'] = has_obd #row['unequal_size_warn']=unequal_size_warning row['OBD'] = OBD print "Nr embeddingS: ", len(embeddings) for i in xrange(0, len(embeddings)): row["emb_" + str(i + 1)] = embeddings[i] for i in xrange(0, len(emb_stds)): row["std_" + str(i + 1)] = emb_stds[i] for i in xrange(0, len(klds)): row["KLD_" + str(i + 1)] = klds[i] return row
def main(graph_name): G = nx.read_gml(graph_name) cutting_day=125 # i use this only for the filenames for_testing_fixed_set="YES" # when YES, fixed values param, to get all statistics on final distances etc # change the range for the parameters accordingly envelopes="YES" Niter=1000 # 100 iter seems to be enough (no big diff. with respect to 1000it) percent_envelope=95. list_id_weekends_T3=look_for_T3_weekends(G) # T3 doesnt share fellows in the weekend (but they are the exception) Nbins=200 # for the histogram of sum of distances all_team="NO" # as adopters or not dir_real_data='../Results/' dir="../Results/weight_shifts/infection/" delta_end=3. # >= than + or - dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!) if for_testing_fixed_set=="NO": output_file3="../Results/weight_shifts/Landscape_parameters_infection_"+str(Niter)+"iter_A_F_inferred.dat" file3 = open(output_file3,'wt') file3.close() ###################################################################################### # I read the file of the actual evolution of the idea spreading in the hospital: ## ###################################################################################### if all_team=="YES": print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat" exit() else: filename_actual_evol="../Results/Actual_evolution_adopters_from_inference.dat" file1=open(filename_actual_evol,'r') ## i read the file: list_dates_and_names_current_adopters.txt (created with: extract_real_evolution_number_adopters.py) list_lines_file=file1.readlines() list_actual_evol=[] for line in list_lines_file: # [1:]: # i exclude the first row num_adopters= float(line.split("\t")[1]) list_actual_evol.append(num_adopters) ################################################################## #../Results/weight_shifts/infection/Average_time_evolution_Infection_training_p0.8_Immune0.3_1000iter_2012_avg_ic_day125.dat ESTOS VALORES SON EL OPTIMUM FIT THE 152-DIAS prob_min=0.4 prob_max=0.401 delta_prob=0.1 prob_Immune_min=0.50 prob_Immune_max=0.51 delta_prob_Immune=0.1 dict_filenames_tot_distance={} # i will save the filename as key and the tot distance from that curve to the original one prob_Immune=prob_Immune_min while prob_Immune<= prob_Immune_max: print "prom Immune:",prob_Immune prob_infection=prob_min while prob_infection<= prob_max: print " p:",prob_infection if for_testing_fixed_set=="YES": output_file2=dir+"Average_time_evolution_Infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_A_F_inferred.dat" else: output_file2=dir+"Average_time_evolution_Infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_A_F_inferred.dat" file2 = open(output_file2,'wt') file2.close() # list_final_I_values_fixed_p=[] # i dont care about the final values right now, but about the whole time evol list_lists_t_evolutions=[] list_dist_fixed_parameters=[] list_dist_fixed_parameters_testing_segment=[] list_abs_dist_at_ending_point_fixed_parameters=[] list_dist_at_ending_point_fixed_parameters=[] list_final_num_infected=[] list_abs_dist_point_by_point_indiv_simus_to_actual=[] list_dist_point_by_point_indiv_simus_to_actual=[] # list_abs_dist_at_cutting_day=[] for iter in range(Niter): print " iter:",iter #######OJO~!!!!!!!!!! 
COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS # file_name_indiv_evol=output_file2.strip("Average_").split('.dat')[0]+"_indiv_iter"+str(iter)+".dat" # file4 = open(file_name_indiv_evol,'wt') # file4.close() ########################################## ########### set I.C. list_I=[] #list infected doctors max_order=0 for n in G.nodes(): G.node[n]["status"]="S" # all nodes are Susceptible if G.node[n]['type']=="shift": if G.node[n]['order']>max_order: max_order=G.node[n]['order'] # to get the last shift-order for the time loop else: if G.node[n]['label']=="Wunderink" or G.node[n]["label"]=="Weiss": G.node[n]["status"]="I" list_I.append(G.node[n]['label']) list_single_t_evolution=[] list_single_t_evolution.append(2.0) # I always start with TWO infected doctors!! for n in G.nodes(): # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink) if (G.node[n]['type']=="A") or ( G.node[n]['type']=="F"): if G.node[n]['label']!="Wunderink" and G.node[n]["label"]!="Weiss": rand=random.random() if rand< prob_Immune: G.node[n]["status"]="Immune" ################# the dynamics starts: t=1 while t<= max_order: # loop over shifts, in order for n in G.nodes(): if G.node[n]['type']=="shift" and G.node[n]['order']==t: shift_lenght=int(G.node[n]['shift_lenght']) if shift_lenght==2 and n not in list_id_weekends_T3: shift_lenght=1 # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2. (weekend shifts for T3 are two day long, with no sharing fellows) # print "one-day weekend", G.node[n]['label'],G.node[n]['shift_lenght'] flag_possible_infection=0 for doctor in G.neighbors(n): #first i check if any doctor is infected in this shift if G.node[doctor]["status"]=="I": flag_possible_infection=1 if flag_possible_infection: for doctor in G.neighbors(n): # then the doctors in that shift, gets infected with prob_infection for i in range(shift_lenght): # i repeat the infection process several times, to acount for shift lenght if G.node[doctor]["status"]=="S": rand=random.random() if rand<prob_infection: G.node[doctor]["status"]="I" # if G.node[doctor]["type"]=="A": # fellows participate in the dynamics, but i only consider the attendings as real adopters list_I.append(G.node[doctor]["label"]) # if for_testing_fixed_set=="YES": # if t==cutting_day: # list_abs_dist_at_cutting_day.append(abs(float(list_actual_evol[-1])-float(len(list_I)))) # print abs(float(list_actual_evol[-1])-float(len(list_I))), float(list_actual_evol[t]),float(len(list_I)) list_single_t_evolution.append(float(len(list_I))) t+=1 ######## end t loop ########OJO~!!!!!!!!!! 
COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS # file4 = open(file_name_indiv_evol,'at') #for i in range(len(list_single_t_evolution)): #time step by time step # print >> file4, i,list_single_t_evolution[i], prob_infection, prob_Immune #file4.close() ######################################################## list_lists_t_evolutions.append(list_single_t_evolution) list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol,list_single_t_evolution)) list_dist_fixed_parameters_testing_segment.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves_testing_segment( list_actual_evol,list_single_t_evolution, cutting_day)) list_abs_dist_at_ending_point_fixed_parameters.append( abs(list_single_t_evolution[-1]-list_actual_evol[-1]) ) # i save the distance at the ending point between the current simu and actual evol list_dist_at_ending_point_fixed_parameters.append( list_single_t_evolution[-1]-list_actual_evol[-1]) # i save the distance at the ending point between the current simu and actual evol list_final_num_infected.append(list_single_t_evolution[-1]) for index in range(len(list_single_t_evolution)): list_abs_dist_point_by_point_indiv_simus_to_actual.append(abs(list_single_t_evolution[index]-list_actual_evol[index])) list_dist_point_by_point_indiv_simus_to_actual.append(list_single_t_evolution[index]-list_actual_evol[index]) ######## end loop Niter list_pair_dist_std_delta_end=[] list_pair_dist_std_delta_end.append(numpy.mean(list_dist_fixed_parameters) ) # average dist between the curves over Niter list_pair_dist_std_delta_end.append(numpy.std(list_dist_fixed_parameters) ) list_pair_dist_std_delta_end.append(numpy.mean(list_abs_dist_at_ending_point_fixed_parameters)) if for_testing_fixed_set=="NO": file3 = open(output_file3,'at') # i print out the landscape print >> file3, prob_infection,prob_Immune,numpy.mean(list_abs_dist_at_ending_point_fixed_parameters), numpy.mean(list_dist_fixed_parameters), numpy.mean(list_final_num_infected),numpy.std(list_final_num_infected) file3.close() if (numpy.mean(list_abs_dist_at_ending_point_fixed_parameters)) <= delta_end: # i only consider situations close enough at the ending point dict_filenames_tot_distance[output_file2]=list_pair_dist_std_delta_end file2 = open(output_file2,'at') for s in range(len(list_single_t_evolution)): list_fixed_t=[] for iter in range (Niter): list_fixed_t.append(list_lists_t_evolutions[iter][s]) print >> file2, s,numpy.mean(list_fixed_t) file2.close() print "printed out: ", output_file2 # raw_input() if envelopes=="YES": calculate_envelope_set_curves.calculate_envelope(list_lists_t_evolutions,percent_envelope,"Infection",[prob_infection,prob_Immune]) if for_testing_fixed_set=="YES": num_valid_endings=0. for item in list_abs_dist_at_ending_point_fixed_parameters: if item <= delta_end: # i count how many realizations i get close enough at the ending point num_valid_endings+=1. 
print "average distance of the optimum in the testing segment:",numpy.mean(list_dist_fixed_parameters),numpy.std(list_dist_fixed_parameters),list_dist_fixed_parameters,"\n" print "fraction of realizations that end within delta_doctor:",num_valid_endings/Niter,"mean ending dist:",numpy.mean(list_dist_at_ending_point_fixed_parameters), "SD final dist",numpy.std(list_dist_at_ending_point_fixed_parameters) ,list_dist_at_ending_point_fixed_parameters,"\n" histogram_filename="../Results/weight_shifts/histogr_raw_distances_ending_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat" histograma_gral_negv_posit.histograma(list_dist_at_ending_point_fixed_parameters,histogram_filename) histogram_filename2="../Results/weight_shifts/histogr_sum_dist_traject_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat" histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,Nbins,histogram_filename2) histogram_filename3="../Results/weight_shifts/histogr_sum_dist_testing_segment_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat" histograma_bines_gral.histograma_bins_zero(list_dist_fixed_parameters_testing_segment,Nbins,histogram_filename3) histogram_filename4="../Results/weight_shifts/histogr_abs_dist_point_by_point_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat" histograma_gral_negv_posit.histograma(list_abs_dist_point_by_point_indiv_simus_to_actual,histogram_filename4) histogram_filename5="../Results/weight_shifts/histogr_dist_point_by_point_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat" histograma_gral_negv_posit.histograma(list_dist_point_by_point_indiv_simus_to_actual,histogram_filename5) output_file10="../Results/weight_shifts/Summary_results_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat" file10 = open(output_file10,'wt') print >> file10, "Summary results from best fit infection with",Niter, "iter, and with values for the parameters: prob_inf ",prob_infection," prob immune: ",prob_Immune,"\n" print >> file10, "average distance of the optimum in the testing segment:",numpy.mean(list_dist_fixed_parameters),numpy.std(list_dist_fixed_parameters),list_dist_fixed_parameters,"\n" print >> file10, "fraction of realizations that end within delta_doctor:",num_valid_endings/Niter,"mean ending dist:",numpy.mean(list_dist_at_ending_point_fixed_parameters), "SD final dist",numpy.std(list_dist_at_ending_point_fixed_parameters) ,list_dist_at_ending_point_fixed_parameters,"\n" print >> file10, "written optimum best fit evolution file:",output_file2 print >> file10,"written histogram file: ",histogram_filename file10.close() print "written Summary file: ",output_file10 prob_infection+= delta_prob prob_Immune+= delta_prob_Immune if for_testing_fixed_set=="NO": # only if i am exploring the whole landscape, i need to call this function, otherwise, i already know the optimum compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(dict_filenames_tot_distance,"Infection_weight",all_team,Niter,None) # last argument doesnt apply (cutting day) if for_testing_fixed_set=="NO": print "written landscape file:",output_file3
import matplotlib.pyplot as plt import networkx as nx import numpy as np #read graph G = nx.read_gml('./../data/dolphins/dolphins.gml') #get normalized Laplacian A = nx.adjacency_matrix(G).todense() deg = A.sum(axis=1) deg = np.squeeze(np.array(deg)) D_hf_inv = np.diag(deg**-0.5) D = np.diag(deg) L = D - A L_norm = np.matmul(np.matmul(D_hf_inv, L), D_hf_inv) #compute eigenvalues and eigenvectors vals, vec = np.linalg.eig(L_norm) # sort vec = vec[:, np.argsort(vals)] vals = vals[np.argsort(vals)] # use the sign of the Fiedler (second) eigenvector to split the nodes into two communities community_a = vec[:, 1] > 0 #obtain list of node names V = list(G.nodes) cluster_a = [] cluster_b = [] for i, x in enumerate(community_a): if x: cluster_a.append(V[i]) else: cluster_b.append(V[i])
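The sign split above is a two-way cut; the same normalized-Laplacian eigenvectors also support a k-way spectral clustering by running k-means on the first k of them. A brief sketch, assuming scikit-learn is available (it is not used by the original snippet) and reusing vec and V from above:

from sklearn.cluster import KMeans

k = 4
features = np.asarray(np.real(vec[:, :k]))   # first k eigenvectors as node features
labels = KMeans(n_clusters=k, n_init=10).fit_predict(features)
clusters = {c: [V[i] for i in range(len(V)) if labels[i] == c] for c in range(k)}
print({c: len(members) for c, members in clusters.items()})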
def read_graph(self, filename, file_type='edgelist', separator='\t', remove_whitespace=False, header=False, headerrow=None, vtype=np.uint32, itype=np.uint32): """ Reads the graph from an edgelist, gml or graphml file and initializes the class attribute adjacency_matrix. Parameters ---------- filename : string Name of the file, for example 'JohnsHopkins.edgelist', 'JohnsHopkins.gml', 'JohnsHopkins.graphml'. file_type : string Type of file. Currently only 'edgelist', 'gml' and 'graphml' are supported. Default = 'edgelist' separator : string Used if file_type = 'edgelist'. Default = '\t' remove_whitespace : bool Set to True when the file uses more than one kind of separator. Default = False header : bool Set to True if the first line of the file contains header information that should be ignored. Default = False headerrow : int Which row to use for column names. This argument takes precedence over header=True, which uses headerrow = 0. Default = None vtype : numpy integer type of the CSC format index array. Default = np.uint32 itype : numpy integer type of the CSC format index pointer array. Default = np.uint32 """ if file_type == 'edgelist': #dtype = {0:'int32', 1:'int32', 2:'float64'} if header and headerrow is None: headerrow = 0 if remove_whitespace: df = pd.read_csv(filename, header=headerrow, delim_whitespace=remove_whitespace) else: df = pd.read_csv(filename, sep=separator, header=headerrow, delim_whitespace=remove_whitespace) cols = [0,1,2] if header != None: cols = list(df.columns) source = df[cols[0]].values target = df[cols[1]].values if df.shape[1] == 2: weights = np.ones(source.shape[0]) elif df.shape[1] == 3: weights = df[cols[2]].values else: raise Exception('GraphLocal.read_graph: df.shape[1] not in (2, 3)') self._num_vertices = max(source.max() + 1, target.max()+1) #self.adjacency_matrix = source, target, weights self.adjacency_matrix = sp.csr_matrix((weights.astype(np.float64), (source, target)), shape=(self._num_vertices, self._num_vertices)) elif file_type == 'gml': warnings.warn("Loading a gml is not efficient, we suggest using an edgelist format for this API.") G = nx.read_gml(filename).to_undirected() self.adjacency_matrix = nx.adjacency_matrix(G).astype(np.float64) self._num_vertices = nx.number_of_nodes(G) elif file_type == 'graphml': warnings.warn("Loading a graphml is not efficient, we suggest using an edgelist format for this API.") G = nx.read_graphml(filename).to_undirected() self.adjacency_matrix = nx.adjacency_matrix(G).astype(np.float64) self._num_vertices = nx.number_of_nodes(G) else: print('This file type is not supported') return self._weighted = False for i in self.adjacency_matrix.data: if i != 1: self._weighted = True break is_symmetric = (self.adjacency_matrix != self.adjacency_matrix.T).sum() == 0 if not is_symmetric: # Symmetrize matrix, choosing larger weight sel = self.adjacency_matrix.T > self.adjacency_matrix self.adjacency_matrix = self.adjacency_matrix - self.adjacency_matrix.multiply(sel) + self.adjacency_matrix.T.multiply(sel) assert (self.adjacency_matrix != self.adjacency_matrix.T).sum() == 0 self._num_edges = self.adjacency_matrix.nnz self.compute_statistics() self.ai = itype(self.adjacency_matrix.indptr) self.aj = vtype(self.adjacency_matrix.indices)
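The symmetrization step at the end of read_graph keeps the larger of the two directed weights. The following toy example, independent of the class above and assuming only SciPy, shows the same elementwise-max trick in isolation:

import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[0., 3., 0.],
                            [1., 0., 2.],
                            [0., 0., 0.]]))
sel = A.T > A                                    # entries where the reverse edge has the larger weight
A_sym = A - A.multiply(sel) + A.T.multiply(sel)  # elementwise max of A and A.T
assert (A_sym != A_sym.T).nnz == 0               # now symmetric
print(A_sym.toarray())                           # [[0. 3. 0.] [3. 0. 2.] [0. 2. 0.]]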