def adjlist2gexf(fAdjlist, bIntNode=1):
    '''
    Converts a graph in the adjacency list format to the GEXF format.

    input parameters:
          fAdjlist:   The file name of the adjacency list.
          bIntNode:   Indicates if the node type is integer. The default is 1
                      (i.e., nodes are integer type).

    returns:          None

    output:
          This function generates a GEXF format file with the same name as the
          input file, with a .gexf extension.
    '''
    # first, load the graph
    if bIntNode == 1:
        G = nx.read_adjlist(fAdjlist, nodetype=int)
    else:
        G = nx.read_adjlist(fAdjlist)

    # the output file name
    (fOutRoot, tmpExt) = os.path.splitext(fAdjlist)
    fOut = fOutRoot + '.gexf'

    # write out
    nx.write_gexf(G, fOut)
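# A minimal usage sketch for adjlist2gexf above, assuming networkx is imported
# as nx and os is imported; the file name 'toy_graph.adjlist' is illustrative only.
nx.write_adjlist(nx.path_graph(4), 'toy_graph.adjlist')   # 4-node path, integer nodes
adjlist2gexf('toy_graph.adjlist')                          # writes toy_graph.gexf
H = nx.read_gexf('toy_graph.gexf')
print(H.number_of_nodes(), H.number_of_edges())            # 4 3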
def write_gexf(self, path, ext='.gexf', max_edges=None):
    """Write as a GEXF output, suitable for Gephi input."""
    filename = path + ext
    print "writing GeoGraph as GEXF to %s" % filename
    gexf = self.geo_gexf_graph(max_edges)
    # gexf_path = os.path.join(name, name + '_' + '_'.join(append) + ext)
    nx.write_gexf(gexf, filename)
def main(seed):
    depth = 0
    global g

    # Connect to Neo4j
    graph_db = neo4j.GraphDatabaseService(NEODB)

    print "Starting Node Export at {0}.".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))

    for s in seed:
        query = q.format(s)
        neo_nodes, metadata = cypher.execute(graph_db, query)

        # add the node
        g.add_node(neo_nodes[0][0])
        attr = graph_db.node(neo_nodes[0][0]).get_properties()
        attr = addNodeAttributes(attr)
        g.node[neo_nodes[0][0]] = attr
        complete.add(neo_nodes[0][0])

        # pass them to the recursive DFS
        dfs_parse_nodes(neo_nodes, depth + 1)

    print "Saving File"
    nx.write_gexf(g, GEXF_FILE)

    print "Done at {0}.".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))
def test_real_graph(nparts): logging.info('Reading author collab graph') author_graph = nx.read_graphml('/home/amir/az/io/spam/mgraph2.gexf') author_graph.name = 'author graph' logging.info('Reading the full author product graph') full_graph = nx.read_graphml('/home/amir/az/io/spam/spam_graph.graphml') full_graph.name = 'full graph' proper_author_graph = author_graph.subgraph([a for a in author_graph if 'revLen' in author_graph.node[a] and 'hlpful_fav_unfav' in author_graph.node[a] and 'vrf_prchs_fav_unfav' in author_graph.node[a]]) # features = {'revLen': 0.0, 'hlpful_fav_unfav': False, 'vrf_prchs_fav_unfav': False} # for a in author_graph: # for feat, def_val in features.items(): # if feat not in author_graph.node[a]: # author_graph.node[a][feat] = def_val # sub sample proper_author_graph # proper_author_graph.remove_edges_from(random.sample(proper_author_graph.edges(), 2*proper_author_graph.size()/3)) # degree = proper_author_graph.degree() # proper_author_graph.remove_nodes_from([n for n in proper_author_graph if degree[n] == 0]) # author to the product reviewed by him mapping logging.debug('forming the product mapping') author_product_mapping = {} for a in proper_author_graph: author_product_mapping[a] = [p for p in full_graph[a] if 'starRating' in full_graph[a][p] and full_graph[a][p]['starRating'] >= 4] logging.debug('Running EM') ll, partition = HardEM.run_EM(proper_author_graph, author_product_mapping, nparts=nparts, parallel=True) print 'best loglikelihood: %s' % ll for n in partition: author_graph.node[n]['cLabel'] = int(partition[n]) nx.write_gexf(author_graph, '/home/amir/az/io/spam/spam_graph_mgraph_sage_labeled.gexf')
def write_gexf(filename, graph=None, adjacency=None, attributes=None):
    """ Output a matrix of nodal flows to GEXF format """
    if graph is None:
        graph = to_graph(adjacency, attributes)
    nx.write_gexf(graph, filename, prettyprint=True)
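# Hedged usage sketch for the wrapper above: to_graph() is not shown here, so
# this example passes a ready-made networkx graph instead of an adjacency matrix.
flows = nx.DiGraph()
flows.add_edge('region_a', 'region_b', weight=12.5)
flows.add_edge('region_b', 'region_c', weight=3.0)
write_gexf('flows.gexf', graph=flows)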
def spectral_clustering(G, graph_name, num_clusters): #Find a way to figure out clusters number automatically subgraphs = [] write_directory = os.path.join(Constants.SPECTRAL_PATH,graph_name) if not os.path.exists(write_directory): os.makedirs(write_directory) nodeList = G.nodes() matrix_data = nx.to_numpy_matrix(G, nodelist = nodeList) spectral = SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="rbf") spectral.fit(matrix_data) label = spectral.labels_ clusters = {} for nodeIndex, nodeLabel in enumerate(label): if nodeLabel not in clusters: clusters[nodeLabel] = [] clusters[nodeLabel].append(nodeList[nodeIndex]) #countNodes is used to test whether we have all the nodes in the clusters for clusterIndex, subGraphNodes in enumerate(clusters.keys()): subgraph = G.subgraph(clusters[subGraphNodes]) subgraphs.append(subgraph) nx.write_gexf(subgraph, os.path.join(write_directory,graph_name+str(clusterIndex)+"_I"+Constants.GEXF_FORMAT)) #countNodes = countNodes + len(clusters[subGraphNodes]) return subgraphs
def build(task): """Build network based on task""" try: graph = nx.DiGraph() with open(task["file"]) as df: for line in df: action = json.loads(line) user = action["user"] actions = action["actions"] graph.add_node(user, tweets=actions["tweets"]) #handle mentions men_counter = Counter(actions["mentions"]) for t_user, num in men_counter.iteritems(): graph.add_edge(user, t_user, mentions=num) #handle retweet retweet_counter = Counter(actions["retweets"]) for t_user, num in retweet_counter.iteritems(): graph.add_edge(user, t_user, retweets=num) #handle reply reply_counter = Counter(actions['replies']) for t_user, num in reply_counter.iteritems(): graph.add_edge(user, t_user, replies=num) nx.write_gexf(graph, task["output"]) print 'Done[%s]' % task['file'] except: print "Error:", task, sys.exc_info()[0], line
def main(mysql_db, crawl_id, output): # print crawl_id query = ( """ SELECT tags.content AS tag_content, d.content AS entity_content, start_words, frequency FROM (SELECT content, start_words, frequency, id_tag FROM (SELECT re.id_rws_entity, content, start_words, frequency FROM (SELECT id_rws_entity, frequency, start_words FROM (SELECT id_document, start_words FROM documents_crawls AS dc JOIN crawls AS c ON dc.id_crawl = c.id_crawl WHERE c.id_crawl = %s) AS a JOIN rws_entities_documents_unignored AS redu ON redu.id_document = a.id_document) AS b JOIN rws_entities AS re ON re.id_rws_entity = b.id_rws_entity) AS c JOIN rws_entities_tags AS ret ON ret.id_rws_entity = c.id_rws_entity) AS d LEFT OUTER JOIN tags ON tags.id_tag = d.id_tag ORDER BY start_words LIMIT 20 """ % crawl_id ) connection = MySQLdb.connect(host=MYSQL_HOST, user=MYSQL_USER, passwd=MYSQL_PASSWORD, db=mysql_db) cursor = connection.cursor() cursor.execute(query) res = cursor.fetchall() g = nx.Graph() for key, group in groupby(res, lambda x: x[2]): g.add_node(key, type="start_word") # print "added", key, "type : actor" for thing in group: # print thing if thing[1] != key: g.add_node(thing[1], frequency=float(thing[3]), type=thing[0]) # print "added", thing[1], "type : entity" g.add_edge(key, thing[1], weight=float(thing[3])) # print " " # filename = EXPORT_DIR + "anta-export-graph-" + str(getrandbits(128)) + ".gexf" nx.write_gexf(g, output) return
def add_lamina_LPU(config, i, lamina, manager):
    '''
    This method adds Lamina LPU and its parameters to the manager
    so that it can be initialized later.

    --
    config: configuration dictionary like object
    i: identifier of eye in case more than one is used
    lamina: lamina array object required for the generation of graph.
    manager: manager object to which LPU will be added
    generator: generator object or None
    '''
    output_filename = config['Lamina']['output_file']
    gexf_filename = config['Lamina']['gexf_file']
    suffix = config['General']['file_suffix']

    dt = config['General']['dt']
    debug = config['Lamina']['debug']
    time_sync = config['Lamina']['time_sync']

    output_file = '{}{}{}.h5'.format(output_filename, i, suffix)
    gexf_file = '{}{}{}.gexf.gz'.format(gexf_filename, i, suffix)

    G = lamina.get_graph()
    nx.write_gexf(G, gexf_file)

    n_dict_ret, s_dict_ret = lLPU.lpu_parser(gexf_file)
    lamina_id = get_lamina_id(i)
    modules = []

    manager.add(lLPU, lamina_id, dt, n_dict_ret, s_dict_ret,
                input_file=None, output_file=output_file,
                device=2*i+1, debug=debug, time_sync=time_sync,
                modules=modules, input_generator=None)
def main(): # the description link graph g = nx.read_gexf('data/subreddits_edged_by_description_links.gexf') # an empty graph for showing communities g1 = nx.Graph() communities = get_coalesced_communities(g) for c in communities: g1.add_node(c.name) g1.node[c.name]['size'] = len(c.members) count = 0 ratio_weight = 0.0 for c1, c2 in product(communities, communities): if c1.id == c2.id or g1.has_edge(c1.name, c2.name) or len(c1.members) > len(c2.members): continue overlap = len(c1.members & c2.members) if overlap > 0: g1.add_edge(c1.name, c2.name, weight=overlap / len(c1.members)) ratio_weight += overlap / len(c1.members) count += 1 average_weight_ratio = ratio_weight / count print "average weight ratio: %s" % str(average_weight_ratio) g1.remove_edges_from(filter(lambda x: x[2]['weight'] < average_weight_ratio, g1.edges(data=True))) print "%d subreddits included" % len(reduce(lambda x,y: x.union(y.members), communities, set())) nx.write_gexf(g1, 'test_coalesce.gexf')
def kmeans_cluster(G, graph_name, num_clusters): subgraphs = [] #Find a way to figure out clusters number automatically write_directory = os.path.join(Constants.KMEANS_PATH,graph_name) if not os.path.exists(write_directory): os.makedirs(write_directory) nodeList = G.nodes() matrix_data = nx.to_numpy_matrix(G, nodelist = nodeList) kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10) kmeans.fit(matrix_data) label = kmeans.labels_ clusters = {} for nodeIndex, nodeLabel in enumerate(label): if nodeLabel not in clusters: clusters[nodeLabel] = [] clusters[nodeLabel].append(nodeList[nodeIndex]) #countNodes is used to test whether we have all the nodes in the clusters countNodes = 0 for clusterIndex, subGraphNodes in enumerate(clusters.keys()): subgraph = G.subgraph(clusters[subGraphNodes]) subgraphs.append(subgraph) nx.write_gexf(subgraph, os.path.join(write_directory,graph_name+str(clusterIndex)+Constants.GEXF_FORMAT)) #countNodes = countNodes + len(clusters[subGraphNodes]) pass return num_clusters
def NXexportDoubleMatAsGraph(matrix,TElist,TEdict,TEfamilydict,nameout,namestatout): fileout=open(namestatout,'w') size=matrix.shape graph=nx.DiGraph() i=0 j=0 L=len(TElist) #k=0 #l=0 #m=0 while i<size[0]: while j<size[1]: #safest call if matrix[i,j]>0: if i>=L and j>=L: graph.add_edge("-"+TElist[i-L],"-"+TElist[j-L],weight=matrix[i,j]) fileout.write("-"+TElist[i-L]+"\t-"+TElist[j-L]+"\t"+TEfamilydict[TElist[i-L]]+"\t"+TEfamilydict[TElist[j-L]]+"\t"+str(matrix[i,j])+"\n") #k+=matrix[i,j] elif i>=L: graph.add_edge("-"+TElist[i-L],TElist[j],weight=matrix[i,j]) fileout.write("-"+TElist[i-L]+"\t"+TElist[j]+"\t"+TEfamilydict[TElist[i-L]]+"\t"+TEfamilydict[TElist[j]]+"\t"+str(matrix[i,j])+"\n") #l+=matrix[i,j] elif j>=L: graph.add_edge(TElist[i],"-"+TElist[j-L],weight=matrix[i,j]) fileout.write(TElist[i]+"\t-"+TElist[j-L]+"\t"+TEfamilydict[TElist[i]]+"\t"+TEfamilydict[TElist[j-L]]+"\t"+str(matrix[i,j])+"\n") #m+=matrix[i,j] else: graph.add_edge(TElist[i],TElist[j],weight=matrix[i,j]) fileout.write(TElist[i]+"\t"+TElist[j]+"\t"+TEfamilydict[TElist[i]]+"\t"+TEfamilydict[TElist[j]]+"\t"+str(matrix[i,j])+"\n") j+=1 j=0 i+=1 #print(k,l,m) #unitary test #pos=nx.spring_layout(G) # positions for all nodes : not need if export in gexf nx.write_gexf(graph,nameout) fileout.close()
def analyze_rdn_graph():
    G = generate_random_graph(188979, 7)  # nodes and nodes/edges
    nx.write_gexf(G, "./networks/barabasi_panel.gexf")

    print "Nodes:", G.number_of_nodes()
    print "Edges:", G.number_of_edges()

    analize_cliques(G)
    analize_degrees(G)
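# generate_random_graph() is not defined in this snippet; a plausible stand-in,
# assuming it wraps the Barabasi-Albert generator with n nodes and m attachments
# per new node, might look like this (name and behaviour are an assumption):
import networkx as nx

def generate_random_graph(n, m):
    # preferential-attachment graph: n nodes, m edges added per new node
    return nx.barabasi_albert_graph(n, m)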
def scrap_dbpedia(): G = nx.DiGraph() for querie in queries: print 'Executing querie: ' + querie payload = {'query': querie, 'format': 'json'} r = requests.get("http://dbpedia.org/sparql/", params=payload) results = r.json()['results']['bindings'] for result in results: advisor = result['advisor']['value'] advisorName = result['labelAdvisor']['value'] student = result['student']['value'] studentName = result['labelStudent']['value'] print u'Advisor:', advisor, u'student:', student add_node(G, advisor, advisorName) add_node(G, student, studentName) add_edge(G, advisor, student) print '' print '-Nodes: ' print len(G.nodes()) print '-Edges: ' print len(G.edges()) print 'Writing file' nx.write_gexf(G, 'dbpedia_genealogy.gexf') print 'Done'
def main(args): egos = [] for arg in args: # to do: deal with hashtags here if arg[0] is '@': if "/" in arg: parts = arg.split("/") egos.extend(get_members_from_list(parts[0][1:],parts[1])) else: egos.append(arg[1:]) # replace egos with data = {'nodes': {}, 'edges': {}} for ego in egos: data = get_mentionball(ego,data) G = data_to_network(data) clean_ball(G) nx.write_gexf(G,"mentionball-%s.gexf" % "+".join(args).replace('/','~'))
def build_thesis_genealogy(): cnx = mysql.connector.connect(**config) cursor = cnx.cursor() query = "SELECT thesis.author_id, advisor.person_id FROM thesis, advisor WHERE thesis.id = advisor.thesis_id" cursor.execute(query) G = nx.DiGraph() for thesis in cursor: G.add_edge(thesis[1], thesis[0]) i = 0 for n in G.nodes(): try: node = str(n) G.node[n]["name"] = persons_id[node] try: G.node[n]["university"] = persons_university[node]["university"]["name"] G.node[n]["location"] = persons_university[node]["university"]["location"] i += 1 except: G.node[n]["university"] = "none" G.node[n]["location"] = "none" except: print n print "Total persons with a location:", i cursor.close() cnx.close() nx.write_gexf(G, "./networks/genealogy.gexf") return G
def build_interaction(): ids, inverse_ids = get_all_ids() G = nx.DiGraph() cnx = mysql.connector.connect(**config) cursor = cnx.cursor() query = "SELECT user_id, target_id, weight FROM interactions" cursor.execute(query) for relation in cursor: source = inverse_ids[relation[0]] target = inverse_ids[relation[1]] weight = relation[2] G.add_edge(source, target, weight = weight) cnx.close() print 'Nodes:', len(G.nodes()) print 'Edges:', len(G.edges()) nx.write_gexf(G, './sna/interactions-nonfiltered-%s-%s.gexf' % (datetime.datetime.now().month, datetime.datetime.now().day)) filter_weight(G, weight_limit = 4, degree_limit = 0) print 'Nodes:', len(G.nodes()) print 'Edges:', len(G.edges()) nx.write_gexf(G, './sna/interactions-%s-%s.gexf' % (datetime.datetime.now().month, datetime.datetime.now().day))
def main(): n = int(sys.argv[1]) out = sys.argv[2] ans = float(sys.argv[3]) if len(sys.argv) >= 4 else None G = nx.Graph() # create nodes randomly placed in [0,100)x[0,100) P = [None] * n for i in xrange(n): G.add_node(i, x=random.random() * 100, y=random.random() * 100) # create a complete weighted graph using Euclidian distances for i in xrange(n): for j in xrange(i + 1, n): G.add_edge(i, j, weight=euclidian(G, i, j)) # embed a tour with edge weight = ans (usually 0) if ans is not None: T = list(G.nodes()) random.shuffle(T) for i in xrange(1, len(T)): G.edge[T[i - 1]][T[i]]["weight"] = ans G.edge[T[-1]][T[0]]["weight"] = ans print T nx.write_gexf(G, out) print "n=%d m=%d" % (G.number_of_nodes(), G.number_of_edges())
def Gephi_Graph(r_serv, graphpath, mincard, maxcard, insert_type):
    """Create a Gephi graph by calling the Create_Graph sub-function.

    :param r_serv: -- connection to the redis database
    :param graphpath: -- the absolute path of the .gexf graph created.
    :param mincard: -- the minimum number of links between 2 nodes to be created
    :param maxcard: -- the maximum number of links between 2 nodes to be created
    :param insert_type: -- the type of data structure used to create the graph.

    In fact this function is just here to let the caller choose between two
    kinds of Redis database structure: one is a sorted set and the other a
    simple set.

    """
    g = nx.Graph()

    if (insert_type == 0):
        for h in r_serv.smembers("hash"):
            Create_Graph(r_serv, g, h, graphpath, mincard, maxcard)

    elif (insert_type == 2):
        for h in r_serv.zrange("hash", 0, -1):
            Create_Graph(r_serv, g, h, graphpath, mincard, maxcard)

    nx.write_gexf(g, graphpath)

    print nx.info(g)
def as_gexf(self):
    """
    Return the graph as GEXF for download
    """
    sio = StringIO.StringIO()
    nx.write_gexf(self.graph_with_metadata, sio)
    return sio.getvalue()
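# Note on the snippet above: it relies on Python 2's StringIO. Under Python 3,
# nx.write_gexf emits bytes, so an in-memory export needs io.BytesIO instead.
# A minimal sketch (the function name is illustrative):
import io
import networkx as nx

def graph_as_gexf_bytes(graph):
    buf = io.BytesIO()
    nx.write_gexf(graph, buf)
    return buf.getvalue().decode('utf8')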
def main(): parser = argparse.ArgumentParser() parser.add_argument("--config", help="path to alternate config file", default="./config.json", type=str) args = parser.parse_args() lastfm = LastFm(args.config) artists = lastfm.get_top_artists() artists = map(lambda x: x['name'], artists) similar_artists = get_similar_artists(lastfm, artists) edges = [] nodes = {} map(lambda row: row[0], similar_artists) for row in similar_artists: if row[0] not in nodes: nodes[row[0]] = Node(row[0], color=(0,0,205)) else: node = nodes[row[0]] node.set_value(node.get_value() + 1) node.set_color((0,0,205)) for similar_artist in row[1]: if similar_artist not in nodes: nodes[similar_artist] = Node(similar_artist, color=(238,0,0)) else: node = nodes[similar_artist] node.set_value(node.get_value() + 1) edges.append((row[0], similar_artist)) graph = build_graph(nodes, edges) nx.write_gexf(graph,'graph.gexf')
def build_from_single_tweet(self, tweet): """ This will save all the hashtags from a single tweet to the graph file """ entities = Extractors.getEntities(tweet) if len(entities['hashtags']) >= 1: self.load_graph() try: list_of_hashtags = entities['hashtags'] tweet_tuples = [] # Format each of the hashtags and add the tuple for tag in list_of_hashtags: tag_cleaned = str(tag['text']) tag_cleaned = tag_cleaned.lower() tweetid = tweet['id_str'] tweet_tuple = (tweetid, tag_cleaned) tweet_tuples.append(tweet_tuple) try: #Add tag to the graph. Done here so that one bad edge won't bring everything down self.graph.add_edges_from(tweet_tuples) except Exception as e: print 'Error adding edge for tweet id %s and hashtag %s. Details: %s' % (tweetid, tag_cleaned, e) #todo Add redis log of non recorded hashtags finally: # record added edges nx.write_gexf(self.graph, self.graphFile)
def dbscan_cluster(G, graph_name): write_directory = os.path.join(DBSCAN_PATH,graph_name) if not os.path.exists(write_directory): os.makedirs(write_directory) nodeList = G.nodes() matrix_data = nx.to_numpy_matrix(G, nodelist = nodeList) #kmeans = KMeans(init='k-means++', n_clusters=8, n_init=10) #kmeans.fit(matrix_data) #label = kmeans.labels_ #print(matrix_data) # Compute DBSCAN db = DBSCAN(eps=1, min_samples=2).fit(matrix_data) #core_samples_mask = np.zeros_like(db.labels_, dtype=bool) #core_samples_mask[db.core_sample_indices_] = True label = db.labels_ clusters = {} for nodeIndex, nodeLabel in enumerate(label): if nodeLabel not in clusters: clusters[nodeLabel] = [] clusters[nodeLabel].append(nodeList[nodeIndex]) #print(label) #print("clusters",clusters) #countNodes is used to test whether we have all the nodes in the clusters countNodes = 0 for clusterIndex, subGraphNodes in enumerate(clusters.keys()): subgraph = G.subgraph(clusters[subGraphNodes]) nx.write_gexf(subgraph, os.path.join(write_directory,graph_name+str(clusterIndex)+Constants.GEXF_FORMAT)) #countNodes = countNodes + len(clusters[subGraphNodes]) pass pass
def create_graph(category): graphdict, labeldict = load_dicts() G = nx.Graph() ego = labeldict[category] categories = [category] level = 0 while True: if level == 2 or len(categories) == 0: break for cat in categories: node = labeldict[cat][0].decode('utf-8') G.add_node(node) try: subcats = graphdict[cat] for scat in subcats: subnode = labeldict[scat][0].decode('utf-8') G.add_node(subnode) G.add_edge(node,subnode) except KeyError: continue categories = subcats level += 1 # Draw graph pos=nx.spring_layout(G) nx.draw_networkx_nodes(G,pos,node_size=10,node_color='white') nx.draw_networkx_edges(G,pos,width=0.5,alpha = 0.8, edge_color = 'black') nx.draw_networkx_labels(G,pos, font_size = 12, font_family = 'sans-serif') nx.write_gexf(G,'../output/graph.gexf') plt.savefig('../output/ego_graph.png') plt.show()
def main(notify): g = nx.Graph() out_filename = "data/subreddits_edged_by_description_links.gexf" parser = HTMLParser() session = Session() query = session.query(Subreddit) dbi = DBIterator(query=query) for subreddit in dbi.results_iter(): sub = subreddit.url.split("/")[2].lower() initialize_node(g, sub) if not subreddit.description_html: continue html = parser.unescape(subreddit.description_html) for linked_sub in find_sub_links(html): if g.has_edge(sub, linked_sub): g[sub][linked_sub]["weight"] += 1 else: g.add_edge(sub, linked_sub, weight=1) nx.write_gexf(g, out_filename)
def create_genealogy(graph_id = 'deusto.aitoralmeida'): print 'Loading graph' merged = nx.read_gexf('merged_genealogy.gexf', node_type = None) print 'Loading edge index' dict_edges = load_merged_edge_index() print 'Building genealogy' to_process = [graph_id] tree = set() #get all the ascenstors in tree while len(to_process) > 0: current = to_process[0] to_process.remove(current) tree.add(current) try: to_process += dict_edges[current] except: pass print 'Creating graph' G = nx.DiGraph() for person in tree: print person G.add_node(person, name = merged.node[person]['name']) for target in merged.edge[person].keys(): #add edges with the ancestors only if target in tree: G.add_edge(person, target) print 'Writing file' nx.write_gexf(G, 'created_genealogy.gexf')
def add_lamina_LPU(config, i, lamina, manager): output_filename = config["Lamina"]["output_file"] gexf_filename = config["Lamina"]["gexf_file"] suffix = config["General"]["file_suffix"] dt = config["General"]["dt"] debug = config["Lamina"]["debug"] time_sync = config["Lamina"]["time_sync"] output_file = "{}{}{}.h5".format(output_filename, i, suffix) gexf_file = "{}{}{}.gexf.gz".format(gexf_filename, i, suffix) G = lamina.get_graph() nx.write_gexf(G, gexf_file) n_dict_ret, s_dict_ret = LPU.lpu_parser(gexf_file) lamina_id = get_lamina_id(i) modules = [] manager.add( LPU, lamina_id, dt, n_dict_ret, s_dict_ret, input_file=None, output_file=output_file, device=2 * i + 1, debug=debug, time_sync=time_sync, modules=modules, input_generator=None, )
def fetch(self, oid, format="graphml", max_age=0): # If the file was already fetched check the timestamp and overwrite graphml = os.path.join(self.cache_dir, oid+ ".graphml") graphpng = os.path.join(self.cache_dir, oid+ ".png") graphgexf = os.path.join(self.cache_dir, oid+ ".gexf") logger.debug("Fetching "+graphml) if os.path.exists(graphml): ## cache hit was old and we have to refresh it if int(time.time()) - os.path.getmtime(graphml ) > max_age: DG=nx.read_graphml(graphml) labels=dict((n,d['label']) for n,d in DG.nodes(data=True)) nx.draw_networkx(DG,labels=labels) logger.debug("Generated graph "+graphpng); plt.savefig(graphpng) nx.write_gexf(DG, graphgexf ) else: logger.debug("Cache miss"); ## cache miss we have to generate the graph, this will take time! return None if format=="graphml": return json.dumps(['URL', graphml]) elif format=="png": return json.dumps(['URL', graphpng]) elif format=="png": return json.dumps(['URL', graphgexf ])
def create_tracker_graph(): cur.execute(("SELECT r.url, r.referrer, r.top_url, c.name, c.value " "FROM http_requests as r, http_cookies as c " "WHERE r.id = c.header_id " "AND c.http_type = 'request' " "AND top_url IN " "(SELECT DISTINCT top_url FROM http_requests LIMIT 1500)")) for url, ref, top, name, value in cur.fetchall(): if ref is None or ref == '': # Empty referrer continue req_host = psl.get_public_suffix(urlparse(url).hostname) ref_host = psl.get_public_suffix(urlparse(ref).hostname) top_host = psl.get_public_suffix(urlparse(top).hostname) if top_host != ref_host: # Request that doesn't have knowledge of top url continue if ref_host == req_host: # Self loops continue if req_host == 'facebook.com': # Facebook continue # Check if identifying cookie for item in id_cookies.keys(): if req_host.endswith(item) and name in id_cookies[item]: # If so, add nodes and edge G.add_node(req_host) G.add_node(ref_host) G.add_edge(ref_host, req_host) break networkx.write_gexf(G,os.path.expanduser('~/Desktop/05062014_triton.gexf'))
def convertNetToGefx(input_file): G = None if input_file.endswith(Constants.GEXF_FORMAT): G = nx.read_gexf(input_file, None, True) elif input_file.endswith(Constants.NET_FORMAT): G=nx.Graph() f = file(input_file, 'r') # iterate over the lines in the file for line in f: # split the line into a list of column values columns = line.split('\t') # clean any whitespace off the items columns = [col.strip() for col in columns] if columns: G.add_edge(columns[0], columns[1]) #write to a gexf file, so that GHOST can read it as well gexf_path = input_file[:-len(Constants.NET_FORMAT)]+Constants.GEXF_FORMAT #add attributes to nodes in gefx file for n,d in G.nodes_iter(data=True): G.node[n]["id"] = n G.node[n]["gname"] = n nx.write_gexf(G, gexf_path) else: print("Unsupported Format") exit(0) print("For "+input_file+" Number of Nodes =", G.number_of_nodes(), "No of edges = ", G.number_of_edges()) return G
if sen == ['']: print '***************skip*****************' continue G = nx.Graph() G.add_nodes_from(sen) for w in rolling_window(sen, 4): G.add_edges_from([(w[0], w[1]), (w[0], w[2]), (w[0], w[3])]) #nx.draw(G) #op_fname ="../../graph_of_words/WebKB/graph_of_words_{}.train.gexf".format(ind) op_fname = "../../graph_of_words/R8/graph_of_words_{}.train.gexf".format( ind) print op_fname #nx.write_graphml (G,op_fname) nx.write_gexf(G, op_fname) ind += 1 del G raw_input() test_filename = '../data/R8/r8-test-no-stop.txt' f = open(test_filename, 'rU') test_sentences = f.readlines() ind = 0 for sen in test_sentences: sen = sen.replace('\n', '').split('\t')[-1].split(' ')[:-1] if sen == ['']: print '***************skip*****************' continue G = nx.Graph() G.add_nodes_from(sen) for w in rolling_window(sen, 4):
# pass print "REPLY COUNTS" print userfreq print freq print poi_retweet #print "Mention COUNTS" #print mentions_count #print usermentionsfreq #print poi_mentions G = nx.DiGraph() #A=pgv.AGraph() for edge in edges: G.add_edge(edge[0], edge[1]) #A.add_edge(edge[0],edge[1]) #A.layout() # layout with default (neato) #A.draw('simple.png') # draw png #export so you can use gephi nx.write_graphml(G, 'ed-test-replies-to.graphml') nx.write_gexf(G, 'ed-test-replies-to.gexf') # nx.draw_spring(G) # nx.draw_shell(G) nx.draw_random(G) plt.show()
positions.sort() pickle.dump(positions, writeFg, pickle.HIGHEST_PROTOCOL) if (args.writeog is not None): ogFile = open(args.writeog, 'wb') pickle.dump(og, ogFile, pickle.HIGHEST_PROTOCOL) ogFile.close() if (args.writef is not None): outf = open(args.writef, 'wb') pickle.dump(frags, outf) outf.close() positions.sort() pg = MakePG(positions, frags) nx.write_gexf(pg, "before_fixing.gexf") nFixed = FixFragments(frags, 0.6) (ref, alt) = StoreFrequency(positions, frags) nRemoved = FilterHomozygousSites(positions, ref, alt, frags, args.minAlleleFreq) pg = MakePG(positions, frags) nx.write_gexf(pg, "after_fixing.gexf") # Try a second round. ClearFragmentSupport(frags) og = BuildOverlapGraph(frags, positions, args.minOverlap) pos = {positions[i]: i for i in range(0, len(positions))} AddOverlapSupport(og, frags, pos) nFixed = FixFragments(frags, 0.6)
def _paga_graph( adata, ax, solid_edges=None, dashed_edges=None, adjacency_solid=None, adjacency_dashed=None, transitions=None, threshold=None, root=0, colors=None, labels=None, fontsize=None, fontweight=None, fontoutline=None, text_kwds: Mapping[str, Any] = MappingProxyType({}), node_size_scale=1.0, node_size_power=0.5, edge_width_scale=1.0, normalize_to_color='reference', title=None, pos=None, cmap=None, frameon=True, min_edge_width=None, max_edge_width=None, export_to_gexf=False, colorbar=None, use_raw=True, cb_kwds: Mapping[str, Any] = MappingProxyType({}), single_component=False, arrowsize=30, ): import networkx as nx node_labels = labels # rename for clarity if (node_labels is not None and isinstance(node_labels, str) and node_labels != adata.uns['paga']['groups']): raise ValueError( 'Provide a list of group labels for the PAGA groups {}, not {}.'. format(adata.uns['paga']['groups'], node_labels)) groups_key = adata.uns['paga']['groups'] if node_labels is None: node_labels = adata.obs[groups_key].cat.categories if (colors is None or colors == groups_key) and groups_key is not None: if groups_key + '_colors' not in adata.uns or len( adata.obs[groups_key].cat.categories) != len( adata.uns[groups_key + '_colors']): _utils.add_colors_for_categorical_sample_annotation( adata, groups_key) colors = adata.uns[groups_key + '_colors'] for iname, name in enumerate(adata.obs[groups_key].cat.categories): if name in settings.categories_to_ignore: colors[iname] = 'grey' nx_g_solid = nx.Graph(adjacency_solid) if dashed_edges is not None: nx_g_dashed = nx.Graph(adjacency_dashed) # convert pos to array and dict if not isinstance(pos, (Path, str)): pos_array = pos else: pos = Path(pos) if pos.suffix != '.gdf': raise ValueError( 'Currently only supporting reading positions from .gdf files. 
' 'Consider generating them using, for instance, Gephi.') s = '' # read the node definition from the file with pos.open() as f: f.readline() for line in f: if line.startswith('edgedef>'): break s += line from io import StringIO df = pd.read_csv(StringIO(s), header=-1) pos_array = df[[4, 5]].values # convert to dictionary pos = {n: [p[0], p[1]] for n, p in enumerate(pos_array)} # uniform color if isinstance(colors, str) and is_color_like(colors): colors = [colors for c in range(len(node_labels))] # color degree of the graph if isinstance(colors, str) and colors.startswith('degree'): # see also tools.paga.paga_degrees if colors == 'degree_dashed': colors = [d for _, d in nx_g_dashed.degree(weight='weight')] elif colors == 'degree_solid': colors = [d for _, d in nx_g_solid.degree(weight='weight')] else: raise ValueError( '`degree` either "degree_dashed" or "degree_solid".') colors = (np.array(colors) - np.min(colors)) / (np.max(colors) - np.min(colors)) # plot gene expression var_names = adata.var_names if adata.raw is None else adata.raw.var_names if isinstance(colors, str) and colors in var_names: x_color = [] cats = adata.obs[groups_key].cat.categories for icat, cat in enumerate(cats): subset = (cat == adata.obs[groups_key]).values if adata.raw is not None and use_raw: adata_gene = adata.raw[:, colors] else: adata_gene = adata[:, colors] x_color.append(np.mean(adata_gene.X[subset])) colors = x_color # plot continuous annotation if (isinstance(colors, str) and colors in adata.obs and not is_categorical_dtype(adata.obs[colors])): x_color = [] cats = adata.obs[groups_key].cat.categories for icat, cat in enumerate(cats): subset = (cat == adata.obs[groups_key]).values x_color.append(adata.obs.loc[subset, colors].mean()) colors = x_color # plot categorical annotation if (isinstance(colors, str) and colors in adata.obs and is_categorical_dtype(adata.obs[colors])): asso_names, asso_matrix = _sc_utils.compute_association_matrix_of_groups( adata, prediction=groups_key, reference=colors, normalization='reference' if normalize_to_color else 'prediction', ) _utils.add_colors_for_categorical_sample_annotation(adata, colors) asso_colors = _sc_utils.get_associated_colors_of_groups( adata.uns[colors + '_colors'], asso_matrix) colors = asso_colors if len(colors) != len(node_labels): raise ValueError( f'Expected `colors` to be of length `{len(node_labels)}`, ' f'found `{len(colors)}`.') # count number of connected components n_components, labels = scipy.sparse.csgraph.connected_components( adjacency_solid) if n_components > 1 and not single_component: logg.debug( 'Graph has more than a single connected component. 
' 'To restrict to this component, pass `single_component=True`.') if n_components > 1 and single_component: component_sizes = np.bincount(labels) largest_component = np.where( component_sizes == component_sizes.max())[0][0] adjacency_solid = adjacency_solid.tocsr()[labels == largest_component, :] adjacency_solid = adjacency_solid.tocsc()[:, labels == largest_component] colors = np.array(colors)[labels == largest_component] node_labels = np.array(node_labels)[labels == largest_component] cats_dropped = (adata.obs[groups_key].cat.categories[ labels != largest_component].tolist()) logg.info( 'Restricting graph to largest connected component by dropping categories\n' f'{cats_dropped}') nx_g_solid = nx.Graph(adjacency_solid) if dashed_edges is not None: raise ValueError( '`single_component` only if `dashed_edges` is `None`.') # edge widths base_edge_width = edge_width_scale * 5 * rcParams['lines.linewidth'] # draw dashed edges if dashed_edges is not None: widths = [x[-1]['weight'] for x in nx_g_dashed.edges(data=True)] widths = base_edge_width * np.array(widths) if max_edge_width is not None: widths = np.clip(widths, None, max_edge_width) nx.draw_networkx_edges( nx_g_dashed, pos, ax=ax, width=widths, edge_color='grey', style='dashed', alpha=0.5, ) # draw solid edges if transitions is None: widths = [x[-1]['weight'] for x in nx_g_solid.edges(data=True)] widths = base_edge_width * np.array(widths) if min_edge_width is not None or max_edge_width is not None: widths = np.clip(widths, min_edge_width, max_edge_width) with warnings.catch_warnings(): warnings.simplefilter("ignore") nx.draw_networkx_edges(nx_g_solid, pos, ax=ax, width=widths, edge_color='black') # draw directed edges else: adjacency_transitions = adata.uns['paga'][transitions].copy() if threshold is None: threshold = 0.01 adjacency_transitions.data[adjacency_transitions.data < threshold] = 0 adjacency_transitions.eliminate_zeros() g_dir = nx.DiGraph(adjacency_transitions.T) widths = [x[-1]['weight'] for x in g_dir.edges(data=True)] widths = base_edge_width * np.array(widths) if min_edge_width is not None or max_edge_width is not None: widths = np.clip(widths, min_edge_width, max_edge_width) nx.draw_networkx_edges(g_dir, pos, ax=ax, width=widths, edge_color='black', arrowsize=arrowsize) if export_to_gexf: if isinstance(colors[0], tuple): from matplotlib.colors import rgb2hex colors = [rgb2hex(c) for c in colors] for count, n in enumerate(nx_g_solid.nodes()): nx_g_solid.node[count]['label'] = str(node_labels[count]) nx_g_solid.node[count]['color'] = str(colors[count]) nx_g_solid.node[count]['viz'] = dict(position=dict( x=1000 * pos[count][0], y=1000 * pos[count][1], z=0, )) filename = settings.writedir / 'paga_graph.gexf' logg.warning(f'exporting to {filename}') settings.writedir.mkdir(parents=True, exist_ok=True) nx.write_gexf(nx_g_solid, settings.writedir / 'paga_graph.gexf') ax.set_frame_on(frameon) ax.set_xticks([]) ax.set_yticks([]) # groups sizes if groups_key is not None and groups_key + '_sizes' in adata.uns: groups_sizes = adata.uns[groups_key + '_sizes'] else: groups_sizes = np.ones(len(node_labels)) base_scale_scatter = 2000 base_pie_size = (base_scale_scatter / (np.sqrt(adjacency_solid.shape[0]) + 10) * node_size_scale) median_group_size = np.median(groups_sizes) groups_sizes = base_pie_size * np.power(groups_sizes / median_group_size, node_size_power) if fontsize is None: fontsize = rcParams['legend.fontsize'] if fontoutline is not None: text_kwds = dict(text_kwds) text_kwds['path_effects'] = [ 
patheffects.withStroke(linewidth=fontoutline, foreground='w') ] # usual scatter plot if not isinstance(colors[0], cabc.Mapping): n_groups = len(pos_array) sct = ax.scatter( pos_array[:, 0], pos_array[:, 1], c=colors[:n_groups], edgecolors='face', s=groups_sizes, cmap=cmap, ) for count, group in enumerate(node_labels): ax.text( pos_array[count, 0], pos_array[count, 1], group, verticalalignment='center', horizontalalignment='center', size=fontsize, fontweight=fontweight, **text_kwds, ) # else pie chart plot else: for ix, (xx, yy) in enumerate(zip(pos_array[:, 0], pos_array[:, 1])): if not isinstance(colors[ix], cabc.Mapping): raise ValueError( f'{colors[ix]} is neither a dict of valid ' 'matplotlib colors nor a valid matplotlib color.') color_single = colors[ix].keys() fracs = [colors[ix][c] for c in color_single] total = sum(fracs) if total < 1: color_single = list(color_single) color_single.append('grey') fracs.append(1 - sum(fracs)) elif not np.isclose(total, 1): raise ValueError(f'Expected fractions for node `{ix}` to be ' f'close to 1, found `{total}`.') cumsum = np.cumsum(fracs) cumsum = cumsum / cumsum[-1] cumsum = [0] + cumsum.tolist() for r1, r2, color in zip(cumsum[:-1], cumsum[1:], color_single): angles = np.linspace(2 * np.pi * r1, 2 * np.pi * r2, 20) x = [0] + np.cos(angles).tolist() y = [0] + np.sin(angles).tolist() xy = np.column_stack([x, y]) s = np.abs(xy).max() sct = ax.scatter([xx], [yy], marker=xy, s=s**2 * groups_sizes[ix], color=color) if node_labels is not None: ax.text( xx, yy, node_labels[ix], verticalalignment='center', horizontalalignment='center', size=fontsize, fontweight=fontweight, **text_kwds, ) return sct
def save_network(self):
    nx.write_gexf(self.G, self.directory + self.name + '.gexf')
def graph2gexf(graph):
    gexf = BytesIO()
    networkx.write_gexf(graph, gexf)
    return gexf.getvalue().decode("utf8")
def write_graph_in_format(graph, filename, fileformat='gexf'):
    if fileformat.lower() == 'json':
        return json.dump(json_graph.node_link_data(graph), open(filename, 'w'))
    return nx.write_gexf(graph, filename)
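# Usage sketch for write_graph_in_format above, assuming networkx is imported
# as nx and json / json_graph are imported as in the snippet; file names are
# illustrative:
G = nx.karate_club_graph()
write_graph_in_format(G, 'karate.gexf')            # GEXF, the default branch
write_graph_in_format(G, 'karate.json', 'json')    # node-link JSON branch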
G.node[k]['viz'][cle] = Visu[cle] # print G.node[k] # nx.set_node_attributes(G, 'weight', attr_dict) outputFile = ndf+network+prefix+'JS.gexf' try: os.remove(ResultGephiPath+'/'+outputFile) except: try: os.remove(ResultGephiPath+'/'+outputFile) except: pass # nx.write_gexf(G, ResultGephiPath+'/'+outputFile, version='1.2draft') fic = open(ResultGephiPath+'/'+outputFile, 'r') # Next is a hack to correct the bad writing of the header of the gexf file # with dynamics properties fictemp=open(ResultGephiPath+'/'+"Good"+outputFile, 'w') ecrit = True data = fic.read() # VERY UGLY Hack here !!!! data = data.replace('ns0:', 'viz:') # may be someone knows how to set namespace in networkx... data = data.replace('a="None"', '') # may be someone knows why network set the "a" attribute... for lig in data.split('\n'): if lig.count('<nodes>'):
# add EDGES with weight g_pos = nx.from_pandas_edgelist(rho_pos, 'keywords1', 'keywords2', 'spearman_cor') g_neg = nx.from_pandas_edgelist(rho_neg, 'keywords1', 'keywords2', 'spearman_cor') # add NODES g_pos.add_nodes_from(nodes_pos.unique(), name=nodes_pos.unique()) g_neg.add_nodes_from(nodes_neg.unique(), name=nodes_neg.unique()) # check g_pos.number_of_nodes() g_pos.number_of_edges() g_neg.number_of_nodes() g_neg.number_of_edges() # save for GEPHI nx.write_gexf(g_pos, "network_positive_correlations.gexf") nx.write_gexf(g_neg, "network_negative_correlations.gexf") # check graph connectivity #nx.is_connected(g_pos) #nx.number_connected_components(g_pos) #nx.number_connected_components(g_neg) #comps = nx.connected_component_subgraphs(g_pos) # get list top 20 nodes (based on degree) degrees = [val for (node, val) in g_pos.degree()] degrees = pd.DataFrame(degrees) #join node name degrees['name'] = nodes_pos.unique() # get Edges
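# A hedged sketch of the input shape assumed above: rho_pos / rho_neg are taken
# to be DataFrames with 'keywords1', 'keywords2' and 'spearman_cor' columns.
import pandas as pd
import networkx as nx

rho_pos = pd.DataFrame({
    'keywords1':    ['alpha', 'alpha', 'beta'],
    'keywords2':    ['beta',  'gamma', 'gamma'],
    'spearman_cor': [0.62,    0.41,    0.55],
})
g_pos = nx.from_pandas_edgelist(rho_pos, 'keywords1', 'keywords2', 'spearman_cor')
nx.write_gexf(g_pos, "network_positive_correlations.gexf")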
if x > y: t = y y = x x = t mat[x][y] += 1 G = nx.Graph() for i in range(len(a)): for j in range(len(a)): if mat[i][j] > 5: G.add_edge(a[i], a[j], weight=mat[i][j]) for i in G.nodes.keys(): for j in c[i]: print(c[i]) if i == j: idx = c[i].index(j) G.nodes[i]["name"] = a[i][idx] nx.write_gexf(G, "/Users/samue/OneDrive/桌面/journal/test.gexf") ''' pos = nx.spring_layout(G,weight='weight') plt.figure(figsize=(25, 25)) plt.axis('off') nx.draw_networkx(G, pos=pos, with_labels=False, node_size=30, edgecolors='black', edge_color='b') plt.savefig('C:/Users/samue/OneDrive/桌面/journal/test.png') plt.show() '''
"%s:E" % g_id, label="%s:%d-%d" % (g_id, g_b, g_l), length=abs(g_b - g_l), score=-score) sg.add_edge("%s:B" % g_id, "%s:E" % f_id, label="%s:%d-%d" % (f_id, f_e, f_l), length=abs(f_e - f_l), score=-score) sg.mark_tr_edges() print sum([1 for c in sg.e_reduce.values() if c == True]) print sum([1 for c in sg.e_reduce.values() if c == False]) G = SGToNXG(sg) nx.write_adjlist(G, "full_string_graph.adj") sg.mark_best_overlap() print sum([1 for c in sg.e_reduce.values() if c == False]) #sg.mark_repeat_overlap() #print sum( [1 for c in sg.repeat_overlap.values() if c == True] ) #print sum( [1 for c in sg.repeat_overlap.values() if c == False] ) #print len(sg.e_reduce), len(sg.repeat_overlap) G = SGToNXG(sg) nx.write_gexf(G, "string_graph.gexf") nx.write_adjlist(G, "string_graph.adj") #generate_max_contig(sg, seqs, out_fn="max_tigs.fa") u_edges = generate_unitig(sg, seqs, out_fn="unitgs.fa") ASM_graph = get_bundles(u_edges) nx.write_gexf(ASM_graph, "asm_graph.gexf")
def generate_unitig(sg, seqs, out_fn, connected_nodes=None): G = SGToNXG(sg) if connected_nodes != None: connected_nodes = set(sg.nodes) out_fasta = open(out_fn, "w") nodes_for_tig = set() sg_edges = set() for v, w in sg.edges: if sg.e_reduce[(v, w)] != True: sg_edges.add((v, w)) count = 0 edges_in_tigs = set() uni_edges = {} path_f = open("paths", "w") uni_edge_f = open("unit_edges.dat", "w") while len(sg_edges) > 0: v, w = sg_edges.pop() #nodes_for_tig.remove(n) upstream_nodes = [] c_node = v p_in_edges = sg.get_in_edges_for_node(c_node) p_out_edges = sg.get_out_edges_for_node(c_node) while len(p_in_edges) == 1 and len(p_out_edges) == 1: p_node = p_in_edges[0].in_node upstream_nodes.append(p_node.name) if (p_node.name, c_node) not in sg_edges: break sg_edges.remove((p_node.name, c_node)) p_in_edges = sg.get_in_edges_for_node(p_node.name) p_out_edges = sg.get_out_edges_for_node(p_node.name) c_node = p_node.name upstream_nodes.reverse() downstream_nodes = [] c_node = w n_out_edges = sg.get_out_edges_for_node(c_node) n_in_edges = sg.get_in_edges_for_node(c_node) while len(n_out_edges) == 1 and len(n_in_edges) == 1: n_node = n_out_edges[0].out_node downstream_nodes.append(n_node.name) if (c_node, n_node.name) not in sg_edges: break sg_edges.remove((c_node, n_node.name)) n_out_edges = sg.get_out_edges_for_node(n_node.name) n_in_edges = sg.get_in_edges_for_node(n_node.name) c_node = n_node.name whole_path = upstream_nodes + [v, w] + downstream_nodes #print len(whole_path) count += 1 subseqs = [] for i in range(len(whole_path) - 1): v_n, w_n = whole_path[i:i + 2] edge = sg.edges[(v_n, w_n)] edges_in_tigs.add((v_n, w_n)) #print n, next_node.name, e.attr["label"] read_id, coor = edge.attr["label"].split(":") b, e = coor.split("-") b = int(b) e = int(e) if b < e: subseqs.append(seqs[read_id][b:e]) else: try: subseqs.append("".join( [RCMAP[c] for c in seqs[read_id][b:e:-1]])) except: print seqs[read_id] uni_edges.setdefault((whole_path[0], whole_path[-1]), []) uni_edges[(whole_path[0], whole_path[-1])].append( (whole_path, "".join(subseqs))) print >> uni_edge_f, whole_path[0], whole_path[-1], "-".join( whole_path), "".join(subseqs) print >> path_f, ">%05dc-%s-%s-%d %s" % ( count, whole_path[0], whole_path[-1], len(whole_path), " ".join(whole_path)) print >> out_fasta, ">%05dc-%s-%s-%d" % ( count, whole_path[0], whole_path[-1], len(whole_path)) print >> out_fasta, "".join(subseqs) path_f.close() uni_edge_f.close() uni_graph = nx.DiGraph() for n1, n2 in uni_edges.keys(): uni_graph.add_edge(n1, n2, weight=len(uni_edges[(n1, n2)])) nx.write_gexf(uni_graph, "uni_graph.gexf") out_fasta.close() return uni_edges
# 执行sql语句 try: with con.cursor() as cursor: sql = "select * from user" cursor.execute(sql) result = cursor.fetchall() finally: con.close() df = pd.DataFrame(result) #转换成DataFrame格式 df.head() name_data = df['name'].tolist() #add_node following_data = df['following_id'] relation_list = [] for i in range(len(df)): a = df['name'][i] #取name if following_data[i] != '[]': b = following_data[i].split(",") #取following_id第一个数据并转换为list for j in range(len(b)): d = b[j].split("'")[1] if d in name_data: relation_list.append((a, d)) #add_edge import networkx as nx G = nx.Graph() G.clear() G.add_nodes_from(name_data) G.add_edges_from(relation_list) print(G.number_of_nodes(), G.number_of_edges()) nx.write_gexf(G, 'social_network.gexf')
if not G.has_edge(advisor_name, person): G.add_edge(advisor_name, person) G.node[advisor_name]['link'] = BASE_URL + advisor_link else: print 'Already processed: ' + person return G, processed # Depth 0 builds only the trunk based on the advisors, increasing the depth # recovers more student genterations. def build_genealogy(base_person_name, base_person_link, depth=0): MAX_DEPTH = depth G = nx.DiGraph() processed = [] G, _ = get_trunk(base_person_name, base_person_link, G, processed) processed = [] for i in range(MAX_DEPTH): print 'Expanding: ' + str(i) G, processed = expand_tree(G, processed) return G if __name__ == "__main__": print 'Getting genealogy' G = build_genealogy('Andy Hopper', 'http://en.wikipedia.org/wiki/Andy_Hopper') print G.nodes() print 'Total nodes: ' + str(len(G.nodes())) nx.write_gexf(G, "./test.gexf") print 'Fin'
to_id = t['retweeted_status']['id_str'] to_user = t['retweeted_status']['user']['screen_name'] to_user_id = t['retweeted_status']['user']['id_str'] add(from_user, from_id, to_user, to_id, "retweet") if options.min_subgraph_size or options.max_subgraph_size: g_copy = G.copy() for g in networkx.connected_component_subgraphs(G): if options.min_subgraph_size and len(g) < options.min_subgraph_size: g_copy.remove_nodes_from(g.nodes()) elif options.max_subgraph_size and len(g) > options.max_subgraph_size: g_copy.remove_nodes_from(g.nodes()) G = g_copy if output.endswith(".gexf"): networkx.write_gexf(G, output) elif output.endswith(".gml"): networkx.write_gml(G, output) elif output.endswith(".dot"): nx_pydot.write_dot(G, output) elif output.endswith(".json"): json.dump(to_json(G), open(output, "w"), indent=2) elif output.endswith(".html"): graph_data = json.dumps(to_json(G), indent=2) html = """<!DOCTYPE html> <meta charset="utf-8"> <script src="https://platform.twitter.com/widgets.js"></script>
G.add_edge(current_node, nodes[link]) else: if link not in to_visit: to_visit.append(link) m = G.number_of_nodes() + 1 G.add_node(m, page=link) G.add_edge(current_node, m) nodes[link] = m else: m = nodes[link] G.add_edge(current_node, m) else: if link not in outlinks: m = G.number_of_nodes() + 1 G.add_node(m, page=link) G.add_edge(current_node, m) outlinks.append(link) nodes[link] = m else: m = nodes[link] G.add_edge(current_node, m) outcome = dict(zip(visited, responses)) nx.write_gexf(G, 'ng.gexf')
from barl_simpleoptions import SubgoalOption from barl_simpleoptions import PrimitiveOption from barl_simpleoptions import OptionAgent from two_rooms_environment import TwoRoomsEnvironment from two_rooms_state import TwoRoomsState ################################ ## Generate Interaction Graph ## ################################ # Generate state-interaction graph for this environment and save it to a file. initial_state = TwoRoomsState((0, 0)) state_transition_graph = initial_state.generate_interaction_graph( [initial_state]) nx.write_gexf(state_transition_graph, "sa_graph.gexf") ######################## ## Construct options. ## ######################## options = [] # Construct primitive options. primitive_actions = TwoRoomsState.actions for action in primitive_actions: options.append(PrimitiveOption(action)) # Construct subgoal-directed option (i.e. door subgoal). door_policy_file_path = "door_option_policy.json" door_option = SubgoalOption(TwoRoomsState((1, 3)), state_transition_graph, door_policy_file_path, 19)
def exportGraph(graph):
    nx.write_gexf(graph, 'graph.gexf')
def main(): sub1, sub2, DEBUG, VERBOSE, LIMIT = parse_command_line_args() if DEBUG: sub1, sub2 = '100pushups', 'MakeupAddiction' user_agent = ("reddit_sna scraper v0.1 by /u/sna_bot " "https://github.com/brianreallymany/reddit_sna") r = praw.Reddit(user_agent=user_agent) graph = nx.Graph() submissions_per_subreddit = LIMIT # Add nodes and edges for users of first subreddit if VERBOSE: print("\nAdding nodes and in_group_submissions edges for first "+\ " subreddit, " + sub1) graph = update_graph_with_subreddit_of_interest(graph, submissions_per_subreddit, sub1, r, DEBUG, VERBOSE) # Add nodes and edges for users of second subreddit # If the two subreddits have any users in common, # edges between them will be annotated with the # "in_group_submissions" field and the name of the submission if VERBOSE: print("Adding nodes and in_group_submissions edges for second "+\ "subreddit, " + sub2) graph = update_graph_with_subreddit_of_interest(graph, submissions_per_subreddit, sub2, r, DEBUG, VERBOSE) # For each user in the graph, explore previous comments # made outside of the user's "user_of" subreddit(s). # If other users from the graph are present in the same # submission, add an edge with "out_group_submissions" # and the submission permalink. if VERBOSE: print("\nNow updating graph with submissions and comments from "+\ str(len(graph.nodes())) + " users.\n") count = 1 for user in graph.nodes(): update_graph_with_user_comments(graph, user, r, (sub1, sub2), DEBUG, VERBOSE, LIMIT) count += 1 if VERBOSE: if count % 100 == 0: print("\n\t\tNow processing user " + str(count) + "\n") # Summarize graph if VERBOSE: print_graph_summary(graph) print("writing gexf...") # Write .gexf file timestamp = datetime.datetime.now().isoformat() filename = sub1 + "." + sub2 + "." filename += "limit_" + str(LIMIT) + "." filename += timestamp + ".gexf" nx.write_gexf(graph, filename) if VERBOSE: print("wrote gexf...")
def execute(self, G, epsilon=0.25, weighted=False, min_community_size=30): """ Execute Demon algorithm :param G: the networkx graph on which perform Demon :param epsilon: the tolerance required in order to merge communities :param weighted: Whether the graph is weighted or not :param min_community_size:min nodes needed to form a community """ tempo_prima_parte = 0.0 tempo_pr_pt = 0.0 time_first = time.time() nx.set_node_attributes(G, 'communities', 0) ####### self.G = G self.epsilon = epsilon self.min_community_size = min_community_size for n in self.G.nodes(): G.node[n]['communities'] = [n] self.weighted = weighted ####### all_communities = {} #LABELING print("Map Start") nodiEgo = self.G.nodes() tempo_prima_parte += time.time() - time_first if (processor == 1): dicts = map(Functor(self.G), nodiEgo) else: dicts = p.map(Functor(self.G), nodiEgo) time_second = time.time() p.join print("Map End") #MERGING print("Reduce Start") community_to_nodes_tmp = {} community_to_nodes_tmp2 = {} all_communities = [] old_communities = [] tempo_accoppiamento = 0.0 tempo_map = 0.0 tempo_scorr = 0.0 tempo_ultimo_step = 0.0 tempo_iteraz = 0.0 tempo_reale = 0.0 millecinque = False tempo_millecinque = 0.0 millequattro = False tempo_millequattro = 0.0 milletre = False tempo_milletre = 0.0 milledue = False tempo_milledue = 0.0 cont = 0 for dd in dicts: tempo_pr_pt += dd["time"] dd.pop("time", None) for k in dd.keys(): community_to_nodes_tmp[cont] = list(dd[k]) cont += 1 #writef(community_to_nodes_tmp) #community_to_nodes_tmp = readf() time_in = time.time() tempo_prima_parte += time_in - time_second first = "" second = "" k = 0 #store number of communities before merge old_len = len(community_to_nodes_tmp) if (k_max_str == "log"): k_max = int(math.log(old_len)) elif (k_max_str == "sqrt"): k_max = math.sqrt(old_len) else: k_max = 1 #k_max = 1 j = 1 while True: time_in_iteraz = time.time() i = 0 group_of_comm = [] all_communities = [] alone = "" dim_group = int(math.ceil(old_len / processor)) print(old_len, ", ", processor, ", ", dim_group) max_time = 0.0 #save groups of communities in "groups_of_comm", the number of elements for each sub_community depends on how many processing unit are present #if the number of communities is the same of the processors i consider them as a single group. 
if (dim_group == 1): group_of_comm = dict_split(community_to_nodes_tmp, old_len) results = {} results = merge_communities2(group_of_comm) for dd in results.keys(): if (dd == "time"): if (results[dd] > max_time): max_time = results[dd] else: all_communities.append(list(dd)) tempo_reale += max_time else: group_of_comm = dict_split(community_to_nodes_tmp, dim_group) if (group_of_comm != []): if (type(group_of_comm) == dict): results = merge_communities2(group_of_comm) elif (processor == 1): results = merge_communities2(group_of_comm) else: results = p.map(merge_communities2, group_of_comm) #put the single communities in a new list "all_communities" if (type(results) == dict): for item in results.keys(): if (item == "time"): if (results[item] > max_time): max_time = results[item] else: all_communities.append(list(item)) else: for dd in results: if (type(dd) == tuple): all_communities.append(list(dd)) else: for item in dd.keys(): if (item == "time"): if (dd[item] > max_time): max_time = dd[item] else: all_communities.append(list(item)) tempo_reale += max_time if (len(all_communities) < 12000 and millecinque == False): tempo_millecinque = time.time() millecinque = True if (len(all_communities) < 11000 and millequattro == False): tempo_millequattro = time.time() millequattro = True if (len(all_communities) < 10000 and milletre == False): tempo_milletre = time.time() milletre = True if (len(all_communities) < 9000 and milledue == False): tempo_milledue = time.time() milledue = True if (len(all_communities) == old_len): k += 1 else: k = 0 if ((len(all_communities) == old_len) and (k >= k_max or old_len <= 2)): #create vector to write at the end community_to_nodes_tmp2.clear() for dd in all_communities: community_to_nodes_tmp2[tuple(sorted(dd))] = 0 break else: old_len = len(all_communities) old_communities = list(all_communities) random.shuffle(all_communities) j += 1 community_to_nodes_tmp.clear() x = 0 #create vector for next cycle for dd in all_communities: community_to_nodes_tmp[x] = list(sorted(dd)) x += 1 time_last_step = time.time() print("Tempo-Iterazione: ", time_last_step - time_in_iteraz) tempo_iteraz += time_last_step - time_in_iteraz time_fin = time.time() time_prima_parte = time_in - time_first time_prima_parte_reale = tempo_prima_parte + tempo_pr_pt / processor time_seconda_parte = time_fin - time_in print("tempo Prima Parte: " + str(time_prima_parte)) print("tempo Prima Reale: " + str(time_prima_parte_reale)) print("tempo Seconda Parte: " + str(time_seconda_parte)) print("tempo Seconda Reale: " + str(tempo_reale)) print("Tempo Totale: " + str(time_prima_parte + time_seconda_parte)) print("Tempo Totale Reale: " + str(time_prima_parte_reale + tempo_reale)) print("tempo Iterazione medio = ", tempo_iteraz / j) print("Reduce End") #OUTPUT print("Output Start") all_communities = {} all_communities = community_to_nodes_tmp2 out_file_com = open( "communities(epsilo=" + str(self.epsilon) + "," + str(min_community_size) + ").txt", "w") idc = 0 classified = 0 num_of_members = 0 for c in community_to_nodes_tmp2.keys(): out_file_com.write("%d\t%d\t%s\n" % (idc, len(c), str(sorted(c)))) idc += 1 num_of_members += len(c) out_file_com.flush() out_file_com.close() print("Numero Membri Medio = ", num_of_members / idc) for c in community_to_nodes_tmp2.keys(): for n in c: G.node[n]['comm_color'] = str(-1) communities = list(community_to_nodes_tmp2.keys()) communities.sort(key=len) for i, c in enumerate(communities): for n in c: if (G.node[n]['comm_color'] == str(-1)): classified += 1 
G.node[n]['comm_color'] = str(i) perc_nodi_scartati = str(1 - (classified / num_of_nodes)) print("Percentuale nodi scartati= " + str(1 - (classified / num_of_nodes))) for n in self.G.nodes(): G.node[n]['communities'] = n nx.write_gexf( G, file_name + "(epsilon=" + str(epsilon) + "," + str(min_community_size) + ").gexf") with open('tempiNew.csv', 'a', newline='') as csvfile: spamwriter = csv.writer(csvfile, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL) spamwriter.writerow([ processor, epsilon, k_max_str, str(tempo_millecinque - time_first).replace('.', ','), str(tempo_millequattro - time_first).replace('.', ','), str(tempo_milletre - time_first).replace('.', ','), str(tempo_milledue - time_first).replace('.', ','), str(time_prima_parte).replace('.', ','), str(time_prima_parte_reale).replace('.', ','), str(time_seconda_parte).replace('.', ','), str(tempo_reale).replace('.', ','), str(time_prima_parte + time_seconda_parte).replace('.', ','), str(time_prima_parte_reale + tempo_reale).replace('.', ','), old_len, perc_nodi_scartati ]) print("Output end") return
noeud)]['label'] = noeud + '-' + attr['name'] else: print "on devrait pas être là, never", noeud #G.node[ListeNoeuds.index(noeud)]['end'] = ExtraitMinDate(G.node[ListeNoeuds.index(noeud)]) + DureeBrevet #G.node[ListeNoeuds.index(noeud)]['start'] = G.graph['defaultedgetype'] = "directed" G.graph['timeformat'] = "date" G.graph['mode'] = "dynamic" G.graph['start'] = dateMini G.graph['end'] = dateMax ndf = ndf.replace('Families', '') ndf = ndf.replace('.dump', '') nx.write_gexf(G, ResultPathGephi + '\\' + ndf + "2.gexf", version='1.2draft') fic = open(ResultPathGephi + '\\' + ndf + '2.gexf', 'r') # # Next is a hack to correct the bad writing of the header of the gexf file # with dynamics properties fictemp = open(ResultPathGephi + '\\' + "Good" + ndf + '2.gexf', 'w') fictemp.write( """<?xml version="1.0" encoding="utf-8"?><gexf version="1.2" xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance"> <graph defaultedgetype="directed" mode="dynamic" timeformat="date"> <attributes class="edge" mode="static"> <attribute id="11" title="NormedWeight" type="double" /> <attribute id="13" title="deb" type="string" /> <attribute id="14" title="fin" type="string" /> <attribute id="15" title="rel" type="string" /> </attributes>
nodes = []
links = []
for n, line in enumerate(f):
    node = {}
    link = {}
    source, targets = parse(line)
    idx = getcluster(source)
    node["name"] = source
    node["group"] = idx
    nodes.append(node.copy())
    G.add_node(source, group=idx)
    for t in targets:
        exploded = t.split(' ')
        distance = exploded[1]
        edge = exploded[0]
        link["source"] = source
        link["target"] = edge
        link["weight"] = int(distance)
        # cast here as well, so the graph/GEXF weight matches the numeric weight in the JSON links
        G.add_edge(source, edge, weight=int(distance))
        links.append(link.copy())
print("{\"nodes\":", file=y)
print(json.dumps(nodes), file=y)
print(",", file=y)
print("\"links\":", file=y)
print(json.dumps(links), file=y)
print("}", file=y)
nx.write_gexf(G, "./data/flickr/network_flickr.gexf")
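# networkx can also produce the node-link JSON directly instead of building the nodes/links lists
# by hand; a minimal sketch of the same export (the output path is an assumption):
import json
import networkx as nx
from networkx.readwrite import json_graph

def dump_node_link(G, path="./data/flickr/network_flickr.json"):
    with open(path, "w") as fh:
        json.dump(json_graph.node_link_data(G), fh)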
def write(self): suffix = "".join( ["_" + k + "=" + v for (k, v) in self.attributes.items()]) nx.write_gexf(self.graph, DIR_GEPHI + "papers-network" + suffix + ".gexf")
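# A quick illustration of the suffix construction used in write() above; the attribute values are
# assumed to already be strings (otherwise the "+ v" concatenation would need str(v)).
attributes = {"year": "2018", "min_cites": "5"}  # illustrative values
suffix = "".join(["_" + k + "=" + v for (k, v) in attributes.items()])
print(suffix)  # something like "_year=2018_min_cites=5", depending on dict order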
def _export_to_gexf(self, filename: str): """Save the network as .gexf file.""" nx.write_gexf(self.graph, filename)
@author: paavo.ronni
"""
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import collections
from tqdm import tqdm
import networkx as nx

# a raw string keeps the backslashes of the Windows path from being read as escape sequences
G = nx.read_gexf(r'C:\OmatProjektit\ComplexNetworks\graphs\chessnetwork_joined_filtered.gexf')
# keep only the largest connected component
G = max(nx.connected_component_subgraphs(G), key=len)
N = len(G.nodes())
# Barabasi-Albert reference model with the same number of nodes, m = 8
BA_G = nx.barabasi_albert_graph(N, 8)
print('Number of edges: {}'.format(G.number_of_edges()))
print('Average degree: {}'.format(2. * G.number_of_edges() / G.number_of_nodes()))
degree_sequence = [degree for node, degree in G.degree()]
print('Max degree: {}'.format(max(degree_sequence)))
print('Network density: {}'.format(G.number_of_edges() / (N * (N - 1) / 2.)))
print('Average clustering coef.: {}'.format(nx.average_clustering(G)))
nx.write_gexf(BA_G, 'barabasi_albert_graph.gexf')
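# The matplotlib/collections imports above are unused in the visible part of the script, which
# suggests a degree-distribution plot follows; a minimal sketch of such a comparison between the
# chess network and the BA reference model (function and file names are assumptions):
import collections
import matplotlib.pyplot as plt

def plot_degree_ccdf(graphs_with_labels, out_png='degree_comparison.png'):
    for graph, label in graphs_with_labels:
        degrees = [d for _, d in graph.degree()]
        counts = collections.Counter(degrees)
        ks = sorted(counts)
        total = float(len(degrees))
        # complementary cumulative distribution P(K >= k)
        ccdf = [sum(counts[k2] for k2 in ks if k2 >= k) / total for k in ks]
        plt.loglog(ks, ccdf, marker='.', linestyle='none', label=label)
    plt.xlabel('degree k')
    plt.ylabel('P(K >= k)')
    plt.legend()
    plt.savefig(out_png)

# plot_degree_ccdf([(G, 'chess network'), (BA_G, 'BA model')])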
pos = nx.spring_layout(G, scale=1000) print('Saving attributes...') #Add the visual attrs to each node for i in range(len(comms)): for n in comms[i]: G.node[n]['viz'] = { 'color': rgbs[i], 'position': { 'x': pos[n][0], 'y': pos[n][1] } } #Export to 'facebook.gexf' nx.write_gexf(G, './network/data/facebook.gexf') # nx.draw_graphviz(sg, prog='sfdp') # c = max_clique(sg) # print nx.info(c) # for n in sg: # sg.node[n]['name'] = n # d = json_graph.node_link_data(sg) # json.dump(d, open('force.json','w')) # nx.write_dot(sg, 'data.dot') # print nx.radius(sg) # print nx.diameter(sg)
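# The 'viz' attribute set above is understood by nx.write_gexf (colour as r/g/b integers plus an
# optional alpha). A hedged sketch of how the rgbs list could be built, one colour per community:
import colorsys

def make_rgbs(n_comms):
    rgbs = []
    for i in range(n_comms):
        # evenly spaced hues, converted to 0-255 RGB dicts in the GEXF viz colour format
        r, g, b = colorsys.hsv_to_rgb(i / float(max(n_comms, 1)), 0.8, 0.9)
        rgbs.append({'r': int(r * 255), 'g': int(g * 255), 'b': int(b * 255), 'a': 1})
    return rgbs

# e.g. rgbs = make_rgbs(len(comms))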
def Smoothness(): todayDate = graphUtils.getTodayDateFolder() lastSmoothnessDate = graphUtils.loadSettings( graphConstants.LAST_GRAPH_SMOOTHNESS_DIR) lastSuggSmoothnessDate = graphUtils.loadSettings( graphConstants.LAST_GRAPH_SUGG_SMOOTHNESS_DIR) if lastSmoothnessDate: graphUtils.logger.info("Graph Smoothness done last for =" + lastSmoothnessDate) else: graphUtils.logger.info("Graph Smoothness done last for None") if lastSuggSmoothnessDate: graphUtils.logger.info("GraphSugg Smoothness done last for =" + lastSuggSmoothnessDate) else: graphUtils.logger.info("GraphSugg Smoothness done last for None") if todayDate == lastSmoothnessDate and todayDate == lastSuggSmoothnessDate: graphUtils.logger.info( "Graph Smoothness signal already done for today :" + todayDate) return True graph_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR, graphConstants.GRAPH_DIR, graphConstants.TYPE_MAIN) graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE) write_graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE) if not os.path.exists(graph_path): os.makedirs(graph_path) G = nx.read_gexf(graph_file) trainFiles, trainFileNames = graphUtils.findRecommTrainGraphFiles() trainCorpus = graphUtils.findCorpus(trainFiles) bm25obj = Bm25(trainCorpus) trainUniqueWords = [] for trainText in trainCorpus: trainUniqueWords.append(set(trainText)) if todayDate != lastSmoothnessDate: testFiles, testFileName = graphUtils.findRecommFiles() testCorpus = graphUtils.findCorpus(testFiles) testUniqueWords = [] mini = 100 maxi = -1 count = 0 smoothness = zeros((len(testCorpus), len(trainCorpus))) for testText in testCorpus: testUniqueWords.append(set(testText)) for testDoc in range(len(testCorpus)): recomm_nodename = testFileName[testDoc] uniqueTest = testUniqueWords[testDoc] SminusDcontext = zeros(bm25obj.N) DminusScontext = zeros(bm25obj.N) for trainDoc in range(len(trainCorpus)): uniqueTrain = trainUniqueWords[trainDoc] SminusD = [ word for word in trainCorpus[trainDoc] if word not in uniqueTest ] DminusS = [ word for word in testCorpus[testDoc] if word not in uniqueTrain ] SminusDcontext = bm25obj.BM25Score(SminusD) DminusScontext = bm25obj.BM25Score(DminusS) smoothness[testDoc][trainDoc] = np.dot(SminusDcontext, DminusScontext) dict_arr = { key: value for (key, value) in enumerate(smoothness[testDoc]) } sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1)) sorted_x.reverse() sorted_x = sorted_x[:graphConstants.MAX_SMOOTHNESS_EDGE] total = sum([pair[1] for pair in sorted_x]) for (idxsim, val) in sorted_x: prob = val / total if recomm_nodename not in G.nodes(): G.add_node(recomm_nodename) G.node[recomm_nodename][ 'type'] = graphConstants.TYPE_GOOGLE trainNode = trainFileNames[idxsim] if trainNode in G.nodes(): if prob < mini: mini = prob if prob > maxi: maxi = prob if G.has_edge(recomm_nodename, trainNode) is False: G.add_edge(recomm_nodename, trainNode, weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT) else: G[recomm_nodename][trainNode][ 'weight'] = G[recomm_nodename][trainNode][ 'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT if G.has_edge(trainNode, recomm_nodename) is False: G.add_edge(trainNode, recomm_nodename, weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT) else: G[trainNode][recomm_nodename][ 'weight'] = G[trainNode][recomm_nodename][ 'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT count = count + 1 #print smoothness[testDoc] graphUtils.logger.info( "Simple graph Smoothness completed for normalGoogle today. 
Stats follow" ) graphUtils.logger.info("mini =" + str(mini)) graphUtils.logger.info("maxi =" + str(maxi)) graphUtils.logger.info("Smoothness edges count =" + str(count)) nx.write_gexf(G, write_graph_file) graphUtils.saveSettings(graphConstants.LAST_GRAPH_SMOOTHNESS_DIR, todayDate) pass if todayDate != lastSuggSmoothnessDate: testFiles, testFileName = graphUtils.findSuggRecommFiles() testCorpus = graphUtils.findCorpus(testFiles) testUniqueWords = [] mini = 100 maxi = -1 count = 0 smoothness = zeros((len(testCorpus), len(trainCorpus))) for testText in testCorpus: testUniqueWords.append(set(testText)) for testDoc in range(len(testCorpus)): recomm_nodename = testFileName[testDoc] uniqueTest = testUniqueWords[testDoc] SminusDcontext = zeros(bm25obj.N) DminusScontext = zeros(bm25obj.N) for trainDoc in range(len(trainCorpus)): uniqueTrain = trainUniqueWords[trainDoc] SminusD = [ word for word in trainCorpus[trainDoc] if word not in uniqueTest ] DminusS = [ word for word in testCorpus[testDoc] if word not in uniqueTrain ] SminusDcontext = bm25obj.BM25Score(SminusD) DminusScontext = bm25obj.BM25Score(DminusS) smoothness[testDoc][trainDoc] = np.dot(SminusDcontext, DminusScontext) dict_arr = { key: value for (key, value) in enumerate(smoothness[testDoc]) } sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1)) sorted_x.reverse() sorted_x = sorted_x[:graphConstants.MAX_SMOOTHNESS_EDGE] total = sum([pair[1] for pair in sorted_x]) for (idxsim, val) in sorted_x: prob = val / total if recomm_nodename not in G.nodes(): G.add_node(recomm_nodename) G.node[recomm_nodename]['type'] = graphConstants.TYPE_SUGG trainNode = trainFileNames[idxsim] if trainNode in G.nodes(): if prob < mini: mini = prob if prob > maxi: maxi = prob if G.has_edge(recomm_nodename, trainNode) is False: G.add_edge(recomm_nodename, trainNode, weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT) else: G[recomm_nodename][trainNode][ 'weight'] = G[recomm_nodename][trainNode][ 'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT if G.has_edge(trainNode, recomm_nodename) is False: G.add_edge(trainNode, recomm_nodename, weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT) else: G[trainNode][recomm_nodename][ 'weight'] = G[trainNode][recomm_nodename][ 'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT count = count + 1 #print smoothness[testDoc] graphUtils.logger.info( "Simple graph Smoothness completed for suggGoogle today. Stats follow" ) graphUtils.logger.info("mini =" + str(mini)) graphUtils.logger.info("maxi =" + str(maxi)) graphUtils.logger.info("Smoothness edges count =" + str(count)) nx.write_gexf(G, write_graph_file) graphUtils.saveSettings(graphConstants.LAST_GRAPH_SUGG_SMOOTHNESS_DIR, todayDate) pass
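# The "add the edge or bump its weight" logic appears four times in Smoothness(); a small hedged
# refactoring sketch (the helper name is hypothetical, not part of the original module):
def _add_or_bump_edge(G, u, v, delta):
    if G.has_edge(u, v):
        G[u][v]['weight'] = G[u][v]['weight'] + delta
    else:
        G.add_edge(u, v, weight=delta)

# e.g. _add_or_bump_edge(G, recomm_nodename, trainNode, prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT)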
add_annotations("subjects", references_article_grouped, g) add_annotations("authors", references_article_grouped, g) add_annotations("institutions", references_article_grouped, g) add_annotations("keywords", references_article_grouped, g) add_annotations("countries", references_article_grouped, g) del references_article_grouped if CONFIG["report_verbose"]: print "have now %s nodes" % len(g.nodes()) if not os.path.exists(CONFIG["output_directory"]): os.mkdir(CONFIG["output_directory"]) if CONFIG["export_ref_annotated_format"] == "gexf": if CONFIG["process_verbose"]: print "write gexf export" networkx.write_gexf( g, os.path.join(CONFIG["output_directory"], "%s_annotated.gexf" % span)) elif CONFIG["export_ref_annotated_format"] == "edgelist": if CONFIG["process_verbose"]: print "write csv export" networkx.write_weighted_edgelist(g, os.path.join( CONFIG["output_directory"], "%s_annotated.csv" % span), delimiter="\t") elif CONFIG["export_ref_annotated_format"] == "pajek": if CONFIG["process_verbose"]: print "write pajek export" networkx.write_pajek( g, os.path.join(CONFIG["output_directory"], "%s_annotated.net" % span)) elif CONFIG["export_ref_annotated_format"] == "graphml":
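    # The snippet above is cut off at the "graphml" branch. Presumably it mirrors the other export
    # branches; a hedged sketch of what it likely looks like (the file name pattern is an assumption
    # based on the gexf/pajek branches, not taken from the original source):
    if CONFIG["process_verbose"]:
        print "write graphml export"
    networkx.write_graphml(
        g, os.path.join(CONFIG["output_directory"], "%s_annotated.graphml" % span))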
def main(argv): parser = argparse.ArgumentParser(description='convert Celera(R) Assembler\'s \"best.edges\" to a gexf graph file') parser.add_argument('-g','--gkp_store', help='CA gkp_store directory, (celera-assembler.gkpStore)', default="celera-assembler.gkpStore") parser.add_argument('-t','--tig_store', help='CA tig_store directory, (celera-assembler.tigStore)', default="celera-assembler.tigStore") parser.add_argument('-b','--best_edge', help='CA best edge file, (./4-unitigger/best.edges)', default="./4-unitigger/best.edges") parser.add_argument('-c','--csv_data', help='file containing arbitrary data in csv format', required=False) parser.add_argument('-o','--output', help='output gexf file, (output.gexf)', default="output") args = parser.parse_args() gkp_store = args.gkp_store tig_store = args.tig_store best_edge = args.best_edge csv = args.csv_data output = args.output G=nx.DiGraph() frg_to_tig = {} cout = {} args = shlex.split("tigStore -g %s -t %s 2 -D unitiglist" % (gkp_store, tig_store )) out = subprocess.check_output(args) out = out.split("\n") for l in out: l = l.strip().split() if len(l) == 0: continue if l[0] == "maID": continue unitig_id = int(l[0]) os.system("tigStore -g %s -t %s 2 -d frags -u %d > frag_list" % ( gkp_store, tig_store, unitig_id) ) args = shlex.split( "tigStore -g %s -t %s 2 -d frags -u %d" % ( gkp_store, tig_store, unitig_id) ) f_out = subprocess.check_output(args) f_out = f_out.split("\n") for l in f_out: """FRG 1453 179419,182165""" l = l.replace(",", " ") l = l.strip().split() if len(l) == 0: continue frg_id = l[1] frg_to_tig[frg_id] = unitig_id if(csv): with open(csv) as fin: for l in fin: l = l.strip().split(",") contig, cov, size, ref = l cout[contig] = size, cov, ref with open(best_edge) as f: for l in f: if l[0] == "#": continue l = l.strip().split() id1, lib_id, best5, o1, best3, o3, j1, j2 = l # id1, lib_id, best5, o1, best3, o3 = l try: G.add_node(id1, label="utg%s" % frg_to_tig[id1], size=int(cout["unitig_%s"%frg_to_tig[id1]][0]), cov=float(cout["unitig_%s"%frg_to_tig[id1]][1]),ref=(cout["unitig_%s"%frg_to_tig[id1]][2])) except KeyError: G.add_node(id1, label="utg%s" % frg_to_tig[id1], size=int(0), cov=float(0)) if best5 != "0": G.add_edge(best5, id1) if best3 != "0": G.add_edge(id1, best3) output_gexf = "%s.gexf" % output nx.write_gexf(G, output_gexf)
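# A hedged sanity check for the converter above: reload the GEXF it wrote and confirm the node
# attributes survived (the default output name "output.gexf" is assumed; the helper is hypothetical).
import networkx as nx

def summarize_best_edges_graph(path="output.gexf", top=5):
    g = nx.read_gexf(path)
    print("%d unitigs, %d best edges" % (g.number_of_nodes(), g.number_of_edges()))
    for node, data in list(g.nodes(data=True))[:top]:
        print("%s label=%s size=%s cov=%s" % (node, data.get("label"), data.get("size"), data.get("cov")))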
def get_embeddedness(self): DG1 = nx.MultiDiGraph() # for only following graph DG2 = nx.Graph() # for mention and following graph (undirected) DG_2 = nx.MultiDiGraph() # FOR GEPHI: for mention and following graph ################ # Get list of scientist and other public users a node (could be either scientist or public) follows ################ print("Getting following dict ...") t_start = time.time() following_dict = {} for n in range(1, 21): lines = open(path_to_following_list_folder + str(n) + '.csv', 'r').readlines() for line in lines: spline = line.rstrip('\n').split(',') key = spline[0] value = [] for n in range(1, len(spline) - 1, 2): value.append([spline[n], spline[n + 1]]) following_dict[key] = (value) print("Length of following dict: " + str(len(following_dict))) t_end = time.time() total_time = round(((t_end - t_start) / 60), 2) print("Computing time was " + str(total_time) + " minutes.") ################### # create graph for following_list_dict ################### t_start = time.time() lines = open(path_to_combined_mention_edges, 'r').readlines() edges_pos = [] edges_neg = [] for line in lines: spline = line.rstrip('\n').split(',') if spline[2] == 'pos': edges_pos.append([spline[0], spline[1]]) if spline[2] == 'neg': edges_neg.append([spline[0], spline[1]]) print() print("Length of positive edges (mentions): " + str(len(edges_pos))) print("Length of negative edges (mentions): " + str(len(edges_neg))) t_end = time.time() total_time = round(((t_end - t_start) / 60), 2) print("Computing time was " + str(total_time) + " minutes.") print () print ("Getting nodes (mention)...") t_start = time.time() nodes_temp = [] # get number of nodes from mention graph, to be used to check with final nodes count (should be the same!) for ep in edges_pos: if ep[0] not in nodes_temp: nodes_temp.append(ep[0]) if ep[1] not in nodes_temp: nodes_temp.append(ep[1]) for en in edges_neg: if en[0] not in nodes_temp: nodes_temp.append(en[0]) if en[1] not in nodes_temp: nodes_temp.append(en[1]) print("Length of nodes (from mention graph): " + str(len(nodes_temp))) t_end = time.time() total_time = round(((t_end - t_start) / 60), 2) print("Computing time was " + str(total_time) + " minutes.") print () print ("Getting edges (mentions+following, incl.intra-group)...") edges_pos_2 = [] # to store only following edges for key, value in following_dict.items(): for v in value: edges_pos.append([key, v[1]]) edges_pos_2.append([key, v[1]]) #print (edges_pos_2) print() print("Length of positive edges (following, incl. intra-group): " + str(len(edges_pos_2))) print() print("Length of positive edges (mentions+following): " + str(len(edges_pos))) print("Length of negative edges (mentions+following): " + str(len(edges_neg))) print("Length of total edges (mentions+following): " + str(len(edges_neg) + len(edges_pos))) # get nodes print () print ("Getting nodes (mention+following)...") t_start = time.time() nodes = [] for ep in edges_pos: if ep[0] not in nodes: nodes.append(ep[0]) if ep[1] not in nodes: nodes.append(ep[1]) for en in edges_neg: if en[0] not in nodes: nodes.append(en[0]) if en[1] not in nodes: nodes.append(en[1]) print("Length of nodes (for mention+following graph): " + str(len(nodes))) t_end = time.time() total_time = round(((t_end - t_start) / 60), 2) print("Computing time was " + str(total_time) + " minutes.") print () print ("Getting nodes (following, incl. 
intra-group)...") t_start = time.time() nodes_1 = [] # for only following graph for ep in edges_pos_2: if ep[0] not in nodes_1: nodes_1.append(ep[0]) if ep[1] not in nodes_1: nodes_1.append(ep[1]) print ("Length of nodes (following, incl. intra-group): ",len(nodes_1)) t_end = time.time() total_time = round(((t_end - t_start) / 60), 2) print("Computing time was " + str(total_time) + " minutes.") print () print ("Creating GEPHI graph (following, incl. intra-group)...") t_start = time.time() DG1.add_edges_from(edges_pos_2, sign='+') DG1.add_nodes_from(nodes_1) nx.write_gexf(DG1, path_to_store_following_graph) #includes intra-public and intra-scientist group following! DG2.add_edges_from(edges_pos, sign='+') DG2.add_edges_from(edges_neg, sign='-') DG2.add_nodes_from(nodes) t_end = time.time() total_time = round(((t_end - t_start) / 60), 2) print("Computing time was " + str(total_time) + " minutes.") # ---------------------- # CREATE GEPHI GRAPH: # sum up signs (if > 0 then positive, if < 0 then negative) if parallel edges exist (ONLY for Gephi, as it doesn't accept parallel edges) # if the sum is zero, default to negative # print () # print ("Creating GEPHI graph (mention+following, incl. intra-group)...") # # t_start = time.time() # # edges_all = edges_pos + edges_neg # # edges_set = set(map(tuple, edges_all)) # result: {[1,2], [3,4]} # edges_unique_tuple = list(edges_set) # result: [(1,2), (3,4)] # edges_unique = [list(eu) for eu in edges_unique_tuple] # convert list of tuples to list of list # # print() # print("Length of unique edges(before): " + str(len(edges_unique))) # # print (edges_unique) # # edges_unique_sum_pos = [] # edges_unique_sum_neg = [] # edges_unique_dict = {} # # for eu in edges_unique: # count_pos = edges_pos.count(eu) # count_neg = edges_neg.count(eu) # count = count_pos - count_neg # # if count > 0: # edges_unique_sum_pos.append([eu[0], eu[1], count]) # edges_unique_dict[eu[0], eu[1]] = 'pos' # # if count <= 0: # edges_unique_sum_neg.append([eu[0], eu[1], count]) # edges_unique_dict[eu[0], eu[1]] = 'neg' # # print("Length of unique edges(after): " + str(len(edges_unique_sum_pos) + len(edges_unique_sum_neg))) # # for eup in edges_unique_sum_pos: # DG_2.add_edges_from([(eup[0], eup[1])], sign='+', sentiment=eup[2]) # # for eun in edges_unique_sum_neg: # DG_2.add_edges_from([(eun[0], eun[1])], sign='-', sentiment=eun[2]) # # nx.write_gexf(DG_2, path_to_store_combined_mention_and_following_graph) # # t_end = time.time() # total_time = round(((t_end - t_start) / 60), 2) # print("Computing time was " + str(total_time) + " minutes.") # # # write to file (so that don't have to recreate this when creating triad as it takes a freaking long time) # # edges_unique_list = [] # # for key,value in edges_unique_dict.items(): # key = list(key) # key.append(value) # edges_unique_list.append(key) # # print () # print ("Writing unique edges, following edges and mentions+following edges to file...") # # f = open(path_to_store_unique_edges_file, 'w') # # for eu in edges_unique_list: # f.write(','.join(eu)+'\n') # # f.close() # ------------------------- # write to file # f = open(path_to_store_following_edges, 'w') # # for ep in edges_pos_2: # f.write(','.join(ep) + ',pos' + '\n') # # f.close() # # f = open(path_to_store_combined_mentions_and_following_edges, 'w') # # for ep in edges_pos: # f.write(','.join(ep) + ',pos' + '\n') # # for en in edges_neg: # f.write(','.join(en) + ',neg' + '\n') # # f.close() #------------------------ # get common neighbours (embeddedness) print() print 
('------------------------') print ("Getting common neighbours (embeddedness) ...") t_start = time.time() lines = open(path_to_trust_links_file,'r').readlines() embeddedness = [] embedded_node_list = [] for line in lines: spline = line.rstrip('\n').split(',') embedded_list = [] common_neighbours = sorted(nx.common_neighbors(DG2, spline[0],spline[1])) embeddedness.append([spline[0],spline[1],str(len(common_neighbours))]) embedded_list.append(spline[0]) embedded_list.append(spline[1]) for n in range(len(common_neighbours)): embedded_list.insert(n+2,common_neighbours[n]) embedded_node_list.append(embedded_list) #print (embeddedness) #print (embedded_node_list) print (len(embedded_node_list)) t_end = time.time() total_time = round(((t_end - t_start) / 60), 2) print("Computing time was " + str(total_time) + " minutes.") f = open(path_to_store_embeddedness,'w') for e in embeddedness: f.write(','.join(e)+'\n') f.close() f = open(path_to_store_embeddeded_node_list, 'w') for e in embedded_node_list: f.write(','.join(e) + '\n') f.close()
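# nx.common_neighbors, used above for the embeddedness counts, only needs the undirected graph;
# a tiny self-contained illustration of what ends up in the embeddedness file:
import networkx as nx

toy = nx.Graph()
toy.add_edges_from([("a", "b"), ("a", "c"), ("b", "c"), ("b", "d"), ("c", "d")])
common = sorted(nx.common_neighbors(toy, "a", "d"))
print("embeddedness of (a, d): %d, shared neighbours: %s" % (len(common), common))
# -> embeddedness of (a, d): 2, shared neighbours: ['b', 'c']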