Example #1
def adjlist2gexf(fAdjlist, bIntNode=1):
    '''
    Converts a graph in the adjacency list format to the GEXF format.

    input parameters:
          fAdjlist:   The file name of the adjacency list
          bIntNode:   Indicates if the node type is integer. The default is 1
                      (i.e., nodes are integer type).
    
    returns:
          None

    output:
          This function generates a GEXF format file with the same name as the
          input file, with a .gexf extension.

    '''
    # first, loading the graph
    if bIntNode==1:
        G = nx.read_adjlist(fAdjlist, nodetype=int)
    else:
        G = nx.read_adjlist(fAdjlist)

    # the output file name
    (fOutRoot,tmpExt) = os.path.splitext(fAdjlist)
    fOut = fOutRoot + '.gexf'

    # writing out
    nx.write_gexf(G, fOut)
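A minimal usage sketch for the helper above (not from the original example; it assumes networkx and os are already imported as nx and os in the defining module, and that an adjacency-list file named example.adjlist exists):

# Default: node labels are parsed as integers; example.gexf is written next to the input.
adjlist2gexf('example.adjlist')
# Keep node labels as plain strings instead.
adjlist2gexf('example.adjlist', bIntNode=0)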
Example #2
 def write_gexf(self, path, ext = '.gexf', max_edges = None):
     """Write as a GEXF output, suitable for Gephi input."""
     filename = path + ext
     print "writing GeoGraph as GEXF to %s" % filename
     gexf = self.geo_gexf_graph(max_edges)
     #gexf_path = os.path.join(name, name + '_' + '_'.join(append) + ext)
     nx.write_gexf(gexf, filename)
def main(seed):
    depth = 0
    global g

    # Connect to Neo4j
    graph_db = neo4j.GraphDatabaseService(NEODB)

    print "Starting Node Export at {0}.".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))

    for s in seed:
        query = q.format(s)
        neo_nodes, metadata = cypher.execute(graph_db, query)

        # add the node
        g.add_node(neo_nodes[0][0])
        attr = graph_db.node(neo_nodes[0][0]).get_properties()
        attr = addNodeAttributes(attr)
        g.node[neo_nodes[0][0]] = attr
        complete.add(neo_nodes[0][0])

        # pass them to the recursive DFS
        dfs_parse_nodes(neo_nodes, depth + 1)

    print "Saving File"
    nx.write_gexf(g, GEXF_FILE)

    print "Done at {0}.".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))
Example #4
def test_real_graph(nparts):
    logging.info('Reading author collab graph')
    author_graph = nx.read_graphml('/home/amir/az/io/spam/mgraph2.gexf')
    author_graph.name = 'author graph'
    logging.info('Reading the full author product graph')
    full_graph = nx.read_graphml('/home/amir/az/io/spam/spam_graph.graphml')
    full_graph.name = 'full graph'

    proper_author_graph = author_graph.subgraph([a for a in author_graph if 'revLen' in author_graph.node[a]
                                                                            and 'hlpful_fav_unfav' in author_graph.node[a]
                                                                            and 'vrf_prchs_fav_unfav' in author_graph.node[a]])
    # features = {'revLen': 0.0, 'hlpful_fav_unfav': False, 'vrf_prchs_fav_unfav': False}
    # for a in author_graph:
    #     for feat, def_val in features.items():
    #         if feat not in author_graph.node[a]:
    #             author_graph.node[a][feat] = def_val

    # sub sample proper_author_graph
    # proper_author_graph.remove_edges_from(random.sample(proper_author_graph.edges(), 2*proper_author_graph.size()/3))
    # degree = proper_author_graph.degree()
    # proper_author_graph.remove_nodes_from([n for n in proper_author_graph if degree[n] == 0])
    # mapping from each author to the products they reviewed
    logging.debug('forming the product mapping')
    author_product_mapping = {}
    for a in proper_author_graph:
        author_product_mapping[a] = [p for p in full_graph[a] if 'starRating' in full_graph[a][p] and
                                                                 full_graph[a][p]['starRating'] >= 4]
    logging.debug('Running EM')
    ll, partition = HardEM.run_EM(proper_author_graph, author_product_mapping, nparts=nparts, parallel=True)
    print 'best loglikelihood: %s' % ll
    for n in partition:
        author_graph.node[n]['cLabel'] = int(partition[n])
    nx.write_gexf(author_graph, '/home/amir/az/io/spam/spam_graph_mgraph_sage_labeled.gexf')
Example #5
def write_gexf(filename, graph=None, adjacency=None, attributes=None):
    """
    Output a matrix of nodal flows to GEXF format
    """
    if graph is None:
        graph = to_graph(adjacency, attributes)
    nx.write_gexf(graph, filename, prettyprint=True)
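A quick usage sketch for the wrapper above (illustrative only, not from the original source; it passes a prebuilt graph, so the to_graph fallback defined elsewhere in that module is never exercised):

import networkx as nx

G = nx.karate_club_graph()
write_gexf('karate.gexf', graph=G)  # writes karate.gexf with prettyprint enabled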
def spectral_clustering(G, graph_name, num_clusters):
    # Find a way to figure out the number of clusters automatically
    subgraphs = []
    write_directory = os.path.join(Constants.SPECTRAL_PATH,graph_name)
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    nodeList = G.nodes()
    matrix_data = nx.to_numpy_matrix(G, nodelist = nodeList)
    spectral = SpectralClustering(n_clusters=2,
                                          eigen_solver='arpack',
                                          affinity="rbf")   
    spectral.fit(matrix_data)
    label = spectral.labels_
    clusters = {}
    
    for nodeIndex, nodeLabel in enumerate(label):
        if nodeLabel not in clusters:
            clusters[nodeLabel] = []
        clusters[nodeLabel].append(nodeList[nodeIndex])
        
    #countNodes is used to test whether we have all the nodes in the clusters 
   
    for clusterIndex, subGraphNodes in enumerate(clusters.keys()):
        subgraph = G.subgraph(clusters[subGraphNodes])
        subgraphs.append(subgraph)
        nx.write_gexf(subgraph, os.path.join(write_directory,graph_name+str(clusterIndex)+"_I"+Constants.GEXF_FORMAT))
        #countNodes = countNodes + len(clusters[subGraphNodes])
    return subgraphs
def build(task):
    """Build network based on task"""
    try:
        graph = nx.DiGraph()
        with open(task["file"]) as df:
            for line in df:
                action = json.loads(line)
                user = action["user"]
                actions = action["actions"]
                graph.add_node(user, tweets=actions["tweets"])

                #handle mentions
                men_counter = Counter(actions["mentions"])
                for t_user, num in men_counter.iteritems():
                    graph.add_edge(user, t_user, mentions=num)

                #handle retweet
                retweet_counter = Counter(actions["retweets"])
                for t_user, num in retweet_counter.iteritems():
                    graph.add_edge(user, t_user, retweets=num)

                #handle reply
                reply_counter = Counter(actions['replies'])
                for t_user, num in reply_counter.iteritems():
                    graph.add_edge(user, t_user, replies=num)
        nx.write_gexf(graph, task["output"])
        print 'Done[%s]' % task['file']
    except:
        print "Error:", task, sys.exc_info()[0], line
Example #8
def main(mysql_db, crawl_id, output):
    # print crawl_id
    query = (
        """
    SELECT tags.content AS tag_content, 
           d.content    AS entity_content, 
           start_words, 
           frequency 
    FROM   (SELECT content, 
                   start_words, 
                   frequency, 
                   id_tag 
            FROM   (SELECT re.id_rws_entity, 
                           content, 
                           start_words, 
                           frequency 
                    FROM   (SELECT id_rws_entity, 
                                   frequency, 
                                   start_words 
                            FROM   (SELECT id_document, 
                                           start_words 
                                    FROM   documents_crawls AS dc 
                                           JOIN crawls AS c 
                                             ON dc.id_crawl = c.id_crawl 
                                    WHERE  c.id_crawl = %s) AS a 
                                   JOIN rws_entities_documents_unignored AS redu 
                                     ON redu.id_document = a.id_document) AS b 
                           JOIN rws_entities AS re 
                             ON re.id_rws_entity = b.id_rws_entity) AS c 
                   JOIN rws_entities_tags AS ret 
                     ON ret.id_rws_entity = c.id_rws_entity) AS d 
    LEFT OUTER JOIN tags 
    ON tags.id_tag = d.id_tag  
    ORDER BY start_words
    LIMIT 20
             """
        % crawl_id
    )

    connection = MySQLdb.connect(host=MYSQL_HOST, user=MYSQL_USER, passwd=MYSQL_PASSWORD, db=mysql_db)
    cursor = connection.cursor()
    cursor.execute(query)

    res = cursor.fetchall()

    g = nx.Graph()

    for key, group in groupby(res, lambda x: x[2]):
        g.add_node(key, type="start_word")
        # print "added", key, "type : actor"
        for thing in group:
            # print thing
            if thing[1] != key:
                g.add_node(thing[1], frequency=float(thing[3]), type=thing[0])
            # print "added", thing[1], "type : entity"
            g.add_edge(key, thing[1], weight=float(thing[3]))
        # print " "
    # filename = EXPORT_DIR + "anta-export-graph-" + str(getrandbits(128)) + ".gexf"
    nx.write_gexf(g, output)
    return
Example #9
def add_lamina_LPU(config, i, lamina, manager):
    '''
        This method adds Lamina LPU and its parameters to the manager
        so that it can be initialized later.

        --
        config: configuration dictionary-like object
        i: identifier of the eye, in case more than one is used
        lamina: lamina array object required for the generation of the
            graph.
        manager: manager object to which the LPU will be added
        generator: generator object or None
    '''

    output_filename = config['Lamina']['output_file']
    gexf_filename = config['Lamina']['gexf_file']
    suffix = config['General']['file_suffix']

    dt = config['General']['dt']
    debug = config['Lamina']['debug']
    time_sync = config['Lamina']['time_sync']

    output_file = '{}{}{}.h5'.format(output_filename, i, suffix)
    gexf_file = '{}{}{}.gexf.gz'.format(gexf_filename, i, suffix)
    G = lamina.get_graph()
    nx.write_gexf(G, gexf_file)

    n_dict_ret, s_dict_ret = lLPU.lpu_parser(gexf_file)
    lamina_id = get_lamina_id(i)
    modules = []
    manager.add(lLPU, lamina_id, dt, n_dict_ret, s_dict_ret,
                input_file=None, output_file=output_file,
                device=2*i+1, debug=debug, time_sync=time_sync,
                modules=modules, input_generator=None)
def main():
    # the description link graph
    g = nx.read_gexf('data/subreddits_edged_by_description_links.gexf')

    # an empty graph for showing communities
    g1 = nx.Graph()

    communities = get_coalesced_communities(g)
    for c in communities:
        g1.add_node(c.name)
        g1.node[c.name]['size'] = len(c.members)

    count = 0
    ratio_weight = 0.0

    for c1, c2 in product(communities, communities):
        if c1.id == c2.id or g1.has_edge(c1.name, c2.name) or len(c1.members) > len(c2.members):
            continue
        
        overlap = len(c1.members & c2.members)

        if overlap > 0:
            g1.add_edge(c1.name, c2.name, weight=overlap / len(c1.members))
            ratio_weight += overlap / len(c1.members)
            count += 1

    average_weight_ratio = ratio_weight / count
    print "average weight ratio: %s" % str(average_weight_ratio)

    g1.remove_edges_from(filter(lambda x: x[2]['weight'] < average_weight_ratio, g1.edges(data=True)))

    print "%d subreddits included" % len(reduce(lambda x,y: x.union(y.members), communities, set()))
    nx.write_gexf(g1, 'test_coalesce.gexf')
def kmeans_cluster(G, graph_name, num_clusters):
    subgraphs = []
    # Find a way to figure out the number of clusters automatically
    write_directory = os.path.join(Constants.KMEANS_PATH,graph_name)
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    nodeList = G.nodes()
    matrix_data = nx.to_numpy_matrix(G, nodelist = nodeList)
    kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)   
    kmeans.fit(matrix_data)
    label = kmeans.labels_
    clusters = {}
    
    for nodeIndex, nodeLabel in enumerate(label):
        if nodeLabel not in clusters:
            clusters[nodeLabel] = []
        clusters[nodeLabel].append(nodeList[nodeIndex])
        
    #countNodes is used to test whether we have all the nodes in the clusters 
    countNodes = 0    
    for clusterIndex, subGraphNodes in enumerate(clusters.keys()):
        subgraph = G.subgraph(clusters[subGraphNodes])
        subgraphs.append(subgraph)
        nx.write_gexf(subgraph, os.path.join(write_directory,graph_name+str(clusterIndex)+Constants.GEXF_FORMAT))
        #countNodes = countNodes + len(clusters[subGraphNodes])
        pass
    return num_clusters
def NXexportDoubleMatAsGraph(matrix,TElist,TEdict,TEfamilydict,nameout,namestatout):
	fileout=open(namestatout,'w')
	size=matrix.shape
	graph=nx.DiGraph()
	i=0
	j=0
	L=len(TElist)
	#k=0
	#l=0
	#m=0
	while i<size[0]:
		while j<size[1]: #safest call
			if matrix[i,j]>0:
				if i>=L and j>=L:
					graph.add_edge("-"+TElist[i-L],"-"+TElist[j-L],weight=matrix[i,j])
					fileout.write("-"+TElist[i-L]+"\t-"+TElist[j-L]+"\t"+TEfamilydict[TElist[i-L]]+"\t"+TEfamilydict[TElist[j-L]]+"\t"+str(matrix[i,j])+"\n")
					#k+=matrix[i,j]
				elif i>=L:
					graph.add_edge("-"+TElist[i-L],TElist[j],weight=matrix[i,j])
					fileout.write("-"+TElist[i-L]+"\t"+TElist[j]+"\t"+TEfamilydict[TElist[i-L]]+"\t"+TEfamilydict[TElist[j]]+"\t"+str(matrix[i,j])+"\n")
					#l+=matrix[i,j]
				elif j>=L:
					graph.add_edge(TElist[i],"-"+TElist[j-L],weight=matrix[i,j])
					fileout.write(TElist[i]+"\t-"+TElist[j-L]+"\t"+TEfamilydict[TElist[i]]+"\t"+TEfamilydict[TElist[j-L]]+"\t"+str(matrix[i,j])+"\n")
					#m+=matrix[i,j]
				else:
					graph.add_edge(TElist[i],TElist[j],weight=matrix[i,j])
					fileout.write(TElist[i]+"\t"+TElist[j]+"\t"+TEfamilydict[TElist[i]]+"\t"+TEfamilydict[TElist[j]]+"\t"+str(matrix[i,j])+"\n")
			j+=1
		j=0
		i+=1
	#print(k,l,m) #unitary test
	#pos=nx.spring_layout(G) # positions for all nodes: not needed if exporting to gexf
	nx.write_gexf(graph,nameout)
	fileout.close()
Example #13
def analyze_rdn_graph():
    G = generate_random_graph(188979, 7) #nodes and nodes/edges
    nx.write_gexf(G, "./networks/barabasi_panel.gexf")
    print "Nodes:", G.number_of_nodes()
    print "Edges:", G.number_of_edges()
    analize_cliques(G)
    analize_degrees(G)
def scrap_dbpedia():
    G = nx.DiGraph()
    
    for querie in queries:
        print 'Executing querie: ' + querie
        payload = {'query': querie,
                    'format': 'json'}
        r = requests.get("http://dbpedia.org/sparql/", params=payload)
        results = r.json()['results']['bindings']
        for result in results:
            advisor = result['advisor']['value']
            advisorName = result['labelAdvisor']['value']
            student = result['student']['value']
            studentName = result['labelStudent']['value']
            print u'Advisor:', advisor, u'student:', student
            add_node(G, advisor, advisorName)
            add_node(G, student, studentName)
            add_edge(G, advisor, student)
    
    print ''
    print '-Nodes: '
    print len(G.nodes())
    print '-Edges: '
    print len(G.edges())
    print 'Writing file'
    nx.write_gexf(G, 'dbpedia_genealogy.gexf')
    print 'Done'
Example #15
def main(args):

    egos = []

    for arg in args:
        # to do: deal with hashtags here
        if arg[0] == '@':
            if "/" in arg:
                parts = arg.split("/")
                egos.extend(get_members_from_list(parts[0][1:],parts[1]))
            else:
                egos.append(arg[1:])

    
    # replace egos with 

    data = {'nodes': {}, 'edges': {}}

    for ego in egos:
        data = get_mentionball(ego,data)

    G = data_to_network(data)

    clean_ball(G)

    nx.write_gexf(G,"mentionball-%s.gexf" % "+".join(args).replace('/','~'))
Example #16
def build_thesis_genealogy():
    cnx = mysql.connector.connect(**config)
    cursor = cnx.cursor()    
    query = "SELECT thesis.author_id, advisor.person_id FROM thesis, advisor WHERE thesis.id = advisor.thesis_id"
    cursor.execute(query)
    G = nx.DiGraph()
    for thesis in cursor:
        G.add_edge(thesis[1], thesis[0])
        
    i = 0        
    for n in G.nodes():
        try:
            node = str(n)
            G.node[n]["name"] = persons_id[node]
            try:
                G.node[n]["university"] = persons_university[node]["university"]["name"]
                G.node[n]["location"] = persons_university[node]["university"]["location"]
                i += 1
            except:
                G.node[n]["university"] = "none"
                G.node[n]["location"] = "none"
        except:
            print n
    
    print "Total persons with a location:", i
    cursor.close()
    cnx.close()
        
    nx.write_gexf(G, "./networks/genealogy.gexf")
    return G
def build_interaction():
    ids, inverse_ids = get_all_ids()
    G = nx.DiGraph()
    cnx = mysql.connector.connect(**config)
    cursor = cnx.cursor()    
    query = "SELECT user_id, target_id, weight FROM interactions"
    cursor.execute(query)
    for relation in cursor:
        source = inverse_ids[relation[0]]
        target = inverse_ids[relation[1]]
        weight = relation[2]
        G.add_edge(source, target, weight = weight)
            
    cnx.close()
    
    print 'Nodes:', len(G.nodes())
    print 'Edges:', len(G.edges())
    nx.write_gexf(G, './sna/interactions-nonfiltered-%s-%s.gexf' % (datetime.datetime.now().month, datetime.datetime.now().day))    
    
    filter_weight(G, weight_limit = 4, degree_limit = 0)
    
    print 'Nodes:', len(G.nodes())
    print 'Edges:', len(G.edges())

    
    nx.write_gexf(G, './sna/interactions-%s-%s.gexf' % (datetime.datetime.now().month, datetime.datetime.now().day))
def main():
    n = int(sys.argv[1])
    out = sys.argv[2]
    ans = float(sys.argv[3]) if len(sys.argv) >= 4 else None
    G = nx.Graph()

    # create nodes randomly placed in [0,100)x[0,100)
    P = [None] * n
    for i in xrange(n):
        G.add_node(i, x=random.random() * 100, y=random.random() * 100)

    # create a complete weighted graph using Euclidian distances
    for i in xrange(n):
        for j in xrange(i + 1, n):
            G.add_edge(i, j, weight=euclidian(G, i, j))

    # embed a tour with edge weight = ans (usually 0)
    if ans is not None:
        T = list(G.nodes())
        random.shuffle(T)
        for i in xrange(1, len(T)):
            G.edge[T[i - 1]][T[i]]["weight"] = ans
        G.edge[T[-1]][T[0]]["weight"] = ans
        print T

    nx.write_gexf(G, out)
    print "n=%d m=%d" % (G.number_of_nodes(), G.number_of_edges())
Example #19
def Gephi_Graph(r_serv, graphpath, mincard, maxcard, insert_type):
    """Create Gephi Graph by calling a "Sub function": Create_Graph

    :param r_serv: -- connection to the Redis database
    :param graphpath: -- the absolute path of the .gexf graph created.
    :param mincard: -- the minimum links between 2 nodes to be created
    :param maxcard: -- the maximum links between 2 nodes to be created
    :param insert_type: -- the type of datastructure used to create the graph.

    In fact this function is just here to be able to choose between two kinds of
    Redis database structure: one which is a sorted set and the other a simple
    set.

    """
    g = nx.Graph()

    if (insert_type == 0):

        for h in r_serv.smembers("hash"):
            Create_Graph(r_serv, g, h, graphpath, mincard, maxcard)

    elif (insert_type == 2):

        for h in r_serv.zrange("hash", 0, -1):
            Create_Graph(r_serv, g, h, graphpath, mincard, maxcard)

    nx.write_gexf(g,graphpath)
    print nx.info(g)
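A hypothetical call of the function above (a sketch only: it assumes the redis package, a local Redis instance holding a "hash" key, and the Create_Graph helper defined elsewhere in the original module):

import redis

r_serv = redis.StrictRedis(host='localhost', port=6379, db=0)
Gephi_Graph(r_serv, './hashes.gexf', mincard=2, maxcard=100, insert_type=0)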
Example #20
 def as_gexf(self):
     """
     Return the graph as GEXF for download
     """
     sio = StringIO.StringIO()
     nx.write_gexf(self.graph_with_metadata, sio)
     return sio.getvalue()
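The method above is Python 2 code (StringIO module). Under Python 3, write_gexf produces bytes, so a sketch of the same idea would buffer with io.BytesIO instead (compare graph2gexf further down):

import io
import networkx as nx

def as_gexf_py3(graph):
    # Serialize a graph to an in-memory GEXF string (Python 3 sketch).
    buf = io.BytesIO()
    nx.write_gexf(graph, buf)
    return buf.getvalue().decode('utf8')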
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", help="path to alternate config file", default="./config.json", type=str)
    args = parser.parse_args()

    lastfm = LastFm(args.config)
    artists = lastfm.get_top_artists()
    artists = map(lambda x: x['name'], artists)
    similar_artists = get_similar_artists(lastfm, artists)
    edges = []
    nodes = {}
    map(lambda row: row[0], similar_artists)
    for row in similar_artists:
        if row[0] not in nodes:
            nodes[row[0]] = Node(row[0], color=(0,0,205))
        else:
            node = nodes[row[0]]
            node.set_value(node.get_value() + 1)
            node.set_color((0,0,205))
        for similar_artist in row[1]:
            if similar_artist not in nodes:
                nodes[similar_artist] = Node(similar_artist, color=(238,0,0))
            else:
                node = nodes[similar_artist]
                node.set_value(node.get_value() + 1)
            edges.append((row[0], similar_artist))
    graph = build_graph(nodes, edges)
    nx.write_gexf(graph,'graph.gexf')
 def build_from_single_tweet(self, tweet):
     """
     This will save all the hashtags from a single tweet to the graph file
     """
     entities = Extractors.getEntities(tweet)
     if len(entities['hashtags']) >= 1:
         self.load_graph()
         try:
             list_of_hashtags = entities['hashtags']
             tweet_tuples = []
             # Format each of the hashtags and add the tuple
             for tag in list_of_hashtags:
                 tag_cleaned = str(tag['text'])
                 tag_cleaned = tag_cleaned.lower()
                 tweetid = tweet['id_str']
                 tweet_tuple = (tweetid, tag_cleaned)
                 tweet_tuples.append(tweet_tuple)
             try:
                 #Add tag to the graph. Done here so that one bad edge won't bring everything down
                 self.graph.add_edges_from(tweet_tuples)
             except Exception as e:
                 print 'Error adding edge for tweet id %s and hashtag %s. Details: %s' % (tweetid, tag_cleaned, e)
             #todo Add redis log of non recorded hashtags
         finally:  # record added edges
             nx.write_gexf(self.graph, self.graphFile)
def dbscan_cluster(G, graph_name):
    write_directory = os.path.join(DBSCAN_PATH,graph_name)
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    nodeList = G.nodes()
    matrix_data = nx.to_numpy_matrix(G, nodelist = nodeList)
    #kmeans = KMeans(init='k-means++', n_clusters=8, n_init=10)
    #kmeans.fit(matrix_data)
    #label = kmeans.labels_
    #print(matrix_data)

    # Compute DBSCAN
    db = DBSCAN(eps=1, min_samples=2).fit(matrix_data)
    #core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    #core_samples_mask[db.core_sample_indices_] = True
    label = db.labels_
    clusters = {}

    for nodeIndex, nodeLabel in enumerate(label):
        if nodeLabel not in clusters:
            clusters[nodeLabel] = []
        clusters[nodeLabel].append(nodeList[nodeIndex])
    
    #print(label)
    #print("clusters",clusters)    

    #countNodes is used to test whether we have all the nodes in the clusters
    countNodes = 0
    for clusterIndex, subGraphNodes in enumerate(clusters.keys()):
        subgraph = G.subgraph(clusters[subGraphNodes])
        nx.write_gexf(subgraph, os.path.join(write_directory,graph_name+str(clusterIndex)+Constants.GEXF_FORMAT))
        #countNodes = countNodes + len(clusters[subGraphNodes])
        pass
    pass
def create_graph(category):
    graphdict, labeldict = load_dicts()
    G = nx.Graph()
    ego = labeldict[category]
    categories = [category]
    level = 0

    while True:
        if level == 2 or len(categories) == 0:
            break
        for cat in categories:
            node = labeldict[cat][0].decode('utf-8')
            G.add_node(node)
            try:
                subcats = graphdict[cat]
                for scat in subcats:
                    subnode = labeldict[scat][0].decode('utf-8')
                    G.add_node(subnode)
                    G.add_edge(node,subnode)
            except KeyError:
                continue
        categories = subcats
        level += 1

    # Draw graph
    pos=nx.spring_layout(G)
    nx.draw_networkx_nodes(G,pos,node_size=10,node_color='white')
    nx.draw_networkx_edges(G,pos,width=0.5,alpha = 0.8, edge_color = 'black')
    nx.draw_networkx_labels(G,pos, font_size = 12, font_family = 'sans-serif')
    nx.write_gexf(G,'../output/graph.gexf')
    plt.savefig('../output/ego_graph.png')
    plt.show()
def main(notify):

    g = nx.Graph()
    out_filename = "data/subreddits_edged_by_description_links.gexf"
    parser = HTMLParser()
    session = Session()
    query = session.query(Subreddit)
    dbi = DBIterator(query=query)

    for subreddit in dbi.results_iter():
        sub = subreddit.url.split("/")[2].lower()

        initialize_node(g, sub)

        if not subreddit.description_html:
            continue

        html = parser.unescape(subreddit.description_html)
        for linked_sub in find_sub_links(html):
            if g.has_edge(sub, linked_sub):
                g[sub][linked_sub]["weight"] += 1
            else:
                g.add_edge(sub, linked_sub, weight=1)

    nx.write_gexf(g, out_filename)
def create_genealogy(graph_id = 'deusto.aitoralmeida'):
    print 'Loading graph'
    merged = nx.read_gexf('merged_genealogy.gexf', node_type = None)
    print 'Loading edge index'
    dict_edges = load_merged_edge_index()
    
    print 'Building genealogy'
    to_process = [graph_id]
    tree = set()
    #get all the ancestors in the tree
    while len(to_process) > 0:
        current = to_process[0]
        to_process.remove(current)
        tree.add(current) 
        
        try:
            to_process += dict_edges[current]
        except:
            pass 
        
    print 'Creating graph'
    G = nx.DiGraph()
    for person in tree:
        print person
        G.add_node(person, name = merged.node[person]['name'])
        for target in merged.edge[person].keys(): 
            #add edges with the ancestors only       
            if target in tree:        
                G.add_edge(person, target)
    print 'Writing file'
    nx.write_gexf(G, 'created_genealogy.gexf')    
Example #27
def add_lamina_LPU(config, i, lamina, manager):
    output_filename = config["Lamina"]["output_file"]
    gexf_filename = config["Lamina"]["gexf_file"]
    suffix = config["General"]["file_suffix"]

    dt = config["General"]["dt"]
    debug = config["Lamina"]["debug"]
    time_sync = config["Lamina"]["time_sync"]

    output_file = "{}{}{}.h5".format(output_filename, i, suffix)
    gexf_file = "{}{}{}.gexf.gz".format(gexf_filename, i, suffix)
    G = lamina.get_graph()
    nx.write_gexf(G, gexf_file)

    n_dict_ret, s_dict_ret = LPU.lpu_parser(gexf_file)
    lamina_id = get_lamina_id(i)
    modules = []
    manager.add(
        LPU,
        lamina_id,
        dt,
        n_dict_ret,
        s_dict_ret,
        input_file=None,
        output_file=output_file,
        device=2 * i + 1,
        debug=debug,
        time_sync=time_sync,
        modules=modules,
        input_generator=None,
    )
Example #28
 def fetch(self, oid, format="graphml", max_age=0):
     # If the file was already fetched check the timestamp and overwrite
     graphml = os.path.join(self.cache_dir, oid+ ".graphml")
     graphpng = os.path.join(self.cache_dir, oid+ ".png")
     graphgexf  = os.path.join(self.cache_dir, oid+ ".gexf")
     logger.debug("Fetching "+graphml)
     if os.path.exists(graphml):
         ## cache hit was old and we have to refresh it
         if int(time.time()) - os.path.getmtime(graphml ) > max_age:
             DG=nx.read_graphml(graphml)
             labels=dict((n,d['label']) for n,d in DG.nodes(data=True))
             nx.draw_networkx(DG,labels=labels)
             logger.debug("Generated graph "+graphpng);
             plt.savefig(graphpng)
             nx.write_gexf(DG, graphgexf )
     else: 
         logger.debug("Cache miss");
         ## cache miss we have to generate the graph, this will take time!
         return None
     if format=="graphml": 
         return json.dumps(['URL', graphml])
     elif format=="png": 
         return json.dumps(['URL', graphpng])
     elif format=="png": 
         return json.dumps(['URL', graphgexf ])
def create_tracker_graph():
    cur.execute(("SELECT r.url, r.referrer, r.top_url, c.name, c.value "
                "FROM http_requests as r, http_cookies as c "
                "WHERE r.id = c.header_id "
                "AND c.http_type = 'request' "
                "AND top_url IN "
                "(SELECT DISTINCT top_url FROM http_requests LIMIT 1500)"))
    for url, ref, top, name, value in cur.fetchall():
        if ref is None or ref == '': # Empty referrer
            continue
        
        req_host = psl.get_public_suffix(urlparse(url).hostname)
        ref_host = psl.get_public_suffix(urlparse(ref).hostname)
        top_host = psl.get_public_suffix(urlparse(top).hostname)

        if top_host != ref_host: # Request that doesn't have knowledge of top url
            continue
        if ref_host == req_host: # Self loops
            continue
        if req_host == 'facebook.com': # Facebook
            continue

        # Check if identifying cookie
        for item in id_cookies.keys():
            if req_host.endswith(item) and name in id_cookies[item]:
                # If so, add nodes and edge
                G.add_node(req_host)
                G.add_node(ref_host)
                G.add_edge(ref_host, req_host)
                break
    networkx.write_gexf(G,os.path.expanduser('~/Desktop/05062014_triton.gexf'))
Example #30
def convertNetToGefx(input_file):
    G = None
    if input_file.endswith(Constants.GEXF_FORMAT):
        G = nx.read_gexf(input_file, None, True)
    elif input_file.endswith(Constants.NET_FORMAT):
        G=nx.Graph()
        f = file(input_file, 'r')
        # iterate over the lines in the file
        for line in f:
            # split the line into a list of column values
            columns = line.split('\t')
            # clean any whitespace off the items
            columns = [col.strip() for col in columns]
            if columns:
                G.add_edge(columns[0], columns[1])
        #write to a gexf file, so that GHOST can read it as well
        gexf_path = input_file[:-len(Constants.NET_FORMAT)]+Constants.GEXF_FORMAT
        #add attributes to nodes in gexf file
        for n,d in G.nodes_iter(data=True):
            G.node[n]["id"] = n
            G.node[n]["gname"] = n
        nx.write_gexf(G, gexf_path)
    else:
        print("Unsupported Format")
        exit(0)
    print("For "+input_file+" Number of Nodes =", G.number_of_nodes(), "No of edges = ", G.number_of_edges())

    return G
Example #31
        if sen == ['']:
            print '***************skip*****************'
            continue
        G = nx.Graph()
        G.add_nodes_from(sen)

        for w in rolling_window(sen, 4):
            G.add_edges_from([(w[0], w[1]), (w[0], w[2]), (w[0], w[3])])

        #nx.draw(G)
        #op_fname ="../../graph_of_words/WebKB/graph_of_words_{}.train.gexf".format(ind)
        op_fname = "../../graph_of_words/R8/graph_of_words_{}.train.gexf".format(
            ind)
        print op_fname
        #nx.write_graphml (G,op_fname)
        nx.write_gexf(G, op_fname)
        ind += 1
        del G
    raw_input()
    test_filename = '../data/R8/r8-test-no-stop.txt'
    f = open(test_filename, 'rU')
    test_sentences = f.readlines()
    ind = 0
    for sen in test_sentences:
        sen = sen.replace('\n', '').split('\t')[-1].split(' ')[:-1]
        if sen == ['']:
            print '***************skip*****************'
            continue
        G = nx.Graph()
        G.add_nodes_from(sen)
        for w in rolling_window(sen, 4):
Example #32
#            pass

print "REPLY COUNTS"
print userfreq
print freq
print poi_retweet

#print "Mention COUNTS"
#print mentions_count
#print usermentionsfreq
#print poi_mentions

G = nx.DiGraph()
#A=pgv.AGraph()

for edge in edges:
    G.add_edge(edge[0], edge[1])
    #A.add_edge(edge[0],edge[1])

#A.layout() # layout with default (neato)
#A.draw('simple.png') # draw png

#export so you can use gephi
nx.write_graphml(G, 'ed-test-replies-to.graphml')
nx.write_gexf(G, 'ed-test-replies-to.gexf')

# nx.draw_spring(G)
# nx.draw_shell(G)
nx.draw_random(G)

plt.show()
    positions.sort()
    pickle.dump(positions, writeFg, pickle.HIGHEST_PROTOCOL)

if (args.writeog is not None):
    ogFile = open(args.writeog, 'wb')
    pickle.dump(og, ogFile, pickle.HIGHEST_PROTOCOL)
    ogFile.close()

if (args.writef is not None):
    outf = open(args.writef, 'wb')
    pickle.dump(frags, outf)
    outf.close()

positions.sort()
pg = MakePG(positions, frags)
nx.write_gexf(pg, "before_fixing.gexf")
nFixed = FixFragments(frags, 0.6)

(ref, alt) = StoreFrequency(positions, frags)

nRemoved = FilterHomozygousSites(positions, ref, alt, frags,
                                 args.minAlleleFreq)
pg = MakePG(positions, frags)
nx.write_gexf(pg, "after_fixing.gexf")

# Try a second round.
ClearFragmentSupport(frags)
og = BuildOverlapGraph(frags, positions, args.minOverlap)
pos = {positions[i]: i for i in range(0, len(positions))}
AddOverlapSupport(og, frags, pos)
nFixed = FixFragments(frags, 0.6)
Example #34
def _paga_graph(
    adata,
    ax,
    solid_edges=None,
    dashed_edges=None,
    adjacency_solid=None,
    adjacency_dashed=None,
    transitions=None,
    threshold=None,
    root=0,
    colors=None,
    labels=None,
    fontsize=None,
    fontweight=None,
    fontoutline=None,
    text_kwds: Mapping[str, Any] = MappingProxyType({}),
    node_size_scale=1.0,
    node_size_power=0.5,
    edge_width_scale=1.0,
    normalize_to_color='reference',
    title=None,
    pos=None,
    cmap=None,
    frameon=True,
    min_edge_width=None,
    max_edge_width=None,
    export_to_gexf=False,
    colorbar=None,
    use_raw=True,
    cb_kwds: Mapping[str, Any] = MappingProxyType({}),
    single_component=False,
    arrowsize=30,
):
    import networkx as nx

    node_labels = labels  # rename for clarity
    if (node_labels is not None and isinstance(node_labels, str)
            and node_labels != adata.uns['paga']['groups']):
        raise ValueError(
            'Provide a list of group labels for the PAGA groups {}, not {}.'.
            format(adata.uns['paga']['groups'], node_labels))
    groups_key = adata.uns['paga']['groups']
    if node_labels is None:
        node_labels = adata.obs[groups_key].cat.categories

    if (colors is None or colors == groups_key) and groups_key is not None:
        if groups_key + '_colors' not in adata.uns or len(
                adata.obs[groups_key].cat.categories) != len(
                    adata.uns[groups_key + '_colors']):
            _utils.add_colors_for_categorical_sample_annotation(
                adata, groups_key)
        colors = adata.uns[groups_key + '_colors']
        for iname, name in enumerate(adata.obs[groups_key].cat.categories):
            if name in settings.categories_to_ignore:
                colors[iname] = 'grey'

    nx_g_solid = nx.Graph(adjacency_solid)
    if dashed_edges is not None:
        nx_g_dashed = nx.Graph(adjacency_dashed)

    # convert pos to array and dict
    if not isinstance(pos, (Path, str)):
        pos_array = pos
    else:
        pos = Path(pos)
        if pos.suffix != '.gdf':
            raise ValueError(
                'Currently only supporting reading positions from .gdf files. '
                'Consider generating them using, for instance, Gephi.')
        s = ''  # read the node definition from the file
        with pos.open() as f:
            f.readline()
            for line in f:
                if line.startswith('edgedef>'):
                    break
                s += line
        from io import StringIO

        df = pd.read_csv(StringIO(s), header=-1)
        pos_array = df[[4, 5]].values

    # convert to dictionary
    pos = {n: [p[0], p[1]] for n, p in enumerate(pos_array)}

    # uniform color
    if isinstance(colors, str) and is_color_like(colors):
        colors = [colors for c in range(len(node_labels))]

    # color degree of the graph
    if isinstance(colors, str) and colors.startswith('degree'):
        # see also tools.paga.paga_degrees
        if colors == 'degree_dashed':
            colors = [d for _, d in nx_g_dashed.degree(weight='weight')]
        elif colors == 'degree_solid':
            colors = [d for _, d in nx_g_solid.degree(weight='weight')]
        else:
            raise ValueError(
                '`degree` either "degree_dashed" or "degree_solid".')
        colors = (np.array(colors) - np.min(colors)) / (np.max(colors) -
                                                        np.min(colors))

    # plot gene expression
    var_names = adata.var_names if adata.raw is None else adata.raw.var_names
    if isinstance(colors, str) and colors in var_names:
        x_color = []
        cats = adata.obs[groups_key].cat.categories
        for icat, cat in enumerate(cats):
            subset = (cat == adata.obs[groups_key]).values
            if adata.raw is not None and use_raw:
                adata_gene = adata.raw[:, colors]
            else:
                adata_gene = adata[:, colors]
            x_color.append(np.mean(adata_gene.X[subset]))
        colors = x_color

    # plot continuous annotation
    if (isinstance(colors, str) and colors in adata.obs
            and not is_categorical_dtype(adata.obs[colors])):
        x_color = []
        cats = adata.obs[groups_key].cat.categories
        for icat, cat in enumerate(cats):
            subset = (cat == adata.obs[groups_key]).values
            x_color.append(adata.obs.loc[subset, colors].mean())
        colors = x_color

    # plot categorical annotation
    if (isinstance(colors, str) and colors in adata.obs
            and is_categorical_dtype(adata.obs[colors])):
        asso_names, asso_matrix = _sc_utils.compute_association_matrix_of_groups(
            adata,
            prediction=groups_key,
            reference=colors,
            normalization='reference' if normalize_to_color else 'prediction',
        )
        _utils.add_colors_for_categorical_sample_annotation(adata, colors)
        asso_colors = _sc_utils.get_associated_colors_of_groups(
            adata.uns[colors + '_colors'], asso_matrix)
        colors = asso_colors

    if len(colors) != len(node_labels):
        raise ValueError(
            f'Expected `colors` to be of length `{len(node_labels)}`, '
            f'found `{len(colors)}`.')

    # count number of connected components
    n_components, labels = scipy.sparse.csgraph.connected_components(
        adjacency_solid)
    if n_components > 1 and not single_component:
        logg.debug(
            'Graph has more than a single connected component. '
            'To restrict to this component, pass `single_component=True`.')
    if n_components > 1 and single_component:
        component_sizes = np.bincount(labels)
        largest_component = np.where(
            component_sizes == component_sizes.max())[0][0]
        adjacency_solid = adjacency_solid.tocsr()[labels ==
                                                  largest_component, :]
        adjacency_solid = adjacency_solid.tocsc()[:,
                                                  labels == largest_component]
        colors = np.array(colors)[labels == largest_component]
        node_labels = np.array(node_labels)[labels == largest_component]
        cats_dropped = (adata.obs[groups_key].cat.categories[
            labels != largest_component].tolist())
        logg.info(
            'Restricting graph to largest connected component by dropping categories\n'
            f'{cats_dropped}')
        nx_g_solid = nx.Graph(adjacency_solid)
        if dashed_edges is not None:
            raise ValueError(
                '`single_component` only if `dashed_edges` is `None`.')

    # edge widths
    base_edge_width = edge_width_scale * 5 * rcParams['lines.linewidth']

    # draw dashed edges
    if dashed_edges is not None:
        widths = [x[-1]['weight'] for x in nx_g_dashed.edges(data=True)]
        widths = base_edge_width * np.array(widths)
        if max_edge_width is not None:
            widths = np.clip(widths, None, max_edge_width)
        nx.draw_networkx_edges(
            nx_g_dashed,
            pos,
            ax=ax,
            width=widths,
            edge_color='grey',
            style='dashed',
            alpha=0.5,
        )

    # draw solid edges
    if transitions is None:
        widths = [x[-1]['weight'] for x in nx_g_solid.edges(data=True)]
        widths = base_edge_width * np.array(widths)
        if min_edge_width is not None or max_edge_width is not None:
            widths = np.clip(widths, min_edge_width, max_edge_width)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            nx.draw_networkx_edges(nx_g_solid,
                                   pos,
                                   ax=ax,
                                   width=widths,
                                   edge_color='black')
    # draw directed edges
    else:
        adjacency_transitions = adata.uns['paga'][transitions].copy()
        if threshold is None:
            threshold = 0.01
        adjacency_transitions.data[adjacency_transitions.data < threshold] = 0
        adjacency_transitions.eliminate_zeros()
        g_dir = nx.DiGraph(adjacency_transitions.T)
        widths = [x[-1]['weight'] for x in g_dir.edges(data=True)]
        widths = base_edge_width * np.array(widths)
        if min_edge_width is not None or max_edge_width is not None:
            widths = np.clip(widths, min_edge_width, max_edge_width)
        nx.draw_networkx_edges(g_dir,
                               pos,
                               ax=ax,
                               width=widths,
                               edge_color='black',
                               arrowsize=arrowsize)

    if export_to_gexf:
        if isinstance(colors[0], tuple):
            from matplotlib.colors import rgb2hex

            colors = [rgb2hex(c) for c in colors]
        for count, n in enumerate(nx_g_solid.nodes()):
            nx_g_solid.node[count]['label'] = str(node_labels[count])
            nx_g_solid.node[count]['color'] = str(colors[count])
            nx_g_solid.node[count]['viz'] = dict(position=dict(
                x=1000 * pos[count][0],
                y=1000 * pos[count][1],
                z=0,
            ))
        filename = settings.writedir / 'paga_graph.gexf'
        logg.warning(f'exporting to {filename}')
        settings.writedir.mkdir(parents=True, exist_ok=True)
        nx.write_gexf(nx_g_solid, settings.writedir / 'paga_graph.gexf')

    ax.set_frame_on(frameon)
    ax.set_xticks([])
    ax.set_yticks([])

    # groups sizes
    if groups_key is not None and groups_key + '_sizes' in adata.uns:
        groups_sizes = adata.uns[groups_key + '_sizes']
    else:
        groups_sizes = np.ones(len(node_labels))
    base_scale_scatter = 2000
    base_pie_size = (base_scale_scatter /
                     (np.sqrt(adjacency_solid.shape[0]) + 10) *
                     node_size_scale)
    median_group_size = np.median(groups_sizes)
    groups_sizes = base_pie_size * np.power(groups_sizes / median_group_size,
                                            node_size_power)

    if fontsize is None:
        fontsize = rcParams['legend.fontsize']
    if fontoutline is not None:
        text_kwds = dict(text_kwds)
        text_kwds['path_effects'] = [
            patheffects.withStroke(linewidth=fontoutline, foreground='w')
        ]
    # usual scatter plot
    if not isinstance(colors[0], cabc.Mapping):
        n_groups = len(pos_array)
        sct = ax.scatter(
            pos_array[:, 0],
            pos_array[:, 1],
            c=colors[:n_groups],
            edgecolors='face',
            s=groups_sizes,
            cmap=cmap,
        )
        for count, group in enumerate(node_labels):
            ax.text(
                pos_array[count, 0],
                pos_array[count, 1],
                group,
                verticalalignment='center',
                horizontalalignment='center',
                size=fontsize,
                fontweight=fontweight,
                **text_kwds,
            )
    # else pie chart plot
    else:
        for ix, (xx, yy) in enumerate(zip(pos_array[:, 0], pos_array[:, 1])):
            if not isinstance(colors[ix], cabc.Mapping):
                raise ValueError(
                    f'{colors[ix]} is neither a dict of valid '
                    'matplotlib colors nor a valid matplotlib color.')
            color_single = colors[ix].keys()
            fracs = [colors[ix][c] for c in color_single]
            total = sum(fracs)

            if total < 1:
                color_single = list(color_single)
                color_single.append('grey')
                fracs.append(1 - sum(fracs))
            elif not np.isclose(total, 1):
                raise ValueError(f'Expected fractions for node `{ix}` to be '
                                 f'close to 1, found `{total}`.')

            cumsum = np.cumsum(fracs)
            cumsum = cumsum / cumsum[-1]
            cumsum = [0] + cumsum.tolist()

            for r1, r2, color in zip(cumsum[:-1], cumsum[1:], color_single):
                angles = np.linspace(2 * np.pi * r1, 2 * np.pi * r2, 20)
                x = [0] + np.cos(angles).tolist()
                y = [0] + np.sin(angles).tolist()

                xy = np.column_stack([x, y])
                s = np.abs(xy).max()

                sct = ax.scatter([xx], [yy],
                                 marker=xy,
                                 s=s**2 * groups_sizes[ix],
                                 color=color)

            if node_labels is not None:
                ax.text(
                    xx,
                    yy,
                    node_labels[ix],
                    verticalalignment='center',
                    horizontalalignment='center',
                    size=fontsize,
                    fontweight=fontweight,
                    **text_kwds,
                )

    return sct
Example #35
    def save_network(self):

        nx.write_gexf(self.G, self.directory + self.name + '.gexf')
Example #36
def graph2gexf(graph):
    gexf = BytesIO()
    networkx.write_gexf(graph, gexf)
    return gexf.getvalue().decode("utf8")
def write_graph_in_format(graph, filename, fileformat='gexf') :
    if fileformat.lower() == 'json':
        return json.dump(json_graph.node_link_data(graph), open(filename,'w'))
    return nx.write_gexf(graph, filename)
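A small usage sketch for the dispatcher above (illustrative; it assumes networkx, json, and networkx.readwrite.json_graph are imported as in the original module):

import networkx as nx

G = nx.path_graph(4)
write_graph_in_format(G, 'path.gexf')            # default fileformat='gexf'
write_graph_in_format(G, 'path.json', 'json')    # node-link JSON instead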
Example #38
                G.node[k]['viz'][cle] = Visu[cle]

         #               print G.node[k]
         #       nx.set_node_attributes(G, 'weight', attr_dict)

        outputFile = ndf+network+prefix+'JS.gexf'

        try:
            os.remove(ResultGephiPath+'/'+outputFile)
        except:
            try:
                os.remove(ResultGephiPath+'/'+outputFile)
            except:
                pass
    #
        nx.write_gexf(G, ResultGephiPath+'/'+outputFile, version='1.2draft')
        fic = open(ResultGephiPath+'/'+outputFile, 'r')

        # What follows is a hack to correct the badly written header of the gexf file
        # with dynamic properties
        fictemp=open(ResultGephiPath+'/'+"Good"+outputFile, 'w')


        ecrit = True
        data = fic.read()
        # VERY UGLY Hack here !!!!
        data = data.replace('ns0:', 'viz:') # maybe someone knows how to set the namespace in networkx...
        data = data.replace('a="None"', '') # maybe someone knows why networkx sets the "a" attribute...

        for lig in data.split('\n'):
            if lig.count('<nodes>'):
Example #39
# add EDGES with weight
g_pos = nx.from_pandas_edgelist(rho_pos, 'keywords1', 'keywords2',
                                'spearman_cor')
g_neg = nx.from_pandas_edgelist(rho_neg, 'keywords1', 'keywords2',
                                'spearman_cor')
# add NODES
g_pos.add_nodes_from(nodes_pos.unique(), name=nodes_pos.unique())
g_neg.add_nodes_from(nodes_neg.unique(), name=nodes_neg.unique())
# check
g_pos.number_of_nodes()
g_pos.number_of_edges()
g_neg.number_of_nodes()
g_neg.number_of_edges()
# save for GEPHI
nx.write_gexf(g_pos, "network_positive_correlations.gexf")
nx.write_gexf(g_neg, "network_negative_correlations.gexf")

# check graph connectivity
#nx.is_connected(g_pos)
#nx.number_connected_components(g_pos)
#nx.number_connected_components(g_neg)
#comps = nx.connected_component_subgraphs(g_pos)

# get list top 20 nodes (based on degree)
degrees = [val for (node, val) in g_pos.degree()]
degrees = pd.DataFrame(degrees)
#join node name
degrees['name'] = nodes_pos.unique()

# get Edges
Example #40
            if x > y:
                t = y
                y = x
                x = t
            mat[x][y] += 1

G = nx.Graph()
for i in range(len(a)):
    for j in range(len(a)):
        if mat[i][j] > 5:
            G.add_edge(a[i], a[j], weight=mat[i][j])

for i in G.nodes.keys():
    for j in c[i]:
        print(c[i])
        if i == j:
            idx = c[i].index(j)
            G.nodes[i]["name"] = a[i][idx]

nx.write_gexf(G, "/Users/samue/OneDrive/桌面/journal/test.gexf")
'''
pos = nx.spring_layout(G,weight='weight')


plt.figure(figsize=(25, 25))
plt.axis('off')
nx.draw_networkx(G, pos=pos, with_labels=False, node_size=30, edgecolors='black', edge_color='b')
plt.savefig('C:/Users/samue/OneDrive/桌面/journal/test.png')

plt.show()
'''
Example #41
                            "%s:E" % g_id,
                            label="%s:%d-%d" % (g_id, g_b, g_l),
                            length=abs(g_b - g_l),
                            score=-score)
                sg.add_edge("%s:B" % g_id,
                            "%s:E" % f_id,
                            label="%s:%d-%d" % (f_id, f_e, f_l),
                            length=abs(f_e - f_l),
                            score=-score)

    sg.mark_tr_edges()
    print sum([1 for c in sg.e_reduce.values() if c == True])
    print sum([1 for c in sg.e_reduce.values() if c == False])
    G = SGToNXG(sg)
    nx.write_adjlist(G, "full_string_graph.adj")
    sg.mark_best_overlap()
    print sum([1 for c in sg.e_reduce.values() if c == False])
    #sg.mark_repeat_overlap()
    #print sum( [1 for c in sg.repeat_overlap.values() if c == True] )
    #print sum( [1 for c in sg.repeat_overlap.values() if c == False] )
    #print len(sg.e_reduce), len(sg.repeat_overlap)

    G = SGToNXG(sg)
    nx.write_gexf(G, "string_graph.gexf")
    nx.write_adjlist(G, "string_graph.adj")

    #generate_max_contig(sg, seqs, out_fn="max_tigs.fa")
    u_edges = generate_unitig(sg, seqs, out_fn="unitgs.fa")
    ASM_graph = get_bundles(u_edges)
    nx.write_gexf(ASM_graph, "asm_graph.gexf")
Example #42
def generate_unitig(sg, seqs, out_fn, connected_nodes=None):
    G = SGToNXG(sg)
    if connected_nodes != None:
        connected_nodes = set(sg.nodes)
    out_fasta = open(out_fn, "w")
    nodes_for_tig = set()
    sg_edges = set()
    for v, w in sg.edges:
        if sg.e_reduce[(v, w)] != True:
            sg_edges.add((v, w))
    count = 0
    edges_in_tigs = set()

    uni_edges = {}
    path_f = open("paths", "w")
    uni_edge_f = open("unit_edges.dat", "w")
    while len(sg_edges) > 0:
        v, w = sg_edges.pop()

        #nodes_for_tig.remove(n)
        upstream_nodes = []

        c_node = v
        p_in_edges = sg.get_in_edges_for_node(c_node)
        p_out_edges = sg.get_out_edges_for_node(c_node)
        while len(p_in_edges) == 1 and len(p_out_edges) == 1:
            p_node = p_in_edges[0].in_node
            upstream_nodes.append(p_node.name)
            if (p_node.name, c_node) not in sg_edges:
                break
            sg_edges.remove((p_node.name, c_node))
            p_in_edges = sg.get_in_edges_for_node(p_node.name)
            p_out_edges = sg.get_out_edges_for_node(p_node.name)
            c_node = p_node.name

        upstream_nodes.reverse()

        downstream_nodes = []
        c_node = w
        n_out_edges = sg.get_out_edges_for_node(c_node)
        n_in_edges = sg.get_in_edges_for_node(c_node)
        while len(n_out_edges) == 1 and len(n_in_edges) == 1:
            n_node = n_out_edges[0].out_node
            downstream_nodes.append(n_node.name)
            if (c_node, n_node.name) not in sg_edges:
                break
            sg_edges.remove((c_node, n_node.name))
            n_out_edges = sg.get_out_edges_for_node(n_node.name)
            n_in_edges = sg.get_in_edges_for_node(n_node.name)
            c_node = n_node.name

        whole_path = upstream_nodes + [v, w] + downstream_nodes
        #print len(whole_path)
        count += 1
        subseqs = []
        for i in range(len(whole_path) - 1):
            v_n, w_n = whole_path[i:i + 2]

            edge = sg.edges[(v_n, w_n)]
            edges_in_tigs.add((v_n, w_n))
            #print n, next_node.name, e.attr["label"]

            read_id, coor = edge.attr["label"].split(":")
            b, e = coor.split("-")
            b = int(b)
            e = int(e)
            if b < e:
                subseqs.append(seqs[read_id][b:e])
            else:
                try:
                    subseqs.append("".join(
                        [RCMAP[c] for c in seqs[read_id][b:e:-1]]))
                except:
                    print seqs[read_id]

        uni_edges.setdefault((whole_path[0], whole_path[-1]), [])
        uni_edges[(whole_path[0], whole_path[-1])].append(
            (whole_path, "".join(subseqs)))
        print >> uni_edge_f, whole_path[0], whole_path[-1], "-".join(
            whole_path), "".join(subseqs)

        print >> path_f, ">%05dc-%s-%s-%d %s" % (
            count, whole_path[0], whole_path[-1], len(whole_path),
            " ".join(whole_path))

        print >> out_fasta, ">%05dc-%s-%s-%d" % (
            count, whole_path[0], whole_path[-1], len(whole_path))
        print >> out_fasta, "".join(subseqs)
    path_f.close()
    uni_edge_f.close()
    uni_graph = nx.DiGraph()
    for n1, n2 in uni_edges.keys():
        uni_graph.add_edge(n1, n2, weight=len(uni_edges[(n1, n2)]))
    nx.write_gexf(uni_graph, "uni_graph.gexf")

    out_fasta.close()
    return uni_edges
Example #43
# execute the SQL statement
try:
    with con.cursor() as cursor:
        sql = "select * from user"
        cursor.execute(sql)
        result = cursor.fetchall()
finally:
    con.close()
df = pd.DataFrame(result)  # convert to DataFrame format; df.head()

name_data = df['name'].tolist()  #add_node
following_data = df['following_id']

relation_list = []
for i in range(len(df)):
    a = df['name'][i]  # get name
    if following_data[i] != '[]':
        b = following_data[i].split(",")  # take the first following_id entry and convert it to a list
        for j in range(len(b)):
            d = b[j].split("'")[1]
            if d in name_data:
                relation_list.append((a, d))  #add_edge

import networkx as nx
G = nx.Graph()
G.clear()
G.add_nodes_from(name_data)
G.add_edges_from(relation_list)
print(G.number_of_nodes(), G.number_of_edges())
nx.write_gexf(G, 'social_network.gexf')
Example #44
                if not G.has_edge(advisor_name, person):
                    G.add_edge(advisor_name, person)
                    G.node[advisor_name]['link'] = BASE_URL + advisor_link
        else:
            print 'Already processed: ' + person
    return G, processed


# Depth 0 builds only the trunk based on the advisors; increasing the depth
# recovers more student generations.
def build_genealogy(base_person_name, base_person_link, depth=0):
    MAX_DEPTH = depth
    G = nx.DiGraph()
    processed = []
    G, _ = get_trunk(base_person_name, base_person_link, G, processed)
    processed = []
    for i in range(MAX_DEPTH):
        print 'Expanding: ' + str(i)
        G, processed = expand_tree(G, processed)
    return G


if __name__ == "__main__":
    print 'Getting genealogy'
    G = build_genealogy('Andy Hopper',
                        'http://en.wikipedia.org/wiki/Andy_Hopper')
    print G.nodes()
    print 'Total nodes: ' + str(len(G.nodes()))
    nx.write_gexf(G, "./test.gexf")
    print 'Fin'
Example #45
            to_id = t['retweeted_status']['id_str']
            to_user = t['retweeted_status']['user']['screen_name']
            to_user_id = t['retweeted_status']['user']['id_str']
            add(from_user, from_id, to_user, to_id, "retweet")

if options.min_subgraph_size or options.max_subgraph_size:
    g_copy = G.copy()
    for g in networkx.connected_component_subgraphs(G):
        if options.min_subgraph_size and len(g) < options.min_subgraph_size:
            g_copy.remove_nodes_from(g.nodes())
        elif options.max_subgraph_size and len(g) > options.max_subgraph_size:
            g_copy.remove_nodes_from(g.nodes())
    G = g_copy
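# Note: connected_component_subgraphs() was removed in NetworkX 2.4; on newer
# releases the same filtering can be sketched (same variable names assumed) as:
#     for g in (G.subgraph(c).copy() for c in networkx.connected_components(G)):
#         ... apply the same min/max size checks as above ...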

if output.endswith(".gexf"):
    networkx.write_gexf(G, output)

elif output.endswith(".gml"):
    networkx.write_gml(G, output)

elif output.endswith(".dot"):
    nx_pydot.write_dot(G, output)

elif output.endswith(".json"):
    json.dump(to_json(G), open(output, "w"), indent=2)

elif output.endswith(".html"):
    graph_data = json.dumps(to_json(G), indent=2)
    html = """<!DOCTYPE html>
<meta charset="utf-8">
<script src="https://platform.twitter.com/widgets.js"></script>
Example #46
0
                    G.add_edge(current_node, nodes[link])

                else:

                    if link not in to_visit:
                        to_visit.append(link)
                        m = G.number_of_nodes() + 1
                        G.add_node(m, page=link)
                        G.add_edge(current_node, m)
                        nodes[link] = m

                    else:
                        m = nodes[link]
                        G.add_edge(current_node, m)

            else:

                if link not in outlinks:
                    m = G.number_of_nodes() + 1
                    G.add_node(m, page=link)
                    G.add_edge(current_node, m)
                    outlinks.append(link)
                    nodes[link] = m

                else:
                    m = nodes[link]
                    G.add_edge(current_node, m)

outcome = dict(zip(visited, responses))
nx.write_gexf(G, 'ng.gexf')
Example #47
0
import networkx as nx

from barl_simpleoptions import SubgoalOption
from barl_simpleoptions import PrimitiveOption
from barl_simpleoptions import OptionAgent

from two_rooms_environment import TwoRoomsEnvironment
from two_rooms_state import TwoRoomsState

################################
## Generate Interaction Graph ##
################################

# Generate state-interaction graph for this environment and save it to a file.
initial_state = TwoRoomsState((0, 0))
state_transition_graph = initial_state.generate_interaction_graph(
    [initial_state])
nx.write_gexf(state_transition_graph, "sa_graph.gexf")

########################
## Construct options. ##
########################
options = []

# Construct primitive options.
primitive_actions = TwoRoomsState.actions
for action in primitive_actions:
    options.append(PrimitiveOption(action))

# Construct subgoal-directed option (i.e. door subgoal).
door_policy_file_path = "door_option_policy.json"
door_option = SubgoalOption(TwoRoomsState((1, 3)), state_transition_graph,
                            door_policy_file_path, 19)
Example #48
0
def exportGraph(graph):
    nx.write_gexf(graph, 'graph.gexf')
Example #49
0
def main():
    sub1, sub2, DEBUG, VERBOSE, LIMIT = parse_command_line_args()

    if DEBUG:
        sub1, sub2 = '100pushups', 'MakeupAddiction'

    user_agent = ("reddit_sna scraper v0.1 by /u/sna_bot "
                  "https://github.com/brianreallymany/reddit_sna")
    r = praw.Reddit(user_agent=user_agent)

    graph = nx.Graph()

    submissions_per_subreddit = LIMIT

    # Add nodes and edges for users of first subreddit
    if VERBOSE:
        print("\nAdding nodes and in_group_submissions edges for first "+\
                " subreddit, " + sub1)
    graph = update_graph_with_subreddit_of_interest(graph,
                                                    submissions_per_subreddit,
                                                    sub1, r, DEBUG, VERBOSE)

    # Add nodes and edges for users of second subreddit
    #   If the two subreddits have any users in common,
    #   edges between them will be annotated with the
    #   "in_group_submissions" field and the name of the submission
    if VERBOSE:
        print("Adding nodes and in_group_submissions edges for second "+\
                "subreddit, " + sub2)
    graph = update_graph_with_subreddit_of_interest(graph,
                                                    submissions_per_subreddit,
                                                    sub2, r, DEBUG, VERBOSE)

    # For each user in the graph, explore previous comments
    #   made outside of the user's "user_of" subreddit(s).
    #   If other users from the graph are present in the same
    #   submission, add an edge with "out_group_submissions"
    #   and the submission permalink.
    if VERBOSE:
        print("\nNow updating graph with submissions and comments from "+\
                str(len(graph.nodes())) + " users.\n")
    count = 1
    for user in graph.nodes():
        update_graph_with_user_comments(graph, user, r, (sub1, sub2), DEBUG,
                                        VERBOSE, LIMIT)
        count += 1
        if VERBOSE:
            if count % 100 == 0:
                print("\n\t\tNow processing user " + str(count) + "\n")

    # Summarize graph
    if VERBOSE:
        print_graph_summary(graph)
        print("writing gexf...")

    # Write .gexf file
    timestamp = datetime.datetime.now().isoformat()
    filename = sub1 + "." + sub2 + "."
    filename += "limit_" + str(LIMIT) + "."
    filename += timestamp + ".gexf"
    nx.write_gexf(graph, filename)

    if VERBOSE:
        print("wrote gexf...")
Example #50
0
    def execute(self, G, epsilon=0.25, weighted=False, min_community_size=30):
        """
        Execute Demon algorithm

        :param G: the networkx graph on which perform Demon
        :param epsilon: the tolerance required in order to merge communities
        :param weighted: whether the graph is weighted or not
        :param min_community_size: minimum number of nodes needed to form a community
        """
        tempo_prima_parte = 0.0
        tempo_pr_pt = 0.0
        time_first = time.time()
        nx.set_node_attributes(G, 'communities', 0)

        #######
        self.G = G
        self.epsilon = epsilon
        self.min_community_size = min_community_size
        for n in self.G.nodes():
            G.node[n]['communities'] = [n]
        self.weighted = weighted
        #######

        all_communities = {}
        #LABELING
        print("Map Start")

        nodiEgo = self.G.nodes()
        tempo_prima_parte += time.time() - time_first
        if (processor == 1):
            dicts = map(Functor(self.G), nodiEgo)
        else:
            dicts = p.map(Functor(self.G), nodiEgo)
        time_second = time.time()
        p.join
        print("Map End")

        #MERGING
        print("Reduce Start")
        community_to_nodes_tmp = {}
        community_to_nodes_tmp2 = {}
        all_communities = []
        old_communities = []
        tempo_accoppiamento = 0.0
        tempo_map = 0.0
        tempo_scorr = 0.0
        tempo_ultimo_step = 0.0
        tempo_iteraz = 0.0
        tempo_reale = 0.0
        millecinque = False
        tempo_millecinque = 0.0
        millequattro = False
        tempo_millequattro = 0.0
        milletre = False
        tempo_milletre = 0.0
        milledue = False
        tempo_milledue = 0.0

        cont = 0
        for dd in dicts:
            tempo_pr_pt += dd["time"]
            dd.pop("time", None)
            for k in dd.keys():
                community_to_nodes_tmp[cont] = list(dd[k])
                cont += 1

        #writef(community_to_nodes_tmp)
        #community_to_nodes_tmp = readf()
        time_in = time.time()
        tempo_prima_parte += time_in - time_second

        first = ""
        second = ""
        k = 0
        #store number of communities before merge
        old_len = len(community_to_nodes_tmp)
        if (k_max_str == "log"):
            k_max = int(math.log(old_len))
        elif (k_max_str == "sqrt"):
            k_max = math.sqrt(old_len)
        else:
            k_max = 1
        #k_max = 1
        j = 1

        while True:
            time_in_iteraz = time.time()
            i = 0
            group_of_comm = []
            all_communities = []
            alone = ""
            dim_group = int(math.ceil(old_len / processor))
            print(old_len, ", ", processor, ", ", dim_group)
            max_time = 0.0
            # save groups of communities in "group_of_comm"; the number of elements in each
            # sub-community depends on how many processing units are available
            # if the number of communities equals the number of processors, treat them as a single group
            if (dim_group == 1):
                group_of_comm = dict_split(community_to_nodes_tmp, old_len)
                results = {}
                results = merge_communities2(group_of_comm)
                for dd in results.keys():
                    if (dd == "time"):
                        if (results[dd] > max_time):
                            max_time = results[dd]
                    else:
                        all_communities.append(list(dd))
                tempo_reale += max_time
            else:
                group_of_comm = dict_split(community_to_nodes_tmp, dim_group)
                if (group_of_comm != []):
                    if (type(group_of_comm) == dict):
                        results = merge_communities2(group_of_comm)
                    elif (processor == 1):
                        results = merge_communities2(group_of_comm)
                    else:
                        results = p.map(merge_communities2, group_of_comm)

                #put the single communities in a new list "all_communities"
                if (type(results) == dict):
                    for item in results.keys():
                        if (item == "time"):
                            if (results[item] > max_time):
                                max_time = results[item]
                        else:
                            all_communities.append(list(item))
                else:
                    for dd in results:
                        if (type(dd) == tuple):
                            all_communities.append(list(dd))
                        else:
                            for item in dd.keys():
                                if (item == "time"):
                                    if (dd[item] > max_time):
                                        max_time = dd[item]
                                else:
                                    all_communities.append(list(item))
                tempo_reale += max_time
            if (len(all_communities) < 12000 and millecinque == False):
                tempo_millecinque = time.time()
                millecinque = True
            if (len(all_communities) < 11000 and millequattro == False):
                tempo_millequattro = time.time()
                millequattro = True
            if (len(all_communities) < 10000 and milletre == False):
                tempo_milletre = time.time()
                milletre = True
            if (len(all_communities) < 9000 and milledue == False):
                tempo_milledue = time.time()
                milledue = True
            if (len(all_communities) == old_len):
                k += 1
            else:
                k = 0
            if ((len(all_communities) == old_len)
                    and (k >= k_max or old_len <= 2)):
                #create vector to write at the end
                community_to_nodes_tmp2.clear()
                for dd in all_communities:
                    community_to_nodes_tmp2[tuple(sorted(dd))] = 0
                break
            else:
                old_len = len(all_communities)
                old_communities = list(all_communities)
                random.shuffle(all_communities)
                j += 1
            community_to_nodes_tmp.clear()
            x = 0
            #create vector for next cycle
            for dd in all_communities:
                community_to_nodes_tmp[x] = list(sorted(dd))
                x += 1
            time_last_step = time.time()
            print("Tempo-Iterazione: ", time_last_step - time_in_iteraz)
            tempo_iteraz += time_last_step - time_in_iteraz

        time_fin = time.time()
        time_prima_parte = time_in - time_first
        time_prima_parte_reale = tempo_prima_parte + tempo_pr_pt / processor
        time_seconda_parte = time_fin - time_in
        print("tempo Prima Parte: " + str(time_prima_parte))
        print("tempo Prima Reale: " + str(time_prima_parte_reale))

        print("tempo Seconda Parte: " + str(time_seconda_parte))
        print("tempo Seconda Reale: " + str(tempo_reale))

        print("Tempo Totale: " + str(time_prima_parte + time_seconda_parte))
        print("Tempo Totale Reale: " +
              str(time_prima_parte_reale + tempo_reale))

        print("tempo Iterazione medio = ", tempo_iteraz / j)
        print("Reduce End")

        #OUTPUT

        print("Output Start")
        all_communities = {}
        all_communities = community_to_nodes_tmp2
        out_file_com = open(
            "communities(epsilo=" + str(self.epsilon) + "," +
            str(min_community_size) + ").txt", "w")
        idc = 0
        classified = 0
        num_of_members = 0
        for c in community_to_nodes_tmp2.keys():
            out_file_com.write("%d\t%d\t%s\n" % (idc, len(c), str(sorted(c))))
            idc += 1
            num_of_members += len(c)
        out_file_com.flush()
        out_file_com.close()
        print("Numero Membri Medio = ", num_of_members / idc)

        for c in community_to_nodes_tmp2.keys():
            for n in c:
                G.node[n]['comm_color'] = str(-1)

        communities = list(community_to_nodes_tmp2.keys())
        communities.sort(key=len)

        for i, c in enumerate(communities):
            for n in c:
                if (G.node[n]['comm_color'] == str(-1)):
                    classified += 1
                G.node[n]['comm_color'] = str(i)
        perc_nodi_scartati = str(1 - (classified / num_of_nodes))
        print("Percentuale nodi scartati= " + str(1 -
                                                  (classified / num_of_nodes)))

        for n in self.G.nodes():
            G.node[n]['communities'] = n
        nx.write_gexf(
            G, file_name + "(epsilon=" + str(epsilon) + "," +
            str(min_community_size) + ").gexf")

        with open('tempiNew.csv', 'a', newline='') as csvfile:
            spamwriter = csv.writer(csvfile,
                                    delimiter=';',
                                    quotechar='|',
                                    quoting=csv.QUOTE_MINIMAL)
            spamwriter.writerow([
                processor, epsilon, k_max_str,
                str(tempo_millecinque - time_first).replace('.', ','),
                str(tempo_millequattro - time_first).replace('.', ','),
                str(tempo_milletre - time_first).replace('.', ','),
                str(tempo_milledue - time_first).replace('.', ','),
                str(time_prima_parte).replace('.', ','),
                str(time_prima_parte_reale).replace('.', ','),
                str(time_seconda_parte).replace('.', ','),
                str(tempo_reale).replace('.', ','),
                str(time_prima_parte + time_seconda_parte).replace('.', ','),
                str(time_prima_parte_reale + tempo_reale).replace('.', ','),
                old_len, perc_nodi_scartati
            ])

        print("Output end")

        return
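A minimal usage sketch for the method above, assuming the enclosing class is named Demon and takes no constructor arguments, that the module-level globals it relies on (processor, p, k_max_str, file_name, num_of_nodes) are configured elsewhere, and that a NetworkX version exposing G.node is installed:

import networkx as nx

G = nx.karate_club_graph()
demon = Demon()  # hypothetical constructor of the enclosing class
demon.execute(G, epsilon=0.25, weighted=False, min_community_size=5)
# results land in a communities(...).txt file and an annotated <file_name>(...).gexf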
Example #51
0
                        noeud)]['label'] = noeud + '-' + attr['name']
            else:
                print "on devrait pas être là, never", noeud
                #G.node[ListeNoeuds.index(noeud)]['end'] = ExtraitMinDate(G.node[ListeNoeuds.index(noeud)]) + DureeBrevet
                #G.node[ListeNoeuds.index(noeud)]['start'] =
    G.graph['defaultedgetype'] = "directed"
    G.graph['timeformat'] = "date"
    G.graph['mode'] = "dynamic"
    G.graph['start'] = dateMini

    G.graph['end'] = dateMax

    ndf = ndf.replace('Families', '')
    ndf = ndf.replace('.dump', '')
    nx.write_gexf(G,
                  ResultPathGephi + '\\' + ndf + "2.gexf",
                  version='1.2draft')
    fic = open(ResultPathGephi + '\\' + ndf + '2.gexf', 'r')
    #
    # Next is a hack to correct the badly written header of the gexf file
    # when dynamic properties are used
    fictemp = open(ResultPathGephi + '\\' + "Good" + ndf + '2.gexf', 'w')
    fictemp.write(
        """<?xml version="1.0" encoding="utf-8"?><gexf version="1.2" xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema-instance">
  <graph defaultedgetype="directed" mode="dynamic" timeformat="date">
    <attributes class="edge" mode="static">
      <attribute id="11" title="NormedWeight" type="double" />
      <attribute id="13" title="deb" type="string" />
      <attribute id="14" title="fin" type="string" />
      <attribute id="15" title="rel" type="string" />
    </attributes>
Example #52
0
nodes = []
links = []
for n, line in enumerate(f):
    node = {}
    link = {}

    source, targets = parse(line)
    idx = getcluster(source)
    node["name"] = source
    node["group"] = idx
    nodes.append(node.copy())
    G.add_node(source, group=idx)

    for t in targets:
        exploded = t.split(' ')
        distance = exploded[1]
        edge = exploded[0]
        link["source"] = source
        link["target"] = edge
        link["weight"] = int(distance)
        G.add_edge(source, edge, weight=int(distance))  # keep the weight numeric, matching the JSON output
        links.append(link.copy())

print("{\"nodes\":", file=y)
print(json.dumps(nodes), file=y)
print(",", file=y)
print("\"links\":", file=y)
print(json.dumps(links), file=y)
print("}", file=y)
nx.write_gexf(G, "./data/flickr/network_flickr.gexf")
Example #53
0
 def write(self):
     suffix = "".join(
         ["_" + k + "=" + v for (k, v) in self.attributes.items()])
     nx.write_gexf(self.graph,
                   DIR_GEPHI + "papers-network" + suffix + ".gexf")
Example #54
0
 def _export_to_gexf(self, filename: str):
     """Save the network as .gexf file."""
     nx.write_gexf(self.graph, filename)
Example #55
0
@author: paavo.ronni
"""

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import collections
from tqdm import tqdm

import networkx as nx

G = nx.read_gexf(
    r'C:\OmatProjektit\ComplexNetworks\graphs\chessnetwork_joined_filtered.gexf'
)
G = max(nx.connected_component_subgraphs(G), key=len)

N = len(G.nodes())

BA_G = nx.barabasi_albert_graph(N, 8)

print('Number of edges: {}'.format(G.number_of_edges()))
print('Average degree: {}'.format(2. * G.number_of_edges() /
                                  G.number_of_nodes()))
degree_sequence = [degree for node, degree in G.degree()]
print('Max degree: {}'.format(max(degree_sequence)))
print('Network density: {}'.format(G.number_of_edges() / (N * (N - 1) / 2.)))
print('Average clustering coef.: {}'.format(nx.average_clustering(G)))

nx.write_gexf(BA_G, 'barabasi_albert_graph.gexf')
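The script above writes the Barabási–Albert reference graph to disk but only prints statistics for the chess network; a short follow-up sketch could report the same summary for BA_G so the two can be compared directly:

print('BA number of edges: {}'.format(BA_G.number_of_edges()))
print('BA average degree: {}'.format(2. * BA_G.number_of_edges() /
                                     BA_G.number_of_nodes()))
ba_degree_sequence = [degree for node, degree in BA_G.degree()]
print('BA max degree: {}'.format(max(ba_degree_sequence)))
print('BA network density: {}'.format(BA_G.number_of_edges() / (N * (N - 1) / 2.)))
print('BA average clustering coef.: {}'.format(nx.average_clustering(BA_G)))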
Example #56
0
pos = nx.spring_layout(G, scale=1000)

print('Saving attributes...')
#Add the visual attrs to each node
for i in range(len(comms)):
    for n in comms[i]:
        G.node[n]['viz'] = {
            'color': rgbs[i],
            'position': {
                'x': pos[n][0],
                'y': pos[n][1]
            }
        }

#Export to 'facebook.gexf'
nx.write_gexf(G, './network/data/facebook.gexf')

# nx.draw_graphviz(sg, prog='sfdp')

# c = max_clique(sg)
# print nx.info(c)

# for n in sg:
#     sg.node[n]['name'] = n
# d = json_graph.node_link_data(sg)

# json.dump(d, open('force.json','w'))
# nx.write_dot(sg, 'data.dot')

# print nx.radius(sg)
# print nx.diameter(sg)
Example #57
0
def Smoothness():
    todayDate = graphUtils.getTodayDateFolder()
    lastSmoothnessDate = graphUtils.loadSettings(
        graphConstants.LAST_GRAPH_SMOOTHNESS_DIR)
    lastSuggSmoothnessDate = graphUtils.loadSettings(
        graphConstants.LAST_GRAPH_SUGG_SMOOTHNESS_DIR)

    if lastSmoothnessDate:
        graphUtils.logger.info("Graph Smoothness done last for =" +
                               lastSmoothnessDate)
    else:
        graphUtils.logger.info("Graph Smoothness done last for None")

    if lastSuggSmoothnessDate:
        graphUtils.logger.info("GraphSugg Smoothness done last for =" +
                               lastSuggSmoothnessDate)
    else:
        graphUtils.logger.info("GraphSugg Smoothness done last for None")

    if todayDate == lastSmoothnessDate and todayDate == lastSuggSmoothnessDate:
        graphUtils.logger.info(
            "Graph Smoothness signal already done for today :" + todayDate)
        return True
    graph_path = os.path.join(graphConstants.ROOT_FOLDER,
                              graphConstants.GRAPH_DIR,
                              graphConstants.GRAPH_DIR,
                              graphConstants.TYPE_MAIN)
    graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    write_graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    G = nx.read_gexf(graph_file)
    trainFiles, trainFileNames = graphUtils.findRecommTrainGraphFiles()
    trainCorpus = graphUtils.findCorpus(trainFiles)
    bm25obj = Bm25(trainCorpus)
    trainUniqueWords = []
    for trainText in trainCorpus:
        trainUniqueWords.append(set(trainText))

    if todayDate != lastSmoothnessDate:
        testFiles, testFileName = graphUtils.findRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        testUniqueWords = []
        mini = 100
        maxi = -1
        count = 0
        smoothness = zeros((len(testCorpus), len(trainCorpus)))
        for testText in testCorpus:
            testUniqueWords.append(set(testText))
        for testDoc in range(len(testCorpus)):
            recomm_nodename = testFileName[testDoc]
            uniqueTest = testUniqueWords[testDoc]
            SminusDcontext = zeros(bm25obj.N)
            DminusScontext = zeros(bm25obj.N)
            for trainDoc in range(len(trainCorpus)):
                uniqueTrain = trainUniqueWords[trainDoc]
                SminusD = [
                    word for word in trainCorpus[trainDoc]
                    if word not in uniqueTest
                ]
                DminusS = [
                    word for word in testCorpus[testDoc]
                    if word not in uniqueTrain
                ]
                SminusDcontext = bm25obj.BM25Score(SminusD)
                DminusScontext = bm25obj.BM25Score(DminusS)
                smoothness[testDoc][trainDoc] = np.dot(SminusDcontext,
                                                       DminusScontext)
            dict_arr = {
                key: value
                for (key, value) in enumerate(smoothness[testDoc])
            }
            sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1))
            sorted_x.reverse()
            sorted_x = sorted_x[:graphConstants.MAX_SMOOTHNESS_EDGE]
            total = sum([pair[1] for pair in sorted_x])
            for (idxsim, val) in sorted_x:
                prob = val / total
                if recomm_nodename not in G.nodes():
                    G.add_node(recomm_nodename)
                    G.node[recomm_nodename][
                        'type'] = graphConstants.TYPE_GOOGLE
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    if G.has_edge(recomm_nodename, trainNode) is False:
                        G.add_edge(recomm_nodename,
                                   trainNode,
                                   weight=prob *
                                   graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[recomm_nodename][trainNode][
                            'weight'] = G[recomm_nodename][trainNode][
                                'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT

                    if G.has_edge(trainNode, recomm_nodename) is False:
                        G.add_edge(trainNode,
                                   recomm_nodename,
                                   weight=prob *
                                   graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[trainNode][recomm_nodename][
                            'weight'] = G[trainNode][recomm_nodename][
                                'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT
                    count = count + 1

            #print smoothness[testDoc]
        graphUtils.logger.info(
            "Simple graph Smoothness completed for normalGoogle today. Stats follow"
        )
        graphUtils.logger.info("mini =" + str(mini))
        graphUtils.logger.info("maxi =" + str(maxi))
        graphUtils.logger.info("Smoothness edges count =" + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPH_SMOOTHNESS_DIR,
                                todayDate)
        pass

    if todayDate != lastSuggSmoothnessDate:
        testFiles, testFileName = graphUtils.findSuggRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        testUniqueWords = []
        mini = 100
        maxi = -1
        count = 0
        smoothness = zeros((len(testCorpus), len(trainCorpus)))
        for testText in testCorpus:
            testUniqueWords.append(set(testText))
        for testDoc in range(len(testCorpus)):
            recomm_nodename = testFileName[testDoc]
            uniqueTest = testUniqueWords[testDoc]
            SminusDcontext = zeros(bm25obj.N)
            DminusScontext = zeros(bm25obj.N)
            for trainDoc in range(len(trainCorpus)):
                uniqueTrain = trainUniqueWords[trainDoc]
                SminusD = [
                    word for word in trainCorpus[trainDoc]
                    if word not in uniqueTest
                ]
                DminusS = [
                    word for word in testCorpus[testDoc]
                    if word not in uniqueTrain
                ]
                SminusDcontext = bm25obj.BM25Score(SminusD)
                DminusScontext = bm25obj.BM25Score(DminusS)
                smoothness[testDoc][trainDoc] = np.dot(SminusDcontext,
                                                       DminusScontext)
            dict_arr = {
                key: value
                for (key, value) in enumerate(smoothness[testDoc])
            }
            sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1))
            sorted_x.reverse()
            sorted_x = sorted_x[:graphConstants.MAX_SMOOTHNESS_EDGE]
            total = sum([pair[1] for pair in sorted_x])
            for (idxsim, val) in sorted_x:
                prob = val / total
                if recomm_nodename not in G.nodes():
                    G.add_node(recomm_nodename)
                    G.node[recomm_nodename]['type'] = graphConstants.TYPE_SUGG
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    if G.has_edge(recomm_nodename, trainNode) is False:
                        G.add_edge(recomm_nodename,
                                   trainNode,
                                   weight=prob *
                                   graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[recomm_nodename][trainNode][
                            'weight'] = G[recomm_nodename][trainNode][
                                'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT

                    if G.has_edge(trainNode, recomm_nodename) is False:
                        G.add_edge(trainNode,
                                   recomm_nodename,
                                   weight=prob *
                                   graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[trainNode][recomm_nodename][
                            'weight'] = G[trainNode][recomm_nodename][
                                'weight'] + prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT
                    count = count + 1

            #print smoothness[testDoc]
        graphUtils.logger.info(
            "Simple graph Smoothness completed for suggGoogle today. Stats follow"
        )
        graphUtils.logger.info("mini =" + str(mini))
        graphUtils.logger.info("maxi =" + str(maxi))
        graphUtils.logger.info("Smoothness edges count =" + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPH_SUGG_SMOOTHNESS_DIR,
                                todayDate)
        pass
Example #58
0
    add_annotations("subjects", references_article_grouped, g)
    add_annotations("authors", references_article_grouped, g)
    add_annotations("institutions", references_article_grouped, g)
    add_annotations("keywords", references_article_grouped, g)
    add_annotations("countries", references_article_grouped, g)
    del references_article_grouped
    if CONFIG["report_verbose"]: print "have now %s nodes" % len(g.nodes())

    if not os.path.exists(CONFIG["output_directory"]):
        os.mkdir(CONFIG["output_directory"])

    if CONFIG["export_ref_annotated_format"] == "gexf":
        if CONFIG["process_verbose"]: print "write gexf export"
        networkx.write_gexf(
            g,
            os.path.join(CONFIG["output_directory"],
                         "%s_annotated.gexf" % span))
    elif CONFIG["export_ref_annotated_format"] == "edgelist":
        if CONFIG["process_verbose"]: print "write csv export"
        networkx.write_weighted_edgelist(g,
                                         os.path.join(
                                             CONFIG["output_directory"],
                                             "%s_annotated.csv" % span),
                                         delimiter="\t")
    elif CONFIG["export_ref_annotated_format"] == "pajek":
        if CONFIG["process_verbose"]: print "write pajek export"
        networkx.write_pajek(
            g,
            os.path.join(CONFIG["output_directory"],
                         "%s_annotated.net" % span))
    elif CONFIG["export_ref_annotated_format"] == "graphml":
Example #59
0
def main(argv):

    parser = argparse.ArgumentParser(description='convert Celera(R) Assembler\'s \"best.edges\" to a gexf graph file')

    parser.add_argument('-g','--gkp_store', help='CA gkp_store directory, (celera-assembler.gkpStore)', default="celera-assembler.gkpStore")
    parser.add_argument('-t','--tig_store', help='CA tig_store directory, (celera-assembler.tigStore)', default="celera-assembler.tigStore")
    parser.add_argument('-b','--best_edge', help='CA best edge file, (./4-unitigger/best.edges)', default="./4-unitigger/best.edges")
    parser.add_argument('-c','--csv_data',  help='file containing arbitrary data in csv format', required=False)
    parser.add_argument('-o','--output',    help='output gexf file, (output.gexf)', default="output")

    args = parser.parse_args()

    gkp_store = args.gkp_store
    tig_store = args.tig_store
    best_edge = args.best_edge
    csv = args.csv_data
    output = args.output

    G=nx.DiGraph()
    frg_to_tig = {}
    cout = {}
    args = shlex.split("tigStore -g %s -t %s 2 -D unitiglist" % (gkp_store, tig_store ))
    out = subprocess.check_output(args)
    out = out.split("\n")
    for l in out:
        l = l.strip().split()
        if len(l) == 0: continue
        if l[0] == "maID": continue
        unitig_id = int(l[0])

        os.system("tigStore -g %s -t %s 2 -d frags -u %d > frag_list" % ( gkp_store, tig_store, unitig_id) )

        args = shlex.split( "tigStore -g %s -t %s 2 -d frags -u %d" % ( gkp_store, tig_store, unitig_id) )
        f_out = subprocess.check_output(args)
        f_out = f_out.split("\n")
        for l in f_out:
            """FRG    1453 179419,182165"""
            l = l.replace(",", " ")
            l = l.strip().split()
            if len(l) == 0: continue
            frg_id = l[1]
            frg_to_tig[frg_id] = unitig_id
    if(csv):
        with open(csv) as fin:
            for l in fin:
                l = l.strip().split(",")
                contig, cov, size, ref = l
                cout[contig] = size, cov, ref


    with open(best_edge) as f:
        for l in f:
            if l[0] == "#": continue
            l = l.strip().split()
            id1, lib_id, best5, o1, best3, o3, j1, j2 = l
    #        id1, lib_id, best5, o1, best3, o3 = l
            try:
                G.add_node(id1, label="utg%s" % frg_to_tig[id1], size=int(cout["unitig_%s"%frg_to_tig[id1]][0]), cov=float(cout["unitig_%s"%frg_to_tig[id1]][1]),ref=(cout["unitig_%s"%frg_to_tig[id1]][2]))
            except KeyError:
                G.add_node(id1, label="utg%s" % frg_to_tig[id1], size=int(0), cov=float(0))
            if best5 != "0":
                G.add_edge(best5, id1)
            if best3 != "0":
                G.add_edge(id1, best3)

    output_gexf = "%s.gexf" % output
    nx.write_gexf(G, output_gexf)
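A hypothetical invocation of the script above, assuming it is saved as best_edges_to_gexf.py with the usual main() entry point; the CSV file name is likewise an assumption, and the store/edge paths are simply the argparse defaults:

import subprocess

subprocess.check_call([
    "python", "best_edges_to_gexf.py",
    "-g", "celera-assembler.gkpStore",   # CA gatekeeper store (default)
    "-t", "celera-assembler.tigStore",   # CA tig store (default)
    "-b", "./4-unitigger/best.edges",    # best edges file (default)
    "-c", "unitig_stats.csv",            # optional contig,cov,size,ref table (assumed name)
    "-o", "assembly_graph",              # produces assembly_graph.gexf for Gephi
])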
Example #60
0
    def get_embeddedness(self):

        DG1 = nx.MultiDiGraph()  # for only following graph
        DG2 = nx.Graph() # for mention and following graph (undirected)
        DG_2 = nx.MultiDiGraph()  # FOR GEPHI: for mention and following graph

        ################
        # Get list of scientist and other public users a node (could be either scientist or public) follows
        ################

        print("Getting following dict ...")

        t_start = time.time()

        following_dict = {}

        for n in range(1, 21):
            lines = open(path_to_following_list_folder + str(n) + '.csv', 'r').readlines()

            for line in lines:
                spline = line.rstrip('\n').split(',')

                key = spline[0]
                value = []

                # use a separate index so the outer file-number loop variable is not shadowed
                for i in range(1, len(spline) - 1, 2):
                    value.append([spline[i], spline[i + 1]])

                following_dict[key] = (value)

        print("Length of following dict: " + str(len(following_dict)))

        t_end = time.time()
        total_time = round(((t_end - t_start) / 60), 2)
        print("Computing time was " + str(total_time) + " minutes.")


        ###################
        # create graph for following_list_dict
        ###################

        t_start = time.time()

        lines = open(path_to_combined_mention_edges, 'r').readlines()

        edges_pos = []
        edges_neg = []

        for line in lines:
            spline = line.rstrip('\n').split(',')

            if spline[2] == 'pos':
                edges_pos.append([spline[0], spline[1]])

            if spline[2] == 'neg':
                edges_neg.append([spline[0], spline[1]])

        print()
        print("Length of positive edges (mentions): " + str(len(edges_pos)))
        print("Length of negative edges (mentions):  " + str(len(edges_neg)))

        t_end = time.time()
        total_time = round(((t_end - t_start) / 60), 2)
        print("Computing time was " + str(total_time) + " minutes.")

        print ()
        print ("Getting nodes (mention)...")

        t_start = time.time()

        nodes_temp = []  # get number of nodes from mention graph, to be used to check with final nodes count (should be the same!)

        for ep in edges_pos:

            if ep[0] not in nodes_temp:
                nodes_temp.append(ep[0])

            if ep[1] not in nodes_temp:
                nodes_temp.append(ep[1])

        for en in edges_neg:

            if en[0] not in nodes_temp:
                nodes_temp.append(en[0])

            if en[1] not in nodes_temp:
                nodes_temp.append(en[1])

        print("Length of nodes (from mention graph): " + str(len(nodes_temp)))

        t_end = time.time()
        total_time = round(((t_end - t_start) / 60), 2)
        print("Computing time was " + str(total_time) + " minutes.")


        print ()
        print ("Getting edges (mentions+following, incl.intra-group)...")

        edges_pos_2 = []  # to store only following edges

        for key, value in following_dict.items():

            for v in value:

                edges_pos.append([key, v[1]])
                edges_pos_2.append([key, v[1]])

        #print (edges_pos_2)

        print()
        print("Length of positive edges (following, incl. intra-group): " + str(len(edges_pos_2)))

        print()
        print("Length of positive edges (mentions+following): " + str(len(edges_pos)))
        print("Length of negative edges (mentions+following):  " + str(len(edges_neg)))
        print("Length of total edges (mentions+following):  " + str(len(edges_neg) + len(edges_pos)))

        # get nodes

        print ()
        print ("Getting nodes (mention+following)...")

        t_start = time.time()

        nodes = []

        for ep in edges_pos:

            if ep[0] not in nodes:
                nodes.append(ep[0])

            if ep[1] not in nodes:
                nodes.append(ep[1])

        for en in edges_neg:

            if en[0] not in nodes:
                nodes.append(en[0])

            if en[1] not in nodes:
                nodes.append(en[1])

        print("Length of nodes (for mention+following graph): " + str(len(nodes)))

        t_end = time.time()
        total_time = round(((t_end - t_start) / 60), 2)
        print("Computing time was " + str(total_time) + " minutes.")

        print ()
        print ("Getting nodes (following, incl. intra-group)...")

        t_start = time.time()

        nodes_1 = []  # for only following graph

        for ep in edges_pos_2:

            if ep[0] not in nodes_1:
                nodes_1.append(ep[0])

            if ep[1] not in nodes_1:
                nodes_1.append(ep[1])

        print ("Length of nodes (following, incl. intra-group): ",len(nodes_1))

        t_end = time.time()
        total_time = round(((t_end - t_start) / 60), 2)
        print("Computing time was " + str(total_time) + " minutes.")

        print ()
        print ("Creating GEPHI graph (following, incl. intra-group)...")

        t_start = time.time()

        DG1.add_edges_from(edges_pos_2, sign='+')
        DG1.add_nodes_from(nodes_1)

        nx.write_gexf(DG1, path_to_store_following_graph) #includes intra-public and intra-scientist group following!

        DG2.add_edges_from(edges_pos, sign='+')
        DG2.add_edges_from(edges_neg, sign='-')
        DG2.add_nodes_from(nodes)

        t_end = time.time()
        total_time = round(((t_end - t_start) / 60), 2)
        print("Computing time was " + str(total_time) + " minutes.")


        # ----------------------
        # CREATE GEPHI GRAPH:
        # sum up signs (if > 0 then positive, if < 0 then negative) if parallel edges exist (ONLY for Gephi, as it doesn't accept parallel edges)
        # if the sum is zero, default to negative

        # print ()
        # print ("Creating GEPHI graph (mention+following, incl. intra-group)...")
        #
        # t_start = time.time()
        #
        # edges_all = edges_pos + edges_neg
        #
        # edges_set = set(map(tuple, edges_all))  # result: {[1,2], [3,4]}
        # edges_unique_tuple = list(edges_set)  # result: [(1,2), (3,4)]
        # edges_unique = [list(eu) for eu in edges_unique_tuple]  # convert list of tuples to list of list
        #
        # print()
        # print("Length of unique edges(before): " + str(len(edges_unique)))
        # # print (edges_unique)
        #
        # edges_unique_sum_pos = []
        # edges_unique_sum_neg = []
        # edges_unique_dict = {}
        #
        # for eu in edges_unique:
        #     count_pos = edges_pos.count(eu)
        #     count_neg = edges_neg.count(eu)
        #     count = count_pos - count_neg
        #
        #     if count > 0:
        #         edges_unique_sum_pos.append([eu[0], eu[1], count])
        #         edges_unique_dict[eu[0], eu[1]] = 'pos'
        #
        #     if count <= 0:
        #         edges_unique_sum_neg.append([eu[0], eu[1], count])
        #         edges_unique_dict[eu[0], eu[1]] = 'neg'
        #
        # print("Length of unique edges(after): " + str(len(edges_unique_sum_pos) + len(edges_unique_sum_neg)))
        #
        # for eup in edges_unique_sum_pos:
        #     DG_2.add_edges_from([(eup[0], eup[1])], sign='+', sentiment=eup[2])
        #
        # for eun in edges_unique_sum_neg:
        #     DG_2.add_edges_from([(eun[0], eun[1])], sign='-', sentiment=eun[2])
        #
        # nx.write_gexf(DG_2, path_to_store_combined_mention_and_following_graph)
        #
        # t_end = time.time()
        # total_time = round(((t_end - t_start) / 60), 2)
        # print("Computing time was " + str(total_time) + " minutes.")
        #
        # # write to file (so that don't have to recreate this when creating triad as it takes a freaking long time)
        #
        # edges_unique_list = []
        #
        # for key,value in edges_unique_dict.items():
        #     key = list(key)
        #     key.append(value)
        #     edges_unique_list.append(key)
        #
        # print ()
        # print ("Writing unique edges, following edges and mentions+following edges to file...")
        #
        # f = open(path_to_store_unique_edges_file, 'w')
        #
        # for eu in edges_unique_list:
        #     f.write(','.join(eu)+'\n')
        #
        # f.close()

        # -------------------------

        # write to file

        # f = open(path_to_store_following_edges, 'w')
        #
        # for ep in edges_pos_2:
        #     f.write(','.join(ep) + ',pos' + '\n')
        #
        # f.close()
        #
        # f = open(path_to_store_combined_mentions_and_following_edges, 'w')
        #
        # for ep in edges_pos:
        #     f.write(','.join(ep) + ',pos' + '\n')
        #
        # for en in edges_neg:
        #     f.write(','.join(en) + ',neg' + '\n')
        #
        # f.close()

        #------------------------
        # get common neighbours (embeddedness)

        print()
        print ('------------------------')
        print ("Getting common neighbours (embeddedness) ...")

        t_start = time.time()

        lines = open(path_to_trust_links_file,'r').readlines()

        embeddedness = []
        embedded_node_list = []


        for line in lines:
            spline = line.rstrip('\n').split(',')

            embedded_list = []

            common_neighbours = sorted(nx.common_neighbors(DG2, spline[0],spline[1]))

            embeddedness.append([spline[0],spline[1],str(len(common_neighbours))])
            embedded_list.append(spline[0])
            embedded_list.append(spline[1])

            for n in range(len(common_neighbours)):
                embedded_list.insert(n+2,common_neighbours[n])

            embedded_node_list.append(embedded_list)

        #print (embeddedness)
        #print (embedded_node_list)
        print (len(embedded_node_list))

        t_end = time.time()
        total_time = round(((t_end - t_start) / 60), 2)
        print("Computing time was " + str(total_time) + " minutes.")

        f = open(path_to_store_embeddedness,'w')

        for e in embeddedness:
            f.write(','.join(e)+'\n')

        f.close()

        f = open(path_to_store_embeddeded_node_list, 'w')

        for e in embedded_node_list:
            f.write(','.join(e) + '\n')

        f.close()
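The embeddedness value written out above is simply the number of common neighbours shared by the two endpoints of each trust link; a toy sketch of the same computation on a small graph:

import networkx as nx

# embeddedness of the tie ('a', 'b') = number of neighbours 'a' and 'b' share
toy = nx.Graph([('a', 'b'), ('a', 'c'), ('b', 'c'), ('a', 'd'), ('b', 'd')])
common = sorted(nx.common_neighbors(toy, 'a', 'b'))
print(common)        # ['c', 'd']
print(len(common))   # embeddedness of ('a', 'b') is 2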