def main():
    """
    Pre-processing: 
        load data, compute centrality measures, write files with node data
    """
    print(nx.__version__)
    # Load network data, create storage dict, and extract main component
    depends = nx.read_edgelist("data/depends.csv", delimiter=",", create_using=nx.DiGraph(),
                               nodetype=str, data=(("weight", time_from_today),))
    depends.name = "depends"
    suggests = nx.read_edgelist("data/suggests.csv", delimiter=",", create_using=nx.DiGraph(),
                                nodetype=str, data=(("weight", time_from_today),))
    suggests.name = "suggests"
    imports = nx.read_edgelist("data/imports.csv", delimiter=",", create_using=nx.DiGraph(),
                               nodetype=str, data=(("weight", time_from_today),))
    imports.name = "imports"
    nets_dict = {"depends": depends, "suggests": suggests, "imports": imports}
    for k in nets_dict.keys():
        # connected_component_subgraphs was removed in NetworkX 2.4; keep the
        # largest connected component of the undirected view instead
        main_component = max(nx.connected_components(nets_dict[k].to_undirected()), key=len)
        nets_dict[k] = nx.subgraph(nets_dict[k], main_component)
    
    # Run multiple measures on graphs and normalize weights
    measure_list = [nx.in_degree_centrality, nx.betweenness_centrality, nx.pagerank]
    for g in nets_dict.values():
        multiple_measures(g,measure_list)
        normalize_weights(g)
        
    # Output networks in GraphML format (to store node attributes)
    for name, g in nets_dict.items():
        # print(g.edges(data=True))
        nx.write_graphml(g, "data/" + name + "_data.graphml")
        print("")
    print("All files written with data")
    
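The example above also calls three helpers that are not shown (time_from_today, multiple_measures, normalize_weights). A minimal sketch of what they might look like, assuming NetworkX 2.x, ISO-formatted dates in the edge lists, and weights stored under the usual "weight" key (all three implementations are assumptions, not the original code):

import networkx as nx
from datetime import datetime

def time_from_today(date_string):
    # Convert a date column from the edge list into an age in days (assumed format).
    return (datetime.today() - datetime.strptime(date_string, "%Y-%m-%d")).days

def multiple_measures(g, measure_list):
    # Run each centrality measure and store its scores as a node attribute.
    for measure in measure_list:
        nx.set_node_attributes(g, measure(g), measure.__name__)

def normalize_weights(g):
    # Rescale edge weights into [0, 1] by dividing by the maximum weight.
    weights = [d.get("weight", 1) for _, _, d in g.edges(data=True)]
    max_w = max(weights) if weights else 1
    for _, _, d in g.edges(data=True):
        d["weight"] = d.get("weight", 1) / max_w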
    """Visualization:
Example #2
def filterNet(DG,mindegree=None,indegree=100,outdegree=50,outdegreemax=9999999,indegreemax=999999):
	print 'In filterNet'
	filter=[]
	for n in DG:
		if outdegreemax==None or DG.out_degree(n)<=outdegreemax:
			if mindegree!=None:
				if DG.degree(n)>=mindegree:
					filter.append(n)
			else:
				if indegree!=None:
					if DG.in_degree(n)>=indegree:
						filter.append(n)
				if outdegree!=None:
					if DG.out_degree(n)>=outdegree:
						filter.append(n)
	#the filter represents the intersect of the *degreesets
	#indegree and outdegree values are ignored if mindegree is set
	filter=set(filter)
	H=DG.subgraph(filter)
	#Superstitiously, perhaps, make sure we only grab nodes that project edges...
	filter= [n for n in H if H.degree(n)>0]
	L=H.subgraph(filter)
	print "Filter set:",filter
	print L.order(),L.size()
	L=labelGraph(L,filter)
	nx.write_graphml(L, projname+"/followersCommonFriends.graphml")
	nx.write_edgelist(L, projname+"/followersCommonFriends.txt",data=False)
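A usage sketch for filterNet (assuming DG is a populated nx.DiGraph and that the globals projname and labelGraph referenced inside the function are defined elsewhere in the module):

filterNet(DG, mindegree=5)                   # keep nodes with total degree >= 5
filterNet(DG, indegree=100, outdegree=50)    # or filter on in-/out-degree thresholds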
Example #3
def main():
    files = []
    for i in range(1,26): 
        files.append("db/Minna_no_nihongo_1.%02d.txt" % i)
    for i in range(26,51): 
        files.append("db/Minna_no_nihongo_2.%02d.txt" % i)


    words = get_words_from_files(files)

    G=nx.Graph()

    for w in words:
        G.add_node(w)
        G.node[w]['chapter'] = words[w]['chapter']
        G.node[w]['kana'] = words[w]['kana']
        G.node[w]['meaning'] = words[w]['meaning'][:-1]

    for word1, word2 in itertools.combinations(words,2):
        for w1 in word1[:-1]:
            #print w1.encode('utf-8')
            #print ud.name(w1)
            if "CJK UNIFIED" in ud.name(w1) and w1 in word2:
                #print word1.encode('utf-8'), word2.encode('utf-8')
                G.add_edge(word1, word2)
                break
    
    #G = nx.connected_component_subgraphs(G)
    G = sorted(nx.connected_component_subgraphs(G), key = len, reverse=True)
    #print len(G)
    #nx.draw(G)
    nx.write_graphml(G[0], "kanjis.graphml", encoding='utf-8', prettyprint=True)
Example #4
    def save_graph(self, DirectoryPath_str, FileName_str):
        '''This function saves the graph to a directory in .graphml format.
        '''


        nx.write_graphml(self.GrapherObject_gr.get_graph(),
        DirectoryPath_str + FileName_str + '.graphml')
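A usage sketch (the instance name and paths are illustrative assumptions; note that DirectoryPath_str is concatenated directly with FileName_str, so it should end with a path separator):

analyzer.save_graph('output/graphs/', 'interaction_network')
# -> writes output/graphs/interaction_network.graphml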
Example #5
def create_joined_multigraph():
    G=nx.DiGraph()
    upp=nx.read_graphml('upperlevel_hashtags.graphml')
    for ed in upp.edges(data=True):
        G.add_edge(ed[0],ed[1],attr_dict=ed[2])
        G.add_edge(ed[1],ed[0],attr_dict=ed[2])
    mid=nx.read_graphml('friendship_graph.graphml')
    for ed in mid.edges(data=True):
        G.add_edge(ed[0],ed[1],attr_dict=ed[2]) 
    inter=nx.read_graphml('interlevel_hashtags.graphml')
    for ed in inter.edges(data=True):
        G.add_edge(ed[0],ed[1],attr_dict=ed[2]) 
        G.add_edge(ed[1],ed[0],attr_dict=ed[2])
    down=nx.read_graphml('retweet.graphml')
    mapping_f={}
    for i,v in enumerate(down.nodes()):
        mapping_f[v]='%iretweet_net' %i
    for ed in down.edges(data=True):
        G.add_edge(mapping_f[ed[0]],mapping_f[ed[1]],attr_dict=ed[2]) 

    for nd in mid.nodes():
        if nd in mapping_f:
            G.add_edge(nd,mapping_f[nd])
            G.add_edge(mapping_f[nd],nd)
    nx.write_graphml(G,'joined_3layerdigraph.graphml')
    return G,upp.nodes(),mid.nodes(),mapping_f.values()
Example #6
def ministro_ministro(G):
    """
    Cria um grafo de ministros conectados de acordo com a sobreposição de seu uso da legislação
    Construido a partir to grafo ministro_lei
    """
    GM = nx.Graph()
    for m in G:
        try:
            int(m)
        except ValueError:# Add only if node is a minister
            if m != "None":
                GM.add_node(m.decode('utf-8'))
#    Add edges
    for n in GM:
        for m in GM:
            if n == m: continue
            if GM.has_edge(n,m) or GM.has_edge(m,n): continue
            # Edge weight is the cardinality of the intersection of the two nodes' neighbor sets.
            w = len(set(nx.neighbors(G,n.encode('utf-8'))) & set(nx.neighbors(G,m.encode('utf-8')))) #encode again to allow for matches
            if w > 5:
                GM.add_edge(n,m,{'weight':w})
    # Abbreviate node names
    GMA = nx.Graph()
    GMA.add_weighted_edges_from([(o.replace('MIN.','').strip(),d.replace('MIN.','').strip(),di['weight']) for o,d,di in GM.edges_iter(data=True)])
    P.figure()
    nx.draw_spectral(GMA)
    nx.write_graphml(GMA,'ministro_ministro.graphml')
    nx.write_gml(GMA,'ministro_ministro.gml')
    nx.write_pajek(GMA,'ministro_ministro.pajek')
    nx.write_dot(GMA,'ministro_ministro.dot')
    return GMA
def export_graph(G, write_filename):
    write_dir = "./output/" + write_filename + "/"
    if not os.path.isdir(write_dir):
        os.mkdir(write_dir)


    # Remove edge weights (blank out all edge attributes)
    for n1 in G.edge:
        for n2 in G.edge[n1]:
            G.edge[n1][n2]={}
    print("\twriting gml")
    for node in G.nodes_iter():
        for key, val in list(G.node[node].items()):
            G.node[node][key]=int(val)
    nx.write_gml(G, write_dir + write_filename + ".gml")
    print("\twriting graphml")
    nx.write_graphml(G, write_dir + write_filename + ".graphml")
    print("\twriting edgelist")
    f = open(write_dir + write_filename + ".edgelist","w")
    for edge in G.edges_iter():
        f.write("\t".join([str(end) for end in list(edge)[:2]])+"\n")
    f.close()
    f = open(write_dir + write_filename + ".nodelist","w")
    print("\twriting nodelist")
    f.write("\t".join(["node_id"] + node_attributes) + "\n")
    for node in G.nodes_iter():
        f.write("\t".join([str(node)] + [str(G.node[node][attribute]) for attribute in node_attributes]) + "\n")
def createMergedGraph(groupSampleDict, processedDataDir, rawModelDir):

    print 'Merging genomes from specified taxonomic group'

# Loop over the keys of the dictionary, one for each group
    for group in groupSampleDict:

# Create an empty graph object
        mergedGraph = nx.DiGraph()

# Read in the graph of the group and merge with the graph from the previous
# iteration
        for sample in groupSampleDict[group]:

# Read in adjacency list and convert to digraph object
            myDiGraph = nx.read_adjlist(rawModelDir+'/'+sample+'/'+sample+'AdjList.txt',
                                create_using=nx.DiGraph())

# Append to the previous graph
            mergedGraph = nx.compose(mergedGraph, myDiGraph)

# Check that the proper output directory exists. If not, create it.
        if not os.path.exists(processedDataDir+'/'+group):
            os.makedirs(processedDataDir+'/'+group)

        nx.write_adjlist(mergedGraph, processedDataDir+'/'+group+'/'+group+'AdjList.txt')
        nx.write_graphml(mergedGraph, processedDataDir+'/'+group+'/'+group+'Graph.xml')

    return
Example #9
def lei_vs_lei(nedges=None):
    """
    Graph of all laws against all laws.
    """
    # Original version by Flávio, commented out
    # curgrafo.execute('select lei_id_1,esfera_1,lei_1,lei_id_2,esfera_2, lei_2, peso from vw_gr_lei_lei where  peso >300 and lei_id_2>2')
    # curgrafo.execute('select lei_id_1,lei_tipo_1,lei_nome_1,lei_id_2,lei_tipo_2, lei_nome_2, peso from vw_gr_lei_lei where lei_count <= 20 and lei_id_1 = 1 and lei_id_2 <= 20 limit 0,1000')
    # curgrafo.execute('select lei_id_1,lei_tipo_1,lei_nome_1,lei_id_2,lei_tipo_2, lei_nome_2, peso from vw_gr_lei_lei where lei_count <= 8 and lei_id_1 <= 20 and lei_id_2 <= 20 limit 0,1000')
    curgrafo.execute('select lei_id_1,esfera_1,lei_1,lei_id_2,esfera_2, lei_2, peso from vw_gr_lei_lei where lei_count <= 10 and lei_id_1 <= 50 and lei_id_2 <= 200 limit 0,10000')
    if not nedges:
        res = curgrafo.fetchall()
        nedges = len(res)
    else:
        res = curgrafo.fetchmany(nedges)
    eds = [(i[0],i[3],i[6]) for i in res]
    G = nx.Graph()
    #eds = [i[:3] for i in res]
    G.add_weighted_edges_from(eds)
    print "== Grafo Lei_Lei =="
    print "==> Order: ",G.order()
    print "==> # Edges: ",len(G.edges())
    # Adding attributes to nodes
    for i in res:
        G.node[i[0]]['esfera'] = i[1]
        G.node[i[0]]['lei'] = i[2]
        G.node[i[3]]['esfera'] = i[4]
        G.node[i[3]]['lei'] = i[5]
    nx.write_graphml(G,'lei_lei.graphml')
    nx.write_gml(G,'lei_lei.gml')
    nx.write_pajek(G,'lei_lei.pajek')
    nx.write_dot(G,'lei_lei.dot')
    return G,res
Example #10
def produce_graph(cutoff, transform):
    G = nx.Graph()
    allusers = set()
    for c in user_relations:
        allusers.add(c[0])
        allusers.add(c[1])
    for u in allusers:
        G.add_node(u, weight = user_influence[u])
    user_top = {}
    for c in user_relations:
        user1 = c[0]
        user2 = c[1]
        score = user_relations[c]
        if user1 not in user_top:
            user_top[user1] = {}
        if user2 not in user_top:
            user_top[user2] = {}
        user_top[user1][user2] = score
    for u in user_top:
        top5 = dict(sorted(user_top[u].items(), key=lambda x: x[1], reverse=True)[:5])
        for j in top5:
            G.add_edge(u, j, weight = user_top[u][j])
            
##        user_top[c[0]]
##        if user_relations[c] > cutoff:
##            G.add_edge(c[0], c[1], weight = transform(user_relations[c]))
    nx.write_graphml(G, 'graph' + time.strftime("%Y-%m-%d-%H%M%S", time.gmtime()) + '.graphml')
    def generate_graph(self, result_only=False):
        media_id = self.media_id

        print 'running', media_id
        self.comments = []

        self.current_media = me.MediaHelper.get_media(media_id)
        self.comments = co.CommentHelper.get_comment(media_id)
        self.total_comments = len(self.comments)

        self.G = nx.DiGraph()
        self.G.add_node(self.current_media.user_id(), username=self.current_media.username(),
                   link=self.current_media.link(), tags=self.current_media.tags())

        self.add_comment(self.current_media.user_id(), 0)
        # code.interact(local=locals())

        self.calculate_influence(self.current_media.user_id())
        self.calculate_total_normalised_influence()
        self.calculate_no_of_normalised_influence()

        if not result_only:
            self.output_script_file()
            self.output_csv_file()

            nx.draw(self.G)
            plt.show(block=False)
            plt.savefig(self.IMAGE_FILENAME.format(media_id), format="PNG")

            filename = self.FILENAME.format(media_id)
            nx.write_graphml(self.G, filename)

        self.log_result()
Example #12
    def save_celltype_graph(self, filename="celltype_conn.gml", format="gml"):
        """
        Save the celltype-to-celltype connectivity information in a file.
        
        filename -- path of the file to be saved.

        format -- format to save in. Using GML as GraphML support is
        not complete in NetworkX.  

        """
        start = datetime.now()
        if format == "gml":
            nx.write_gml(self.__celltype_graph, filename)
        elif format == "yaml":
            nx.write_yaml(self.__celltype_graph, filename)
        elif format == "graphml":
            nx.write_graphml(self.__celltype_graph, filename)
        elif format == "edgelist":
            nx.write_edgelist(self.__celltype_graph, filename)
        elif format == "pickle":
            nx.write_gpickle(self.__celltype_graph, filename)
        else:
            raise Exception("Supported formats: gml, graphml, yaml. Received: %s" % (format))
        end = datetime.now()
        delta = end - start
        config.BENCHMARK_LOGGER.info(
            "Saved celltype_graph in file %s of format %s in %g s"
            % (filename, format, delta.seconds + delta.microseconds * 1e-6)
        )
        print "Saved celltype connectivity graph in", filename
    def test_write_read_attribute_numeric_type_graphml(self):
        from xml.etree.ElementTree import parse

        G = self.attribute_numeric_type_graph
        fh = io.BytesIO()
        nx.write_graphml(G, fh, infer_numeric_types=True)
        fh.seek(0)
        H = nx.read_graphml(fh)
        fh.seek(0)

        assert_equal(sorted(G.nodes()), sorted(H.nodes()))
        assert_equal(sorted(G.edges()), sorted(H.edges()))
        assert_equal(sorted(G.edges(data=True)),
                     sorted(H.edges(data=True)))
        self.attribute_numeric_type_fh.seek(0)

        xml = parse(fh)
        # Children are the key elements, and the graph element
        children = xml.getroot().getchildren()
        assert_equal(len(children), 3)

        keys = [child.items() for child in children[:2]]

        assert_equal(len(keys), 2)
        assert_in(('attr.type', 'double'), keys[0])
        assert_in(('attr.type', 'double'), keys[1])
Example #14
    def save_graph(self, graphname, fmt='edgelist'):
        """
        Saves the graph to disk

        **Positional Arguments:**

                graphname:
                    - Filename for the graph

        **Optional Arguments:**

                fmt:
                    - Output graph format
        """
        self.g.graph['ecount'] = nx.number_of_edges(self.g)
        g = nx.convert_node_labels_to_integers(self.g, first_label=1)
        if fmt == 'edgelist':
            nx.write_weighted_edgelist(g, graphname, encoding='utf-8')
        elif fmt == 'gpickle':
            nx.write_gpickle(g, graphname)
        elif fmt == 'graphml':
            nx.write_graphml(g, graphname)
        else:
            raise ValueError('edgelist, gpickle, and graphml currently supported')
        pass
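A usage sketch (assuming `wrapper` is an instance of the class that owns save_graph; the filenames are illustrative):

wrapper.save_graph('brain.edgelist')                 # default: weighted edge list
wrapper.save_graph('brain.graphml', fmt='graphml')
wrapper.save_graph('brain.gpickle', fmt='gpickle')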
def graph_constructed(path, name):
    list_line = load_file(path, name)

    list_source_nodes = []
    list_target_nodes = []

    DG = nx.DiGraph()
    for line in list_line:
        split_line = line.strip().split('\t')
        target = split_line[0]
        source = split_line[1]
        type = split_line[2]
        weight = int(split_line[3])

        if (source not in list_source_nodes):
            list_source_nodes.append(source)
            DG.add_node(source, type_ = type)

        if (target not in list_target_nodes):
            list_target_nodes.append(target)
            DG.add_node(target, type_ = 'target')

        DG.add_weighted_edges_from([(source, target, weight)])

    DG.nodes(data=True)
    nx.write_graphml(DG,'d:/' + name.replace('.csv', '') + '.graphml')
Example #16
def output_graph(mps, mp_data, edges):

  G=nx.Graph()

  # Define the nodes
  for mp in mps:
    G.add_node(mp, label=mp_data[mp]["name"], party=mp_data[mp]["party"], constituency=mp_data[mp]["constituency"])

  # Process all known edges
  for (mp_tuple,agr_data) in edges.items():

    agreements = agr_data[0]
    agreement_rate = agr_data[2]

    # Depending on the selection criteria, filter out relationships
    if agreement_rate < 85:
      continue    

    # Determine a (normalized) weight, again depending on the desired graph
    # edge_wt = agreements
    range_min = 85
    range_max = 100
    weight_base = agreement_rate - range_min
    edge_wt = ( float(weight_base) / float(range_max - range_min) )

    G.add_edge(mp_tuple[0],mp_tuple[1], agreement=agreement_rate, weight=edge_wt )

  nx.write_graphml(G, "mp_agreement.graphml")
Example #17
def nC_network_to_graphml(project, graphml_file_path='test.graphml'):
    # extract cell position records
    cell_positions = [(pos_record.x_pos, pos_record.y_pos, pos_record.z_pos) for pos_record in project.generatedCellPositions.getAllPositionRecords()]

    # create graph object
    graph = nx.Graph()
    graph.add_nodes_from(range(len(cell_positions)))

    # add node properties for positions. Permute x,y,z to get a nicer
    # default visualisation in Gephi when opening the resulting
    # graphml file.
    for k, position in enumerate(cell_positions):
        graph.node[k]['x'] = position[2]
        graph.node[k]['y'] = position[0]
        graph.node[k]['z'] = position[1]

    # add edges
    for conn_name in project.generatedNetworkConnections.getNamesNonEmptyNetConns():
        conns = project.generatedNetworkConnections.getSynapticConnections(conn_name)
        assert len(conns) == 1
        conn = conns[0]
        source = conn.sourceEndPoint.cellNumber
        target = conn.targetEndPoint.cellNumber
        graph.add_edge(source, target, weight=conn.props[0].weight)

    # save to disk
    nx.write_graphml(graph, graphml_file_path)
    
    return graph
Example #18
 def test_preserve_multi_edge_data(self):
     """
     Test that data and keys of edges are preserved on consequent
     write and reads
     """
     G = nx.MultiGraph()
     G.add_node(1)
     G.add_node(2)
     G.add_edges_from([
         # edges with no data, no keys:
         (1, 2),
         # edges with only data:
         (1, 2, dict(key='data_key1')),
         (1, 2, dict(id='data_id2')),
         (1, 2, dict(key='data_key3', id='data_id3')),
         # edges with both data and keys:
         (1, 2, 103, dict(key='data_key4')),
         (1, 2, 104, dict(id='data_id5')),
         (1, 2, 105, dict(key='data_key6', id='data_id7')),
     ])
     fh = io.BytesIO()
     nx.write_graphml(G, fh)
     fh.seek(0)
     H = nx.read_graphml(fh, node_type=int)
     assert_edges_equal(
         G.edges(data=True, keys=True), H.edges(data=True, keys=True)
     )
     assert_equal(G._adj, H._adj)
Example #19
def filterNet(DG,mindegree):
	if addUserFriendships==1:
		DG=addFocus(DG,user,typ)
	mindegree=int(mindegree)
	filter=[]
	filter= [n for n in DG if DG.degree(n)>=mindegree]
	H=DG.subgraph(filter)
	print "Filter set:",filter
	print H.order(),H.size()
	LH=labelGraph(H,filter)

	now = datetime.datetime.now()
	ts = now.strftime("_%Y-%m-%d-%H-%M-%S")
  
	nx.write_graphml(H, '/'.join([path,agent,typ,tt+"degree"+str(mindegree)+ts+".graphml"]))

	nx.write_edgelist(H, '/'.join([path,agent,typ,tt+"degree"+str(mindegree)+ts+".txt"]),data=False)
	#delimiter=''

	#indegree=sorted(nx.indegree(DG).values(),reverse=True)
	indegree=H.in_degree()
	outdegree=H.out_degree()

	inout = [indegree, outdegree]
	inoutpair = {}
	for k in indegree.iterkeys():
		inoutpair[k] = tuple(inoutpair[k] for inoutpair in inout)
    
	fig = plt.figure()
	ax = fig.add_subplot(111)
	#ax.plot(indegree,outdegree, 'o')
	#ax.set_title('Indegree vs outdegree')
	degree_sequence=sorted(indegree.values(),reverse=True)
	plt.loglog(degree_sequence)
	plt.savefig( '/'.join([path,agent,typ,tt+"degree"+str(mindegree)+"outdegree_histogram.png"]))
def main():
    arg_parser = ArgumentParser(description='add edge weights to tree')
    arg_parser.add_argument('--input', required=True,
                            help='input file')
    arg_parser.add_argument('--output', required=True,
                            help='output file')
    arg_parser.add_argument('--seed', type=int, default=None,
                            help='seed for random number generator')
    arg_parser.add_argument('--delim', dest='delimiter', default=' ',
                            help='delimiter for edge list')
    arg_parser.add_argument('--no-data', action='store_true',
                            dest='no_data', help='omit edge data')
    arg_parser.add_argument('--edge-list', action='store_true',
                            help='generate edge list output')
    options = arg_parser.parse_args()
    random.seed(options.seed)
    tree = nx.read_graphml(options.input)
    add_edge_weights(tree)
    if options.edge_list:
        nx.write_edgelist(tree, options.output,
                          delimiter=options.delimiter,
                          data=not options.no_data)
    else:
        nx.write_graphml(tree, options.output)
    return 0
Example #21
 def graphMLfromCSV(self, csvfile, filter):     
 
    # Default to 0.0 when no meaningful filter value is supplied
    filter_score = float(filter) if filter else 0.0
       
    csv = self.csv.csvaslist(csvfile)      
    graphfile = csvfile.split('.', 1)[0] + "-graph.xml"      
    nodes = []      
    
    for line in csv:
       #we should have a correctly formatted CSV to work with
       if 'score' in line and 'source' in line and 'target' in line:
          source = line[u'source']
          target = line[u'target']
          score = float(line[u'score']) / 100
          if score >= filter_score:
             if source not in nodes:
                self.G.add_node(source)
                nodes.append(source)
             if target not in nodes:
                self.G.add_node(target)
                nodes.append(target)
             self.G.add_edge(source,target,weight=score)
             print score
    nx.write_graphml(self.G, graphfile)
def main():    
    universe = nx.read_graphml(sys.argv[1])
    
    beings = filter(lambda x: x[1]["type"] == "Being", universe.nodes(data=True))
    clients = filter(lambda x: x[1]["type"] == "client", universe.nodes(data=True))
    firm = filter(lambda x: x[1]["type"] == "firm", universe.nodes(data=True))        
    print len(beings)
    print len(clients)
    print len(firm)
    
    for b in beings:
        ns = nx.neighbors(universe,b[0])
        rep = ns[0]
        for n in ns[1:]:
            for nn in nx.neighbors(universe,n):
                universe.add_edge(rep,nn) #doesn't preserve directions or properties, yolo
            universe.remove_node(n)
        universe.remove_node(b[0])
        
    beings = filter(lambda x: x[1]["type"] == "Being", universe.nodes(data=True))
    clients = filter(lambda x: x[1]["type"] == "client", universe.nodes(data=True))
    firm = filter(lambda x: x[1]["type"] == "firm", universe.nodes(data=True))        
    print len(beings)
    print len(clients)
    print len(firm)
            
    nx.write_graphml(universe,"simplified-{}.graphml".format(int(time.time())))
def finish_graph():
	nx.draw(g)
	file_name='host_hops1.png'
	file_nameg='host_hops1.graphml'
	plt.savefig(file_name)
	nx.write_graphml(g, file_nameg)
	print ("Graph Drawn")
Example #24
    def build_graph(self):
        self.node_list = [self.like_minded_friend, self.books_reco_from, self.movies_reco_from, self.music_reco_from,
                          self.sport_reco_from, 'Read a book? Ask me', 'Watch a movie? Ask me', 'Sing along with',
                          'Sports arena?', 'Similar tastes?']
        #self.node_list.append(self.friends_likes[self.like_minded_friend][:10])
        self.node_list.append(self.user_name)

        self.G.add_nodes_from(self.node_list)

        self.G.add_edge(self.user_name, 'Similar tastes?', color = 'purple')
        self.G.add_edge('Similar tastes?', self.like_minded_friend,color = 'purple' )

        self.G.add_edge(self.user_name, 'Read a book? Ask me', color = 'blue')
        self.G.add_edge('Read a book? Ask me', self.books_reco_from, color = 'blue')

        self.G.add_edge(self.user_name, 'Watch a movie? Ask me', color = 'green')
        self.G.add_edge('Watch a movie? Ask me', self.movies_reco_from, color = 'green')

        self.G.add_edge(self.user_name, 'Sing along with', color = 'yellow')
        self.G.add_edge('Sing along with', self.music_reco_from, color = 'yellow')

        self.G.add_edge(self.user_name, 'Sports arena?', color = 'orange')
        self.G.add_edge('Sports arena?', self.sport_reco_from, color = 'orange')

        self.G.add_edge(self.user_name, 'Pages you might like!', color = 'red')
        for node in self.friends_likes[self.like_minded_friend][:10]:
            self.G.add_edge('Pages you might like!', node, color = 'red')


        nx.write_graphml(self.G, 'FBViz' + ".graphml")
Example #25
    def start(self):
        for id in self.oidRootNamePairs:
            self.oidNamePairs,currIDs=Utils.getoidNames(self.oidNamePairs,id,Def.typ)
            Utils.report('Processing current IDs: '+str(currIDs))
            flip=(Def.typ=='fr')
            self.addDirectedEdges(id, currIDs,flip=flip)
            n=len(currIDs)
            Utils.report('Total amount of IDs: '+str(n))
            c=1
            for cid in currIDs:
                Utils.report('\tSub-level run: getting '+Def.typ2,str(c)+'of'+str(n)+Def.typ+cid)
                self.oidNamePairs,ccurrIDs=Utils.getoidNames(self.oidNamePairs,cid,Def.typ2)
                self.addDirectedEdges( cid, ccurrIDs)
                c=c+1
        for id in self.oidRootNamePairs:
            if id not in self.oidNamePairs:
                self.oidNamePairs[id]=self.oidRootNamePairs[id]
        self.labelNodes(self.oidNamePairs)
        Utils.report(nx.info(self.DG))

        now = datetime.datetime.now()
        timestamp = now.strftime("_%Y-%m-%d-%H-%M-%S")

        fname=UserID._name.replace(' ','_')
        nx.write_graphml(self.DG, '/'.join(['reports',fname+'_google'+Def.typ+'Friends_'+timestamp+".graphml"]))
        nx.write_edgelist(self.DG, '/'.join(['reports',fname+'_google'+Def.typ+'Friends_'+timestamp+".txt"]),data=False)
 def draw_base_graph(self):
     print 'writing base graphml ...'
     G = nx.Graph()
     G.add_nodes_from(xrange(self.num_nodes))
     G.add_edges_from(self.E_base)
     nx.write_graphml(G,'exodus.graphml')
     print 'done ... (load in Gephi)'
Example #27
def main():
    
    print "time_evol module is the main code."
    ## to import a network of 3-node example
    EDGE_FILE = 'C:\Boolean_Delay_in_Economics\Manny\EDGE_FILE.dat'
    NODE_FILE = 'C:\Boolean_Delay_in_Economics\Manny\NODE_FILE.dat'
    
    net = inet.read_network_from_file(EDGE_FILE, NODE_FILE)
    nodes_list = inet.build_nodes_list(NODE_FILE)
    '''
    ## to obtain time series data for all possible initial conditions for 3-node example network
    timeSeriesData = ensemble_time_series(net, nodes_list, 2, 10)#, Nbr_States=2, MAX_TimeStep=20)
    initState = 1
    biStates = decimal_to_binary(nodes_list, initState)
    print 'initial state', biStates
    
    ## to print time series data for each node: a, b, c starting particualr decimal inital condition 1
    print 'a', timeSeriesData['a'][1]
    print 'b', timeSeriesData['b'][1]
    print 'c', timeSeriesData['c'][1]
    '''
    
    ## to obtain and visulaize transition map in the network state space
    decStateTransMap = net_state_transition(net, nodes_list)
    nx.write_graphml(decStateTransMap,'C:\Boolean_Delay_in_Economics\Manny\Results\BDE.graphml')
Example #28
def create_graph():
    G = nx.Graph()
    data = []
    reposts = []
    client = MongoClient()
    db = client["vk_db"]
    collection = db["valid_communities_info"]
    result = collection.find()
    for res in result:
        data.append(res)

    db=client["reposts"]
    collection = db["general"]
    answer = collection.find()
    for res in answer:
        reposts.append(res)

    for each in data:
        G.add_node(each["screen_name"])
        G.node[each["screen_name"]]['weight'] = each["weight"]

    for each in reposts:
        G.add_edge(get_name_by_id(each["owner"]), get_name_by_link(each["link"]), weight=each["times"])

    nx.write_graphml(G,'vk.graphml')
Example #29
 def to_file(self, name_graphml='AST2NX.graphml'):
     """ write to a graphml file which can be read by a lot of professional visualization tools such as Cytoscape.
     """
     if name_graphml.endswith('.graphml'):
         nx.write_graphml(self.NetworkX, name_graphml)
     else:
         nx.write_graphml(self.NetworkX, name_graphml + '.graphml')
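A usage sketch (assuming `ast_graph` is an instance of the converter class that defines to_file):

ast_graph.to_file('my_ast')            # written as my_ast.graphml
ast_graph.to_file('my_ast.graphml')    # extension already present, used as-is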
def get_topology():
    G = nx.Graph()
    G.add_node("poi-1", packetloss=0.0, ip="0.0.0.0", geocode="US", bandwidthdown=17038, bandwidthup=2251, type="net", asn=0)
    G.add_edge("poi-1", "poi-1", latency=50.0, jitter=0.0, packetloss=0.05)
    s = StringIO()
    nx.write_graphml(G, s)
    return s.getvalue()
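get_topology buffers the GraphML document in memory with Python 2's StringIO. Under Python 3, nx.write_graphml writes bytes to file-like objects, so an equivalent sketch (an assumption about the runtime, not part of the original) would buffer with BytesIO and decode:

from io import BytesIO
import networkx as nx

def get_topology_py3():
    G = nx.Graph()
    G.add_node("poi-1", packetloss=0.0, ip="0.0.0.0", geocode="US",
               bandwidthdown=17038, bandwidthup=2251, type="net", asn=0)
    G.add_edge("poi-1", "poi-1", latency=50.0, jitter=0.0, packetloss=0.05)
    buf = BytesIO()
    nx.write_graphml(G, buf)
    return buf.getvalue().decode("utf-8")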
 def saveGraph(self):
     networkx.write_graphml(self.Graph,
                            self.get_abs_file_path(self.GraphFile))
Example #32
def model_calcs(networks, args):
    """
    Function for generating null models and carrying out calculations.
    :param networks: Dictionary with folder name as key and values as tuples (name, network object).
    :param args: Settings for running anuran
    :return:
    """
    if args['core'] < 1:
        args['core'] = 1
        logger.info("Setting cores for multiprocessing to 1.")
    # export intersections
    for size in args['size']:
        for group in networks:
            shared_edges = _intersection(networks[group], float(size), sign=args['sign'], edgelist=True)
            g = _construct_intersection(networks[group], shared_edges)
            nx.write_graphml(g, args['fp'] + '_' + group + '_' + str(size) + '_intersection.graphml')
    # first generate null models
    try:
        random, degree = generate_null(networks, n=args['perm'], npos=args['gperm'], core=args['core'], fraction=args['cs'],
                                       prev=args['prev'])
    except Exception:
        logger.error('Could not generate null models!', exc_info=True)
        sys.exit()
    set_sizes = None
    try:
        set_sizes = generate_sizes(networks, random, degree, core=args['core'],
                                   sign=args['sign'],
                                   fractions=args['cs'], prev=args['prev'],
                                   perm=args['nperm'], sizes=args['size'])
        set_sizes.to_csv(args['fp'] + '_sets.csv')
        set_differences = generate_size_differences(set_sizes, sizes=args['size'])
        set_differences.to_csv(args['fp'] + '_set_differences.csv')
        logger.info('Set sizes exported to: ' + args['fp'] + '_sets.csv')
    except Exception:
        logger.error('Failed to calculate set sizes!', exc_info=True)
        sys.exit()
    centralities = None
    if args['centrality']:
        try:
            centralities = generate_ci_frame(networks, random, degree,
                                             fractions=args['cs'], prev=args['prev'],
                                             perm=args['nperm'], core=args['core'])
            centralities.to_csv(args['fp'] + '_centralities.csv')
            logger.info('Centralities exported to: ' + args['fp'] + '_centralities.csv')
        except Exception:
            logger.error('Could not rank centralities!', exc_info=True)
            sys.exit()
    if args['network']:
        try:
            graph_properties = generate_graph_frame(networks, random, degree,
                                                    fractions=args['cs'], core=args['prev'],
                                                    perm=args['nperm'])
            graph_properties.to_csv(args['fp'] + '_graph_properties.csv')
            logger.info('Graph properties exported to: ' + args['fp'] + '_graph_properties.csv')
        except Exception:
            logger.error('Could not estimate graph properties!', exc_info=True)
            sys.exit()
    samples = None
    if args['sample']:
        try:
            samples = generate_sample_sizes(networks, random, degree,
                                            sign=args['sign'], core=args['core'],
                                            fractions=args['cs'], perm=args['nperm'], prev=args['prev'],
                                            sizes=args['size'], limit=args['sample'], number=args['number'])
            samples.to_csv(args['fp'] + '_subsampled_sets.csv')
            logger.info('Subsampled set sizes exported to: ' + args['fp'] + '_subsampled_sets.csv')
        except Exception:
            logger.error('Failed to subsample networks!', exc_info=True)
            sys.exit()
    central_stats = None
    if args['stats']:
        if args['stats'] == 'True':
            args['stats'] = True
        # add code for pvalue estimation
        set_stats = compare_set_sizes(set_sizes)
        set_stats.to_csv(args['fp'] + '_set_stats.csv')
        difference_stats = compare_set_sizes(set_differences)
        difference_stats.to_csv(args['fp'] + '_difference_stats.csv')
        if args['centrality'] and centralities is not None:
            central_stats = compare_centralities(centralities, mc=args['stats'])
            central_stats.to_csv(args['fp'] + '_centrality_stats.csv')
        if args['network']:
            graph_stats = compare_graph_properties(graph_properties)
            graph_stats.to_csv(args['fp'] + '_graph_stats.csv')
    # check if there is an order in the filenames
    for group in networks:
        prefixes = [x[0].split('_')[0] for x in networks[group]]
        try:
            prefixes = [int(x) for x in prefixes]
        except ValueError:
            pass
        if all(isinstance(x, int) for x in prefixes):
            centrality_correlation = correlate_centralities(group, centralities, mc=args['stats'])
            centrality_correlation.to_csv(args['fp'] + '_centrality_correlation.csv')
            graph_correlation = correlate_graph_properties(group, graph_properties)
            graph_correlation.to_csv(args['fp'] + '_graph_correlation.csv')
    if args['draw']:
        try:
            for x in networks:
                subset_sizes = set_sizes[set_sizes['Group'] == x]
                draw_sets(subset_sizes, args['fp'] + '_' + x)
                subset_differences = set_differences[set_differences['Group'] == x]
                draw_set_differences(subset_differences, args['fp'] + '_' + x)
                if args['centrality']:
                    subset_centralities = centralities[centralities['Group'] == x]
                    draw_centralities(subset_centralities, args['fp'] + '_' + x)
                if args['sample']:
                    subset_samples = samples[samples['Group'] == x]
                    draw_samples(subset_samples, args['fp'] + '_' + x)
                if args['network']:
                    subset_graphs = graph_properties[graph_properties['Group'] == x]
                    draw_graphs(subset_graphs, args['fp'] + '_' + x)
        except Exception:
            logger.error('Could not draw data!', exc_info=True)
            sys.exit()
    if central_stats is not None:
        return central_stats
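For reference, model_calcs reads the following keys from the args dictionary (inferred from the body above): 'core', 'size', 'sign', 'fp', 'perm', 'gperm', 'cs', 'prev', 'nperm', 'centrality', 'network', 'sample', 'number', 'stats', and 'draw'.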
if __name__ == "__main__":

    # File containing all tweets data
    fname = 'data/stream_.jsonl'

    if len(sys.argv) == 2 and len(sys.argv[1]) > 0:
        term = sys.argv[1]
        export_fname = 'data/' + term + '_co_occurrences.csv'
        tuples = analyze_co_occurrence(fname=fname)
        filtered_tuples = filter_tuples_containing_term(tuples=tuples,
                                                        term=term)
        export_co_occurrence(term=term,
                             tuples=filtered_tuples,
                             export_fname=export_fname)
        tuples = filtered_tuples
    else:
        print("No term provided for analysis ...")
        print("Going to analyze all the terms and their co-occurrences.")
        print(
            "Export file will contain all the tuples with their co-occurrence count."
        )
        export_fname = 'data/all_co_occurrences.csv'
        tuples = analyze_co_occurrence(fname=fname)
        export_co_occurrence(term='', tuples=tuples, export_fname=export_fname)

    # Directed Graph
    export_fname = 'data/co_occurrence.graphml'
    digraph = co_occurrence_network(tuples)
    nx.write_graphml(digraph, export_fname)
    print("Co-occurrence Graph is exported at [%s]" % export_fname)
Example #34
G = nx.Graph()
#manually adding the start nodes
G.add_node('#newyork')
G.add_node('#dog')
#will crawl the 2 main hashtags
for url, main_tag in urls:
    #making a variable of the function return
    queue = hasher(url)
    #keeping visited updated, to not visit it twice
    visited.append(url)
    #loop through the list of hashtags that will be crawled
    for link, tag in queue:
        page = main_url+link
        if page not in visited and len(visited) <= 20:
            #add node for all 20 hashtags found in newyork
            G.add_node(tag)
            #add edge from all found hashtags to newyork
            G.add_edge(main_tag,tag)
            visited.append(page)
            #will go into the already found hashtags to find their own hashtags
            new_queue = hasher(page)
            #adding nodes from a list of hashtags
            G.add_nodes_from([tag for link, tag in  new_queue])
            #same with edges
            for link, taggy in new_queue:
                G.add_edge(tag, taggy)
        else:
            pass

nx.write_graphml(G, "newyorkdog.graphml")
Example #35
#Edge attributes
g.edge["Alan"]["Bob"]["relationship"] = "Friends"
g.edge["Carol"]["Denise"]["relationship"] = "Friends"
g.edge["Carol"]["Bob"]["relationship"] = "Married"

#New edge with an attribute
g.add_edges_from([["Carol", "Alan", {"relationship": "Friends"}]])

for e in g.edges_iter():
    n1 = e[0]
    n2 = e[1]
    print("{0} and {1} are {2}".format(n1, n2, g.edge[n1][n2]["relationship"]))

#Save g to the file my_graph.graphml in graphml format
#prettyprint will make it nice for a human to read
nx.write_graphml(g, "my_graph.graphml", prettyprint=True)
#Layout g with the Fruchterman-Reingold force-directed
#algorithm and save the result to networkx_graph.png
#with_labels will label each node with its id

nx.draw_spring(g, with_labels=True)
plt.savefig("networkx_graph.png")
plt.clf()  #Clear plot

print 'Information about the graph'
print nx.info(g)
#print g.number_of_nodes()
print g.nodes(data=True)
#print g.number_of_edges()
print g.edges(data=True)
print 'Average degree of nodes'
Example #36
    print("Getting map from service: ", map_service_name)
    rospy.wait_for_service(map_service_name)

    graph_file = rospy.get_param("~graph_file", None)
    map_msg = rospy.ServiceProxy(map_service_name, GetMap)().map
    map_info = map_msg.info

    spaceDimension = 3

    if spaceDimension == 3:
        bases = [2,3,5]

    lower = numpy.array([map_info.origin.position.x, map_info.origin.position.y,0.0])
    upper = numpy.array([map_info.origin.position.x+map_info.resolution*map_info.width, map_info.origin.position.y+map_info.resolution*map_info.height, 2*numpy.pi])

    # Settings
    halton_points = 2 # TODO: Set this appropriately
    disc_radius = 1 # TODO: Set this appropriately
    print(disc_radius)

    for i in range(1):
        print i
        numpy.random.seed()
        offset = numpy.random.random_sample(spaceDimension,)
        riskmapFile = 'haltonGraph.graphml'

        # Generate the graph
        print 'Generating the graph'
        G = euclidean_halton_graph(halton_points, disc_radius, bases, lower, upper, None, None, map_msg)
        nx.write_graphml(G, riskmapFile)
Example #37
        dists.append(dist)

# Sort the list by ascending distance
dists.sort(key=lambda _tuple: _tuple[-1])

# Get the top connections
top_conns = dists[:num_top_conns]

# Make a network
g = nx.Graph()
for word1, word2, dist in top_conns:
    weight = 1 - dist  # cosine similarity for weight
    g.add_edge(word1, word2, weight=float(weight))

# Write the network
nx.write_graphml(
    g, "./semanticNetwork/semanticNetwork.graphml")  # Readable by Gephi

A = nx.adjacency_matrix(g)
adjmat = A.todense()

numpy.savetxt("./semanticNetwork/semanticNetworkAdjmat.txt",
              adjmat,
              delimiter=' ')

###########################
# reload and clean text without lemmatization and without
# spell-checking to leave words as original as possible

for ID in IDs:  # loop through papers
    print(ID)
    with open(ID) as paper:
    # Retrieve new node to process
    node = queue.pop()
    # BFS execution

    # Don't process an already visited node
    if node in visited:
        continue

    else:
        visited.add(node)
        comments_df = apiParser(node, min_utc, max_utc, comments_df)

        # Process df to find links to other subreddits
        comments_df = matchFinder(comments_df, node)

        # Verify there are edges to be added to graph
        if len(comments_df) == 0:
            continue
        else:
            network, queue = graphAdder(comments_df, network, node, queue)

    time.sleep(0.1)

else:
    print('queue empty')

# Write network to disk
nx.write_graphml(
    network, 'network_{one}_{two}.graphml'.format(one=origin_node,
                                                  two=min_utc))
def findCommunites(threshold=0.5, sector=None, k=5, force=False):
    th = re.sub(r'([0-9]*)\.([0-9]*)', r'\1\2', str(threshold))
    if sector != None:
        graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_" + sector + "_th" + th + ".xml"
        graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_" + sector + "_th" + th + "_k" + str(
            k) + ".xml"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_" + sector + "_th" + th + "_k" + str(
            k) + ".csv"
    else:
        graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_th" + th + ".xml"
        graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx" + "_th" + th + "_k" + str(
            k) + ".xml"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx" + "_th" + th + "_k" + str(
            k) + ".csv"

    print "reading graph from file: ", graphInFilename
    print "writing graph with community info to file: ", outFilename
    print "writing community details in csv format to file: ", outFilename

    if force or not isfile(graphOutFilename):
        g = nx.read_graphml(graphInFilename)
        #freq = findFreqOfCliquesInGraph(g)
        #plotHistFromDict(freq)

        comm = nx.k_clique_communities(g, k)
        communities = []
        for c in comm:
            communities.append(c)

        numCommunities = len(communities)
        print "number of communities found: ", numCommunities

        colors = range(numCommunities)

        i = 0
        for c in communities:
            for v in c:
                g.node[v]['cluster'] = colors[i] + 1
            i += 1

        nx.write_graphml(g, graphOutFilename)

        import csv
        with open(outFilename, "wb") as f:
            writer = csv.writer(f,
                                delimiter='|',
                                quotechar="'",
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerow(["sector", "symbol", "name", "cluster"])
            for v in g:
                writer.writerow([
                    g.node[v]['sector'], g.node[v]['symbol'],
                    g.node[v]['name'], g.node[v]['cluster']
                ])

        results = PROCESSED_FILE_LOC + "results.csv"
        with open(results, "a") as f1:
            f1.write(
                str(dt.datetime.today()) + "," + outFilename + "," +
                str(numCommunities) + "," +
                str(calculateModularity(graphOutFilename)) + "\n")

        drawGraph(graphOutFilename, "gt")
Example #40
        final_G.node[node]['voltages'] = str(final_G.node[node]['voltages'])

# throw away isolated components (this step is optional)
removed_nodes = list()
components = sorted(nx.connected_components(final_G), key=len, reverse=True)
for component_idx in range(1, len(components)):
    isolated_component = components[component_idx]
    print('isolated component {} = {}'.format(component_idx,
                                              isolated_component))
    removed_nodes.extend(isolated_component)
    final_G.remove_nodes_from(isolated_component)
print('node count without isolated components = {}'.format(
    final_G.number_of_nodes()))

# export graph in GraphML format
nx.write_graphml(final_G, parsed_graph_fpath)

# draw the final graph

pos = dict()
first_it = True
for node in final_G.nodes():
    x = final_G.node[node]['x']
    y = final_G.node[node]['y']
    pos[node] = [x, y]

    if first_it is True:
        x_min = x
        y_min = y
        x_max = x
        y_max = y
thres = np.percentile(weights, 99.9)
fTopicGraph = prune(topicGraph, thres)
F = nx.DiGraph(A)
for (u, v) in A.edges_iter():
    has_edge = False
    if bool(term2top[u].intersection(term2top[v])):
        F.remove_edge(u, v)
        continue
    for i in term2top[u]:
        for j in term2top[v]:
            if fTopicGraph.has_edge(i, j):
                has_edge = True
    if has_edge == False:
        F.remove_edge(u, v)

nx.write_graphml(F, '/scratch/balash/pruned-graph.graphml')

inv_top = word2topic()
G = utils.read_pickle(
    '/scratch/balash/final-output/lasso/lasso_alpha_5_6_network')
H = topic_subgraphs(G, inv_top)
utils.write_pickle(H, '/scratch/balash/final-output/topic_subgraphs')

#Interesting subgraphs
for i in range(100):
    if W[i].number_of_nodes() < 40 and W[i].number_of_edges() > 5:
        print(i, W[i].number_of_nodes(), W[i].number_of_edges())

import networkx as nx

from bokeh.io import show, output_file
Example #42
nx.relabel_nodes(nut_network, labels, copy=False)

nx.relabel_nodes(nut_network, nutr_def.to_dict()['NutrDesc'], copy=False)

#Finds modularity of best partitions, describes clusters.
partition = community.best_partition(nut_network)
print("Modularity:", community.modularity(partition, nut_network))

HOW_MANY = 10


def describe_cluster(x):
    # x is a frame; select the matching rows from "domain"
    rows = nut_data.ix[x.index]
    # Calculate row sums, sort them, get the last HOW_MANY
    top_N = rows.sum(axis=1).sort_values(ascending=False)[:HOW_MANY]
    # What labels do they have?
    return top_N.index.values


word_clusters = pd.DataFrame({"part_id": pd.Series(partition)})
results = word_clusters.groupby("part_id").apply(describe_cluster)
_ = [print("--", "; ".join(r.tolist())) for r in results]

#Saves to file.
if not os.path.isdir("results"):
    os.mkdir("results")
with open("results/nut_data.graphml", "wb") as ofile:
    nx.write_graphml(nut_network, ofile)
Example #43
    def _run_interface(self, runtime):
        print("================================================")
        print(" > Creation of rs-fMRI connectome maps")
        print("   .. BOLD file :" + self.inputs.func_file)
        print("   .. parcellation : %s" % self.inputs.parcellation_scheme)
        print("================================================")

        fdata = nib.load(self.inputs.func_file).get_data()
        tp = fdata.shape[3]

        if self.inputs.parcellation_scheme != "Custom":
            if self.inputs.parcellation_scheme == "NativeFreesurfer":
                resolutions = get_parcellation(self.inputs.parcellation_scheme)
            else:  # Lausanne2018
                resolutions = get_parcellation(self.inputs.parcellation_scheme)
                for parkey, parval in list(resolutions.items()):
                    for vol, graphml in zip(self.inputs.roi_volumes, self.inputs.roi_graphmls):
                        if parkey in vol:
                            roi_fname = vol
                        if parkey in graphml:
                            roi_graphml_fname = graphml
                    roi = nib.load(roi_fname)
                    roiData = roi.get_data()
                    resolutions[parkey]["number_of_regions"] = roiData.max()
                    resolutions[parkey]["node_information_graphml"] = os.path.abspath(roi_graphml_fname)

                del roi, roiData
        else:
            resolutions = self.inputs.atlas_info

        # loop throughout all the resolutions ('scale33', ..., 'scale500')
        for parkey, parval in list(resolutions.items()):
            print("------------------------------------------------")
            print("Resolution = " + parkey)
            print("------------------------------------------------")

            # Open the corresponding ROI
            for vol in self.inputs.roi_volumes:
                if (parkey in vol) or (len(self.inputs.roi_volumes) == 1):
                    roi_fname = vol

            roi = nib.load(roi_fname)
            mask = roi.get_data()

            # Compute average time-series
            print("  ************************************************")
            print("  >> Compute average rs-fMRI signal for each cortical ROI ")
            nROIs = parval["number_of_regions"]  # number of ROIs for current resolution

            # matrix number of rois vs timepoints
            ts = np.zeros((nROIs, tp), dtype=np.float32)

            # loop throughout all the ROIs (current resolution)
            for i in range(1, nROIs + 1):
                ts[i - 1, :] = fdata[mask == i].mean(axis=0)

            # Save average roi time-series
            np.save(os.path.abspath("averageTimeseries_%s.npy" % parkey), ts)
            sio.savemat(os.path.abspath("averageTimeseries_%s.mat" % parkey), {"ts": ts})

            # Create graph, add node information from parcellation and recover ROI indexes
            print("  ************************************************")
            print("  >> Load %s to initialize graph " % parval["node_information_graphml"])
            G = nx.Graph()
            gp = nx.read_graphml(parval["node_information_graphml"])
            ROI_idx = []
            for u, d in gp.nodes(data=True):
                G.add_node(int(u))
                for key in d:
                    G.nodes[int(u)][key] = d[key]
                # Compute a position for the node based on the mean position of the
                # ROI in voxel coordinates (segmentation volume )
                G.nodes[int(u)]["dn_position"] = tuple(
                    np.mean(np.where(mask == int(d["dn_multiscaleID"])), axis=1)
                )
                ROI_idx.append(int(d["dn_multiscaleID"]))

            # Apply scrubbing (if enabled)
            if self.inputs.apply_scrubbing:
                print("  ************************************************")
                print("  >> Apply scrubbing")
                # Load scrubbing FD and DVARS series
                FD = np.load(self.inputs.FD)
                DVARS = np.load(self.inputs.DVARS)
                # Evaluate scrubbing mask
                FD_th = self.inputs.FD_th
                DVARS_th = self.inputs.DVARS_th
                FD_mask = np.array(np.nonzero(FD < FD_th))[0, :]
                DVARS_mask = np.array(np.nonzero(DVARS < DVARS_th))[0, :]
                index = np.sort(np.unique(np.concatenate((FD_mask, DVARS_mask)))) + 1
                index = np.concatenate(([0], index))
                log_scrubbing = (
                    "  .. INFO: DISCARDED time points after scrubbing: "
                    + str(FD.shape[0] - index.shape[0] + 1)
                    + " over "
                    + str(FD.shape[0] + 1)
                )
                print(log_scrubbing)
                np.save(os.path.abspath("tp_after_scrubbing.npy"), index)
                sio.savemat(os.path.abspath("tp_after_scrubbing.mat"), {"index": index})
                ts_after_scrubbing = ts[:, index]
                np.save(
                    os.path.abspath(
                        "averageTimeseries_%s_after_scrubbing.npy" % parkey
                    ),
                    ts_after_scrubbing,
                )
                sio.savemat(
                    os.path.abspath(
                        "averageTimeseries_%s_after_scrubbing.mat" % parkey
                    ),
                    {"ts": ts_after_scrubbing},
                )
                ts = ts_after_scrubbing

            # Compute pairwise ROI time-series correlation
            print("  ************************************************")
            print("  >> Compute pairwise ROI time-series correlation")
            nnodes = ts.shape[0]
            i = -1
            for i_signal in ts:
                i += 1
                for j in range(i, nnodes):
                    j_signal = ts[j, :]
                    value = np.corrcoef(i_signal, j_signal)[0, 1]
                    G.add_edge(ROI_idx[i], ROI_idx[j])
                    G[ROI_idx[i]][ROI_idx[j]]["corr"] = value

            # Get the edge attributes/keys/weights from the first edge and then break.
            # Change w.r.t networkx2
            edge_keys = []
            for _, _, d in G.edges(data=True):
                edge_keys = list(d.keys())
                break

            # Save the computed connectivity matrix
            print("  ************************************************")
            print("  >> Save functional connectome map as:")

            print("    - connectome_%s.tsv" % parkey)
            with open("connectome_%s.tsv" % parkey, "w") as out_file:
                tsv_writer = csv.writer(out_file, delimiter="\t")
                header = ["source", "target"]
                header = header + [key for key in edge_keys]
                tsv_writer.writerow(header)

            with open("connectome_%s.tsv" % parkey, "ab") as out_file:
                nx.write_edgelist(
                    G,
                    out_file,
                    comments="#",
                    delimiter="\t",
                    data=edge_keys,
                    encoding="utf-8",
                )

            # storing network
            if "gPickle" in self.inputs.output_types:
                print("    - connectome_%s.gpickle" % parkey)
                nx.write_gpickle(G, "connectome_%s.gpickle" % parkey)

            if "mat" in self.inputs.output_types:
                print("    - connectome_%s.mat" % parkey)
                edge_struct = {}
                for edge_key in edge_keys:
                    edge_struct[edge_key] = nx.to_numpy_matrix(G, weight=edge_key)

                # Number of ROIs (nodes)
                size_nodes = len(list(G.nodes()))

                # Get the node attributes/keys from the first node and then break.
                # Change w.r.t networkx2
                for u, d in G.nodes(data=True):
                    node_keys = list(d.keys())
                    break

                node_struct = {}
                for node_key in node_keys:
                    if node_key == "dn_position":
                        node_arr = np.zeros([size_nodes, 3], dtype=np.float)
                    else:
                        node_arr = np.zeros(size_nodes, dtype=np.object_)
                    node_n = 0
                    for _, node_data in G.nodes(data=True):
                        node_arr[node_n] = node_data[node_key]
                        node_n += 1
                    node_struct[node_key] = node_arr

                sio.savemat("connectome_%s.mat" % parkey, mdict={"sc": edge_struct, "nodes": node_struct})

            if "graphml" in self.inputs.output_types:
                print("    - connectome_%s.graphml" % parkey)
                g2 = nx.Graph()
                # Create graph nodes
                for u_gml, d_gml in G.nodes(data=True):
                    g2.add_node(u_gml)
                    g2.nodes[u_gml]["dn_multiscaleID"] = d_gml["dn_multiscaleID"]
                    g2.nodes[u_gml]["dn_fsname"] = d_gml["dn_fsname"]
                    g2.nodes[u_gml]["dn_hemisphere"] = d_gml["dn_hemisphere"]
                    g2.nodes[u_gml]["dn_name"] = d_gml["dn_name"]
                    g2.nodes[u_gml]["dn_position_x"] = d_gml["dn_position"][0]
                    g2.nodes[u_gml]["dn_position_y"] = d_gml["dn_position"][1]
                    g2.nodes[u_gml]["dn_position_z"] = d_gml["dn_position"][2]
                    g2.nodes[u_gml]["dn_region"] = d_gml["dn_region"]
                # Create graph edges
                for u_gml, v_gml, d_gml in G.edges(data=True):
                    g2.add_edge(u_gml, v_gml)
                    g2[u_gml][v_gml]["corr"] = float(d_gml["corr"])
                # Save the graph
                nx.write_graphml(g2, "connectome_%s.graphml" % parkey)

        print("[ DONE ]")
        return runtime
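
# --- Hedged usage sketch (not part of the interface above) ---
# A minimal example of reading back one of the functional connectome GraphML
# files written above and rebuilding the correlation matrix from its "corr"
# edge attribute. The attribute names ("dn_multiscaleID", "corr") follow the
# snippet above; the file name "connectome_scale1.graphml" is only an
# illustrative assumption for one parcellation key.
import networkx as nx
import numpy as np

def load_corr_matrix(path="connectome_scale1.graphml"):
    g = nx.read_graphml(path)
    # order nodes by their multiscale ID so rows/columns are reproducible
    nodes = sorted(g.nodes(), key=lambda n: int(g.nodes[n]["dn_multiscaleID"]))
    index = {n: i for i, n in enumerate(nodes)}
    mat = np.zeros((len(nodes), len(nodes)))
    for u, v, d in g.edges(data=True):
        mat[index[u], index[v]] = d["corr"]
        mat[index[v], index[u]] = d["corr"]
    return mat
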
def createGraph(threshold=0.5, sector=None, lib="nx", force=False):
    #sectors = pd.read_json("sector_industry_company.json")
    sectors = pd.read_csv(SECTOR_INFO_FILE)

    th = re.sub(r'([0-9]*)\.([0-9]*)', r'\1\2', str(threshold))
    if sector != None:
        filename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "corr_matrix_" + sector + ".json"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_" + lib + "_" + sector + "_th" + th + ".xml"
        #industry = sectors[sectors['sector_name'] == sector]
        industry = sectors[sectors['Sector'] == sector]
    else:
        filename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "corr_matrix.json"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_" + lib + "_th" + th + ".xml"
        industry = sectors

    print "reading correlation matrix from file: ", filename
    print "writing graph to file: ", outFilename

    if force or not isfile(outFilename):
        #company = dict(zip(industry['company_symbol'], zip(industry['company_name'], industry['sector_name'])))
        company = dict(
            zip(industry['Symbol'], zip(industry['Name'], industry['Sector'])))
        #print company

        corrMat = pd.read_json(filename)
        #print corrMat.head()
        symbols = corrMat.columns
        numStocks = len(symbols)

        if lib == "nx":
            g = nx.Graph()
            for i, sym in enumerate(symbols):
                cluster = 0  #randint(1, 5)
                if sym in company:
                    companyName, sectorName = company.get(sym)
                else:
                    companyName, sectorName = None, None
                if companyName == None or len(companyName) == 0:
                    companyName = "Unavailable"
                if sectorName == None or len(sectorName) == 0:
                    sectorName = "Unavailable"
                g.add_node(i,
                           symbol=sym,
                           name=companyName,
                           sector=sectorName,
                           cluster=cluster)

            for i in range(numStocks):
                for j in range(i + 1, numStocks):
                    w = corrMat[symbols[i]][symbols[j]]
                    if abs(w) >= threshold:
                        #print "adding edge: (", symbols[i], ",", symbols[j], ",", w, ")"
                        g.add_edge(i, j, weight=float(w))

            print(g.number_of_nodes(), g.number_of_edges())
            nx.write_graphml(g, outFilename)

        elif lib == "gt":
            g = Graph(directed=False)
            g.add_vertex(numStocks)

            v_symbol = g.new_vertex_property("string")
            g.vp.symbol = v_symbol
            v_name = g.new_vertex_property("string")
            g.vp.name = v_name
            v_cluster = g.new_vertex_property("int")
            g.vp.cluster = v_cluster
            for i in range(numStocks):
                v = g.vertex(i)
                g.vp.symbol[v] = symbols[i]
                g.vp.name[v] = company.get(symbols[i])
                g.vp.cluster[v] = 0

            e_weight = g.new_edge_property("double")
            g.ep.weight = e_weight

            for i in range(numStocks - 1):
                for j in range(i + 1, numStocks):
                    w = corrMat[symbols[i]][symbols[j]]
                    if abs(w) >= threshold:
                        #print "adding edge: (", symbols[i], ",", symbols[j], ",", w, ")"
                        g.add_edge(i, j)
                        g.ep.weight[g.edge(i, j)] = w

            print(g.num_vertices(), g.num_edges())
            g.save(outFilename, fmt="graphml")

        drawGraph(outFilename)
    else:
        g = nx.read_graphml(outFilename)

    getGraphStats(threshold, sector, lib)

    return g
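
# Hedged usage sketch for createGraph(): the sector name "Technology" and the
# thresholds are illustrative assumptions, and the correlation matrix JSON
# plus SECTOR_INFO_FILE must already exist for the call to succeed.
if __name__ == "__main__":
    g_all = createGraph(threshold=0.5, lib="nx", force=True)
    g_tech = createGraph(threshold=0.7, sector="Technology", lib="nx")
    print(g_all.number_of_nodes(), g_all.number_of_edges())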
Exemple #45
0
 def write(self, target):
     for k in self.MU.keys():
         nx.write_graphml(self.MU[k],
                          '{0}_topic_{1}.graphml'.format(target, k))

		
	elif output_file_type == 'pickle':
		try:
			# if the file format is Pickle, write graph G
			nx.write_gpickle(G, output_file_path)
			break
		except IOError:
			print("Could not write output type '" + output_file_type + "'; please select another output file path\n")

	elif output_file_type == 'graphML':
		try:
			# if the file format is GraphML, write graph G
			nx.write_graphml(G, output_file_path)
			break
		except IOError:
			print("Could not write output type '" + output_file_type + "'; please select another output file path\n")

	elif output_file_type == 'YAML':
		try:
			# if the file format is YAML, write graph G
			nx.write_yaml(G, output_file_path)
			break
		except IOError:
			print("Could not write output type '" + output_file_type + "'; please select another output file path\n")
Exemple #47
0
def main(family, quantile_ass=.99):
    data_folder = os.path.join(Path(os.getcwd()).parents[1], 'data')
    # load the pickle generated by the "associate_env.py" script
    store = pickle.load(open(data_folder + '/pickles/' + family + '.pkl',
                             'rb'))

    used_environment = store['used_env'].copy()

    full_freq_m = store['full_freq_m'].copy()

    reactome = store['reactome'].copy()
    model_sample = store['model_sample'].copy()
    transporter = store['transporter'].copy()

    #replace nan values by the average
    av_used_env = np.nanmean(used_environment, 0)
    inds = np.where(np.isnan(used_environment))
    used_environment[inds] = np.take(av_used_env, inds[1])

    #for reaction frequency
    av_freq_m = np.mean(full_freq_m, axis=0)
    diff_freq_m = full_freq_m - av_freq_m

    #filter out noise and find reactions that are driven by the environment
    env_d_score1 = np.round(np.max(diff_freq_m, axis=0), 4)
    env_d_score1 = env_d_score1 / max(np.abs(env_d_score1))
    env_d_score2 = np.round(np.min(diff_freq_m, axis=0), 4)
    env_d_score2 = env_d_score2 / max(np.abs(env_d_score2))
    env_d_score = np.zeros(len(env_d_score1))
    for i in range(len(env_d_score1)):
        if abs(env_d_score2[i]) > abs(env_d_score1[i]):
            env_d_score[i] = env_d_score2[i]
        else:
            env_d_score[i] = env_d_score1[i]

    m_diff_freq_m = np.abs(env_d_score)

    env_driven_reactome = reactome  #[m_diff_freq_m>.005]
    diff_freq_m_envd = diff_freq_m.T  #[m_diff_freq_m>.005].T
    reaction_frequency = full_freq_m.T  #[m_diff_freq_m>.005].T

    clss_freq_m = np.zeros(diff_freq_m_envd.shape)
    for i, v in enumerate(diff_freq_m_envd):
        clss_freq_m[i] = v  #assign_to_rank(v, fpc,fnc)

    #for the environment
    av_used_env = np.mean(used_environment, axis=0)

    diff_used_env = used_environment - av_used_env

    #filter out noise and find metabolites that are driven by the environment
    m_diff_used_env = np.max(np.abs(diff_used_env), axis=0)
    driving_mets = transporter  #[m_diff_used_env>0.005]
    diff_used_env_envd = diff_used_env.T  #[m_diff_used_env>0.005].T
    used_env = used_environment.T  #[m_diff_used_env>0.005].T
    clss_used_env = np.zeros(diff_used_env_envd.shape)

    for i, v in enumerate(diff_used_env_envd):
        clss_used_env[i] = v  #assign_to_rank(v, epc, enc)

    s_clss_fm = np.sum(np.abs(clss_freq_m), axis=0)
    s_clss_ue = np.sum(np.abs(clss_used_env), axis=0)

    #env_driven_reactome

    envd_reactions = env_driven_reactome[s_clss_fm != 0]

    #driving_metabolites
    dm = driving_mets.copy()
    dm = dm[s_clss_ue != 0]

    #profiles
    envd_prof = clss_freq_m.T[s_clss_fm != 0].T
    dm_prof = clss_used_env.T[s_clss_ue != 0].T

    #regression terms
    x = reaction_frequency.T[s_clss_fm != 0].T
    y = used_env.T[s_clss_ue != 0].T

    cosine_dict = {}

    for i, reac in enumerate(envd_prof.T):
        cosine_dict[envd_reactions[i]] = np.array(
            [cosine(reac.flatten(), metab.flatten()) for metab in dm_prof.T])

    cosine_pool = np.array(list(cosine_dict.values())).flatten()
    pc = np.quantile(cosine_pool[cosine_pool > 0], quantile_ass)
    nc = np.quantile(cosine_pool[cosine_pool < 0], 1 - quantile_ass)
    association_d = {}

    for i, reac in enumerate(envd_prof.T):
        v = cosine_dict[envd_reactions[i]]

        association_d[envd_reactions[i]] = assign_to_rank(v, pc, nc)

    g = build_association_network(association_d, envd_reactions, dm)
    nx.write_graphml(
        g,
        os.path.join(
            Path(os.getcwd()).parents[0], 'files', 'networks', family) +
        '.graphml')

    #find metabolite concentrations for models
    from sklearn.linear_model import MultiTaskElasticNetCV as EN
    enet = EN(cv=3, verbose=1, n_jobs=7, max_iter=10000)
    print(x.shape, y.shape)
    mod = enet.fit(x, y)
    evolved_env = np.zeros((len(model_sample), len(dm)))

    for i, mod_prof in enumerate(model_sample):
        print(family, i)
        v = mod_prof[m_diff_freq_m > .005]

        p = mod.predict(v[s_clss_fm != 0].reshape(1, -1))
        p = p.flatten()
        p = p + abs(min(p))
        p = p / max(p)
        evolved_env[i] = p.copy()

    #av_mod_diff = np.arctanh(av_mod_diff)
    met_prof = get_evolved_met_prof(evolved_env, dm, transporter)

    return transporter, met_prof
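
# Hedged usage sketch: the family name "Bacteroidaceae" is an illustrative
# assumption; main() expects a pickle at <data>/pickles/<family>.pkl produced
# by the associate_env.py script and writes <family>.graphml under
# files/networks/.
if __name__ == "__main__":
    transporter, met_prof = main("Bacteroidaceae", quantile_ass=0.99)
    print(len(transporter))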
Exemple #48
0
def cmat(
    intrk,
    roi_volumes=None,
    roi_graphmls=None,
    parcellation_scheme=None,
    compute_curvature=True,
    additional_maps=None,
    output_types=None,
    atlas_info=None,
):
    """Create the connection matrix for each resolution using fibers and ROIs.

    Parameters
    ----------
    intrk : TRK file
        Reconstructed tractogram

    roi_volumes : list
        List of parcellation files for a given parcellation scheme

    roi_graphmls : list
        List of graphml files that describe the parcellation nodes

    parcellation_scheme : ['NativeFreesurfer', 'Lausanne2018', 'Custom']

    compute_curvature : Boolean

    additional_maps : dict
        A dictionary of key/value for each additional map where the value
        is the path to the map

    output_types : ['gPickle','mat','graphml']

    atlas_info : dict
        Dictionary storing information such as path to files related to a
        parcellation atlas / scheme.
    """
    if additional_maps is None:
        additional_maps = {}
    if atlas_info is None:
        atlas_info = {}
    if output_types is None:
        output_types = ["gPickle"]

    print("================================================")
    print(" > Creation of connectome maps")
    print("   .. tractogram :" + intrk)
    print("   .. parcellation : %s" % parcellation_scheme)
    print("================================================")

    # create the endpoints for each fiber
    en_fname = "endpoints.npy"
    en_fnamemm = "endpointsmm.npy"
    curv_fname = "meancurvature.npy"

    fib, hdr = nib.trackvis.read(intrk, False)
    n = len(fib)  # number of fibers

    if parcellation_scheme != "Custom":
        if parcellation_scheme != "Lausanne2018":
            resolutions = get_parcellation(parcellation_scheme)
        else:
            resolutions = get_parcellation(parcellation_scheme)
            for parkey, parval in list(resolutions.items()):
                for vol, graphml in zip(roi_volumes, roi_graphmls):
                    if parkey in vol:
                        roi_fname = vol
                    if parkey in graphml:
                        roi_graphml_fname = graphml

                roi = nib.load(roi_fname)
                roiData = roi.get_data()
                resolutions[parkey]["number_of_regions"] = roiData.max()
                resolutions[parkey]["node_information_graphml"] = op.abspath(
                    roi_graphml_fname
                )

            del roi, roiData
    else:
        resolutions = atlas_info

    # Previously, load_endpoints_from_trk() used the voxel size stored
    # in the track hdr to transform the endpoints to ROI voxel space.
    # This only works if the ROI voxel size is the same as the DSI/DTI
    # voxel size.  In the case of DTI, it is not.
    # We do, however, assume that all of the ROI images have the same
    # voxel size, so this code just loads the first one to determine
    # what it should be
    firstROIFile = roi_volumes[0]
    firstROI = nib.load(firstROIFile)
    roiVoxelSize = firstROI.get_header().get_zooms()

    (endpoints, endpointsmm) = create_endpoints_array(fib, roiVoxelSize, True)
    np.save(en_fname, endpoints)
    np.save(en_fnamemm, endpointsmm)

    # Only compute curvature if required
    if compute_curvature:
        meancurv = compute_curvature_array(fib)
        np.save(curv_fname, meancurv)

    streamline_wrote = False
    for parkey, parval in list(resolutions.items()):
        print("------------------------------------------------")
        print("Resolution = " + parkey)
        print("------------------------------------------------")

        # create empty fiber label array
        fiberlabels = np.zeros((n, 2))
        final_fiberlabels = []
        final_fibers_idx = []

        # Open the corresponding ROI:
        # scale1 for lausanne2008/18
        # first volume for nativefreesurfer
        for vol in roi_volumes:
            if (parkey in vol) or (len(roi_volumes) == 1):
                roi_fname = vol
        roi = nib.load(roi_fname)
        roiData = roi.get_data()

        # Create the matrix
        print(
            "  >> Create the connection matrix (%s rois)" % parval["number_of_regions"]
        )

        nROIs = parval["number_of_regions"]
        G = nx.Graph()

        # Add node information from parcellation
        gp = nx.read_graphml(parval["node_information_graphml"])
        n_nodes = len(gp)
        pc = -1
        cnt = -1

        for u, d in gp.nodes(data=True):

            # Percent counter
            cnt += 1
            pcN = int(round(float(100 * cnt) / n_nodes))
            if pcN > pc and pcN % 10 == 0:
                pc = pcN
                print("%4.0f%%" % pc)

            G.add_node(int(u))
            for key in d:
                G.nodes[int(u)][key] = d[key]
            # compute a position for the node based on the mean position of the
            # ROI in voxel coordinates (segmentation volume )
            G.nodes[int(u)]["dn_position"] = tuple(
                np.mean(np.where(roiData == int(d["dn_multiscaleID"])), axis=1)
            )
            G.nodes[int(u)]["roi_volume"] = np.sum(
                roiData == int(d["dn_multiscaleID"])
            )

        dis = 0

        # Prepare: compute the measures
        t = [c[0] for c in fib]
        h = np.array(t, dtype=object)

        mmap = additional_maps
        mmapdata = {}
        print("  >> Maps to be processed :")
        for k, v in list(mmap.items()):
            print("     - %s map" % k)
            da = nib.load(v)
            mdata = da.get_data()
            print(mdata.max())
            mdata = np.nan_to_num(mdata)
            print(mdata.max())
            mmapdata[k] = (mdata, da.get_header().get_zooms())

        print("  ************************")
        print("  >> Processing fibers and computing metrics (%s fibers)" % n)
        pc = -1
        for i in range(n):  # n: number of fibers
            # Percent counter
            pcN = int(round(float(100 * i) / n))
            if pcN > pc and pcN % 10 == 0:
                pc = pcN
                print("%4.0f%%" % pc)

            # ROI start => ROI end
            try:
                startvox = np.zeros((3, 1)).astype(int)
                startvox[0] = int(endpoints[i, 0, 0])
                startvox[1] = int(endpoints[i, 0, 1])
                startvox[2] = int(endpoints[i, 0, 2])

                endvox = np.zeros((3, 1)).astype(int)
                endvox[0] = int(endpoints[i, 1, 0])
                endvox[1] = int(endpoints[i, 1, 1])
                endvox[2] = int(endpoints[i, 1, 2])

                # Endpoints from create_endpoints_array
                startROI = int(roiData[startvox[0], startvox[1], startvox[2]])
                endROI = int(roiData[endvox[0], endvox[1], endvox[2]])

            except IndexError:
                print(" .. ERROR: An index error occured for fiber %s. " % i)
                print("           This means that the fiber start or endpoint is outside the volume. Continue.")
                print("           Continue.")
                continue

            # Filter
            if startROI == 0 or endROI == 0:
                dis += 1
                fiberlabels[i, 0] = -1
                continue

            if startROI > nROIs or endROI > nROIs:
                print(" .. ERROR: Start or endpoint of fiber terminate in a voxel which is labeled higher")
                print("           than is expected by the parcellation node information.")
                print("           Start ROI: %i, End ROI: %i" % (startROI, endROI))
                print("           This needs bugfixing!")
                print("           Continue.")
                continue

            # Switch the rois in order to enforce startROI < endROI
            if endROI < startROI:
                tmp = startROI
                startROI = endROI
                endROI = tmp

            # TODO: Refine fibers ending in thalamus
            # if (startROI in thalamic_labels) or (endROI in thalamic_labels):
            # Extract all thalamic nuclei the fiber is passing through
            # Refine start/endROI connecting to the most probable nucleus

            # Update fiber label
            fiberlabels[i, 0] = startROI
            fiberlabels[i, 1] = endROI

            final_fiberlabels.append([startROI, endROI])
            final_fibers_idx.append(i)

            # Add edge to graph
            if G.has_edge(startROI, endROI):
                G[startROI][endROI]["fiblist"].append(i)
            else:
                G.add_edge(startROI, endROI, fiblist=[i])

        print(
            "  ... INFO - Found %i (%f percent out of %i fibers) fibers " % (dis, dis * 100.0 / n, n) +
            "that start or terminate in a voxel which is not labeled. (orphans)"
        )
        print(
            "  ... INFO - Valid fibers: %i (%f percent)"
            % (n - dis, 100 - dis * 100.0 / n)
        )

        # create a final fiber length array
        finalfiberlength = []
        for idx in final_fibers_idx:
            # compute length of fiber
            finalfiberlength.append(length(fib[idx][0]))

        # convert to array
        final_fiberlength_array = np.array(finalfiberlength)

        # make final fiber labels as array
        final_fiberlabels_array = np.array(final_fiberlabels, dtype=np.int32)

        total_fibers = 0
        total_volume = 0
        u_old = -1
        for u, v, d in G.edges(data=True):
            total_fibers += len(d["fiblist"])
            if u != u_old:
                total_volume += G.nodes[int(u)]["roi_volume"]
            u_old = u

        G_out = copy.deepcopy(G)

        # Update edges
        # New connectivity measures can be added here
        # FIXME treat case of self-connection that gives di['fiber_length_mean'] = 0.0
        for u, v, d in G.edges(data=True):
            # Check for diagonal elements that raise an error when the edge is visited a second time
            G_out.remove_edge(u, v)

            if len(list(G[u][v].keys())) == 1:
                di = {"number_of_fibers": len(G[u][v]["fiblist"])}

                # additional measures
                # compute mean/std of fiber measure
                if u <= v:
                    idx = np.where(
                        (final_fiberlabels_array[:, 0] == int(u))
                        & (final_fiberlabels_array[:, 1] == int(v))
                    )[0]
                else:
                    idx = np.where(
                        (final_fiberlabels_array[:, 0] == int(v))
                        & (final_fiberlabels_array[:, 1] == int(u))
                    )[0]

                di["fiber_length_mean"] = float(
                    np.nanmean(final_fiberlength_array[idx])
                )
                di["fiber_length_median"] = float(
                    np.nanmedian(final_fiberlength_array[idx])
                )
                di["fiber_length_std"] = float(np.nanstd(final_fiberlength_array[idx]))

                di["fiber_proportion"] = float(
                    100.0 * (di["number_of_fibers"] / float(total_fibers))
                )

                # Compute density
                # Formula: density = (#fibers / mean_fibers_length) * (2 / (area_roi_u + area_roi_v))
                if di["fiber_length_mean"] > 0.0:
                    di["fiber_density"] = float(
                        (float(di["number_of_fibers"]) / float(di["fiber_length_mean"]))
                        * float(
                            2.0
                            / (
                                G.nodes[int(u)]["roi_volume"]
                                + G.nodes[int(v)]["roi_volume"]
                            )
                        )
                    )
                    di["normalized_fiber_density"] = float(
                        (
                            (float(di["number_of_fibers"]) / float(total_fibers))
                            / float(di["fiber_length_mean"])
                        )
                        * (
                            (2.0 * float(total_volume))
                            / (
                                G.nodes[int(u)]["roi_volume"]
                                + G.nodes[int(v)]["roi_volume"]
                            )
                        )
                    )
                else:
                    di["fiber_density"] = 0.0
                    di["normalized_fiber_density"] = 0.0
                # This is indexed into the fibers that are valid in the sense of touching start
                # and end roi and not going out of the volume
                if u <= v:
                    idx_valid = np.where(
                        (fiberlabels[:, 0] == int(u)) & (fiberlabels[:, 1] == int(v))
                    )[0]
                else:
                    idx_valid = np.where(
                        (fiberlabels[:, 0] == int(v)) & (fiberlabels[:, 1] == int(u))
                    )[0]

                for k, vv in list(mmapdata.items()):
                    val = []
                    for i in idx_valid:
                        # retrieve indices
                        try:
                            idx2 = (h[i] / vv[1]).astype(np.uint32)
                            val.append(vv[0][idx2[:, 0], idx2[:, 1], idx2[:, 2]])
                        except IndexError as e:
                            print(
                                "  ... ERROR - Index error occured when trying extract scalar values for measure",
                                k,
                            )
                            print(
                                "  ... ERROR - Discard fiber with index ",
                                i,
                                "Exception: ",
                                e,
                            )

                    if len(val) > 0:
                        da = np.concatenate(val)

                        if k == "shore_rtop":
                            di[k + "_mean"] = da.astype(np.float64).mean()
                            di[k + "_std"] = da.astype(np.float64).std()
                            di[k + "_median"] = np.median(da.astype(np.float64))
                        else:
                            di[k + "_mean"] = da.mean().astype(np.float)
                            di[k + "_std"] = da.std().astype(np.float)
                            di[k + "_median"] = np.median(da).astype(np.float)

                        del da
                        del val

                G_out.add_edge(u, v)
                for key in di:
                    G_out[u][v][key] = di[key]

        del G

        print("  ************************************************")
        print("  >> Save structural connectome maps as :")
        # Get the edge attributes/keys/weights from the first edge and then break.
        # Change w.r.t networkx2
        edge_keys = []
        for u, v, d in G_out.edges(data=True):
            edge_keys = list(d.keys())
            break

        # Storing network/graph in TSV format (by default to be BIDS compliant)
        print("    - connectome_%s.tsv" % parkey)
        # Write header fields
        with open("connectome_%s.tsv" % parkey, "w") as out_file:
            tsv_writer = csv.writer(out_file, delimiter="\t")
            header = ["source", "target"]
            header = header + [key for key in edge_keys]
            tsv_writer.writerow(header)
        # Write list of graph edges with all connectivity metrics (edge_keys)
        with open("connectome_%s.tsv" % parkey, "ab") as out_file:
            nx.write_edgelist(
                G_out,
                out_file,
                comments="#",
                delimiter="\t",
                data=edge_keys,
                encoding="utf-8",
            )

        # Storing network/graph in other formats that might be prefered by the user
        if "gPickle" in output_types:
            print("    - connectome_%s.gpickle" % parkey)
            nx.write_gpickle(G_out, "connectome_%s.gpickle" % parkey)

        if "mat" in output_types:
            edge_struct = {}
            for edge_key in edge_keys:
                if edge_key != "fiblist":
                    edge_struct[edge_key] = nx.to_numpy_matrix(G_out, weight=edge_key)

            # nodes
            size_nodes = len(list(G_out.nodes(data=True)))

            # Get the node attributes/keys from the first node and then break.
            # Change w.r.t networkx2
            for u, d in G_out.nodes(data=True):
                node_keys = list(d.keys())
                break

            node_struct = {}
            for node_key in node_keys:
                if node_key == "dn_position":
                    node_arr = np.zeros([size_nodes, 3], dtype=float)
                else:
                    node_arr = np.zeros(size_nodes, dtype=np.object_)

                node_n = 0
                for _, node_data in G_out.nodes(data=True):
                    node_arr[node_n] = node_data[node_key]
                    node_n += 1
                node_struct[node_key] = node_arr
            print("    - connectome_%s.mat" % parkey)
            sio.savemat(
                "connectome_%s.mat" % parkey,
                long_field_names=True,
                mdict={"sc": edge_struct, "nodes": node_struct},
            )

        if "graphml" in output_types:
            g2 = nx.Graph()
            for u_gml, v_gml, d_gml in G_out.edges(data=True):
                g2.add_edge(u_gml, v_gml)
                for key in d_gml:
                    g2[u_gml][v_gml][key] = d_gml[key]
            for u_gml, d_gml in G_out.nodes(data=True):
                g2.add_node(u_gml)
                g2.nodes[u_gml]["dn_multiscaleID"] = d_gml["dn_multiscaleID"]
                g2.nodes[u_gml]["dn_fsname"] = d_gml["dn_fsname"]
                g2.nodes[u_gml]["dn_hemisphere"] = d_gml["dn_hemisphere"]
                g2.nodes[u_gml]["dn_name"] = d_gml["dn_name"]
                g2.nodes[u_gml]["dn_position_x"] = d_gml["dn_position"][0]
                g2.nodes[u_gml]["dn_position_y"] = d_gml["dn_position"][1]
                g2.nodes[u_gml]["dn_position_z"] = d_gml["dn_position"][2]
                g2.nodes[u_gml]["dn_region"] = d_gml["dn_region"]
            print("    - connectome_%s.graphml" % parkey)
            nx.write_graphml(g2, "connectome_%s.graphml" % parkey)

        # Storing final fiber length array
        fiberlabels_fname = "final_fiberslength_%s.npy" % str(parkey)
        np.save(fiberlabels_fname, final_fiberlength_array)

        # Storing all fiber labels (with orphans)
        fiberlabels_fname = "filtered_fiberslabel_%s.npy" % str(parkey)
        np.save(
            fiberlabels_fname,
            np.array(fiberlabels, dtype=np.int32),
        )

        # Storing final fiber labels (no orphans)
        fiberlabels_noorphans_fname = "final_fiberlabels_%s.npy" % str(parkey)
        np.save(fiberlabels_noorphans_fname, final_fiberlabels_array)

        if not streamline_wrote:
            print("  > Filtering tractography - keeping only no orphan fibers")
            finalfibers_fname = "streamline_final.trk"
            save_fibers(hdr, fib, finalfibers_fname, final_fibers_idx)

    print("Done.")
    print("========================")
Exemple #49
0
def save_graphml(G, filename='graph.graphml', folder=None, gephi=False):
    """
    Save graph as GraphML file to disk.

    Parameters
    ----------
    G : networkx multidigraph
    filename : string
        the name of the graphml file (including file extension)
    folder : string
        the folder to contain the file, if None, use default data folder
    gephi : bool
        if True, give each edge a unique key to work around Gephi's
        restrictive interpretation of the GraphML specification

    Returns
    -------
    None
    """

    start_time = time.time()
    if folder is None:
        folder = settings.data_folder

    # create a copy to convert all the node/edge attribute values to string
    G_save = G.copy()

    if gephi:

        gdf_nodes, gdf_edges = graph_to_gdfs(G_save,
                                             nodes=True,
                                             edges=True,
                                             node_geometry=True,
                                             fill_edge_geometry=True)

        # turn each edge's key into a unique ID for Gephi compatibility
        gdf_edges['key'] = range(len(gdf_edges))

        # gephi doesn't handle node attrs named x and y well, so rename
        gdf_nodes['xcoord'] = gdf_nodes['x']
        gdf_nodes['ycoord'] = gdf_nodes['y']
        G_save = gdfs_to_graph(gdf_nodes, gdf_edges)

        # remove graph attributes as Gephi only accepts node and edge attrs
        G_save.graph = {}

    else:
        # if not gephi, keep graph attrs and stringify them
        for dict_key in G_save.graph:
            # convert all the graph attribute values to strings
            G_save.graph[dict_key] = make_str(G_save.graph[dict_key])

    # stringify node and edge attributes
    for _, data in G_save.nodes(data=True):
        for dict_key in data:
            if gephi and dict_key in ['xcoord', 'ycoord']:
                # don't convert x y values to string if saving for gephi
                continue
            else:
                # convert all the node attribute values to strings
                data[dict_key] = make_str(data[dict_key])

    for _, _, data in G_save.edges(keys=False, data=True):
        for dict_key in data:
            # convert all the edge attribute values to strings
            data[dict_key] = make_str(data[dict_key])

    if not os.path.exists(folder):
        os.makedirs(folder)

    nx.write_graphml(G_save, os.path.join(folder, filename))
    log('Saved graph "{}" to disk as GraphML at "{}" in {:,.2f} seconds'.
        format(G_save.name, os.path.join(folder, filename),
               time.time() - start_time))
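
# Hedged usage sketch for save_graphml(): graph_from_place() is the usual
# osmnx entry point for building a street network; the place name is an
# illustrative assumption.
import osmnx as ox

G_streets = ox.graph_from_place("Piedmont, California, USA", network_type="drive")
save_graphml(G_streets, filename="piedmont.graphml")
# a Gephi-friendly copy with unique edge keys and renamed coordinate attributes
save_graphml(G_streets, filename="piedmont_gephi.graphml", gephi=True)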
plt.ylim([-0.3, 1])
plt.xlabel("Spearman Correlation Coefficient")
plt.ylabel("Kendall $\\tau$ Correlation Coefficient")
plt.tight_layout()
plt.savefig("spearman_vs_tau.png")

# Create some example MSTs for us to draw
G_pearson = Graphs_pearson[0]
G_spearman = Graphs_spearman[0]
G_tau = Graphs_tau[0]

mst_pearson = nx.minimum_spanning_tree(correlation_to_distance(G_pearson))
mst_spearman = nx.minimum_spanning_tree(correlation_to_distance(G_spearman))
mst_tau = nx.minimum_spanning_tree(correlation_to_distance(G_tau))

nx.write_graphml(mst_pearson, "mst_pearson_0.graphml")
nx.write_graphml(mst_spearman, "mst_spearman_0.graphml")
nx.write_graphml(mst_tau, "mst_tau_0.graphml")
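
# correlation_to_distance() is not shown in this excerpt. A hedged sketch of a
# common choice in the MST/correlation-network literature is the metric
# d_ij = sqrt(2 * (1 - rho_ij)); the edge attribute name "weight" is an
# assumption about how the correlation graphs store rho.
import math

def correlation_to_distance_sketch(G_corr, weight="weight"):
    G_dist = nx.Graph()
    G_dist.add_nodes_from(G_corr.nodes(data=True))
    for u, v, d in G_corr.edges(data=True):
        # clamp at 0 to guard against correlations marginally above 1.0
        G_dist.add_edge(u, v, weight=math.sqrt(max(0.0, 2.0 * (1.0 - d[weight]))))
    return G_dist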

max_eig_df = pd.DataFrame()
max_eig_df['Pearson'] = pearson_largest_eig
max_eig_df['Spearman'] = spearman_largest_eig
max_eig_df['$\\tau$'] = tau_largest_eig
max_eig_df.index = dt
max_eig_df.plot()
plt.ylabel("$\lambda_{\max}$")
plt.tight_layout()
plt.savefig("max_eig.png")

edge_life_df = pd.DataFrame()
edge_life_df['Pearson'] = edges_life_pearson
Exemple #51
0
def convert_outputs(prefix, temporal_context):
    Path("flavors.json").write_text(
        json.dumps([*json.loads(Path("flavors.json").read_text()), prefix],
                   indent=4))
    label_key = 'name' if temporal_context else 'generic_name'
    if not Path(f'{prefix}.json').exists():
        generate_graph(grapfn=f'{prefix}.json',
                       keep_temporal_context=temporal_context)
    graph = json.loads(Path(f'{prefix}.json').read_text())
    if not Path(f'{prefix}_metrics.json').exists():
        Path(f'{prefix}_metrics.json').write_text(
            json.dumps(embed_metrics(graph), indent=2))
    metrics = json.loads(Path(f'{prefix}_metrics.json').read_text())
    if not Path(f'{prefix}_metrics_distances.json').exists():
        Path(f'{prefix}_metrics_distances.json').write_text(
            json.dumps(embed_metrics_distance(graph, metrics)))
    # to_networkx
    g = networkx.DiGraph()
    g.add_nodes_from([node[label_key] for node in graph.values()])
    g.add_edges_from([(node_source[label_key], graph[target][label_key])
                      for node_source in graph.values()
                      for target in node_source['mention_freq'].keys()])
    networkx.write_graphml(g, f'{prefix}_unweighted.graphml')
    for src in graph.values():
        srcnm = src[label_key]
        for tgt, w in src['mention_freq'].items():
            tgtnm = graph[tgt][label_key]
            g[srcnm][tgtnm]['weight'] = w
    networkx.write_graphml(g, f'{prefix}_weighted.graphml')
    g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_unweighted.graphml'))
    g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_weighted.graphml'))
    # to_sqlite
    if Path(f'{prefix}.db').exists():
        Path(f'{prefix}.db').unlink()
    sqldb = sqlite3.connect(f'{prefix}.db')
    cur = sqldb.cursor()
    cur.execute('''CREATE TABLE node (
        name VARCHAR(255),
        generic_name VARCHAR(255),
        type VARCHAR(255),
        doc_id VARCHAR(255),
        monitored bool,
        pub_date VARCHAR(255),
        in_force bool)''')
    cur.execute('''CREATE TABLE edge (
        node_src INTEGER,
        node_dst INTEGER,
        mentions INTEGER,
        FOREIGN KEY(node_src) REFERENCES node(rowid) ON UPDATE CASCADE ON DELETE CASCADE,
        FOREIGN KEY(node_dst) REFERENCES node(rowid) ON UPDATE CASCADE ON DELETE CASCADE)'''
                )
    cur.execute(f'''CREATE VIEW nodes AS
        SELECT
            rowid as id,
            {label_key} as label
        FROM node''')
    cur.execute('''CREATE VIEW edges AS
        SELECT
            rowid as id,
            node_src as source,
            node_dst as target,
            mentions as weight
        FROM edge''')
    node_name_to_id = dict()
    for node in graph.values():
        cur.execute(
            '''INSERT INTO node(
                name,
                generic_name,
                type,
                doc_id,
                monitored,
                pub_date,
                in_force
            ) VALUES(?,?,?,?,?,?,?)''',
            (node['name'], node['generic_name'], node['type'], node['doc_id'],
             node['monitored'], node['pub_date'], node['in_force']))
        node_name_to_id[node['name']] = cur.lastrowid
    for node in graph.values():
        node_src_nm = node['name']
        node_src = node_name_to_id[node_src_nm]
        for node_dst_nm, frequency in node['mention_freq'].items():
            node_dst = node_name_to_id[node_dst_nm]
            cur.execute(
                '''INSERT INTO edge(node_src,node_dst,mentions) VALUES(?,?,?)''',
                (node_src, node_dst, frequency))
    cur.close()
    sqldb.commit()
    Path(f'{prefix}.sql').write_text('\n'.join(sqldb.iterdump()))
    sqldb.close()
    # to_csv
    with open(f'{prefix}.csv', 'w') as file:
        file.write('%s,%s,%s\n' % ("source", "target", "weight"))
        for node in graph.values():
            node_src_nm = node['name']
            for node_dst_nm, frequency in node['mention_freq'].items():
                file.write('%s,%s,%d\n' %
                           (graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key], frequency))
    # to_graphviz
    gv = graphviz.Digraph()
    for node in graph.values():
        gv.node(str(node_name_to_id[node['name']]),
                label='\n'.join(
                    list(
                        map(
                            str,
                            filter(lambda a: a is not None, [
                                node['type'], node['doc_id'], node['pub_date']
                            ])))))
    for node in graph.values():
        node_src_nm = node['name']
        node_src = node_name_to_id[node_src_nm]
        for node_dst_nm, frequency in node['mention_freq'].items():
            node_dst = node_name_to_id[node_dst_nm]
            gv.edge(str(node_src), str(node_dst), str(frequency))
    gv.save(f'{prefix}.gv')  # takes "forever" to render, "never" finishes
    # connectivity
    g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_unweighted.graphml'))
    if not Path(f'{prefix}_metrics_connectivity.json').exists():
        Path(f'{prefix}_metrics_connectivity.json').write_text(
            json.dumps(embed_metrics_connectivity(graph, metrics, g,
                                                  label_key),
                       indent=2))
    # matplotlib rendering
    if not Path(f'{prefix}_unweighted.pdf').exists() or not Path(
            f'{prefix}_unweighted.png').exists():
        g = networkx.DiGraph(
            networkx.read_graphml(f'{prefix}_unweighted.graphml'))
        networkx.draw(g)
        plt.savefig(f'{prefix}_unweighted.pdf')
        plt.savefig(f'{prefix}_unweighted.png')
        plt.close()
    if not Path(f'{prefix}_weighted.pdf').exists() or not Path(
            f'{prefix}_weighted.png').exists():
        g = networkx.DiGraph(
            networkx.read_graphml(f'{prefix}_weighted.graphml'))
        networkx.draw(g)
        plt.savefig(f'{prefix}_weighted.pdf')
        plt.savefig(f'{prefix}_weighted.png')
        plt.close()
    # Leave root document explicit
    if not Path(f'{prefix}_root.json').exists():
        Path(f'{prefix}_root.json').write_text(
            json.dumps(graph[find_rootdoc()['name']]))
    # Plot quadrants
    for weight in [True, False]:
        desc = ('un' * int(not weight)) + 'weighted'
        if not Path(f'{prefix}_quads_{desc}.pdf').exists() or not Path(
                f'{prefix}_quads_{desc}.png').exists():
            key = 'weight' if weight else 'degree'
            dimen_cutoff = draw_degree_quadrants(graph, metrics['degree'], key)
            plt.savefig(f'{prefix}_quads_{desc}.pdf', bbox_inches='tight')
            plt.savefig(f'{prefix}_quads_{desc}.png', bbox_inches='tight')
            Path(f'{prefix}_quads_{desc}.json').write_text(
                json.dumps(dimen_cutoff, indent=4))
    for weight in [True, False]:
        desc = ('un' * int(not weight)) + 'weighted'
        if True or not Path(f'{prefix}_quads_{desc}.csv').exists():
            key = 'weight' if weight else 'degree'
            dimen_cutoff = json.loads(
                Path(f'{prefix}_quads_{desc}.json').read_text())
            with open(f'{prefix}_quads_{desc}.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt %
                           ("source", "target", *(["weight"] * int(weight)),
                            "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
            with open(f'{prefix}_quads_{desc}_nodst3rdquad.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt %
                           ("source", "target", *(["weight"] * int(weight)),
                            "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        if get_quadrant(dst_metric[f'{key}_in'],
                                        dst_metric[f'{key}_out'], *hr) == 3:
                            continue
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
            with open(f'{prefix}_quads_{desc}_nosrc3rdquad.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt %
                           ("source", "target", *(["weight"] * int(weight)),
                            "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    if get_quadrant(src_metric[f'{key}_in'],
                                    src_metric[f'{key}_out'], *hr) == 3:
                        continue
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
            with open(f'{prefix}_quads_{desc}_no3rdquad.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt %
                           ("source", "target", *(["weight"] * int(weight)),
                            "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    if get_quadrant(src_metric[f'{key}_in'],
                                    src_metric[f'{key}_out'], *hr) == 3:
                        continue
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        if get_quadrant(dst_metric[f'{key}_in'],
                                        dst_metric[f'{key}_out'], *hr) == 3:
                            continue
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
    if True:
        folder_out = Path(f'{prefix}_quads_unweighted_no2nd3rdquad')
        folder_out.mkdir(parents=True, exist_ok=True)
        for node in graph.values():
            node_src_nm = node['name']
            src_metric = metrics['degree'][node_src_nm]
            if get_quadrant(src_metric[f'{key}_in'], src_metric[f'{key}_out'],
                            *hr) in [2, 3]:
                continue
            with folder_out.joinpath(f'{node["generic_name"]}.csv').open(
                    'w') as file:
                fmt = ','.join(['%s'] * 5) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt % ("source", "target", "source_color",
                                  "target_color", "similarity"))
                srcWC = None
                srcCacheKey = graph[node_src_nm]['filepath'][6:]
                if len(srcCacheKey) > 0:
                    srcDoc = PlainCachedDocument(srcCacheKey, None).parse(' ')
                    srcWC = WordCounter(srcDoc)
                for node_dst_nm, frequency in node['mention_freq'].items():
                    dst_metric = metrics['degree'][node_dst_nm]
                    # if get_quadrant(dst_metric[f'{key}_in'], dst_metric[f'{key}_out'], *hr) == 3:
                    #     continue
                    similarity = '?'
                    dstCacheKey = graph[node_dst_nm]['filepath'][6:]
                    if len(dstCacheKey) > 0:
                        dstDoc = PlainCachedDocument(dstCacheKey,
                                                     None).parse(' ')
                        dstWC = WordCounter(dstDoc)
                        if srcWC is not None:
                            similarity = srcWC.vectorSimilarity(dstWC)
                            similarity = str(similarity[0][0])
                    file.write(fmt % (
                        graph[node_src_nm][label_key],
                        graph[node_dst_nm][label_key],
                        QUADRANT_COLOR[
                            get_quadrant(src_metric[f'{key}_in'],
                                         src_metric[f'{key}_out'], *hr) - 1],
                        QUADRANT_COLOR[
                            get_quadrant(dst_metric[f'{key}_in'],
                                         dst_metric[f'{key}_out'], *hr) - 1],
                        similarity,
                    ))
    if True or not Path(f'{prefix}_pagerank.json').exists():
        g = networkx.DiGraph(
            networkx.read_graphml(f'{prefix}_unweighted.graphml'))
        pr = networkx.pagerank(g)
        Path(f'{prefix}_pagerank.json').write_text(json.dumps(pr, indent=2))
        spr = sorted([(k, v) for k, v in pr.items()],
                     key=lambda a: (-a[1], a[0]))
        Path(f'{prefix}_pagerank_ranked.json').write_text(
            json.dumps(spr, indent=2))
        # dirLink = {k: set(v['mention_freq'].keys()) for k, v in graph.items()}
        revLink = {graph[k][label_key]: set() for k in graph.keys()}
        for ks, v in graph.items():
            ks = graph[ks][label_key]
            for kd in v['mention_freq'].keys():
                kd = graph[kd][label_key]
                revLink[kd].add(ks)
        sptr = {spr[0][0]: spr[0][0]}
        for node, rank in spr[1:]:
            maxNode = sorted([x for x in revLink[node] if x != node],
                             key=lambda a: -pr[a])[0]
            sptr[node] = maxNode
        Path(f'{prefix}_pagerank_ranked_spannedtree.json').write_text(
            json.dumps(sptr, indent=2))
        table = ["source,target,source_weight,target_weight"]
        for ns, nd in sptr.items():
            ws = "%.32f" % pr[ns]
            wd = "%.32f" % pr[nd]
            table.append(f"{ns},{nd},{ws},{wd}")
        Path(f'{prefix}_pagerank_ranked_spannedtree.csv').write_text(
            '\n'.join(table) + '\n')
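
# Hedged usage sketch: the prefix "citation_graph" is an illustrative
# assumption; in the original pipeline convert_outputs() is driven by a caller
# that picks the flavor name and whether to keep the temporal context.
convert_outputs("citation_graph", temporal_context=True)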
Exemple #52
0
def get_subgraph(V,E,label_filepath,dataset_name,level=1,subgraph_count=5,ignore_deg=None,root_node=None):
    """
    # total_points: total number of data points
    # feature_dm: number of features per datapoint
    # number_of_labels: total number of labels
    # X: feature matrix of dimension total_points * feature_dm
    # Y: list of size total_points. Each element of the list containing labels corresponding to one datapoint
    # V: list of all labels (nodes)
    # E: dict of edge tuple -> weight, eg. {(1, 4): 1, (2, 7): 3}
    """

    # get a dict of label -> textual_label
    label_dict = get_label_dict(label_filepath)

    # a utility function to relabel nodes of the upcoming graph with textual label names
    def mapping(v):
        """
        A utility function to relabel nodes of the upcoming graph with textual label names
        :param v: label id (int)
        :return: the textual label of the node id [v]
        """
        v = int(v)
        if v in label_dict:
            return label_dict[v]
        return str(v)

    # build an unweighted graph of all edges
    g = nx.Graph()
    g.add_edges_from(E.keys())

    # Below section will try to build a smaller subgraph from the actual graph for visualization
    subgraph_lists = []
    for sg in range(subgraph_count):
        if root_node is None:
            # select a random vertex to be the root
            np.random.shuffle(V)
            v = V[0]
        else:
            v = root_node

        # two files to write the graph and label information
        # Remove characters like \, /, <, >, :, *, |, ", ? from file names,
        # Windows cannot have file names containing these characters
        label_info_filepath = 'samples/'+str(dataset_name)+'_Info[{}].txt'.format(str(int(v)) + '-' + remove_special_chars(mapping(v)))
        label_graph_filepath = 'samples/'+str(dataset_name)+'_G[{}].graphml'.format(str(int(v)) + '-' + remove_special_chars(mapping(v)))
        # label_graph_el = 'samples/'+str(dataset_name)+'_E[{}].el'.format(str(int(v)) + '-' + mapping(v)).replace(' ','_')

        print('Label:[' + mapping(v) + ']')
        label_info_file = open(label_info_filepath,'w')
        label_info_file.write('Label:[' + mapping(v) + ']' + "\n")

        # build the subgraph using bfs
        bfs_q = Queue()
        bfs_q.put(v)
        bfs_q.put(0)
        node_check = {}
        ignored = []

        sub_g = nx.Graph()
        lvl = 0
        while not bfs_q.empty() and lvl <= level:
            v = bfs_q.get()
            if v == 0:
                lvl += 1
                bfs_q.put(0)
                continue
            elif node_check.get(v,True):
                node_check[v] = False
                edges = list(g.edges(v))
                # label_info_file.write('\nNumber of edges: ' + str(len(edges)) + ' for node: ' + mapping(v) + '[' +
                # str(v) + ']' + '\n')
                if ignore_deg is not None and len(edges) > ignore_deg:
                    # label_info_file.write('Ignoring: [' + mapping(v) + '] \t\t\t degree: [' + str(len(edges)) + ']\n')
                    ignored.append("Ignoring: deg [" + mapping(v) + "] = [" + str(len(edges)) + "]\n")
                    continue
                for uv_tuple in edges:
                    edge = tuple(sorted(uv_tuple))
                    sub_g.add_edge(edge[0],edge[1],weight=E[edge])
                    bfs_q.put(uv_tuple[1])
            else:
                continue

        # relabel the nodes to reflect textual label
        nx.relabel_nodes(sub_g,mapping,copy=False)
        print('sub_g:',sub_g)

        label_info_file.write(str('\n'))
        # Writing some statistics about the subgraph
        label_info_file.write(str(nx.info(sub_g)) + '\n')
        label_info_file.write('density: ' + str(nx.density(sub_g)) + '\n')
        label_info_file.write('list of the frequency of each degree value [degree_histogram]: ' +
                              str(nx.degree_histogram(sub_g)) + '\n')
        for nodes in ignored:
            label_info_file.write(str(nodes) + '\n')
        # TODO: Add other statistics for better understanding of the subgraph.
        # subg_edgelist = nx.generate_edgelist(sub_g,label_graph_el)
        label_info_file.close()
        nx.write_graphml(sub_g,label_graph_filepath)

        subgraph_lists.append(sub_g)

        print('Graph generated at: ' + label_graph_filepath)

        if root_node:
            print("Root node provided, will generate only one graph file.")
            break

    return subgraph_lists
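
# Hedged usage sketch for get_subgraph(): V, E and the label file below are
# illustrative assumptions (the real ones come from the dataset loader), and
# a samples/ directory must exist because the function writes its output there.
V_demo = [0, 1, 2, 3]
E_demo = {(0, 1): 2, (1, 2): 1, (0, 3): 4}
subgraphs = get_subgraph(V_demo, E_demo,
                         label_filepath="labels.txt",
                         dataset_name="demo",
                         level=1,
                         subgraph_count=1,
                         root_node=0)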
Exemple #53
0
servers = "server1:30080"

G = nx.DiGraph()

G.add_node("start", serverport="30080", peers=servers)
G.add_node("transfer1", type="get", protocol="tcp", size="5 KiB")
G.add_node("transfer2", type="get", protocol="tcp", size="6 KiB")
G.add_node("transfer3", type="get", protocol="tcp", size="7 KiB")
G.add_node("transfer4", type="get", protocol="tcp", size="5 KiB")
G.add_node("transfer5", type="get", protocol="tcp", size="6 KiB")
G.add_node("transfer6", type="get", protocol="tcp", size="7 KiB")
G.add_node("transfer7", type="get", protocol="tcp", size="5 KiB")
G.add_node("transfer8", type="get", protocol="tcp",
           size="8 KiB")  # random noise
G.add_node("transfer9", type="get", protocol="tcp", size="7 KiB")
# etc... for the entire stream, potentially hundreds of these

G.add_edge("start", "transfer1")
G.add_edge("transfer1", "transfer2")
G.add_edge("transfer2", "transfer3")
G.add_edge("transfer3", "transfer4")
G.add_edge("transfer4", "transfer5")
G.add_edge("transfer5", "transfer6")
G.add_edge("transfer6", "transfer7")
G.add_edge("transfer7", "transfer8")
G.add_edge("transfer8", "transfer9")
G.add_edge("transfer9", "start")

nx.write_graphml(G, "tgen.client.graphml.xml")
Exemple #54
0
def addEntity(graph, s, type, stock):
    typeEntity = F'{type}'
    stockEntity =  stock
    graph.add_node(s, type = typeEntity, stock = stockEntity)

def addRoad(graph, s, e, cap, gas, tax):
    start = f'{s}'
    end = f'{e}'
    RoadCap = cap
    RoadGas = gas
    RoadTax = tax
    graph.add_edge(start, end, capacity = RoadCap, Gas = RoadGas, Tax = RoadTax)

#############################  MAIN RUNNING TEST READER  #############################

path = Path(__file__)
newpath = path.parent.parent.resolve()
dataDir = newpath / 'data'
InstancePath = dataDir / 'truck_instance_less_customers.data'

file_path = InstancePath

graph, entete = extract_graph(file_path)
nx.draw(graph)
# uncomment the line below to display the graph with matplotlib
#plt.show()
nx.write_graphml(graph, r'projet_RO_LAGNIAUX_JEAN_DENES_THEO\output_files\graphDenesLagniaux.graphml')
print()
print(colored('the graph has been created and can be found in the output_files folder', 'red'))
print()
Exemple #55
0
import networkx as nx

f = open('raw/uscn_co_filtered.txt')
# edges = list()
graph = nx.Graph()

for i, line in enumerate(f.readlines()):
    line = line.strip()
    line = line.split(',')
    graph.add_node(i, labelV='US', author_id=line[1])
    graph.add_node(i + 1000000, labelV='CN', author_id=line[2])
    graph.add_edge(i, i + 1000000, labelE='Cooperates')
    # edges.append((line[1], line[2]))

# graph.add_edges_from(edges)
print(graph.number_of_nodes(), graph.number_of_edges())

nx.write_graphml(graph, "planb.xml")
Exemple #56
0
#%% Save

out_graphs = []
[out_graphs.append(i) for i in nx_graphs_raw.values()]
[print(i) for i in nx_graphs_raw.keys()]
save_names = ["Gaa", "Gad", "Gda", "Gdd"]
[out_graphs.append(i) for i in nx_graphs_norm.values()]
[print(i) for i in nx_graphs_norm.keys()]
save_names += ["Gaan", "Gdan", "Gadn", "Gddn"]
out_graphs.append(nx_all_raw)
save_names.append("G")
out_graphs.append(nx_all_norm)
save_names.append("Gn")

for name, graph in zip(save_names, out_graphs):
    nx.write_graphml(graph, output_path / (name + ".graphml"))

meta_data_df.to_csv(output_path / "meta_data.csv")

#%% verify things are right
print("\n\n\n\nChecking graphs are the same when saved")
for name, graph_wrote in zip(save_names, out_graphs):
    print(name)
    graph_read = nx.read_graphml(output_path / (name + ".graphml"))
    adj_read = nx.to_numpy_array(graph_read)
    adj_wrote = nx.to_numpy_array(graph_wrote)
    print(np.array_equal(adj_read, adj_wrote))
    graph_loader = load_networkx(name, version=data_date_graphs)
    adj_loader = nx.to_numpy_array(graph_loader)
    print(np.array_equal(adj_wrote, adj_loader))
    print()
Exemple #57
0
fc3 = prep.get_fc(path,3)

df3 = prep.get_dataframe(fc3)
df3 = prep.calcIds(df3,CONFIDENCE)
df0 = prep.get_dataframe(fc0)
df0 = prep.calcIds(df0,CONFIDENCE)

df2 = prep.get_dataframe(fc2)
df2 = prep.calcIds(df2,CONFIDENCE)
df1 = prep.get_dataframe(fc1)
df1 = prep.calcIds(df1,CONFIDENCE)

df0.xpos = df0.xpos + xmax
df1.xpos = df1.xpos + xmax

side0 = pd.concat([df3, df0])
side1 = pd.concat([df2, df1])

close1 = prep.get_close_bees(side0, DISTANCE)
close2 = prep.get_close_bees(side1, DISTANCE)

close = pd.concat([close1,close2])

p = prep.bee_pairs_to_timeseries(close)

i = prep.extract_interactions(p,LENGTH)

G = prep.create_graph2(i)

nx.write_graphml(G, filename + ".graphml")
Exemple #58
0
def saveToGraphml(graph, filename, **kwargs):
    nx.write_graphml(graph, filename, **kwargs)
Exemple #59
0
 def gera_graphml(self, path):
     nx.write_graphml(self.G, path)
Exemple #60
0
	heap = []
	for i in range(0, points.shape[0]):
		for j in range(0, points.shape[0]):
			if i != j:
				# Calculate ratio = d/SP
				# Sort ratio in a nondecreasing order
				ratio = direct_matrix[i,j]/SP_matrix[i][j]
				if ratio < 1.0:
					hq.heappush(heap, (ratio, i, j))
	# Pick the first node pair on the list, connect them with an edge
	min_ratio, x, y = hq.heappop(heap)
	print('min_ratio', min_ratio)
	# Stop when min_ratio >= threshold
	if min_ratio >= threshold:
		break
	# Update the graph
	path_graph.add_edge(x, y, weight = float(direct_matrix[x,y]))
	threshold_graph.add_edge(x, y, weight = direct_matrix[x,y])
	

# plt.clf()
plt.plot(points[:,0], points[:,1], 'o')
#change the first point to another shape
plt.plot(points[0,0], points[0,1], 'D')
nx.draw_networkx_edges(threshold_graph, pos = pos_dict, width=3, edge_color='b')
plt.savefig("threshold_"+ name + "_" + str(threshold) + ".png")

nx.write_graphml(path_graph, "threshold_"+ name + "_" + str(threshold) + ".graphml")

# dict() keeps SP_matrix[i][j] indexing working on networkx >= 2, where
# shortest_path_length() returns an iterator of (source, lengths) pairs
SP_matrix = dict(nx.shortest_path_length(path_graph, weight="weight"))