Example #1
File: GML.py  Project: hklarner/TomClass
def copy_layout(from_fname, to_fname):
    if not from_fname[-4:] == '.gml': from_fname += '.gml'
    if not to_fname[-4:]   == '.gml': to_fname   += '.gml'

    print 'reading A=', from_fname,'..',
    g1 =  NX.read_gml(from_fname)
    labels1 = NX.get_node_attributes(g1, 'label')
    n1 = set(labels1.values())
    print len(n1),'nodes'
    
    print 'reading B=', to_fname,'..',
    g2 =    NX.read_gml(to_fname)
    labels2 = NX.get_node_attributes(g2, 'label')
    n2 = set(labels2.values())
    print len(n2),'nodes'

    intersection = len(n2.intersection(n1))
    percent=100.*intersection/len(n2)
    print 'B.intersect(A)=',intersection,'(%.1f%%)'%percent

    print 'copying layout..',
    mapping = {}
    for L1 in labels1:
        for L2 in labels2:
            if labels1[L1]==labels2[L2]:
                mapping[L1] = L2
                break

    layout = NX.get_node_attributes(g1, 'graphics')
    attr = dict([  (  mapping[ID],  {'x':layout[ID]['x'],'y':layout[ID]['y']}  )   for ID in mapping])
    
    NX.set_node_attributes(g2, 'graphics', attr)
    NX.write_gml(g2, to_fname)
    print 'done.'
Example #2
def test_modularity_measure(function):

    def print_info(graph, name):
        print name, "N:", len(graph), "M:", graph.size()
        print "Q:", round(function(graph)[0], 3)

    graph = nx.read_gml(archive.extractfile("networks/karate-newman-1977.gml"))
    print_info(graph, "Karate")
    graph = nx.read_gml(archive.extractfile("networks/yeast_protein_interaction-barabasi-2001.tsv"))
    print_info(graph, "Protein Interaction")
    graph = pp.read_pajek(zipfile.ZipFile("networks/jazz.zip",
        "r").open("jazz.net"))
    print_info(graph, "Jazz musicians")
    graph = pp.read_pajek(zipfile.ZipFile("networks/celegans_metabolic.zip",
        "r").open("celegans_metabolic.net"))
    print_info(graph, "Metabolic")
    graph = nx.read_edgelist(zipfile.ZipFile("networks/email.zip",
        "r").open("email.txt"), data=False)
    print_info(graph, "E-mail")
    graph = pp.read_pajek(zipfile.ZipFile("networks/PGP.zip",
        "r").open("PGPgiantcompo.net"))
    print_info(graph, "Key signing")
    graph = nx.read_gml(zipfile.ZipFile("networks/cond-mat-2003.zip",
        "r").open("cond-mat-2003.gml"))
    print_info(graph, "Physicists")
Example #3
File: f2.py  Project: Zhengzi/maple
def load_3(gml1,gml2,name):
	g1 = nx.read_gml(gml1)			
	g2 = nx.read_gml(gml2)
	
	q1 = qucik_hash(g1)
	q2 = qucik_hash(g2)
	
	if not q1:
		return 0
		
	if not q2:
		return 0
	
	v1 = q1[1]
	v2 = q2[1]
	s1 = q1[0]
	s2 = q2[0]
	
	if v1 == v2:
		#print "skip"
		return 0
	#print s1	
	#print s2
	
	to_write = []

	to_write.append(name)
	
	with open("result_openssl.txt", "a") as myfile:
		for item in to_write:
			myfile.write(item)
			myfile.write("\n")
	return 1
Example #4
    def generateGraph(self, ticket, bnglContents, graphtype):
        print ticket
        pointer = tempfile.mkstemp(suffix='.bngl', text=True)
        with open(pointer[1], 'w') as f:
            f.write(bnglContents)
        try:
            if graphtype in ['regulatory', 'contactmap']:
                consoleCommands.setBngExecutable(bngDistro)
                consoleCommands.generateGraph(pointer[1], graphtype)
                name = pointer[1].split('.')[0].split('/')[-1]
                with open('{0}_{1}.gml'.format(name, graphtype), 'r') as f:
                    graphContent = f.read()

                gml = networkx.read_gml('{0}_{1}.gml'.format(name, graphtype))
                result = gml2cyjson(gml, graphtype=graphtype)
                jsonStr = json.dumps(result, indent=1, separators=(',', ': '))

                result = {'jsonStr': jsonStr, 'gmlStr': graphContent}
                self.addToDict(ticket, result)
                os.remove('{0}_{1}.gml'.format(name, graphtype))
                print 'success', ticket

            elif graphtype in ['sbgn_er']:
                consoleCommands.setBngExecutable(bngDistro)
                consoleCommands.generateGraph(pointer[1], 'contactmap')
                name = pointer[1].split('.')[0].split('/')[-1]
                # with open('{0}_{1}.gml'.format(name,'contactmap'),'r') as f:
                #   graphContent = f.read()
                graphContent = networkx.read_gml(
                    '{0}_{1}.gml'.format(name, 'contactmap'))
                sbgn = libsbgn.createSBNG_ER_gml(graphContent)
                self.addToDict(ticket, sbgn)
                os.remove('{0}_{1}.gml'.format(name, 'contactmap'))
                print 'success', ticket
            elif graphtype in ['std']:
                consoleCommands.setBngExecutable(bngDistro)
                consoleCommands.bngl2xml(pointer[1])
                xmlFileName = pointer[1].split('.')[0] + '.xml'
                xmlFileName = xmlFileName.split(os.sep)[-1]

                graph = stdgraph.generateSTDGML(xmlFileName)
                gmlGraph = networkx.generate_gml(graph)


                #os.remove('{0}.gml'.format(xmlFileName))
                result = gml2cyjson(graph, graphtype=graphtype)
                jsonStr = json.dumps(result, indent=1, separators=(',', ': '))

                result = {'jsonStr': jsonStr, 'gmlStr': ''.join(gmlGraph)}

                #self.addToDict(ticket, ''.join(gmlGraph))
                self.addToDict(ticket, result)
                print 'success', ticket
        except:
            import traceback
            traceback.print_exc()
            self.addToDict(ticket,-5)
            print 'failure',ticket
        finally:
            task.deferLater(reactor, 600,  freeQueue, ticket)
Example #5
def ato_write_gml(graph, fileName, labelGraphics):
    def writeDict(gml, key, label, contents, space, labelGraphics=None):
        gml.write('{1}{0} [\n'.format(key, space))
        for subKey in contents:
            if type(contents[subKey]) in [str]:
                gml.write('{2}\t{0} "{1}"\n'.format(subKey, contents[subKey], space))
            elif type(contents[subKey]) in [int]:
                gml.write('{2}\t{0} {1}\n'.format(subKey, contents[subKey], space))
            elif type(contents[subKey]) in [dict]:
                writeDict(gml, subKey, subKey, contents[subKey], space + '\t')
            if labelGraphics and label in labelGraphics:
                for labelInstance in labelGraphics[label]:
                    writeDict(gml, 'LabelGraphics', 'LabelGraphics', labelInstance, space + '\t')
        gml.write('{0}]\n'.format(space))

    gml = StringIO.StringIO()
    gml.write('graph [\n')
    gml.write('\tdirected 1\n')
    for node in graph.node:

        writeDict(gml, 'node', node, graph.node[node], '\t', labelGraphics)

    flag = False
    for x in nx.generate_gml(graph):
        if 'edge' in x and not flag:
            flag = True
        if flag:
            gml.write(x + '\n')
            

    #gml.write(']\n')
    with open(fileName, 'w') as f:
        f.write(gml.getvalue())
    nx.read_gml(fileName)
Example #6
	def __init__(self, ppi1filename, ppi2filename, matchfilename, outmatchfilename):
		self.ppi1 = nx.read_gml(ppi1filename, relabel = True)
		self.ppi2 = nx.read_gml(ppi2filename, relabel = True)
		in_to_node1 = {}
		in_to_node2 = {}
		for n1 in self.ppi1.nodes():
			in_to_node1[self.ppi1.node[n1]['index']] = n1
		for n2 in self.ppi2.nodes():
			in_to_node2[self.ppi2.node[n2]['index']] = n2

		matchfile = open(matchfilename)
		outmatchfile = open(outmatchfilename, "w")
		self.I = []
		for line in matchfile:
			if line[0] == "!":
				outmatchfile.write(line)
				continue
			cols = line.split()
			outmatchfile.write(in_to_node1[int(cols[0])] + " ")
			outmatchfile.write(in_to_node2[int(cols[1])] + "\n")
			self.I.append([in_to_node1[int(cols[0])], in_to_node2[int(cols[1])]])

		#check if it's a legal match
		for (i, j) in itertools.product(self.I, self.I):
			if i!= j:
				if (i[0] == j[0]) or (i[1] == j[1]):
					print "not a legal match: ", (i,j)
					return
		print "Legal match" 


		matchfile.close()
		outmatchfile.close()
Example #7
File: a3.py  Project: Zhengzi/maple
def main():
	#extract_intra_function_cfg("C:\\Users\\Xu Zhengzi\\Desktop\\oh\\")
	cfg1 = nx.read_gml("C:\\Users\\Xu Zhengzi\\Desktop\\og\\dtls1_reassemble_fragment.gml")
	cfg2 = nx.read_gml("C:\\Users\\Xu Zhengzi\\Desktop\\oh\\dtls1_reassemble_fragment.gml")
	nodes1 = ['0x80c0b14', '0x80c0b9a', '0x80c0c3c', '0x80c0c57', '0x80c0c5d', '0x80c0c8c', '0x80c0ccc', '0x80c0d0a', '0x80c0d2c', '0x80c0e83', '0x80c0fb4', '0x80c0eb6', '0x80c0f53', '0x80c0b97', '0x80c0d88', '0x80c0de1', '0x80c0db5', '0x80c0fac', '0x80c0f73', '0x80c0dd9']
	extract_trace(cfg1, 3, nodes1)

	print "Finish"
Example #8
def main():
    favorites_graph = nx.read_gml(FAVORITES_GML_OUTPUT_PATH)
    output_metrics(favorites_graph)
    favorites_graph = None

    comments_graph = nx.read_gml(COMMENTS_GML_OUTPUT_PATH)
    output_metrics(comments_graph)
    comments_graph = None
Example #9
def read_graph(path):
    if path.endswith('.txt'):
        A = np.loadtxt(path)
        
        G = nx.from_numpy_matrix(A)
        return G

    if path.endswith('.gml'):
        return nx.read_gml(path, 'label')
Example #10
def dolphins():
    """ Loads the dolphin social graph
    """
    try:
        gml_graph = nx.read_gml(DATA_PATH_1 + DOLPHINS)
    except:
        gml_graph = nx.read_gml(DATA_PATH_2 + DOLPHINS)
        
    dgraph = nx.Graph()
    dgraph.add_nodes_from(gml_graph.nodes(), size=1.)
    edges = gml_graph.edges()
    edges = [(u, v, {'weight': 1.}) for (u,v) in edges]
    dgraph.add_edges_from(edges)
    return dgraph
Example #11
    def test_output(self):
        """
        test the output management function. should only output to file if an output
        format is given. otherwise output to console in adj list text.
        :return:
        """
        custom_filename = 'custom.out'

        try:
            yt_script.generate_output(self.MOCK_GRAPH, None, self.MOCK_FILE_OUTPUT)
            self.assertFalse(os.path.exists(self.MOCK_FILE_OUTPUT))
        except AttributeError:
            self.fail()

        try:
            yt_script.generate_output(self.MOCK_GRAPH, 'gml', self.MOCK_FILE_OUTPUT)
            result_graph = nx.read_gml(self.MOCK_FILE_OUTPUT)
            for node in self.MOCK_GRAPH.nodes():
                self.assertIn(node, result_graph.nodes())
            for edge in self.MOCK_GRAPH.edges():
                try:
                    self.assertIn(edge, result_graph.edges())
                except AssertionError:
                    edge = (edge[1], edge[0])
                    self.assertIn(edge, result_graph.edges())
                    continue
        except AttributeError:
            self.fail()

        try:
            yt_script.generate_output(self.MOCK_GRAPH, 'gml', custom_filename)
            result_graph = nx.read_gml(custom_filename)
            for node in self.MOCK_GRAPH.nodes():
                self.assertIn(node, result_graph.nodes())
            for edge in self.MOCK_GRAPH.edges():
                try:
                    self.assertIn(edge, result_graph.edges())
                except AssertionError:
                    edge = (edge[1], edge[0])
                    self.assertIn(edge, result_graph.edges())
                    continue
        except AttributeError:
            self.fail()

        self.assertRaises(RuntimeError, yt_script.generate_output,
                          self.MOCK_GRAPH, 'fake_format', custom_filename)

        if os.path.exists(custom_filename):
            os.remove(custom_filename)
Example #12
File: f2.py  Project: Zhengzi/maple
def load(gml1,gml2,name):
	g1 = nx.read_gml(gml1)			
	g2 = nx.read_gml(gml2)	
	s1 = t(g1)
	s2 = t(g2)
	#print s1	
	#print s2
	
	with open("result.txt", "a") as myfile:
		myfile.write(name)
		myfile.write("\n") 
		
		m = lcs(s1,s2)
		
		index = find_index(m)
		
		
		match1 = []
		match2 = []
		
		for item in index:
		
			myfile.write(hex(s1[item[0]][0]) + " " + hex(s2[item[1]][0]))
			myfile.write("\n") 
		
			#print hex(s1[item[0]][0]) + " " + hex(s2[item[1]][0])
			match1.append(s1[item[0]][0])
			match2.append(s2[item[1]][0])
			
		myfile.write("o")
		myfile.write("\n") 	
		
		for item in s1:
			if item[0] not in match1:
				#print hex(item[0])
				myfile.write(hex(item[0]))
				myfile.write("\n") 
				
		myfile.write("p")
		myfile.write("\n") 
		
		for item in s2:
			if item[0] not in match2:
				#print hex(item[0])
				myfile.write(hex(item[0]))
				myfile.write("\n") 
				
		#print 	
	return 0
Example #13
def graph_product(G_file):
    
    #TODO: take in a graph (eg when called from graphml) rather than re-reading the graph again
    LOG.info("Applying graph product to %s" % G_file)
    H_graphs = {}
    try:
        G = nx.read_graphml(G_file).to_undirected()
    except IOError:
        G = nx.read_gml(G_file).to_undirected()
        return
    G = remove_yed_edge_id(G)
    G = remove_gml_node_id(G)
#Note: copy=True causes problems if relabelling with same node name -> loses node data
    G = nx.relabel_nodes(G, dict((n, data.get('label', n)) for n, data in G.nodes(data=True)))
    G_path = os.path.split(G_file)[0]
    H_labels  = defaultdict(list)
    for n, data in G.nodes(data=True):
        H_labels[data.get("H")].append(n)

    for label in H_labels.keys():
        try:
            H_file = os.path.join(G_path, "%s.graphml" % label)
            H = nx.read_graphml(H_file).to_undirected()
        except IOError:
            try:
                H_file = os.path.join(G_path, "%s.gml" % label)
                H = nx.read_gml(H_file).to_undirected()
            except IOError:
                LOG.warn("Unable to read H_graph %s, used on nodes %s" % (H_file, ", ".join(H_labels[label])))
                return
        root_nodes = [n for n in H if H.node[n].get("root")]
        if len(root_nodes):
# some nodes have root set
            non_root_nodes = set(H.nodes()) - set(root_nodes)
            H.add_nodes_from( (n, dict(root=False)) for n in non_root_nodes)
        H = remove_yed_edge_id(H)
        H = remove_gml_node_id(H)
        nx.relabel_nodes(H, dict((n, data.get('label', n)) for n, data in H.nodes(data=True)), copy=False)
        H_graphs[label] = H

    G_out = nx.Graph()
    G_out.add_nodes_from(node_list(G, H_graphs))
    G_out.add_nodes_from(propagate_node_attributes(G, H_graphs, G_out.nodes()))
    G_out.add_edges_from(intra_pop_links(G, H_graphs))
    G_out.add_edges_from(inter_pop_links(G, H_graphs))
    G_out.add_edges_from(propagate_edge_attributes(G, H_graphs, G_out.edges()))
#TODO: need to set default ASN, etc?
    return G_out
Example #14
File: graphs.py  Project: rkdarst/pcd
def polblogs(relabel=True):
    """Network of political blogs.

    A directed network of hyperlinks between weblogs on US politics,
    recorded in 2005 by Adamic and Glance. Please cite L. A. Adamic
    and N. Glance, 'The political blogosphere and the 2004 US
    Election', in Proceedings of the WWW-2005 Workshop on the
    Weblogging Ecosystem (2005). Thanks to Lada Adamic for permission
    to post these data on this web site.

    http://www-personal.umich.edu/~mejn/netdata/polblogs.zip
    """
    fname = os.path.join(os.path.dirname(__file__), "data/polblogs.gml")
    g = networkx.read_gml(fname, relabel=True)
    g = networkx.Graph(g)
    g.graph["Creator"] = g.graph["Creator"].split('"')[1]
    # node_map = { }
    for n, data in g.nodes_iter(data=True):
        dict_values_to_str(data)
        # data['label'] = str(data['label'])
        data["cmty"] = str(data["value"])
        # node_map[n] = data['label']
        del data["value"], data["id"], data["label"]
        # print data
    # if relabel:
    #    g = networkx.relabel_nodes(g, node_map)
    return g
Example #15
File: graphs.py  Project: rkdarst/pcd
def polbooks(relabel=True):
    """Network of political books.

    A network of books about US politics published around the time of
    the 2004 presidential election and sold by the online bookseller
    Amazon.com. Edges between books represent frequent copurchasing of
    books by the same buyers. The network was compiled by V. Krebs and
    is unpublished, but can found on Krebs' web site. Thanks to Valdis
    Krebs for permission to post these data on this web site.

    Communities stored in g.node[n]['cmty'], node names are the titles
    of books.

    http://www-personal.umich.edu/~mejn/netdata/polbooks.zip"""
    fname = os.path.join(os.path.dirname(__file__), "data/polbooks.gml")
    g = networkx.read_gml(fname, relabel=True)
    g = networkx.Graph(g)
    g.graph["Creator"] = g.graph["Creator"].split('"')[1]
    # node_map = { }
    for n, data in g.nodes_iter(data=True):
        dict_values_to_str(data)
        data["cmty"] = str(data["value"])
        # node_map[n] = str(data['label'])
        # del data['value'], data['id'], data['label']
    # if relabel:
    #    g = networkx.relabel_nodes(g, node_map)
    return g
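
Since the communities end up in g.node[n]['cmty'] and the node names are book titles, here is a minimal sketch of grouping titles by community, assuming the NetworkX 1.x attribute access used above:

from collections import defaultdict

g = polbooks()
by_cmty = defaultdict(list)
for n, data in g.nodes_iter(data=True):   # NetworkX 1.x iterator, as in the loader above
    by_cmty[data["cmty"]].append(n)        # node names are the book titles
for cmty, titles in sorted(by_cmty.items()):
    print cmty, len(titles)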
Example #16
def save_commuting_graph():
    
#    print G.nodes()
#    print G.edges(data=True)
#    
#    nx.write_gml(G, "/home/sscepano/D4D res/allstuff/User movements graphs/communting patterns/1/total_commuting_G.gml")
#    
#    print GA.nodes()
#    print GA.edges(data=True)
#    
#    nx.write_gml(G, "/home/sscepano/D4D res/allstuff/User movements graphs/communting patterns/1/total_commuting_GA.gml")

    #v.map_commuting_all(G)
    
    #map_communities_and_commutes(G)
    
#    G = nx.read_gml("/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G.gml")
#    
#    G1 = process_weights(G)
#    nx.write_gml(G1, "/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G_scaled_weights.gml")
#    
#    print G1.edges(data=True)

    G1 = nx.read_gml("/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G_scaled_weights.gml")
    
#    print G1.nodes(data=True)
#    
#    print G1.nodes(data=True)[1][1]['label']

    map_communities_and_commutes(G1)
    
    return G1
Example #17
def main():
    graph = nx.read_gml("../data/network.gml")

    nodes = [{
        "id": data['id'],
        "name": data.get("name") or "(Null)",
        "group": data['group'],
        "x": data["graphics"]["x"],
        "y": data["graphics"]["y"],
        "w": data["graphics"]["w"],
        "h": data["graphics"]["h"],
        "weight": data["weight"],
        "fixed": True,
    } for _, data in graph.nodes(data=True)]

    links = [{
        "source": get_node_index(nodes, source),
        "target": get_node_index(nodes, target),
        "name": data["label"],
        "value": data["value"]
    } for source, target, data in graph.edges(data=True)]

    json.dump({
        "nodes": nodes,
        "links": links
    }, open("../data/network.json", "w"), indent=4)
Example #18
def main():
  G = nx.read_gml(filename, relabel=False)
  power_law_est(G)
  #power_law_est_igraph(filename)
  max_degrees(G)
  max_bcentrality(G)
  max_pagerank(G)
Example #19
def loadNetwork(f, ext):
  if ext == "gml":
    try:
      return nx.read_gml(f)
    except Exception, e:
      print("Couldn't load " + f + " as gml.")
      return False
Example #20
    def _read_celltype_graph(self, celltypes_file, format="gml"):
        """
        Read celltype-celltype connectivity graph from file.

        celltypes_file -- the path of the file containing
        the graph.
        
        format -- format of the file. allowed values: gml, graphml, edgelist, pickle, yaml.

        """
        start = datetime.now()
        celltype_graph = None
        try:
            if format == "gml":
                celltype_graph = nx.read_gml(celltypes_file)
            elif format == "edgelist":
                celltype_graph = nx.read_edgelist(celltypes_file)
            elif format == "graphml":
                celltype_graph = nx.read_graphml(celltypes_file)
            elif format == "pickle":
                celltype_graph = nx.read_gpickle(celltypes_file)
            elif format == "yaml":
                celltype_graph = nx.read_yaml(celltypes_file)
            else:
                print "Unrecognized format %s" % (format)
        except Exception, e:
            print e
Example #21
    def _read_cell_graph(self, filename, format):
        """Load the cell-to-cell connectivity graph from a
        file. 

        Returns None if any error happens.
        """
        cell_graph = None
        if filename:
            try:
                start = datetime.now()
                if format == "gml":
                    cell_graph = nx.read_gml(filename)
                elif format == "pickle":
                    cell_graph = nx.read_gpickle(filename)
                elif format == "edgelist":
                    cell_graph = nx.read_edgelist(filename)
                elif format == "yaml":
                    cell_graph = nx.read_yaml(filename)
                elif format == "graphml":
                    cell_graph = nx.read_graphml(filename)
                else:
                    print "Unrecognized format:", format
                end = datetime.now()
                delta = end - start
                config.BENCHMARK_LOGGER.info(
                    "Read cell_graph from file %s of format %s in %g s"
                    % (filename, format, delta.seconds + 1e-6 * delta.microseconds)
                )
            except Exception, e:
                print e
Example #22
def parse_input(folder_name):
    '''
        Parses an input and returns the corresponding graph and parameters

        Inputs:
            folder_name - a string representing the path to the input folder

        Outputs:
            (graph, num_buses, size_bus, constraints)
            graph - the graph as a NetworkX object
            num_buses - an integer representing the number of buses you can allocate to
            size_buses - an integer representing the number of students that can fit on a bus
            constraints - a list where each element is a list vertices which represents a single rowdy group
    '''
    graph = nx.read_gml(folder_name + "/graph.gml")
    parameters = open(folder_name + "/parameters.txt")
    num_buses = int(parameters.readline())
    size_bus = int(parameters.readline())
    constraints = []

    for line in parameters:
        line = line[1: -2]
        curr_constraint = [num.replace("'", "") for num in line.split(", ")]
        constraints.append(curr_constraint)

    return graph, num_buses, size_bus, constraints
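
A minimal usage sketch, assuming a hypothetical input folder ./inputs/example laid out exactly as the docstring describes (a graph.gml next to a parameters.txt):

graph, num_buses, size_bus, constraints = parse_input("./inputs/example")
print("nodes:", graph.number_of_nodes())
print("buses:", num_buses, "capacity:", size_bus)
print("rowdy groups:", len(constraints))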
Example #23
File: GML.py  Project: hklarner/TomClass
def copy_layout_GML2NX(Fname, Graph, verbose=1):
    if not Fname[-4:]=='.gml': Fname+='.gml'
    print 'Copying layout from', Fname+'..'
    
    g1 =  NX.read_gml( Fname )
    labels1 = NX.get_node_attributes(g1, 'label')
    n1 = set(labels1.values())
    
    nodes = set( Graph.nodes() )

    if not n1:
        print '   empty layout graph'
        return
    if not nodes:
        print '   empty target graph'
        return

    mapping = {}
    for L1 in labels1:
        for name in nodes:
            if labels1[L1]==name:
                mapping[L1] = name
                break

    intersection = len(nodes.intersection(n1))
    percent=100.*intersection/len(nodes)
    print '   %.1f%%'%percent,'(%i positions)'%intersection

    layout = NX.get_node_attributes(g1, 'graphics')
    attr = dict([  (  mapping[ID],  {'x':layout[ID]['x'],'y':layout[ID]['y']}  )   for ID in mapping])
    
    NX.set_node_attributes( Graph, 'graphics', attr)
Example #24
File: GML.py  Project: hklarner/TomClass
def import_layout(from_fname, to_graph):
    if not from_fname[-4:]  =='.gml': from_fname +='.gml'

    print 'importing layout from', from_fname+'..'
    g1 =  NX.read_gml(from_fname)
    labels1 = NX.get_node_attributes(g1, 'label')
    n1 = set(labels1.values())
    
    g2 =    to_graph
    n2 = set(g2.nodes())

    if not n1:
        print '   empty layout graph'
        return
    if not n2:
        print '   empty target graph'
        return

    mapping = {}
    for L1 in labels1:
        for name in n2:
            if labels1[L1]==name:
                mapping[L1] = name
                break

    intersection = len(n2.intersection(n1))
    percent=100.*intersection/len(n2)
    print '   %.1f%%'%percent,'(%i positions)'%intersection

    layout = NX.get_node_attributes(g1, 'graphics')
    attr = dict([  (  mapping[ID],  {'x':layout[ID]['x'],'y':layout[ID]['y']}  )   for ID in mapping])
    
    NX.set_node_attributes(g2, 'graphics', attr)
Example #25
def q1():
  lada = nx.read_gml("../../data/network_analysis/LadaFacebookAnon.gml")
  print_stats(lada, "LadaFacebookAnon")
  p = compute_edge_creation_probability(
    lada.number_of_nodes(), lada.number_of_edges())
  erg = nx.erdos_renyi_graph(lada.number_of_nodes(), p)
  print_stats(erg, "Erdos-Renyi Random")
Example #26
File: gml2jie.py  Project: shuchu/graph
def gml2jie(file_name):
    G = nx.read_gml(file_name)
    num_of_nodes = G.number_of_nodes()
    num_of_edges = G.number_of_edges()

    #debug for duplicated edges
    ##print "num of edges: %d \n" % num_of_edges
    for e in G.edges_iter():
      num_edges = G.number_of_edges(e[0],e[1])
      if num_edges > 1:
        print "find one: %d,%d" % (e[0],e[1])
        G.remove_edge(*e)
    
    fnames = file_name.strip().split('.')
    base_name = fnames[0]
    ofname = base_name + '.jie'

    with open(ofname,'w') as ofile:
        ### write Node Node Edges
        ofile.write(str(G.number_of_nodes()) + " " \
                        + str(G.number_of_nodes()) + " "\
                        + str(G.number_of_edges())+'\n')
        for e in G.edges_iter():
            """ index of Jie Chen's file start from 1.
            but networkx starts from 0
            """
            ofile.write(str(e[0]+1) + " " + str(e[1]+1) + "\n")

    return 0
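
For reference, a minimal sketch of the .jie layout this writes for a hypothetical three-node path graph (edges 0-1 and 1-2): the first line repeats the node count twice followed by the edge count, and every later line is one edge with 1-based endpoints.

3 3 2
1 2
2 3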
Example #27
def classify(request, pk):
	#gets object based on id given
	graph_file = get_object_or_404(Document, pk=pk)
	#reads file into networkx graph based on extension
	if graph_file.extension() == ".gml":
		G = nx.read_gml(graph_file.uploadfile)
	else:
		G = nx.read_gexf(graph_file.uploadfile)
	#closes file so we can delete it
	graph_file.uploadfile.close()
	#loads the algorithm and tests the algorithm against the graph
	g_json = json_graph.node_link_data(G)
	#save graph into json file
	with open(os.path.join(settings.MEDIA_ROOT, 'graph.json'), 'w') as graph:
			json.dump(g_json, graph)
	with open(os.path.join(settings.MEDIA_ROOT, 'rf_classifier.pkl'), 'rb') as malgo:
		algo_loaded = pickle.load(malgo, encoding="latin1")
		dataset = np.array([G.number_of_nodes(), G.number_of_edges(), nx.density(G), nx.degree_assortativity_coefficient(G), nx.average_clustering(G), nx.graph_clique_number(G)])
		print (dataset)
		#creates X to test against
		X = dataset
		prediction = algo_loaded.predict(X)
		
		
		
		graph_type = check_prediction(prediction)
		graph = GraphPasser(G.number_of_nodes(), G.number_of_edges(), nx.density(G), nx.degree_assortativity_coefficient(G), nx.average_clustering(G), nx.graph_clique_number(G))
	#gives certain variables to the view

	return render(
		request,
		'classification/classify.html',
		{'graph': graph, 'prediction': graph_type}
		)
Example #28
def usufyToGmlExport(d, fPath):
    '''
        Workaround to export to a gml file.
        :param d: Data to export.
        :param fPath: File path.
    '''
    # Reading the previous gml file      
    try:
        oldData=nx.read_gml(fPath)
    except UnicodeDecodeError as e:
        print "UnicodeDecodeError:\t" + str(e)
        print "Something went wrong when reading the .gml file relating to the decoding of UNICODE."
        import time as time
        fPath+="_" +str(time.time()) 
        print "To avoid losing data, the output file will be renamed to use the timestamp as:\n" + fPath + "_" + str(time.time()) 
        print        
        # No information has been recovered
        oldData = nx.Graph()
    except Exception as e:
        # No information has been recovered
        oldData = nx.Graph()
    newGraph = _generateGraphData(d, oldData)

    # Writing the gml file
    nx.write_gml(newGraph,fPath)
Example #29
def run_graph(graph_file, output_file, neighborhood, flag = None):
    print "==== New graph ===="
    print "Input:", graph_file
    print "Output:", output_file
    print "Neighborhood:", neighborhood
    print ""

    print "Loading graph"
    print "Start time: %s" % (datetime.now())
    g=nx.read_gml(graph_file)
    print "End time: %s" % (datetime.now())

    gflag = None
    if flag == None:
        print "Creating flag complex"
        print "Start time: %s" % (datetime.now())
        gedgelist = map(list,g.edges(data=False))
        gflag = sh.flag(gedgelist,4)
        print "End time: %s" % (datetime.now())
    else:
        print "Using passed in flag complex"
        gflag = flag

    for n in range(neighborhood+1):
        print "Finding the local homology (n = %d)" % (n)
        print "Start time: %s" % (datetime.now())
        graph = graph_file.split("/")[-1]
        print output_file
        ofile = open(output_file, 'w')
        ofile.write("Local Homology (neighborhood=%d) of flag complex generated by %s\n" % (n, graph))
        locHomTable(gflag, {}, n, ofile)
        print "End time: %s" % (datetime.now())
        print "\n"

    return gflag
Example #30
 def __init__(self, fileName = GML_FILE):
     if os.path.exists(fileName):
         print "Read From File.\n"
         self.G = nx.read_gml(fileName)
     else:
         print "File Not Found. Creating It..."
         self.G = nx.DiGraph()
Example #31
def get_row_exhaustive(general_path, pattern_result, pattern_path):
    row = {}
    print "Pattern exhaustive ", pattern_result
    print "Pattern path: ", pattern_path
    pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml'))
    nr_randvar_values = man.count_nr_randvars_in_graph(pattern)
    cycles = man.is_there_cycle_in_graph(pattern)
    max_degree = man.get_maximum_node_degree(pattern)
    average_degree = man.get_average_node_degree(pattern)
    n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern)
    parent_id = get_parent_id(os.path.join(pattern_path))
    #get nr embeddings of exhaustive
    nr_emb = None
    time = None
    print general_path.split('/')
    pattern_name = general_path.split('/')[-1]
    if pattern_name == "":
        pattern_name = general_path.split('/')[-2]
    nr_obs = None
    print "Exists? ", os.path.join(
        general_path, 'exhaustive_approach',
        'results_' + pattern_name + '.res'), os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res'))
    if os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res')):
        nr_emb, time, nr_obs = extract_nr_embeddings(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res'))

    #get the results
    if os.path.exists(os.path.join(pattern_result, 'monitoring')):
        embeddings, stdev, klds = get_stat(
            os.path.join(pattern_result, 'monitoring'), 'exhaustive')
    else:
        embeddings = [None] * 120
        klds = [None] * 120

    is_timeout = False
    if os.path.exists(
            os.path.join(general_path, 'exhaustive_approach', 'timeout.info')):
        is_timeout = True
    print "Nr of records for embeddings: ", len(embeddings)
    nodes, edges = man.get_readable_text_format(pattern)
    row['pattern_name'] = pattern_result
    row['parent_id'] = parent_id
    row['nr_randvar_values'] = int(nr_randvar_values)
    row['nodes'] = nodes
    row['edges'] = edges
    row['has_cycles'] = cycles
    row['density'] = nx.density(pattern)
    row['max_degree'] = float(max_degree)
    row['avg_deg'] = float(average_degree)
    row['nr_targets'] = int(n_target_nodes)
    if nr_emb:
        row['exh_emb'] = float(nr_emb)
    else:
        row['exh_emb'] = nr_emb
    row['time'] = time
    row['timeout'] = is_timeout
    row['nr_observations'] = nr_obs
    for i in xrange(1, len(embeddings) + 1):
        if embeddings[i - 1] == None:
            row["emb_" + str(i)] = None
        else:
            row["emb_" + str(i)] = float(embeddings[i - 1])
    return row
Example #32
def correlation_edge_centrality(name):
    current_path = "../../data"
    network = nx.read_gml(current_path + "/" + name + "/" + name + ".gml")
    edge_centrality(network)
Example #33
import numpy as np
import math as ma
import networkx as nx
from matplotlib import pyplot as plt

# Open the first dataset; keep the original and a copy, the copy is the one that will be modified

data_dolphins = nx.read_gml('dolphins.gml')

dd = data_dolphins

# Open the dolphin gender info

archivo2 = open('dolphinsGender.txt', 'r').readlines()

# Clean arrays to fill with the extra information for the nodes

nombres = []
generos = []
gender_dict = {}
numeros = {}
colores = []
machos = 0
hembras = 0
incognita = 0

# Add info to the arrays already created, including the node color map that follows the dolphins' gender

k = 0
for i in archivo2:
    j = i.split("\t")
Example #34
def basic_operation():
	# Create a graph.
	G = nx.Graph()

	# Nodes.
	G.add_node(1)
	G.add_nodes_from([2, 3])

	H = nx.path_graph(10)  # Creates a graph.
	G.add_nodes_from(H)
	G.add_node(H)

	#print('G.nodes =', G.nodes)
	print('G.nodes =', list(G.nodes))

	# Edges.
	G.add_edge(1, 2)
	e = (2, 3)
	G.add_edge(*e)  # Unpack edge tuple.

	G.add_edges_from([(1, 2), (1, 3)])

	G.add_edges_from(H.edges)

	#print('G.edges =', G.edges)
	print('G.edges =', list(G.edges))

	# Remove all nodes and edges.
	G.clear()

	#--------------------
	G.add_edges_from([(1, 2), (1, 3)])
	G.add_node(1)
	G.add_edge(1, 2)
	G.add_node('spam')  # Adds node 'spam'.
	G.add_nodes_from('spam')  # Adds 4 nodes: 's', 'p', 'a', 'm'.
	G.add_edge(3, 'm')

	print('G.number_of_nodes() =', G.number_of_nodes())
	print('G.number_of_edges() =', G.number_of_edges())

	# Set-like views of the nodes, edges, neighbors (adjacencies), and degrees of nodes in a graph.
	print('G.adj[1] =', list(G.adj[1]))  # or G.neighbors(1).
	print('G.degree[1] =', G.degree[1])  # The number of edges incident to 1.

	# Report the edges and degree from a subset of all nodes using an nbunch.
	# An nbunch is any of: None (meaning all nodes), a node, or an iterable container of nodes that is not itself a node in the graph.
	print("G.edges([2, 'm']) =", G.edges([2, 'm']))
	print('G.degree([2, 3]) =', G.degree([2, 3]))

	# Remove nodes and edges from the graph in a similar fashion to adding.
	G.remove_node(2)
	G.remove_nodes_from('spam')
	print('G.nodes =', list(G.nodes))
	G.remove_edge(1, 3)

	# When creating a graph structure by instantiating one of the graph classes you can specify data in several formats.
	G.add_edge(1, 2)
	H = nx.DiGraph(G)  # Creates a DiGraph using the connections from G.
	print('H.edges() =', list(H.edges()))

	edgelist = [(0, 1), (1, 2), (2, 3)]
	H = nx.Graph(edgelist)

	#--------------------
	# Access edges and neighbors.
	print('G[1] =', G[1])  # Same as G.adj[1].
	print('G[1][2] =', G[1][2])  # Edge 1-2.
	print('G.edges[1, 2] =', G.edges[1, 2])

	# Get/set the attributes of an edge using subscript notation if the edge already exists.
	G.add_edge(1, 3)
	G[1][3]['color'] = 'blue'
	G.edges[1, 2]['color'] = 'red'

	# Fast examination of all (node, adjacency) pairs is achieved using G.adjacency(), or G.adj.items().
	# Note that for undirected graphs, adjacency iteration sees each edge twice.
	FG = nx.Graph()
	FG.add_weighted_edges_from([(1, 2, 0.125), (1, 3, 0.75), (2, 4, 1.2), (3, 4, 0.375)])
	for n, nbrs in FG.adj.items():
		for nbr, eattr in nbrs.items():
			wt = eattr['weight']
			if wt < 0.5: print(f'({n}, {nbr}, {wt:.3})')

	# Convenient access to all edges is achieved with the edges property.
	for (u, v, wt) in FG.edges.data('weight'):
		if wt < 0.5: print(f'({u}, {v}, {wt:.3})')

	#--------------------
	# Attributes.

	# Graph attributes.
	G = nx.Graph(day='Friday')
	print('G.graph =', G.graph)

	G.graph['day'] = 'Monday'

	# Node attributes: add_node(), add_nodes_from(), or G.nodes.
	G.add_node(1, time='5pm')
	G.add_nodes_from([3], time='2pm')
	print('G.nodes[1] =', G.nodes[1])
	G.nodes[1]['room'] = 714
	print('G.nodes.data() =', G.nodes.data())

	# Edge attributes: add_edge(), add_edges_from(), or subscript notation.
	G.add_edge(1, 2, weight=4.7)
	G.add_edges_from([(3, 4), (4, 5)], color='red')
	G.add_edges_from([(1, 2, {'color': 'blue'}), (2, 3, {'weight': 8})])
	G[1][2]['weight'] = 4.7
	G.edges[3, 4]['weight'] = 4.2
	print('G.edges.data() =', G.edges.data())

	#--------------------
	# Directed graphs.

	DG = nx.DiGraph()
	DG.add_weighted_edges_from([(1, 2, 0.5), (3, 1, 0.75)])
	print("DG.out_degree(1, weight='weight') =", DG.out_degree(1, weight='weight'))
	print("DG.degree(1, weight='weight') =", DG.degree(1, weight='weight'))  # The sum of in_degree() and out_degree().
	print('DG.successors(1) =', list(DG.successors(1)))
	print('DG.neighbors(1) =', list(DG.neighbors(1)))

	# Convert G to undirected graph.
	#H = DG.to_undirected()
	H = nx.Graph(DG)

	#--------------------
	# Multigraphs: Graphs which allow multiple edges between any pair of nodes.

	MG = nx.MultiGraph()
	#MDG = nx.MultiDiGraph()
	MG.add_weighted_edges_from([(1, 2, 0.5), (1, 2, 0.75), (2, 3, 0.5)])
	print("MG.degree(weight='weight') =", dict(MG.degree(weight='weight')))

	GG = nx.Graph()
	for n, nbrs in MG.adjacency():
			for nbr, edict in nbrs.items():
				minvalue = min([d['weight'] for d in edict.values()])
				GG.add_edge(n, nbr, weight = minvalue)
	print('nx.shortest_path(GG, 1, 3) =', nx.shortest_path(GG, 1, 3))

	#--------------------
	# Classic graph operations:

	"""
	subgraph(G, nbunch):		induced subgraph view of G on nodes in nbunch
	union(G1,G2):				graph union
	disjoint_union(G1,G2):		graph union assuming all nodes are different
	cartesian_product(G1,G2):	return Cartesian product graph
	compose(G1,G2):				combine graphs identifying nodes common to both
	complement(G):				graph complement
	create_empty_copy(G):		return an empty copy of the same graph class
	to_undirected(G):			return an undirected representation of G
	to_directed(G):				return a directed representation of G
	"""

	#--------------------
	# Graph generators.

	# Use a call to one of the classic small graphs:
	petersen = nx.petersen_graph()
	tutte = nx.tutte_graph()
	maze = nx.sedgewick_maze_graph()
	tet = nx.tetrahedral_graph()

	# Use a (constructive) generator for a classic graph:
	K_5 = nx.complete_graph(5)
	K_3_5 = nx.complete_bipartite_graph(3, 5)
	barbell = nx.barbell_graph(10, 10)
	lollipop = nx.lollipop_graph(10, 20)

	# Use a stochastic graph generator:
	er = nx.erdos_renyi_graph(100, 0.15)
	ws = nx.watts_strogatz_graph(30, 3, 0.1)
	ba = nx.barabasi_albert_graph(100, 5)
	red = nx.random_lobster(100, 0.9, 0.9)

	#--------------------
	# Read a graph stored in a file using common graph formats, such as edge lists, adjacency lists, GML, GraphML, pickle, LEDA and others.

	nx.write_gml(red, 'path.to.file')
	mygraph = nx.read_gml('path.to.file')
Example #35
import networkx as nx, community as comm, pylab

g = nx.read_gml("lesmis/lesmis.gml")
bp = comm.best_partition(g)
nx.draw(g, node_color=[bp[i] for i in bp])
pylab.show()
Example #36
import numpy as np
import networkx as nx

F = np.load("./numpy_files/citeseer_poisson_v2_son.npy")
#F = np.load("../poisson/numpy_files/citeseer_gaussian_iter_250_T.npy")

g = nx.read_gml("../datasets/citeseer.gml")

n = g.number_of_nodes()

with open("citeseer_poisson_v2_son.embedding", 'w') as f:
    f.write("{} {}\n".format(n, F.shape[1]))
    for node in g.nodes():
        line = [str(val) for val in F[int(node), :]]

        f.write("{} {}\n".format(node, " ".join(line)))
Example #37
def carrega_grafo(nome_rede):
    """Carrega o grafo .gml"""
    grafo = nx.read_gml(nome_rede + ".gml")
    return grafo
Example #38
def load_igraph(filepath):
    return nx.read_gml("example.gml")
Example #39
true_sbm = true
adj = nx.adjacency_matrix(G).todense()
np.savetxt('file.csv', adj, delimiter='\t')
dists_gt = test_clustering_structure()
print("GT : {0}, {1}".format(np.mean(dists_gt), np.std(dists_gt)))
dists_gt = ensemble_density_huge("file.csv", "\\t")
dist_dense = pd.read_csv("./matrix.csv", delimiter="\t", header=None).values
dist_dense = dist_dense[:, :-1]
dist.append(dist_dense)

####################

####################

print("--Football--")
G = nx.read_gml("../data/football.gml")
true = []
adj = nx.adjacency_matrix(G).todense()

with open("../data/football.gml") as f:
    for line in f:
        values = line.split(" ")
        if (len(values) >= 5):
            if (values[4] == "value"):
                true.append(values[5])
encoder = LabelEncoder()
true = encoder.fit_transform(true)
true_football = true
model_hac = hac(n_clusters=len(set(true)),
                affinity="precomputed",
                linkage="average")
Example #40
 def carrega_grafo(self):
     """Load a graph and generate the list of vertices."""
     rede = nx.read_gml(self.nome_rede)
     self.lista_nos = rede.nodes()
Example #41
 def _validate_(self, level):
     try:
         _ = nx.read_gml(str(self.path))
     except nx.NetworkXError:
         raise ValidationError('Not a valid GML file')
Example #42
        sp_emds.append(sp_emd)
        sgl2s.append(sgl2)
    return sp_emds, sgl2s


if __name__ == "__main__":
    graph_name = sys.argv[1]
    RW_x = np.loadtxt(
        'plots/lesmis_walk_wa/trainingIteration_3200_expectedGraph.txt'.format(
            graph_name))
    edge_x = np.loadtxt(
        'plots/lesmis_edge_wa/trainingIteration_3200_expectedGraph.txt'.format(
            graph_name))
    #RW_c = np.loadtxt('plots/{}_rw_expected_correct.txt'.format(graph_name))

    G = nx.read_gml('../data/{}.gml'.format(graph_name))
    _A_obs = nx.adjacency_matrix(G)
    A = _A_obs.todense()
    N = A.shape[0]

    L = nx.normalized_laplacian_matrix(G).todense()
    eig_vals, eig_vecs = linalg.eig(L)
    eig_list = zip(eig_vals, np.transpose(eig_vecs))
    eig_list.sort(key=lambda x: x[0])
    u = np.asarray([u_i.real for u_i in eig_list[-2][1]])[0][0]

    x, y = expected_against_fiedler(RW_x, u, N)
    fig, ax = plt.subplots()
    plt.scatter(x, y, color='r', s=10, label='Degree Random Walk From Uniform')

    #x_c,y = expected_against_fiedler(RW_c,u,N)
Example #43
                    G.node[i]['pi'] = U

    # Build the MST from the predecessor table
    MST = nx.Graph()
    for i in range(0, len(Q)):
        MST.add_node(i, label=G.node[i]['label'])

    for i in range(0, len(Q)):
        if K == 1:
            if (i != Raizes[0]):
                MST.add_edge(i, G.node[i]['pi'])
        elif K == 2:
            if (i != Raizes[0]) and (i != Raizes[1]):
                MST.add_edge(i, G.node[i]['pi'])
        elif K == 3:
            if (i != Raizes[0]):
                if (i != Raizes[1]):
                    if (i != Raizes[2]):
                        MST.add_edge(i, G.node[i]['pi'])
    return MST


G = nx.read_gml("football.gml")
K = 3  # Number of roots
MST = Dijkstra(G, K)

# Plot the MST
pos = nx.spring_layout(MST, k=0.10, iterations=100)
nx.draw(MST, pos, with_labels=True)
plt.savefig("1")
Example #44
from math import pi,sin,cos,sqrt
import pylab
from matplotlib.patches import Wedge, Polygon
import matplotlib.ticker as ticker
from matplotlib.widgets import Slider, Button
#import nx_pylab2 as nx2
from matplotlib.patches import FancyArrowPatch, Circle
#from colorsnew import cmap_discretize
import matplotlib as m




graph_name1 = "method2_50/networks/method2_50_adherent.gml"      ### Method3

G = nx.read_gml(graph_name1)
G = nx.connected_component_subgraphs(G)[0]
listsort1 =  [[u'38481', u'40842', u'46549', u'17427', u'41063', u'7264', u'35755', u'16584', u'42825', u'39816', u'28627', u'44291', u'36915', u'38164', u'2573', u'30898', u'43373', u'45655', u'38629', u'46451', u'46254', u'45064', u'39167', u'39953', u'39600', u'43282', u'3753', u'30300', u'38798', u'45392', u'36268', u'45758', u'32683', u'34064', u'32242', u'46601', u'4094', u'32108', u'46118', u'38479', u'35766'], [u'18956', u'39714', u'27171', u'46480', u'4655', u'41720', u'41906', u'3869', u'27995', u'45364', u'38641', u'14142', u'29881', u'39252', u'28743', u'41480', u'5842', u'33422', u'36783', u'35561', u'20494', u'6137', u'43821', u'29802', u'42270', u'39823', u'23012', u'20972', u'22783', u'29540', u'28308', u'46643', u'30050', u'4597', u'28945', u'7330', u'18531', u'45676', u'33143', u'27847', u'3217', u'43334', u'29250', u'27127', u'32340', u'46600', u'15479', u'4455', u'11729', u'35309', u'44916'], [u'42877', u'5820', u'42380', u'42510', u'3144', u'35059', u'14867', u'29053', u'33282', u'24629', u'44334', u'41561', u'46162', u'5536', u'46271', u'44757', u'31246', u'42348', u'39055', u'41851', u'8034', u'46721', u'41225', u'41165', u'29376', u'39770', u'32508', u'18656', u'44449', u'38029', u'40653', u'34754', u'41261', u'5613', u'17086', u'45916', u'41728', u'32395', u'27851', u'41745', u'40296', u'42000', u'42401', u'42087', u'34450', u'46663', u'32892', u'24642', u'28118', u'20377', u'45922', u'32637', u'45190', u'4800', u'43617', u'17097', u'46389', u'29314', u'41999', u'29480', u'44188', u'40476', u'36088', u'36343', u'45154'], [u'44121', u'3557', u'41794', u'24308', u'967', u'43573', u'47061', u'43981', u'24575', u'41830', u'21994', u'44866', u'46798', u'9486', u'15389', u'42637', u'46315', u'45116', u'41049', u'20553', u'41656', u'37258', u'36157', u'42467', u'7073', u'46612', u'45795', u'38044', u'9831', u'42765', u'6322', u'46998', u'12252', u'36591', u'41509', u'17484', u'80', u'19515', u'46496', u'2832', u'14722', u'40854', u'29840', u'46782', u'42807', u'8371', u'45303', u'17755', u'15311', u'43437', u'45374', u'45771', u'32214', u'33340', u'30166', u'43252', u'21010', u'12048', u'12484', u'3719', u'37781', u'46026', u'36865', u'998', u'39130', u'44265', u'310', u'44263', u'23266', u'26102', u'45298', u'10127', u'833', u'13662', u'41669', u'18106', u'19501', u'10542', u'29018', u'29930', u'44884', u'36255', u'36701', u'25991', u'13790', u'3864', u'42886', u'36411', u'35493', u'43435', u'28845', u'46582', u'29070', u'18647', u'33242', u'17923', u'38945', u'9429', u'18790', u'43402', u'40232', u'8997', u'19311', u'8638', u'36351', u'24180', u'42911', u'35135', u'39485', u'45670', u'25619', u'17369', u'36917', u'32662', u'31021', u'38972', u'43442', u'13518', u'30372', u'29826', u'10924', u'29415', u'43150', u'42384', u'16541', u'45557', u'40582', u'21215', u'10378', u'40402', u'27756', u'3424'], [u'33322', u'28851', u'13739', u'39065', u'39898', u'33425', u'3284', u'32833', u'38955', u'41792', u'35531', u'44195', u'44199', u'42457', u'43572', u'37747', u'32167', u'3815', u'27301', u'42484', u'42585', u'40520', u'40443', u'34221', u'45437', u'5817', u'39690', u'10411', u'46941', u'34752', u'37000', u'38315', u'36925', u'41510', u'41160', u'38914', u'39155', u'6613', u'41194', u'45060', u'44166', u'41584', u'20312', u'40324', u'40803', u'27345', u'40484', u'31124', u'5068', u'6627', u'26718', u'38101', u'47043', u'27958', u'5580', u'41899', u'42608', u'31513', u'2158', u'45833', u'47038', u'41095', u'18967', u'35849', u'45148', u'40734', u'43192', u'45903', u'46121', 
u'40027', u'19708', u'12992', u'36237', u'42022', u'40284', u'45243', u'31547', u'39006', u'46785', u'45876', u'33441', u'46878', u'19', u'31425', u'42156', u'45009', u'29371', u'41292', u'18654', u'43953', u'14378', u'42471', u'27811', u'14376', u'19000', u'19943', u'29592', u'39342', u'40719', u'17258', u'28050', u'12798', u'41316', u'45220', u'45061', u'45784', u'46234', u'42612', u'42971', u'33717', u'32811', u'44214', u'43858', u'45760', u'33372', u'47067', u'30570', u'43848', u'31383', u'39017', u'40138', u'32171', u'43138', u'29399', u'46148', u'39978', u'33369', u'43177', u'30392', u'42714', u'28301', u'41553', u'36726', u'6197', u'45860', u'46689', u'9816', u'17598', u'37574', u'45261', u'35955', u'46326', u'18541', u'34955', u'42225', u'44352', u'19546', u'43401', u'4336', u'31076', u'29749', u'39948', u'36631', u'46329', u'45139', u'39478', u'28436', u'17857', u'11477', u'45513', u'46639', u'41435', u'39688', u'33062', u'1055', u'36529', u'41987', u'11926', u'34061', u'3944', u'13136', u'43446', u'10915', u'41187', u'12977', u'32349', u'20327', u'39383', u'42550', u'39971', u'41521', u'41970', u'28212', u'28688', u'36719', u'12115', u'45419', u'42319', u'42317', u'47052', u'31353', u'39809', u'27759', u'37143', u'37639', u'45527'], [u'35544', u'41015', u'12486', u'41371', u'41797', u'41559', u'39625', u'37188', u'45933', u'29453', u'46487', u'13731', u'46726', u'43599', u'43982', u'12013', u'40844', u'19450', u'46439', u'46269', u'39539', u'44169', u'39313', u'11613', u'10959', u'44791', u'32476', u'35274', u'35973', u'31391', u'40188', u'31954', u'20411', u'41586', u'19022', u'41769', u'47063', u'42018', u'43542', u'31826', u'46046', u'43541', u'46347', u'43543', u'614', u'41942', u'6390', u'39250', u'42263', u'42262', u'37648', u'42065', u'46533', u'26900', u'28182', u'15367', u'45175', u'38900', u'44614', u'41092', u'35847', u'44571', u'44570', u'32571', u'391', u'4046', u'46123', u'39003', u'40218', u'46613', u'18061', u'1432', u'33895', u'46153', u'27724', u'39022', u'26136', u'30067', u'45470', u'8592', u'31337', u'11313', u'26528', u'13803', u'46771', u'33161', u'18842', u'45379', u'2697', u'44539', u'44021', u'14977', u'31949', u'43318', u'22520', u'27101', u'39673', u'41131', u'40802', u'32915', u'37287', u'12681', u'42358', u'42574', u'37763', u'37600', u'30244', u'25486', u'36369', u'215', u'40656', u'4477', u'42419', u'29115', u'2464', u'41781', u'29522', u'10684', u'22711', u'30359', u'6568', u'46230', u'45506', u'43178', u'41401', u'13228', u'41310', u'28640', u'22903', u'44213', u'32741', u'29467', u'6933', u'39018', u'39019', u'44662', u'259', u'36222', u'7077', u'27511', u'32085', u'11427', u'42399', u'33985', u'39862', u'40666', u'46668', u'34775', u'5295', u'28719', u'20392', u'9590', u'34579', u'39207', u'28169', u'42146', u'44682', u'40161', u'43084', u'26751', u'39111', u'39059', u'44636', u'36176', u'41847', u'32700', u'42240', u'20206', u'28506', u'36330', u'11149', u'20527', u'40545', u'40782', u'18992', u'28342', u'34107', u'27575', u'30917', u'40434', u'3940', u'40539', u'45511', u'5564', u'39422', u'46819', u'47019', u'5286', u'39686', u'46812', u'30964', u'41242', u'44554', u'40052', u'6161', u'12449', u'36629', u'46062', u'45368', u'6883', u'33964', u'42418', u'13511', u'39438', u'727', u'29418', u'30264', u'43020', u'34065', u'17458', u'27973', u'45411', u'46772', u'42716', u'46850', u'47053', u'6083', u'25318', u'40811', u'32116', u'45528', u'34141', u'41203'], [u'36585', u'27490', u'40155', u'27284', u'27280', u'46820', u'39656', u'28854', 
u'43112', u'36092', u'32233', u'40929', u'41479', u'41373', u'37041', u'44855', u'19701', u'38954', u'30192', u'2915', u'41650', u'38959', u'41790', u'7129', u'28035', u'39624', u'46824', u'6651', u'41152', u'36611', u'43778', u'45545', u'29284', u'43807', u'21108', u'30776', u'39400', u'41724', u'39361', u'39364', u'42054', u'14694', u'43698', u'42053', u'43695', u'39567', u'28404', u'43053', u'40847', u'31778', u'29917', u'29031', u'29033', u'29527', u'43142', u'34110', u'45160', u'40037', u'27711', u'45702', u'33946', u'45565', u'42636', u'45569', u'36332', u'1930', u'45288', u'42227', u'45443', u'39693', u'35379', u'36819', u'5131', u'42226', u'18163', u'46891', u'28664', u'41331', u'39224', u'31487', u'44018', u'20418', u'29086', u'6758', u'29247', u'43307', u'7519', u'27132', u'27137', u'39032', u'43490', u'29962', u'43499', u'37755', u'43941', u'34270', u'45941', u'4030', u'38953', u'46042', u'27574', u'30154', u'41067', u'43394', u'27342', u'30741', u'37874', u'36376', u'27610', u'29662', u'28361', u'4916', u'40247', u'7189', u'27933', u'30736', u'2536', u'42063', u'27836', u'31461', u'47046', u'45627', u'30951', u'9813', u'39189', u'39811', u'36473', u'12375', u'4393', u'44835', u'6578', u'38214', u'46585', u'35201', u'41419', u'45050', u'44111', u'31067', u'33489', u'28601', u'45831', u'46029', u'19063', u'37424', u'39706', u'42116', u'33436', u'36115', u'2901', u'33549', u'40733', u'43865', u'46915', u'1067', u'44184', u'28335', u'43462', u'43461', u'27385', u'40146', u'37228', u'44527', u'43740', u'37617', u'32164', u'35930', u'43582', u'34341', u'28414', u'39411', u'41736', u'28793', u'43995', u'42029', u'42434', u'42432', u'44377', u'40853', u'43041', u'45173', u'28383', u'8316', u'45049', u'36991', u'40455', u'45573', u'36308', u'39301', u'34771', u'19476', u'2112', u'45476', u'39604', u'45656', u'41291', u'46655', u'4699', u'42527', u'46803', u'28775', u'39402', u'42150', u'30803', u'44385', u'7258', u'16058', u'40229', u'46809', u'13719', u'45099', u'39120', u'44158', u'40102', u'39044', u'3989', u'43890', u'29874', u'46180', u'32328', u'28516', u'45895', u'29476', u'46119', u'43009', u'41680', u'40777', u'40958', u'41689', u'30058', u'17578', u'46621', u'38893', u'29988', u'32039', u'43674', u'33960', u'45435', u'14475', u'37115', u'40252', u'29528', u'37111', u'46453', u'46020', u'31574', u'22923', u'44212', u'41929', u'42190', u'3445', u'45508', u'42610', u'40338', u'33493', u'3326', u'43210', u'28838', u'42993', u'36990', u'28890', u'42853', u'44663', u'41081', u'45265', u'37027', u'28387', u'33729', u'32337', u'46359', u'45342', u'32426', u'45610', u'45641', u'38932', u'44060', u'45279', u'44407', u'43434', u'27582', u'27854', u'40821', u'27396', u'40133', u'44103', u'30477', u'40613', u'29495', u'40597', u'44831', u'11864', u'16722', u'45966', u'42035', u'5960', u'43911', u'46412', u'31137', u'31044', u'40466', u'46365', u'46363', u'30000', u'45486', u'45497', u'34175', u'36312', u'35886', u'40501', u'42241', u'28305', u'25688', u'41895', u'28309', u'43812', u'45923', u'38121', u'31151', u'41369', u'45861', u'42947', u'29058', u'40235', u'27978', u'27976', u'27186', u'47060', u'5242', u'39201', u'44105', u'39220', u'42925', u'35812', u'44139', u'20625', u'14901', u'44521', u'41402', u'40603', u'43083', u'39768', u'37378', u'31384', u'43715', u'43717', u'44220', u'36071', u'29272', u'43969', u'2960', u'44353', u'29861', u'38052', u'45556', u'5470', u'42445', u'39442', u'42849', u'11285', u'43403', u'18799', u'44054', u'44037', u'44030', u'33235', u'41140', u'37976', 
u'4651', u'43813', u'45799', u'43762', u'30761', u'43581', u'40230', u'16468', u'30769', u'31261', u'43662', u'4331', u'40786', u'41807', u'42694', u'5513', u'25458', u'40627', u'2552', u'28022', u'27523', u'45422', u'35819', u'39573', u'30344', u'42590', u'39129', u'31566', u'46921', u'46507', u'7311', u'46881', u'43238', u'31585', u'33530', u'42678', u'6458', u'45077', u'26602', u'43295', u'42180', u'43612', u'38271', u'45330', u'42747', u'46186', u'18462', u'33460', u'43206', u'40999', u'43205', u'46959', u'37030', u'39152', u'32968', u'21754', u'32070', u'5705', u'42589', u'41248', u'39834', u'15726', u'47072', u'39836', u'41508', u'34217', u'38714', u'27798', u'38924', u'22593', u'34841', u'41021', u'42914', u'33665', u'40373', u'16735', u'2908', u'46478', u'41613', u'28473', u'2041', u'42559', u'42009', u'22594', u'27983', u'2362', u'43927', u'40680', u'27330', u'45902', u'30032', u'5369', u'44709', u'32397', u'39444', u'42259', u'42090', u'41693', u'42207', u'1440', u'6025', u'4044', u'37380', u'22613', u'35305', u'40400', u'5738', u'26179', u'41059', u'4849', u'12414', u'42179', u'30395', u'45150', u'35944']] 
#sorted(listcom,key=len)




H=nx.Graph()
wtchange = []
ynode = []

bminode = []
bminodep = []
bminodem = []
Example #45
def get_row_NS(general_path, pattern_result, experiment_name):
    row = {}
    if not (os.path.exists(os.path.join(general_path, 'input_pattern.gml'))):
        row['pattern_name'] = pattern_result
        row['nr_randvar_values'] = "NC"
        row['nodes'] = "NC"
        row['edges'] = "NC"
        row['has_cycles'] = "NC"
        row['density'] = "NC"
        row['shape'] = "NC"
        row['max_degree'] = "NC"
        row['avg_deg'] = "NC"
        row['nr_targets'] = "NC"
        row['nr_emb'] = "NC"
        row['has_obd'] = "NC"
        row['unequal_size_warn'] = "NC"
        row['OBD'] = "NC"
        return row

    pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml'))
    nr_randvar_values = man.count_nr_randvars_in_graph(pattern)
    cycles = man.is_there_cycle_in_graph(pattern)
    max_degree = man.get_maximum_node_degree(pattern)
    average_degree = man.get_average_node_degree(pattern)
    n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern)
    nr_emb = None
    has_obd = True

    if os.path.exists(os.path.join(pattern_result, 'no_obdecomp.info')):
        has_obd = False

    if os.path.exists(os.path.join(general_path, 'not_selected.info')):
        nr_emb = extract_nr_embeddings_NS(
            os.path.join(general_path, 'not_selected.info'))
    nodes, edges = man.get_readable_text_format(pattern)

    unequal_size_warning = False
    if os.path.exists(
            os.path.join(general_path, 'results_furer',
                         'unequal_size.warning')):
        unequal_size_warning = True
    OBD = None
    if os.path.exists(
            os.path.join(general_path, 'results_furer', 'OBDDecomp.info')):
        OBD = getOBDecomp(
            os.path.join(general_path, 'results_furer', 'OBDDecomp.info'))
    row['pattern_name'] = pattern_result
    row['nr_randvar_values'] = nr_randvar_values
    row['nodes'] = nodes
    row['edges'] = edges
    row['has_cycles'] = cycles
    row['density'] = nx.density(pattern)
    row['shape'] = man.get_graph_shape(pattern)
    row['max_degree'] = max_degree
    row['avg_deg'] = average_degree
    row['nr_targets'] = n_target_nodes
    row['nr_emb'] = nr_emb
    row['has_obd'] = has_obd
    row['unequal_size_warn'] = unequal_size_warning
    row['OBD'] = OBD
    return row
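As a hedged illustration of how rows shaped like the dict above might be collected into a CSV report (the function name, output path and use of csv.DictWriter here are assumptions, not part of the original project):

import csv

def write_rows_to_csv(rows, out_path="pattern_report.csv"):
    # Build a header that covers both the fully populated rows and the
    # early-return rows that only carry "NC" placeholders.
    fieldnames = []
    for row in rows:
        for key in row:
            if key not in fieldnames:
                fieldnames.append(key)
    with open(out_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval="NC")
        writer.writeheader()
        writer.writerows(rows)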
Example #46
def loadgraph(filepath): 
    return  nx.read_gml(filepath)
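A minimal usage sketch of the helper above; the GML path is a hypothetical local file:

import networkx as nx

def loadgraph(filepath):
    return nx.read_gml(filepath)

g = loadgraph("karate.gml")  # hypothetical local file
print(len(g.nodes()), "nodes and", len(g.edges()), "edges")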
Example #47
import networkx as nx
from geopy.distance import great_circle

def generateDistances(G, verbose=False):
    for edge in G.edges:
        if verbose:
            print(*edge, end=" ")
        distance = great_circle(reversed(G.nodes[edge[0]]["Position"]), reversed(G.nodes[edge[1]]["Position"])).km
        if verbose:
            print(distance)
        G.edges[edge]["Distance"] = distance

    nx.write_gml(G, "Graphs/FullFWithDistance.gml")




if __name__ == '__main__':
    generateDistances(nx.read_gml("Graphs/FullFiltered.gml"), verbose=True)
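Worth noting: geopy's great_circle expects (latitude, longitude) pairs, which is why the snippet reverses the stored "Position" values. A tiny self-contained check with illustrative coordinates:

from geopy.distance import great_circle

# great_circle takes (lat, lon) tuples; if positions are stored as
# (lon, lat), they must be reversed first, as generateDistances does.
berlin = (52.52, 13.405)   # illustrative coordinates (lat, lon)
munich = (48.137, 11.575)
print(round(great_circle(berlin, munich).km, 1), "km")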
Example #48
 def input(self, filename):
     self.GGG = networkx.read_gml(filename)
Example #49
def main(result, data, redo, write, monitoring_reports):
    print "reporting furer"
    flag_version = 'my'
    common_result_path = result
    output_path = os.path.join(result, 'results_furer')
    detailed_result_path = os.path.join(output_path, "monitoring")
    if (not redo) and os.path.exists(detailed_result_path) and len(
            os.listdir(detailed_result_path)) >= 100:
        print "Results already post-processed"
        row = csv_report.get_row(result, output_path, "furer",
                                 result.replace("RESULTS", "PATTERNS"))
        with open(os.path.join(output_path, "furer_row.info"), 'w') as f:
            f.write(str(row))
        sys.exit()
    exhaustive_approach_results_path = os.path.join(common_result_path,
                                                    "exhaustive_approach")
    try:
        data_graph = nx.read_gpickle(data)
    except:
        data_graph = nx.read_gml(data)
    #data_graph=nx.read_gpickle(data)
    pattern = nx.read_gml(os.path.join(common_result_path,
                                       'input_pattern.gml'))
    #load Plist
    pkl_file = open(os.path.join(output_path, 'Plist.pickle'), 'rb')
    Plist = pickle.load(pkl_file)
    #load monitoring marks
    pkl_file = open(os.path.join(output_path, 'monitoring_marks.pickle'), 'rb')
    monitoring_marks = pickle.load(pkl_file)
    #load monitoring_reports
    if os.path.exists(os.path.join(output_path, 'monitoring_reports.pickle')):
        pkl_file = open(os.path.join(output_path, 'monitoring_reports.pickle'),
                        'rb')
        monitoring_reports = pickle.load(pkl_file)
    print common_result_path, common_result_path.split("/")
    pattern_file_name = common_result_path.split("/")[-1]
    if pattern_file_name == "":
        pattern_file_name = common_result_path.split("/")[-2]
    print "Number of reports: ", len(monitoring_reports)
    print "pattern file name: ", pattern_file_name
    print "Do we need exhaustive dict: ", write
    fdict_exhaustive = None
    if write == True:
        picklename = os.path.join(
            exhaustive_approach_results_path,
            "fdict_exhaustive_%s.pickle" % pattern_file_name)
        pickin = open(picklename, 'rb')
        fdict_exhaustive = pickle.load(pickin)
    experiments.globals.output_path = output_path
    if pattern_file_name.startswith("dblp"):
        experiments.globals.experiment_name = "dblp"
    else:
        experiments.globals.experiment_name = "yeast"

    if (flag_version == 'my'):
        my_version_report(fdict_exhaustive, data_graph, pattern,
                          monitoring_marks, output_path, detailed_result_path,
                          monitoring_reports, exhaustive_approach_results_path,
                          Plist, 1, pattern_file_name,
                          write)  #print monitoring_reports
    if (flag_version == 'martin'):
        martin_version_report(fdict_exhaustive, data_graph, pattern,
                              monitoring_marks, output_path,
                              detailed_result_path, monitoring_reports,
                              exhaustive_approach_results_path, Plist, 1,
                              pattern_file_name)  #print monitoring_reports
    row = csv_report.get_row(result, output_path, "furer",
                             result.replace("RESULTS", "PATTERNS"))
    with open(os.path.join(output_path, "furer_row.info"), 'w') as f:
        f.write(str(row))
Example #50
    preferences = build_preference_dict()
    place_preferences = build_place_preferences_table()

    edges = list()
    for k, v in place_preferences.to_dict().items():
        edge = (preferences[k[0]], places[k[1]], v)
        edges.append(edge)

    g = nx.Graph()
    g.add_weighted_edges_from(edges)

    return g


try:
    g = nx.read_gml("communities/community_graph.gml")
    print("Reading graph...")
except:
    print("Building graph...")
    g = build_community_graph()
    nx.write_gml(g, "communities/community_graph.gml")

print("Graph ready")

#first compute the best partition
partition = community.best_partition(g)

#drawing

plt.figure(figsize=(50, 50))
plt.title("eTurismo", fontsize=48)
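The snippet ends right after the figure is set up; a hedged sketch of one common way to finish the drawing, continuing with the g and partition objects from above and coloring nodes by the community id returned by community.best_partition (the layout, colormap and output filename are illustrative choices):

pos = nx.spring_layout(g)
node_colors = [partition[node] for node in g.nodes()]
nx.draw_networkx_nodes(g, pos, node_size=40, node_color=node_colors,
                       cmap=plt.cm.tab20)
nx.draw_networkx_edges(g, pos, alpha=0.3)
plt.axis("off")
plt.savefig("communities.png")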
Example #51
    # Initialize parameters
    E = np.random.normal(size=(N, dim))

    nb_list = find_neighbors(g)

    #dist = find_distances(g)
    dist = []

    for iter in range(num_of_iters):
        if iter % 50 == 0:
            draw_points(E, "Karate", g, base=True)
        for node in range(N):

            node_grad_E = grad(g, E, nb_list, node, dist)

            E[node, :] += eta * node_grad_E

        score = compute_score(g, E, nb_list, dist)
        print("Iter: {} Score {}".format(iter, score))

    return E


edges = example5
#g = nx.Graph()
#g.add_edges_from(edges)
g = nx.read_gml("../datasets/karate.gml")

E = run(g, dim=2, num_of_iters=1000, eta=0.001)
#np.save("./numpy_files/citeseer_gaussian_v5", E)
draw_points(E, "Karate", g)
Example #52
def main():
    #Takes a single GFF input, generates a graph and merges with a pre-existing graph
    args = get_options()

    # create directory if it isn't present already
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    args.input_dir = os.path.join(args.input_dir, "")
    args.output_dir = os.path.join(args.output_dir, "")

    # Create temporary directory
    temp_dir = os.path.join(tempfile.mkdtemp(dir=args.output_dir), "")

    directories = [args.input_dir, temp_dir]

    gff_file = [args.input_gff]

    filename = os.path.basename(args.input_gff).split(".")[0]

    if not args.quiet: print("Processing input")
    process_prokka_input(gff_list=gff_file,
                         output_dir=temp_dir,
                         quiet=args.quiet,
                         n_cpu=args.n_cpu)

    cd_hit_out = temp_dir + "combined_protein_cdhit_out.txt"

    run_cdhit(input_file=temp_dir + "combined_protein_CDS.fasta",
              output_file=cd_hit_out,
              id=args.id,
              quiet=args.quiet,
              n_cpu=args.n_cpu)

    if not args.quiet: print("Generating network")
    single_gml, centroid_contexts_single, seqid_to_centroid_single = generate_network(
        cluster_file=cd_hit_out + ".clstr",
        data_file=temp_dir + "gene_data.csv",
        prot_seq_file=temp_dir + "combined_protein_CDS.fasta",
        all_dna=args.all_seq_in_graph)

    if not args.quiet: print("Reformatting network")
    reformat_network(single_gml=single_gml,
                     output_dir=temp_dir,
                     isolateName=filename)

    merge_graphs(directories=directories,
                 temp_dir=temp_dir,
                 len_dif_percent=args.len_dif_percent,
                 pid=args.id,
                 family_threshold=args.family_threshold,
                 length_outlier_support_proportion=args.
                 length_outlier_support_proportion,
                 merge_paralogs=args.merge_paralogs,
                 output_dir=args.output_dir,
                 min_edge_support_sv=args.min_edge_support_sv,
                 aln=args.aln,
                 alr=args.alr,
                 core=args.core,
                 merge_single=True,
                 depths=[1],
                 n_cpu=args.n_cpu,
                 quiet=args.quiet)

    G = nx.read_gml(args.output_dir + "final_graph.gml")

    for index, name in enumerate(
            G.graph['isolateNames']
    ):  #Corrects isolate name for single gff being returned as list
        if name == 'x':
            G.graph['isolateNames'][index] = filename

    nx.write_gml(G, args.output_dir + "final_graph.gml")

    #remove temporary directory unless args.dirty is set
    if not args.dirty:
        shutil.rmtree(temp_dir)

    sys.exit(0)
Example #53
def master(struct_save_name="ProteinDict_ten_thousand",
           edge_type="ligands",
           edge_comm_num=3,
           property="processes",
           graph_filename="Protein-Protein_Graph_Default_Name",
           load_graph=False,
           print_dict_props=False,
           bipart_graph=False,
           bipartite_filename="Bipartite_Default_Name",
           show_plots=False,
           avg_clust=False,
           print_graph_props=False,
           degree_dist=False,
           k_clique=False,
           mod_max=False,
           fluid=False,
           louv=False,
           k_property=20,
           num_k_cliques=7,
           num_fluid_comms=100,
           std_val=-0.5,
           k_clique_opt=False,
           start_k_clique_opt=3,
           end_k_clique_opt=10,
           num_trials_k=3,
           opt_fluid=False,
           start_fluid_comms=100,
           end_fluid_comms=300,
           fluid_step_size=20,
           fluid_num_trials=3):
    Structure_Dict = {}
    Structure_Dict = hf.readDict(struct_save_name, Structure_Dict)

    #Here we print out some helpful information about the dataset we are using
    if print_dict_props == True:
        avg_ligands = hf.get_mean_property(Structure_Dict, "ligands")
        print("Average Number of Ligands:", avg_ligands)

        avg_subunits = hf.get_mean_property(Structure_Dict, "subunits")
        print("Average Number of Subunits:", avg_subunits)

        avg_functions = hf.get_mean_property(Structure_Dict, "functions")
        print("Average Number of Functions:", avg_functions)

        avg_processes = hf.get_mean_property(Structure_Dict, "processes")
        print("Average Number of Processes:", avg_processes)

        # Get Total Number of Ligands, Functions, Processes and Subunits
        num_ligands = len(hf.get_all_property(Structure_Dict, "ligands"))
        print("Number of Ligands:", num_ligands)

        num_subunits = len(hf.get_all_property(Structure_Dict, "subunits"))
        print("Number of Subunits:", num_subunits)

        num_functions = len(hf.get_all_property(Structure_Dict, "functions"))
        print("Number of Functions:", num_functions)

        num_processes = len(hf.get_all_property(Structure_Dict, "processes"))
        print("Number of Processes:", num_processes)

    #Here we create a bipartite graph of ligands and proteins, which can be analyzed on its own, or used
    #to create a projected graph.
    if bipart_graph == True:
        Protein_Bipartite_Graph = nx.Graph()
        struct_name_set = set()
        # Create a bipartite graph in which there are structure nodes and ligand nodes
        for (struct_name, struct) in Structure_Dict.items():
            struct_name_set.add(struct_name)
            hf.create_Edge(struct, Protein_Bipartite_Graph, property)

        print('Bipartite Nodes:', len(Protein_Bipartite_Graph.nodes()))
        print('Bipartite Edges:', len(Protein_Bipartite_Graph.edges()))
        nx.write_gml(Protein_Bipartite_Graph, bipartite_filename)

    #Here we create a new projected graph
    if load_graph == False:
        # Create a projected graph from the bipartite
        Protein_Graph = hf.create_projected_graph(Structure_Dict,
                                                  edge_comm_num, edge_type)
        # Get the Giant Component of graph
        Protein_Graph_GC = Protein_Graph.subgraph(
            sorted(nx.connected_components(Protein_Graph),
                   key=len,
                   reverse=True)[0])
        nx.write_gml(Protein_Graph, graph_filename)

    #If the graph has already been created, load it in to save time
    if load_graph == True:
        Protein_Graph = nx.read_gml(graph_filename)
        Protein_Graph_GC = Protein_Graph.subgraph(
            sorted(nx.connected_components(Protein_Graph),
                   key=len,
                   reverse=True)[0])

    #Print out some useful information about the graph
    if print_graph_props == True:
        print('Protein_Graph Nodes:', len(Protein_Graph.nodes()))
        print('Protein_Graph Edges:', len(Protein_Graph.edges()))
        print('Protein_Graph Num connected Components:',
              nx.number_connected_components(Protein_Graph))
        print('Protein_Graph Num edges in largest Components:',
              len(Protein_Graph_GC.edges()))
        print('Protein_Graph Num nodes in largest Components:',
              len(Protein_Graph_GC.nodes()))

    # K-Clique Implementation
    if k_clique == True:
        print('Begin K_Clique')
        #Create a copy of the graph, which will be used when we label nodes by community
        k_clique_graph = Protein_Graph_GC.copy()

        #You can use a predetermined k, or optimize the k for the graph
        if k_clique_opt == False:
            k_clique_comms_pre_del = nx.algorithms.community.k_clique_communities(
                Protein_Graph_GC, num_k_cliques)
            k_clique_comms_pre_del = list(list(k_clique_comms_pre_del))
        else:
            k_clique_comms_pre_del = hf.opt_k_clique(Protein_Graph_GC,
                                                     start_k_clique_opt,
                                                     end_k_clique_opt,
                                                     num_trials_k)

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in k_clique_comms_pre_del
                                ]) / len(k_clique_comms_pre_del)

        #Get the graph similarity score
        K_clique_score_pre_del = hf.score_graph(k_clique_comms_pre_del,
                                                Structure_Dict,
                                                k_property,
                                                property,
                                                already_list=True)
        print(K_clique_score_pre_del, len(k_clique_comms_pre_del),
              avg_comm_pre_del, hf.num_nodes(k_clique_comms_pre_del))

        #Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(k_clique_comms_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "K_Clique_" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        #Delete some communities based on their modularity score and the standard deviation of community scores in the graph
        k_clique_comms = hf.delete_comms(Protein_Graph_GC,
                                         k_clique_comms_pre_del, std_val)

        #Get the graph similarity score after deletion
        k_clique_score = hf.score_graph(k_clique_comms,
                                        Structure_Dict,
                                        k_property,
                                        property,
                                        already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm)
                        for comm in k_clique_comms]) / len(k_clique_comms)
        print(k_clique_score, len(k_clique_comms), avg_comm,
              hf.num_nodes(k_clique_comms))

        # Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(k_clique_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "K_Clique_" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(k_clique_graph,
                               hf.list_to_dict(k_clique_comms_pre_del),
                               "Community")

        #Save the graph with nodes labeled by community
        nx.write_gml(
            k_clique_graph, "K_Clique_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End K_Clique')

    # Modularity Maximization Implementation
    if mod_max == True:
        print('Begin Modularity Maximization')
        # Create a copy of the graph, which will be used when we label nodes by community
        mod_graph = Protein_Graph_GC.copy()

        #Find communities using modularity maximization
        mod_max_comms_pre_del = nx.algorithms.community.modularity_max.greedy_modularity_communities(
            Protein_Graph_GC)
        mod_max_comms_pre_del = list(list(mod_max_comms_pre_del))

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in mod_max_comms_pre_del
                                ]) / len(mod_max_comms_pre_del)

        # Get the graph similarity score
        mod_max_score_pre_del = hf.score_graph(mod_max_comms_pre_del,
                                               Structure_Dict,
                                               k_property,
                                               property,
                                               already_list=True)
        print(mod_max_score_pre_del, len(mod_max_comms_pre_del),
              avg_comm_pre_del, hf.num_nodes(mod_max_comms_pre_del))

        # Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(mod_max_comms_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "Mod_Max" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Delete some communities based on their modularity score and the standard deviation of community scores in the graph
        mod_max_comms = hf.delete_comms(Protein_Graph_GC,
                                        mod_max_comms_pre_del, std_val)

        # Get the graph similarity score after deletion
        mod_max_score = hf.score_graph(mod_max_comms,
                                       Structure_Dict,
                                       k_property,
                                       property,
                                       already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm)
                        for comm in mod_max_comms]) / len(mod_max_comms)
        print(mod_max_score, len(mod_max_comms), avg_comm,
              hf.num_nodes(mod_max_comms))

        # Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(mod_max_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "Mod_Max" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(mod_graph,
                               hf.list_to_dict(mod_max_comms_pre_del),
                               "Community")

        # Save the graph with nodes labeled by community
        nx.write_gml(
            mod_graph, "Mod_Max_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Modularity Maximization')

    # Fluid Implementation
    if fluid == True:
        print('Begin Fluid')
        # Create a copy of the graph, which will be used when we label nodes by community
        fluid_graph = Protein_Graph_GC.copy()

        # You can use a predetermined number of communities, or optimize the number of communities for the graph
        if opt_fluid == False:
            fluid_comms_pre_del = nx.algorithms.community.asyn_fluid.asyn_fluidc(
                Protein_Graph_GC, num_fluid_comms)
            fluid_comms_pre_del = list(list(fluid_comms_pre_del))
        else:
            fluid_comms_pre_del = hf.opt_fluid(Protein_Graph_GC,
                                               start_fluid_comms,
                                               end_fluid_comms,
                                               fluid_step_size,
                                               fluid_num_trials)

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in fluid_comms_pre_del
                                ]) / len(fluid_comms_pre_del)

        # Get the graph similarity score
        fluid_score_pre_del = hf.score_graph(fluid_comms_pre_del,
                                             Structure_Dict,
                                             k_property,
                                             property,
                                             already_list=True)
        print(fluid_score_pre_del, len(fluid_comms_pre_del), avg_comm_pre_del,
              hf.num_nodes(fluid_comms_pre_del))

        # Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(fluid_comms_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "Fluid" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Delete some communities based on their modularity score and the standard deviation of community scores in
        # the graph
        fluid_comms = hf.delete_comms(Protein_Graph_GC, fluid_comms_pre_del,
                                      std_val)

        # Get the graph similarity score after deletion
        fluid_score = hf.score_graph(fluid_comms,
                                     Structure_Dict,
                                     k_property,
                                     property,
                                     already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in fluid_comms]) / len(fluid_comms)
        print(fluid_score, len(fluid_comms), avg_comm,
              hf.num_nodes(fluid_comms))

        # Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(fluid_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "Fluid" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(fluid_graph,
                               hf.list_to_dict(fluid_comms_pre_del),
                               "Community")

        # Save the graph with nodes labeled by community
        nx.write_gml(
            fluid_graph, "Fluid_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Fluid')

    # Louvain implementation
    if louv == True:
        print('Begin Louvain')
        # Create a copy of the graph, which will be used when we label nodes by community
        louv_graph = Protein_Graph_GC.copy()

        #Create communities using the Louvain method
        opt_louv = hf.optimize_louv(Protein_Graph_GC, Structure_Dict, 100, 1,
                                    property, k_property)
        louv_comm_pre_del = hf.Get_Community(opt_louv[0])

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in louv_comm_pre_del
                                ]) / len(louv_comm_pre_del)

        # Get the graph similarity score
        louv_score_pre_del = hf.score_graph(louv_comm_pre_del,
                                            Structure_Dict,
                                            k_property,
                                            property,
                                            already_list=True)
        print(louv_score_pre_del, len(louv_comm_pre_del), avg_comm_pre_del,
              hf.num_nodes(louv_comm_pre_del))

        # Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(louv_comm_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "Louv" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Delete some communities based on their modularity score and the standard deviation of community scores in the graph
        louv_comms = hf.delete_comms(Protein_Graph_GC, louv_comm_pre_del,
                                     std_val)

        # Get the graph similarity score after deletion
        louv_score = hf.score_graph(louv_comms,
                                    Structure_Dict,
                                    k_property,
                                    property,
                                    already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in louv_comms]) / len(louv_comms)
        print(louv_score, len(louv_comms), avg_comm, hf.num_nodes(louv_comms))

        # Create a plot of each community similarity score vs. a random community's similarity score
        hf.plot_vs_random(louv_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "Louv" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(louv_graph, hf.list_to_dict(louv_comm_pre_del),
                               "Community")

        # Save the graph with nodes labeled by community
        nx.write_gml(
            louv_graph, "Louv_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Louvain')

    # Create Degree Distribution Plot and print out the expected degree of the node
    if degree_dist == True:
        x, y, expected_degree = hf.degree_dist(Protein_Graph_GC)
        print("Expected Degree:", expected_degree)
        plt.figure()
        plt.loglog(x, y, 'bo')
        plt.title("Degree distribution")
        plt.xlabel("log(degree values)")
        plt.ylabel("log(degree frequencies)")
        plt.savefig('degree_dist_' + edge_type + '.png')
        plt.show()

    #Find the average clustering coefficient of the graph
    if avg_clust == True:
        average_clustering = nx.average_clustering(Protein_Graph_GC)
        print("Average Clustering Coefficient:", average_clustering)
Example #54
def main(graph_name_GC):


   
   
    H1 = nx.read_gml(graph_name_GC)   # just GC, but with Role info
    H1 = nx.connected_component_subgraphs(H1)[0] 


    print len(H1.nodes())

    

    list_R6_labels=[]
    dicc_label_node={}
    list_network_ids=[]
    for node in H1.nodes():
              
        list_network_ids.append(int(H1.node[node]['label']))# this actually corresponds to the id from the users table in the DB
        dicc_label_node[int(H1.node[node]['label'])]=node
      
        if (H1.node[node]['role'] =="R6"):
            list_R6_labels.append(int(H1.node[node]['label']))# this actually corresponds to the id from the users table in the DB


    #print "# R6s:",len(list_R6_labels)
    
    print len(dicc_label_node)

    database = "calorie_king_social_networking_2010"  
    server="tarraco.chem-eng.northwestern.edu"
    user="******" 
    passwd="n1ckuDB!"

    db= Connection(server, database, user, passwd) 


    query1="""select * from users"""    
    result1 = db.query(query1)  # is a list of dict.

   

    name1="GINI_coef_friendships_strenght_friendship_with_R6s.csv"           
    file=open(name1, 'wt')   
    print >> file,'label','ck_id','gini_friendships','gini_to_friends','gini_from_friends','sum_strength_with_R6s','sum_strength_to_R6s','sum_strength_from_R6s','tot_mess','tot_sent','tot_received', 'tot_public_mess','blog_posts','home_page','forum_posts','lesson_com','tot_act'



    list_blog_posts=[]
    list_home_page=[]
    list_forum_posts=[]
    list_lesson_com=[]
    list_tot_public_mess=[]


    dicc_ck_label={}
    for r1 in result1:   #first i build a dicc ck_id vs. label        
      ck_id=r1['ck_id']      
      label=int(r1['id'])  # this corresponds to the 'label' in the gml files
      dicc_ck_label[ck_id]=label

      try:
          node=dicc_label_node[label]
          H1.node[node]['ck_id']=ck_id        
      except KeyError: pass

   



    num_users=0.
    for r1 in result1:   #loop over users 
      num_users+=1.
     
      print int(num_users)
      ck_id=r1['ck_id']
      label=int(r1['id'])  # this corresponds to the 'label' in the gml files
      try:
          node=dicc_label_node[label]
      except KeyError: pass

      query2="select  * from friends where (src ='"+str(ck_id)+"')or (dest ='"+str(ck_id)+"') "
      result2= db.query(query2)          
      degree=len(result2)           




      num_messg_friends=0.
      num_messg_to_friends=0.
      num_messg_from_friends=0.
      flag_sent=0
      flag_received=0
      
      list_weighted_to_friends=[] # one value per FRIEND of a given user
      list_weighted_from_friends=[]
      list_weighted_tot_messg_friends=[]
      
      list_weighted_to_friends_norm=[]   # one value per FRIEND of a given user, normalized by the tot number of messages that user sent
      list_weighted_from_friends_norm=[]
      list_weighted_tot_messg_friends_norm=[]
      
      list_weighted_to_friends_R6s=[]  
      list_weighted_from_friends_R6s=[]
      list_weighted_tot_messg_friends_R6s=[]
  
      list_weighted_to_friends_R6s_norm=[]  
      list_weighted_from_friends_R6s_norm=[]
      list_weighted_tot_messg_friends_R6s_norm=[]     


     # query3="select  * from private_messages where (src_id ='"+str(ck_id)+"') "  
      #result3= db.query(query3)
      #tot_sent=float(len(result3))
     
 

      #query4="select  * from private_messages where  (dest_id ='"+str(ck_id)+"') "   
      #result4= db.query(query4)
      #tot_received=float(len(result4))

    


      query5="select  * from private_messages where (src_id ='"+str(ck_id)+"')or (dest_id ='"+str(ck_id)+"') "   # all messages
      result5= db.query(query5)
      num_tot_messg=float(len(result5))


      tot_sent=0
      tot_received=0
      for r5 in result5:
          if r5['src_id']==ck_id:
              tot_sent+=1
          elif r5['dest_id']==ck_id:
              tot_received+=1 



      query6="SELECT * FROM activity_combined where activity_flag != 'WI' and  activity_flag != 'PM' and ck_id='"+str(ck_id)+"'   "     
      result6= db.query(query6)
      tot_public_mess=len(result6)


      query7="SELECT * FROM activity_combined where activity_flag != 'WI' and ck_id='"+str(ck_id)+"'   "     
      result7= db.query(query7)
      tot_activity=len(result7)



      blog_posts=0
      home_page=0
      forum_posts=0
      lesson_com=0
     
      for r6 in result6:
      #    print r6

          if r6['activity_flag']=="BC":
              blog_posts+=1
          elif r6['activity_flag']=="HP":
              home_page+=1
          elif r6['activity_flag']=="FP":
              forum_posts+=1
          elif r6['activity_flag']=="LC":
              lesson_com+=1             
    
      list_blog_posts.append(blog_posts)
      list_home_page.append(home_page)
      list_forum_posts.append(forum_posts)
      list_lesson_com.append(lesson_com)
      list_tot_public_mess.append(tot_public_mess)

     
      #if tot_public_mess>0:
    
    
      print ck_id,"tot public:", tot_public_mess, "blogs:",blog_posts, "home page:",home_page, "forum:",forum_posts, "lessons",lesson_com, "tot private:", num_tot_messg,"tot sent:",tot_sent,"tot_received:",tot_received,"tot act:",tot_activity


      # if num_users <=500:    # JUST TO TEST THE CODE

      if label in list_network_ids:  # if the user is in the network, i check how many messages they send each other
             
          print "\n\nnode label",label,ck_id,"has degree:",H1.degree(node),"from DB",degree

         

   
          for f in H1.neighbors(node):
             
              messg_to_one_friend=0.    #looking at a particular friend
              messg_from_one_friend=0.
              messg_one_friend=0.

      
              from_R6s=0.
              to_R6s=0.
              with_R6s=0.


              flag_R6_friend=0
              flag_to_R6=0
              flag_from_R6=0

             


              for r5 in result5:
                
                  if r5['src_id']== ck_id   and   r5['dest_id']== H1.node[f]['ck_id']:                   
                      num_messg_to_friends+=1.
                      num_messg_friends+=1.
                      flag_sent=1

                      messg_to_one_friend+=1.
                      messg_one_friend+=1.

                      if H1.node[f]['role']=='R6':
                          if H1.node[node]['R6_overlap'] >0:
                              to_R6s+=1.                              
                              with_R6s+=1.  
                              flag_R6_friend=1
                              flag_to_R6=1
                            


   

                  elif r5['dest_id']== ck_id   and   r5['src_id']== H1.node[f]['ck_id']:                 
                      num_messg_from_friends+=1.
                      num_messg_friends+=1.
                      flag_received=1

                      messg_from_one_friend+=1.
                      messg_one_friend+=1.

                      if H1.node[f]['role']=='R6':
                          if H1.node[node]['R6_overlap'] >0:
                              from_R6s+=1.                                   
                              with_R6s+=1.     
                              flag_R6_friend=1                              
                              flag_from_R6=1


           
              list_weighted_to_friends.append(messg_to_one_friend)    # weight of each friendship    (not normalized)          
              list_weighted_from_friends.append(messg_from_one_friend)                                    
              list_weighted_tot_messg_friends.append(messg_one_friend) 



             
              if flag_to_R6!=0:
                  list_weighted_to_friends_R6s.append(to_R6s) 
                 
              if flag_from_R6!=0:
                  list_weighted_from_friends_R6s.append(from_R6s) 
                  
              if flag_R6_friend !=0:
                  list_weighted_tot_messg_friends_R6s.append(with_R6s)
  



          for item in list_weighted_tot_messg_friends:    # normalization
              if sum(list_weighted_tot_messg_friends)>0:
                  list_weighted_tot_messg_friends_norm.append(item/sum(list_weighted_tot_messg_friends))

          for item in list_weighted_to_friends:
              if sum(list_weighted_to_friends)>0:
                  list_weighted_to_friends_norm.append(item/sum(list_weighted_to_friends))

          for item in list_weighted_from_friends:
               if sum(list_weighted_from_friends)>0:
                   list_weighted_from_friends_norm.append(item/sum(list_weighted_from_friends))



          for item in list_weighted_tot_messg_friends_R6s:    # normalization
              if sum(list_weighted_tot_messg_friends)>0:
                  list_weighted_tot_messg_friends_R6s_norm.append(item/sum(list_weighted_tot_messg_friends))

          for item in list_weighted_to_friends_R6s:
              if sum(list_weighted_to_friends)>0:
                  list_weighted_to_friends_R6s_norm.append(item/sum(list_weighted_to_friends))

          for item in list_weighted_from_friends_R6s:
               if sum(list_weighted_from_friends)>0:
                   list_weighted_from_friends_R6s_norm.append(item/sum(list_weighted_from_friends))





     # i calculate how skewed friendships for a given user are:
          if len(list_weighted_to_friends) >0 and sum(list_weighted_to_friends)>0:
              Gini_to_friends=GINI_coef.calculate_GINI(list_weighted_to_friends)            
          else:
              Gini_to_friends='NA'

          if len(list_weighted_from_friends) >0 and sum(list_weighted_from_friends)>0:
              Gini_from_friends=GINI_coef.calculate_GINI(list_weighted_from_friends)  
          else:                   
              Gini_from_friends='NA'

          if len(list_weighted_tot_messg_friends) >0 and sum(list_weighted_tot_messg_friends)>0:
              Gini_friends=GINI_coef.calculate_GINI(list_weighted_tot_messg_friends)
          else:
              Gini_friends='NA'
        

#'label','ck_id','gini_friendships','gini_to_friends','gini_from_friends','sum_strength_with_R6s','sum_strength_to_R6s','sum_strength_from_R6s','tot_mess','tot_sent','tot_received', 'tot_public_mess','blog_posts','home_page','forum_posts','lesson_com','tot_act'


          print >> file,label,ck_id,Gini_friends,Gini_to_friends,Gini_from_friends,sum(list_weighted_tot_messg_friends_R6s_norm),sum(list_weighted_to_friends_R6s_norm),sum(list_weighted_from_friends_R6s_norm),num_tot_messg,tot_sent,tot_received, tot_public_mess,blog_posts,home_page,forum_posts,lesson_com,tot_activity



            
      else :  #if not networked node (or not GC)
          print >> file,label,ck_id,'NA','NA','NA','NA','NA','NA',num_tot_messg,tot_sent,tot_received, tot_public_mess,blog_posts,home_page,forum_posts,lesson_com,tot_activity
Example #55
import numpy as np
import pandas as pd
import math
import csv
import networkx as nx
from gurobipy import *
#define a powerset function
from itertools import chain, combinations


def powerset(iterable):
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
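# For illustration (not part of the original file):
#   list(powerset([1, 2])) -> [(), (1,), (2,), (1, 2)]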


#initialize graph from file
Grid = nx.read_gml("Bus30WithData.gml")
Grid = nx.convert_node_labels_to_integers(Grid)
#declare needed constants
SteadyStatePower = 255  #in MW--the PU Basis
PlanningHorizon = 7  #this is measured in shifts
ShiftLength = 12  #in Hours
#Define sets to be used in optimization
Nodes = list(range(0, len(Grid.nodes)))
Time = list(range(0, PlanningHorizon))
RoadGrid = nx.Graph()
RoadGrid.add_nodes_from(Grid.nodes)
for i in Nodes:
    for j in Nodes:
        if Grid.has_edge(i, j, 1):
            RoadGrid.add_edge(i, j, weight=Grid[i][j][1]['length'])
        else:
Example #56
File: code7.py Project: gaoyuanning/TOPTW
def toptw(startDestList, myGraph, bigPointDir):

    # track the nodes that have already been inserted into a route
    existList = []

    routeMaxDuration = myGraph.graph['RouteMaxDuration']
    threeDayPath = []
    threeDayProfitList = [0, 0, 0]
    threeDayBestRatioList = [-1, -1, -1]
    for i, val in enumerate(startDestList):
        s = val['s']
        t = val['t']
        existList.append(s)
        existList.append(t)
        tP = [{'ID': s}, {'ID': t}]
        duration = myGraph.edges[s, t]['duration']
        startSlack = 0
        destSlack = routeMaxDuration - duration
        tP[0]['slack'] = startSlack
        tP[0]['aTime'] = 0
        tP[0]['dTime'] = 0
        tP[0]['waitTime'] = 0
        tP[1]['slack'] = destSlack
        tP[1]['aTime'] = duration
        tP[1]['dTime'] = duration
        # waiting time at the destination; how should it be determined?
        tP[1]['waitTime'] = myGraph.nodes[t]['TimeWindows'][
            i + 1]['opentime'] - duration
        if tP[1]['waitTime'] < 0:
            tP[1]['waitTime'] = 40
        threeDayPath.append(tP)

    finish = False
    while finish == False:
        finish = True
        # top-k best routes
        # k = 5
        # bestKPath = []
        # bestKRatio = []
        # bestKProfit = []
        # bestKNodeId = []
        # look for a suitable node to insert
        for data in myGraph.nodes.data():
            node = data[1]
            if node['ID'] in existList:
                continue

            threeDayRatioList = [-1, -1, -1]
            threeDayTmpPathList = [[] for i in range(3)]
            threeDayTmpProfitList = [x for x in threeDayProfitList]

            # iterate over the three routes and pick the one best suited for inserting this node
            for day, travelPath in enumerate(threeDayPath):
                day = day + 1
                # find the best-matching slack and insert the node right before it
                bestMatch = -1
                tmpWaitTime = -1
                gapTime = 10000000
                for index, _ in enumerate(travelPath[1:]):
                    # index is relative to the sliced list, so shift it back to the full path
                    index = index + 1
                    preComponent = travelPath[index - 1]
                    preToCandiNodeDuration = myGraph.edges[
                        preComponent['ID'], node['ID']]['duration']
                    arriveTimeToCandiNode = preComponent[
                        'dTime'] + preToCandiNodeDuration
                    if (arriveTimeToCandiNode > routeMaxDuration
                            or max(arriveTimeToCandiNode,
                                   node['TimeWindows'][day]['opentime']) +
                            node['ServiceTime'] > routeMaxDuration
                            or node['TimeWindows'][day]['closetime'] -
                            arriveTimeToCandiNode < node['ServiceTime']):
                        break

                    deparTimeOnCandiNode = max(
                        arriveTimeToCandiNode, node['TimeWindows'][day]
                        ['opentime']) + node['ServiceTime']
                    curComponent = travelPath[index]
                    candiNodeToCurNodeDuration = myGraph.edges[
                        node['ID'], curComponent['ID']]['duration']
                    arriveTimeToCurNode = deparTimeOnCandiNode + candiNodeToCurNodeDuration
                    # extra time added by inserting this node:
                    # t1 is the duration via the inserted node, t2 the original pre-to-cur duration
                    t1 = arriveTimeToCurNode - preComponent['dTime']
                    t2 = curComponent['aTime'] - preComponent['dTime']
                    deltaTime = t1 - t2
                    # within the allowed slack, insert before the node with the longest waiting time
                    curGapTime = curComponent['slack'] - deltaTime
                    if curGapTime < 0:
                        continue
                    if travelPath[index]['waitTime'] > tmpWaitTime:
                        bestMatch = index
                        tmpWaitTime = travelPath[index]['waitTime']

                    # # update the current gap value
                    # if curGapTime < gapTime:
                    #     gapTime = curGapTime
                    #     bestMatch = index

                # no suitable insertion point found
                if bestMatch == -1:
                    continue

                # compute the ratio of total profit to slack after the insertion
                tmpTotalProfit = threeDayProfitList[day - 1] + node['Profit']
                tmpTravelPath = [copy.copy(x) for x in travelPath]
                timeParamDict = {
                    'arriveTimeToCandiNode': arriveTimeToCandiNode,
                    'deparTimeOnCandiNode': deparTimeOnCandiNode,
                    'arriveTimeToCurNode': arriveTimeToCurNode,
                    'day': day
                }
                tmpTotalSlack, tmpTravelPath = calcuSlack2(
                    myGraph, tmpTravelPath, node, bestMatch, timeParamDict)
                if float(tmpTotalSlack) == 0:
                    continue
                ratio = float(tmpTotalProfit) / float(tmpTotalSlack)
                threeDayRatioList[day - 1] = ratio
                threeDayTmpPathList[day -
                                    1] = [copy.copy(x) for x in tmpTravelPath]
                threeDayTmpProfitList[day - 1] = tmpTotalProfit

            # randomly pick one day for the insertion
            accept = False

            dieTime = 0
            while accept == False:
                randomDay = random.randrange(0, 3)
                if threeDayRatioList[randomDay] > threeDayBestRatioList[
                        randomDay]:
                    threeDayBestRatioList[randomDay] = threeDayRatioList[
                        randomDay]
                    threeDayPath[randomDay] = [
                        copy.copy(x) for x in threeDayTmpPathList[randomDay]
                    ]
                    threeDayProfitList[randomDay] = threeDayTmpProfitList[
                        randomDay]
                    accept = True
                dieTime = dieTime + 1
                if dieTime > 15:
                    break

            # the route with the largest value in threeDayRatioList is the one to insert into
            # accept = False
            # for i in range(3):
            #     if accept == False:
            #         bestRatio = max(threeDayRatioList)
            #         whichDay = threeDayRatioList.index(bestRatio)
            #         if bestRatio > threeDayBestRatioList[whichDay]:
            #             threeDayBestRatioList[whichDay] = bestRatio
            #             threeDayPath[whichDay] = [copy.copy(x) for x in threeDayTmpPathList[whichDay]]
            #             threeDayProfitList[whichDay] = threeDayTmpProfitList[whichDay]
            #             accept = True
            #         threeDayRatioList[whichDay] = -5

            if accept == True:
                existList.append(node['ID'])
                finish = False

    shake(myGraph, threeDayPath, threeDayBestRatioList, threeDayProfitList,
          existList)
    profitSum = sum(threeDayProfitList)
    bigPointPaths = os.listdir(bigPointDir)
    for day, path in enumerate(threeDayPath):
        print(path)
        for component in path:
            if str(component['ID']) in bigPointPaths:
                smallG = nx.read_gml(bigPointDir + '\\' + str(component['ID']))
                smallG = nx.convert_node_labels_to_integers(smallG)
                tmpP = BigPoint.dfsTraverse(
                    smallG, component['aTime'] + component['waitTime'],
                    component['dTime'], day + 1)
                profitSum = profitSum + tmpP
                profitSum = profitSum - myGraph.nodes[
                    component['ID']]['Profit']
        print()
    print('total profit:', profitSum)

    return
Example #57
def get_row(general_path, pattern_result, experiment_name, pattern_path):
    row = {}
    pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml'))
    parent_id = get_parent_id(os.path.join(pattern_path))
    nr_randvar_values = man.count_nr_randvars_in_graph(pattern)
    cycles = man.is_there_cycle_in_graph(pattern)
    max_degree = man.get_maximum_node_degree(pattern)
    average_degree = man.get_average_node_degree(pattern)
    n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern)
    #get nr embeddings of exhaustive
    nr_emb = None
    sel_emb = None
    has_obd = True
    emb_stds = []

    if os.path.exists(os.path.join(pattern_result, 'no_obdecomp.info')):
        has_obd = False

    if os.path.exists(
            os.path.join(os.path.dirname(pattern_result), "selected.info")):
        sel_emb = extract_nr_embeddings_NS(
            os.path.join(os.path.dirname(pattern_result), "selected.info"))
    print "General path: ", general_path
    print os.path.join(
        general_path, 'exhaustive_approach', 'results_' +
        general_path.split('/')[-1] + '.res'), "exists?", os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + general_path.split('/')[-1] + '.res'))
    pattern_name = None
    print general_path.split('/')
    if general_path.split('/')[-1] == "":
        pattern_name = general_path.split('/')[-2]
    else:
        pattern_name = general_path.split('/')[-1]
    print pattern_name
    if os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res')):
        nr_emb, time, nr_obs = extract_nr_embeddings(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res'))
    #get the results
    if os.path.exists(os.path.join(pattern_result, 'monitoring')):
        embeddings, emb_stds, klds = get_stat(
            os.path.join(pattern_result, 'monitoring'), experiment_name)
    else:
        embeddings = [None] * 120
        klds = [None] * 120
    print "EMBEDDINGS: ", embeddings
    unequal_size_warning = False
    OBD = None
    if os.path.exists(
            os.path.join(general_path, 'results_furer', 'OBDDecomp.info')):
        OBD = getOBDecomp(
            os.path.join(general_path, 'results_furer', 'OBDDecomp.info'))

    nodes, edges = man.get_readable_text_format(pattern)
    print "PATTERN NAME: ", pattern_result

    row['pattern_name'] = pattern_result
    row['parent_id'] = parent_id
    row['nr_randvar_values'] = int(nr_randvar_values)
    row['nodes'] = nodes
    row['edges'] = edges
    row['has_cycles'] = cycles
    row['density'] = float(nx.density(pattern))
    row['shape'] = man.get_graph_shape(pattern)
    row['max_degree'] = float(max_degree)
    row['avg_deg'] = float(average_degree)
    row['nr_targets'] = n_target_nodes

    if sel_emb:
        row['sel_emb'] = float(sel_emb)
    else:
        row['sel_emb'] = sel_emb
    if nr_emb:
        row['exh_emb'] = float(nr_emb)
    else:
        row['exh_emb'] = nr_emb
    row['has_obd'] = has_obd
    #row['unequal_size_warn']=unequal_size_warning
    row['OBD'] = OBD

    print "Nr embeddingS: ", len(embeddings)
    for i in xrange(0, len(embeddings)):
        row["emb_" + str(i + 1)] = embeddings[i]

    for i in xrange(0, len(emb_stds)):
        row["std_" + str(i + 1)] = emb_stds[i]

    for i in xrange(0, len(klds)):
        row["KLD_" + str(i + 1)] = klds[i]

    return row
Example #58
def main(graph_name):
 



   G = nx.read_gml(graph_name)



   cutting_day=125  # i use this only for the filenames




   for_testing_fixed_set="YES"   # when YES, fixed values param, to get all statistics on final distances etc
# change the range for the parameters accordingly

   envelopes="YES"

   Niter=1000   # 100 iter seems to be enough (no big diff. with respect to 1000it)

   percent_envelope=95.
   
   list_id_weekends_T3=look_for_T3_weekends(G)  # T3 doesnt share fellows in the weekend  (but they are the exception)
   Nbins=200   # for the histogram of sum of distances


   all_team="NO"   # as adopters or not

   dir_real_data='../Results/'
   dir="../Results/weight_shifts/infection/" 

   delta_end=3.  # >= +/- dr difference at the end of the evolution (no realization ends up closer than this; with 2, the list comes back empty)


   if for_testing_fixed_set=="NO":
      output_file3="../Results/weight_shifts/Landscape_parameters_infection_"+str(Niter)+"iter_A_F_inferred.dat" 
      file3 = open(output_file3,'wt')        
      
      file3.close()



######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################



   if all_team=="YES":    
      print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
      exit()

   else:
      filename_actual_evol="../Results/Actual_evolution_adopters_from_inference.dat"
  


   file1=open(filename_actual_evol,'r')         ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
   list_lines_file=file1.readlines()
            

   list_actual_evol=[]  
   for line in list_lines_file:      # [1:]:   # i exclude the first row   
     
      num_adopters= float(line.split("\t")[1])          
      list_actual_evol.append(num_adopters)



##################################################################




#../Results/weight_shifts/infection/Average_time_evolution_Infection_training_p0.8_Immune0.3_1000iter_2012_avg_ic_day125.dat  THESE VALUES ARE THE OPTIMUM FIT FOR THE 152 DAYS
   prob_min=0.4
   prob_max=0.401
   delta_prob=0.1
   
   

   prob_Immune_min=0.50
   prob_Immune_max=0.51
   delta_prob_Immune=0.1
   




     

   dict_filenames_tot_distance={}   # i will save the filename as key and the tot distance from that curve to the original one

   prob_Immune=prob_Immune_min
   while prob_Immune<= prob_Immune_max:
        
      print "prom Immune:",prob_Immune        

      prob_infection=prob_min
      while prob_infection<= prob_max:
                 
        print "  p:",prob_infection        


        if for_testing_fixed_set=="YES":
           output_file2=dir+"Average_time_evolution_Infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_A_F_inferred.dat"

        else:
           output_file2=dir+"Average_time_evolution_Infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_A_F_inferred.dat"


        file2 = open(output_file2,'wt')                                       
        file2.close()
        



      #  list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
        list_lists_t_evolutions=[]    

        list_dist_fixed_parameters=[]
        list_dist_fixed_parameters_testing_segment=[]
        list_abs_dist_at_ending_point_fixed_parameters=[]
        list_dist_at_ending_point_fixed_parameters=[]
        list_final_num_infected=[]
        list_abs_dist_point_by_point_indiv_simus_to_actual=[]
        list_dist_point_by_point_indiv_simus_to_actual=[]

     #   list_abs_dist_at_cutting_day=[]

        for iter in range(Niter):
            
            print "     iter:",iter


            ####### NOTE: comment this out when sweeping the whole parameter space
        #    file_name_indiv_evol=output_file2.strip("Average_").split('.dat')[0]+"_indiv_iter"+str(iter)+".dat"
           
         #   file4 = open(file_name_indiv_evol,'wt')       
          #  file4.close()
              ##########################################




            ########### set I.C.

            list_I=[]  #list infected doctors                
            max_order=0
            for n in G.nodes():
                G.node[n]["status"]="S"  # all nodes are Susceptible
                if G.node[n]['type']=="shift":                      
                    if  G.node[n]['order']>max_order:
                        max_order=G.node[n]['order']   # to get the last shift-order for the time loop
                else:
                    if G.node[n]['label']=="Wunderink"  or G.node[n]["label"]=="Weiss":           
                        G.node[n]["status"]="I"                       
                        list_I.append(G.node[n]['label'])
          



            
           
            list_single_t_evolution=[]
            list_single_t_evolution.append(2.0)  # I always start with TWO infected doctors!!


            for n in G.nodes():   # i make some doctors immune (anyone except Weiss and Wunderink)
                if (G.node[n]['type']=="A") or ( G.node[n]['type']=="F"):
                    if G.node[n]['label']!="Wunderink"  and G.node[n]["label"]!="Weiss": 
                        rand=random.random()
                        if rand< prob_Immune:
                            G.node[n]["status"]="Immune"



       
  
            ################# the dynamics starts: 
            
            t=1
            while t<= max_order:  # loop over shifts, in order           
                for n in G.nodes():
                    if G.node[n]['type']=="shift" and G.node[n]['order']==t:

                        shift_lenght=int(G.node[n]['shift_lenght'])

                        if shift_lenght==2 and n not in list_id_weekends_T3:
                           shift_lenght=1   # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2  (weekend shifts for T3 are two days long, with no shared fellow)
                         #  print "one-day weekend", G.node[n]['label'],G.node[n]['shift_lenght']

                        flag_possible_infection=0
                        for doctor in G.neighbors(n): #first i check if any doctor is infected in this shift
                            if G.node[doctor]["status"]=="I":
                                flag_possible_infection=1
                                

                        if flag_possible_infection:
                            for doctor in G.neighbors(n): # then the doctors in that shift get infected with prob_infection

                               for i in range(shift_lenght):   # i repeat the infection process several times, to account for shift length
                                  if G.node[doctor]["status"]=="S":
                                     rand=random.random()
                                     if rand<prob_infection:
                                        G.node[doctor]["status"]="I"
                                        
                                       # if G.node[doctor]["type"]=="A":   # fellows participate in the dynamics, but i only consider the attendings as real adopters
                                        list_I.append(G.node[doctor]["label"])
                                        
              #  if for_testing_fixed_set=="YES":
               #    if t==cutting_day:
                #      list_abs_dist_at_cutting_day.append(abs(float(list_actual_evol[-1])-float(len(list_I))))
                 #     print abs(float(list_actual_evol[-1])-float(len(list_I))), float(list_actual_evol[t]),float(len(list_I))
                     

                list_single_t_evolution.append(float(len(list_I)))

                t+=1
   


                ######## end t loop




            ######## NOTE!!! comment this out when sweeping the WHOLE parameter space
           # file4 = open(file_name_indiv_evol,'at')                
            #for i in range(len(list_single_t_evolution)):  #time step by time step                            
             #  print >> file4, i,list_single_t_evolution[i], prob_infection, prob_Immune 
            #file4.close()
            ########################################################



          

            list_lists_t_evolutions.append(list_single_t_evolution)
             
 
            list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol,list_single_t_evolution))
            list_dist_fixed_parameters_testing_segment.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves_testing_segment( list_actual_evol,list_single_t_evolution, cutting_day))
                 
            list_abs_dist_at_ending_point_fixed_parameters.append( abs(list_single_t_evolution[-1]-list_actual_evol[-1]) )   # i save the absolute distance at the ending point between the current simu and actual evol
            list_dist_at_ending_point_fixed_parameters.append( list_single_t_evolution[-1]-list_actual_evol[-1])    # i save the signed distance at the ending point between the current simu and actual evol
            list_final_num_infected.append(list_single_t_evolution[-1])


            for  index in range(len(list_single_t_evolution)):
               
               list_abs_dist_point_by_point_indiv_simus_to_actual.append(abs(list_single_t_evolution[index]-list_actual_evol[index]))
               list_dist_point_by_point_indiv_simus_to_actual.append(list_single_t_evolution[index]-list_actual_evol[index])


           
        ######## end loop Niter
      



       
       
        list_pair_dist_std_delta_end=[]
        
        list_pair_dist_std_delta_end.append(numpy.mean(list_dist_fixed_parameters) )   # average dist between the curves over Niter
        list_pair_dist_std_delta_end.append(numpy.std(list_dist_fixed_parameters) )

        list_pair_dist_std_delta_end.append(numpy.mean(list_abs_dist_at_ending_point_fixed_parameters))


        if for_testing_fixed_set=="NO":   
           file3 = open(output_file3,'at')          # i print out the landscape           
           print >> file3, prob_infection,prob_Immune,numpy.mean(list_abs_dist_at_ending_point_fixed_parameters), numpy.mean(list_dist_fixed_parameters), numpy.mean(list_final_num_infected),numpy.std(list_final_num_infected)
           file3.close()


     
        if (numpy.mean(list_abs_dist_at_ending_point_fixed_parameters)) <= delta_end:  # i only consider situations close enough at the ending point   
          
           dict_filenames_tot_distance[output_file2]=list_pair_dist_std_delta_end
 
         



        file2 = open(output_file2,'at')        
        for s in range(len(list_single_t_evolution)):           
            list_fixed_t=[]
            for iter in range (Niter):
                list_fixed_t.append(list_lists_t_evolutions[iter][s])        
            print >> file2, s,numpy.mean(list_fixed_t)                    
        file2.close()

        print "printed out: ", output_file2
       # raw_input()

        if  envelopes=="YES":
           calculate_envelope_set_curves.calculate_envelope(list_lists_t_evolutions,percent_envelope,"Infection",[prob_infection,prob_Immune])






        if for_testing_fixed_set=="YES":

           num_valid_endings=0.
           for item in list_abs_dist_at_ending_point_fixed_parameters:
              if item <= delta_end:  # i count how many realizations i get close enough at the ending point         
                 num_valid_endings+=1.
     

           print "average distance of the optimum in the testing segment:",numpy.mean(list_dist_fixed_parameters),numpy.std(list_dist_fixed_parameters),list_dist_fixed_parameters,"\n"
           print "fraction of realizations that end within delta_doctor:",num_valid_endings/Niter,"mean ending dist:",numpy.mean(list_dist_at_ending_point_fixed_parameters), "SD final dist",numpy.std(list_dist_at_ending_point_fixed_parameters) ,list_dist_at_ending_point_fixed_parameters,"\n"
        
        

           histogram_filename="../Results/weight_shifts/histogr_raw_distances_ending_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
           histograma_gral_negv_posit.histograma(list_dist_at_ending_point_fixed_parameters,histogram_filename)



           histogram_filename2="../Results/weight_shifts/histogr_sum_dist_traject_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
          
           histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,Nbins,histogram_filename2)



           histogram_filename3="../Results/weight_shifts/histogr_sum_dist_testing_segment_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
                 
           histograma_bines_gral.histograma_bins_zero(list_dist_fixed_parameters_testing_segment,Nbins,histogram_filename3)



           histogram_filename4="../Results/weight_shifts/histogr_abs_dist_point_by_point_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
           histograma_gral_negv_posit.histograma(list_abs_dist_point_by_point_indiv_simus_to_actual,histogram_filename4)



           histogram_filename5="../Results/weight_shifts/histogr_dist_point_by_point_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
           histograma_gral_negv_posit.histograma(list_dist_point_by_point_indiv_simus_to_actual,histogram_filename5)




           output_file10="../Results/weight_shifts/Summary_results_infection_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"          
           file10 = open(output_file10,'wt')    
           
           print >> file10, "Summary results from best fit infection with",Niter, "iter, and with values for the parameters:  prob_inf ",prob_infection," prob immune: ",prob_Immune,"\n"
           
           print >> file10, "average distance of the optimum in the testing segment:",numpy.mean(list_dist_fixed_parameters),numpy.std(list_dist_fixed_parameters),list_dist_fixed_parameters,"\n"
           print >> file10,  "fraction of realizations that end within delta_doctor:",num_valid_endings/Niter,"mean ending dist:",numpy.mean(list_dist_at_ending_point_fixed_parameters), "SD final dist",numpy.std(list_dist_at_ending_point_fixed_parameters) ,list_dist_at_ending_point_fixed_parameters,"\n"
           
           
           print >> file10,  "written optimum best fit evolution file:",output_file2
           print  >> file10,"written histogram file: ",histogram_filename
           
           file10.close()


        
           print  "written Summary file: ",output_file10
        




        prob_infection+= delta_prob
      prob_Immune+= delta_prob_Immune

   if for_testing_fixed_set=="NO":   # only if i am exploring the whole landscape, i need to call this function, otherwise, i already know the optimum
      compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(dict_filenames_tot_distance,"Infection_weight",all_team,Niter,None)  # last argument (cutting day) doesn't apply here




   if for_testing_fixed_set=="NO":
      print "written landscape file:",output_file3
예제 #59
0
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

#read graph
G = nx.read_gml('./../data/dolphins/dolphins.gml')
# get normalized Laplacian
A = nx.adjacency_matrix(G).todense()
deg = A.sum(axis=1)
deg = np.squeeze(np.array(deg))
D_hf_inv = np.diag(deg**-0.5)
D = np.diag(deg)
L = D - A
L_norm = np.matmul(np.matmul(D_hf_inv, L), D_hf_inv)

#compute eigenvecs,eigenvalues
vals, vec = np.linalg.eig(L_norm)

# sort
vec = vec[:, np.argsort(vals)]
vals = vals[np.argsort(vals)]

# use the Fiedler vector (eigenvector of the second-smallest eigenvalue) to split nodes into two communities by sign
community_a = vec[:, 1] > 0

#obtain list of node names
V = list(G.nodes)
cluster_a = []
cluster_b = []
for i, x in enumerate(community_a):
    # assign each node to a cluster according to the sign of its Fiedler-vector entry
    if x:
        cluster_a.append(V[i])
    else:
        cluster_b.append(V[i])
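
The snippet above imports matplotlib but stops before plotting. A minimal follow-up sketch of how the two spectral clusters could be drawn; the spring layout and the colors are assumptions, everything else reuses names from the snippet:

# visualize the bisection: one color per cluster, using an assumed spring layout
pos = nx.spring_layout(G, seed=42)
nx.draw_networkx_nodes(G, pos, nodelist=cluster_a, node_color='tab:blue')
nx.draw_networkx_nodes(G, pos, nodelist=cluster_b, node_color='tab:orange')
nx.draw_networkx_edges(G, pos, alpha=0.3)
plt.axis('off')
plt.show()
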
예제 #60
0
    def read_graph(self, filename, file_type='edgelist', separator='\t', remove_whitespace=False, header=False, headerrow=None, vtype=np.uint32, itype=np.uint32):
        """
        Reads the graph from an edgelist, gml or graphml file and initializes the class attribute adjacency_matrix.

        Parameters
        ----------
        filename : string
            Name of the file, for example 'JohnsHopkins.edgelist', 'JohnsHopkins.gml', 'JohnsHopkins.graphml'.

        file_type : string
            Type of file. Currently only 'edgelist', 'gml' and 'graphml' are supported.
            Default = 'edgelist'

        separator : string
            used if file_type = 'edgelist'
            Default = '\t'

        remove_whitespace : bool
            Set to True when there is more than one kind of separator in the file.
            Default = False

        header : bool
            This lets the first line of the file contain a set of header
            information that should be ignored.
            Default = False

        headerrow : int
            Which row to use as column names. This argument takes precedence
            over header=True (which uses headerrow = 0).
            Default = None

        vtype
            numpy integer type of CSC format index array
            Default = np.uint32

        itype
            numpy integer type of CSC format index pointer array
            Default = np.uint32
        """
        if file_type == 'edgelist':

            #dtype = {0:'int32', 1:'int32', 2:'float64'}
            if header and headerrow is None:
                headerrow = 0

            if remove_whitespace:
                df = pd.read_csv(filename, header=headerrow, delim_whitespace=remove_whitespace)
            else:
                df = pd.read_csv(filename, sep=separator, header=headerrow, delim_whitespace=remove_whitespace)
            cols = [0,1,2]
            if header != None:
                cols = list(df.columns)
            source = df[cols[0]].values
            target = df[cols[1]].values
            if df.shape[1] == 2:
                weights = np.ones(source.shape[0])
            elif df.shape[1] == 3:
                weights = df[cols[2]].values
            else:
                raise Exception('GraphLocal.read_graph: df.shape[1] not in (2, 3)')
            self._num_vertices = max(source.max() + 1, target.max()+1)
            #self.adjacency_matrix = source, target, weights

            self.adjacency_matrix = sp.csr_matrix((weights.astype(np.float64), (source, target)), shape=(self._num_vertices, self._num_vertices))

        elif file_type == 'gml':
            warnings.warn("Loading a gml is not efficient, we suggest using an edgelist format for this API.")
            G = nx.read_gml(filename).to_undirected()
            self.adjacency_matrix = nx.adjacency_matrix(G).astype(np.float64)
            self._num_vertices = nx.number_of_nodes(G)

        elif file_type == 'graphml':
            warnings.warn("Loading a graphml is not efficient, we suggest using an edgelist format for this API.")
            G = nx.read_graphml(filename).to_undirected()
            self.adjacency_matrix = nx.adjacency_matrix(G).astype(np.float64)
            self._num_vertices = nx.number_of_nodes(G)

        else:
            print('This file type is not supported')
            return


        self._weighted = False
        for i in self.adjacency_matrix.data:
            if i != 1:
                self._weighted = True
                break
        is_symmetric = (self.adjacency_matrix != self.adjacency_matrix.T).sum() == 0
        if not is_symmetric:
            # Symmetrize matrix, choosing larger weight
            sel = self.adjacency_matrix.T > self.adjacency_matrix
            self.adjacency_matrix = self.adjacency_matrix - self.adjacency_matrix.multiply(sel) + self.adjacency_matrix.T.multiply(sel)
            assert (self.adjacency_matrix != self.adjacency_matrix.T).sum() == 0

        self._num_edges = self.adjacency_matrix.nnz
        self.compute_statistics()
        self.ai = itype(self.adjacency_matrix.indptr)
        self.aj = vtype(self.adjacency_matrix.indices)
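
A hypothetical usage sketch for the method above, assuming it belongs to a GraphLocal-style class (the name is taken from the exception message in the edgelist branch) that can be constructed without arguments; the file names are the ones mentioned in the docstring:

g = GraphLocal()                                            # assumption: empty constructor
g.read_graph('JohnsHopkins.edgelist', file_type='edgelist', separator='\t')
print(g._num_vertices, g._num_edges)
g.read_graph('JohnsHopkins.gml', file_type='gml')           # slower path, emits a warning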