Example #1
def load_train_test_graphs(dataset, recache_input):
    raw_mat_path = 'data/{}.npz'.format(dataset)
    train_graph_path = 'data/{}/train_graph.pkl'.format(dataset)
    test_graph_path = 'data/{}/test_graph.pkl'.format(dataset)

    if recache_input:
        print('loading sparse matrix from {}'.format(raw_mat_path))
        m = load_sparse_csr(raw_mat_path)

        print('splitting train and test...')
        train_m, test_m = split_train_test(
            m,
            weights=[0.9, 0.1])

        print('converting to nx.DiGraph')
        train_g = nx.from_scipy_sparse_matrix(train_m, create_using=nx.DiGraph(), edge_attribute='sign')
        test_g = nx.from_scipy_sparse_matrix(test_m, create_using=nx.DiGraph(), edge_attribute='sign')
                
        print('saving train and test graphs...')
        nx.write_gpickle(train_g, train_graph_path)
        nx.write_gpickle(test_g, test_graph_path)
    else:
        print('loading train and test graphs...')
        train_g = nx.read_gpickle(train_graph_path)
        test_g = nx.read_gpickle(test_graph_path)
    return train_g, test_g
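Note: nx.write_gpickle and nx.read_gpickle, used throughout these examples, were deprecated in NetworkX 2.6 and removed in NetworkX 3.0. On NetworkX 3.x the same round-trip can be written with the standard pickle module; a minimal sketch (the file name is illustrative):

import pickle

import networkx as nx

G = nx.DiGraph()
G.add_edge("a", "b", sign=1)

with open("train_graph.pkl", "wb") as fh:   # replaces nx.write_gpickle(G, path)
    pickle.dump(G, fh, pickle.HIGHEST_PROTOCOL)

with open("train_graph.pkl", "rb") as fh:   # replaces nx.read_gpickle(path)
    G2 = pickle.load(fh)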
Example #2
    def __init__(self, post_fcn = None):
        self.parse_args()
        options = self.options

        input = options.input
        # Input graph
        input_path = os.path.join(input, input + '.gpk')
        self.g = nx.read_gpickle(input_path)
        if not self.g:
            raise Exception("null input file for input path %s" % input_path)
        # Output (reduced) graph.
        # Nodes: (lat, long) tuples w/ list of associated users & location strings
        # Edges: weight: number of links in this direction
        self.r = nx.DiGraph()

        conn = Connection()
        self.input_db = conn[self.options.input_db]
        self.input_coll = self.input_db[self.options.input_coll]

        print "now processing"
        self.reduce()
        print geo_stats(self.r)

        if options.write:
            geo_path = os.path.join(input, input + '.grg')
            nx.write_gpickle(self.r, geo_path)
Example #3
def mainmain():
    redirects = process_redirects(sys.argv[1])
    print "redirects", len(redirects)
    sys.stderr.write(sys.argv[1] + " processed\n")

    links = process_links(sys.argv[2], redirects)
    links_t = len(links)
    print "links", links_t
    sys.stderr.write(sys.argv[2] + " processed\n")

    G = networkx.Graph()

    articles_processed = 0

    for article in links:
        articles_processed = articles_processed + 1
        if (articles_processed % 100000) == 0:
            sys.stdout.write(
                "links processed="
                + str(articles_processed)
                + "/"
                + str(links_t)
                + " (%"
                + str(articles_processed * 100 / links_t)
                + ")\n"
            )
        if len(links[article]) < 6:
            continue
        G.add_node(article)
        for l in links[article]:
            if (l in links) and (article in links[l]):  # back link is also present
                G.add_node(l)
                G.add_edge(article, l)

    networkx.write_gpickle(G, sys.argv[3])
Example #4
def createBridge(numOfNodes, edgeProb, bridgeNodes):
	'''
	numOfNodes: Number of nodes in the clustered part of the Bridge Graph
	edgeProb: Probability of existence of an edge between any two vertices.
	bridgeNodes: Number of nodes in the bridge
	This function creates a Bridge Graph with 2 main clusters connected by a bridge.
	'''	
	print "Generating and Saving Bridge Network..."	
	G1 = nx.erdos_renyi_graph(2*numOfNodes + bridgeNodes, edgeProb) #Create an ER graph with number of vertices equal to twice the number of vertices in the clusters plus the number of bridge nodes.
	G = nx.Graph() #Create an empty graph so that it can be filled with the required components from G1
	G.add_edges_from(G1.subgraph(range(numOfNodes)).edges()) #Generate an induced subgraph of the nodes, ranging from 0 to numOfNodes, from G1 and add it to G
	G.add_edges_from(G1.subgraph(range(numOfNodes + bridgeNodes,2*numOfNodes + bridgeNodes)).edges()) #Generate an induced subgraph of the nodes, ranging from (numOfNodes + bridgeNodes) to (2*numOfNodes + bridgeNodes)

	A = random.randrange(numOfNodes) #Choose a random vertex from the first component
	B = random.randrange(numOfNodes + bridgeNodes,2*numOfNodes + bridgeNodes) #Choose a random vertex from the second component

	prev = A #creating a connection from A to B via the bridge nodes
	for i in range(numOfNodes, numOfNodes + bridgeNodes):
		G.add_edge(prev, i)
		prev = i
	G.add_edge(i, B)
	
	StrMap = {}
	for node in G.nodes():
		StrMap[node] = str(node)
	G = nx.convert.relabel_nodes(G,StrMap)
	filename = "BG_" + str(numOfNodes) + "_" + str(edgeProb) + "_" + str(bridgeNodes) + ".gpickle"
	nx.write_gpickle(G,filename)#generate a gpickle file of the learnt graph.
	print "Successfully written into " + filename
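A minimal way to exercise createBridge, assuming the function above is available and NetworkX 2.x or earlier (the parameter values are made up for illustration):

import networkx as nx

createBridge(numOfNodes=50, edgeProb=0.1, bridgeNodes=5)   # writes BG_50_0.1_5.gpickle
G = nx.read_gpickle("BG_50_0.1_5.gpickle")
print(nx.number_of_nodes(G), nx.number_of_edges(G))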
Example #5
def activity_in_cells(cells, frames):
    allActivity = []
    #  This dictionary keeps the location of cells and average activity in each
    #  cell.
    global g_
    g_.graph["shape"] = cells.shape
    goodCells = {}
    for cellColor in range(1, int(cells.max())):
        print("+ Computing for cell color %d" % cellColor)
        xs, ys = np.where(cells == cellColor)
        pixals = list(zip(xs, ys))  # These pixels belong to this cell.
        if len(pixals) < 1:
            continue

        cellActivity = []
        g_.add_node(cellColor)
        g_.node[cellColor]["pixals"] = pixals
        for x, y in pixals:
            cellActivity.append(frames[y, x, :])
        cellVec = np.mean(cellActivity, axis=0)
        g_.node[cellColor]["activity"] = cellVec
        # Attach this activity to graph as well after normalization.
        allActivity.append(cellVec / cellVec.max())

    # Now compute correlation between nodes and add edges
    for n1, n2 in itertools.combinations(g_.nodes(), 2):
        v1, v2 = g_.node[n1]["activity"], g_.node[n2]["activity"]
        g_.add_edge(n1, n2, weight=sync_index(v1, v2, "dilawar"), weight_sigma=sync_index_clip(v1, v2))
    cellGraph = "cells_as_graph.gpickle"
    nx.write_gpickle(g_, cellGraph)
    print("[INFO] Wrote cell graph to pickle file %s" % cellGraph)
    print("\t nodes %d" % g_.number_of_nodes())
    print("\t edges %d" % g_.number_of_edges())
    activity = np.vstack(allActivity)
    return activity
Example #6
def addNodeDe_EdgeDist():
    """
    Add node degree and edge distance on the filtered Graph
    :return: Graph
    """
    schema = 'total_v3_csvneo4j'
    Graph_type = 'undirected'
    alpha_thred = 0.65
    nodeDegree_thred = 1.0
    DisTypes = ['G','SP','R']

    G = nx.read_gpickle('../filteredG_{}_alpha{}_nodeD{}_{}.gpickle'.format(Graph_type,alpha_thred,nodeDegree_thred,schema))
    print 'after read'
    print 'edges: ', len(G.edges())
    print 'nodes: ', len(G.nodes())
    G = main.addNode_degree(G)
    print 'finish adding node degree'
    G = main.addEdge_distance(G,DisTypes)
    print 'finish adding edge degree'

    nx.write_gpickle(G,'../addNodeEdgeDegree_{}_{}_alpha{}_nodeD{}_{}.gpickle'.format('+'.join(DisTypes),Graph_type,alpha_thred,nodeDegree_thred,schema))
    print 'finishing write gpickle'
    print 'edges: ', len(G.edges())
    print 'nodes: ', len(G.nodes())

    return
Example #7
def graph_preprocessing_with_counts(G_input=None, save_file=None):

    if not G_input:
        graph_file = os.path.join(work_dir, "adj_graph.p")
        G = nx.read_gpickle(graph_file)
    else:
        G = G_input.copy()

    print "Raw graph size:", G.size()
    print "Raw graph nodes", G.number_of_nodes()

    profile2prob = {l.split()[0]: float(l.split()[1]) for l in open(os.path.join(work_dir, 'profile_weight.txt'))}

    for edge in G.edges(data=True):
        nodes = edge[:2]
        _weight = edge[2]['weight']
        _count = edge[2]['count']
        
        if _count < 3:
            G.remove_edge(*nodes)

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()

    G.remove_nodes_from(nx.isolates(G))

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()
    
    if save_file:
        print "Saving to", save_file
        nx.write_gpickle(G,save_file)

    return G
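The same filter-then-prune pattern as a small self-contained sketch (graph contents are made up; the count threshold of 3 is kept from the example, and list() guards against mutating the graph while iterating, which also keeps it working on NetworkX 2.x):

import networkx as nx

G = nx.Graph()
G.add_edge("p1", "p2", weight=0.9, count=5)
G.add_edge("p2", "p3", weight=0.4, count=1)

for u, v, data in list(G.edges(data=True)):
    if data["count"] < 3:
        G.remove_edge(u, v)
G.remove_nodes_from(list(nx.isolates(G)))   # drop nodes left without edges

nx.write_gpickle(G, "adj_graph_filtered.p")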
Example #8
    def save_pickle_in_cfile(self, local_fname, networkref):
        """ Creates a pickled version of the graph and stores it in the
        cfile
        
        Parameters
        ----------
        local_fname: string
            The filename used in the Pickle folder to store
        networkref: NetworkX Graph instance
            The NetworkX graph to pickle
        
        """

        logger.info('Write a generated graph pickle to the connectome file.')
        picklefilepath = os.path.join(tempfile.gettempdir(),local_fname)
        from networkx import write_gpickle
        # add nodekeys, edgekeys, graphid to helpernode 'n0' before storage
        helperdict = {'nodekeys': networkref.nodekeys.copy(), \
                      'edgekeys': networkref.edgekeys.copy(), \
                      'graphid' : networkref.networkid }
        networkref.graph.add_node('n0')
        networkref.graph.node['n0'] = helperdict
        write_gpickle(networkref.graph, picklefilepath)
        networkref.graph.remove_node('n0')
        
        from zipfile import ZipFile, ZIP_DEFLATED
        tmpzipfile = ZipFile(self.data.fullpathtofile, 'a', ZIP_DEFLATED)
        # store it in the zip file
        tmpzipfile.write(picklefilepath, 'Pickle/' + local_fname)
        tmpzipfile.close()
        
        # remove pickle file from system
        logger.debug('Unlink: %s' % picklefilepath)
        os.unlink(picklefilepath)
Example #9
def better_display(G,slim,pic,fit=None,pngpath=None,bw=None):
    import engine.Genetic as Genetic
    _fit = "C" if fit == "C" else "L"
    R = Genetic.genetic(G,_fit,pngpath,bw)
    for node in G:
        G.node[node]["pos"] = R[node]
    nx.write_gpickle(G,pic)
Example #10
def create_nodes(paths, args):
    """ creates nodes
    Parameters
    ----------
    paths.node_file       : file
    args.contig_file      : file
    """

    # read in fasta to dictionary.
    seqs = io.load_fasta(args.contig_file)

    # create graph.
    G = nx.MultiGraph()

    # add nodes to graph.
    for name, seq in seqs.items():

        # skip split names.
        tmp = name.split(" ")
        name = tmp[0]

        # add node.
        G.add_node(name, {'seq':seq, 'width':len(seq), 'cov':0})

    # write to disk.
    nx.write_gpickle(G, paths.node_file)
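Note: the positional attribute dict in G.add_node(name, {...}) above only works on NetworkX 1.x; since NetworkX 2.0 node attributes are passed as keyword arguments. A sketch with made-up values:

import networkx as nx

G = nx.MultiGraph()
name, seq = "contig_1", "ACGT"
G.add_node(name, seq=seq, width=len(seq), cov=0)
# or, unpacking an existing dict:
G.add_node(name, **{'seq': seq, 'width': len(seq), 'cov': 0})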
Example #11
def G_init(slim_dict,slim):
    def check_field(pos,G,rate):
        for node in G:
            node_pos = G.node[node]["pos"]
            if math.sqrt(np.sum((pos-node_pos)**2)) < rate:
                return True
        return False

    if len(slim_dict) < 25:
        rate = 0.42
    elif len(slim_dict) < 42:
        rate = 0.3
    else:
        rate = 0.25

    G = nx.Graph()
    for node in slim_dict:
        pos = np.array((0.0,0.0))
        while check_field(pos,G,rate):
            pos = np.array((2.8*random.random()-1.4, 2.8*random.random()-1.4))
        G.add_node(node,size=len(GOID2group[node]),color=0,pos=pos)
    for node in G:
        G.node[node]["sum"] = 0
        G.node[node]["1"] = 0
        G.node[node]["2"] = 0
    for GOID_1 in slim_dict:
        for GOID_2 in GO2interact[GOID_1]:
            if GOID_2 != GOID_1 and GOID_2 in slim_dict:
                G.add_edge(GOID_1,GOID_2,weight=GO2interact[GOID_1][GOID_2],percent=0.5,percent_1=0.5,percent_2=0.5)
    png = path+"/results/slim_"+slim+"/slim_"+slim+".png"
    G = nx.relabel_nodes(G,slim_dict)
    nx.write_gpickle(G,path+"/results/G_slim_"+slim)
    draw_G(G,png)
Example #12
def fit_forestFire_mod(graphSize, graphID, dkPath, original2k, resultPath):
    """
    Runs synthetic graph tests for various 'p' values (burn rate).
    """
    outfile = open(resultPath + graphID + '_ff_dkDistances.txt', 'w')
    
    p = 0.01
    while p < 1.0:
        print 'Running modified Forest Fire with parameters: n = ', graphSize, ' p = ', p
        
        newFile = graphID + '_ff_' + str(p)

        # Create synthetic graph
        syntheticGraph = sm.forestFire_mod(graphSize, p)

        # Write pickle, edge list, and 2k distro to file
        print 'Writing pickle and calculating dK-2...\n'
        nx.write_gpickle(syntheticGraph, resultPath + newFile + '.pickle')
        getdk2(syntheticGraph, newFile, dkPath, resultPath)

        # Find distance between the dK-2 distributions
        dkDistance = tk.get_2k_distance(original2k, resultPath + newFile + '_target.2k')
        outfile.write(str(dkDistance) + '\tp = ' + str(p) + '\n')
        outfile.flush()

        p += 0.01

    outfile.close()
Example #13
def buildGraph(db):

    DATA_DIR = os.environ['OPENSHIFT_DATA_DIR']
    file = os.path.join(DATA_DIR, "UserTagsGraph.gpickle")

    if not os.path.isfile(file):
        user_network = nx.Graph()
        users = DAL.UserTags.getAll(db)
        user_tags = {}
        all_tags = {}

        for user in users:
            tags = user.tags_to_list()
            user_tags[user.id] = tags
            for tag in tags:
                all_tags.setdefault(tag, set()).add(user.id)

        for tag in all_tags:
            user_network.add_edges_from([perm for perm in itertools.permutations(all_tags[tag], 2)])

        #save graph to file
        nx.write_gpickle(user_network, file)
    else:
        user_network = nx.read_gpickle(file)

    return user_network
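The tag-to-clique construction above, reduced to a self-contained sketch (user IDs and tags are made up; permutations yields both orderings of each pair, which an undirected Graph collapses to one edge):

import itertools

import networkx as nx

all_tags = {"python": {1, 2, 3}, "graphs": {2, 4}}

user_network = nx.Graph()
for tag, users in all_tags.items():
    user_network.add_edges_from(itertools.permutations(users, 2))

nx.write_gpickle(user_network, "UserTagsGraph.gpickle")
print(user_network.number_of_edges())   # 4: the 3-clique plus the (2, 4) edge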
Example #14
def computemaxweight(graph,path,protlist,path_lenght,alone):
	elements=[]
	nodes=[]
	ess=[]
	print "------Starting Graph------"
	print nx.info(graph)
	
	for i in path:
		max=0
		for j in path[i]:
			count=0
			for k in range(0,len(j)-1,1):
				count=count+float(graph.edge[j[k]][j[k+1]]["weight"])
			if count>max:
				max=count
				elements=j
		
		ess.extend(elements[1:len(elements)-1])
	ess=list(set(ess))
	H=graph.subgraph(ess+protlist)
	#H.add_nodes_from(protlist)
	graphred=check(H,path_lenght,ess,protlist,path)
	nx.write_gpickle(graphred,"weightmaxfilter.gpickle")
	f1=open("weightproteins.txt","w")
	for i in graphred.nodes():
		if i in alone:
			pass
		else:	
			f1.write(i+"\n")
Example #15
def main(**kwargs):
    cells = kwargs["cells"]
    frames = kwargs["frames"]
    if isinstance(cells, str):
        cells = np.load(cells)
    if isinstance(frames, str):
        frames = np.load(frames)

    logger.info("Creating correlation graph")
    N = int(cells.max())
    for i in range(1, N):
        logger.info("\tDone %d out of %d" % (i, N))
        indices = list(zip(*np.where(cells == i)))
        if len(indices) < 2:
            continue
        pixals = []
        for y, x in indices:
            pixals.append(frames[x, y, :])
        pixals = np.mean(pixals, axis=0)
        g_.add_node(i, timeseries=pixals, indices=indices)

    g_.graph["shape"] = frames[:, :, 0].shape
    create_correlate_graph(g_)
    outfile = kwargs.get("output", False) or "correlation_graph.pickle"
    logger.info("Writing pickle of graph to %s" % outfile)
    nx.write_gpickle(g_, outfile)
    logger.info("Graph pickle is saved to %s" % outfile)
Example #16
def handle_by_file(out_dir, tweet_file, country, net_func=comprehend_network):
    try:
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        if net_func == comprehend_network:
            net_type = "comprehend"
        elif net_func == user2user_network:
            net_type = "user2user"
        elif net_func == content_based_network:
            net_type = "content"
        elif net_func == entity_network:
            net_type = "entity"
        elif net_func == entity_corr_network:
            net_type = "entity_corr"
        else:
            net_type = "normal"

        net = net_func(tweet_file)
        net.graph["country"] = country
        g_date = re.search(r'\d{4}-\d{2}-\d{2}', tweet_file).group()
        net.graph["date"] = g_date
        out_file = os.path.join(out_dir,
                                "graph_%s_%s" % (net_type,
                                                 tweet_file.split(os.sep)[-1]))
        nx.write_gpickle(net, out_file + ".gpickle")
        nx.write_graphml(net, out_file + ".graphml")
    except Exception, e:
        print "Error Encoutered: %s, \n %s" \
            % (tweet_file, sys.exc_info()[0]), e
Example #17
def reduceGraph(read_g, write_g, minEdgeWeight, minNodeDegree, Lp, Sp):
    """
    Simplify the undirected graph and then update the 3 undirected weight properties.
    :param read_g: is the graph pickle to read
    :param write_g: is the updated graph pickle to write
    :param minEdgeWeight: the original weight of each edge should be >= minEdgeWeight
    :param minNodeDegree: the degree of each node should be >= minNodeDegree. the degree here is G.degree(node), NOT G.degree(node, weight='weight')
    :return: None
    """
    G=nx.read_gpickle(read_g)
    print 'number of original nodes: ', nx.number_of_nodes(G)
    print 'number of original edges: ', nx.number_of_edges(G)

    for (u,v,w) in G.edges(data='weight'):
        if w < minEdgeWeight:
            G.remove_edge(u,v)

    for n in G.nodes():
        if G.degree(n)<minNodeDegree:
            G.remove_node(n)

    print 'number of new nodes: ', nx.number_of_nodes(G)
    print 'number of new edges: ', nx.number_of_edges(G)

    for (a, b, w) in G.edges_iter(data='weight'):
        unweight_allocation(G, a, b, w,Lp,Sp)

    print 'update weight ok'
    nx.write_gpickle(G, write_g)

    return
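Note: G.edges_iter() used above was removed in NetworkX 2.0; on 2.x the same pass over weighted edges is written with G.edges(data='weight'). A small self-contained sketch of the filtering step (threshold value is made up):

import networkx as nx

G = nx.Graph()
G.add_edge(1, 2, weight=3)
G.add_edge(2, 3, weight=1)

minEdgeWeight = 2
for u, v, w in list(G.edges(data='weight')):   # list() so edges can be removed safely
    if w < minEdgeWeight:
        G.remove_edge(u, v)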
Example #18
def load_data():
    start = time.time()
    try:
        print("Loading data from /data pickles and hfd5 adj matrices")
        f = h5py.File('data/cosponsorship_data.hdf5', 'r')
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                adj_matrix_lookup[(chamber, congress)] = np.asarray(f[chamber + str(congress)])

                igraph_graph = igraph.load("data/" + chamber + str(congress) + "_igraph.pickle", format="pickle")
                igraph_graph_lookup[(chamber, congress, False)] = igraph_graph

                nx_graph = nx.read_gpickle("data/" + chamber + str(congress) + "_nx.pickle")
                nx_graph_lookup[(chamber, congress, False)] = nx_graph
    except IOError as e:
        print("Loading data from cosponsorship files")
        f = h5py.File("data/cosponsorship_data.hdf5", "w")
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                print("Starting %s %s" % (str(congress), chamber))
                adj_matrix = load_adjacency_matrices(congress, chamber)
                data = f.create_dataset(chamber + str(congress), adj_matrix.shape, dtype='f')
                data[0: len(data)] = adj_matrix

                # igraph
                get_cosponsorship_graph(congress, chamber, False).save("data/" + chamber + str(congress) + "_igraph.pickle", "pickle")
                # networkx
                nx.write_gpickle(get_cosponsorship_graph_nx(congress, chamber, False), "data/" + chamber + str(congress) + "_nx.pickle")

                print("Done with %s %s" % (str(congress), chamber))
    print("Data loaded in %d seconds" % (time.time() - start))
Example #19
    def save_graph(self, graphname, fmt='edgelist'):
        """
        Saves the graph to disk

        **Positional Arguments:**

                graphname:
                    - Filename for the graph

        **Optional Arguments:**

                fmt:
                    - Output graph format
        """
        self.g.graph['ecount'] = nx.number_of_edges(self.g)
        g = nx.convert_node_labels_to_integers(self.g, first_label=1)
        if fmt == 'edgelist':
            nx.write_weighted_edgelist(g, graphname, encoding='utf-8')
        elif fmt == 'gpickle':
            nx.write_gpickle(g, graphname)
        elif fmt == 'graphml':
            nx.write_graphml(g, graphname)
        else:
            raise ValueError('edgelist, gpickle, and graphml currently supported')
        pass
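Typical calls to the method above, assuming an instance obj whose self.g graph is already populated (object and file names are illustrative):

obj.save_graph("connectome.edgelist")                 # default: weighted edge list
obj.save_graph("connectome.gpickle", fmt="gpickle")
obj.save_graph("connectome.graphml", fmt="graphml")
# any other fmt raises ValueError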
Example #20
    def save_celltype_graph(self, filename="celltype_conn.gml", format="gml"):
        """
        Save the celltype-to-celltype connectivity information in a file.
        
        filename -- path of the file to be saved.

        format -- format to save in. Using GML as GraphML support is
        not complete in NetworkX.  

        """
        start = datetime.now()
        if format == "gml":
            nx.write_gml(self.__celltype_graph, filename)
        elif format == "yaml":
            nx.write_yaml(self.__celltype_graph, filename)
        elif format == "graphml":
            nx.write_graphml(self.__celltype_graph, filename)
        elif format == "edgelist":
            nx.write_edgelist(self.__celltype_graph, filename)
        elif format == "pickle":
            nx.write_gpickle(self.__celltype_graph, filename)
        else:
            raise Exception("Supported formats: gml, graphml, yaml. Received: %s" % (format))
        end = datetime.now()
        delta = end - start
        config.BENCHMARK_LOGGER.info(
            "Saved celltype_graph in file %s of format %s in %g s"
            % (filename, format, delta.seconds + delta.microseconds * 1e-6)
        )
        print "Saved celltype connectivity graph in", filename
Example #21
def create_graph_df(vtask_paths, graphs_dir_out):
    """
    Creates a DataFrame that maps verification-task source files to networkx digraphs built from their DOT files
    :param vtask_paths:
    :param graphs_dir_out:
    :return:
    """
    if not isdir(graphs_dir_out):
        raise ValueError('Invalid destination directory.')
    data = []
    graphgen_times = []

    print('Writing graph representations of verification tasks to {}'.format(graphs_dir_out), flush=True)

    common_prefix = commonprefix(vtask_paths)
    for vtask in tqdm(vtask_paths):
        short_prefix = dirname(common_prefix)
        path = join(graphs_dir_out, vtask[len(short_prefix):][1:])

        if not os.path.exists(dirname(path)):
            os.makedirs(dirname(path))

        ret_path = path + '.pickle'

        # DEBUG
        if isfile(ret_path):
            data.append(ret_path)
            continue

        start_time = time.time()

        graph_path, node_labels_path, edge_types_path, edge_truth_path, node_depths_path \
            = _run_cpachecker(abspath(vtask))
        nx_digraph = nx.read_graphml(graph_path)

        node_labels = _read_node_labeling(node_labels_path)
        nx.set_node_attributes(nx_digraph, 'label', node_labels)

        edge_types = _read_edge_labeling(edge_types_path)
        parsed_edge_types = _parse_edge(edge_types)
        nx.set_edge_attributes(nx_digraph, 'type', parsed_edge_types)

        edge_truth = _read_edge_labeling(edge_truth_path)
        parsed_edge_truth = _parse_edge(edge_truth)
        nx.set_edge_attributes(nx_digraph, 'truth', parsed_edge_truth)

        node_depths = _read_node_labeling(node_depths_path)
        parsed_node_depths = _parse_node_depth(node_depths)
        nx.set_node_attributes(nx_digraph, 'depth', parsed_node_depths)

        assert not isfile(ret_path)
        assert node_labels and parsed_edge_types and parsed_edge_truth and parsed_node_depths
        nx.write_gpickle(nx_digraph, ret_path)
        data.append(ret_path)

        gg_time = time.time() - start_time
        graphgen_times.append(gg_time)

    return pd.DataFrame({'graph_representation': data}, index=vtask_paths), graphgen_times
Example #22
def main():
    if not (len(sys.argv) == 2 and sys.argv[1] in ["forward", "backward"]):
        print("usage: ./gen_graph.py [forward/backward]", file=sys.stderr)
        sys.exit(1)

    direction = sys.argv[1]
    if direction == "forward":
        f = roundf
    else:
        f = inv_roundf
    n = 65536

    g = nx.DiGraph()
    for x in range(n):
        for ns, w in f(convert_int(x)):
            y = convert_states(ns)
            g.add_edge(x, y, weight=w)
        print(x)
    nx.write_gpickle(g, "{}.gpickle".format(direction))

    print("Generated {}.gpickle.".format(direction))

    nx.reverse(g, copy=False)
    nx.write_gpickle(g, "rev_{}.gpickle".format(direction))

    print("Generated rev_{}.gpickle.".format(direction))
Example #23
def store_graph(graph,name=None):
	filename=datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')+"_Network.gpickle"
	if name != None:
		filename=name+".gpickle"
	nx.write_gpickle(graph,filename)
	print("Finish storing the graph"+" "+filename)
	return filename
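A small usage sketch for store_graph (the graph is arbitrary; with no name argument the file is timestamped):

import networkx as nx

g = nx.karate_club_graph()
store_graph(g)                        # -> "<current timestamp>_Network.gpickle"
store_graph(g, name="karate_run1")    # -> "karate_run1.gpickle"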
Example #24
def createScaleFreeNetwork(numOfNodes, degree):
	'''
	numOfNodes: The number of nodes that the scale free network should have
	degree: The degree of the Scale Free Network
	This function creates a Scale Free Network containing 'numOfNodes' nodes, where each new node attaches with 'degree' edges (Barabasi-Albert model)
	It generates the required graph and saves it in a file. It runs the Reinforcement Algorithm to create a weightMatrix and an ordering of the vertices based on their importance by Flagging.
	'''
	global reinforce_time
	G = nx.barabasi_albert_graph(numOfNodes, degree) #Create a Scale Free Network of the given number of nodes and degree
	StrMap = {}
	for node in G.nodes():
		StrMap[node] = str(node)
	G = nx.convert.relabel_nodes(G,StrMap)

	print "Undergoing Machine Learning..."

	start = time.time()
	H = reinforce(G) #Enforce Machine Learning to generate a gml file of the learnt graph.
	finish = time.time()
	reinforce_time = finish - start

	print "Machine Learning Completed..."
	filename = "SFN_" + str(numOfNodes) + "_" + str(degree) + '.gpickle' 
	nx.write_gpickle(H,filename)#generate a gpickle file of the learnt graph.
	print "Learnt graph Successfully written into " + filename
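An illustrative call, assuming the reinforce() helper used above is defined and NetworkX 2.x or earlier (parameter values are made up):

import networkx as nx

createScaleFreeNetwork(numOfNodes=100, degree=3)   # writes SFN_100_3.gpickle
H = nx.read_gpickle("SFN_100_3.gpickle")
print(H.number_of_nodes())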
Example #25
    def generate_weak_links_map(self):
        weak_nodes = self.detect_weak_nodes(-5,25)
        active_weak_nodes = [node[0] for node in weak_nodes if max([l[1] for l in node[1]]) > 10]
        ap_nodes = [node for node in self.g.nodes() if self.g.in_degree(node) > 0]

        edges = self.g.edges(active_weak_nodes)
        snr_g = nx.DiGraph()
        snr_g.add_nodes_from(active_weak_nodes + ap_nodes)
        snr_g.add_edges_from(edges)
        
        for node in active_weak_nodes:
            snr_g.node[node]['type'] = 'sta'
        for node in ap_nodes:
            snr_g.node[node]['type'] = 'ap'
                

        nx.write_gpickle(snr_g,'graph_pickle_connectivity_%d.pkl' % time.time())
        #nx.draw(snr_g,with_labels=False)
        #pylab.savefig("connectivity-graph-%d.png" % (int(time.time())))

        d = json_graph.node_link_data(snr_g) # node-link format to serialize
        # write json 
        json.dump(d, open('force/force.json','w'))


        print ap_nodes
Example #26
def correlate_node_by_sync( cells ):
    global template_ , avg_
    for m, n in itertools.combinations( cells.nodes( ), 2 ):
        vec1, vec2 = cells.node[m]['timeseries'], cells.node[n]['timeseries']
        corr = sync_index( vec1, vec2 )
        rcorr = sync_index( vec2, vec1 )
        if corr > 0.6:
            cells.add_edge( m, n, weight = corr )
            cells.add_edge( n, m, weight = rcorr )

    outfile = 'final.png' 
    plt.figure( figsize = (12,8) )
    plt.subplot( 2, 2, 1 )
    plt.imshow( avg_, interpolation = 'none', aspect = 'auto' )
    plt.title( 'All frames averaged' )
    plt.colorbar( ) # orientation = 'horizontal' )

    syncImg = np.zeros( shape=template_.shape )
    syncDict = defaultdict( list )
    nx.write_gpickle( cells, 'cells.gpickle' )
    logger.info( 'Logging out after writing to graph.' )
    return 
    try:
        nx.drawing.nx_agraph.write_dot( cells, 'all_cell.dot' )
    except Exception as e:
        logger.warn( 'Failed to write dot file %s' % e )
    for i, c in enumerate( nx.attracting_components( cells ) ):
        if len(c) < 2:
            continue
        logger.info( 'Found attracting component of length %d' % len(c) )
        for p in c:
            cv2.circle( syncImg, (p[1], p[0]), 2, (i+1), 2 )
            # syncDict[str(c)].append( cells.node[p]['timeseries'] )

    plt.subplot( 2, 2, 2 )
    plt.imshow( timeseries_
            , interpolation = 'none', aspect = 'auto', cmap = 'seismic' )
    plt.colorbar(  ) #orientation = 'horizontal' )
    plt.title( 'Activity of each pixal' )

    plt.subplot( 2, 2, 3 )
    plt.imshow( syncImg, interpolation = 'none', aspect = 'auto' )
    plt.colorbar( ) #orientation = 'horizontal' )

    # Here we draw the synchronization.
    plt.subplot( 2, 2, 4 )
    # clusters = []
    # for c in syncDict:
        # clusters += syncDict[c]
        # # Append two empty lines to separate the clusters.
        # clusters += [ np.zeros( timeseries_.shape[1] ) ] 
    # try:
        # plt.imshow( np.vstack(clusters), interpolation = 'none', aspect = 'auto' )
        # plt.colorbar(  ) #orientation = 'horizontal' )
    # except Exception as e:
        # print( "Couldn't plot clusters %s" % e )
    plt.tight_layout( )
    plt.savefig( outfile )
    logger.info( 'Saved to file %s' % outfile )
Example #27
def _write_networks_to_file():
	g1=AnnotatedGraph()
	g1.load_HPRDNPInteractome()
	nx.write_gpickle(g1,LINKROOT+"/datasets/HPRDNPInteractome.gPickle")

	g2=AnnotatedGraph()
	g2.load_HPRDOnlyInteractome()
	nx.write_gpickle(g2,LINKROOT+"/datasets/HPRDInteractome.gPickle")
Example #28
    def persist(self):
        #print "Persisted"
        prefixes = ["", ".reserve.01",  ".reserve.02", ".reserve.03", ".reserve.04", ".reserve.05"]

        for i in range(len(prefixes)-2, -1, -1):
            if os.path.exists(self.graph_file + prefixes[i]):
                os.rename(self.graph_file + prefixes[i], self.graph_file + prefixes[i+1])
        nx.write_gpickle(self.nxgraph, self.graph_file + prefixes[0]) # "/home/alexmak/test.pickle")#
Example #29
    def save_data(self):
        nx.write_gpickle(self.graph, "popitgraph.pickle")
        f = open("node_color.pickle", "wb")
        pickle.dump(self.colors, f)
        f.close()
        f = open("node_label.pickle", "wb")
        pickle.dump(self.labels, f)
        f.close()
Example #30
    def fragment_graph(self, path, x=3, y=3, mode='normal'):
        """ Generate fragments of the graph and save them
        individually in path.

        mode == 'pixels': x, y are dimensions of fragments
        otherwise: x, y are number of fragments in resp. axis
        """
        print "Fragmenting."
        G = nx.connected_component_subgraphs(self.graph)[0]

        # bounding box
        xs = [d['x'] for n, d in G.nodes_iter(data=True)]
        ys = [d['y'] for n, d in G.nodes_iter(data=True)]

        x_min = min(xs)
        x_max = max(xs)
        y_min = min(ys)
        y_max = max(ys)
        
        # equal sized tiles. otherwise x, y mean number of tiles in
        # respective axis
        if mode == 'pixels':
            x = float(x)
            y = float(y)

            x_fragments = int((x_max - x_min)/x)
            y_fragments = int((y_max - y_min)/y)

            print "Tiling into {}x{} fragments of size {}x{}.".format(
                    x_fragments, y_fragments, x, y)
        else:
            # x and y already give the number of fragments per axis
            x_fragments = int(x)
            y_fragments = int(y)

        # fragment into pieces
        fragments = []
        for i in xrange(x_fragments):
            for j in xrange(y_fragments):
                x0 = x_min + i/float(x_fragments)*(x_max - x_min)
                x1 = x0 + 1./float(x_fragments)*(x_max - x_min)

                y0 = y_min + j/float(y_fragments)*(y_max - y_min)
                y1 = y0 + 1./float(y_fragments)*(y_max - y_min)
                
                nodes = [n for n, d in G.nodes_iter(data=True)
                        if d['x'] >= x0 and d['x'] <= x1
                        and d['y'] >= y0 and d['y'] <= y1]

                fragments.append(G.subgraph(nodes))
        
        # save fragments as individual graphs
        if not os.path.exists(path):
            os.makedirs(path)
        
        print "Saving fragments."
        name, ext = os.path.splitext(os.path.basename(self.fname))
        for i, fragment in enumerate(fragments):
            nx.write_gpickle(fragment, 
                    os.path.join(path, 
                        name + '_fragment_{}.gpickle'.format(i)))
Example #31
def write_graph(graph, path):
    """Given a graph object and a path, save graph to path as gpickle"""
    nx.write_gpickle(graph, path)
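Usage sketch for the thin wrapper above (NetworkX 2.x or earlier, since write_gpickle/read_gpickle were removed in 3.0):

import networkx as nx

write_graph(nx.path_graph(5), "path5.gpickle")
G = nx.read_gpickle("path5.gpickle")
assert G.number_of_edges() == 4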
Example #32
def main():
    # Read the retweet pickle files
    # Build a networkx graph from the loaded data
    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/retweetListBlue.pkl',
            'rb') as input:
        retweetListBlue = pickle.load(input)

    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/retweetListRed.pkl',
            'rb') as input:
        retweetListRed = pickle.load(input)

    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/retweetListYellow.pkl',
            'rb') as input:
        retweetListYellow = pickle.load(input)

    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/probRetBlue.pkl',
            'rb') as input:
        probRetBlue = pickle.load(input)

    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/probRetRed.pkl',
            'rb') as input:
        probRetRed = pickle.load(input)
    List = []

    for i in retweetListBlue:

        List.append(i)
    for i in retweetListRed:

        List.append(i)

    DizPesi = {}

    for i in probRetBlue:

        if not DizPesi.has_key(i):
            DizPesi[i] = probRetBlue[i]
        else:
            continue

    for i in probRetRed:

        if not DizPesi.has_key(i):
            DizPesi[i] = probRetRed[i]
        else:
            continue

    nodi_Blue = NodeDict(retweetListBlue)
    nodi_Red = NodeDict(retweetListRed)

    G = createGraph(List, DizPesi)
    size_node_degree = []

    print "Numero NODI", len(G.nodes)
    UpdateNode(retweetListYellow, nodi_Blue)
    UpdateNode(retweetListYellow, nodi_Red)
    #print(test)

    posizioneBlue = PosNode(G.nodes(), nodi_Blue)
    posizioneRed = PosNode(G.nodes(), nodi_Red)

    dizPosizioneBlue = PosNodeDizionario(G.nodes, nodi_Blue)
    dizPosizioneRed = PosNodeDizionario(G.nodes, nodi_Red)
    #dizPosizioneYellow = PosNodeDizionario(G.nodes,nodi_Yellow);

    #List of Polarization of Elite and Listener
    firstPolar = setFirstPolarization(G, dizPosizioneBlue, dizPosizioneRed)

    #print "Passo 0 di polarizzazione ",firstPolar

    dictFirstPol = {}
    x = 0
    for i in G.nodes():

        if not dictFirstPol.has_key(i):
            dictFirstPol[i] = firstPolar[x]
            x = x + 1

    list = []
    for i in G.nodes():

        list.append(i)

    # adjacency matrix built from the node list
    mat_attr = nx.attr_matrix(G, rc_order=list)

    at_array = np.array(mat_attr)

    newPol = opinionPolarization(G, at_array, firstPolar, list)

    dictPol = {}
    x = 0
    for i in G.nodes():

        if not dictPol.has_key(i):
            dictPol[i] = newPol[x]
            x = x + 1

    print(len(G.nodes))
    #size = float(len(set(partition.values())))

    # change node colors according to their degree
    #Polar = Polarization(p_array,posizioneRed,posizioneBlue,len(G.nodes),matriceProbRetweet)

    # works with the partition
    node_color = colorNode(G, nodi_Blue, nodi_Red)

    #node_colorPol= colorNodePol(len(G.nodes()),newPol)

    testdict = opinionPolarizationDict(G, at_array, firstPolar, list)

    print("testdict ", testdict)
    list_lastPol = testdict.get(len(testdict) - 1)
    #print(list_lastPol)
    #print(set(testdict[1]))
    node_colorPol = colorNodePol(len(G.nodes()), list_lastPol)

    test = {}
    x = 0
    for i in G.nodes():

        if not testdict.has_key(i):
            test[i] = testdict.get(len(testdict) - 1)[x]
            x = x + 1

    for i in range(0, len(testdict)):
        if i + 1 == (len(testdict) - 1):
            break
        print("i", i, "j", i + 1, " simili=",
              set(testdict[i]) == set(testdict[i + 1]))

    #labels= labelPolarization(Polar,G,nodi_Blue,nodi_Red)

    pos = nx.spring_layout(G)
    # For the partition
    # list_nodes=[]
    # for com in set(partition.values()):
    #     count = count + 1.
    #     x=0
    #     for nodes in partition.keys():
    #        # print "nodes",nodes
    #         if partition[nodes] == com :
    #             list_nodes.append(nodes)

    # with the partition
    #nx.draw_networkx_nodes(G, pos Biotestamento,list_nodes,with_labels=False,node_color=node_color)

    nx.write_gpickle(G,
                     '../Test/Biotestamento/Gennaio/grafoBiotestVen.pickle',
                     protocol=pickle.HIGHEST_PROTOCOL)
    with open(
            '../Test/Biotestamento/Gennaio/dizionarioPolarizzazioneVenezuela.pickle',
            "wb") as output:
        pickle.dump(test, output, pickle.HIGHEST_PROTOCOL)
    with open(
            '../Test/Biotestamento/Gennaio/listaColoriPolarizzazioneVenezuela.pickle',
            "wb") as output:
        pickle.dump(node_colorPol, output, pickle.HIGHEST_PROTOCOL)

    nx.draw_networkx_nodes(G,
                           pos,
                           G.nodes(),
                           with_labels=True,
                           node_color=node_colorPol)

    nx.draw_networkx_edges(G, pos, alpha=0.5, edge_color='b')

    nx.draw_networkx_labels(G, pos, test, font_size=8)

    plt.savefig("../Test/Biotestamento/Gennaio/PolarizzazioneVene.png",
                format="PNG")

    plt.show()
Example #33
    def write_graph(self, outfile, manifest):
        """Write the graph to a gpickle file. Before doing so, serialize and
        include all nodes in their corresponding graph entries.
        """
        out_graph = _updated_graph(self.graph, manifest)
        nx.write_gpickle(out_graph, outfile)
Example #34
def read_graph(bestedges, maxerr=100, directed=False):
    logging.debug("Max error = {0}%".format(maxerr))
    tag = "dir." if directed else ""
    bestgraph = bestedges.split(".")[0] + ".err{0}.{1}graph".format(
        maxerr, tag)
    if need_update(bestedges, bestgraph):
        G = {} if directed else nx.Graph()
        fp = open(bestedges)
        best_store = {}
        for row in fp:
            if row[0] == '#':
                continue
            id1, lib_id, best5, o5, best3, o3, j1, j2 = row.split()
            id1, best5, best3 = int(id1), int(best5), int(best3)
            j1, j2 = float(j1), float(j2)
            if j1 <= maxerr or j2 <= maxerr:
                if not directed:
                    G.add_node(id1)
                id1p5, id1p3 = "{0}-5'".format(id1), "{0}-3'".format(id1)
                best5o5 = "{0}-{1}".format(best5, o5)
                best3o3 = "{0}-{1}".format(best3, o3)
                best_store[id1p5] = best5o5
                best_store[id1p3] = best3o3
            if best5 and j1 <= maxerr:
                if directed:
                    G[id1p5] = best5o5
                else:
                    G.add_edge(best5, id1, weight=10)
            if best3 and j2 <= maxerr:
                if directed:
                    G[id1p3] = best3o3
                else:
                    G.add_edge(id1, best3, weight=10)

        # Annotate edge weight for mutual best link, note that edge weights are
        # (11) set close to 10, to minimize impact to layout (Yifan Hu's
        # multilevel)
        nmutuals = 0
        for k, v in best_store.items():
            if best_store.get(v) == k and k < v:
                k, v = int(k.split("-")[0]), int(v.split("-")[0])
                G[k][v]["weight"] = 11
                nmutuals += 1
        logging.debug("Mutual best edges: {0}".format(nmutuals))

        if directed:
            fw = open(bestgraph, "w")
            dump(G, fw)
            fw.close()
        else:
            nx.write_gpickle(G, bestgraph)
        logging.debug("Graph pickled to `{0}`".format(bestgraph))

        # Compute node degree histogram and save in (degree, counts) tab file
        degrees = G.degree()
        degree_counter = Counter(degrees.values())
        degreesfile = "degrees.txt"
        fw = open(degreesfile, "w")
        for degree, count in sorted(degree_counter.items()):
            print("{0}\t{1}".format(degree, count), file=fw)
        fw.close()
        logging.debug("Node degree distribution saved to `{0}`".\
                        format(degreesfile))

        # Save high-degree (top 0.1%) nodes in a (node, degree) tab file
        percentile = sorted(degrees.values(),
                            reverse=True)[len(degrees) // 1000]
        logging.debug("Top 0.1% has degree of at least {0}".format(percentile))
        hubs = [(k, v) for k, v in degrees.items() if v >= percentile]
        hubs.sort(key=lambda x: x[1], reverse=True)  # degress descending
        hubsfile = "hubs.txt"
        fw = open(hubsfile, "w")
        for node, degree in hubs:
            print("{0}\t{1}".format(node, degree), file=fw)
        fw.close()
        logging.debug("Hubs saved to `{0}`".format(hubsfile))

    logging.debug("Read graph from `{0}`".format(bestgraph))
    if directed:
        G = load(open(bestgraph))
    else:
        G = nx.read_gpickle(bestgraph)
        graph_stats(G)
    return G
Example #35
def main():
    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logger.info('gpu device = %d' % args.gpu)
    logger.info("args = %s", args)

    # # load the correct ops dictionary
    op_dict_to_load = "operations.%s" % args.ops
    logger.info('loading op dict: ' + str(op_dict_to_load))
    op_dict = eval(op_dict_to_load)

    # load the correct primitives list
    primitives_to_load = "genotypes.%s" % args.primitives
    logger.info('loading primitives:' + primitives_to_load)
    primitives = eval(primitives_to_load)
    logger.info('primitives: ' + str(primitives))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    if args.multi_channel:
        final_path = None
        if args.final_path is not None:
            final_path = np.load(args.final_path)

        genotype = None
        if args.load_genotype is not None:
            genotype = getattr(genotypes, args.load_genotype)
        cnn_model = model_search.MultiChannelNetwork(
            args.init_channels,
            CIFAR_CLASSES,
            layers=args.layers_of_cells,
            criterion=criterion,
            steps=args.layers_in_cells,
            primitives=primitives,
            op_dict=op_dict,
            weighting_algorithm=args.weighting_algorithm,
            genotype=genotype)
        #save_graph(cnn_model.G, os.path.join(args.save, 'network_graph.pdf'))
        if args.load_genotype is not None:
            # TODO(ahundt) support other batch shapes
            data_shape = [1, 3, 32, 32]
            batch = torch.zeros(data_shape)
            cnn_model(batch)
            logger.info("loaded genotype_raw_weights = " +
                        str(cnn_model.genotype('raw_weights')))
            logger.info("loaded genotype_longest_path = " +
                        str(cnn_model.genotype('longest_path')))
            logger.info("loaded genotype greedy_path = " +
                        str(gen_greedy_path(cnn_model.G, strategy="top_down")))
            logger.info(
                "loaded genotype greedy_path_bottom_up = " +
                str(gen_greedy_path(cnn_model.G, strategy="bottom_up")))
            # TODO(ahundt) support other layouts
    else:
        cnn_model = model_search.Network(
            args.init_channels,
            CIFAR_CLASSES,
            layers=args.layers_of_cells,
            criterion=criterion,
            steps=args.layers_in_cells,
            primitives=primitives,
            op_dict=op_dict,
            weights_are_parameters=args.no_architect,
            C_mid=args.mid_channels,
            weighting_algorithm=args.weighting_algorithm)
    cnn_model = cnn_model.cuda()
    logger.info("param size = %fMB", utils.count_parameters_in_MB(cnn_model))

    if args.load:
        logger.info('loading weights from: ' + args.load)
        utils.load(cnn_model, args.load)

    optimizer = torch.optim.SGD(cnn_model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Get preprocessing functions (i.e. transforms) to apply on data
    train_transform, valid_transform = utils.get_data_transforms(args)

    # Get the training queue, select training and validation from training set
    train_queue, valid_queue = dataset.get_training_queues(
        args.dataset,
        train_transform,
        valid_transform,
        args.data,
        args.batch_size,
        args.train_portion,
        search_architecture=True)

    lr_schedule = cosine_power_annealing(
        epochs=args.epochs,
        max_lr=args.learning_rate,
        min_lr=args.learning_rate_min,
        warmup_epochs=args.warmup_epochs,
        exponent_order=args.lr_power_annealing_exponent_order)
    epochs = np.arange(args.epochs) + args.start_epoch
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #       optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    if args.no_architect:
        architect = None
    else:
        architect = Architect(cnn_model, args)

    epoch_stats = []

    stats_csv = args.epoch_stats_file
    stats_csv = stats_csv.replace('.json', '.csv')
    with tqdm(epochs, dynamic_ncols=True) as prog_epoch:
        best_valid_acc = 0.0
        best_epoch = 0
        # state_dict = {}
        # og_state_keys = set()
        # updated_state_keys = set()

        #saving state_dict for debugging weights by comparison
        # for key in cnn_model.state_dict():
        #   state_dict[key] = cnn_model.state_dict()[key].clone()
        #   # logger.info('layer = {}'.format(key))
        # logger.info('Total keys in state_dict = {}'.format(len(cnn_model.state_dict().keys())))
        # og_state_keys.update(cnn_model.state_dict().keys())
        best_stats = {}
        weights_file = os.path.join(args.save, 'weights.pt')
        for epoch, learning_rate in zip(prog_epoch, lr_schedule):
            # scheduler.step()
            # lr = scheduler.get_lr()[0]
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            genotype = None
            if args.final_path is None:
                genotype = cnn_model.genotype()
                logger.info('genotype = %s', genotype)

            if not args.multi_channel:
                # the genotype is the alphas in the multi-channel case
                # print the alphas in other cases
                logger.info('alphas_normal = %s', cnn_model.arch_weights(0))
                logger.info('alphas_reduce = %s', cnn_model.arch_weights(1))

            # training
            train_acc, train_obj = train(train_queue, valid_queue, cnn_model,
                                         architect, criterion, optimizer,
                                         learning_rate)

            if args.multi_channel and args.final_path is None:
                # TODO(ahundt) remove final path and switch back to genotype, and save out raw weights plus optimal path
                optimal_path = nx.algorithms.dag.dag_longest_path(cnn_model.G)
                optimal_path_filename = os.path.join(
                    args.save, 'longest_path_layer_sequence.npy')
                logger.info('Saving model layer sequence object: ' +
                            str(optimal_path_filename))
                np.save(optimal_path_filename, optimal_path)
                graph_filename = os.path.join(
                    args.save, 'network_graph_' + str(epoch) + '.graph')
                logger.info('Saving updated weight graph: ' +
                            str(graph_filename))
                nx.write_gpickle(cnn_model.G, graph_filename)
                logger.info('optimal_path  : %s', optimal_path)

            # validation
            valid_acc, valid_obj = infer(valid_queue, cnn_model, criterion)

            if valid_acc > best_valid_acc:
                # new best epoch, save weights

                utils.save(cnn_model, weights_file)

                if args.multi_channel:

                    graph_filename = os.path.join(
                        args.save,
                        'network_graph_best_valid' + str(epoch) + '.graph')
                    logger.info('Saving updated weight graph: ' +
                                str(graph_filename))

                best_epoch = epoch
                best_valid_acc = valid_acc
                prog_epoch.set_description(
                    'Overview ***** best_epoch: {0} best_valid_acc: {1:.2f} ***** Progress'
                    .format(best_epoch, best_valid_acc))

            logger.info(
                'epoch, %d, train_acc, %f, valid_acc, %f, train_loss, %f, valid_loss, %f, lr, %e, best_epoch, %d, best_valid_acc, %f',
                epoch, train_acc, valid_acc, train_obj, valid_obj,
                learning_rate, best_epoch, best_valid_acc)
            stats = {
                'epoch': epoch,
                'train_acc': train_acc,
                'valid_acc': valid_acc,
                'train_loss': train_obj,
                'valid_loss': valid_obj,
                'lr': learning_rate,
                'best_epoch': best_epoch,
                'best_valid_acc': best_valid_acc,
                'genotype': str(genotype),
                'arch_weights': str(cnn_model.arch_weights)
            }
            epoch_stats += [copy.deepcopy(stats)]
            with open(args.epoch_stats_file, 'w') as f:
                json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
            utils.list_of_dicts_to_csv(stats_csv, epoch_stats)

    # print the final model
    if args.final_path is None:
        genotype = cnn_model.genotype()
        logger.info('genotype = %s', genotype)
    logger.info('Search for Model Complete! Save dir: ' + str(args.save))
Example #36
def main():
    """ Contains majority of expermiment. Runs a markov chain on the state dual graph, determining how the distribution is affected to changes in the
     state dual graph.
     Raises:
        RuntimeError if PROPOSAL_TYPE of config file is neither 'sierpinski'
        nor 'convex'
    """
    output_directory = createDirectory(config)
    epsilon = config["epsilon"]
    k = config["NUM_DISTRICTS"]
    updaters = {
        'population': Tally('population'),
        'cut_edges': cut_edges,
    }
    graph, dual = preprocessing(config["INPUT_GRAPH_FILENAME"],
                                output_directory)
    ideal_population = sum(graph.nodes[x]["population"]
                           for x in graph.nodes()) / k
    faces = graph.graph["faces"]
    faces = list(faces)
    square_faces = [face for face in faces if len(face) == 4]
    totpop = 0
    for node in graph.nodes():
        totpop += int(graph.nodes[node]['population'])
    #length of chain
    steps = config["CHAIN_STEPS"]

    #length of each gerrychain step
    gerrychain_steps = config["GERRYCHAIN_STEPS"]
    #faces that are currently modified. Code maintains list of modified faces, and at each step selects a face. if face is already in list,
    #the face is un-modified, and if it is not, the face is modified by the specified proposal type.
    special_faces = set(
        [face for face in square_faces if np.random.uniform(0, 1) < .5])
    chain_output = defaultdict(list)
    #start with small score to move in right direction
    print("Choosing", math.floor(len(faces) * config['PERCENT_FACES']),
          "faces of the dual graph at each step")
    max_score = -math.inf
    #this is the main markov chain
    for i in tqdm.tqdm(range(1, steps + 1), ncols=100, desc="Chain Progress"):
        special_faces_proposal = copy.deepcopy(special_faces)
        proposal_graph = copy.deepcopy(graph)
        if (config["PROPOSAL_TYPE"] == "sierpinski"):
            for i in range(math.floor(len(faces) * config['PERCENT_FACES'])):
                face = random.choice(faces)
                ##Makes the Markov chain lazy -- this just makes the chain aperiodic.
                if random.random() > .5:
                    if not (face in special_faces_proposal):
                        special_faces_proposal.append(face)
                    else:
                        special_faces_proposal.remove(face)
            face_sierpinski_mesh(proposal_graph, special_faces_proposal)
        elif (config["PROPOSAL_TYPE"] == "add_edge"):
            for j in range(
                    math.floor(len(square_faces) * config['PERCENT_FACES'])):
                face = random.choice(square_faces)
                ##Makes the Markov chain lazy -- this just makes the chain aperiodic.
                if random.random() > .5:
                    if not (face in special_faces_proposal):
                        special_faces_proposal.add(face)
                    else:
                        special_faces_proposal.remove(face)
            add_edge_proposal(proposal_graph, special_faces_proposal)
        else:
            raise RuntimeError(
                'PROPOSAL TYPE must be "sierpinski" or "convex"')

        initial_partition = Partition(proposal_graph,
                                      assignment=config['ASSIGN_COL'],
                                      updaters=updaters)

        # Sets up Markov chain
        popbound = within_percent_of_ideal_population(initial_partition,
                                                      epsilon)
        tree_proposal = partial(recom,
                                pop_col=config['POP_COL'],
                                pop_target=ideal_population,
                                epsilon=epsilon,
                                node_repeats=1)

        #make new function -- this computes the energy of the current map
        exp_chain = MarkovChain(tree_proposal,
                                Validator([popbound]),
                                accept=accept.always_accept,
                                initial_state=initial_partition,
                                total_steps=gerrychain_steps)
        seats_won_for_republicans = []
        seats_won_for_democrats = []
        for part in exp_chain:
            rep_seats_won = 0
            dem_seats_won = 0
            for j in range(k):
                rep_votes = 0
                dem_votes = 0
                for n in graph.nodes():
                    if part.assignment[n] == j:
                        rep_votes += graph.nodes[n]["EL16G_PR_R"]
                        dem_votes += graph.nodes[n]["EL16G_PR_D"]
                total_seats_dem = int(dem_votes > rep_votes)
                total_seats_rep = int(rep_votes > dem_votes)
                rep_seats_won += total_seats_rep
                dem_seats_won += total_seats_dem
            seats_won_for_republicans.append(rep_seats_won)
            seats_won_for_democrats.append(dem_seats_won)

        seat_score = statistics.mean(seats_won_for_republicans)

        #implement modified mattingly simulated annealing scheme, from evaluating partisan gerrymandering in wisconsin
        if i <= math.floor(steps * .67):
            beta = i / math.floor(steps * .67)
        else:
            beta = (i / math.floor(steps * .67)) * 100
        temperature = 1 / (beta)

        weight_seats = 1
        weight_flips = -.2
        config['PERCENT_FACES'] = config['PERCENT_FACES']
        flip_score = len(
            special_faces)  # This is the number of edges being swapped

        score = weight_seats * seat_score + weight_flips * flip_score

        ##This is the acceptance step of the Metropolis-Hasting's algorithm. Specifically, rand < min(1, P(x')/P(x)), where P is the energy and x' is proposed state
        #if the acceptance criteria is met or if it is the first step of the chain
        def update_outputs():
            chain_output['dem_seat_data'].append(seats_won_for_democrats)
            chain_output['rep_seat_data'].append(seats_won_for_republicans)
            chain_output['score'].append(score)
            chain_output['seat_score'].append(seat_score)
            chain_output['flip_score'].append(flip_score)

        def propagate_outputs():
            for key in chain_output.keys():
                chain_output[key].append(chain_output[key][-1])

        if i == 1:
            update_outputs()
            special_faces = copy.deepcopy(special_faces_proposal)
        #simplified form of the acceptance ratio, for intuition:
        #exp((proposal_score - previous_score) / temperature)
        elif np.random.uniform(0, 1) < (math.exp(score) / math.exp(
                chain_output['score'][-1]))**(1 / temperature):
            update_outputs()

            special_faces = copy.deepcopy(special_faces_proposal)
        else:
            propagate_outputs()

        #if this score is the highest seen so far, save the map
        if score > max_score:
            #todo: add graph coloring for the graph changes that produced this score
            nx.write_gpickle(proposal_graph,
                             output_directory + '/' + "max_score",
                             pickle.HIGHEST_PROTOCOL)
            with open(output_directory + "/max_score_data.txt", "w+") as f:
                f.write("maximum score: " + str(score) + "\n" +
                        "edges changed: " + str(len(special_faces)) + "\n" +
                        "Seat Score: " + str(seat_score))
            save_obj(special_faces, output_directory + '/', "special_faces")
            max_score = score

    plt.plot(range(len(chain_output['score'])), chain_output['score'])
    plt.xlabel("Meta-Chain Step")
    plt.ylabel("Score")
    plot_name = output_directory + '/' + 'score' + '.png'
    plt.savefig(plot_name)

    ## Todo: Add scatter plot of the seat_score and flip_score here.

    save_obj(chain_output, output_directory, "chain_output")
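The acceptance test in the meta-chain above is a standard Metropolis rule on the combined seat/flip score. A minimal standalone sketch of that rule (equivalent to the expression used above, up to the min(1, ·) clamp), assuming scores and temperature are plain floats:

import math
import random

def metropolis_accept(proposal_score, current_score, temperature):
    # Accept with probability min(1, exp((proposal_score - current_score) / temperature)).
    ratio = math.exp((proposal_score - current_score) / temperature)
    return random.random() < min(1.0, ratio)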
Exemple #37
0
    tres = grp.create_dataset("t", (1, ), maxshape=(None, ), dtype=float)

#    km0=4*2**(1/4)
#    u0[:,0]=np.exp(-np.linalg.norm(k-k[int(k.shape[0]/2)],axis=0)**2/4**2)*np.exp(1j*np.pi*np.random.random(N))
#    u0[:,1]=np.exp(-np.linalg.norm(k-k[int(k.shape[0]/2)],axis=0)**2/4**2)*np.exp(1j*np.pi*np.random.random(N))
#    u0=np.sqrt(6*np.sqrt(2/np.pi)*km0**(-5)*np.abs(k)**4*np.exp(-2*(np.abs(k)/km0)**2))*np.exp(1j*np.pi*np.random.random(Nh))
if (save_network):
    gr = nx.Graph()
    strs = [str(l) for l in trs]
    gr.add_nodes_from(kn, bipartite=0)
    gr.add_nodes_from(strs, bipartite=1)

    for l in range(len(trs)):
        gr.add_edges_from([(kn[trs[l][0]], strs[l]), (kn[trs[l][1]], strs[l]),
                           (kn[trs[l][2]], strs[l])])
    nx.write_gpickle(gr, 'nwfile.pkl')

r = spi.RK45(func, t0, u0.ravel().view(dtype=float), t1, max_step=dt)
epst = 1e-12
ct = time.time()
if (random_forcing == True):
    force_update()
#dtff,dtf,dts,dtss=np.sort((dt,dtr,dtrw,dtout))
toldr = -1.0e12
toldrw = -1.0e12
toldout = -1.0e12

while (r.status == 'running'):
    told = r.t
    if (r.t >= toldout + dtout - epst and r.status == 'running'):
        toldout = r.t
def cd_cluster_evolution_graph(
    config,
    source_folder,
    snaphot_mapping_folder,
    subseqitem_mapping_folder,
    target_folder,
    regulations,
):
    config_clustering_files, snapshots = get_config_clustering_files(
        config, source_folder)

    first = True

    B = nx.DiGraph()

    prev_community_id_for_rolled_down = None
    prev_preprocessed_mappings = None
    prev_snapshot = None

    for config_clustering_file, snapshot in zip(config_clustering_files,
                                                snapshots):
        # Add nodes to graph

        clustering = readwrite.read_community_json(
            os.path.join(source_folder, config_clustering_file))

        with open(
                os.path.join(
                    subseqitem_mapping_folder,
                    f'{snapshot}_{config["pp_merge"]}.pickle',
                ),
                "rb",
        ) as f:
            preprocessed_mappings = pickle.load(f)

        counters_dict = get_cluster_law_names_counting_seqitems(
            preprocessed_mappings, clustering.communities)
        most_common_dict = {
            k: ",".join(
                [f"{elem_k},{count}" for elem_k, count in v.most_common()])
            for k, v in counters_dict.items()
        }
        chars_n_dict = get_community_sizes(
            clustering.communities,
            preprocessed_mappings["chars_n"],
        )
        tokens_n_dict = get_community_sizes(clustering.communities,
                                            preprocessed_mappings["tokens_n"])

        for community_key, community_nodes in enumerate(
                clustering.communities):
            community_nodes_sorted = sorted(
                community_nodes,
                key=lambda n: preprocessed_mappings["tokens_n"].get(n, 0),
                reverse=True,
            )
            for n in community_nodes_sorted:
                assert "," not in n
            B.add_node(
                f"{snapshot}_{community_key}",
                bipartite=snapshot,
                chars_n=chars_n_dict[community_key],
                tokens_n=tokens_n_dict[community_key],
                law_names=most_common_dict[community_key],
                nodes_contained=",".join(community_nodes_sorted),
            )

        communities_rolled_down = [[
            n for rolled_up_node in community_nodes
            for n in preprocessed_mappings["items_mapping"][rolled_up_node]
        ] for community_nodes in clustering.communities]

        community_id_for_rolled_down = {
            n: community_id
            for community_id, nodes in enumerate(communities_rolled_down)
            for n in nodes
        }

        if not first:

            with open(
                    os.path.join(snaphot_mapping_folder,
                                 f"{prev_snapshot}_{snapshot}.json")) as f:
                mapping = json.load(f)

            # draw edges
            edges_tokens_n = defaultdict(int)
            edges_chars_n = defaultdict(int)
            for prev_leaf_and_text_idx, leaf_and_text_idx in mapping.items():
                prev_leaf, prev_text_idx = prev_leaf_and_text_idx.rsplit(
                    "_", 1)
                leaf, text_idx = leaf_and_text_idx.rsplit("_", 1)

                text_idx = int(text_idx)

                try:
                    prev_community_id = prev_community_id_for_rolled_down[
                        prev_leaf]
                except KeyError as err:
                    report_mapping_error(
                        err, prev_preprocessed_mappings["tokens_n"])
                    continue

                try:
                    community_id = community_id_for_rolled_down[leaf]
                except KeyError as err:
                    report_mapping_error(err,
                                         preprocessed_mappings["tokens_n"])
                    continue

                prev_community_name = f"{prev_snapshot}_{prev_community_id}"
                community_name = f"{snapshot}_{community_id}"
                edge = (prev_community_name, community_name)

                if leaf in preprocessed_mappings["texts_tokens_n"]:
                    texts_tokens_n = preprocessed_mappings["texts_tokens_n"][
                        leaf]
                    texts_chars_n = preprocessed_mappings["texts_chars_n"][
                        leaf]
                    tokens_n = texts_tokens_n[text_idx]
                    chars_n = texts_chars_n[text_idx]
                else:
                    assert text_idx == 0
                    tokens_n = preprocessed_mappings["tokens_n"][leaf]
                    chars_n = preprocessed_mappings["chars_n"][leaf]

                # Use the tokens_n and chars_n values of the later year
                edges_tokens_n[edge] += tokens_n
                edges_chars_n[edge] += chars_n

            B.add_edges_from(edges_tokens_n.keys())
            nx.set_edge_attributes(B, edges_tokens_n, "tokens_n")
            nx.set_edge_attributes(B, edges_chars_n, "chars_n")

        first = False
        prev_snapshot = snapshot
        prev_community_id_for_rolled_down = community_id_for_rolled_down
        prev_preprocessed_mappings = preprocessed_mappings

    nx.write_gpickle(
        B,
        f"{target_folder}/"
        f'{filename_for_pp_config(snapshot="all", **config, file_ext=".gpickle.gz")}',
    )

    # Write families
    families = cluster_families(B, threshold=0.15)
    path = (
        f"{target_folder}/"
        f'{filename_for_pp_config(snapshot="all", **config, file_ext=".families.json")}'
    )
    with open(path, "w") as f:
        json.dump(families, f)
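The cluster-evolution graph written above can be reloaded for inspection. A minimal sketch, assuming networkx < 3.0 (where read_gpickle/write_gpickle still exist) and a hypothetical output filename:

import networkx as nx

# hypothetical filename produced by filename_for_pp_config(...)
B = nx.read_gpickle("clusters_all.gpickle.gz")

# Nodes are "<snapshot>_<community_id>" with size attributes; edge weights
# (tokens_n / chars_n) measure how much text flows from one snapshot's cluster
# to the corresponding cluster in the next snapshot.
for u, v, data in B.edges(data=True):
    print(u, "->", v, data.get("tokens_n"))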
Exemple #39
0
 def save_graph(self, path):
     nx.write_gpickle(self.g, path)
Exemple #40
0
    parser_init = argparse.ArgumentParser()
    parser_init.add_argument("--input_graph", help="Graph in gpickle format.")
    parser_init.add_argument("--percentile", help="Degree percentile.")
    parser_init.add_argument("--step_size", help="Neighbourhood size.")
    parser_init.add_argument(
        "--heuristic",
        help=
        "possible options: degree, pagerank_numpy, pagerank_scipy, katz, eigenvector_centrality_numpy, flow_betweenness, communicability"
    )
    parser_init.add_argument("--ontology_id", help="dataset.")
    parser_init.add_argument("--make_samples", help="dataset.")
    parser_init.add_argument("--output_graph", help="dataset.")

    parsed = parser_init.parse_args()
    G = nx.read_gpickle(parsed.input_graph)

    outgraph2 = g2o(G, parsed.percentile, parsed.step_size, parsed.heuristic)

    if parsed.output_graph:
        nx.write_gpickle(outgraph2, parsed.output_graph)

    if parsed.ontology_id:
        rdfpart = rm.rdfconverter(outgraph2,
                                  "query")  ## query is the folder with lists
        if parsed.make_samples:
            rdfpart.return_target_n3("samples/" +
                                     parsed.ontology_id)  ## target folder
        otype = parsed.ontology_id.split(".")[1]
        rdfpart.return_background_knowledge("BK/autogen" + parsed.ontology_id,
                                            otype)
Exemple #41
0
def log_graph(
    graph,
    outdir,
    filename,
    identify_self=False,
    nodecolor="tag",
    fig_size=(4, 3),
    dpi=300,
    label_node_feat=True,
    edge_vmax=None,
    args=None,
    eps=1e-6,
):
    """
    Args:
        nodecolor: the color of node, can be determined by 'label', or 'feat'. For feat, it needs to
            be one-hot'
    """
    if len(graph.edges) == 0:
        return
    import matplotlib.pyplot as plt
    plt.switch_backend("agg")
    cmap = plt.get_cmap("tab20")
    plt.switch_backend("agg")
    fig = plt.figure(figsize=fig_size, dpi=dpi)

    node_colors = []
    # edge_colors = [min(max(w, 0.0), 1.0) for (u,v,w) in Gc.edges.data('weight', default=1)]
    edge_colors = [w for (u, v, w) in graph.edges.data("weight", default=1)]

    # maximum value for node color
    vmax = 19
    # for i in graph.nodes():
    #     if nodecolor == "feat" and "feat" in graph.nodes[i]:
    #         num_classes = graph.nodes[i]["feat"].size()[0]
    #         if num_classes >= 10:
    #             cmap = plt.get_cmap("tab20")
    #             vmax = 19
    #         elif num_classes >= 8:
    #             cmap = plt.get_cmap("tab10")
    #             vmax = 9
    #         break

    feat_labels = {}
    for i in graph.nodes():
        if identify_self and "self" in graph.nodes[i]:
            node_colors.append(0)
        elif nodecolor == "tag" and "tag" in graph.nodes[i]:
            node_colors.append(graph.nodes[i]["tag"])
            feat_labels[i] = graph.nodes[i]["tag"]
        elif nodecolor == "feat" and "feat" in graph.nodes[i]:
            # print(Gc.nodes[i]['feat'])
            feat = graph.nodes[i]["feat"].detach().numpy()
            # idx with pos val in 1D array
            feat_class = 0
            for j in range(len(feat)):
                if feat[j] == 1:
                    feat_class = j
                    break
            node_colors.append(feat_class)
            feat_labels[i] = feat_class
        else:
            node_colors.append(1)
    if not label_node_feat:
        feat_labels = None

    plt.switch_backend("agg")
    fig = plt.figure(figsize=fig_size, dpi=dpi)

    if graph.number_of_nodes() == 0:
        raise Exception("empty graph")
    if graph.number_of_edges() == 0:
        raise Exception("empty edge")
    # remove_nodes = []
    if len(graph.nodes) > 20:
        pos_layout = nx.kamada_kawai_layout(graph, weight=None)
        # pos_layout = nx.spring_layout(graph, weight=None)
    else:
        pos_layout = nx.kamada_kawai_layout(graph, weight=None)

    weights = [d for (u, v, d) in graph.edges(data="weight", default=1)]
    if edge_vmax is None:
        edge_vmax = statistics.median_high(
            [d for (u, v, d) in graph.edges(data="weight", default=1)])
    min_color = min([d for (u, v, d) in graph.edges(data="weight", default=1)])
    # color range: gray to black
    edge_vmin = 2 * min_color - edge_vmax
    print(edge_vmin)
    print(edge_vmax)
    print(edge_colors)
    nx.draw(
        graph,
        pos=pos_layout,
        with_labels=False,
        font_size=4,
        labels=feat_labels,
        node_color=node_colors,
        vmin=0,
        vmax=vmax,
        cmap=cmap,
        edge_color=edge_colors,
        edge_cmap=plt.get_cmap("Greys"),
        edge_vmin=edge_vmin - eps,
        edge_vmax=edge_vmax,
        width=1.3,
        node_size=100,
        alpha=0.9,
    )
    fig.axes[0].xaxis.set_visible(False)
    fig.canvas.draw()

    save_path = os.path.join(outdir, filename)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    nx.write_gpickle(graph, os.path.splitext(save_path)[0] + '.gpickle')
    plt.savefig(save_path, format="pdf")
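A hedged usage sketch for log_graph, assuming the function above is importable together with its module-level imports (os, statistics, networkx) and that integer 'tag' node attributes are available; the output directory and filename are hypothetical:

import networkx as nx

g = nx.karate_club_graph()
for n in g.nodes():
    g.nodes[n]["tag"] = g.degree[n] % 5      # hypothetical tag attribute
for u, v in g.edges():
    g.edges[u, v]["weight"] = 1.0            # uniform edge weights
log_graph(g, outdir="plots", filename="karate.pdf", nodecolor="tag")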
def main():
    parser = argparse.ArgumentParser(
        description=
        'maps a given document-author-contribution file to a weighted bipartite network of document and author nodes'
    )
    parser.add_argument(
        '--contribs',
        type=argparse.FileType('r'),
        help='path to input contribution MatrixMarket file (.mm/.mm.bz2)',
        required=True)
    parser.add_argument('--bipart-graph',
                        type=argparse.FileType('w'),
                        help='path to output graph (.graph/.graph.bz2) file',
                        required=True)
    parser.add_argument('--top-n-contribs',
                        type=int,
                        help='keep at most N highest contribs per author',
                        required=True)

    args = parser.parse_args()
    input_contribs_path = args.contribs.name
    output_bipart_graph_path = args.bipart_graph.name
    top_n_contribs = args.top_n_contribs

    logger.info('running with:\n{}'.format(
        pformat({
            'input_contribs_path': input_contribs_path,
            'output_bipart_graph_path': output_bipart_graph_path,
            'top_n_contribs': top_n_contribs
        })))

    # load the stored contributions
    contribs = MmCorpus(input_contribs_path)
    num_docs = contribs.num_docs
    num_authors = contribs.num_terms
    logger.info('processing contributions of {} documents, {} authors'.format(
        num_docs, num_authors))

    # build the bipartite affiliation network: documents & authors as nodes, document-author contributions as weighted edges
    bipart_graph = nx.Graph()
    doc_nodes = tuple('d' + str(n) for n in range(0, num_docs))
    bipart_graph.add_nodes_from(doc_nodes, bipartite=0)
    auth_nodes = tuple('a' + str(n) for n in range(0, num_authors))
    bipart_graph.add_nodes_from(auth_nodes, bipartite=1)
    bipart_graph.add_weighted_edges_from(get_edges_from_contribs(contribs),
                                         weight='weight')
    log_nwx(bipart_graph)
    logger.info('bipartite? {}'.format(bipartite.is_bipartite(bipart_graph)))
    simplify_graph_nwx(bipart_graph)
    logger.info(
        'actual numbers after simplifying: {} docs, {} authors, {} edges'.
        format(*get_bipartite_node_counts(bipart_graph),
               len(bipart_graph.edges)))

    # report the author node with the highest degree
    max_degree_author = max(bipart_graph.degree(auth_nodes),
                            key=lambda node_deg: node_deg[1])
    logger.info('author {} having max degree of {}'.format(*max_degree_author))

    # update variables
    doc_nodes, auth_nodes = get_bipartite_nodes(bipart_graph)

    # prune each author's incident edges down to the K edges with the largest weights
    logger.info('pruning to top {} edges per author'.format(top_n_contribs))
    for auth_node in auth_nodes:
        logger.debug('author {}'.format(auth_node))
        auth_edges = bipart_graph[auth_node]
        auth_edges = tuple((neighbor, weight['weight'])
                           for neighbor, weight in auth_edges.items())
        logger.debug('incident edges \n{}'.format(pformat(auth_edges)))
        num_remove = len(auth_edges) - top_n_contribs
        author_min_edges = nsmallest(num_remove,
                                     auth_edges,
                                     key=lambda edge: edge[1])
        logger.debug('removing edges \n{}'.format(pformat(author_min_edges)))
        bipart_graph.remove_edges_from(
            (auth_node, neighbor) for neighbor, weight in author_min_edges)

    # keep_max_edges = 10000
    # logger.info('pruning to {} highest edges'.format(keep_max_edges))
    # num_edges_to_remove = len(bipart_graph.edges) - keep_max_edges
    # min_edges = nsmallest(num_edges_to_remove, bipart_graph.edges(data='weight'), key=lambda edge: edge[2])
    # bipart_graph.remove_edges_from(min_edges)
    # log_nwx(bipart_graph)

    # report the author node with the highest degree
    max_degree_author = max(bipart_graph.degree(auth_nodes),
                            key=lambda node_deg: node_deg[1])
    logger.info('author {} having max degree of {}'.format(*max_degree_author))

    # remove isolated nodes
    simplify_graph_nwx(bipart_graph)
    log_nwx(bipart_graph)
    logger.info('new number of documents {}, authors {}'.format(
        *get_bipartite_node_counts(bipart_graph)))

    # save the affiliation network
    logger.info('writing graph to {}'.format(output_bipart_graph_path))
    nx.write_gpickle(bipart_graph, output_bipart_graph_path)
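The per-author pruning above keeps only the top-N heaviest contributions of each author. A minimal sketch of the same idea on a toy weighted graph, assuming nothing beyond networkx and heapq:

import heapq
import networkx as nx

g = nx.Graph()
g.add_weighted_edges_from([("a1", "d1", 3), ("a1", "d2", 1), ("a1", "d3", 2)])
top_n = 2
edges = [(nbr, d["weight"]) for nbr, d in g["a1"].items()]
drop = heapq.nsmallest(max(len(edges) - top_n, 0), edges, key=lambda e: e[1])
g.remove_edges_from(("a1", nbr) for nbr, _ in drop)
print(sorted(g.edges("a1")))   # keeps only the two heaviest edges of author a1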
        mng = plt.get_current_fig_manager()
        mng.resize(*mng.window.maxsize())
        plt.show()

        if len(pk.selected) > 0:
            selected(pk.data)
        else:
            selected(pk.all)

        if DG.number_of_nodes() > 0:
            fn = next(n for n in DG.nodes() if n.name == 0)
            ln = next(n for n in DG.nodes() if n.name == len(DG.nodes()) - 1)
            DG.add_weighted_edges_from([(ln, fn, ln.dist(fn))])

            nx.write_gpickle(DG, "data/mission.pkl")
    else:
        DG = nx.read_gpickle("data/mission.pkl")

        for idx, node in enumerate(sorted(DG.nodes(), key=lambda n: n.name)):
            nx.draw(DG, dict((n, n.pos) for n in DG.nodes()),
                    node_color=["g" if n.name == idx else "y" for n in DG.nodes()])
            plt.show(block=False)
            node.speed_limit = int(input("Insert speed limit: "))
            plt.close()

        # nx.draw(DG, dict((n, n.pos) for n in DG.nodes()), node_color=["g" if False else "y" for n in DG.nodes()])
        # plt.show(block=True)

        nx.write_gpickle(DG, "data/mission.pkl")
Exemple #44
0
 def save_graph(self, graph_path):
     nx.write_gpickle(self.graph, graph_path)
        seed_species=exp_data.species.sig.id_list,  # genes seed species
        all_measured_list=exp_data.species.id_list,  # all data measured
        use_biogrid=True,  # expand with biogrid
        use_hmdb=True,  # expand with hmdb
        use_reactome=True,  # expand with reactome
        use_signor=True,  # expand with signor
        trim_source_sink=True,  # remove all source and sink nodes not measured
        save_name='Data/cisplatin_based_network_new'
    )

    # Load the network, note that it is returned above but for future use
    # we will use load in
    network = nx.read_gpickle('Data/cisplatin_based_network.p')

    utils.add_data_to_graph(network, exp_data)
    print("Saving network")
    # write to GML for cytoscape or other program
    nx.write_gml(
        network,
        os.path.join(os.path.dirname(__file__), 'Data',
                     'cisplatin_network_w_attributes.gml')
    )

    # write to gpickle for fast loading in python
    nx.write_gpickle(
        network,
        os.path.join(os.path.dirname(__file__), 'Data',
                     'cisplatin_based_network.p'),
    )
Exemple #46
0
def community():
    try:

        n = getInteger('participants')  #initial participants
        m = getInteger('proposals')  #initial proposals

        initial_sentiment = getFloat('initial_sentiment')

    except Exception as err:
        return str(err), 422

    plot_name = str(n) + str(m)

    #initializer
    network, initial_supply, total_requested = initialize_network(n, m)

    proposals = get_nodes_by_type(network, 'proposal')
    participants = get_nodes_by_type(network, 'participant')
    supporters = get_edges_by_type(network, 'support')
    influencers = get_edges_by_type(network, 'influence')
    competitors = get_edges_by_type(network, 'conflict')

    nx.draw_kamada_kawai(network, nodelist=participants, edgelist=influencers)
    plt.title('Participants Social Network')
    plt.savefig('static/plot3-' + plot_name + '.png')
    plt.clf()

    nx.draw_kamada_kawai(network,
                         nodelist=proposals,
                         edgelist=competitors,
                         node_color='b')
    plt.title('Proposals Conflict Network')
    plt.savefig('static/plot4-' + plot_name + '.png')
    plt.clf()

    plt.hist([network.nodes[i]['holdings'] for i in participants])
    plt.title('Histogram of Participants Token Holdings')
    plt.savefig('static/plot5-' + plot_name + '.png')
    plt.clf()

    plt.hist([network.nodes[i]['funds_requested'] for i in proposals])
    plt.title('Histogram of Proposals Funds Requested')
    plt.savefig('static/plot6-' + plot_name + '.png')
    plt.clf()

    affinities = np.empty((n, m))
    for i_ind in range(n):
        for j_ind in range(m):
            i = participants[i_ind]
            j = proposals[j_ind]
            affinities[i_ind][j_ind] = network.edges[(i, j)]['affinity']

    dims = (20, 5)
    fig, ax = plt.subplots(figsize=dims)

    sns.heatmap(affinities.T,
                xticklabels=participants,
                yticklabels=proposals,
                square=True,
                cbar=True,
                ax=ax)

    plt.title('affinities between participants and proposals')
    plt.ylabel('proposal_id')
    plt.xlabel('participant_id')
    plt.savefig('static/plot7-' + plot_name + '.png')
    plt.clf()

    nx.write_gpickle(network, 'static/network.gpickle')

    return jsonify({
        # inputs
        'participants':
        n,
        'proposals':
        m,
        'initial_sentiment':
        initial_sentiment,
        # outputs
        'initial_supply':
        initial_supply,
        'results': [
            'plot3-' + plot_name + '.png',
            'plot4-' + plot_name + '.png',
            'plot5-' + plot_name + '.png',
            'plot6-' + plot_name + '.png',
            'plot7-' + plot_name + '.png',
        ],
        'network':
        jsonifyNetwork(network)
    })
def graph_path(graph, tmpdir):
    gpath = tmpdir / "graph.pkl"
    nx.write_gpickle(graph, str(gpath))
    yield gpath
Exemple #48
0
 
    return G

def transformWord(graph, start, goal):
    paths=collections.deque([ [start] ])
    extended=set()
    while len(paths)!=0:
        currentPath=paths.popleft()
        currentWord=currentPath[-1]
        if currentWord==goal:
            return currentPath
        elif currentWord in extended:
            continue
        extended.add(currentWord)
        transforms=graph[currentWord]
        for word in transforms:
            if word not in currentPath:
                #avoid loops
                paths.append(currentPath[:]+[word])
    #no transformation
    return []

print("First step")
dictionary = words2.dictionary  
graph = constructGraph(dictionary)
print("second step")
nx.write_gpickle(graph,"test2.gpickle")
print("third step")
print(transformWord(graph , 'time' , 'space'))
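The body of constructGraph is truncated in this excerpt; a minimal sketch of one common construction (connect words that differ in exactly one letter, via wildcard buckets), assuming lowercase words:

from collections import defaultdict
import networkx as nx

def constructGraph(dictionary):
    # Bucket words by wildcard patterns, e.g. "t_me" groups "time", "tame", ...
    buckets = defaultdict(list)
    for word in dictionary:
        for i in range(len(word)):
            buckets[word[:i] + "_" + word[i + 1:]].append(word)
    g = nx.Graph()
    g.add_nodes_from(dictionary)
    for bucket in buckets.values():
        for i, w1 in enumerate(bucket):
            for w2 in bucket[i + 1:]:
                g.add_edge(w1, w2)
    return g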

Exemple #49
0
                             names=["id", "module"])
    modules = list(cluster_df.groupby('module'))
    num_modules = len(modules) - 1
    G.graph['modules'] = [[] for i in range(num_modules)]
    G.graph['edges'] = [[] for i in range(num_modules)]
    G.graph['size'] = [0] * num_modules
    for i in range(0, num_modules):
        module_num = int(modules[i][0])
        ids = modules[i][1]['id']
        for n in ids:
            if n in G:
                G.nodes[n]['module'] = module_num
                G.graph['modules'][module_num].append(n)
        G.graph['size'][module_num] = len(G.graph['modules'][module_num])

    for i in range(0, num_modules):
        mG.add_node(i,
                    size=len(G.graph['modules'][i]),
                    genes=G.graph['modules'][i])

    for i in range(0, num_modules):
        for j in range(i + 1, num_modules):
            cut_size = nx.algorithms.cuts.cut_size(G, G.graph['modules'][i],
                                                   G.graph['modules'][j])
            if cut_size > 0:
                cut_size = cut_size / (
                    (G.graph['size'][i] + G.graph['size'][j]) / 2)
                mG.add_edge(i, j, weight=cut_size)

    nx.write_gpickle(mG, parsed.opickle)
Exemple #50
0
 def plot_save(self, G):
     utils.simple_plot(G, kys=['ord'], save='./data/img/{}.png'.format(G.name))
     nx.write_gpickle(G, './data/pkl/{}.pickle'.format(G.name))
def construct_graph(cpnet_csv_path, cpnet_vocab_path, output_path, prob = 0, prune=False):
    print('generating ConceptNet graph file...')

    nltk.download('stopwords', quiet=True)
    nltk_stopwords = nltk.corpus.stopwords.words('english')
    nltk_stopwords += ["like", "gone", "did", "going", "would", "could",
                       "get", "in", "up", "may", "wanter"]  # issue: mismatch with the stop words in grouding.py

    blacklist = set(["uk", "us", "take", "make", "object", "person", "people"])  # issue: mismatch with the blacklist in grouding.py

    concept2id = {}
    id2concept = {}
    with open(cpnet_vocab_path, "r", encoding="utf8") as fin:
        id2concept = [w.strip() for w in fin]
    concept2id = {w: i for i, w in enumerate(id2concept)}

    id2relation = merged_relations
    relation2id = {r: i for i, r in enumerate(id2relation)}
    # del_cpts = random.sample(range(780000), prob)
    # del_cpts_dict = np.zeros((800000,))
    # del_cpts_dict[del_cpts] = 1
    graph = nx.MultiDiGraph()
    nrow = sum(1 for _ in open(cpnet_csv_path, 'r', encoding='utf-8'))
    with open(cpnet_csv_path, "r", encoding="utf8") as fin:

        def not_save(cpt):
            # if cpt in blacklist or del_cpts_dict[concept2id[cpt]] == 1:
            if cpt in blacklist:
                return True
            '''originally phrases like "branch out" would not be kept in the graph'''
            # for t in cpt.split("_"):
            #     if t in nltk_stopwords:
            #         return True
            return False

        attrs = set()
        i = 0
        for line in tqdm(fin, total=nrow):
            ls = line.strip().split('\t')
            rel = relation2id[ls[0]]
            subj = concept2id[ls[1]]
            obj = concept2id[ls[2]]
            weight = float(ls[3])
            if prune and (not_save(ls[1]) or not_save(ls[2]) or id2relation[rel] == "hascontext"):
                continue
            # if id2relation[rel] == "relatedto" or id2relation[rel] == "antonym":
            # weight -= 0.3
            # continue
            if subj == obj:  # delete loops
                continue
            # weight = 1 + float(math.exp(1 - weight))  # issue: ???
            # if prune and i<num_changes:
            #     p = random.random()
            #     if p<0.5:
            #         rel = random.choice(list(range(len(relation2id))))
            #         i = i+1

            if (subj, obj, rel) not in attrs:
                # p = random.random()
                # if p<prob:
                #     i = i+1
                #     continue
                #     rel = random.choice(list(range(len(relation2id))))
                graph.add_edge(subj, obj, rel=rel, weight=weight)
                attrs.add((subj, obj, rel))
                graph.add_edge(obj, subj, rel=(rel + len(relation2id)), weight=weight)
                attrs.add((obj, subj, (rel + len(relation2id))))
    print(i, " perturbations done")
    nx.write_gpickle(graph, output_path)
    print(f"graph file saved to {output_path}")
    print()
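Each ConceptNet triple above is inserted in both directions, with the inverse relation encoded by offsetting the relation id by len(relation2id); a graph with R relation types therefore uses ids 0..2R-1. A minimal decoding sketch (the helper name is hypothetical):

def decode_relation(rel_id, id2relation):
    # Ids >= len(id2relation) denote the inverse of the corresponding forward relation.
    n = len(id2relation)
    if rel_id < n:
        return id2relation[rel_id], False   # forward edge
    return id2relation[rel_id - n], True    # reversed edge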
Exemple #52
0
	def kfold_validation(self, k=10):

		available_ram = psutil.virtual_memory()[1]
		available_ram = int(int(available_ram) * .9 * 1e-9)

		if available_ram > 5:
			jvm.start(max_heap_size='5g')
		else:
			jvm.start(max_heap_size=str(available_ram)+'g')


		###

		print('\nLoading '+self.input_file+' with opts -f'+str(self.features_number)+' -c'+self.classifier_name+'\n')
		# load .arff file
		dataset = arff.load(open(self.input_file, 'r'))

		#data = np.array(dataset['data'], dtype=object)
		data = np.array(dataset['data'])
		self.features_names = [x[0] for x in dataset['attributes']]

		self.attributes_number = data.shape[1]
		self.dataset_features_number = self.attributes_number - self.levels_number

		# Factorization of Nominal features_index
		features_encoder = OrdinalEncoder()
		nominal_features_index = [i for i in range(len(dataset['attributes'][:-self.levels_number])) if dataset['attributes'][i][1] != u'NUMERIC']
		if len(nominal_features_index) > 0:
			data[:, nominal_features_index] = features_encoder.fit_transform(data[:, nominal_features_index])

		self.labels_encoders = []

		for i in range(self.levels_number):
			self.labels_encoders.append(LabelEncoder())
			self.labels_encoders[-1].fit(data[:, self.dataset_features_number + i])

		classifiers_per_fold = []
		oracles_per_fold = []
		predictions_per_fold = []
		probabilities_per_fold = []
		predictions_per_fold_all = []

		print('\n***\nStart testing with '+str(k)+'Fold cross-validation -f'+str(self.features_number)+' -c'+self.classifier_name+'\n***\n')

		skf = StratifiedKFold(n_splits=k, shuffle=True)
		fold_cnt = 1

		#for train_index, test_index in skf.split(data, np.array(data[:,self.attributes_number-1], dtype=int)):
		for train_index, test_index in skf.split(data, data[:,self.attributes_number-1]):
			print(fold_cnt)
			fold_cnt += 1
			self.classifiers = []

			self.training_set = data[train_index, :self.dataset_features_number]
			self.testing_set = data[test_index, :self.dataset_features_number]
			self.ground_truth = data[train_index, self.dataset_features_number:]
			self.oracle = data[test_index, self.dataset_features_number:]
			
			self.prediction = np.ndarray(shape=[len(test_index),self.levels_number],dtype='<U24')		# Hard Output
			self.probability = np.ndarray(shape=[len(test_index),self.levels_number],dtype=object)		# Soft Output
			self.prediction_all = np.ndarray(shape=[len(test_index),self.levels_number],dtype='<U24')

			root = Tree()

			root.train_index = [i for i in range(self.training_set.shape[0])]
			root.test_index = [i for i in range(self.testing_set.shape[0])]
			root.test_index_all = root.test_index
			root.children_tags = list(set(self.ground_truth[root.train_index, root.level]))
			root.children_number = len(root.children_tags)

			root.encoder = LabelEncoder()
			root.encoder.fit(self.ground_truth[root.train_index, root.level])

			if self.has_config and root.tag + '_' + str(root.level + 1) in self.config:
				if 'f' in self.config[root.tag + '_' + str(root.level + 1)]:
					root.features_number = self.config[root.tag + '_' + str(root.level + 1)]['f']
				elif 'p' in self.config[root.tag + '_' + str(root.level + 1)]:
					root.packets_number = self.config[root.tag + '_' + str(root.level + 1)]['p']
				root.classifier_name = self.config[root.tag + '_' + str(root.level + 1)]['c']

				print('\nconfig','tag',root.tag,'level',root.level,'f',root.features_number,'c',root.classifier_name,'train_test_len',len(root.train_index),len(root.test_index))
			else:
				root.features_number = self.features_number
				root.packets_number = self.packets_number
				root.classifier_name = self.classifier_name

				print('\nconfig','tag',root.tag,'level',root.level,'f',root.features_number,'c',root.classifier_name,'train_test_len',len(root.train_index),len(root.test_index))

			self.classifiers.append(root)

			if root.children_number > 1:

				classifier_to_call = getattr(self, supported_classifiers[root.classifier_name])
				classifier_to_call(node=root)
				
			else:

				self.unary_class_results_inferring(root)

			# Creating hierarchy recursively
			if root.level < self.levels_number-1 and root.children_number > 0:
				self.recursive(root)

			classifiers_per_fold.append(self.classifiers)

			oracles_per_fold.append(self.oracle)
			predictions_per_fold.append(self.prediction)
			probabilities_per_fold.append(self.probability)
			predictions_per_fold_all.append(self.prediction_all)

		folder_discr = self.classifier_name

		if self.has_config:
			folder_discr = self.config_name

		material_folder = './data_'+folder_discr+'/material/'

		if not os.path.exists('./data_'+folder_discr):
			os.makedirs('./data_'+folder_discr)
			os.makedirs(material_folder)
		elif not os.path.exists(material_folder):
			os.makedirs(material_folder)

		type_discr = 'flow'
		feat_discr = '_f_' + str(self.features_number)
		work_discr = '_w_' + str(self.workers_number)

		if not self.has_config and self.packets_number != 0:
			type_discr = 'early'
			feat_discr = '_p_' + str(self.packets_number)
		elif self.has_config:
			if 'p' in self.config:
				type_discr = 'early'
			feat_discr = '_c_' + self.config_name

		if self.has_config and self.classifier_name:
			if self.features_number != 0:
				feat_discr = '_f_' + str(self.features_number) + feat_discr + '_' + self.classifier_name
			if self.packets_number != 0:
				feat_discr = '_p_' + str(self.packets_number) + feat_discr + '_' + self.classifier_name

		material_features_folder = './data_'+folder_discr+'/material/features/'
		material_train_durations_folder = './data_'+folder_discr+'/material/train_durations/'

		if not os.path.exists(material_folder):
			os.makedirs(material_folder)
			os.makedirs(material_features_folder)
			os.makedirs(material_train_durations_folder)
		if not os.path.exists(material_features_folder):
			os.makedirs(material_features_folder)
		if not os.path.exists(material_train_durations_folder):
			os.makedirs(material_train_durations_folder)

		for i in range(self.levels_number):

			file = open(material_folder + 'multi_' + type_discr + '_level_' + str(i+1) + work_discr + feat_discr + '.dat', 'w+')
			file.close()

			for j in range(k):

				file = open(material_folder + 'multi_' + type_discr + '_level_' + str(i+1) + work_discr + feat_discr + '.dat', 'a')

				file.write('@fold\n')
				for o, p in zip(oracles_per_fold[j][:,i], predictions_per_fold[j][:,i]):
					file.write(str(o)+' '+str(p)+'\n')

				file.close()

		# Inferring NW metrics per classifier

		for classifier in classifiers_per_fold[0]:

			file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '.dat', 'w+')
			file.close()
			file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_all.dat', 'w+')
			file.close()
			file = open(material_features_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_features.dat', 'w+')
			file.close()
			file = open(material_train_durations_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_test_durations.dat', 'w+')
			file.close()
			
		file = open(material_train_durations_folder + 'multi_' + type_discr + work_discr + feat_discr + '_test_durations.dat', 'w+')
		file.close()

		for fold_n, classifiers in enumerate(classifiers_per_fold):

			for classifier in classifiers:

				file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '.dat', 'a')

				if classifier.level > 0:
					index = []

					for pred_n, prediction in enumerate(predictions_per_fold[fold_n][classifier.test_index, classifier.level-1]):
						if prediction == oracles_per_fold[fold_n][classifier.test_index[pred_n], classifier.level-1]:
							index.append(classifier.test_index[pred_n])

					prediction_nw = predictions_per_fold[fold_n][index, classifier.level]
					oracle_nw = oracles_per_fold[fold_n][index, classifier.level]
				else:
					prediction_nw = predictions_per_fold[fold_n][classifier.test_index, classifier.level]
					oracle_nw = oracles_per_fold[fold_n][classifier.test_index, classifier.level]

				file.write('@fold\n')
				for o, p in zip(oracle_nw, prediction_nw):
						file.write(str(o)+' '+str(p)+'\n')

				file.close()

				file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_all.dat', 'a')

				prediction_all = predictions_per_fold_all[fold_n][classifier.test_index_all, classifier.level]
				oracle_all = oracles_per_fold[fold_n][classifier.test_index_all, classifier.level]

				file.write('@fold\n')
				for o, p in zip(oracle_all, prediction_all):
						file.write(str(o)+' '+str(p)+'\n')

				file.close()

				file = open(material_features_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_features.dat', 'a')

				file.write('@fold\n')
				file.write(self.features_names[classifier.features_index[0]])

				for feature_index in classifier.features_index[1:]:
					file.write(','+self.features_names[feature_index])

				file.write('\n')

				file.close()
				
				file = open(material_train_durations_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_test_durations.dat', 'a')

				file.write('%.6f\n' % (classifier.test_duration))

				file.close()

		# Retrieve train_durations for each classifier
		test_durations_per_fold = []
		
		for classifiers in classifiers_per_fold:
			test_durations_per_fold.append([])
			for classifier in classifiers:
				test_durations_per_fold[-1].append(classifier.test_duration)

		file = open(material_train_durations_folder + 'multi_' + type_discr + work_discr + feat_discr + '_test_durations.dat', 'w+')

		mean_parallel_test_duration = np.mean(np.max(test_durations_per_fold, axis=1))
		std_parallel_test_duration = np.std(np.max(test_durations_per_fold, axis=1))

		mean_sequential_test_duration = np.mean(np.sum(test_durations_per_fold, axis=1))
		std_sequential_test_duration = np.std(np.sum(test_durations_per_fold, axis=1))
	 
		file.write('mean_par,std_par,mean_seq,std_seq\n')
		file.write('%.6f,%.6f,%.6f,%.6f\n' % (mean_parallel_test_duration,std_parallel_test_duration,mean_sequential_test_duration,std_sequential_test_duration))

		file.close()

		graph_folder = './data_'+folder_discr+'/graph/'

		if not os.path.exists('./data_'+folder_discr):
			os.makedirs('./data_'+folder_discr)
			os.makedirs(graph_folder)
		elif not os.path.exists(graph_folder):
			os.makedirs(graph_folder)

		# Graph plot
		G = nx.DiGraph()
		for info in classifiers_per_fold[0]:
			G.add_node(str(info.level)+' '+info.tag, level=info.level,
						 tag=info.tag, children_tags=info.children_tags)
		for node_parent, data_parent in G.nodes.items():
			for node_child, data_child in G.nodes.items():
				if data_child['level']-data_parent['level'] == 1 and any(data_child['tag'] in s for s in data_parent['children_tags']):
					G.add_edge(node_parent, node_child)
		nx.write_gpickle(G, graph_folder+'multi_' + type_discr + feat_discr +'_graph.gml')

		print('\n***\nStart testing with incremental gamma threshold\n***\n')

		thresholds_number = 9

		oracle_gamma = np.ndarray(shape=[levels_number, thresholds_number, k], dtype=object)
		prediction_gamma = np.ndarray(shape=[levels_number, thresholds_number, k], dtype=object)
		classified_ratio = np.ndarray(shape=[levels_number, thresholds_number, k], dtype=float)

		for i in range(thresholds_number):
			gamma = float(i+1)/10.0

			for j in range(k):

				indexes = []

				for l in range(levels_number):

					for index, p in enumerate(probabilities_per_fold[j][:, l]):
						if max(p) < gamma:
							indexes.append(index)

					new_oracle = np.delete(oracles_per_fold[j][:, l], [indexes])
					new_prediction = np.delete(predictions_per_fold[j][:, l], [indexes])

					oracle_gamma[l, i, j] = new_oracle
					prediction_gamma[l, i, j] = new_prediction
					classified_ratio[l, i, j] = float(len(new_prediction))/float(len(predictions_per_fold[j][:, l]))

		for i in range(thresholds_number):

			for l in range(levels_number):

				file = open(material_folder + 'multi_' + type_discr + '_level_' + str(l) + work_discr + feat_discr + '_gamma_'+str(float(i+1)/10.0)+'.dat', 'w+')

				for j in range(k):
					file.write('@fold_cr\n')
					file.write(str(classified_ratio[l, i, j])+'\n')
					for o, p in zip(oracle_gamma[l, i, j], prediction_gamma[l, i, j]):
						file.write(str(o)+' '+str(p)+'\n')

				file.close()

		###

		jvm.stop()
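The gamma sweep at the end of kfold_validation discards predictions whose top class probability falls below the threshold and records the fraction of samples that remain classified. A minimal standalone sketch, assuming oracle and prediction are numpy arrays and probabilities is a list of per-sample probability vectors:

import numpy as np

def apply_gamma(oracle, prediction, probabilities, gamma):
    # Keep only samples whose maximum class probability reaches the threshold.
    keep = np.array([np.max(p) >= gamma for p in probabilities])
    classified_ratio = keep.sum() / len(prediction)
    return oracle[keep], prediction[keep], classified_ratio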
def cmat(
    track_file,
    roi_file,
    resolution_network_file,
    matrix_name,
    matrix_mat_name,
    endpoint_name,
    intersections=False,
):
    """ Create the connection matrix for each resolution using fibers and ROIs. """
    import scipy.io as sio

    stats = {}
    iflogger.info("Running cmat function")
    # Identify the endpoints of each fiber
    en_fname = op.abspath(endpoint_name + "_endpoints.npy")
    en_fnamemm = op.abspath(endpoint_name + "_endpointsmm.npy")

    iflogger.info("Reading Trackvis file %s", track_file)
    fib, hdr = nb.trackvis.read(track_file, False)
    stats["orig_n_fib"] = len(fib)

    roi = nb.load(roi_file)
    # Preserve on-disk type unless scaled
    roiData = np.asanyarray(roi.dataobj)
    roiVoxelSize = roi.header.get_zooms()
    (endpoints, endpointsmm) = create_endpoints_array(fib, roiVoxelSize)

    # Output endpoint arrays
    iflogger.info("Saving endpoint array: %s", en_fname)
    np.save(en_fname, endpoints)
    iflogger.info("Saving endpoint array in mm: %s", en_fnamemm)
    np.save(en_fnamemm, endpointsmm)

    n = len(fib)
    iflogger.info("Number of fibers: %i", n)

    # Create empty fiber label array
    fiberlabels = np.zeros((n, 2))
    final_fiberlabels = []
    final_fibers_idx = []

    # Add node information from specified parcellation scheme
    path, name, ext = split_filename(resolution_network_file)
    if ext == ".pck":
        gp = nx.read_gpickle(resolution_network_file)
    elif ext == ".graphml":
        gp = nx.read_graphml(resolution_network_file)
    else:
        raise TypeError("Unable to read file:", resolution_network_file)
    nROIs = len(gp.nodes())

    # add node information from parcellation
    if "dn_position" in gp.nodes[list(gp.nodes())[0]]:
        G = gp.copy()
    else:
        G = nx.Graph()
        for u, d in gp.nodes(data=True):
            G.add_node(int(u), **d)
            # compute a position for the node based on the mean position of the
            # ROI in voxel coordinates (segmentation volume )
            xyz = tuple(
                np.mean(
                    np.where(np.flipud(roiData) == int(d["dn_correspondence_id"])),
                    axis=1,
                )
            )
            G.nodes[int(u)]["dn_position"] = tuple([xyz[0], xyz[2], -xyz[1]])

    if intersections:
        iflogger.info("Filtering tractography from intersections")
        intersection_matrix, final_fiber_ids = create_allpoints_cmat(
            fib, roiData, roiVoxelSize, nROIs
        )
        finalfibers_fname = op.abspath(
            endpoint_name + "_intersections_streamline_final.trk"
        )
        stats["intersections_n_fib"] = save_fibers(
            hdr, fib, finalfibers_fname, final_fiber_ids
        )
        intersection_matrix = np.matrix(intersection_matrix)
        I = G.copy()
        H = nx.from_numpy_matrix(np.matrix(intersection_matrix))
        H = nx.relabel_nodes(H, lambda x: x + 1)  # relabel nodes so they start at 1
        I.add_weighted_edges_from(
            ((u, v, d["weight"]) for u, v, d in H.edges(data=True))
        )

    dis = 0
    for i in range(endpoints.shape[0]):

        # ROI start => ROI end
        try:
            startROI = int(
                roiData[endpoints[i, 0, 0], endpoints[i, 0, 1], endpoints[i, 0, 2]]
            )
            endROI = int(
                roiData[endpoints[i, 1, 0], endpoints[i, 1, 1], endpoints[i, 1, 2]]
            )
        except IndexError:
            iflogger.error(
                "AN INDEXERROR EXCEPTION OCCURED FOR FIBER %s. "
                "PLEASE CHECK ENDPOINT GENERATION",
                i,
            )
            break

        # Filter
        if startROI == 0 or endROI == 0:
            dis += 1
            fiberlabels[i, 0] = -1
            continue

        if startROI > nROIs or endROI > nROIs:
            iflogger.error(
                "Start or endpoint of fiber terminate in a voxel which is labeled higher"
            )
            iflogger.error("than is expected by the parcellation node information.")
            iflogger.error("Start ROI: %i, End ROI: %i", startROI, endROI)
            iflogger.error("This needs bugfixing!")
            continue

        # Update fiber label
        # switch the rois in order to enforce startROI < endROI
        if endROI < startROI:
            tmp = startROI
            startROI = endROI
            endROI = tmp

        fiberlabels[i, 0] = startROI
        fiberlabels[i, 1] = endROI

        final_fiberlabels.append([startROI, endROI])
        final_fibers_idx.append(i)

        # Add edge to graph
        if G.has_edge(startROI, endROI) and "fiblist" in G[startROI][endROI]:
            G[startROI][endROI]["fiblist"].append(i)
        else:
            G.add_edge(startROI, endROI, fiblist=[i])

    # create a final fiber length array
    finalfiberlength = []
    if intersections:
        final_fibers_indices = final_fiber_ids
    else:
        final_fibers_indices = final_fibers_idx

    for idx in final_fibers_indices:
        # compute length of fiber
        finalfiberlength.append(length(fib[idx][0]))

    # convert to array
    final_fiberlength_array = np.array(finalfiberlength)

    # make final fiber labels as array
    final_fiberlabels_array = np.array(final_fiberlabels, dtype=int)

    iflogger.info(
        "Found %i (%f percent out of %i fibers) fibers that start or "
        "terminate in a voxel which is not labeled. (orphans)",
        dis,
        dis * 100.0 / n,
        n,
    )
    iflogger.info("Valid fibers: %i (%f%%)", n - dis, 100 - dis * 100.0 / n)

    numfib = nx.Graph()
    numfib.add_nodes_from(G)
    fibmean = numfib.copy()
    fibmedian = numfib.copy()
    fibdev = numfib.copy()
    for u, v, d in G.edges(data=True):
        G.remove_edge(u, v)
        di = {}
        if "fiblist" in d:
            di["number_of_fibers"] = len(d["fiblist"])
            idx = np.where(
                (final_fiberlabels_array[:, 0] == int(u))
                & (final_fiberlabels_array[:, 1] == int(v))
            )[0]
            di["fiber_length_mean"] = float(np.mean(final_fiberlength_array[idx]))
            di["fiber_length_median"] = float(np.median(final_fiberlength_array[idx]))
            di["fiber_length_std"] = float(np.std(final_fiberlength_array[idx]))
        else:
            di["number_of_fibers"] = 0
            di["fiber_length_mean"] = 0
            di["fiber_length_median"] = 0
            di["fiber_length_std"] = 0
        if not u == v:  # Fix for self loop problem
            G.add_edge(u, v, **di)
            if "fiblist" in d:
                numfib.add_edge(u, v, weight=di["number_of_fibers"])
                fibmean.add_edge(u, v, weight=di["fiber_length_mean"])
                fibmedian.add_edge(u, v, weight=di["fiber_length_median"])
                fibdev.add_edge(u, v, weight=di["fiber_length_std"])

    iflogger.info("Writing network as %s", matrix_name)
    nx.write_gpickle(G, op.abspath(matrix_name))

    numfib_mlab = nx.to_numpy_matrix(numfib, dtype=int)
    numfib_dict = {"number_of_fibers": numfib_mlab}
    fibmean_mlab = nx.to_numpy_matrix(fibmean, dtype=np.float64)
    fibmean_dict = {"mean_fiber_length": fibmean_mlab}
    fibmedian_mlab = nx.to_numpy_matrix(fibmedian, dtype=np.float64)
    fibmedian_dict = {"median_fiber_length": fibmedian_mlab}
    fibdev_mlab = nx.to_numpy_matrix(fibdev, dtype=np.float64)
    fibdev_dict = {"fiber_length_std": fibdev_mlab}

    if intersections:
        path, name, ext = split_filename(matrix_name)
        intersection_matrix_name = op.abspath(name + "_intersections") + ext
        iflogger.info("Writing intersection network as %s", intersection_matrix_name)
        nx.write_gpickle(I, intersection_matrix_name)

    path, name, ext = split_filename(matrix_mat_name)
    if not ext == ".mat":
        ext = ".mat"
        matrix_mat_name = matrix_mat_name + ext

    iflogger.info("Writing matlab matrix as %s", matrix_mat_name)
    sio.savemat(matrix_mat_name, numfib_dict)

    if intersections:
        intersect_dict = {"intersections": intersection_matrix}
        intersection_matrix_mat_name = op.abspath(name + "_intersections") + ext
        iflogger.info("Writing intersection matrix as %s", intersection_matrix_mat_name)
        sio.savemat(intersection_matrix_mat_name, intersect_dict)

    mean_fiber_length_matrix_name = op.abspath(name + "_mean_fiber_length") + ext
    iflogger.info(
        "Writing matlab mean fiber length matrix as %s", mean_fiber_length_matrix_name
    )
    sio.savemat(mean_fiber_length_matrix_name, fibmean_dict)

    median_fiber_length_matrix_name = op.abspath(name + "_median_fiber_length") + ext
    iflogger.info(
        "Writing matlab median fiber length matrix as %s",
        median_fiber_length_matrix_name,
    )
    sio.savemat(median_fiber_length_matrix_name, fibmedian_dict)

    fiber_length_std_matrix_name = op.abspath(name + "_fiber_length_std") + ext
    iflogger.info(
        "Writing matlab fiber length deviation matrix as %s",
        fiber_length_std_matrix_name,
    )
    sio.savemat(fiber_length_std_matrix_name, fibdev_dict)

    fiberlengths_fname = op.abspath(endpoint_name + "_final_fiberslength.npy")
    iflogger.info("Storing final fiber length array as %s", fiberlengths_fname)
    np.save(fiberlengths_fname, final_fiberlength_array)

    fiberlabels_fname = op.abspath(endpoint_name + "_filtered_fiberslabel.npy")
    iflogger.info("Storing all fiber labels (with orphans) as %s", fiberlabels_fname)
    np.save(fiberlabels_fname, np.array(fiberlabels, dtype=np.int32))

    fiberlabels_noorphans_fname = op.abspath(endpoint_name + "_final_fiberslabels.npy")
    iflogger.info(
        "Storing final fiber labels (no orphans) as %s", fiberlabels_noorphans_fname
    )
    np.save(fiberlabels_noorphans_fname, final_fiberlabels_array)

    iflogger.info("Filtering tractography - keeping only no orphan fibers")
    finalfibers_fname = op.abspath(endpoint_name + "_streamline_final.trk")
    stats["endpoint_n_fib"] = save_fibers(hdr, fib, finalfibers_fname, final_fibers_idx)
    stats["endpoints_percent"] = (
        float(stats["endpoint_n_fib"]) / float(stats["orig_n_fib"]) * 100
    )
    if intersections:
        stats["intersections_percent"] = (
            float(stats["intersections_n_fib"]) / float(stats["orig_n_fib"]) * 100
        )

    out_stats_file = op.abspath(endpoint_name + "_statistics.mat")
    iflogger.info("Saving matrix creation statistics as %s", out_stats_file)
    sio.savemat(out_stats_file, stats)
Exemple #54
0
def add_travel_time_dir(graph_dir,
                        mask_dir,
                        conv_dict,
                        graph_dir_out,
                        min_z=128,
                        dx=4,
                        dy=4,
                        percentile=90,
                        use_totband=True,
                        use_weighted_mean=True,
                        variable_edge_speed=False,
                        mask_prefix='',
                        save_shapefiles=True,
                        verbose=False):
    '''Update graph properties to include travel time for entire directory'''
    pickle_protocol = 4  # 4 is most recent, python 2.7 can't read 4

    logger.info("Updating graph properties to include travel time")
    logger.info("  Writing to: " + str(graph_dir_out))
    os.makedirs(graph_dir_out, exist_ok=True)

    image_names = sorted(
        [z for z in os.listdir(mask_dir) if z.endswith('.tif')])
    for i, image_name in enumerate(image_names):
        im_root = image_name.split('.')[0]
        if len(mask_prefix) > 0:
            im_root = im_root.split(mask_prefix)[-1]
        out_file = os.path.join(graph_dir_out, im_root + '.gpickle')

        if (i % 1) == 0:
            logger.info("\n" + str(i + 1) + " / " + str(len(image_names)) +
                        " " + image_name + " " + im_root)
        mask_path = os.path.join(mask_dir, image_name)
        graph_path = os.path.join(graph_dir, im_root + '.gpickle')

        if not os.path.exists(graph_path):
            logger.info("  %i DNE, skipping: %s", i, graph_path)
            return
            # continue

        mask = skimage.io.imread(mask_path)
        G_raw = nx.read_gpickle(graph_path)

        # see if it's empty
        if len(G_raw.nodes()) == 0:
            nx.write_gpickle(G_raw, out_file, protocol=pickle_protocol)
            continue

        G = infer_travel_time(G_raw,
                              mask,
                              conv_dict,
                              min_z=min_z,
                              dx=dx,
                              dy=dy,
                              percentile=percentile,
                              use_totband=use_totband,
                              use_weighted_mean=use_weighted_mean,
                              variable_edge_speed=variable_edge_speed,
                              verbose=verbose)
        G = G.to_undirected()
        nx.write_gpickle(G, out_file, protocol=pickle_protocol)
    return
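infer_travel_time is not shown here; conceptually each edge's travel time is its length divided by an inferred speed. A minimal hedged sketch with hypothetical attribute names (not necessarily the library's actual keys):

def add_travel_time(G, speed_key="inferred_speed_mps",
                    length_key="length_m", out_key="travel_time_s"):
    # travel time = edge length / inferred speed (attribute names are assumptions)
    for u, v, data in G.edges(data=True):
        speed = max(data.get(speed_key, 1e-6), 1e-6)   # avoid division by zero
        data[out_key] = data.get(length_key, 0.0) / speed
    return G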
Exemple #55
0
def main():
    import os
    
    p = 0.7
    delta = 1
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--type', choices=all_graph_types,
                        help='graph type')
    parser.add_argument('-s', '--size', type=int,
                        default=0,
                        help="size of graph")
    parser.add_argument('-e', '--size_exponent', type=int,
                        default=1,
                        help="exponent of the size")
    parser.add_argument('-b', '--exponent_base', type=int,
                        default=10,
                        help="base of the size exponent")
    parser.add_argument('-n', '--n_rounds', type=int,
                        default=100,
                        help="number of simulated cascades")

    args = parser.parse_args()
    gtype = args.type
    if args.size:
        size = args.size
        output_dir = 'data/{}/{}'.format(gtype, size)
    else:
        size = args.exponent_base ** args.size_exponent
        output_dir = 'data/{}/{}-{}'.format(gtype, args.exponent_base,
                                            args.size_exponent)
    if gtype == KRONECKER_HIER:
        g = gen_kronecker(P=P_hier, k=args.size_exponent, n_edges=2**args.size_exponent * 3)
    elif gtype == KRONECKER_PERI:
        g = gen_kronecker(P=P_peri, k=args.size_exponent, n_edges=2**args.size_exponent * 3)
    elif gtype == KRONECKER_RAND:
        g = gen_kronecker(P=P_rand, k=args.size_exponent, n_edges=2**args.size_exponent * 3)
    elif gtype == PL_TREE:
        p = 0.88
        g = random_powerlaw_tree(size, tries=999999)
    elif gtype == B_TREE:
        g = nx.balanced_tree(args.exponent_base, args.size_exponent-1)
    elif gtype == ER:
        g = extract_larges_CC(nx.fast_gnp_random_graph(size, 0.1))
    elif gtype == BARABASI:
        g = extract_larges_CC(nx.barabasi_albert_graph(size, 5))
    elif gtype == GRID:
        g = grid_2d(int(np.sqrt(size)))
    elif gtype == CLIQUE:
        g = nx.complete_graph(size)
    elif gtype == LINE:
        g = nx.path_graph(size)
    else:
        raise ValueError('unsupported graph type {}'.format(gtype))

    g.remove_edges_from(g.selfloop_edges())
    print('|V|={}, |E|={}'.format(g.number_of_nodes(), g.number_of_edges()))

    if gtype == GRID:
        mapping = {(i, j): int(np.sqrt(size)) * i + j for i, j in g.nodes_iter()}
        g = nx.relabel_nodes(g, mapping)
    else:
        g = nx.convert_node_labels_to_integers(g)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print('graph type: {}'.format(gtype))
    # g = add_p_and_delta(g, p, delta)
    output_path = '{}/graph.graphml'.format(output_dir)
    print('saving to {}'.format(output_path))
    nx.write_graphml(g, output_path)
    nx.write_gpickle(g, '{}/graph.gpkl'.format(output_dir))

    if False:
        pkl.dump(time_probas,
                 open('{}/{}.pkl'.format(output_dir, INF_TIME_PROBA_FILE), 'wb'))

        pkl.dump(node2id,
                 open('{}/{}.pkl'.format(output_dir, NODE2ID_FILE), 'wb'))
        pkl.dump(id2node,
                 open('{}/{}.pkl'.format(output_dir, ID2NODE_FILE), 'wb'))
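A minimal follow-up sketch, not part of Exemple #55: it loads the two files written by main(). The directory 'data/grid/10-1' is hypothetical and simply mirrors the output_dir pattern used above.

import networkx as nx

g_xml = nx.read_graphml('data/grid/10-1/graph.graphml')   # node labels come back as strings
g_pkl = nx.read_gpickle('data/grid/10-1/graph.gpkl')      # node labels stay as integers
print(g_xml.number_of_nodes(), g_pkl.number_of_nodes())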
Exemple #56
0
        syns = wn.synsets(term)

        for syn_obj in syns:
            # extracts the text value from the syn object
            syn = syn_obj.name().split('.')[0]

            # We have not seen this syn yet
            if syn not in syn_dict.token2id:
                # add syn term to dictionary
                syn_dict.add_documents([[syn]])

                # add syn node to graph
                pickled_graph.add_node(node_count,
                                       type='SYN',
                                       term_id=syn_dict.token2id[syn],
                                       freq_per_doc=-1,
                                       vector_ind=-1)
                syn_to_node_map[syn_dict.token2id[syn]] = node_count

                # Keep track of values
                node_count += 1

            if syn not in syns_per_term[term]:
                syns_per_term[term].add(syn)

                pickled_graph.add_edge(term_ind,
                                       syn_to_node_map[syn_dict.token2id[syn]],
                                       attr_dict={'weight': 0.5})

nx.write_gpickle(pickled_graph, "final_network.gpickle")
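For context, a standalone sketch (not from the example) of what the synset-name extraction above yields; it assumes the NLTK WordNet corpus is installed.

from nltk.corpus import wordnet as wn  # may require nltk.download('wordnet') first

for syn_obj in wn.synsets('bank')[:3]:
    # Synset names look like 'bank.n.01'; the example keeps only the part before the first dot.
    print(syn_obj.name(), '->', syn_obj.name().split('.')[0])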
            dp["et"] = dp["et_new"]
            dp["rel_err"] = dp["new_rel_err"]
            again = True
            base_graph = g_c
            break
        else:
            print("k updated to {}".format(k))
            k = 1 + (k - 1) * .75
            if k < 1.0001:
                break   


# In[ ]:


nx.write_gpickle(base_graph,"data/taxi_graphs/base_graph_hour_{}.pkl".format(HOUR))


# In[ ]:


for e, info in base_graph.edges.items():
    print(info)
    print("speed is {}".format(info['dist'] / (info['weight'] / 3600.)))
    break


# In[ ]:


#speeds = pd.Series([info["dist"] / (info['weight'] / 3600.) for info in final_graph.edges.values()])
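A small sketch expanding the commented line above, assuming each edge's 'weight' is a travel time in seconds and 'dist' a distance, so dist / (weight / 3600) is a speed in distance units per hour:

import pandas as pd

speeds = pd.Series([info['dist'] / (info['weight'] / 3600.)
                    for info in base_graph.edges.values()])
print(speeds.describe())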
Exemple #58
0
def exportaspickle(ps,name):
    nx.write_gpickle(ps,name)
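A hypothetical usage of the one-line wrapper above:

import networkx as nx

ps = nx.path_graph(5)
exportaspickle(ps, 'path_graph.gpickle')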
Exemple #59
0
    json_data = json.loads(data)
    screen_names_to_user_ids.append((sn, json_data['id']))

g = nx.Graph()
ids_of_interest = [str(user_id) for (screen_name, user_id) in
                   screen_names_to_user_ids]
for (screen_name, user_id) in screen_names_to_user_ids:
    print >> sys.stderr, 'Processing', screen_name

    try:
        friend_ids = list(r.smembers(getRedisIdByScreenName(screen_name,
                          'friend_ids')))
        print >> sys.stderr, "Adding edge: %s, %s" % (str(user_id), str(friend_id))
        [g.add_edge(user_id, friend_id) for friend_id in friend_ids if friend_id
         in ids_of_interest]
    except Exception, e:
        print >> sys.stderr, 'No friend information available. Skipping', screen_name

# store graph to disk by pickling it...

if not os.path.isdir('out'):
    os.mkdir('out')

filename = os.path.join('out', DB + '.gpickle')
nx.write_gpickle(g, filename)

print >> sys.stderr, 'Pickle file stored in: %s' % filename

# you can read it back out like this...
# g = nx.read_gpickle(os.path.join('out', DB + '.gpickle'))
Exemple #60
0
    def write_p_net(self, w_path):
        nx.write_gpickle(self.p_graph, w_path)
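A hypothetical counterpart, not in the original class, for loading the pickled graph back:

    def read_p_net(self, r_path):
        self.p_graph = nx.read_gpickle(r_path)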