Example #1
def createGraph(semantic_relationships, graph_file=None, edges_weight=None):
    '''
    Build the supervised graph G = (V, E) with the networkx library, where V is the set of synsets and E the set of
    semantic relations between synsets (vertices)
    :param semantic_relationships: the relationships from which to build the graph
    :param graph_file: where to save the graph (to avoid rebuilding it each time)
    :param edges_weight: optional collection of (u, v, weight) triples added as weighted edges
    :return: the graph
    '''

    if graph_file is not None and os.path.isfile(graph_file):
        return nx.read_multiline_adjlist(graph_file)

    G = nx.Graph()

    keys = set(semantic_relationships)  # set gives O(1) membership tests in the loop below

    for lemma in tqdm.tqdm(semantic_relationships.keys()):
        G.add_node(lemma)
        for relationship, nodes in semantic_relationships[lemma].items():
            for node in nodes:
                if node in keys:
                    G.add_edge(lemma, node, v=relationship, weight=1.0)

    if edges_weight is not None:
        G.add_weighted_edges_from(edges_weight)

    if graph_file is not None:
        nx.write_multiline_adjlist(G, graph_file)

    return G
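For reference, a minimal sketch of the write/read round-trip that the caching above relies on; the toy graph and file name are made up for illustration, and this assumes only that networkx is installed:

import networkx as nx

# toy graph with one attribute-bearing edge
G = nx.Graph()
G.add_edge('dog_n', 'cat_n', v='related', weight=1.0)

# one node per line, its neighbours (with their data dicts) on the lines below it
nx.write_multiline_adjlist(G, 'toy.adjlist')

# node labels come back as strings unless nodetype is given
H = nx.read_multiline_adjlist('toy.adjlist')
assert sorted(H.edges()) == sorted(G.edges())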
Example #2
def GENERATE_MUL_NETWORK(NETWORK_DICT, RANDOM_DICT, N_LAYERS, path):

    # This feature can be shelved for now; dropping it would be no loss.

    MULTI_NETWORK = list()
    for value in NETWORK_DICT.values():
        MULTI_NETWORK.append(
            inf_network(False, None, value['name'], **value['arg']))

    for i in range(N_LAYERS):
        MULTI_NETWORK[i].layer = i

    for i in range(N_LAYERS):
        MULTI_NETWORK[i].change_weight(RANDOM_DICT[str(i + 1)]['fun'],
                                       **RANDOM_DICT[str(i + 1)]['arg'])

    # write each layer's adjacency list to its own file
    for NETWORK in MULTI_NETWORK:
        with open(path + '{}.nt'.format(NETWORK.layer), 'wb') as out:
            nx.write_multiline_adjlist(NETWORK.network, out, delimiter=',')

    with open(path + 'detail.txt', 'w') as out:
        out.write("#NODES #LAYERS" + '\n')
        out.write("{0:^5d} {1:^5d}".format(MULTI_NETWORK[0].num_nodes,
                                           N_LAYERS) + '\n')

    return True
Example #3
    def save_results(self, image_name, *results):
        """
        Create a directory of the following format: current pipeline + fname.
        Save and put the results of algorithm processing in the directory.

        Args:
            | *image_name* (str): image name
            | *results* (list): a list of arguments to save

        """
        # saving the processed image
        try:
            cv2.imwrite(os.path.join(self.out_dir, image_name), results[0])
        except (IOError, cv2.error):
            print('ERROR! Could not write an image file, make sure there is ' +
                  'enough free space on disk')
            sys.exit(1)
        if not self.isui:
            print('Success!', image_name, 'saved in', self.out_dir)
        # exporting graph object
        if results[1]:
            image_name = os.path.splitext(image_name)[0] + '.txt'
            nx.write_multiline_adjlist(results[1], os.path.join(self.out_dir,
                                                                image_name),
                                       delimiter='|')
            print('Success!', image_name, 'saved in', self.out_dir)
Example #4
def shortest_path(src, dst, names, cost):
    G = nx.DiGraph()
    G.add_nodes_from(names)

    indices = {}
    for i in range(len(names)):
        indices[names[i]] = i

    # Add edges from src to stations before it
    for i in range(indices[src]):
        G.add_edge(src, names[i], weight=cost(src, names[i]))
    # Add edges from stations after dst to dst
    for i in range(indices[dst] + 1, len(names)):
        G.add_edge(names[i], dst, weight=cost(names[i], dst))
    # Add reverse edges from every station after src back
    for i in range(indices[src] + 1, len(names)):
        for j in range(indices[src] + 1, i):
            G.add_edge(names[i], names[j], weight=cost(names[i], names[j]))
    # Add edges from every station before dst to
    # every station after src
    for i in range(len(names)):
        for j in range(i + 1, len(names)):
            if i >= indices[dst] or j <= indices[src]:
                continue
            G.add_edge(names[i], names[j], weight=cost(names[i], names[j]))
    nx.write_multiline_adjlist(G, "test.adjlist")
    return nx.shortest_path(G, src, dst, weight='weight')
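For context, a toy invocation of the shortest_path helper above; the station list and the index-distance cost function are hypothetical, and note that the function also drops a test.adjlist file into the working directory as a side effect:

# five stations laid out along a line
names = ['A', 'B', 'C', 'D', 'E']
idx = {name: i for i, name in enumerate(names)}

def cost(u, v):
    # made-up cost: distance between the stations' positions
    return abs(idx[u] - idx[v])

# returns a list of station names along the cheapest route
print(shortest_path('B', 'D', names, cost))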
Example #6
 def test_multiline_adjlist_delimiter(self):
     fh = io.BytesIO()
     G = nx.path_graph(3)
     nx.write_multiline_adjlist(G, fh, delimiter=':')
     fh.seek(0)
     H = nx.read_multiline_adjlist(fh, nodetype=int, delimiter=':')
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
Example #8
 def test_multiline_adjlist_delimiter(self):
     fh = io.BytesIO()
     G = nx.path_graph(3)
     nx.write_multiline_adjlist(G, fh, delimiter=':')
     fh.seek(0)
     H = nx.read_multiline_adjlist(fh, nodetype=int, delimiter=':')
     assert_equal(sorted(H.nodes()), sorted(G.nodes()))
     assert_equal(sorted(H.edges()), sorted(G.edges()))
Example #9
 def test_multiline_adjlist_integers(self):
     (fd, fname) = tempfile.mkstemp()
     G = nx.convert_node_labels_to_integers(self.G)
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname, nodetype=int)
     H2 = nx.read_multiline_adjlist(fname, nodetype=int)
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
Example #11
 def test_latin1(self):
     G = nx.Graph()
     name1 = 'Bj' + chr(246) + 'rk'
     name2 = chr(220) + 'ber'
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname, encoding='latin-1')
     H = nx.read_multiline_adjlist(fname, encoding='latin-1')
     assert_graphs_equal(G, H)
     os.close(fd)
     os.unlink(fname)
Example #12
def createDocumentsGraph(train, graph_file=None, sem_rel=None, sem_graph=None):
    """

    :param train:
    :param graph_file:
    :param sem_rel:
    :param sem_graph:
    :return:
    """
    if graph_file is not None and os.path.isfile(graph_file):
        return nx.read_multiline_adjlist(graph_file)

    G = nx.DiGraph()

    if sem_graph is not None:
        G = nx.compose(G, sem_graph.to_directed())

    for doc in train.keys():
        for sentence in train[doc]:
            lemmas = [l for l in sentence if isinstance(l, utils.instance)]

            for i in range(1, len(lemmas)):

                lemma = lemmas[i]
                lemma_key = lemma.lemma + '_' + lemma.pos
                prev_lemma = lemmas[i - 1]

                prev_lemma_key = prev_lemma.lemma + '_' + prev_lemma.pos

                if i == 1:
                    G.add_node(prev_lemma_key)

                G.add_node(lemma_key)
                G.add_edge(prev_lemma_key, lemma_key)

                if sem_rel is not None:

                    for r, nodes in sem_rel[lemma.instance].items():
                        for node in nodes:
                            if node not in G.nodes:
                                continue
                            G.add_edge(lemma_key, node)

                    if i == 1:
                        for r, nodes in sem_rel[prev_lemma.instance].items():
                            for node in nodes:
                                if node not in G.nodes:
                                    continue
                                G.add_edge(prev_lemma_key, node)

    if graph_file is not None:
        nx.write_multiline_adjlist(G, graph_file)

    return G
Example #13
 def test_unicode(self):
     G = nx.Graph()
     name1 = chr(2344) + chr(123) + chr(6543)
     name2 = chr(5543) + chr(1543) + chr(324)
     G.add_edge(name1, "Radiohead", **{name2: 3})
     fd, fname = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname)
     assert_graphs_equal(G, H)
     os.close(fd)
     os.unlink(fname)
Example #14
 def test_multiline_adjlist_digraph(self):
     G = self.DG
     (fd, fname) = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
     H2 = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
     assert H is not H2  # they should be different graphs
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
Example #15
 def test_multiline_adjlist_graph(self):
     G = self.G
     (fd, fname) = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname)
     H2 = nx.read_multiline_adjlist(fname)
     assert_not_equal(H, H2)  # they should be different graphs
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
Example #17
 def test_multiline_adjlist_digraph(self):
     G = self.DG
     (fd, fname) = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
     H2 = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
     assert_not_equal(H, H2)  # they should be different graphs
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
Example #18
 def test_multiline_adjlist_graph(self):
     G = self.G
     (fd, fname) = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname)
     H2 = nx.read_multiline_adjlist(fname)
     assert_not_equal(H, H2)  # they should be different graphs
     assert_equal(sorted(H.nodes()), sorted(G.nodes()))
     assert_equal(sorted(H.edges()), sorted(G.edges()))
     os.close(fd)
     os.unlink(fname)
Example #19
def barabasi_albert(n, m):
    """Generates a barabasi albert graph given the number of nodes and the m value"""

    click.echo('generating barabasi albert model with n = {}, m = {}'.format(
        n, m))
    m0 = m + 1
    t = n - m0
    G = nx.complete_graph(m0)
    for i in range(m0, n):
        G.add_node(i)
        sumaGradosNodos = 0

        for nodo in range(0, i):
            sumaGradosNodos += G.degree(nodo)

        probConexion = {}
        gradosNodos = nx.degree(G)
        for j in range(0, i):
            probConexion[j] = (float)(gradosNodos[j]) / sumaGradosNodos

        probAcumulada = []
        aux = 0
        for idNodo, probabilidad in probConexion.items():
            nodo = (idNodo, aux + probabilidad)
            probAcumulada.append(nodo)
            aux += probabilidad

        conexiones = 0
        nodosAdded = []
        while (conexiones < m):
            num = random.random()
            actual = 0
            while (actual < i and probAcumulada[actual][1] < num):
                actual += 1

            idDestino = probAcumulada[actual][0]
            if idDestino not in nodosAdded:
                nodosAdded.append(idDestino)
                G.add_edge(i, idDestino)
                conexiones += 1

        if i < 501 and i % 25 == 0 or (i < 20 and i % 5 == 0):
            nx.write_multiline_adjlist(
                G,
                '../graphs/barabasi-albert/steps/ba_n_{}_m_{}_steps_{:03d}.csv'
                .format(n, m, i),
                delimiter=",")

    nx.write_multiline_adjlist(G,
                               '../graphs/barabasi-albert/ba_{}_{}.csv'.format(
                                   m, n),
                               delimiter=",")
    return G
Example #20
 def test_multiline_adjlist_multidigraph(self):
     G = self.XDG
     (fd, fname) = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname, nodetype=int,
                                   create_using=nx.MultiDiGraph())
     H2 = nx.read_multiline_adjlist(fname, nodetype=int,
                                    create_using=nx.MultiDiGraph())
     assert_not_equal(H, H2)  # they should be different graphs
     assert_nodes_equal(H.nodes(), G.nodes())
     assert_edges_equal(H.edges(), G.edges())
     os.close(fd)
     os.unlink(fname)
Example #21
 def test_unicode(self):
     G = nx.Graph()
     try:  # Python 3.x
         name1 = chr(2344) + chr(123) + chr(6543)
         name2 = chr(5543) + chr(1543) + chr(324)
     except ValueError:  # Python 2.6+
         name1 = unichr(2344) + unichr(123) + unichr(6543)
         name2 = unichr(5543) + unichr(1543) + unichr(324)
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname)
     H = nx.read_multiline_adjlist(fname)
     assert_graphs_equal(G, H)
     os.close(fd)
     os.unlink(fname)
Example #23
 def test_latin1(self):
     G = nx.Graph()
     try:  # Python 3.x
         blurb = chr(1245)  # just to trigger the exception
         name1 = 'Bj' + chr(246) + 'rk'
         name2 = chr(220) + 'ber'
     except ValueError:  # Python 2.6+
         name1 = 'Bj' + unichr(246) + 'rk'
         name2 = unichr(220) + 'ber'
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname, encoding='latin-1')
     H = nx.read_multiline_adjlist(fname, encoding='latin-1')
     assert_graphs_equal(G, H)
     os.close(fd)
     os.unlink(fname)
Example #25
 def test_latin1(self):
     G = nx.Graph()
     try:  # Python 3.x
         blurb = chr(1245)  # just to trigger the exception
         name1 = "Bj" + chr(246) + "rk"
         name2 = chr(220) + "ber"
     except ValueError:  # Python 2.6+
         name1 = "Bj" + unichr(246) + "rk"
         name2 = unichr(220) + "ber"
     G.add_edge(name1, "Radiohead", **{name2: 3})
     fd, fname = tempfile.mkstemp()
     nx.write_multiline_adjlist(G, fname, encoding="latin-1")
     H = nx.read_multiline_adjlist(fname, encoding="latin-1")
     assert_graphs_equal(G, H)
     os.close(fd)
     os.unlink(fname)
Example #26
def load(json_name):
    #making G (networkx)
    if json_name == "pan12-sexual-predator-identification-training-corpus-2012-05-01":
        G = nx.read_multiline_adjlist(
            "adjlists/train_networkxBeforeRemove.adjlist")
    elif json_name == "pan12-sexual-predator-identification-test-corpus-2012-05-17":
        G = nx.read_multiline_adjlist(
            "adjlists/test_networkxBeforeRemove.adjlist")
    else:
        raise ValueError("unknown corpus name: " + json_name)

    #generate picture of networkx
    # nx.draw(G, node_size=1)
    # plt.savefig("../API/client/public/models/load/networkx_before_remove.png")
    # Remove All 2-Connected-Components in G
    for component in list(nx.connected_components(G)):
        if len(component) <= 2:  # removes components with at most two nodes
            for node in component:
                G.remove_node(node)
    nx.write_multiline_adjlist(G, "./adjlists/graphU.adjlist")
    # nx.draw(G, node_size=3)
    # plt.savefig("../API/client/public/models/load/networkx_after_remove.png")
    return G
Example #27
 def save_graph(self, is_needed, df, graph, name):
     if is_needed:
         if not os.path.isdir("data/graphs/" + name):
             os.mkdir("data/graphs/" + name)
         df.to_csv("data/graphs/%s/%s.tsv" % (name, name), sep="\t")
         nx.write_gpickle(graph, "data/graphs/%s/%s.gpickle" % (name, name))
         nx.write_adjlist(graph,
                          "data/graphs/%s/%s.adjlist" % (name, name),
                          delimiter="\t")
         nx.write_multiline_adjlist(
             graph,
             "data/graphs/%s/%s.multiline_adjlist" % (name, name),
             delimiter="\t",
         )
         nx.write_edgelist(graph,
                           "data/graphs/%s/%s.edgelist" % (name, name),
                           delimiter="\t")
         with open("data/graphs/%s/%s.cyjs" % (name, name), "w") as outfile:
             outfile.write(json.dumps(nx.cytoscape_data(graph), indent=2))
         graph = stringify_list_attributes(graph)
         nx.write_gexf(graph, "data/graphs/%s/%s.gexf" % (name, name))
         nx.write_graphml(graph, "data/graphs/%s/%s.graphml" % (name, name))
Example #28
 def is_connected(self):
     vertex2bags = self.bag_occuences()
     # print self.hypergraph.number_of_edges()
     for v in self.hypergraph.nodes_iter():
         logging.debug("vertex %s" % v)
         SG = self.tree.subgraph(vertex2bags[v])
         if not nx.is_connected(SG.to_undirected()):
             logging.error(
                 'Subgraph induced by vertex "%s" is not connected' % v)
             string = StringIO()
             nx.write_multiline_adjlist(SG, string)
             logging.error('Involved bags: %s' % vertex2bags[v])
             logging.error(
                 'Nodes of the hypergraph (should be the same): %s' %
                 SG.nodes())
             logging.error('Begin adjacency list')
             # skip the comment lines networkx writes at the top
             for line in string.getvalue().split('\n')[3:-1]:
                 logging.error('%s' % line)
             logging.error('End adjacency list')
             return False
     return True
Example #29
def erdos_renyi(n, p, total):
    """Generates an Erdos-Renyi graph, also called a random graph, given the number of nodes 'n',
    the edge probability 'p', and the total number of graphs to generate with those parameters"""

    click.echo('generating {} erdos renyi model(s) with n = {}, p = {}'.format(
        total, n, p))
    for i in range(total):
        G = nx.Graph()
        G.add_nodes_from(range(n))
        # Start from an empty graph populated with unconnected nodes

        for nodo1 in range(n):  # pick a node
            for nodo2 in range(nodo1 + 1, n):
                # every node after nodo1 is a candidate for a link with nodo1
                random_num = random.random()  # draw a random number in [0.0, 1.0)
                if random_num <= p:  # link nodo1 and nodo2 when the draw does not exceed p
                    G.add_edge(nodo1, nodo2)  # create that link in the graph

        #nx.write_edgelist(er, '../graphs/erdos-renyi/erdos_renyi_{}_n{}_p{}.csv'.format(i,n,p), delimiter=",", data=True)
        nx.write_multiline_adjlist(
            G,
            '../graphs/erdos-renyi/erdos_renyi_{}_n{}_p{}.csv'.format(i, n, p),
            delimiter=",")
Example #30
def getFeatureGraph(mAllData,
                    dEdgeThreshold=0.30,
                    bResetGraph=True,
                    dMinDivergenceToKeep=np.log2(10e5)):

    try:
        if bResetGraph:
            raise Exception("User requested graph recreation.")

        print("Trying to load graph...")
        g = read_multiline_adjlist("graphAdjacencyList.txt")
        with open("usefulFeatureNames.pickle", "rb") as fIn:
            saUsefulFeatureNames = pickle.load(fIn)
        print("Trying to load graph... Done.")
        return g, saUsefulFeatureNames
    except Exception as e:
        print("Trying to load graph... Failed:\n%s\n Recomputing..." %
              (str(e)))

    # DEBUG LINES
    print("Got data of size %s." % (str(np.shape(mAllData))))
    print("Extracting graph...")
    #############
    # Init graph

    # Determine meaningful features (with a divergence of more than MIN_DIVERGENCE from the control mean)

    iFeatureCount = np.shape(mAllData)[1]
    mMeans = np.nanmean(mAllData, 0)  # Ignore nans

    vUseful = [
        abs(mMeans[iFieldNum]) - dMinDivergenceToKeep > 0.00
        for iFieldNum in range(1, iFeatureCount)
    ]

    saFeatures = getFeatureNames()[1:iFeatureCount]
    saUsefulIndices = [
        iFieldNum for iFieldNum, _ in enumerate(saFeatures)
        if vUseful[iFieldNum]
    ]
    saUsefulFeatureNames = [
        saFeatures[iFieldNum] for iFieldNum in saUsefulIndices
    ]
    iUsefulFeatureCount = len(saUsefulIndices)
    print("Keeping %d features out of %d." %
          (len(saUsefulIndices), len(saFeatures)))
    ###############################

    g = nx.Graph()
    print("Adding nodes...")
    # Add a node for each feature
    lIndexedNames = enumerate(saFeatures)
    for idx in saUsefulIndices:
        # Only act on useful features
        g.add_node(saFeatures[idx], label=idx)
    print("Adding nodes... Done.")

    # Measure correlations
    print("Creating edges for %d possible pairs..." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))
    lCombinations = itertools.combinations(saUsefulIndices, 2)

    # Create queue and threads
    qCombination = Queue(10000)
    threads = []
    num_worker_threads = 4
    for i in range(num_worker_threads):
        t = threading.Thread(target=addEdgeAboveThreshold,
                             args=(i, qCombination))
        t.daemon = True  # setDaemon() is deprecated in favour of the attribute
        t.start()

    iCnt = 0
    dStartTime = clock()
    for iFirstFeatIdx, iSecondFeatIdx in lCombinations:
        qCombination.put(
            (iFirstFeatIdx, iSecondFeatIdx, g, mAllData, saFeatures,
             iFirstFeatIdx, iSecondFeatIdx, dEdgeThreshold))

        # DEBUG LINES
        if iCnt != 0 and (iCnt % 1000 == 0):
            sys.stdout.write(".")
            if iCnt % 10000 == 0 and (iCnt != 10000):
                dNow = clock()
                dRate = ((dNow - dStartTime) / iCnt)
                dRemaining = (0.5 *
                              (iUsefulFeatureCount * iUsefulFeatureCount) -
                              iCnt) * dRate
                sys.stdout.write(
                    "%d (Estimated remaining (sec): %4.2f - Working at a rate of %4.2f pairs/sec)\n"
                    % (iCnt, dRemaining, 1.0 / dRate))

        iCnt += 1
        #############

    print("Waiting for completion...")
    qCombination.join()
    print("Total time (sec): %4.2f" % (clock() - dStartTime))

    print("Creating edges for %d possible pairs... Done." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))

    print("Extracting graph... Done.")

    print("Removing single nodes... Nodes before removal: %d" %
          (g.number_of_nodes()))
    toRemove = [
        curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
    ]
    while len(toRemove) > 0:
        g.remove_nodes_from(toRemove)
        toRemove = [
            curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
        ]
        print("Nodes after removal step: %d" % (g.number_of_nodes()))
    print("Removing single nodes... Done. Nodes after removal: %d" %
          (g.number_of_nodes()))

    print("Saving graph...")
    write_multiline_adjlist(g, "graphAdjacencyList.txt")
    with open("usefulFeatureNames.pickle", "wb") as fOut:
        pickle.dump(saUsefulFeatureNames, fOut)

    print("Saving graph... Done.")

    print("Trying to load graph... Done.")

    return g, saUsefulFeatureNames
Example #31
def main(rdfLocation, outputType):
    """Main function which parses the rdf graph then sends it to be queried.
    It uses the result from the queryAll function to call the required file parser.
    If multiple files are detected it will parse them all.
    Returns: Final Network output to file in a variety of common network file formats. 
    """
    files = []
    # Parse The RDF Graph Object by first checking if it is a file location or the file itself.
    if os.path.isfile(rdfLocation):
        g.parse(location=rdfLocation)
    else:
        g.parse(data=rdfLocation)
    
    # Specify Output File Type For graph Object
    outputFileType = outputType
    
    # Create NetworkX Graph object
    G = nx.MultiDiGraph()
    
    # Extract the file types from the rdf as this will be used to call a custom query for a certain filetype
    fileInfo = g.query("""SELECT ?name ?filetype WHERE { ?s network:fileType ?filetype .
                                                          ?s network:fileName ?name . }""")
                                                          
    # Loop through files contained within the dataset and issue relevant queries
    for element in fileInfo:
        # Extract the FileType and Name from the query result
        fileName = str(element[0])
        fileType = str(element[1])

        # Call XML functions
        if fileType == "xml":
            print("Found XML File")
            xmlData = queryAll(fileName, fileType)
            for f in xmlData:
                G = FP.parseXML(f, G)

        elif fileType == "json":
            print("Found JSON file")
            jsonData = queryAll(fileName, fileType)
            for f in jsonData:
                G = FP.parseJSON(f, G)

        elif fileType == "csv":
            print("Found CSV file")
            csvData = queryAll(fileName, fileType)
            for f in csvData:
                print(f)
                G = FP.parseCSV(f, G)

        elif fileType == "excel":
            print("Found Excel File")
            excelData = queryAll(fileName, fileType)
            for f in excelData:
                G = FP.parseEXCEL(f, G)

        else:
            print("Currently Not a Supported File")

        print("Finished processing file")
        
    # Write out the graph object to file -------------------------------------------------------------------------------------
    if outputFileType == "gml":
        nx.write_gml(G,'output.gml')
        
    elif outputFileType == "adj":
        nx.write_adjlist(G,"output.adj")

    elif outputFileType == "mladj":
        nx.write_multiline_adjlist(G,"output.adjlist")
    
    elif outputFileType == "graphml":
        nx.write_graphml(G, "output.graphml")   
        
    elif outputFileType == "pajek":
        nx.write_pajek(G, "output.net")

    elif outputFileType == "neo4j":
        # Update to address of the Neo4j server
        results = neonx.write_to_neo("http://localhost:7474/db/data/", G, 'LINKS_TO')
Example #32
import networkx as nx
from networkx.readwrite import json_graph

import json

if __name__ == '__main__':
    graph = nx.read_edgelist('input/example_graph.edgelist', nodetype=int, data=(('weight', float),))
    assert isinstance(graph, nx.Graph)
    print('edges:', graph.edges())

    # raw
    nx.write_adjlist(graph, 'output_raw/example_graph.adjlist')
    nx.write_multiline_adjlist(graph, 'output_raw/example_graph.multiline_adjlist')
    nx.write_edgelist(graph, 'output_raw/example_graph.edgelist')

    # better serialization
    nx.write_gpickle(graph, 'output_serialization/example_graph.pickle')
    nx.write_yaml(graph, 'output_serialization/example_graph.yaml')
    nx.write_graph6(graph, 'output_serialization/example_graph.graph6')

    # xml
    nx.write_gexf(graph, 'output_xml/example_graph.gexf')
    nx.write_graphml(graph, 'output_xml/example_graph.graphml')

    # json
    with open('output_json/node_link.json', 'w') as outfile:
        json.dump(json_graph.node_link_data(graph), outfile, indent=2)

    with open('output_json/adjacency.json', 'w') as outfile:
        json.dump(json_graph.adjacency_data(graph), outfile, indent=2)
Example #33
# band names containing non-ASCII characters
hd = 'H' + chr(252) + 'sker D' + chr(252)
mh = 'Mot' + chr(246) + 'rhead'
mc = 'M' + chr(246) + 'tley Cr' + chr(252) + 'e'
st = 'Sp' + chr(305) + 'n' + chr(776) + 'al Tap'
q = 'Queens' + chr(255) + 'ryche'
boc = 'Blue ' + chr(214) + 'yster Cult'
dt = 'Deatht' + chr(246) + 'ngue'

G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding
fh = open('edgelist.utf-8', 'wb')
fh.write('# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# read and store in UTF-8
fh = open('edgelist.utf-8', 'rb')
H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
Example #34
def write_graph(gr, from_user, to_user):
    with get_file(edgelist_fname(**locals()), 'w') as fd:
        NX.write_multiline_adjlist(gr, fd, delimiter='\t')
Example #35
def main():
    args = set_manta().parse_args(sys.argv[1:])
    args = vars(args)
    if args['version']:
        info = VersionInfo('manta')
        logger.info('Version ' + info.version_string())
        exit(0)
    if args['graph'] != 'demo':
        filename = args['graph'].split(sep=".")
        extension = filename[len(filename) - 1]
        # see if the file can be detected
        # if not, try appending current working directory and then read.
        if not os.path.isfile(args['graph']):
            if os.path.isfile(os.getcwd() + '/' + args['graph']):
                args['graph'] = os.getcwd() + '/' + args['graph']
            else:
                logger.error(
                    'Could not find the specified file. Is your file path correct?'
                )
                exit()
        try:
            if extension == 'graphml':
                network = nx.read_graphml(args['graph'])
            elif extension == 'txt':
                network = nx.read_weighted_edgelist(args['graph'])
            elif extension == 'gml':
                network = nx.read_gml(args['graph'])
            elif extension == 'cyjs':
                network = read_cyjson(args['graph'])
            else:
                logger.warning(
                    'Format not accepted. '
                    'Please specify the filename including extension (e.g. test.graphml).',
                    exc_info=True)
                exit()
        except Exception:
            logger.error('Could not import network file!', exc_info=True)
            exit()
        # first need to convert network to undirected
    elif args['graph'] == 'demo':
        path = os.path.dirname(manta.__file__)
        path = path + '//demo.graphml'
        network = nx.read_graphml(path)
    if args['direction']:
        if extension == 'txt':
            logger.warning(
                'Directed networks from edge lists not supported, use graphml or cyjs! '
            )
            exit()
    else:
        network = nx.to_undirected(network)
    if args['bin']:
        orig_edges = dict()
        # store original edges for export
        for edge in network.edges:
            orig_edges[edge] = network.edges[edge]['weight']
            network.edges[edge]['weight'] = np.sign(
                network.edges[edge]['weight'])
    results = cluster_graph(network,
                            limit=args['limit'],
                            max_clusters=args['max'],
                            min_clusters=args['min'],
                            min_cluster_size=args['ms'],
                            iterations=args['iter'],
                            subset=args['subset'],
                            ratio=args['ratio'],
                            edgescale=args['edgescale'],
                            permutations=args['perm'],
                            verbose=args['verbose'])
    graph = results[0]
    if args['cr']:
        perm_clusters(graph=graph,
                      limit=args['limit'],
                      max_clusters=args['max'],
                      min_clusters=args['min'],
                      min_cluster_size=args['ms'],
                      iterations=args['iter'],
                      ratio=args['ratio'],
                      partialperms=args['perm'],
                      relperms=args['rel'],
                      subset=args['subset'],
                      error=args['error'],
                      verbose=args['verbose'])
    layout = None
    if args['bin']:
        for edge in network.edges:
            network.edges[edge]['weight'] = orig_edges[edge]
    if args['layout']:
        layout = generate_layout(graph, args['tax'])
    if args['fp']:
        if args['f'] == 'graphml':
            nx.write_graphml(graph, args['fp'] + '.graphml')
        elif args['f'] == 'edgelist':
            nx.write_weighted_edgelist(graph, args['fp'] + '.txt')
        elif args['f'] == 'gml':
            nx.write_gml(graph, args['fp'] + '.gml')
        elif args['f'] == 'adj':
            nx.write_multiline_adjlist(graph, args['fp'] + '.txt')
        elif args['f'] == 'cyjs':
            write_cyjson(graph=graph,
                         filename=args['fp'] + '.cyjs',
                         layout=layout)
        logger.info('Wrote clustered network to ' + args['fp'] + '.' +
                    args['f'])
    else:
        logger.error('Could not write network to disk, no file path given.')
    exit(0)
Example #36
boc = "Blue " + chr(214) + "yster Cult"
dt = "Deatht" + chr(246) + "ngue"

G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding
fh = open("edgelist.utf-8", "wb")
nx.write_multiline_adjlist(G, fh, delimiter="\t", encoding="utf-8")

# read and store in UTF-8
fh = open("edgelist.utf-8", "rb")
H = nx.read_multiline_adjlist(fh, delimiter="\t", encoding="utf-8")

for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
Example #37
for author1 in authors:
    papers = author_paper[author1]
    for paper in papers:
        coauthors = paper_author[paper]
        for author2 in coauthors:
            if author1 != author2:
                emb1 = embs[author1]
                emb2 = embs[author2]
                n_author1 = nodes[author1]
                n_author2 = nodes[author2]
                val = cosine(emb1, emb2)
                simadj[n_author1,n_author2] = val

SG = nx.from_scipy_sparse_matrix(simadj)
SG = nx.relabel_nodes(SG, inv_nodes)
nx.write_multiline_adjlist(SG, '../data/sim_collaboration_network.adjlist')



# Paper and paper similarity graph
# read the file to create a dictionary with paperId as key and paper embedding as value
f = open("../data/paper_embeddings_64_dm.txt","r")
papers = {}
s = ""
pattern = re.compile(r'(\s){2,}')
for l in f:
    if ":" in l and s != "":
        papers[s.split(":")[0].strip()] = np.array(
            ast.literal_eval(re.sub(pattern, ',', s.split(":")[1]).replace(" ", ",")))
        s = l.replace("\n", "")
    else:
        s = s + " " + l.replace("\n", "")
def barabasi_albert(nodos, m):
    m0 = m + 1
    t = nodos - m0
    G = nx.complete_graph(m0)  # start from an initial core of m0 nodes, each with at least one link

    '''
    --------------------------------------------------------------------------------------------------------------------------------------------------
                                         PREPROCESSING
    --------------------------------------------------------------------------------------------------------------------------------------------------
    '''
    for i in range(m0, nodos):  # add the remaining N - m0 nodes
        G.add_node(i)  # add the new node we want to connect
        sumaGradosNodos = 0

        for nodo in range(0, i):
            sumaGradosNodos += G.degree(nodo)
        # Sum the degrees of every node currently in the network; this is the denominator of the connection-probability formula
        probConexion = {}  # dictionary holding each existing node's probability of forming a new connection with node i
        # This scheme is known as preferential attachment, since new nodes tend to connect to the most connected nodes
        # probConexion -> key: node id, value: probability

        gradosNodos = nx.degree(G)  # the list with the degree of every node
        # Fill the dictionary with the probability of every node present so far, using the formula:
        # Pi = ki / SUM kj
        # Pi is the probability that one of the new links attaches to node i,
        # ki is the degree of the existing node,
        # and the denominator is the sum of the degrees of the network so far
        for j in range(0, i):
            probConexion[j] = (float)(gradosNodos[j]) / sumaGradosNodos
        '''
        TO ATTACH EDGES TO THE NEW NODE WE USE THE CUMULATIVE-PROBABILITY METHOD
        This is how preferential attachment is implemented here.

        Draw a random number in [0.0, 1.0).
        The cumulative probability of a node is the sum of all the probabilities before it.

        These two values together implement preferential attachment: the higher a node's cumulative
        probability, the wider its selection window and the more likely it is to be chosen; with a low
        cumulative probability the window is smaller, making that node less likely to be picked.

        Example:
        probabilities = [0.2, 0.3, 0.5]
        cumulative    = [0.2, 0.5, 1.0]

        Random number in [0.0, 1.0):
        n = 0.4

        windows:
        0.2: [0.0, 0.2]
        0.5: [0.0, 0.5]
        1.0: [0.0, 1.0]

        The wider the window, the more likely the random number falls inside it.
        '''

        # Build a list of cumulative probabilities, holding tuples of (node id, cumulative probability)
        probAcumulada = []  # empty list
        aux = 0
        for idNodo, probabilidad in probConexion.items():
            nodo = (idNodo, aux + probabilidad)  # create a list element with the needed information
            probAcumulada.append(nodo)

            aux += probabilidad  # carry the running total into the next iteration

    # --------------------------------------------------------------------------------------------------------------------------------------------------
    #                                              CREATING THE CONNECTIONS
    # --------------------------------------------------------------------------------------------------------------------------------------------------

    # Now make m connections (m edges to m distinct nodes), based on the data gathered above
        conexiones = 0
        nodosAdded = []  # nodes already selected to connect to the new node

        while conexiones < m:
            n = random.random()
            actual = 0

            while actual < i and probAcumulada[actual][1] < n:
                # while we have not reached the new node and the cumulative probability is below n, move on to the next candidate
                actual += 1

            idDestino = probAcumulada[actual][0]  # extract the id of the node selected for the connection

            # Check that idDestino is not already connected to the new node
            if idDestino not in nodosAdded:
                nodosAdded.append(idDestino)  # record it in the list of selected nodes
                G.add_edge(i, idDestino)  # add the connection to the graph
                conexiones += 1

            if conexiones < 501 and conexiones % 25 == 0 or (conexiones < 20 and conexiones % 5 == 0):
                nx.write_multiline_adjlist(G, '../graphs/barabasi-albert/steps/ba_steps_{:03d}.csv'.format(conexiones), delimiter=",")

    return G
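The cumulative-probability selection spelled out in the comments above can be written compactly with the standard library; a self-contained sketch of the same idea, using a made-up degree list as input:

import bisect
import itertools
import random

degrees = [1, 3, 2, 6, 4]  # toy degrees of the existing nodes
total = sum(degrees)

# running sums of the probabilities, ending at 1.0
cumulative = list(itertools.accumulate(d / total for d in degrees))

# the first cumulative value that exceeds the draw picks the node:
# higher-degree nodes own wider windows, so they win more often
draw = random.random()
selected = bisect.bisect_right(cumulative, draw)
print(selected)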
Example #39
    def _traverse_and_execute(
            self, executable: GraphExecutable
    ) -> None:  # noqa mccabe: disable=MC0001
        """ Processes a single graph and loads the data into the replica if required

            To save memory after processing, the loaded dataframes are deleted, and
            garbage collection manually called.

            Args:
                executable (GraphExecutable): object that contains all of the necessary info for
                    executing a sample and loading it into the target
        """
        start_time = time.time()
        if self.barf:
            with open(
                    os.path.join(
                        self.barf_output,
                        f'{[n for n in executable.graph.nodes][0].dot_notation}.component'
                    ), 'wb') as cmp_file:  # noqa pylint: disable=unnecessary-comprehension
                nx.write_multiline_adjlist(executable.graph, cmp_file)
        try:
            logger.debug(
                f"Executing graph with {len(executable.graph)} relations in it..."
            )
            for i, relation in enumerate(
                    nx.algorithms.dag.topological_sort(executable.graph)):
                relation.population_size = executable.source_adapter.scalar_query(
                    executable.source_adapter.population_count_statement(
                        relation))
                logger.info(
                    f'Executing source query for relation {relation.dot_notation} '
                    f'({i+1} of {len(executable.graph)} in graph)...')

                relation.sampling.prepare(relation, executable.source_adapter)
                relation = RuntimeSourceCompiler.compile_queries_for_relation(
                    relation, executable.graph, executable.source_adapter,
                    executable.analyze)

                if executable.analyze:
                    if relation.is_view:
                        relation.population_size = "N/A"
                        relation.sample_size = "N/A"
                        logger.info(
                            f'Relation {relation.dot_notation} is a view, skipping.'
                        )
                    else:
                        result = executable.source_adapter.check_count_and_query(
                            relation.compiled_query, MAX_ALLOWED_ROWS,
                            relation.unsampled).iloc[0]
                        relation.population_size = result.population_size
                        relation.sample_size = result.sample_size
                        logger.info(
                            f'Analysis of relation {relation.dot_notation} completed in {duration(start_time)}.'
                        )
                else:
                    executable.target_adapter.create_database_if_not_exists(
                        relation.quoted(relation.database))
                    executable.target_adapter.create_schema_if_not_exists(
                        relation.quoted(relation.database),
                        relation.quoted(relation.schema))
                    if relation.is_view:
                        logger.info(
                            f'Retrieving DDL statement for view {relation.dot_notation} in source...'
                        )
                        relation.population_size = "N/A"
                        relation.sample_size = "N/A"
                        try:
                            relation.view_ddl = executable.source_adapter.scalar_query(
                                relation.compiled_query)
                        except Exception:
                            raise SystemError(
                                f'Failed to extract DDL statement: {relation.compiled_query}'
                            )
                        logger.info(
                            f'Successfully extracted DDL statement for view {relation.quoted_dot_notation}'
                        )
                    else:
                        logger.info(
                            f'Retrieving records from source {relation.dot_notation}...'
                        )
                        try:
                            relation.data = executable.source_adapter.check_count_and_query(
                                relation.compiled_query, MAX_ALLOWED_ROWS,
                                relation.unsampled)
                        except Exception as exc:
                            raise SystemError(
                                f'Failed execution of extraction sql statement: {relation.compiled_query} {exc}'
                            )

                        relation.sample_size = len(relation.data)
                        logger.info(
                            f'{relation.sample_size} records retrieved for relation {relation.dot_notation}.'
                        )

                    logger.info(
                        f'Inserting relation {relation.quoted_dot_notation} into target...'
                    )
                    try:
                        executable.target_adapter.create_and_load_relation(
                            relation)
                    except Exception as exc:
                        raise SystemError(
                            f'Failed to load relation {relation.quoted_dot_notation} into target: {exc}'
                        )

                    logger.info(
                        f'Done replication of relation {relation.dot_notation} in {duration(start_time)}.'
                    )
                    relation.target_loaded = True
                relation.source_extracted = True
                logger.info(
                    f'population:{relation.population_size}, sample:{relation.sample_size}'
                )
                if self.barf:
                    with open(
                            os.path.join(self.barf_output,
                                         f'{relation.dot_notation}.sql'),
                            'w') as barf_file:
                        barf_file.write(relation.compiled_query)
            try:
                for relation in executable.graph.nodes:
                    del relation.data
            except AttributeError:
                pass
            gc.collect()
        except Exception as exc:
            logger.error(f'failed with error of type {type(exc)}: {str(exc)}')
            raise exc
Example #40
def write_adjacency(graph, fname):
    """ Write the graph as an adjacency list """

    nx.write_multiline_adjlist(graph, fname)
Example #41
# band names containing non-ASCII characters
hd = 'H' + chr(252) + 'sker D' + chr(252)
mh = 'Mot' + chr(246) + 'rhead'
mc = 'M' + chr(246) + 'tley Cr' + chr(252) + 'e'
st = 'Sp' + chr(305) + 'n' + chr(776) + 'al Tap'
q = 'Queens' + chr(255) + 'ryche'
boc = 'Blue ' + chr(214) + 'yster Cult'
dt = 'Deatht' + chr(246) + 'ngue'

G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding
fh = open('edgelist.utf-8', 'wb')
fh.write(
    '# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# read and store in UTF-8
fh = open('edgelist.utf-8', 'rb')
H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
Example #42
def save(G, fname):
    # context manager guarantees the file handle is closed after writing
    with open(fname, 'wb') as fh:
        nx.write_multiline_adjlist(G, fh)