コード例 #1
0
 def test_multiline_adjlist_integers(self):
     """Round-trip an integer-labelled graph through a multiline adjlist file."""
     fd, fname = tempfile.mkstemp()
     try:
         G = nx.convert_node_labels_to_integers(self.G)
         nx.write_multiline_adjlist(G, fname)
         # nodetype=int converts the node labels read from the file back to int
         H = nx.read_multiline_adjlist(fname, nodetype=int)
         assert_nodes_equal(list(H), list(G))
         assert_edges_equal(list(H.edges()), list(G.edges()))
     finally:
         # Release the descriptor and delete the file even when an
         # assertion above fails, so failed runs leave nothing behind.
         os.close(fd)
         os.unlink(fname)
コード例 #2
0
ファイル: test_adjlist.py プロジェクト: c0ns0le/zenoss-4
 def test_multiline_adjlist_graph(self):
     """Round-trip the undirected fixture graph through a multiline adjlist file."""
     G = self.G
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname)
         H = nx.read_multiline_adjlist(fname)
         H2 = nx.read_multiline_adjlist(fname)
         assert_not_equal(H, H2)  # they should be different graphs
         assert_equal(sorted(H.nodes()), sorted(G.nodes()))
         assert_equal(sorted(H.edges()), sorted(G.edges()))
     finally:
         # Clean up the temporary file even when an assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #3
0
 def test_multiline_adjlist_digraph(self):
     """Round-trip the directed fixture graph through a multiline adjlist file."""
     G = self.DG
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname)
         # create_using is required so the file is read back as a DiGraph
         H = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
         H2 = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
         assert_not_equal(H, H2)  # they should be different graphs
         assert_nodes_equal(list(H), list(G))
         assert_edges_equal(list(H.edges()), list(G.edges()))
     finally:
         # Clean up the temporary file even when an assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #4
0
 def test_multiline_adjlist_delimiter(self):
     """Write a 3-node path graph with ':' as delimiter and read it back."""
     original = nx.path_graph(3)
     buffer = io.BytesIO()
     nx.write_multiline_adjlist(original, buffer, delimiter=':')
     buffer.seek(0)  # rewind so the reader starts at the first byte
     restored = nx.read_multiline_adjlist(buffer, nodetype=int, delimiter=':')
     assert_nodes_equal(list(restored), list(original))
     assert_edges_equal(list(restored.edges()), list(original.edges()))
コード例 #5
0
 def test_unicode(self):
     """Round-trip a graph whose node name and attribute key are non-ASCII."""
     G = nx.Graph()
     name1 = chr(2344) + chr(123) + chr(6543)
     name2 = chr(5543) + chr(1543) + chr(324)
     # name2 is used as the edge-attribute key, hence the ** expansion
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname)
         H = nx.read_multiline_adjlist(fname)
         assert_graphs_equal(G, H)
     finally:
         # Clean up the temporary file even when the assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #6
0
 def test_latin1(self):
     """Round-trip a graph with Latin-1 names using the 'latin-1' encoding."""
     G = nx.Graph()
     name1 = 'Bj' + chr(246) + 'rk'
     name2 = chr(220) + 'ber'
     # name2 is used as the edge-attribute key, hence the ** expansion
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname, encoding='latin-1')
         H = nx.read_multiline_adjlist(fname, encoding='latin-1')
         assert_graphs_equal(G, H)
     finally:
         # Clean up the temporary file even when the assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #7
0
def createDocumentsGraph(train, graph_file=None, sem_rel=None, sem_graph=None):
    """
    Build (or load from a cache file) a directed graph of consecutive lemmas.

    For every sentence, each pair of consecutive ``utils.instance`` items is
    linked by an edge from the earlier lemma to the later one. Nodes are keyed
    as ``"<lemma>_<pos>"``.

    :param train: mapping whose values are iterables of sentences; only the
        sentence items that are ``utils.instance`` objects are used.
    :param graph_file: optional cache path. If the file exists, the graph is
        read from it and returned; otherwise the built graph is written to it.
    :param sem_rel: optional mapping ``instance -> {relation: nodes}``. Each
        listed node that is already in the graph gets an edge from the lemma.
    :param sem_graph: optional graph whose directed version seeds the result.
    :return: the directed graph.
    """
    if graph_file is not None and os.path.isfile(graph_file):
        # The cache is written from a DiGraph below; without create_using the
        # reader would return an undirected Graph and drop edge direction.
        return nx.read_multiline_adjlist(graph_file, create_using=nx.DiGraph())

    G = nx.DiGraph()

    if sem_graph is not None:
        G = nx.compose(G, sem_graph.to_directed())

    def _link_related(source_key, instance_id):
        # Add edges from source_key to every related node already in the graph.
        for nodes in sem_rel[instance_id].values():
            for node in nodes:
                if node in G.nodes:
                    G.add_edge(source_key, node)

    for sentences in train.values():
        for sentence in sentences:
            lemmas = [l for l in sentence if isinstance(l, utils.instance)]

            # Walk over consecutive (previous, current) lemma pairs; a sentence
            # with fewer than two lemmas contributes nothing.
            for i, (prev_lemma, lemma) in enumerate(zip(lemmas, lemmas[1:]),
                                                    start=1):
                lemma_key = lemma.lemma + '_' + lemma.pos
                prev_lemma_key = prev_lemma.lemma + '_' + prev_lemma.pos

                if i == 1:
                    G.add_node(prev_lemma_key)

                G.add_node(lemma_key)
                G.add_edge(prev_lemma_key, lemma_key)

                if sem_rel is not None:
                    _link_related(lemma_key, lemma.instance)
                    # The first lemma of a sentence is never the "current"
                    # lemma of a pair, so handle its relations here.
                    if i == 1:
                        _link_related(prev_lemma_key, prev_lemma.instance)

    if graph_file is not None:
        nx.write_multiline_adjlist(G, graph_file)

    return G
コード例 #8
0
ファイル: test_adjlist.py プロジェクト: nickp60/Ragout
    def test_read_multiline_adjlist_1(self):
        """Regression test for https://networkx.lanl.gov/trac/ticket/252."""
        # Input with '#' comment lines interleaved between the data lines.
        payload = b"# comment line\n1 2\n# comment line\n2\n3\n"
        graph = nx.read_multiline_adjlist(io.BytesIO(payload))
        expected = {'1': {'3': {}, '2': {}}, '3': {'1': {}}, '2': {'1': {}}}
        assert_equal(graph.adj, expected)
コード例 #9
0
    def test_read_multiline_adjlist_1(self):
        """Regression test for https://networkx.lanl.gov/trac/ticket/252."""
        # Input with '#' comment lines interleaved between the data lines.
        payload = b"# comment line\n1 2\n# comment line\n2\n3\n"
        graph = nx.read_multiline_adjlist(io.BytesIO(payload))
        expected = {"1": {"3": {}, "2": {}}, "3": {"1": {}}, "2": {"1": {}}}
        assert_graphs_equal(graph, nx.Graph(expected))
コード例 #10
0
    def test_read_multiline_adjlist_1(self):
        """Regression test for https://networkx.lanl.gov/trac/ticket/252."""
        # Input with '#' comment lines interleaved between the data lines.
        payload = b"# comment line\n1 2\n# comment line\n2\n3\n"
        graph = nx.read_multiline_adjlist(io.BytesIO(payload))
        expected = {'1': {'3': {}, '2': {}}, '3': {'1': {}}, '2': {'1': {}}}
        assert_graphs_equal(graph, nx.Graph(expected))
コード例 #11
0
def load(json_name):
    """Load the graph for a corpus, drop its tiny components and save it.

    The graph is read from the adjacency-list file that belongs to
    *json_name*, every connected component with at most two nodes is removed,
    and the result is written to ``./adjlists/graphU.adjlist``.

    :param json_name: name of the training or test corpus.
    :return: the pruned graph.
    :raises ValueError: if *json_name* is not one of the two known corpora
        (previously this surfaced as an unbound-variable error).
    """
    adjlist_by_corpus = {
        "pan12-sexual-predator-identification-training-corpus-2012-05-01":
            "adjlists/train_networkxBeforeRemove.adjlist",
        "pan12-sexual-predator-identification-test-corpus-2012-05-17":
            "adjlists/test_networkxBeforeRemove.adjlist",
    }
    if json_name not in adjlist_by_corpus:
        raise ValueError("Unknown corpus name: %r" % (json_name,))
    G = nx.read_multiline_adjlist(adjlist_by_corpus[json_name])

    # Snapshot the components first: G is mutated while we iterate.
    for component in list(nx.connected_components(G)):
        if len(component) <= 2:  # isolated nodes and two-node components
            G.remove_nodes_from(component)

    # Use the same `nx` alias as the rest of the function.
    nx.write_multiline_adjlist(G, "./adjlists/graphU.adjlist")
    return G
コード例 #12
0
 def test_unicode(self):
     """Round-trip a graph whose node name and attribute key are non-ASCII."""
     G = nx.Graph()
     try:  # Python 3.x
         name1 = chr(2344) + chr(123) + chr(6543)
         name2 = chr(5543) + chr(1543) + chr(324)
     except ValueError:  # Python 2.6+
         name1 = unichr(2344) + unichr(123) + unichr(6543)
         name2 = unichr(5543) + unichr(1543) + unichr(324)
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname)
         H = nx.read_multiline_adjlist(fname)
         assert_graphs_equal(G, H)
     finally:
         # Clean up the temporary file even when the assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #13
0
 def test_unicode(self):
     """Round-trip a graph whose node name and attribute key are non-ASCII."""
     G = nx.Graph()
     try:  # Python 3.x
         name1 = chr(2344) + chr(123) + chr(6543)
         name2 = chr(5543) + chr(1543) + chr(324)
     except ValueError:  # Python 2.6+
         name1 = unichr(2344) + unichr(123) + unichr(6543)
         name2 = unichr(5543) + unichr(1543) + unichr(324)
     # Pass the attribute as a keyword: a positional attribute dict is only
     # accepted by the old networkx 1.x add_edge signature.
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname)
         H = nx.read_multiline_adjlist(fname)
         assert_graphs_equal(G, H)
     finally:
         # Clean up the temporary file even when the assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #14
0
def test_read_multiline_adjlist_1():
    """Regression test for https://networkx.lanl.gov/trac/ticket/252."""
    # io.BytesIO exists on both Python 2 and 3, unlike the Python 2-only
    # StringIO module.
    import io

    # Input with '#' comment lines interleaved between the data lines.
    s = b"# comment line\n1 2\n# comment line\n2\n3\n"
    bytesIO = io.BytesIO(s)

    G = nx.read_multiline_adjlist(bytesIO)
    adj = {'1': {'3': {}, '2': {}}, '3': {'1': {}}, '2': {'1': {}}}

    assert_equal(G.adj, adj)
コード例 #15
0
def test_read_multiline_adjlist_1():
    """Regression test for https://networkx.lanl.gov/trac/ticket/252."""
    # io.BytesIO exists on both Python 2 and 3, unlike the Python 2-only
    # StringIO module.
    import io

    # Input with '#' comment lines interleaved between the data lines.
    s = b"# comment line\n1 2\n# comment line\n2\n3\n"
    bytesIO = io.BytesIO(s)

    G = nx.read_multiline_adjlist(bytesIO)
    adj = {'1': {'3': {}, '2': {}}, '3': {'1': {}}, '2': {'1': {}}}

    assert_equal(G.adj, adj)
コード例 #16
0
ファイル: application.py プロジェクト: eilon26/Final-Project
def pca():
    """Handle a /pca request: build and save the plots for a dataset.

    Reads ``dataset`` and ``useServerData`` from the JSON request body. The
    work is skipped only when every per-algorithm image and the pickled
    plotter already exist and the client allows reusing server data.
    Returns a JSON response with a status message and the base image path.
    """
    global plotter

    dataset = request.get_json()["dataset"]
    use_server_data = request.get_json()["useServerData"]
    # Output location, relative to the working directory once "." is prepended
    prefix = "/data" + "/pca/" + dataset
    print(dataset)
    output_dir = "." + prefix
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One image per algorithm is expected in the output directory.
    images_present = all([
        os.path.isfile(output_dir + "/" + algo + ".png")
        for algo in all_algorithms
    ])
    plotter_cached = os.path.isfile(get_tmpfile(dataset + "plotter.pbz2"))
    skip = bool(images_present and plotter_cached and use_server_data)

    app.logger.info('got /pca request with skip = %s and dataset = %s' %
                    (skip, dataset))

    if not skip:
        # Graph written earlier under ./data/load/<dataset>/
        graph_path = "." + "/data" + "/load/" + dataset + "/graph.adjlist"
        G = networkx.read_multiline_adjlist(graph_path)

        # Embedding vectors saved earlier in the gensim temp directory
        model = KeyedVectors.load(get_tmpfile(dataset + "model.kv"), mmap='r')

        # Build the plotter and save all of its images
        plotter = Plotter.Plotter(G, model)
        plotter.SaveAll(prefix)

        # Persist the plotter as a bz2-compressed pickle
        pickle_path = get_tmpfile(dataset + "plotter.pbz2")
        with bz2.BZ2File(pickle_path, 'w') as f:
            cPickle.dump(plotter, f)

    return jsonify(res="pca completed and saved in image",
                   path=prefix + "/base.png")
コード例 #17
0
ファイル: test_adjlist.py プロジェクト: nishnik/networkx
 def test_latin1(self):
     """Round-trip a graph with Latin-1 names using the 'latin-1' encoding."""
     G = nx.Graph()
     try:  # Python 3.x
         blurb = chr(1245)  # just to trigger the exception
         name1 = "Bj" + chr(246) + "rk"
         name2 = chr(220) + "ber"
     except ValueError:  # Python 2.6+
         name1 = "Bj" + unichr(246) + "rk"
         name2 = unichr(220) + "ber"
     # Pass the attribute as a keyword: a positional attribute dict is only
     # accepted by the old networkx 1.x add_edge signature.
     G.add_edge(name1, "Radiohead", **{name2: 3})
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname, encoding="latin-1")
         H = nx.read_multiline_adjlist(fname, encoding="latin-1")
         assert_graphs_equal(G, H)
     finally:
         # Clean up the temporary file even when the assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #18
0
 def test_latin1(self):
     """Round-trip a graph with Latin-1 names using the 'latin-1' encoding."""
     G = nx.Graph()
     try:  # Python 3.x
         blurb = chr(1245)  # just to trigger the exception
         name1 = 'Bj' + chr(246) + 'rk'
         name2 = chr(220) + 'ber'
     except ValueError:  # Python 2.6+
         name1 = 'Bj' + unichr(246) + 'rk'
         name2 = unichr(220) + 'ber'
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname, encoding='latin-1')
         H = nx.read_multiline_adjlist(fname, encoding='latin-1')
         assert_graphs_equal(G, H)
     finally:
         # Clean up the temporary file even when the assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #19
0
 def test_latin1(self):
     """Round-trip a graph with Latin-1 names using the 'latin-1' encoding."""
     G = nx.Graph()
     try:  # Python 3.x
         blurb = chr(1245)  # just to trigger the exception
         name1 = 'Bj' + chr(246) + 'rk'
         name2 = chr(220) + 'ber'
     except ValueError:  # Python 2.6+
         name1 = 'Bj' + unichr(246) + 'rk'
         name2 = unichr(220) + 'ber'
     G.add_edge(name1, 'Radiohead', **{name2: 3})
     fd, fname = tempfile.mkstemp()
     try:
         nx.write_multiline_adjlist(G, fname, encoding='latin-1')
         H = nx.read_multiline_adjlist(fname, encoding='latin-1')
         assert_graphs_equal(G, H)
     finally:
         # Clean up the temporary file even when the assertion fails.
         os.close(fd)
         os.unlink(fname)
コード例 #20
0
ファイル: application.py プロジェクト: eilon26/Final-Project
def embedding():
    """Handle an /embedding request: generate node2vec walks and vectors.

    Reads ``dataset`` and ``useServerData`` from the JSON request body. The
    computation is skipped when ``walks.txt`` already exists for the dataset
    and the client allows reusing server data. Returns a JSON response with
    the walk parameters and the content of ``walks.txt``.
    """
    # Walk parameters, shared by the Node2Vec call and the JSON response so
    # the two cannot drift apart.
    walk_length = 25
    num_walks = 10

    # Getting datset from request
    dataset = request.get_json()["dataset"]
    # If use server data or do all process
    useServerData = request.get_json()["useServerData"]
    # Prefix for saving information
    prefix = "/data" + "/embedding/" + dataset
    walks_path = "." + prefix + "/walks.txt"

    skip = True
    print(walks_path)
    if not os.path.isfile(walks_path):
        skip = False

    # if not os.path.isfile(get_tmpfile(dataset + "model.kv")):
    #     skip = False

    if not useServerData:
        skip = False

    app.logger.info('got /embedding request with skip = %s and dataset = %s' %
                    (skip, dataset))

    if not skip:
        G = networkx.read_multiline_adjlist("." + "/data" + "/load/" +
                                            dataset + "/graph.adjlist")

        # Precompute probabilities and generate walks
        node2vec = Node2Vec(G,
                            dimensions=64,
                            walk_length=walk_length,
                            num_walks=num_walks,
                            workers=1)
        saveWalks(list(node2vec.walks), prefix)

        # Creates the embeddings using gensim's Word2Vec.
        model = node2vec.fit(window=10, min_count=1, batch_words=4)

        # Save the vectors in the gensim temp directory
        fname = dataset + "model.kv"
        path = get_tmpfile(fname)
        model.wv.save(path)

    # Read through a context manager so the handle is closed on every request.
    with open(walks_path, "r") as walks_file:
        walks_text = walks_file.read()

    return jsonify(res="walks saved successfully",
                   walk_length=walk_length,
                   num_walks=num_walks,
                   walks=walks_text)
コード例 #21
0
ファイル: main.py プロジェクト: ZviMints/Final-Project
def main():
    """Load the saved graph and embedding vectors, then show two plots."""
    # ---- Inputs: graph and embedding vectors ----
    graph = nx.read_multiline_adjlist(
        "adjlists/test_networkxAfterRemove.adjlist")
    vectors_path = get_tmpfile("test_embedded_vectors_model.kv")
    vectors = KeyedVectors.load(vectors_path, mmap='r')

    # ---- Plotting ----
    viewer = Plotter.Plotter(graph, vectors)
    # Other available views: showWithBaseGraph, showWithKMeans, showWithCC
    viewer.showWithSpectral()
    viewer.showCombined("kmeans+spectral+connected")
コード例 #22
0
ファイル: application.py プロジェクト: eilon26/Final-Project
def load():
    """Handle a /load request: load a corpus graph and prune tiny components.

    Reads ``dataset`` and ``useServerData`` from the JSON request body, loads
    the matching adjacency-list file, removes every connected component with
    at most two nodes, saves the pruned graph and (unless skipped) a drawing
    of it. Returns the link lists before and after pruning plus node/link
    counts, or an error payload for an unknown dataset name.
    """
    dataset = request.get_json()["dataset"]
    use_server_data = request.get_json()["useServerData"]
    # Output location, relative to the working directory once "." is prepended
    prefix = "/data" + "/load/" + dataset
    output_dir = "." + prefix

    # The drawing is regenerated unless it exists and server data may be reused
    image_exists = os.path.isfile(output_dir + "/networkx_after_remove.png")
    skip = bool(image_exists and use_server_data)

    app.logger.info('got /load request with skip = %s and dataset = %s' %
                    (skip, dataset))

    source_by_dataset = {
        "pan12-sexual-predator-identification-training-corpus-2012-05-01":
            "./data/start/train_networkxBeforeRemove.adjlist",
        "pan12-sexual-predator-identification-test-corpus-2012-05-17":
            "./data/start/test_networkxBeforeRemove.adjlist",
    }
    if dataset not in source_by_dataset:
        return jsonify(err="405", msg="Invalid JSON file name")
    G = networkx.read_multiline_adjlist(source_by_dataset[dataset])

    # State before pruning
    app.logger.debug('loaded dataset with %s nodes before remove' %
                     len(G.nodes()))
    before = json_graph.node_link_data(G)["links"]
    graphData = ["%s Nodes, %s links" % (len(G.nodes()), len(G.edges()))]

    # Collect the small components first, then remove them from the graph
    small_components = [
        component for component in networkx.connected_components(G)
        if len(component) <= 2
    ]
    for component in small_components:
        G.remove_nodes_from(component)

    # State after pruning
    app.logger.debug('loaded dataset with %s nodes after remove' %
                     len(G.nodes()))
    after = json_graph.node_link_data(G)["links"]
    graphData.append("%s Nodes, %s links" % (len(G.nodes()), len(G.edges())))

    # Save the pruned graph
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    networkx.write_multiline_adjlist(G, output_dir + "/graph.adjlist")
    if not skip:
        networkx.draw(G, node_size=3)
        plt.savefig(output_dir + "/networkx_after_remove.png")

    return jsonify(before_path=prefix + "/networkx_before_remove.png",
                   after_path=prefix + "/networkx_after_remove.png",
                   before=before,
                   after=after,
                   graphData=graphData)
コード例 #23
0
import networkx
from gensim.models import KeyedVectors
from gensim.test.utils import get_tmpfile

from BERT.clustersBy3DVec import clustersBy3DVec
from BERT.json2conversation import json2conversation
from BERT.vectors2text import Vectors2MatchConversions
from Step3 import Plotter

#======================================== Data initialization =========================================#
# Load the graph G from its multiline adjacency-list file
G = networkx.read_multiline_adjlist("./adjlists/graphU.adjlist")
# Load the saved vectors from the gensim temp directory (opened with mmap='r')
fname = "model.kv"
path = get_tmpfile(fname)
model = KeyedVectors.load(path, mmap='r')

# Convert the JSON corpus into a list of Conversation objects
# NOTE(review): absolute, machine-specific path — confirm before running elsewhere
conversations = json2conversation.parse_data_to_case_class(
    "C:/Users/EILON/PycharmProjects/data_set/test"
    "/pan12-sexual-predator-identification-test-corpus-2012-05-21"
    "/pan12-sexual-predator-identification-test-corpus-2012-05-17")

#======================================= Preparing the input data for BERT ========================================#
# Build the plotter from the graph and the vectors
plotter = Plotter.Plotter(G, model)

# One dictionary of centers keyed by cluster name per algorithm
(kmeans_centers_by_name, spectral_centers_by_name,
 connected_center_by_name) = plotter.getAllCentersName()
コード例 #24
0
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding; the context manager closes (and flushes) the file
# before it is opened again for reading below
with codecs.open('edgelist.utf-8', 'w', encoding='utf-8') as fh:
    fh.write('# -*- coding: %s -*-\n' % fh.encoding)  # encoding hint for emacs
    NX.write_multiline_adjlist(G, fh, delimiter='\t')

# read and store in UTF-8
with codecs.open('edgelist.utf-8', 'r', encoding='utf-8') as fh:
    H = NX.read_multiline_adjlist(fh, delimiter='\t')

for n in G.nodes():
    if n not in H:
        print False

print G.nodes()

try:
    pos = NX.spring_layout(G)
    NX.draw(G, pos, font_size=16, with_labels=False)
    for p in pos:  # raise text positions
        pos[p][1] += 0.07
    NX.draw_networkx_labels(G, pos)
    P.show()
except:
コード例 #25
0
import networkx as nx
import matplotlib.pyplot as plt

# Load the graph from a multiline adjacency-list file in the working directory
G = nx.read_multiline_adjlist('graph.multiline_adjlist')
# Draw the graph with matplotlib using round ("o") node markers
nx.draw_networkx(G, arrows=True, node_size=180, node_shape="o")
# Alternative layout kept for reference:
# nx.draw_spectral(G,arrows=True,node_size = 200,node_shape = "8",node_color = "red")

# Optional export of the graph to GML:
# nx.write_gml(G,"test.gml")
# Open the figure window
plt.show()
コード例 #26
0
def read_adjacency(fname):
    """Load a multiline adjacency-list file as a graph with ``int`` node labels."""
    graph = nx.read_multiline_adjlist(fname, nodetype=int)
    return graph
def load(fname):
    """Read the multiline adjacency-list file *fname* and return the graph."""
    return nx.read_multiline_adjlist(fname)
コード例 #28
0
G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding; the context manager closes (and flushes) the file
# before it is opened again for reading below
with open("edgelist.utf-8", "wb") as fh:
    nx.write_multiline_adjlist(G, fh, delimiter="\t", encoding="utf-8")

# read and store in UTF-8
with open("edgelist.utf-8", "rb") as fh:
    H = nx.read_multiline_adjlist(fh, delimiter="\t", encoding="utf-8")

# report every node that did not survive the round trip
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
nx.draw_networkx_labels(G, pos)
plt.show()
コード例 #29
0
ファイル: get_paths.py プロジェクト: kwargs/ya-foaf
def read_graph(from_user, to_user):
    """Return the graph stored for the user pair, or ``None`` on ``IOError``."""
    try:
        path = edgelist_fname(from_user=from_user, to_user=to_user)
        with get_file(path, 'r') as fd:
            graph = NX.read_multiline_adjlist(fd, delimiter='\t')
    except IOError:
        graph = None
    return graph
コード例 #30
0
ファイル: shortestPath.py プロジェクト: cxy229/colla
def load_graph(year):
    """Read the graph stored in the file ``GRAPH_PATH + str(year) + '.txt'``."""
    path = GRAPH_PATH + str(year) + '.txt'
    with open(path, 'rb') as handle:
        graph = nx.read_multiline_adjlist(handle)
    return graph
コード例 #31
0
# Number of simulation runs
nSimulation = 1
# Number of payments in each simulation run
nPayments = 200
# Weight multiplied by the average channel balance to get the Gaussian mean
# of the payment amounts
payments_mu_weight = 0.1
# Weight for the Gaussian standard deviation of the payment amounts
# (half of the mean weight)
payments_sigma_weight = payments_mu_weight / 2
# Node count at which node removal stops
nNodes = 280
# Number of routing gossip messages to be sent in-between payments
nRoutingGossip = 10

# Open the adjacency-list file and build the undirected graph
f = open("adjList.txt", 'rb')
G = nx.read_multiline_adjlist(f)
f.close()

# Keep only the largest connected component
largest_cc = max(nx.connected_components(G), key=len)
cleanG = G.subgraph(largest_cc).copy()

# NOTE(review): these counts describe the full graph G, not cleanG — confirm
# which one is intended
print("Number of nodes: " + str(G.number_of_nodes()))
print("Number of edges: " + str(G.number_of_edges()))

# Read the alias file and create a pub_key -> alias dictionary
aliasDic = {}
f = open("nodeAlias.txt", 'r')

lines = f.read().splitlines()
コード例 #32
0
def saveWalks(walks):
    """Write *walks* to ``walks_test.txt``, one numbered line per walk.

    Each line has the form ``row <n>:    <word>  <word>  ...`` where every
    word is followed by two spaces. An existing file is overwritten.

    :param walks: iterable of walks, each an iterable of strings.
    """
    # The context manager closes the file even if a write fails part-way.
    with open("walks_test.txt", "w+") as f:
        for row, sentence in enumerate(walks, start=1):
            f.write("row %s:    " % str(row))
            for word in sentence:
                f.write(word)
                f.write("  ")
            f.write("\n")


# Entry point: load the graph from its multiline adjacency-list file
G = nx.read_multiline_adjlist("test_networkxAfterRemove.adjlist")

# Part A
# The bare string below documents the Node2Vec arguments used in this script.
"""
     :param G: Input graph
     :param dimensions: Embedding dimensions
     :param walk_length: Number of nodes in each walk
     :param num_walks: Number of walks per node
     :param workers: Number of workers for parallel execution
"""

# Precompute probabilities and generate walks
node2vec = Node2Vec(G, dimensions=64, walk_length=25, num_walks=10, workers=1)

# Write the generated walks to disk
saveWalks(list(node2vec.walks))
コード例 #33
0
ファイル: application.py プロジェクト: eilon26/Final-Project
def bert():
    """Handle a /bert request: extract topics for a selected cluster.

    Reads ``dataset`` and ``cluster`` from the JSON request body, loads the
    dataset's graph, lazily fills the module-level ``conversations``,
    ``plotter`` and ``clusters`` caches, and returns the topic list (as a
    string) for the conversations that belong to the requested cluster.
    """
    dataset = request.get_json()["dataset"]
    option_cluster_name = request.get_json()["cluster"]
    app.logger.info('got /bert request with dataset = %s' % (dataset))

    # Load the graph written earlier under ./data/load/<dataset>/
    G = networkx.read_multiline_adjlist("." + "/data" + "/load/" + dataset +
                                        "/graph.adjlist")

    # NOTE(review): for any other dataset name `conversations` stays None —
    # confirm that callers only send the two known names.
    global conversations
    if conversations is None:
        if dataset == "pan12-sexual-predator-identification-training-corpus-2012-05-01":
            conversations = loadDataset2Conversation.loadConversations(
                "C:/Users/EILON/PycharmProjects/data_set/traning"
                "/pan12-sexual-predator-identification-training-corpus-2012-05-01"
                "/pan12-sexual-predator-identification-training-corpus-2012-05-01"
            )  # 40820 conversations
        elif dataset == "pan12-sexual-predator-identification-test-corpus-2012-05-17":
            conversations = loadDataset2Conversation.loadConversations(
                "C:/Users/EILON/PycharmProjects/data_set/test"
                "/pan12-sexual-predator-identification-test-corpus-2012-05-21"
                "/pan12-sexual-predator-identification-test-corpus-2012-05-17")

    # load plotter
    global plotter
    if plotter is None:
        fname = dataset + "plotter.pbz2"
        path = get_tmpfile(fname)
        # Close the compressed file as soon as the pickle has been read.
        # NOTE: unpickling executes arbitrary code; this file must only ever
        # be one that this application wrote itself.
        with bz2.BZ2File(path, 'rb') as data:
            plotter = cPickle.load(data)

    global clusters
    if clusters is None:
        # Get all algorithms dictionary of center by cluster names
        (kmeans_centers_by_name, spectral_centers_by_name,
         connected_center_by_name) = plotter.getAllCentersName()

        # Make all algorithms dictionary of cluster's nodes by cluster names
        clusters = clustersBy3DVec.clustersBy3DVec(
            kmeans_centers_by_name, spectral_centers_by_name,
            connected_center_by_name, plotter.all_vectors_after_pca)

    # get list of vectors that matching to the input cluster name
    selected_vectors = clusters.getAllVectorsByCombinationClustersName(
        option_cluster_name)

    app.logger.info("Start process of extracting topics...")
    # get list of Conversation objects from list of vectors
    vectors2Conversations = Vectors2MatchConversions.Vectors2MatchConversions(
        G, plotter.all_vectors_after_pca, conversations)
    selected_conversations = vectors2Conversations.getConversationsFromGroupOfVecs(
        selected_vectors)

    # get list of 5 most similar topics from list of Conversation objects
    conversations2Topics = convert_Conversations_2_topic.convert_Conversations_2_topic(
    )
    clusters_vector = conversations2Topics.clustersEmbedding(
        selected_conversations)
    topics_list = conversations2Topics.vector2Topic(clusters_vector)

    return jsonify(topic=str(
        topics_list))  # ['notice', 'clothe', 'feet', 'ship', 'quart']
コード例 #34
0
    #Create f given on page 120 of The Game of Cops and Robbers on Graphs
    for node in Pnodes:
        f[str(node)] = Gnodes - NGP[str(node)]


# When run from the command line, take the graph file and k from the arguments
if __name__ == '__main__':
    import sys

    if len(sys.argv) != 3:
        print('The format for the arguments is: graphFileLocation k')
        # sys.exit instead of the site-injected exit() builtin, which is not
        # available in every interpreter setup; a usage error is reported
        # with a non-zero status.
        sys.exit(1)
    k = int(sys.argv[2])

    G = nx.read_multiline_adjlist(sys.argv[1])
    initGraph()
    numK = getCopNumber()

    if numK:
        print('The cop number is >', k)
    else:
        print('The cop number is <=', k)

# Module-level placeholders for the graph and k
G = 0
k = 0


#Otherwise we are using a module
def copk(graph, kVal):
    global G, k
コード例 #35
0
def getFeatureGraph(mAllData,
                    dEdgeThreshold=0.30,
                    bResetGraph=True,
                    dMinDivergenceToKeep=np.log2(10e5)):
    """Build (or load from disk) the feature graph and its useful feature names.

    When ``bResetGraph`` is false, the graph is first looked for in
    ``graphAdjacencyList.txt`` and the names in ``usefulFeatureNames.pickle``;
    if loading fails for any reason the graph is recomputed. A recomputed
    graph and its names are written back to those two files.

    :param mAllData: 2-D data; ``np.shape(mAllData)[1]`` is taken as the
        feature count and column 0 is skipped.
    :param dEdgeThreshold: threshold handed to the ``addEdgeAboveThreshold``
        workers through the work queue.
    :param bResetGraph: when true, skip the cache and always recompute.
    :param dMinDivergenceToKeep: a feature is kept when the absolute value of
        its NaN-ignoring column mean exceeds this value (note that ``10e5``
        equals ``1e6``).
    :return: tuple ``(graph, list of useful feature names)``.
    """

    try:
        if bResetGraph:
            # Raised on purpose: jumps to the except branch below, which
            # falls through to the recomputation.
            raise Exception("User requested graph recreation.")

        print("Trying to load graph...")
        g = read_multiline_adjlist("graphAdjacencyList.txt")
        with open("usefulFeatureNames.pickle", "rb") as fIn:
            saUsefulFeatureNames = pickle.load(fIn)
        print("Trying to load graph... Done.")
        return g, saUsefulFeatureNames
    except Exception as e:
        print("Trying to load graph... Failed:\n%s\n Recomputing..." %
              (str(e)))

    # DEBUG LINES
    print("Got data of size %s." % (str(np.shape(mAllData))))
    print("Extracting graph...")
    #############
    # Init graph

    # Determine meaningful features (with a divergence of more than MIN_DIVERGENCE from the control mean)

    iFeatureCount = np.shape(mAllData)[1]
    mMeans = np.nanmean(mAllData, 0)  # Ignore nans

    # vUseful[j] refers to data column j + 1, because column 0 is skipped
    vUseful = [
        abs(mMeans[iFieldNum]) - dMinDivergenceToKeep > 0.00
        for iFieldNum in range(1, iFeatureCount)
    ]

    # Feature names with the first entry dropped, so saFeatures[j] lines up
    # with vUseful[j]
    saFeatures = getFeatureNames()[1:iFeatureCount]
    # NOTE(review): these indices are positions in saFeatures (0-based after
    # dropping the first column); confirm the worker maps them to mAllData
    # columns the same way.
    saUsefulIndices = [
        iFieldNum for iFieldNum, _ in enumerate(saFeatures)
        if vUseful[iFieldNum]
    ]
    saUsefulFeatureNames = [
        saFeatures[iFieldNum] for iFieldNum in saUsefulIndices
    ]
    iUsefulFeatureCount = len(saUsefulIndices)
    print("Keeping %d features out of %d." %
          (len(saUsefulIndices), len(saFeatures)))
    ###############################

    g = nx.Graph()
    print("Adding nodes...")
    # Add a node for each feature
    lIndexedNames = enumerate(saFeatures)  # NOTE(review): not used below
    for idx in saUsefulIndices:
        # Only act on useful features
        g.add_node(saFeatures[idx], label=idx)
    print("Adding nodes... Done.")

    # Measure correlations
    # The printed pair count (0.5 * n * n) is an estimate; the iterator below
    # yields each unordered pair of distinct indices once.
    print("Creating edges for %d possible pairs..." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))
    lCombinations = itertools.combinations(saUsefulIndices, 2)

    # Create queue and threads
    qCombination = Queue(10000)
    threads = []  # NOTE(review): never filled or read below
    num_worker_threads = 4
    for i in range(num_worker_threads):
        t = threading.Thread(target=addEdgeAboveThreshold,
                             args=(
                                 i,
                                 qCombination,
                             ))
        t.setDaemon(True)
        t.start()

    iCnt = 0
    # NOTE(review): presumably time.clock, which was removed in Python 3.8 —
    # confirm the import
    dStartTime = clock()
    for iFirstFeatIdx, iSecondFeatIdx in lCombinations:
        # One work item per pair; both indices appear twice in the tuple
        qCombination.put(
            (iFirstFeatIdx, iSecondFeatIdx, g, mAllData, saFeatures,
             iFirstFeatIdx, iSecondFeatIdx, dEdgeThreshold))

        # DEBUG LINES
        # Progress output: a dot every 1000 pairs, and an estimate every
        # 10000 pairs except at exactly 10000
        if iCnt != 0 and (iCnt % 1000 == 0):
            sys.stdout.write(".")
            if iCnt % 10000 == 0 and (iCnt != 10000):
                dNow = clock()
                # dRate is the elapsed time per queued pair
                dRate = ((dNow - dStartTime) / iCnt)
                dRemaining = (0.5 *
                              (iUsefulFeatureCount * iUsefulFeatureCount) -
                              iCnt) * dRate
                sys.stdout.write(
                    "%d (Estimated remaining (sec): %4.2f - Working at a rate of %4.2f pairs/sec)\n"
                    % (iCnt, dRemaining, 1.0 / dRate))

        iCnt += 1
        #############

    print("Waiting for completion...")
    # Wait until the queue reports all queued items as done
    qCombination.join()
    print("Total time (sec): %4.2f" % (clock() - dStartTime))

    print("Creating edges for %d possible pairs... Done." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))

    print("Extracting graph... Done.")

    print("Removing single nodes... Nodes before removal: %d" %
          (g.number_of_nodes()))
    # Nodes without any neighbour
    toRemove = [
        curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
    ]
    # Repeat until no neighbourless node is left
    while len(toRemove) > 0:
        g.remove_nodes_from(toRemove)
        toRemove = [
            curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
        ]
        print("Nodes after removal step: %d" % (g.number_of_nodes()))
    print("Removing single nodes... Done. Nodes after removal: %d" %
          (g.number_of_nodes()))

    print("Saving graph...")
    write_multiline_adjlist(g, "graphAdjacencyList.txt")
    with open("usefulFeatureNames.pickle", "wb") as fOut:
        pickle.dump(saUsefulFeatureNames, fOut)

    print("Saving graph... Done.")

    # NOTE(review): this message is printed on the recompute path, where
    # nothing was loaded — looks like a leftover
    print("Trying to load graph... Done.")

    return g, saUsefulFeatureNames
コード例 #36
0
# read test data (authorID is parsed as a 64-bit integer)
df_test = pd.read_csv('../data/test.csv', dtype={'authorID': np.int64})
# number of rows in the test set
n_test = df_test.shape[0]

# read collaboration graph (space-delimited edge list, integer node ids)
G = nx.read_edgelist('../data/collaboration_network.edgelist',
                     delimiter=' ',
                     nodetype=int)

# read weighted collaboration graph (each edge carries a float "weight")
WG = nx.read_edgelist("../data/weighted_collaboration_network.edgelist",
                      nodetype=int,
                      data=(("weight", float), ))

# read author similarity graph (multiline adjacency list, integer node ids)
SG = nx.read_multiline_adjlist("../data/author_similarity_network.adjlist",
                               nodetype=int)

# map each node of G to its position in G's node order
nodes = {k: v for v, k in enumerate(list(G.nodes()))}

# compute graph features for each node
# suffixes: _g = collaboration graph, _wg = weighted graph, _sg = similarity graph
avg_neighbor_degree_wg = nx.average_neighbor_degree(WG)
avg_neighbor_degree_g = nx.average_neighbor_degree(G)
core_number_g = nx.core_number(G)
page_rank_g = nx.pagerank(G)
page_rank_wg = nx.pagerank(WG)
avg_neighbor_degree_sg = nx.average_neighbor_degree(SG)
page_rank_sg = nx.pagerank(SG)
eigenvector_centrality_sg = nx.eigenvector_centrality(SG)

# load precomputed features for each node
f = open("../data/n_papers.pkl", "rb")
コード例 #37
0
ファイル: main.py プロジェクト: ZviMints/Final-Project
                        workers=1)

    # Embed nodes
    model = node2vec.fit(window=10, min_count=1, batch_words=4)

    # Save the model into
    fname = "model.kv"
    path = get_tmpfile(fname)
    model.wv.save(path)

    return model.wv


#======================================== Data initialization =========================================#
# Load the graph G from its multiline adjacency-list file
G = nx.read_multiline_adjlist("./adjlists/train_networkxAfterRemove.adjlist")

# Load the saved vectors from the gensim temp directory (opened with mmap='r')
fname = "model.kv"
path = get_tmpfile(fname)
model = KeyedVectors.load(path, mmap='r')

# The embedding step; left disabled because the saved vectors are loaded above
# model = embedding(G)

# Load the pickled list of Conversation objects from a bz2-compressed file
# NOTE(review): the BZ2File handle is never closed in the code visible here
data = bz2.BZ2File(
    "saved_objects/conversations_train_dataset_after_remove.pbz2",
    'rb')  # 40820 conversations
conversations = cPickle.load(data)
print("data conversations amount " + str(len(conversations)))
コード例 #38
0
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding; the context manager closes (and flushes) the file
# before it is opened again for reading below
with open('edgelist.utf-8', 'wb') as fh:
    fh.write('# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
    nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# read and store in UTF-8
with open('edgelist.utf-8', 'rb') as fh:
    H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

# report every node that did not survive the round trip
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
nx.draw_networkx_labels(G, pos)
plt.show()
コード例 #39
0
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding; the context manager closes (and flushes) the file
# before it is opened again for reading below
with open('edgelist.utf-8', 'wb') as fh:
    fh.write(
        '# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
    nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# read and store in UTF-8
with open('edgelist.utf-8', 'rb') as fh:
    H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

# report every node that did not survive the round trip
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
nx.draw_networkx_labels(G, pos)
plt.show()
コード例 #40
0
# Zvi Mints and Eilon Tsadok - Mac Version

def saveWalks(walks):
    """Write *walks* to ``walks.txt``, one numbered line per walk.

    Each line has the form ``row <n>:<word> <word> ...`` where every word is
    followed by one space. An existing file is overwritten.

    :param walks: iterable of walks, each an iterable of strings.
    """
    # The context manager closes the file even if a write fails part-way.
    with open("walks.txt", "w+") as f:
        for row, sentence in enumerate(walks, start=1):
            f.write("row %s:" % str(row))
            for word in sentence:
                f.write(word)
                f.write(" ")
            f.write("\n")

# Entry point: load the graph from its multiline adjacency-list file
G = nx.read_multiline_adjlist("convesations.adjlist")


# Part A
# The bare string below documents the Node2Vec arguments used in this script.
"""
     :param G: Input graph
     :param dimensions: Embedding dimensions
     :param walk_length: Number of nodes in each walk
     :param num_walks: Number of walks per node
     :param workers: Number of workers for parallel execution
"""

# Precompute probabilities and generate walks
node2vec = Node2Vec(G, dimensions=64, walk_length=25, num_walks=10, workers=1)

# Write the generated walks to disk
saveWalks(list(node2vec.walks))