Example 1
def test_estimate_order_strongly_connected():
    """
    Example with a single strongly connected component in the first-order
    and two connected components in the second-order network
    """
    paths = pp.Paths()

    ngram_list = [
        'a,b,c', 'b,c,b', 'c,b,a', 'b,a,b', 'e,b,f', 'b,f,b', 'f,b,e', 'b,e,b'
    ]

    for ngram in ngram_list:
        paths.addPath(ngram)

    g1 = pp.HigherOrderNetwork(paths, k=1)
    g1.reduceToGCC()
    assert g1.vcount() == 5, \
        "Error, wrong number of nodes in first-order network"
    assert g1.ecount() == 8, \
        "Error, wrong number of links in first-order network"

    g2 = pp.HigherOrderNetwork(paths, k=2)
    g2.reduceToGCC()
    assert g2.vcount() == 4, \
        "Error, wrong number of nodes in second-order network"
    assert g2.ecount() == 4, \
        "Error, wrong number of links in second-order network"

    # test mapping of higher-order nodes and paths
    assert g2.HigherOrderNodeToPath('a-b') == ('a', 'b'), \
        "Error: mapping from higher-order node to first-order path failed"
    assert g2.HigherOrderPathToFirstOrder(('a-b', 'b-c')) == ('a', 'b', 'c'), \
        "Error: mapping from higher-order path to first-order path failed"
Example 2
def test_estimate_order_strongly_connected():
    """
    Example with a single strongly connected component in the first-order
    and two connected components in the second-order network
    """
    paths = pp.Paths()

    ngram_list = [
        'a,b,c', 'b,c,b', 'c,b,a', 'b,a,b', 'e,b,f', 'b,f,b', 'f,b,e', 'b,e,b'
    ]

    for ngram in ngram_list:
        paths.add_path(ngram)

    g1 = pp.HigherOrderNetwork(paths, k=1)
    pp.algorithms.components.reduce_to_gcc(g1)
    assert g1.ncount() == 5, \
        "Error, wrong number of nodes in first-order network"
    assert g1.ecount() == 8, \
        "Error, wrong number of links in first-order network"

    g2 = pp.HigherOrderNetwork(paths, k=2)
    pp.algorithms.components.reduce_to_gcc(g2)
    assert g2.ncount() == 4, \
        "Error, wrong number of nodes in second-order network"
    assert g2.ecount() == 4, \
        "Error, wrong number of links in second-order network"

    # test mapping of higher-order nodes and paths
    assert g2.higher_order_node_to_path('a,b') == ('a', 'b'), \
        "Error: mapping from higher-order node to first-order path failed"
    assert g2.higher_order_path_to_first_order(('a,b', 'b,c')) == ('a', 'b', 'c'), \
        "Error: mapping from higher-order path to first-order path failed"
Example 3
def test_distance_matrix_equal_across_objects(random_paths):
    """test that the distance matrix is the same if constructed from to path objects with
    the same paths but different instances"""
    p1 = random_paths(40, 20, num_nodes=9)
    p2 = random_paths(40, 20, num_nodes=9)
    hon1 = pp.HigherOrderNetwork(paths=p1, k=1)
    hon2 = pp.HigherOrderNetwork(paths=p2, k=1)
    d_matrix1 = shortest_paths.distance_matrix(hon1)
    d_matrix2 = shortest_paths.distance_matrix(hon2)
    assert d_matrix1 == d_matrix2
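These tests rely on a `random_paths` pytest fixture that is not shown in any of the excerpts. Below is a minimal sketch of such a fixture, assuming its three arguments are the number of paths, a random seed, and the number of nodes; the real fixture ships with pathpy's test suite and may differ in detail.

# Hypothetical sketch of the random_paths fixture used throughout these
# tests; the argument semantics (size, seed, num_nodes) are an assumption.
import random

import pytest
import pathpy as pp


@pytest.fixture()
def random_paths():
    def generate(size, rnd_seed, num_nodes):
        # seed the RNG so repeated calls yield identical Paths objects
        random.seed(rnd_seed)
        paths = pp.Paths()
        nodes = [str(i) for i in range(num_nodes)]
        for _ in range(size):
            length = random.randint(1, 5)
            paths.add_path(','.join(random.choice(nodes)
                                    for _ in range(length + 1)))
        return paths
    return generate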
Example 4
def test_distance_matrix_first_order(random_paths, n_nodes, k, paths, e_sum):
    p = random_paths(paths, 10, n_nodes)
    hon_k, hon_1 = pp.HigherOrderNetwork(p, k=k), pp.HigherOrderNetwork(p, k=1)
    dist_k = shortest_paths.distance_matrix(hon_k)
    dist_1 = shortest_paths.distance_matrix(hon_1)
    total_distance = 0
    for source, target in itertools.product(hon_1.nodes, hon_1.nodes):
        dist_st = dist_k[source][target]
        assert dist_1[source][target] <= dist_k[source][target], \
            "not all distances at order k are at least as long as at order 1"
        if dist_st < np.inf:
            total_distance += dist_st

    assert total_distance == e_sum
Example 5
def test_laplacian_matrix(random_paths):
    paths = random_paths(30, 10, 5)
    hon = pp.HigherOrderNetwork(paths, k=1)
    L = hon.laplacian_matrix().toarray()
    assert np.trace(L) > 0
    assert np.tril(L, k=-1).sum() < 0
    assert np.triu(L, k=1).sum() < 0
Example 6
def test_get_adjacency_mat(random_paths, paths, k_order, sub, num_nodes, s_sum,
                           s_mean):
    p = random_paths(paths, 10, num_nodes)
    hon = pp.HigherOrderNetwork(p, k=k_order)
    adj = hon.adjacency_matrix(include_subpaths=sub)
    assert adj.sum() == s_sum
    assert adj.mean() == pytest.approx(s_mean)
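The `include_subpaths` flag decides whether sub-path frequencies contribute to the matrix entries. A small sketch to see the difference, assuming pathpy 2's convention of counting sub-path and full-path occurrences separately:

# Sketch: on a single observed path a,b,c the first-order edges (a,b) and
# (b,c) occur only as sub-paths, so they vanish when sub-path statistics
# are excluded (assuming pathpy 2's counting convention).
import pathpy as pp

toy = pp.Paths()
toy.add_path('a,b,c')

hon = pp.HigherOrderNetwork(toy, k=1)
print(hon.adjacency_matrix(include_subpaths=True).toarray())
print(hon.adjacency_matrix(include_subpaths=False).toarray())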
Example 7
def test_fiedler_vector_dense(random_paths, k, e_sum, e_var):
    import numpy as np
    p = random_paths(90, 0, 20)
    hon = pp.HigherOrderNetwork(p, k=k)
    fv = pp.algorithms.spectral.fiedler_vector_dense(hon)
    assert fv.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL)
    assert np.sum(fv) == pytest.approx(e_sum, abs=EIGEN_ABS_TOL)
Example 8
def test_strong_connected_tmp(random_temp_network):
    from pathpy.path_extraction.temporal_paths import paths_from_temporal_network_dag
    from pathpy.algorithms.components import connected_components
    from pathpy.classes.network import network_to_networkx
    from networkx import strongly_connected_components
    from pathpy.utils.log import Log, Severity
    Log.set_min_severity(Severity.WARNING)

    for delta in range(1, 900, 50):
        print(delta)
        tn = random_temp_network(n=10, m=100, min_t=0, max_t=800, seed=90)  # type: pp.TemporalNetwork
        obs_times = np.array([t[-1] for t in tn.tedges])
        obs_times.sort()

        p = paths_from_temporal_network_dag(tn, delta=delta)
        hn = pp.HigherOrderNetwork(p, k=2)

        # using NetworkX
        nx_network = network_to_networkx(hn)
        giant_size_nx = len(max(strongly_connected_components(nx_network), key=len))

        # using pathpy
        components = connected_components(hn)
        if giant_size_nx > 3:
            print(giant_size_nx)
        giant_size_pp = max(len(c) for c in components)

        assert giant_size_nx == giant_size_pp
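In the test above, `delta` is the maximum waiting time used when extracting time-respecting paths from the temporal network. A minimal sketch of its effect on a hand-built temporal network, assuming pathpy's convention that an edge at time t can be continued by edges up to time t + delta:

# Minimal sketch: the two temporal edges chain into a time-respecting
# path a -> b -> c only if their time difference is at most delta
# (assuming pathpy's waiting-time convention).
import pathpy as pp
from pathpy.path_extraction.temporal_paths import paths_from_temporal_network_dag

tn = pp.TemporalNetwork()
tn.add_edge('a', 'b', 1)
tn.add_edge('b', 'c', 3)

print(paths_from_temporal_network_dag(tn, delta=1))  # wait too long: no a -> b -> c
print(paths_from_temporal_network_dag(tn, delta=2))  # contains a -> b -> c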
Example 9
def main():
    # TODO: make the input file a variable/command line argument?
    # Read input path data from the finished paths text file
    # with open("paths_finished.txt", "r") as my_file:
    with open("basic_path_data", "r") as my_file:
        path_data = [line.split(';') for line in my_file.read().splitlines()]

    # Create first graph using the basic construction algorithm
    print("Creating baseline graph")
    basic_graph = basic_db_graph_construction.constructDBGraph(path_data, 3)
    print("Finished")
    print()

    # Create second graph using the Divide and Conquer algorithm
    print("Creating graph using Divide and Conquer Algorithm")
    divide_conquer_graph = divide_and_conqer_db_graph_construction.constructDBGraph(
        path_data, 3)
    print("Finished")
    print()

    # Create the third graph using pathpy
    print("Reading path data from file into Paths object")
    paths = pp.Paths.read_file(filename="paths_finished.txt",
                               separator=';',
                               frequency=False,
                               expand_sub_paths=False)
    # paths = pp.Paths()
    # for path in path_data:
    #     paths.add_path(path, separator=";")
    print("Finished")
    print()

    print(paths)
    print()

    print("Creating HigherOrderNetwork using pathpy")
    pathpy_graph = pp.HigherOrderNetwork(paths, 3)
    print("Finished")
    print()

    # dictionary of edges to compare against the others
    # print(pathpy_graph.edges)
    comparable_pathpy_edges = convert_pathpy_edges_to_multiset(
        pathpy_graph.edges)

    basic_equals_divide_conquer = compare_dictionary_graphs(
        basic_graph, divide_conquer_graph)
    basic_equals_pathpy = compare_dictionary_graphs(basic_graph,
                                                    comparable_pathpy_edges)

    if basic_equals_divide_conquer:
        print("BASIC AND DIVIDE CONQUER ARE THE SAME")
    else:
        print("BASIC AND DIVIDE CONQUER ARE DIFFERENT")

    if basic_equals_pathpy:
        print("BASIC AND PATHPY ARE THE SAME")
    else:
        print("BASIC AND PATHPY ARE DIFFERENT")
Example 10
def test_algebraic_connectivity(random_paths, k, e_sum):
    import pathpy
    p = random_paths(120, 0, 40)
    hon = pp.HigherOrderNetwork(p, k=k)
    ac = pp.algorithms.spectral.algebraic_connectivity(hon,
                                                       lanczos_vectors=60,
                                                       maxiter=40)
    assert ac == pytest.approx(e_sum, rel=1e-7)
Example 11
def test_eigen_centrality_hon(random_paths, sub, projection, k, e_sum, e_var):
    import numpy as np
    p = random_paths(50, 0, 8)
    hon = pp.HigherOrderNetwork(p, k=k)
    eigen = pp.algorithms.centralities.eigenvector(hon, projection, sub)
    values = np.array(list(eigen.values()))
    assert values.sum() == pytest.approx(e_sum, abs=EIGEN_ABS_TOL)
    assert values.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL)
Example 12
def test_closeness_centrality_hon(random_paths, k, e_sum, e_var):
    import numpy as np
    p = random_paths(50, 0, 8)
    hon = pp.HigherOrderNetwork(p, k=k)
    closeness = pp.algorithms.centralities.closeness(hon)
    np_closeness = np.array(list(closeness.values()))
    assert np_closeness.sum() == pytest.approx(e_sum)
    assert np_closeness.var() == pytest.approx(e_var)
Example 13
def test_distance_matrix_from_file(path_from_edge_file):
    p = path_from_edge_file
    hon = pp.HigherOrderNetwork(paths=p, k=1)
    d_matrix = shortest_paths.distance_matrix(hon)

    np_matrix = dict_of_dicts_to_matrix(d_matrix)
    assert np.sum(np_matrix) == 8
    assert np.min(np_matrix) == 0
    assert np.max(np_matrix) == 2
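Several of these tests convert the dict-of-dicts returned by `shortest_paths.distance_matrix` (and the path sets returned by `shortest_paths.shortest_paths`) into a dense numpy array via a helper `dict_of_dicts_to_matrix` that is not shown. A minimal sketch, assuming a complete `{source: {target: value}}` structure and an optional aggregation function such as `len`:

# Hypothetical sketch of the dict_of_dicts_to_matrix helper used by
# these tests; the real helper lives in the test suite and is not shown.
import numpy as np


def dict_of_dicts_to_matrix(dict_of_dicts, agg=None):
    nodes = sorted(dict_of_dicts)
    matrix = np.zeros((len(nodes), len(nodes)))
    for i, source in enumerate(nodes):
        for j, target in enumerate(nodes):
            value = dict_of_dicts[source][target]
            # agg (e.g. len over a set of shortest paths) reduces
            # non-scalar values to a number before storing them
            matrix[i, j] = agg(value) if agg is not None else value
    return matrix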
Example 14
def test_distance_matrix(random_paths, paths, n_nodes, k, e_var, e_sum):
    p = random_paths(paths, 20, num_nodes=n_nodes)
    hon = pp.HigherOrderNetwork(paths=p, k=k)
    d_matrix = shortest_paths.distance_matrix(hon)

    np_matrix = dict_of_dicts_to_matrix(d_matrix)

    assert np.var(np_matrix) == pytest.approx(e_var)
    assert np.sum(np_matrix) == e_sum
Example 15
def test_eigen_value_gap(random_paths, k, sub, e_gap):
    import numpy as np
    p = random_paths(200, 0, 40)
    hon = pp.HigherOrderNetwork(p, k=k)
    np.random.seed(0)
    eigen_gap = pp.algorithms.spectral.eigenvalue_gap(hon,
                                                      include_sub_paths=sub,
                                                      lanczos_vectors=90)
    assert eigen_gap
Example 16
def test_pagerank_centrality_hon(random_paths, sub, proj, k, e_sum, e_var):
    import numpy as np
    p = random_paths(50, 0, 8)
    hon = pp.HigherOrderNetwork(p, k=k)
    page = pp.algorithms.centralities.pagerank(hon,
                                               include_sub_paths=sub,
                                               projection=proj)
    values = np.array(list(page.values()))
    assert values.sum() == pytest.approx(e_sum)
    assert values.var() == pytest.approx(e_var)
Example 17
def test_betweenness_centrality_hon(random_paths, norm, k, e_sum, e_var,
                                    e_max):
    import numpy as np
    p = random_paths(50, 0, 8)
    hon = pp.HigherOrderNetwork(p, k=k)
    betweenness = pp.algorithms.centralities.betweenness(hon, normalized=norm)
    values = np.array(list(betweenness.values()))
    assert values.sum() == pytest.approx(e_sum)
    assert max(values) == pytest.approx(e_max)
    assert values.var() == pytest.approx(e_var)
Example 18
def test_shortest_path_length(random_paths, paths, k, num_nodes, s_mean, s_var,
                              s_max):
    p = random_paths(paths, 10, num_nodes=num_nodes)
    hon = pp.HigherOrderNetwork(p, k=k)

    all_paths = shortest_paths.shortest_paths(hon)

    distances = dict_of_dicts_to_matrix(all_paths, agg=len)
    assert np.mean(distances) == pytest.approx(s_mean)
    assert np.var(distances) == pytest.approx(s_var)
    assert np.max(distances) == s_max
Example 19
def test_distance_matrix_first_order_eq_dist_matrix(random_paths, paths,
                                                    num_nodes):
    """test that the distance matrix of k=1 is equal to
    distance_matrix_first_order"""
    p = random_paths(paths, 10, num_nodes)
    hon = pp.HigherOrderNetwork(p, k=1)
    dist = shortest_paths.distance_matrix(hon)
    dist_alt = shortest_paths.distance_matrix(hon)
    m = dict_of_dicts_to_matrix(dist)
    m_alt = dict_of_dicts_to_matrix(dist_alt)
    assert np.allclose(m, m_alt)
Example 20
def test_estimate_order_2():
    # Example with second-order correlations
    paths = pp.Paths()

    paths.addPath('a,c')
    paths.addPath('b,c')
    paths.addPath('c,d')
    paths.addPath('c,e')

    for k in range(4):
        paths.addPath('a,c,d')
        paths.addPath('b,c,e')

    m = pp.MultiOrderModel(paths, maxOrder=2)
    assert m.estimateOrder(paths) == 2, \
        "Error, did not detect second-order correlations"

    x = list(map(str, _np.random.choice(range(10), 100000)))
    ms = pp.MarkovSequence(x)
    assert ms.estimateOrder(maxOrder=2, method='BIC') == 1, \
        "Error, wrongly detected higher-order correlations"
    assert ms.estimateOrder(maxOrder=2, method='AIC') == 1, \
        "Error, wrongly detected higher-order correlations"

    g1 = pp.HigherOrderNetwork(paths, k=1)
    assert g1.vcount() == 5, \
        "Error, wrong number of nodes in first-order network"
    assert g1.ecount() == 4, \
        "Error, wrong number of links in first-order network"

    g2 = pp.HigherOrderNetwork(paths, k=2)
    assert g2.vcount() == 4, \
        "Error, wrong number of nodes in second-order network"
    assert g2.ecount() == 2, \
        "Error, wrong number of links in second-order network"

    g2.reduceToGCC()
    assert g2.vcount() == 1, \
        "Error, wrong number of nodes in giant connected component"
    assert g2.ecount() == 0, \
        "Error, wrong number of links in giant connected component"
Example 21
def test_transition_probability(random_paths, k, sub):
    paths = random_paths(30, 45, 14)
    hon = pp.HigherOrderNetwork(paths, k=k)
    T = hon.transition_matrix(include_subpaths=sub).toarray()
    if sub:
        transitions = sum(hon.nodes[w]["outweight"].sum() > 0
                          for w in hon.nodes)
    else:
        transitions = sum(hon.nodes[x]["outweight"][1] > 0 for x in hon.nodes)
    assert T.sum() == pytest.approx(transitions)
    assert np.all(T <= 1), "not all probabilities are at most 1"
    assert np.all(T >= 0), "not all probabilities are positive"
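The sum-based assertion works because every node with outgoing transitions contributes exactly 1 to the total. A per-node sanity check that could be appended to the test, assuming pathpy's transition matrices are column-stochastic (column j holding the outgoing probabilities of node j); switch the axis if the orientation turns out to differ:

# continuation sketch for the test above; assumes a column-stochastic
# transition matrix (use axis=1 instead if T is row-stochastic)
col_sums = T.sum(axis=0)
assert np.all(np.isclose(col_sums, 1) | np.isclose(col_sums, 0))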
Example 22
def main():
    start_time = time.time()
    with open("paths_finished.txt", "r") as my_file:
        path_data = [line.split(';') for line in my_file.read().splitlines()]
    end_time = time.time()
    print("Read file in " + str(round(end_time - start_time, 2)) + " seconds")
    print(path_data[:10])

    K = 10

    # constructing each of the models from orders 1-10 using a basic method
    basic_models = []
    start_time = time.time()
    for k in range(1, K + 1):
        basic_models.append(
            basic_db_graph_construction.constructDBGraph(path_data, k))
    end_time = time.time()
    print("Basic: Constructed " + str(K) + "th order graph in " +
          str(round(end_time - start_time, 2)) + " seconds")

    # constructing each of the models from order 1-10 using divide and conquer
    dc_models = []
    start_time = time.time()
    for k in range(1, K + 1):
        dc_models.append(
            divide_and_conqer_db_graph_construction.constructDBGraph(
                path_data, k))
    end_time = time.time()
    print("Divide and Conquer: Constructed " + str(K) + "th order graph in " +
          str(round(end_time - start_time, 2)) + " seconds")

    # constructing a higher order network with all models of order 1-10 using pathpy
    start_time = time.time()
    # paths = pp.Paths()
    # for path in path_data:
    #     paths.add_path(path, separator=";")
    paths = pp.Paths.read_file(filename="paths_finished.txt",
                               separator=';',
                               frequency=False,
                               expand_sub_paths=False)
    end_time = time.time()
    print("Pathpy: Read file in " + str(round(end_time - start_time, 2)) +
          " seconds")

    # pathpy
    start_time = time.time()
    pathpy_graph = pp.HigherOrderNetwork(paths, K)
    end_time = time.time()
    print("Pathpy: Constructed " + str(K) + "th order graph in " +
          str(round(end_time - start_time, 2)) + " seconds")
Example 23
def test_extract_distribute(test_data_directory, ):
    network_path = os.path.join(test_data_directory, 'example_network.edges')
    od_path = os.path.join(test_data_directory, 'example_origin_destination.csv')

    # read the network topology
    p = pp.Paths.read_edges(network_path, undirected=True)
    network = pp.HigherOrderNetwork(p)

    OD = pp.path_extraction.read_origin_destination(od_path)

    paths = pp.path_extraction.paths_from_origin_destination(OD, network)

    assert (paths.paths[3][('A', 'B', 'F', 'H')][1] == 2.0 and
            paths.paths[3][('A', 'C', 'G', 'H')][1] == 3.0) or \
           (paths.paths[3][('A', 'B', 'F', 'H')][1] == 3.0 and
            paths.paths[3][('A', 'C', 'G', 'H')][1] == 2.0)
    assert paths.paths[3][('D', 'B', 'C', 'E')][1] == 7.0
    assert paths.paths[2][('A', 'B', 'F')][1] == 3.0
    assert paths.paths[2][('B', 'C', 'E')][1] == 3.0
Example 24
def test_model_size(random_paths, k, n_nodes, expected):
    p = random_paths(20, 10, n_nodes)
    hon_1 = pp.HigherOrderNetwork(p, k=k)
    assert np.allclose(hon_1.model_size(), expected)
Example 25
def test_degrees(path_from_edge_file):
    hon_1 = pp.HigherOrderNetwork(path_from_edge_file, k=1)
    expected_degrees = {'1': 52, '2': 0, '3': 2, '5': 5}
    for v in hon_1.nodes:
        assert expected_degrees[v] == hon_1.nodes[v]["outweight"][1], \
            "Wrong degree calculation in HigherOrderNetwork"
""")

#%%
md("""
The data analysis and modelling framework outlined in these works builds on a generalisation of standard, first-order networks to $k$-dimensional De Bruijn graph models for paths in complex networks.

The class `HigherOrderNetwork` allows us to generate such higher-order network models of paths. In the documentation, we find that the constructor takes a parameter `paths`, i.e. the statistics of the observed paths that we want to model. With the parameter `k` we specify the order $k$ of the higher-order model that we want to fit. To understand this better, let us do this for our toy example.

<span style="color:red">**TODO:** Read the toy example from unit 1.2 from the file `data/toy_paths.ngram`, generate a **first-order** model instance `hon_1` and print a summary of the resulting instance.</span>
""")

#%% In [2]
toy_paths = pp.Paths.read_file('data/toy_paths.ngram')
print(toy_paths)

hon_1 = pp.HigherOrderNetwork(toy_paths, k=1)
print(hon_1)

#%%
md("""
This generates a first-order model of our paths, with five nodes $a, b, c, d$ and $e$, and four links $(a,c), (b,c), (c,d), (c,e)$. It is identical to the `Network` instance that we previously created using `Network.from_paths`. Indeed, each `HigherOrderNetwork` instance is derived from the class `Network`, which means we can store edge and node attributes and visualise it with exactly the same methods.

<span style="color:red">**TODO:** Plot the `HigherOrderNetwork` instance `hon_1` and print the weights of all edges.</span>
""")

#%% In [3]
style = { 'label_offset': [0,-1], 'label_color' : 'black', 'width': 800, 'height': 250}
pp.visualisation.plot(hon_1, **style)
for e in hon_1.edges:
    print(e, hon_1.edges[e]['weight'])
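Note that in pathpy 2 each edge weight printed above is, by convention, a two-component vector that separately counts occurrences as a sub-path and as a full path; this is an assumption about the library version used in this tutorial. The loop can make the two counts explicit:

# assuming pathpy 2's weight convention: weight[0] = sub-path count,
# weight[1] = full-path count
for e in hon_1.edges:
    w = hon_1.edges[e]['weight']
    print(e, 'as sub-path:', w[0], 'as full path:', w[1])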
Example 27
t = pp.TemporalNetwork.read_file('data/temporal_clusters.tedges')
style = {
    'max_time': 250,
    'ms_per_frame': 10,
    'ts_per_frame': 1
}
pp.visualisation.plot(t, **style)

#%% In [17]
walk = pp.algorithms.temporal_walk.generate_walk(t, 500)
style['ms_per_frame'] = 250
pp.visualisation.plot_walk(pp.Network.from_temporal_network(t), walk, **style)

#%% In [18]
p = pp.path_extraction.paths_from_temporal_network_dag(t)
hon_2 = pp.HigherOrderNetwork(p, k=2)

clusters = { v: 'red' if len(v)<2 else ('green' if v.startswith('1') else 'blue') for v in p.nodes}

pp.visualisation.plot(hon_2, plot_higher_order_nodes=False, node_color = clusters)

#%% In [19]
pp.visualisation.plot_walk(hon_2, walk, **style, plot_higher_order_nodes=False)

#%% In [20]
hon_3 = pp.HigherOrderNetwork(p, k=3)
pp.visualisation.plot(hon_3, plot_higher_order_nodes=False, node_color = clusters)

#%% In [21]
print('Second-order model: {0}'.format(pp.algorithms.spectral.algebraic_connectivity(hon_2)))
Example 28
def run(card_text):
    numFrac = 3.5

    def similarity(s1, s2):
        # word-overlap similarity: count words shared by the two sentences,
        # normalised by the sum of their log-lengths
        wList1 = s1.split()
        wList2 = s2.split()
        denom = math.log(len(s1)) + math.log(len(s2))

        count = 0

        for word in wList1:
            if word in wList2:
                count += 1
        score = count / denom
        return score

    fWriterInt = open('cardSpaced.txt', 'w')

    fullText = card_text
    combinedParagraphs = " ".join(line.strip() for line in fullText)

    LineList = combinedParagraphs.split('. ')
    for line in LineList:
        fWriterInt.write(line.strip() + "." + "\n")

    fWriterInt.close()

    fReaderSentences = open('cardSpaced.txt', 'r')
    fWriteNodes = open('sentenceNodes.txt', 'w')
    fWriteAllEdges = open('sentenceAllEdgeWeights.txt', 'w')

    sentences = fReaderSentences.readlines()
    length = len(sentences)
    numSentences = int(length / numFrac)

    for i in range(0, len(sentences)):
        fWriteNodes.write(str(i) + "\n")

    sentenceListLists = []

    for i1, sent1 in enumerate(sentences):
        for i2, sent2 in enumerate(sentences):
            sentenceList = []
            score = similarity(sent1, sent2)
            sentenceList.append(str(i1))
            sentenceList.append(str(i2))
            sentenceList.append(str(score))
            sentenceListLists.append(sentenceList)

    sentenceListLists.sort(key=lambda x: float(x[2]))
    sentenceListLists.reverse()
    fWriteSortEdges = open('sentenceSortedEdgeWeights.txt', 'w')

    for edge in sentenceListLists:
        fWriteSortEdges.write(edge[1] + "," + edge[0] + "," +
                              str(round(float(edge[2]))) + "\n")

    fWriteSortEdges.close()

    paths = pp.Paths.read_edges('sentenceSortedEdgeWeights.txt', weight=True)
    network = pp.HigherOrderNetwork(paths, k=1)

    prDict = pp.algorithms.centralities.pagerank(network, weighted=True)
    print(prDict)

    prListTotal = []
    for node in prDict:
        prList = []
        prList.append(node)
        prList.append(prDict[node])
        prListTotal.append(prList)

    prListTotal.sort(key=lambda x: float(x[1]))
    prListTotal.reverse()

    fSummarizedSentences = open('keySentences.txt', 'w')

    newList = []
    for i in range(0, numSentences):
        newList.append(int(prListTotal[i][0]))
    newList.sort()

    return newList
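A minimal invocation sketch for `run`; the input file name is hypothetical, and `card_text` is assumed to be an iterable of text lines, as the join over `fullText` above suggests:

# hypothetical usage; 'article.txt' is an assumed input file whose lines
# form card_text, and run() writes its intermediate files to the
# current working directory
with open('article.txt') as f:
    key_sentence_indices = run(f.readlines())
print(key_sentence_indices)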
Example 29
import pathpy as pp
import time


paths = pp.Paths.read_file(filename="paths_finished.txt", separator=';', frequency=False, expand_sub_paths=False)
start_time = time.time()
graph = pp.HigherOrderNetwork(paths, 3)
end_time = time.time()

print("Running time of pathpy: " + str(round(end_time - start_time, 2)) + " seconds")

print(dict(list(graph.edges.items())))
Example 30
#%% In [1]
import pathpy as pp

toy_paths = pp.Paths()
toy_paths.add_path('a,c,d', 2)
toy_paths.add_path('b,c,e', 2)
print(toy_paths)

#%% In [2]
hon_1 = pp.HigherOrderNetwork(toy_paths)
pp.visualisation.plot(hon_1)
print(hon_1.transition_matrix())

#%% In [3]
print(hon_1.likelihood(toy_paths, log=False))

#%% In [4]
hon_2 = pp.HigherOrderNetwork(toy_paths, k=2)
print(hon_2.transition_matrix())
hon_2.likelihood(toy_paths, log=False)

#%% In [5]
hon_2_null = pp.HigherOrderNetwork(toy_paths, k=2, null_model=True)
pp.visualisation.plot(hon_2_null)
print(hon_2.transition_matrix())
hon_2_null.likelihood(toy_paths, log=False)

#%% In [6]
from scipy.stats import chi2

d = hon_2.degrees_of_freedom() - hon_1.degrees_of_freedom()
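The cell is cut off here. A sketch of how such a likelihood-ratio test typically continues (the standard recipe, not the original notebook's code): twice the log-likelihood difference between the two nested models is compared against a chi-squared distribution with `d` degrees of freedom.

# sketch of the usual continuation: likelihood-ratio test between the
# first- and second-order models, with d degrees of freedom
llr = -2 * (hon_1.likelihood(toy_paths, log=True)
            - hon_2.likelihood(toy_paths, log=True))
p_value = 1 - chi2.cdf(llr, df=d)
print('p-value of the likelihood-ratio test:', p_value)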