Exemple #1
0
def test_write_state_file(random_paths, k, tmpdir):
    """Save one state file per layer 1..k of a multi-order model.

    Each layer is written to ``<tmpdir>/sub/multi_order_state.<layer>``; the
    test passes as long as no call raises.
    """
    base_path = str(tmpdir.mkdir("sub").join("multi_order_state"))
    sample_paths = random_paths(20, 40, 6)
    model = pp.MultiOrderModel(sample_paths, max_order=k)

    for layer in range(1, k + 1):
        target = "{}.{}".format(base_path, layer)
        model.save_state_file(target, layer=layer)
Exemple #2
0
def test_single_path_likelihood(random_paths):
    """Check MultiOrderModel likelihoods for path sets and for single paths."""
    p1 = random_paths(size=10, rnd_seed=20, num_nodes=10)  # type: pp.Paths
    p2 = random_paths(size=100, rnd_seed=0, num_nodes=50)
    p12 = p1 + p2
    mom = pp.MultiOrderModel(p12, max_order=3)
    lkh1 = mom.likelihood(p1)
    lkh2 = mom.likelihood(p2)
    lkh12 = mom.likelihood(p12)

    # The first path set must be more likely than the second one.
    assert lkh1 > lkh2
    # The likelihoods of the two parts must add up to that of the combined
    # set (presumably these are log-likelihoods -- confirm against pathpy).
    assert (lkh1 + lkh2) == pytest.approx(lkh12)

    assert mom.path_likelihood(('1', '2'), layer=0, freq=4) < 0

    # The likelihood of a fixed path must not decrease as the layer grows.
    lkl_last = None
    for i in range(3):
        lkl = mom.path_likelihood(('6', '7', '2', '0', '6'), layer=i, freq=9)
        if lkl_last is not None:
            assert lkl >= lkl_last
        # Remember the value on every iteration; previously this assignment
        # was nested under the ``if`` and therefore never executed, so the
        # comparison above was never made.
        lkl_last = lkl

    # Score every path stored under p12.paths[3] (presumably the paths of
    # length 3 -- confirm) and check which one has the highest likelihood.
    path_likelihoods = [
        (mom.path_likelihood(p, layer=2, freq=freq.sum(), log=False), p)
        for p, freq in p12.paths[3].items()
    ]

    assert max(path_likelihoods)[1] == ('23', '32', '19', '8')
Exemple #3
0
def test_save_statefile(random_paths, tmpdir):
    """Ensure the saved state file does not contain a dumped dictionary."""
    target = str(tmpdir.join("statefile.sf"))
    sample_paths = random_paths(3, 20, 6)
    model = pp.MultiOrderModel(sample_paths, max_order=2)
    model.save_state_file(target, layer=2)
    with open(target) as handle:
        # a '{' on any line would indicate that a dict repr was written
        assert all('{' not in line for line in handle)
Exemple #4
0
def test_estimate_order_2():
    """Paths with second-order correlations must be detected as order 2."""
    paths = pp.Paths()

    for two_node_path in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.add_path(two_node_path)

    # a is always followed (via c) by d, and b by e
    for _ in range(4):
        for three_node_path in ('a,c,d', 'b,c,e'):
            paths.add_path(three_node_path)

    model = pp.MultiOrderModel(paths, max_order=2)
    estimated = model.estimate_order()
    assert estimated == 2
Exemple #5
0
def test_estimate_order_1():
    """Paths without second-order correlations must be detected as order 1."""
    paths = pp.Paths()

    for two_node_path in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.add_path(two_node_path)

    # every combination of predecessor (a, b) and successor (d, e) of c
    # is added equally often
    for _ in range(4):
        for three_node_path in ('a,c,d', 'b,c,e', 'b,c,d', 'a,c,e'):
            paths.add_path(three_node_path)

    model = pp.MultiOrderModel(paths, max_order=2)
    estimated = model.estimate_order()
    assert estimated == 1, \
        "Error, wrongly detected higher-order correlations"
Exemple #6
0
def test_estimate_order_2():
    """Check order estimation and higher-order network sizes (camelCase API)."""
    paths = pp.Paths()

    for two_node_path in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.addPath(two_node_path)

    # a is always followed (via c) by d, and b by e
    for _ in range(4):
        paths.addPath('a,c,d')
        paths.addPath('b,c,e')

    model = pp.MultiOrderModel(paths, maxOrder=2)
    detected = model.estimateOrder(paths)
    assert detected == 2, "Error, did not detect second-order correlations"

    # A sequence of 100000 independently drawn symbols must be estimated as
    # first order under both information criteria.
    symbols = [str(s) for s in _np.random.choice(range(10), 100000)]
    sequence = pp.MarkovSequence(symbols)
    for criterion in ('BIC', 'AIC'):
        assert sequence.estimateOrder(maxOrder=2, method=criterion) == 1, \
            "Error, wrongly detected higher-order correlations"

    first_order = pp.HigherOrderNetwork(paths, k=1)
    assert first_order.vcount() == 5, \
        "Error, wrong number of nodes in first-order network"
    assert first_order.ecount() == 4, \
        "Error, wrong number of links in first-order network"

    second_order = pp.HigherOrderNetwork(paths, k=2)
    assert second_order.vcount() == 4, \
        "Error, wrong number of nodes in second-order network"
    assert second_order.ecount() == 2, \
        "Error, wrong number of links in second-order network"

    second_order.reduceToGCC()
    assert second_order.vcount() == 1, \
        "Error, wrong number of nodes in giant connected component"
    assert second_order.ecount() == 0, \
        "Error, wrong number of links in giant connected component"
Exemple #7
0
#%% In [1]
import pathpy as pp

# Load the temporal network and extract its paths
t = pp.TemporalNetwork.read_file('data/temporal_clusters.tedges')
paths = pp.path_extraction.paths_from_temporal_network_dag(t)

mog = pp.MultiOrderModel(paths, 3)

# Color nodes according to known ground-truth clusters
clusters = {}
for v in paths.nodes:
    if len(v) < 2:
        clusters[v] = 'red'
    elif v.startswith('1'):
        clusters[v] = 'green'
    else:
        clusters[v] = 'blue'

# Plot the layer whose order the model estimates as optimal
optimal_layer = mog.layers[mog.estimate_order()]
pp.visualisation.plot(optimal_layer, plot_higher_order_nodes=False, node_color=clusters)

#%% In [2]
from random import shuffle

# Separate the time-stamped edges into (source, target) pairs and time stamps,
# then shuffle the time stamps to randomise the temporal ordering.
edges = [(src, dst) for (src, dst, _) in t.tedges]
times = [stamp for (_, _, stamp) in t.tedges]
shuffle(times)

# Rebuild a temporal network with the same edges but shuffled time stamps
t_shuffled = pp.TemporalNetwork()
for (src, dst), stamp in zip(edges, times):
    t_shuffled.add_edge(src, dst, stamp)

paths = pp.path_extraction.paths_from_temporal_network_dag(t_shuffled)

mog = pp.MultiOrderModel(paths, 3)

clusters = {}
for v in paths.nodes:
    if len(v) < 2:
        clusters[v] = 'red'
    elif v.startswith('1'):
        clusters[v] = 'green'
    else:
        clusters[v] = 'blue'

optimal_layer = mog.layers[mog.estimate_order()]
pp.visualisation.plot(optimal_layer, plot_higher_order_nodes=False, node_color=clusters)
Exemple #8
0
# First-order network plus null models of order 2 and 5 built from p
hon_1 = pp.HigherOrderNetwork(p, k=1)
hon_2 = pp.HigherOrderNetwork(p, k=2, null_model=True)
hon_5 = pp.HigherOrderNetwork(p, k=5, null_model=True)

# Print the (non-log) likelihood of p under each of the three networks
for hon in (hon_1, hon_2, hon_5):
    print(hon.likelihood(p, log=False))

#%% In [9]
print('Path consists of {0} nodes'.format(len(path)))
# For each network, map the path to higher-order nodes and count all
# entries after the first one.
for label, hon in (
    ('Number of transitions in  first-order model = ', hon_1),
    ('Number of transitions in second-order model = ', hon_2),
    ('Number of transitions in  fifth-order model = ', hon_5),
):
    transitions = hon.path_to_higher_order_nodes(path)[1:]
    print(label, str(len(transitions)))

#%% In [10]
# Fit a multi-order model up to order 2 and show its summary
mog = pp.MultiOrderModel(toy_paths, max_order=2)
print(mog)

# Plot layers 0, 1 and 2 in turn
for order in range(3):
    pp.visualisation.plot(mog.layers[order])

#%% In [11]
mog = pp.MultiOrderModel(toy_paths, max_order=2)

# Difference in degrees of freedom between the order-2 and order-1 models
d = mog.degrees_of_freedom(max_order=2) - mog.degrees_of_freedom(max_order=1)
# Test statistic: -2 * (log-likelihood at order 1 - log-likelihood at order 2)
x = - 2 * (mog.likelihood(toy_paths, log=True, max_order=1) - mog.likelihood(toy_paths, log=True, max_order=2))
# Upper-tail probability of the chi-squared distribution with d degrees of
# freedom. The survival function is used instead of ``1 - cdf`` because the
# subtraction loses precision (and can round to 0.0) for small p-values.
# Assumes ``chi2`` is ``scipy.stats.chi2`` -- confirm against the import.
# NOTE: this rebinds ``p``, which other cells pass around as path data.
p = chi2.sf(x, d)

print('p value of null hypothesis that data has maximum order 1 = {0}'.format(p))
Exemple #9
0
def test_summary_multi_order_model(random_paths):
    """Smoke test: printing a MultiOrderModel must not raise."""
    sample_paths = random_paths(90, 90)
    model = pp.MultiOrderModel(paths=sample_paths, maxOrder=3)
    print(model)
Exemple #10
0
def estimate_user_kopt(user, top_nodes):
    """Estimate the optimal multi-order model order for a user's click paths.

    Reads the tab-separated file ``PATH + FILENAME`` (first row is a header),
    groups consecutive article clicks belonging to the same game into paths,
    reduces those paths with ``listrun`` using the ``top_nodes`` most frequent
    articles, and fits a ``pp.MultiOrderModel`` with ``max_order=2``.

    Columns are read by zero-based index: 2 = user, 3 = article, 4 = game.

    Args:
        user: Value compared against column 2 of every row; the string
            ``"all"`` keeps the rows of every user.
        top_nodes: Upper slice bound applied to the articles sorted by
            descending frequency, i.e. how many of them are kept.

    Returns:
        A tuple ``(number_of_reduced_paths, estimated_order)``.
    """
    paths = _collect_click_paths(user)

    # --- Path filtering ---
    # Articles ordered by descending click count (ties keep first-seen order).
    node_counts = Counter(item for path in paths for item in path)
    top_sorted_nodes = [node for node, _ in node_counts.most_common()][0:top_nodes]

    # NOTE(review): listrun is defined elsewhere; presumably it splits a path
    # into runs made of top nodes only -- confirm against its definition.
    paths_reduced = []
    for path in paths:
        paths_reduced.extend(listrun(path, top_sorted_nodes))

    # --- Hand the reduced paths to pathpy ---
    p = pp.Paths()
    for path in paths_reduced:
        p.add_path(path)
    print(p)

    mog = pp.MultiOrderModel(p, max_order=2)
    return (len(paths_reduced), mog.estimate_order())


def _collect_click_paths(user):
    """Return the list of article paths (one per run of rows sharing a game)."""
    paths = []
    path = []
    last_game = None
    last_article = None

    filename = PATH + FILENAME
    # newline='' is what the csv module asks for when it is given a file object
    with open(filename, 'r', encoding='utf-8', newline='') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter='\t')
        print(f"Parsed file: {FILENAME}")

        # The first row is the header; report it and skip it.
        header = next(csv_reader, None)
        if header is not None:
            print(f'Columns: {", ".join(header)}')

        for row in csv_reader:
            # Ignore data from other users unless all users were requested
            if user != "all" and row[2] != user:
                continue

            article = row[3]
            game = row[4]

            if path and game == last_game:
                # Same game: extend the current path, skipping a repeated
                # click on the article that was clicked last.
                if article != last_article:
                    path.append(article)
            else:
                # A new game starts: store the finished path, begin a new one
                if path:
                    paths.append(path)
                path = [article]

            last_game = game
            last_article = article

    # Keep the path that was still being built when the file ended;
    # it used to be dropped silently.
    if path:
        paths.append(path)

    return paths
Exemple #11
0
def test_print(random_paths):
    """Smoke test: printing a MultiOrderModel of max order 3 must not raise."""
    sample_paths = random_paths(90, 0, 20)
    model = pp.MultiOrderModel(sample_paths, max_order=3)
    print(model)
Exemple #12
0
def test_test_network_hypothesis_values(random_paths, k, method, e_ic0, e_ic1):
    """Compare the two values returned alongside the verdict with expectations.

    ``e_ic0`` and ``e_ic1`` are the expected second and third elements of the
    tuple returned by ``test_network_hypothesis``.
    """
    sample_paths = random_paths(20, 40, 6)
    model = pp.MultiOrderModel(sample_paths, max_order=k)
    outcome = model.test_network_hypothesis(sample_paths, method=method)
    # the first element (the verdict) is not checked here
    _is_net, ic0, ic1 = outcome
    assert e_ic0 == pytest.approx(ic0)
    assert e_ic1 == pytest.approx(ic1)
Exemple #13
0
def test_test_network_hypothesis(random_paths, k, method):
    """Smoke test: test_network_hypothesis must return a 3-tuple without raising."""
    sample_paths = random_paths(20, 40, 6)
    model = pp.MultiOrderModel(sample_paths, max_order=k)
    outcome = model.test_network_hypothesis(sample_paths, method=method)
    # unpacking verifies that exactly three values come back
    _is_net, _ic0, _ic1 = outcome
Exemple #14
0
def test_init(random_paths, k):
    """A model built with max_order=k must hold k + 1 layers."""
    sample_paths = random_paths(90, 0, 20)
    model = pp.MultiOrderModel(sample_paths, max_order=k)
    expected_layers = k + 1
    assert len(model.layers) == expected_layers
Exemple #15
0
# For each ground-truth order 2..4, generate paths from a k-th-order path
# model in 20 batches of growing size and record the order that the
# multi-order model estimates for each batch.
for order in range(2, 5):
    # generate random (strongly connected) network
    g = igraph.Graph.Erdos_Renyi(n=n,
                                 m=int(n * deg),
                                 directed=True,
                                 loops=True).clusters(mode='STRONG').giant()
    # generate k-th-order path model
    pathModel = KOrderPathModel.KOrderPathModel(g, k=int(order))
    batch = 1
    while batch <= 20:
        try:
            # the number of generated paths grows as 10**(0.25 * batch)
            pathset = pathModel.generatePaths(pathCount=ceil(10**(0.25 *
                                                                  batch)),
                                              pathLength=20)

            model = pp.MultiOrderModel(pathset, max_order=order + 1)
            # estimate optimal order
            optimal_order = model.estimate_order()
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt/SystemExit can still stop this retry loop.
            # The same batch is retried with freshly generated paths.
            print('k = ' + str(order) + ', batch = ' + str(batch) +
                  ' PathsTooShort')
            print('retrying...')
            continue

        print('k = ' + str(order) + ', batch = ' + str(batch) + ':')
        print('the optimal Markovian order of the data is ' +
              str(optimal_order))
        detected_order[order - 2].append(optimal_order)
        batch += 1

print(detected_order)
Exemple #16
0
# Drop the last entry, then turn every remaining '+'-separated string into a
# list of its parts with the empty parts removed: one empty string for the
# first entry, two for every later entry.
original_path_set.pop()
for i, raw_path in enumerate(original_path_set):
    temp = raw_path.split('+')
    temp.remove('')
    if i != 0:
        temp.remove('')
    original_path_set[i] = temp

# extract paths without redundant nodes
real_path_set = pathpy.Paths()
print('generating pathset without redundant nodes...')
break_and_add_path(original_path_set, real_path_set)

print('information of the pathset without redundant nodes:')
print(real_path_set)

# The higher-order model is generated from the path set and named `model`.
# The maximum order is preliminarily set to 5 and lowered step by step while
# the model cannot be built. If the final estimated optimal order equals the
# maximum order, the maximum order should be increased.
max_order = 5
success = False
while not success:
    try:
        model = pathpy.MultiOrderModel(real_path_set, max_order=max_order)
        success = True
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate. Stop lowering the
        # order once 0 has been tried instead of continuing with negative
        # orders; the last error is re-raised in that case.
        if max_order <= 0:
            raise
        max_order -= 1

# estimate optimal order of the pathset
print('detecting the optimal order...')
optimal_order = model.estimate_order()
print('the optimal Markovian order of the data is ' + str(optimal_order))