Beispiel #1
0
def scramble(text, path=None):
    """Measure how much shuffling a text changes its persistence diagram.

    The input is split into sentences with ``tokenizer``, stripped of
    punctuation, lower-cased and reduced to normal forms with ``morph``.
    ``shuffle_t`` produces the shuffled counterpart; both versions are fed to
    ``Persistent`` and the Wasserstein distance between the first diagram of
    each result is returned.

    Args:
        text: Text to analyse. Ignored when ``path`` is given.
        path: Optional path to a text file. When truthy, the file contents
            are analysed instead of ``text``.

    Returns:
        The result of ``dn.wasserstein_distance`` applied to the first
        diagram of the original and of the shuffled text.
    """
    if path:
        # The file content must replace ``text``; previously it was read and
        # tokenized but then discarded in favour of ``text``.
        with open(path, 'r') as handle:
            text = handle.read()

    sentences = tokenizer.tokenize(text)

    # Build the punctuation-stripping table once instead of per sentence.
    strip_punctuation = str.maketrans('', '', punctuation)

    split_sent = [
        [
            morph.parse(word)[0].normal_form
            for word in sentence.translate(strip_punctuation).lower().split()
        ]
        for sentence in sentences
    ]

    shuff_sent = shuffle_t(split_sent)

    original = Persistent(split_sent=split_sent, min_count=5, window=5)
    dgms_n = original.persistent()

    shuffled = Persistent(split_sent=shuff_sent, min_count=5, window=5)
    dgms_sh = shuffled.persistent()

    return dn.wasserstein_distance(dgms_n[0], dgms_sh[0])
Beispiel #2
0
def scramble(text, path=None):
    """Measure how much shuffling a text changes its persistence diagram.

    The input is split into sentences with ``tokenizer``, stripped of
    punctuation, lower-cased and reduced to normal forms with ``morph``.
    ``shuffle_t`` produces the shuffled counterpart; both versions are fed to
    ``Persistent`` and the Wasserstein distance between the first diagram of
    each result is returned.

    Args:
        text: Text to analyse. Ignored when ``path`` is given.
        path: Optional path to a text file. When truthy, the file contents
            are analysed instead of ``text``.

    Returns:
        The result of ``dn.wasserstein_distance`` applied to the first
        diagram of the original and of the shuffled text.
    """
    if path:
        # The file content must replace ``text``; previously it was read and
        # tokenized but then discarded in favour of ``text``.
        with open(path, 'r') as handle:
            text = handle.read()

    sentences = tokenizer.tokenize(text)

    # Build the punctuation-stripping table once instead of per sentence.
    strip_punctuation = str.maketrans('', '', punctuation)

    split_sent = [
        [
            morph.parse(word)[0].normal_form
            for word in sentence.translate(strip_punctuation).lower().split()
        ]
        for sentence in sentences
    ]

    shuff_sent = shuffle_t(split_sent)

    original = Persistent(split_sent=split_sent, min_count=5, window=5)
    dgms_n = original.persistent()

    shuffled = Persistent(split_sent=shuff_sent, min_count=5, window=5)
    dgms_sh = shuffled.persistent()

    return dn.wasserstein_distance(dgms_n[0], dgms_sh[0])
def _load_first_diagram(path, remove_infinity):
    """Unpickle ``path`` and wrap the first entry of its 'dgms' field.

    When ``remove_infinity`` is true, rows whose second column is infinite
    are dropped before the points are handed to ``dion.Diagram``.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # files produced by a trusted pipeline.
    with open(path, 'rb') as handle:
        rips = pickle.load(handle)
    points = rips['dgms'][0]
    if remove_infinity:
        points = points[points[:, 1] < np.inf]
    return dion.Diagram(list(points))


def compute_bottleneck_distance(all_seeds_rips_files,
                                remove_infinity=False,
                                compute_wass_distance=False,
                                use_persim=False,
                                M=10):
    """Compute all pairwise diagram distances between pickled results.

    Despite the name, the metric depends on the flags: bottleneck distance by
    default, or a Wasserstein-type distance when ``compute_wass_distance`` is
    set. Each file is unpickled exactly once.

    Args:
        all_seeds_rips_files: Sequence of '/'-separated pickle paths.
        remove_infinity: Drop points with an infinite second coordinate.
        compute_wass_distance: Use a Wasserstein-type distance instead of
            the bottleneck distance.
        use_persim: Call ``persim`` instead of ``dion`` for the distance.
        M: Forwarded as ``M`` to ``persim.sliced_wasserstein_kernel``.

    Returns:
        ``(matrix, labels)`` where ``matrix`` is a list with one numpy row
        per input file and ``labels`` holds one
        ``'<model>-<dataset> seed:<seed>-<file stem>'`` string per file.
    """
    def pair_distance(d1, d2):
        # Resolved lazily so only the selected backend is ever touched.
        if compute_wass_distance:
            if use_persim:
                return persim.sliced_wasserstein_kernel(d1, d2, M=M)
            return dion.wasserstein_distance(d1, d2, q=2)
        if use_persim:
            return persim.bottleneck(d1, d2)
        return dion.bottleneck_distance(d1, d2)

    # Load every diagram once up front rather than once per pair.
    diagrams = [
        _load_first_diagram(path, remove_infinity)
        for path in all_seeds_rips_files
    ]

    matrix = []
    labels = []
    for file1, d1 in zip(all_seeds_rips_files, diagrams):
        print('Computing file: {}'.format(file1))

        # NOTE(review): the sample path from the original comment,
        # LTHT/remote_data/saves/alexnet_nmp/mnist/42/pickle/8.pickle,
        # has the seed at [-3], not [-5]. The indices are kept as written;
        # confirm them against the real directory layout.
        parts = file1.split('/')
        seed, model_name, dataset, file1_name = (
            parts[-5], parts[-7], parts[-6], parts[-1])

        # Format the label directly from the components so that a hyphen
        # inside any of them cannot shift the fields.
        labels.append('{}-{} seed:{}-{}'.format(
            model_name, dataset, seed, file1_name.split(".")[0]))

        row = np.zeros(len(all_seeds_rips_files))
        for i, d2 in enumerate(diagrams):
            row[i] = pair_distance(d1, d2)
        matrix.append(row)

    return matrix, labels
def wasserstein_dist_mat(pds):
    """Return the symmetric matrix of pairwise Wasserstein distances.

    ``pds`` is converted with ``pds2diagrams``; only the upper triangle
    (diagonal included) is computed with ``d.wasserstein_distance`` and each
    value is mirrored into the lower triangle. Progress is printed as
    ``<row>/<total>`` before each row.
    """
    diagrams = pds2diagrams(pds)
    count = len(pds)
    dist_mat = np.zeros((count, count))

    for row in range(count):
        print('{}/{}'.format(row, count))
        for col in range(row, count):
            value = d.wasserstein_distance(diagrams[row], diagrams[col])
            dist_mat[row, col] = value
            dist_mat[col, row] = dist_mat[row, col]

    return dist_mat
def comp_row(i, p, dic, out):
    """Fill the rows ``i, i + p, i + 2p, ...`` of a flattened distance matrix.

    ``dic`` supplies the matrix size under ``'n'`` and the diagrams under
    ``'diag'``. For each selected row ``r``, the entries ``r..n-1`` are
    computed with ``d.wasserstein_distance`` (the rest stay zero) and the row
    is written to ``out[r*n:(r+1)*n]``. Before each row a progress line is
    printed that includes the time spent on the previously processed row.
    """
    n = dic['n']
    diag = dic['diag']

    candidates = (step * p + i for step in range(int(n / p) + 1))
    selected_rows = [r for r in candidates if r < n]

    tic = 0
    toc = 0
    for r in selected_rows:
        row = np.zeros(n)
        print('{}/{}\t({}s)'.format(r, n, toc - tic))
        tic = time.time()
        # Only the upper-triangular part of the row is computed.
        row[r:] = [
            d.wasserstein_distance(diag[r], diag[j]) for j in range(r, n)
        ]
        out[r * n:(r + 1) * n] = row
        toc = time.time()
Beispiel #6
0
def test_issue39():
    """Run the Wasserstein distance (q=5) on the two issue-39 diagrams.

    There is no assertion on the value; the test passes as long as loading
    the data and computing the distance completes without raising.
    """
    paths = ('data/issue39/dgm1.txt', 'data/issue39/dgm2.txt')
    raw_points = [np.loadtxt(path, delimiter=',') for path in paths]
    first, second = [d.Diagram(points) for points in raw_points]
    dist = d.wasserstein_distance(first, second, q=5)
Beispiel #7
0
def wasserstein_distance(dia1, dia2, p=1):
    """Return ``dionysus.wasserstein_distance`` for the two diagrams.

    ``p`` (default 1) is forwarded positionally as the third argument.
    """
    distance = dionysus.wasserstein_distance(dia1, dia2, p)
    return distance
Beispiel #8
0
def wasserstein_distance(barcode1, barcode2):
    """Return the Wasserstein distance between two barcodes.

    Each barcode is converted with ``barcode_to_diagram`` and the resulting
    pair is passed to ``di.wasserstein_distance`` with its default settings.
    """
    diagrams = [barcode_to_diagram(barcode) for barcode in (barcode1, barcode2)]
    return di.wasserstein_distance(*diagrams)
        distances = [get_distance(other_point, point) for other_point in trees]
        trees = trees[np.argpartition(distances, num_points)]
        samples.append(trees[:num_points])  # closest num_points

samples = np.asarray(samples)

# Write each sample set to its own CSV file, numbered by position.
for i, sample in enumerate(samples):
    csv_path = './samples/samples{:d}.csv'.format(i)
    np.savetxt(csv_path, sample, delimiter=',')

# Constructing the persistent homology diagrams and distance matrix:
# one alpha-shape filtration per sample set, keeping only the diagram of
# the chosen homology dimension.
dgms_list = []
for sample_set in samples:
    filtration = d.Filtration(diode.fill_alpha_shapes(sample_set))
    persistence = d.homology_persistence(filtration)
    all_dgms = d.init_diagrams(persistence, filtration)
    dgms_list.append(all_dgms[homology_dimension])

num_dgms = len(dgms_list)
distance_matrix = np.zeros((num_dgms, num_dgms))
# Only pairs with j < i are computed; the diagonal stays zero and each value
# is mirrored to keep the matrix symmetric.
for i, dgm_i in enumerate(dgms_list):
    for j, dgm_j in enumerate(dgms_list[:i]):
        # Chose Wasserstein, q=1 to emphasise many near-diagonal points
        dist = d.wasserstein_distance(dgm_i, dgm_j, q=1)
        distance_matrix[i, j] = distance_matrix[j, i] = dist

np.savetxt('./processed_data/dist_matrix.csv', distance_matrix, delimiter=',')
Beispiel #10
0
def wasserstein_distance(dia1, dia2, p=1):
    """Return ``dionysus.wasserstein_distance`` for the two diagrams.

    ``p`` (default 1) is forwarded positionally as the third argument.
    """
    distance = dionysus.wasserstein_distance(dia1, dia2, p)
    return distance