def scramble(text, path=None):
    """Measure how much word order matters via persistent homology.

    Tokenizes the input into sentences, strips punctuation, lowercases,
    lemmatizes every token with the pymorphy analyzer (``morph``), then
    builds two ``Persistent`` models — one on the real sentences and one
    on a shuffled copy (``shuffle_t``) — and returns the Wasserstein
    distance between their H0 persistence diagrams.

    Args:
        text: raw text to analyze.
        path: optional path to a text file; when given, the file's
            contents are analyzed instead of ``text``.

    Returns:
        float: Wasserstein distance between the original and shuffled
        H0 diagrams.
    """
    if path:
        # FIX: the original read the file but then unconditionally
        # overwrote `sentences` with the tokenization of `text`, making
        # this branch dead code; it also leaked the open file handle.
        with open(path, 'r') as fh:
            sentences = tokenizer.tokenize(fh.read())
    else:
        sentences = tokenizer.tokenize(text)
    split_sent = []
    for s in sentences:
        s = s.translate(str.maketrans('', '', punctuation))
        split_sent.append(s.lower().split())
    # Lemmatize each token in place; the first morphological parse wins.
    for sentence in split_sent:
        for i in range(len(sentence)):
            sentence[i] = morph.parse(sentence[i])[0].normal_form
    shuff_sent = shuffle_t(split_sent)
    a = Persistent(split_sent=split_sent, min_count=5, window=5)
    dgms_n = a.persistent()
    b = Persistent(split_sent=shuff_sent, min_count=5, window=5)
    dgms_sh = b.persistent()
    # Compare only the 0-dimensional (connected-component) diagrams.
    return dn.wasserstein_distance(dgms_n[0], dgms_sh[0])
def scramble(text, path=None):
    """Measure how much word order matters via persistent homology.

    Tokenizes the input into sentences, strips punctuation, lowercases,
    lemmatizes every token with the pymorphy analyzer (``morph``), then
    builds two ``Persistent`` models — one on the real sentences and one
    on a shuffled copy (``shuffle_t``) — and returns the Wasserstein
    distance between their H0 persistence diagrams.

    Args:
        text: raw text to analyze.
        path: optional path to a text file; when given, the file's
            contents are analyzed instead of ``text``.

    Returns:
        float: Wasserstein distance between the original and shuffled
        H0 diagrams.
    """
    if path:
        # FIX: the original read the file but then unconditionally
        # overwrote `sentences` with the tokenization of `text`, making
        # this branch dead code; it also leaked the open file handle.
        with open(path, 'r') as fh:
            sentences = tokenizer.tokenize(fh.read())
    else:
        sentences = tokenizer.tokenize(text)
    split_sent = []
    for s in sentences:
        s = s.translate(str.maketrans('', '', punctuation))
        split_sent.append(s.lower().split())
    # Lemmatize each token in place; the first morphological parse wins.
    for sentence in split_sent:
        for i in range(len(sentence)):
            sentence[i] = morph.parse(sentence[i])[0].normal_form
    shuff_sent = shuffle_t(split_sent)
    a = Persistent(split_sent=split_sent, min_count=5, window=5)
    dgms_n = a.persistent()
    b = Persistent(split_sent=shuff_sent, min_count=5, window=5)
    dgms_sh = b.persistent()
    # Compare only the 0-dimensional (connected-component) diagrams.
    return dn.wasserstein_distance(dgms_n[0], dgms_sh[0])
def compute_bottleneck_distance(all_seeds_rips_files, remove_infinity=False, compute_wass_distance=False, use_persim=False, M=10):
    """Build the pairwise distance matrix between pickled H0 diagrams.

    Args:
        all_seeds_rips_files: paths shaped like
            ``.../saves/<model>/<dataset>/<seed>/pickle/<k>.pickle``,
            each pickling a dict with a ``'dgms'`` entry.
        remove_infinity: drop diagram points whose death time is inf.
        compute_wass_distance: use Wasserstein (q=2) instead of bottleneck.
        use_persim: compute distances with persim instead of dionysus.
        M: number of slices for persim's sliced Wasserstein kernel.

    Returns:
        tuple: (list of np.ndarray rows forming the full distance
        matrix, list of '<model>-<dataset>-<seed>-<k>' labels).
    """

    def _load_diagram(path):
        # Load the H0 diagram from one pickled rips result.
        # FIX: the original leaked the file handle via pickle.load(open(...)).
        with open(path, 'rb') as fh:
            rips = pickle.load(fh)
        dgm = rips['dgms'][0]
        if remove_infinity:
            dgm = dgm[dgm[:, 1] < np.inf]
        return dion.Diagram(list(dgm))

    # FIX: load each diagram once up front; the original re-opened and
    # re-unpickled every file inside the inner loop (O(n^2) file I/O).
    diagrams = [_load_diagram(f) for f in all_seeds_rips_files]

    matrix = []
    x = []
    for file1, d1 in zip(all_seeds_rips_files, diagrams):
        print('Computing file: {}'.format(file1))
        row = np.zeros(len(all_seeds_rips_files))
        # example file1: LTHT/remote_data/saves/alexnet_nmp/mnist/42/pickle/8.pickle
        parts = file1.split('/')
        seed, model_name, dataset, file1_name = parts[-5], parts[-7], parts[-6], parts[-1]
        # appending 'alexnet_nmp-mnist-42-8'
        x.append(model_name + "-" + dataset + "-" + seed + "-" + file1_name.split(".")[0])
        for i, d2 in enumerate(diagrams):
            if compute_wass_distance:
                if use_persim:
                    row[i] = persim.sliced_wasserstein_kernel(d1, d2, M=M)
                else:
                    row[i] = dion.wasserstein_distance(d1, d2, q=2)
            else:
                if use_persim:
                    row[i] = persim.bottleneck(d1, d2)
                else:
                    row[i] = dion.bottleneck_distance(d1, d2)
        matrix.append(row)
    return matrix, x
def wasserstein_dist_mat(pds):
    """Return the symmetric pairwise Wasserstein distance matrix.

    Converts the persistence data in ``pds`` to diagrams once, then
    fills only the upper triangle and mirrors it, since the distance is
    symmetric. Progress ('<row>/<n>') is printed per row.
    """
    diagrams = pds2diagrams(pds)
    size = len(pds)
    dist_matrix = np.zeros((size, size))
    for row in range(size):
        print(str(row) + '/' + str(size))
        for col in range(row, size):
            value = d.wasserstein_distance(diagrams[row], diagrams[col])
            dist_matrix[row, col] = value
            dist_matrix[col, row] = value
    return dist_matrix
def comp_row(i, p, dic, out):
    """Worker body: compute this worker's share of the distance matrix.

    Of ``p`` parallel workers, worker ``i`` handles rows i, i+p, i+2p, …
    ``dic`` supplies 'n' (matrix size) and 'diag' (the diagrams); the
    result for row r is written into the flat shared buffer slice
    ``out[r*n:(r+1)*n]``. Only the upper triangle (j >= r) is filled.
    """
    n = dic['n']
    diag = dic['diag']
    stripes = int(n / p) + 1
    my_rows = [k * p + i for k in range(stripes) if k * p + i < n]
    start = 0
    end = 0
    for r in my_rows:
        row = np.zeros(n)
        # NOTE: the seconds reported here belong to the *previous* row —
        # start/end still hold the prior iteration's timestamps.
        print(str(r) + '/' + str(n) + '\t(' + str(end - start) + 's)')
        start = time.time()
        for j in range(r, n):
            row[j] = d.wasserstein_distance(diag[r], diag[j])
        out[r * n:(r + 1) * n] = row
        end = time.time()
def test_issue39():
    """Regression test for issue #39: wasserstein_distance with q=5.

    Loads two diagrams from disk and checks the distance computes to a
    sane value rather than crashing.
    """
    dgm1 = np.loadtxt('data/issue39/dgm1.txt', delimiter=',')
    dgm2 = np.loadtxt('data/issue39/dgm2.txt', delimiter=',')
    dgm1 = d.Diagram(dgm1)
    dgm2 = d.Diagram(dgm2)
    dist = d.wasserstein_distance(dgm1, dgm2, q=5)
    # FIX: the original computed `dist` but asserted nothing, so the
    # test could only fail by raising. Pin down a minimal contract:
    # a metric must be non-negative (this also rejects NaN).
    assert dist >= 0
def wasserstein_distance(dia1, dia2, p=1):
    """Return the Wasserstein distance between two dionysus diagrams.

    Thin wrapper around ``dionysus.wasserstein_distance``; ``p`` is
    forwarded as the third positional argument (presumably the order
    ``q`` in dionysus' API — TODO confirm against the installed
    dionysus version). Defaults to 1 here.
    """
    return dionysus.wasserstein_distance(dia1, dia2, p)
def wasserstein_distance(barcode1, barcode2):
    """Return the Wasserstein distance between two barcodes.

    Each barcode is first converted to a diagram via
    ``barcode_to_diagram``; the distance itself is delegated to
    ``di.wasserstein_distance`` with its default parameters.
    """
    return di.wasserstein_distance(
        barcode_to_diagram(barcode1),
        barcode_to_diagram(barcode2),
    )
# --- Sampling + persistent-homology pipeline (script-level code) ---
# NOTE(review): `get_distance`, `point`, `trees`, `samples`, `num_points`
# and `homology_dimension` are defined outside this chunk; the first few
# statements look like the tail of a per-point sampling loop whose header
# is not visible here — confirm against the full file.

# Distance from every tree to the current reference point.
distances = [get_distance(other_point, point) for other_point in trees]
# Partially sort so the `num_points` nearest trees come first.
# NOTE(review): this rebinds `trees` (reordered by the full argpartition
# permutation); verify later iterations are meant to see the new order.
trees = trees[np.argpartition(distances, num_points)]
samples.append(trees[:num_points])  # closest num_points
samples = np.asarray(samples)
# Persist each sample set to its own CSV file.
for i in range(len(samples)):
    np.savetxt('./samples/samples{:d}.csv'.format(i), samples[i], delimiter=',')

# Constructing the persistent homology diagrams and distance matrix
dgms_list = []
for i in range(len(samples)):
    sample_set = samples[i]
    # Alpha-shape filtration of the sample (diode), persistence via dionysus.
    simplices = diode.fill_alpha_shapes(sample_set)
    f = d.Filtration(simplices)
    m = d.homology_persistence(f)
    dgms = d.init_diagrams(m, f)
    # Keep only the diagram at the requested homology dimension.
    dgms_list.append(dgms[homology_dimension])

# Symmetric pairwise Wasserstein distance matrix over the kept diagrams.
distance_matrix = np.zeros((len(dgms_list), len(dgms_list)))
for i in range(len(dgms_list)):
    for j in range(i):
        # Chose Wasserstein, q=1 to emphasise many near-diagonal points
        dist = d.wasserstein_distance(dgms_list[i], dgms_list[j], q=1)
        distance_matrix[i, j] = dist
        distance_matrix[j, i] = distance_matrix[i, j]
np.savetxt('./processed_data/dist_matrix.csv', distance_matrix, delimiter=',')
def wasserstein_distance(dia1, dia2, p=1):
    """Return the Wasserstein distance between two dionysus diagrams.

    Thin wrapper around ``dionysus.wasserstein_distance``; ``p`` is
    forwarded as the third positional argument (presumably the order
    ``q`` in dionysus' API — TODO confirm against the installed
    dionysus version). Defaults to 1 here.
    """
    return dionysus.wasserstein_distance(dia1, dia2, p)