def test_inf_deathtime(self):
    """Check the warning and distance when one diagram has an infinite death.

    The diagram holding the single ``[0, inf]`` point is passed once as the
    first argument and once as the second; the warning text is expected to
    name the matching argument, and both distances are expected to be 0.5.
    """
    finite_dgm = np.array([[1, 2]])
    infinite_dgm = np.array([[0, np.inf]])

    with pytest.warns(
        UserWarning, match="dgm1 has points with non-finite death"
    ):
        as_first = bottleneck(infinite_dgm, finite_dgm)

    with pytest.warns(
        UserWarning, match="dgm2 has points with non-finite death"
    ):
        as_second = bottleneck(finite_dgm, infinite_dgm)

    assert as_first == 0.5
    assert as_second == 0.5
def _load_h0_diagram(path, remove_infinity):
    """Unpickle one ripser result and return its ``dgms[0]`` as a dion.Diagram."""
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at pickle files produced by a trusted pipeline.
    with open(path, 'rb') as handle:
        rips = pickle.load(handle)
    points = rips['dgms'][0]
    if remove_infinity:
        # Keep only the rows whose death value (column 1) is finite.
        points = points[points[:, 1] < np.inf]
    return dion.Diagram(list(points))


def compute_bottleneck_distance(all_seeds_rips_files,
                                remove_infinity=False,
                                compute_wass_distance=False,
                                use_persim=False,
                                M=10):
    """Compute all pairwise distances between pickled persistence diagrams.

    Parameters
    ----------
    all_seeds_rips_files : sequence of str
        Paths to pickle files; each must unpickle to a mapping with a
        ``'dgms'`` entry whose first element is a 2-column array.
    remove_infinity : bool
        When true, rows with an infinite second column are dropped first.
    compute_wass_distance : bool
        When true, a Wasserstein-type quantity is computed instead of the
        bottleneck distance.
    use_persim : bool
        Selects the persim implementation instead of the dion one.
    M : int
        Forwarded as ``M`` to ``persim.sliced_wasserstein_kernel`` (only used
        when ``compute_wass_distance`` and ``use_persim`` are both true).

    Returns
    -------
    matrix : list of numpy.ndarray
        One row per input file, ``matrix[r][c]`` being the distance between
        file ``r`` and file ``c``.
    x : list of str
        One ``'-'``-joined label per input file, built from path components.
    """
    # Each file is read exactly once (the handle is closed right away) instead
    # of being re-opened for every pair of files.
    diagrams = [
        _load_h0_diagram(path, remove_infinity)
        for path in all_seeds_rips_files
    ]

    matrix = []
    x = []
    for file1, d1 in zip(all_seeds_rips_files, diagrams):
        print('Computing file: {}'.format(file1))

        # NOTE(review): the label uses path components -7, -6, -5 and -1 as
        # model, dataset, seed and file name. The example path in the original
        # comment ('LTHT/remote_data/saves/alexnet_nmp/mnist/42/pickle/8.pickle'
        # -> 'alexnet_nmp-mnist-42-8') does not match those indices; confirm
        # the real directory layout before relying on the labels.
        parts = file1.split('/')
        seed, model_name, dataset, file1_name = (
            parts[-5], parts[-7], parts[-6], parts[-1])
        x.append(model_name + "-" + dataset + "-" + seed + "-" +
                 file1_name.split(".")[0])

        row = np.zeros(len(all_seeds_rips_files))
        for i, d2 in enumerate(diagrams):
            if compute_wass_distance:
                if use_persim:
                    row[i] = persim.sliced_wasserstein_kernel(d1, d2, M=M)
                else:
                    row[i] = dion.wasserstein_distance(d1, d2, q=2)
            elif use_persim:
                row[i] = persim.bottleneck(d1, d2)
            else:
                row[i] = dion.bottleneck_distance(d1, d2)
        matrix.append(row)

    return matrix, x
def test_single(self):
    """Compare two one-point diagrams whose deaths differ by 0.1."""
    first = np.array([[0.5, 1]])
    second = np.array([[0.5, 1.1]])

    dist = bottleneck(first, second)

    # The second positional argument of pytest.approx is the relative
    # tolerance, so this accepts values within 0.1% of 0.1.
    assert dist == pytest.approx(0.1, 0.001)
def test_matching_to_self(self):
    """A diagram compared with itself must be at distance exactly 0."""
    # Four finite points plus one point with infinite death.
    births_deaths = [
        [0., 1.71858561],
        [0., 1.74160683],
        [0., 2.43430877],
        [0., 2.56949258],
        [0., np.inf],
    ]
    diagram = np.array(births_deaths)

    assert bottleneck(diagram, diagram) == 0
def bottleneck_dist_mat(gdat, verbose=True):
    """
    Generate distance matrix for persistence diagrams of images

    INPUTS
    gdat - gestures data matrix; use output of load_data. It is iterated as a
        mapping subject -> mapping gesture -> 2-D array, and the subject and
        gesture keys are concatenated as strings.
    verbose - when true, print a one-line progress bar while the matrix is
        being filled.

    OUTPUTS
    distance matrix using bottleneck metric; rows and columns follow the
    ascending sort order of the "<subject>_<gesture>" keys.

    RAISES
    ValueError if a gesture array has no signal column to build a diagram
    from (fewer than three columns).
    """
    # iterate through all subjects
    pd_dict = dict()
    for sbj, sdict in gdat.items():
        # Dictionary of each subject with all gestures
        for gnum, garray in sdict.items():
            diagram = None
            # Column 0 is the time axis and the last column is also skipped.
            # NOTE(review): the dictionary key does not include the signal
            # index, so only the diagram of the last signal column is kept
            # for each gesture. Confirm this is intended.
            for s in range(1, garray.shape[1] - 1):
                # sublevel set filtration
                sls = sublevel_set_time_series_dist(garray[:, s])
                # generate persistence diagram
                diagram = ripser(sls, distance_matrix=True)
                # remove inf persistence point
                h0 = diagram["dgms"][0]
                diagram["dgms"][0] = h0[np.isfinite(h0[:, 1]), :]
            if diagram is None:
                # Previously this case stored whatever the previous gesture
                # had left behind (or failed on an unbound name).
                raise ValueError(
                    "no signal columns for subject {!r}, gesture {!r}".format(
                        sbj, gnum))
            pd_dict[sbj + "_" + gnum] = diagram

    # ordered list of keys, ascending by subject, gesture
    klist = sorted(pd_dict)
    size = len(klist)

    # initialize bottleneck distance matrix; the diagonal stays at 0.0
    bd_mat = np.zeros((size, size))

    for n, k in enumerate(klist):
        if verbose:
            ### progress bar ###
            pb = "~" * (int(n / size * 100)) + " " * (int(
                (1 - n / size) * 100)) + "|"
            print(pb, end="\r")
            ####################
        # The bottleneck distance is symmetric, so each pair is computed once
        # and mirrored into the lower triangle.
        for m in range(n + 1, size):
            dist = bottleneck(pd_dict[k]["dgms"][0],
                              pd_dict[klist[m]]["dgms"][0])
            bd_mat[n, m] = dist
            bd_mat[m, n] = dist

    return bd_mat
def test_matching(self):
    """With matching=True, check the sizes of the returned matching and matrix.

    The call is expected to return ``(distance, (matching, matrix))`` where
    the matching has one entry per point of both diagrams and the matrix is
    square with that same side length.
    """
    dgm1 = np.array([[0.5, 1], [0.6, 1.1]])
    dgm2 = np.array([[0.5, 1.1], [0.6, 1.1], [0.8, 1.1], [1.0, 1.1]])
    total_points = len(dgm1) + len(dgm2)

    _, (matching, cross_matrix) = bottleneck(dgm1, dgm2, matching=True)

    assert len(matching) == total_points
    assert cross_matrix.shape == (total_points, total_points)
def test_matching(self):
    """With matching=True, every point of both diagrams appears in the matching.

    Column 0 of the returned array is read as indices into the first diagram
    and column 1 as indices into the second; negative entries are ignored.
    """
    dgm1 = np.array([[0.5, 1], [0.6, 1.1]])
    dgm2 = np.array([[0.5, 1.1], [0.6, 1.1], [0.8, 1.1], [1.0, 1.1]])

    _, matching = bottleneck(dgm1, dgm2, matching=True)

    # Count the distinct non-negative indices seen in each column.
    distinct_counts = []
    for column in (0, 1):
        indices = np.unique(matching[:, column])
        distinct_counts.append(indices[indices >= 0].size)

    assert (distinct_counts[0] == dgm1.shape[0]
            and distinct_counts[1] == dgm2.shape[0])
def test_diagonal(self):
    """Compare a diagram hugging the diagonal with one far away from it."""
    near_diagonal = np.array([
        [10.5, 10.5],
        [10.6, 10.5],
        [10.3, 10.3],
    ])
    off_diagonal = np.array([
        [0.5, 1.0],
        [0.6, 1.2],
        [0.3, 0.7],
    ])

    dist = bottleneck(near_diagonal, off_diagonal)

    # The asserted value is 0.3, which is half the largest persistence
    # (1.2 - 0.6) in the second diagram.
    assert dist == pytest.approx(0.3, 0.001)
def intersketch_bd(transport_plans_a, transport_plans_b):
    """Find the bottleneck distance between two greedy sketches.

    Each sketch is materialised as a 2-column array in which every point
    returned by ``compute_mult`` is repeated according to its multiplicity,
    and the two arrays are then handed to ``persim.bottleneck``.

    Parameters
    ----------
    transport_plans_a, transport_plans_b
        Transportation plans of arbitrary persistence diagrams.
        Each plan corresponds to one persistence diagram.

    Returns
    -------
    float
        Bottleneck distance between the two sketches.
    """
    def fill(sketch, multiplicity):
        # Write each point into consecutive rows, once per unit of
        # multiplicity.
        repeated = (
            point
            for point in multiplicity.keys()
            for _ in range(multiplicity[tuple(point)])
        )
        for row, point in enumerate(repeated):
            sketch[row] = point

    mult_a = compute_mult(transport_plans_a)
    mult_b = compute_mult(transport_plans_b)

    # The row count comes from the DIAGONAL entry of the first plan.
    sketch_a = np.empty((transport_plans_a[0][DIAGONAL], 2))
    sketch_b = np.empty((transport_plans_b[0][DIAGONAL], 2))

    fill(sketch_a, mult_a)
    fill(sketch_b, mult_b)

    return persim.bottleneck(sketch_a, sketch_b, matching=False)
mask[:, :] = (outdata[:, 0] != a)[:, np.newaxis] curData = np.ma.MaskedArray(outdata, mask=mask) #compress the mask curData = np.ma.compress_rows(curData) print "\tSize:", curData.shape str(bn) + "," + str(h) + "," + str(ws) + "," #if(a == 0) # for i in range(0,d0_count): # outdata = np.vstack([outdata, [0,0,epsilon]]) if (len(curData.data) > 0): print "Metric", len(curData), a bn = persim.bottleneck(comparePers, curData, matching=False) h = persim.heat(comparePers, curData) ws = persim.wasserstein(comparePers, curData) outString += str(bn) + "," + str(h) + "," + str(ws) + "," end = time.time() stat_time = (end - start) outfile = file(os.getcwd() + "/aggResults.csv", 'a') outfile.write(outString + str(stat_time) + ",") outfile.close() else:
def test_one_empty(self):
    """One point against an empty diagram is expected to give 0.5."""
    single_point = np.array([[1, 2]])
    # np.array([[]]) has shape (1, 0): one row and no columns.
    no_points = np.array([[]])

    assert bottleneck(single_point, no_points) == 0.5
def test_2x2_bisect_bug(self):
    """Regression check on two 2-point diagrams; expected distance is 2."""
    first = np.array([[6, 9], [6, 8]])
    second = np.array([[4, 10], [9, 10]])

    result = bottleneck(first, second)

    assert result == 2
def test_single_point_same(self):
    """A one-point diagram compared with itself must be at distance 0."""
    point = np.array([[0.11371516, 4.45734882]])

    assert bottleneck(point, point) == 0
print(current_id) fig, ax = plt.subplots(figsize=(6, 5)) rips.plot(current_diagramm, show=False) plt.title("PD of $H_k$ for id{:04d}".format(current_id)) plt.tight_layout() plt.draw() fig.savefig("diagramms/id{:04d}".format(current_id)) diagramms.append(current_diagramm) product = ((i, j) for i in range(MAX_BLC_ID + 1) for j in range(MAX_BLC_ID + 1)) distances = np.zeros((2, (MAX_BLC_ID + 1), (MAX_BLC_ID + 1))) for i, j in product: # distances[0, i, j] = persim.sliced_wasserstein(diagramms[i][0], diagramms[j][0]) # distances[1, i, j] = persim.sliced_wasserstein(diagramms[i][1], diagramms[j][1]) distances[0, i, j] = persim.bottleneck(diagramms[i][0], diagramms[j][0]) distances[1, i, j] = persim.bottleneck(diagramms[i][1], diagramms[j][1]) T_n_labels = ["id{:04d}".format(tmp_i) for tmp_i in range(MAX_BLC_ID + 1)] pd.DataFrame(distances[0, :, :], columns=T_n_labels, index=T_n_labels).to_csv('distancesH0.csv') pd.DataFrame(distances[1, :, :], columns=T_n_labels, index=T_n_labels).to_csv('distancesH1.csv') outpath = "" fig, ax = plt.subplots(figsize=(60, 60)) im = ax.imshow(distances[0, :, :]) ax.set_xticks(np.arange(len(T_n_labels))) ax.set_yticks(np.arange(len(T_n_labels))) ax.set_xticklabels(T_n_labels)
def match_bottleneck(dgms1, dgms2):
    """Run ``bottleneck`` with matching enabled on element 1 of each input.

    Parameters
    ----------
    dgms1, dgms2
        Indexable collections of diagrams; only index 1 of each is used.

    Returns
    -------
    Whatever ``bottleneck(..., matching=True)`` returns for those diagrams.
    """
    first_diagram = dgms1[1]
    second_diagram = dgms2[1]
    return bottleneck(first_diagram, second_diagram, matching=True)
def test_bottleneck_matching():
    """Smoke test: plot a bottleneck matching without raising."""
    first = np.array([[0.1, 0.2], [0.2, 0.4]])
    second = np.array([[0.1, 0.2], [0.3, 0.45]])

    # Only the matching and the matrix are needed for plotting.
    _, (pairs, cross_matrix) = persim.bottleneck(first, second, matching=True)

    persim.plot.bottleneck_matching(first, second, pairs, cross_matrix)
if len(sys.argv) >= 4: epsilon = float(sys.argv[3]) if len(sys.argv) >= 5: dim = int(sys.argv[4]) originalPers = np.genfromtxt(SourcePers, delimiter=',') comparePers = np.genfromtxt(outDir + "/Eirene_Output.csv", delimiter=',') try: upscalePers = np.genfromtxt(outDir + "/upscaledPersistence.csv", delimiter=',') except: upscalePers = np.genfromtxt(outDir + "/ripser_Output.csv", delimiter=',') start = time.time() print "Computing bottlenecks..." bn = persim.bottleneck(originalPers, comparePers, matching=False) bn_u = persim.bottleneck(originalPers, upscalePers, matching=False) print "Computing heat kernel distance..." h = persim.heat(originalPers, comparePers) h_u = persim.heat(originalPers, upscalePers) print "Computing wasserstein..." ws = persim.wasserstein(originalPers, comparePers) ws_u = persim.wasserstein(originalPers, upscalePers) end = time.time() #gh = persim.gromov_hausdorff(originalPers, comparePers) #gh_u = persim.gromov_hausdorff(originalPers, upscalePers) print bn, bn_u, h, h_u, ws, ws_u
#at last, compare each diag with the 10 refs for the gestures (1st person, 1st sample has been chosen for the ref). #a prediction is then chosen based on the diagram. the argmin of the distances to the refs. diag_ref = np.array([ diags[0], diags[100], diags[200], diags[300], diags[400], diags[500], diags[600], diags[700], diags[800], diags[900] ]) n = np.array(diags).shape[0] res = np.zeros(n) print("Classifying hands...") for i in tqdm(range(n)): dists = [] for j in range(10): distance_bottleneck, _ = persim.bottleneck(diags[i], diag_ref[j], matching=True) dists += [distance_bottleneck] res[i] = np.argmin(np.array(dists)) #it appears that it is not a very good solution. 621 wrong guesses out of 1000. Maybe use Wasserstein instead of Bottleneck. #or using the first sample is not a good idea ideal = np.array([int(i / 100) for i in range(1000)]) np.sum(ideal != res) #confusion matrix m = 10 confus = np.zeros((m, m)) for i in range(n): k = int(i * m / n) l = int(res[i])
def test_repeated(self):
    """Regression check for issue #44: a duplicated point against a single copy."""
    doubled = np.array([[0, 1], [0, 1]])
    single = np.array([[0, 1]])

    result = bottleneck(doubled, single)

    assert result == 0.5
def test_different_size(self):
    """Diagrams with two points and one point are expected to be 0.25 apart."""
    two_points = np.array([[0.5, 1], [0.6, 1.1]])
    one_point = np.array([[0.5, 1.1]])

    result = bottleneck(two_points, one_point)

    assert result == 0.25
def test_plot_labels():
    """Smoke test: plot a bottleneck matching with custom labels."""
    first = np.array([[0.1, 0.2], [0.2, 0.4]])
    second = np.array([[0.1, 0.2], [0.3, 0.45]])

    # Only the matching and the matrix are needed for plotting.
    _, (pairs, cross_matrix) = persim.bottleneck(first, second, matching=True)

    persim.plot.bottleneck_matching(
        first, second, pairs, cross_matrix, labels=["X", "Y"]
    )
elif use_first_persistence: dgms = ripser(dist_matrix, distance_matrix=True)['dgms'][1] else: print("should use either zero or first persistence metric") assert False persist_diagrams.append(dgms) print("computing bottleneck distance") bottleneck_distances = [] for i, dist_matrix1 in enumerate(persist_diagrams): distances = [] for j, dist_matrix2 in enumerate(persist_diagrams): if i > j: distances.append(bottleneck_distances[j][i]) else: distance = persim.bottleneck(dist_matrix1, dist_matrix2) distances.append(distance) bottleneck_distances.append(distances) print("inverting") # Find max value max_value = 0 for bottleneck_distance in bottleneck_distances: cur_max_value = max(bottleneck_distance) if cur_max_value > max_value: max_value = cur_max_value # Invert values for bottleneck_distance in bottleneck_distances: for i in range(0, len(bottleneck_distance)): bottleneck_distance[i] = max_value - bottleneck_distance[i]