Esempio n. 1
0
 def test_inf_deathtime(self):
     """A point with infinite death time triggers a UserWarning.

     The warning text names the argument (dgm1 or dgm2) that holds the
     non-finite death, and the distance asserted here is 0.5 in both
     argument orders.
     """
     finite_dgm = np.array([[1, 2]])
     infinite_dgm = np.array([[0, np.inf]])

     dgm1_message = "dgm1 has points with non-finite death"
     with pytest.warns(UserWarning, match=dgm1_message):
         dist_inf_first = bottleneck(infinite_dgm, finite_dgm)

     dgm2_message = "dgm2 has points with non-finite death"
     with pytest.warns(UserWarning, match=dgm2_message):
         dist_inf_second = bottleneck(finite_dgm, infinite_dgm)

     assert (dist_inf_first, dist_inf_second) == (0.5, 0.5)
def compute_bottleneck_distance(all_seeds_rips_files,
                                remove_infinity=False,
                                compute_wass_distance=False,
                                use_persim=False,
                                M=10):
    matrix = []
    x = []
    y = []
    for file1 in all_seeds_rips_files:
        print('Computing file: {}'.format(file1))
        row = np.zeros(len(all_seeds_rips_files))
        # example file1: LTHT/remote_data/saves/alexnet_nmp/mnist/42/pickle/8.pickle
        split1_name = file1.split('/')
        # print(split1_name)
        seed, model_name, dataset, file1_name = split1_name[-5], split1_name[
            -7], split1_name[-6], split1_name[-1]
        # appending 'alexnet_nmp-mnist-42-8'
        x.append(model_name + "-" + dataset + "-" + seed + "-" +
                 file1_name.split(".")[0])

        rips1 = pickle.load(open(file1, 'rb'))
        if remove_infinity:
            l1 = list(rips1['dgms'][0][rips1['dgms'][0][:, 1] < np.inf])
        else:
            l1 = list(rips1['dgms'][0])
        d1 = dion.Diagram(l1)

        for i, file2 in enumerate(all_seeds_rips_files):
            rips2 = pickle.load(open(file2, 'rb'))

            if remove_infinity:
                l2 = list(rips2['dgms'][0][rips2['dgms'][0][:, 1] < np.inf])
            else:
                l2 = list(rips2['dgms'][0])

            d2 = dion.Diagram(l2)

            if compute_wass_distance:
                if use_persim:
                    wdist = persim.sliced_wasserstein_kernel(d1, d2, M=M)
                else:
                    wdist = dion.wasserstein_distance(d1, d2, q=2)
                row[i] = wdist
            else:
                if use_persim:
                    bdist = persim.bottleneck(d1, d2)
                else:
                    bdist = dion.bottleneck_distance(d1, d2)
                row[i] = bdist

        matrix.append(row)
    #
    x = list(
        map(
            lambda y: '{}-{} seed:{}-{}'.format(
                y.split('-')[0],
                y.split('-')[1],
                y.split('-')[2],
                y.split('-')[3]), x))
    return matrix, x
Esempio n. 3
0
    def test_single(self):
        """Two one-point diagrams whose deaths differ by 0.1 are 0.1 apart."""
        first = np.array([[0.5, 1]])
        second = np.array([[0.5, 1.1]])

        d = bottleneck(first, second)

        # Compared with a relative tolerance of 0.001.
        assert d == pytest.approx(0.1, rel=0.001)
Esempio n. 4
0
 def test_matching_to_self(self):
     """The distance from a diagram to itself is asserted to be exactly 0."""
     # All points are born at 0; the last one has an infinite death time.
     deaths = [1.71858561, 1.74160683, 2.43430877, 2.56949258, np.inf]
     pd = np.array([[0., death] for death in deaths])

     assert bottleneck(pd, pd) == 0
def bottleneck_dist_mat(gdat, verbose=True):
    """
    Generate the pairwise bottleneck distance matrix of gesture diagrams.

    INPUTS
    gdat - nested mapping {subject: {gesture: 2-D array}}; the original
        docstring says to use the output of load_data. Column 0 of each
        array is the time axis and is not used here.
    verbose - when True, draw a text progress bar on stdout.

    OUTPUTS
    square numpy array of bottleneck distances; rows and columns follow the
    sorted "<subject>_<gesture>" keys, the diagonal is 0.
    """
    pd_dict = dict()

    for sbj, sdict in gdat.items():
        for gnum, garray in sdict.items():
            # Signals are columns 1 .. shape[1] - 2: column 0 (time) and the
            # last column are skipped.
            # NOTE(review): why the last column is excluded is not visible
            # here -- confirm against load_data.
            for s in range(1, garray.shape[1] - 1):
                # sublevel set filtration of this signal
                sls = sublevel_set_time_series_dist(garray[:, s])
                pd = ripser(sls, distance_matrix=True)
                # keep only the points with finite death
                h0 = pd["dgms"][0]
                pd["dgms"][0] = h0[np.isfinite(h0[:, 1]), :]
                # NOTE(review): the key does not include `s`, so each signal
                # overwrites the previous one and only the last signal's
                # diagram is kept per (subject, gesture). Behaviour preserved.
                pd_dict[sbj + "_" + gnum] = pd

    # keys ordered ascending by subject, gesture
    klist = sorted(pd_dict)
    size = len(klist)

    bd_mat = np.zeros((size, size))

    for n, k in enumerate(klist):
        if verbose:
            ### progress bar ###
            pb = "~" * (int(n / size * 100)) + " " * (int(
                (1 - n / size) * 100)) + "|"
            print(pb, end="\r")
            ####################
        dgm_k = pd_dict[k]["dgms"][0]
        # The bottleneck distance is symmetric, so compute each unordered
        # pair once and mirror it; the diagonal stays at its initial 0.
        for m in range(n + 1, size):
            dist = bottleneck(dgm_k, pd_dict[klist[m]]["dgms"][0])
            bd_mat[n, m] = dist
            bd_mat[m, n] = dist

    return bd_mat
Esempio n. 6
0
    def test_matching(self):
        """With matching=True the result unpacks as (distance, (matching, D)).

        Both the matching and the square matrix D are checked to have
        len(dgm1) + len(dgm2) entries per side.
        """
        dgm1 = np.array([[0.5, 1], [0.6, 1.1]])
        dgm2 = np.array([[0.5, 1.1], [0.6, 1.1], [0.8, 1.1], [1.0, 1.1]])
        total_points = len(dgm1) + len(dgm2)

        d, (m, D) = bottleneck(dgm1, dgm2, matching=True)

        # Only sizes are checked, not the matched pairs themselves.
        assert len(m) == total_points
        assert D.shape == (total_points, total_points)
Esempio n. 7
0
    def test_matching(self):
        """Every point of both diagrams appears in the returned matching.

        Columns 0 and 1 of the matching hold indices into dgm1 and dgm2;
        negative entries are ignored when counting distinct indices.
        """
        dgm1 = np.array([[0.5, 1], [0.6, 1.1]])
        dgm2 = np.array([[0.5, 1.1], [0.6, 1.1], [0.8, 1.1], [1.0, 1.1]])

        d, m = bottleneck(dgm1, dgm2, matching=True)

        first_indices, second_indices = m[:, 0], m[:, 1]
        u1 = np.unique(first_indices[first_indices >= 0])
        u2 = np.unique(second_indices[second_indices >= 0])

        assert u1.size == dgm1.shape[0]
        assert u2.size == dgm2.shape[0]
Esempio n. 8
0
    def test_diagonal(self):
        """Check the distance between a near-diagonal and an off-diagonal diagram."""
        near_diagonal = np.array([[10.5, 10.5], [10.6, 10.5], [10.3, 10.3]])
        off_diagonal = np.array([[0.5, 1.0], [0.6, 1.2], [0.3, 0.7]])

        d = bottleneck(near_diagonal, off_diagonal)

        # The expected value is 0.3, i.e. (1.2 - 0.6) / 2 for the point
        # (0.6, 1.2); an older comment here said 0.6, which contradicted
        # the assertion.
        assert d == pytest.approx(0.3, rel=0.001)
Esempio n. 9
0
def intersketch_bd(transport_plans_a, transport_plans_b):
    """Return the bottleneck distance between two greedy sketches.

    Parameters
    ----------
    transport_plans_a, transport_plans_b
        Transportation plans of arbitrary persistence diagrams. Each plan
        corresponds to one persistence diagram.

    Returns
    -------
    float
        Bottleneck distance between the two sketches.

    Notes
    -----
    Each sketch is an array with ``transport_plans[0][DIAGONAL]`` rows and
    two columns, in which every key of ``compute_mult(transport_plans)`` is
    written once per unit of its multiplicity.
    """
    mult_a = compute_mult(transport_plans_a)
    mult_b = compute_mult(transport_plans_b)
    sketch_a = np.empty((transport_plans_a[0][DIAGONAL], 2))
    sketch_b = np.empty((transport_plans_b[0][DIAGONAL], 2))

    # Fill both sketches with the same routine: repeat each point according
    # to its multiplicity, writing rows from the top.
    for sketch, multiplicities in ((sketch_a, mult_a), (sketch_b, mult_b)):
        row = 0
        for point in multiplicities.keys():
            for _ in range(multiplicities[tuple(point)]):
                sketch[row] = point
                row += 1

    return persim.bottleneck(sketch_a, sketch_b, matching=False)
Esempio n. 10
0
                    mask[:, :] = (outdata[:, 0] != a)[:, np.newaxis]
                    curData = np.ma.MaskedArray(outdata, mask=mask)

                    #compress the mask
                    curData = np.ma.compress_rows(curData)
                    print "\tSize:", curData.shape
                    str(bn) + "," + str(h) + "," + str(ws) + ","

                    #if(a == 0)
                    #	for i in range(0,d0_count):
                    #		outdata = np.vstack([outdata, [0,0,epsilon]])

                    if (len(curData.data) > 0):
                        print "Metric", len(curData), a
                        bn = persim.bottleneck(comparePers,
                                               curData,
                                               matching=False)

                        h = persim.heat(comparePers, curData)

                        ws = persim.wasserstein(comparePers, curData)

                    outString += str(bn) + "," + str(h) + "," + str(ws) + ","
                end = time.time()
                stat_time = (end - start)

                outfile = file(os.getcwd() + "/aggResults.csv", 'a')
                outfile.write(outString + str(stat_time) + ",")
                outfile.close()

            else:
Esempio n. 11
0
 def test_one_empty(self):
     """A single point (1, 2) against an empty diagram gives 0.5."""
     single_point = np.array([[1, 2]])
     no_points = np.array([[]])

     assert bottleneck(single_point, no_points) == 0.5
Esempio n. 12
0
 def test_2x2_bisect_bug(self):
     """Regression case with two 2-point diagrams; the distance must be 2."""
     first = np.array([[6, 9], [6, 8]])
     second = np.array([[4, 10], [9, 10]])

     assert bottleneck(first, second) == 2
Esempio n. 13
0
 def test_single_point_same(self):
     """A one-point diagram compared with itself gives distance 0."""
     single_point = np.array([[0.11371516, 4.45734882]])

     assert bottleneck(single_point, single_point) == 0
Esempio n. 14
0
    print(current_id)
    fig, ax = plt.subplots(figsize=(6, 5))
    rips.plot(current_diagramm, show=False)
    plt.title("PD of $H_k$ for id{:04d}".format(current_id))
    plt.tight_layout()
    plt.draw()
    fig.savefig("diagramms/id{:04d}".format(current_id))
    diagramms.append(current_diagramm)

# Pairwise bottleneck distances between all ids, one square matrix per
# diagram index (0 and 1), each written to its own CSV file.
n_ids = MAX_BLC_ID + 1
product = ((i, j) for i in range(n_ids) for j in range(n_ids))
distances = np.zeros((2, n_ids, n_ids))
for i, j in product:
    # persim.sliced_wasserstein was tried here as an alternative metric.
    for dgm_index in (0, 1):
        distances[dgm_index, i, j] = persim.bottleneck(
            diagramms[i][dgm_index], diagramms[j][dgm_index])

T_n_labels = ["id{:04d}".format(tmp_i) for tmp_i in range(n_ids)]
for dgm_index, csv_name in ((0, 'distancesH0.csv'), (1, 'distancesH1.csv')):
    frame = pd.DataFrame(distances[dgm_index, :, :],
                         columns=T_n_labels,
                         index=T_n_labels)
    frame.to_csv(csv_name)

# Heat-map of the first distance matrix (distances[0]) with one tick per id.
outpath = ""

fig, ax = plt.subplots(figsize=(60, 60))
im = ax.imshow(distances[0, :, :])
# One tick per label on both axes so every id gets its own row/column mark.
ax.set_xticks(np.arange(len(T_n_labels)))
ax.set_yticks(np.arange(len(T_n_labels)))

# Label the x ticks with the "idNNNN" strings.
ax.set_xticklabels(T_n_labels)
Esempio n. 15
0
 def match_bottleneck(dgms1, dgms2):
     """Run bottleneck with matching=True on the index-1 diagrams of both inputs."""
     first, second = dgms1[1], dgms2[1]
     return bottleneck(first, second, matching=True)
Esempio n. 16
0
def test_bottleneck_matching():
    """Smoke test: plot the bottleneck matching of two small diagrams."""
    first = np.array([[0.1, 0.2], [0.2, 0.4]])
    second = np.array([[0.1, 0.2], [0.3, 0.45]])

    # Only the matching and cross-similarity matrix are needed for plotting.
    _, (matching, D) = persim.bottleneck(first, second, matching=True)
    persim.plot.bottleneck_matching(first, second, matching, D)
Esempio n. 17
0
if len(sys.argv) >= 4:
    epsilon = float(sys.argv[3])
if len(sys.argv) >= 5:
    dim = int(sys.argv[4])

originalPers = np.genfromtxt(SourcePers, delimiter=',')
comparePers = np.genfromtxt(outDir + "/Eirene_Output.csv", delimiter=',')
try:
    upscalePers = np.genfromtxt(outDir + "/upscaledPersistence.csv",
                                delimiter=',')
except:
    upscalePers = np.genfromtxt(outDir + "/ripser_Output.csv", delimiter=',')

start = time.time()
print "Computing bottlenecks..."
bn = persim.bottleneck(originalPers, comparePers, matching=False)
bn_u = persim.bottleneck(originalPers, upscalePers, matching=False)

print "Computing heat kernel distance..."
h = persim.heat(originalPers, comparePers)
h_u = persim.heat(originalPers, upscalePers)

print "Computing wasserstein..."
ws = persim.wasserstein(originalPers, comparePers)
ws_u = persim.wasserstein(originalPers, upscalePers)
end = time.time()
#gh = persim.gromov_hausdorff(originalPers, comparePers)
#gh_u = persim.gromov_hausdorff(originalPers, upscalePers)

print bn, bn_u, h, h_u, ws, ws_u
Esempio n. 18
0
# Finally, compare each diagram with the 10 gesture references (the first
# sample of the first person is used as the reference for each gesture).
# The prediction is the argmin of the distances to the references.
#
# The references are kept in a plain list: persistence diagrams generally
# have different numbers of points, and np.array() on such ragged input
# raises on recent NumPy versions.
diag_ref = [diags[ref_index] for ref_index in range(0, 1000, 100)]

n = len(diags)
res = np.zeros(n)
print("Classifying hands...")
for i in tqdm(range(n)):
    dists = []
    for j in range(10):
        # Only the distance is needed, so the matching is not requested.
        distance_bottleneck = persim.bottleneck(diags[i], diag_ref[j])
        dists.append(distance_bottleneck)
    res[i] = np.argmin(np.array(dists))

# This turned out not to be a very good solution: 621 wrong guesses out of
# 1000. Maybe use Wasserstein instead of bottleneck, or the first sample is
# simply a poor choice of reference.
ideal = np.array([int(i / 100) for i in range(1000)])
# Number of misclassified samples; as a bare expression the value is only
# displayed in an interactive session.
np.sum(ideal != res)

#confusion matrix
m = 10
confus = np.zeros((m, m))
for i in range(n):
    k = int(i * m / n)
    l = int(res[i])
Esempio n. 19
0
 def test_repeated(self):
     """Regression test for issue #44: a diagram with a duplicated point."""
     duplicated = np.array([[0, 1], [0, 1]])
     single = np.array([[0, 1]])

     assert bottleneck(duplicated, single) == 0.5
Esempio n. 20
0
 def test_different_size(self):
     """Diagrams with two points and one point are 0.25 apart."""
     two_points = np.array([[0.5, 1], [0.6, 1.1]])
     one_point = np.array([[0.5, 1.1]])

     d = bottleneck(two_points, one_point)
     assert d == 0.25
Esempio n. 21
0
def test_plot_labels():
    """Smoke test: plot a bottleneck matching with custom labels "X" and "Y"."""
    first = np.array([[0.1, 0.2], [0.2, 0.4]])
    second = np.array([[0.1, 0.2], [0.3, 0.45]])

    _, (matching, D) = persim.bottleneck(first, second, matching=True)
    persim.plot.bottleneck_matching(first, second, matching, D,
                                    labels=["X", "Y"])
    elif use_first_persistence:
        dgms = ripser(dist_matrix, distance_matrix=True)['dgms'][1]
    else:
        print("should use either zero or first persistence metric")
        assert False
    persist_diagrams.append(dgms)

print("computing bottleneck distance")
# Build the full square list-of-lists row by row; only the entries on or
# right of the diagonal are computed, the rest are copied from earlier rows.
bottleneck_distances = []
for row_idx, diagram_a in enumerate(persist_diagrams):
    distances = [
        bottleneck_distances[col_idx][row_idx] for col_idx in range(row_idx)
    ]
    distances.extend(
        persim.bottleneck(diagram_a, diagram_b)
        for diagram_b in persist_diagrams[row_idx:])
    bottleneck_distances.append(distances)

print("inverting")
# Largest distance over all rows, never less than the starting value 0.
max_value = max([0] + [max(row) for row in bottleneck_distances])

# Replace every distance d by (max_value - d), updating each row in place.
for row in bottleneck_distances:
    row[:] = [max_value - value for value in row]