Ejemplo n.º 1
0
def distance_matrices_ext(samples, metric, metric_params=None, ordn=2):
    """Plot pairwise-distance matrices between persistence diagrams.

    Shows four panels: the H0 and H1 distance matrices separately, and the
    combined matrix (ordn-norm across homology dimensions) both with and
    without rescaling H1 to the range of H0.

    Parameters
    ----------
    samples : array of persistence diagrams accepted by PairwiseDistance.
    metric : str, distance metric name passed to PairwiseDistance.
    metric_params : dict or None, extra parameters for the metric.
    ordn : int, order of the norm used to combine the per-dimension slices.
    """
    pairwise = PairwiseDistance(metric=metric,
                                metric_params=metric_params,
                                order=None,
                                n_jobs=-1)
    distances = pairwise.fit_transform(samples)
    h0 = distances[:, :, 0]
    h1 = distances[:, :, 1]
    # Rescale H1 distances so their range matches the H0 range.
    scale = (h0.max() - h0.min()) / (h1.max() - h1.min())
    rescaled = np.stack((h0, h1 * scale), axis=2)
    combined = np.linalg.norm(distances, ord=ordn, axis=2)
    combined_scaled = np.linalg.norm(rescaled, ord=ordn, axis=2)

    def _panel(position, matrix, title):
        # One subplot: image, hidden axes, shrunken colorbar, title.
        plt.subplot(position)
        plt.imshow(matrix)
        plt.axis('off')
        plt.colorbar(shrink=0.8)
        plt.title(title)

    plt.figure(figsize=(8, 8))
    # H0 and H1 distances shown separately.
    _panel(221, h0, 'H0')
    _panel(222, h1, 'H1')
    # Combined distance, with and without rescaling.
    _panel(223, combined, 'Not scaled, norm=' + str(ordn) + '-norm')
    _panel(224, combined_scaled, 'Scaled, norm=' + str(ordn) + '-norm')
    plt.suptitle(metric)
Ejemplo n.º 2
0
def test_not_fitted():
    """Calling transform before fit must raise NotFittedError."""
    for transformer in (PairwiseDistance(), Amplitude()):
        with pytest.raises(NotFittedError):
            transformer.transform(X_1)
Ejemplo n.º 3
0
def compute_distance_matrix(
    diagrams,
    metric,
    metric_params,
    plot_distance_matrix=False,
    title=None,
    file_prefix=None,
):
    """Return the pairwise distance matrix between persistence diagrams.

    Keeps one slice per homology dimension (order=None) and parallelizes
    over N_JOBS workers.

    NOTE(review): plot_distance_matrix, title and file_prefix are accepted
    but never used in this body — presumably plotting lived here once or is
    handled by a caller; confirm before relying on them.
    """
    distance = PairwiseDistance(metric=metric,
                                metric_params=metric_params,
                                order=None,
                                n_jobs=N_JOBS)
    return distance.fit_transform(diagrams)
Ejemplo n.º 4
0
def distance_matrices(samples, metric, metric_params=None, ordn=2):
    """Plot combined pairwise-distance matrices between persistence diagrams.

    Shows two panels: the ordn-norm combination of the per-dimension
    distance matrices, without and with rescaling H1 to the range of H0.

    Parameters
    ----------
    samples : array of persistence diagrams accepted by PairwiseDistance.
    metric : str, distance metric name passed to PairwiseDistance.
    metric_params : dict or None, extra parameters for the metric.
    ordn : int, order of the norm used to combine the per-dimension slices.
    """
    pairwise = PairwiseDistance(metric=metric,
                                metric_params=metric_params,
                                order=None,
                                n_jobs=2)
    distances = pairwise.fit_transform(samples)
    h0 = distances[:, :, 0]
    h1 = distances[:, :, 1]
    # Rescale H1 distances so their range matches the H0 range.
    scale = (h0.max() - h0.min()) / (h1.max() - h1.min())
    rescaled = np.stack((h0, h1 * scale), axis=2)
    combined = np.linalg.norm(distances, ord=ordn, axis=2)
    combined_scaled = np.linalg.norm(rescaled, ord=ordn, axis=2)

    plt.figure(figsize=(10, 5))
    for position, matrix, label in ((121, combined, 'Not scaled'),
                                    (122, combined_scaled, 'Scaled')):
        plt.subplot(position)
        plt.imshow(matrix)
        plt.axis('off')
        plt.colorbar()
        plt.title(label + ', norm=' + str(ordn) + '-norm')
    plt.suptitle(metric)
Ejemplo n.º 5
0
def test_dd_transform(metric, metric_params, order, n_jobs):
    """Check PairwiseDistance output shapes for fit/transform combinations."""

    def _assert_shape(X_res, n_rows, n_cols):
        # Distance output is (n_transform, n_fit); when order is None an
        # extra axis holds one slice per homology dimension.
        assert (X_res.shape[0], X_res.shape[1]) == (n_rows, n_cols)
        if order is None:
            assert X_res.shape[2] == n_homology_dimensions

    # X_fit == X_transform
    dd = PairwiseDistance(metric=metric, metric_params=metric_params,
                          order=order, n_jobs=n_jobs)
    _assert_shape(dd.fit_transform(X1), X1.shape[0], X1.shape[0])

    # X_fit != X_transform
    dd = PairwiseDistance(metric=metric, metric_params=metric_params,
                          order=order, n_jobs=n_jobs)
    _assert_shape(dd.fit(X1).transform(X2), X2.shape[0], X1.shape[0])

    # X_fit != X_transform, default metric_params
    dd = PairwiseDistance(metric=metric, order=order, n_jobs=n_jobs)
    _assert_shape(dd.fit(X1).transform(X2), X2.shape[0], X1.shape[0])
Ejemplo n.º 6
0
def test_dd_transform(metric, metric_params, order, n_jobs):
    """Check PairwiseDistance output shapes for fit/transform combinations."""
    kwargs = dict(metric=metric, metric_params=metric_params,
                  order=order, n_jobs=n_jobs)

    # X_fit == X_transform
    X_res = PairwiseDistance(**kwargs).fit_transform(X_1)
    assert (X_res.shape[0], X_res.shape[1]) == (X_1.shape[0], X_1.shape[0])

    # X_fit != X_transform
    X_res = PairwiseDistance(**kwargs).fit(X_1).transform(X_2)
    assert (X_res.shape[0], X_res.shape[1]) == (X_1.shape[0], X_2.shape[0])

    if order is None:
        # One distance slice per homology dimension present in X_2.
        assert X_res.shape[2] == len(np.unique(X_2[:, :, 2]))

    # X_fit != X_transform, default metric_params
    X_res = PairwiseDistance(metric=metric, order=order,
                             n_jobs=n_jobs).fit(X_1).transform(X_2)
    assert (X_res.shape[0], X_res.shape[1]) == (X_1.shape[0], X_2.shape[0])
def get_pairwise_distance_metrics():
    """Return a dict mapping metric names to ready-made PairwiseDistance
    transformers, one per supported diagram metric."""
    # Only wasserstein takes non-default metric parameters here.
    extra_params = {"wasserstein": {"p": 1}}
    names = (
        "bottleneck",
        "wasserstein",
        "betti",
        "landscape",
        "silhouette",
        "heat",
        "persistence_image",
    )

    metrics = {}
    for name in names:
        if name in extra_params:
            metrics[name] = PairwiseDistance(metric=name,
                                             metric_params=extra_params[name])
        else:
            metrics[name] = PairwiseDistance(metric=name)
    return metrics
Ejemplo n.º 8
0
def main():
    """Compute and save pairwise-distance matrices between the cubical
    persistence diagrams of each patient's MRI patch series.

    For every patient in the diagnoses JSON, loads the available patch
    arrays (one per MRI session), computes cubical persistence diagrams,
    then one pairwise-distance matrix per metric in
    ``distances_to_evaluate``, and saves each matrix as a .npy file under
    GEN_DATA_DIR/temporal_evolution/.
    """
    path_to_diags = "../data/collected_diagnoses_complete.json"
    with open(path_to_diags) as f:
        diagnoses = json.load(f)

    patients = list(diagnoses.keys())
    # Sort diagnoses key
    diagnoses = collections.OrderedDict(sorted(diagnoses.items()))

    # Where the data comes from
    data_dir = DOTENV_KEY2VAL["DATA_DIR"] + "/patch_91/"

    # Where the resulting distance matrices are saved.
    distance_dir = "/temporal_evolution/"
    utils.make_dir(DOTENV_KEY2VAL["GEN_DATA_DIR"] + distance_dir)

    # Metrics to evaluate; the commented-out ones are disabled, not removed.
    distances_to_evaluate = [
        # "bottleneck",
        # "wasserstein",
        # "betti",
        "landscape",
        # "silhouette",
        # "heat",
        "persistence_image",
    ]
    # patients = ["sub-ADNI011S0023", "sub-ADNI029S0878"]
    # If we want to process multiple patients, we just throw them in a loop.
    for i, patient in tqdm(enumerate(patients), total=len(patients)):
        for distance in distances_to_evaluate:
            patches = []
            # One patch file per MRI session; "ses" is stripped from the
            # session id to build the filename — presumably to match the
            # on-disk naming scheme; TODO confirm against the data layout.
            for mri in diagnoses[patient]:
                try:
                    patches.append(
                        np.load(
                            data_dir
                            + patient
                            + mri.replace("ses", "")
                            + "-MNI.npy"
                        )
                    )
                except FileNotFoundError:
                    # Missing session files are skipped silently (best-effort).
                    pass
                # print(
                #     data_dir
                #     + patient
                #     + mri.replace("ses", "")
                #     + "-MNI.npy"
                #     + " not found"
                # )
            # Stacking enables multiprocessing
            patches = np.stack(patches)

            cp = CubicalPersistence(
                homology_dimensions=HOMOLOGY_DIMENSIONS,
                coeff=2,
                periodic_dimensions=None,
                infinity_values=None,
                reduced_homology=True,
                n_jobs=-1,
            )
            diagrams_cubical_persistence = cp.fit_transform(patches)

            # order=None keeps one distance slice per homology dimension.
            pl_dist = PairwiseDistance(
                metric=distance,
                metric_params=None,
                order=None,
                n_jobs=-1,
            )
            X_distance = pl_dist.fit_transform(
                diagrams_cubical_persistence
            )
            with open(
                DOTENV_KEY2VAL["GEN_DATA_DIR"]
                + distance_dir
                + f"patient_evolution_distance_data_patient_{patient}"
                f"_{distance}.npy",
                "wb",
            ) as f:
                np.save(f, X_distance)
Ejemplo n.º 9
0
 def compute_all_distances(X, metric):
     if metric == "euclidean":
         return squareform(pdist(X, metric))
     else:
         return PairwiseDistance(metric=metric,
                                 n_jobs=self.n_jobs).fit_transform(X)
Ejemplo n.º 10
0
def compute_distance(emb2diagram, metric: str = "bottleneck"):
    """Fit a PairwiseDistance on the "SBert" diagrams, transform the "LF"
    diagrams, and return the fitted transformer.

    NOTE(review): the transform result (the distance matrix itself) is
    discarded and only the fitted transformer is returned — presumably the
    caller re-runs transform; confirm this is intended.
    """
    transformer = PairwiseDistance(metric=metric)
    transformer.fit(emb2diagram["SBert"])
    transformer.transform(emb2diagram["LF"])
    return transformer
Ejemplo n.º 11
0
     [0, 1, 0.],
     [0, 0, 1.]],  # Expected bottleneck ampl: [1, 0]

    [[3, 3.5, 0.],
     [0, 0, 0.],
     [5, 9, 1.]]  # Expected bottleneck ampl: [1/4, 2]
])

# Expected bottleneck amplitudes for the diagrams defined above,
# one [H0, H1] row per sample.
X_bottleneck_res_exp = np.array([[1 / 2, 2],
                                 [1, 0],
                                 [1 / 4, 2]])


@pytest.mark.parametrize('transformer', [PairwiseDistance(), Amplitude()])
def test_not_fitted(transformer):
    # An unfitted transformer must raise NotFittedError on transform.
    with pytest.raises(NotFittedError):
        transformer.transform(X1)


parameters_distance = [
    ('bottleneck', None),
    ('wasserstein', {'p': 2, 'delta': 0.1}),
    ('betti', {'p': 2.1, 'n_bins': 10}),
    ('landscape', {'p': 2.1, 'n_bins': 10, 'n_layers': 2}),
    ('silhouette', {'p': 2.1, 'power': 1.2, 'n_bins': 10}),
    ('heat', {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}),
    ('persistence_image',
     {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}),
    ('persistence_image',
Ejemplo n.º 12
0
def main():
    """Compute and save the landscape pairwise-distance matrix between the
    cubical persistence diagrams of one patient's longitudinal MRI patches.

    Loads the patch arrays for each of the patient's MRI sessions, computes
    cubical persistence diagrams, then a landscape pairwise-distance matrix,
    and saves it as a .npy file under GEN_DATA_DIR/temporal_evolution/.
    """
    path_to_diags = "../data/collected_diagnoses_complete.json"
    # Hard-coded single patient with its progression pattern label.
    patients = ["sub-ADNI005S0223"]
    progr = ["cn_mci_ad"]
    with open(path_to_diags) as f:
        diagnoses = json.load(f)

    # Sort diagnoses key
    diagnoses = collections.OrderedDict(sorted(diagnoses.items()))

    # Where the data comes from
    data_dir = DOTENV_KEY2VAL["DATA_DIR"] + "/patch_91/"

    # Where the figures are saved
    temporal_progression_dir = "/temporal_evolution/"
    utils.make_dir(
        DOTENV_KEY2VAL["GEN_FIGURES_DIR"] + temporal_progression_dir
    )

    # Where the resulting distance matrices are saved.
    time_series_dir = "/temporal_evolution/"
    utils.make_dir(DOTENV_KEY2VAL["GEN_DATA_DIR"] + temporal_progression_dir)

    # If we want to process multiple patients, we just throw them in a loop.
    for i, patient in enumerate(patients):
        print(
            "Processing longitudinal data for "
            + patient
            + " with progression pattern "
            + progr[i]
        )
        patches = []
        # One patch file per MRI session; "ses" is stripped from the session
        # id to build the filename — presumably to match the on-disk naming
        # scheme; TODO confirm against the data layout.
        for mri in diagnoses[patient]:
            try:
                patches.append(
                    np.load(
                        data_dir
                        + patient
                        + mri.replace("ses", "")
                        + "-MNI.npy"
                    )
                )
            except FileNotFoundError:
                # Missing sessions are reported but processing continues.
                print(
                    data_dir
                    + patient
                    + mri.replace("ses", "")
                    + "-MNI.npy"
                    + " not found"
                )
        # Stacking enables multiprocessing
        patches = np.stack(patches)

        cp = CubicalPersistence(
            homology_dimensions=HOMOLOGY_DIMENSIONS,
            coeff=2,
            periodic_dimensions=None,
            infinity_values=None,
            reduced_homology=True,
            n_jobs=-1,
        )
        diagrams_cubical_persistence = cp.fit_transform(patches)

        # order=None keeps one distance slice per homology dimension.
        pl_dist = PairwiseDistance(
            metric="landscape", metric_params=None, order=None, n_jobs=-1
        )
        X_distance = pl_dist.fit_transform(
            diagrams_cubical_persistence
        )
        with open(
            DOTENV_KEY2VAL["GEN_DATA_DIR"]
            + time_series_dir
            + f"distance_data_patient_{patient}_{progr[i]}_landscape.npy",
            "wb",
        ) as f:
            np.save(f, X_distance)