Example #1
def test_da_transform_bottleneck(metric, metric_params, order, n_jobs):
    da = Amplitude(metric=metric,
                   metric_params=metric_params,
                   order=order,
                   n_jobs=n_jobs)
    X_bottleneck_res = da.fit_transform(X_bottleneck)
    assert_almost_equal(X_bottleneck_res, X_bottleneck_res_exp)
Example #2
import numpy as np

from gtda.diagrams import Amplitude


def extract_topological_features(diagrams):
    # Compute amplitude features for each metric and stack them column-wise
    metrics = ['bottleneck', 'wasserstein', 'landscape', 'betti', 'heat']
    new_features = []
    for metric in metrics:
        amplitude = Amplitude(metric=metric)
        new_features.append(amplitude.fit_transform(diagrams))
    new_features = np.concatenate(new_features, axis=1)
    return new_features
Example #3
def test_not_fitted():
    dd = PairwiseDistance()
    da = Amplitude()

    with pytest.raises(NotFittedError):
        dd.transform(X_1)

    with pytest.raises(NotFittedError):
        da.transform(X_1)
Example #4
def test_da_transform(metric, metric_params, n_jobs):
    da = Amplitude(metric=metric, metric_params=metric_params, n_jobs=n_jobs)
    X_res = da.fit_transform(X_1)
    assert X_res.shape == (X_1.shape[0], 1)

    # X_fit != X_transform
    da = Amplitude(metric=metric, metric_params=metric_params, n_jobs=n_jobs)
    X_res = da.fit(X_1).transform(X_2)
    assert X_res.shape == (X_2.shape[0], 1)
Example #5
import cv2
import numpy as np
from sklearn.pipeline import make_pipeline  # assumed source of make_pipeline for this excerpt

from gtda.homology import CubicalPersistence
from gtda.diagrams import Amplitude


def bettiAmplitude(img_file):
    """
    Pipeline: Cubical Persistence --> Amplitude of Betti Curve
    """
    img = cv2.imread(img_file)  # OpenCV loads images in BGR channel order
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # blur the image to reduce noise
    figure_size = 9  # the dimension of the x and y axes of the kernel
    img = cv2.blur(img, (figure_size, figure_size))

    shape = img.shape
    images = np.zeros((1, *shape))
    images[0] = img
    p = make_pipeline(CubicalPersistence(), Amplitude(metric='betti'))
    return p.fit_transform(images)
Example #6
def pipeline1(images):
    """
    Binarizer --> Height Filtration, Erosion Filtration, Dilation Filtration --> Cubical Persistence --> Amplitude, Persistence Entropy
    return: list of feature pipelines
    """
    # Pipeline parameters
    bin_thresholds = [np.percentile(images[0], 93) / np.max(images[0])]
    directions = [
        np.array([np.cos(t), np.sin(t)])
        for t in np.linspace(0, 2 * np.pi, 8)[:-1]
    ]
    n_iterations = np.linspace(1, 21, 5).astype(int).tolist()

    features = [('bottleneck', Amplitude(metric='bottleneck', n_jobs=-1)),
                ('PE', PersistenceEntropy(n_jobs=-1))]

    # Make filtrations
    binned_steps = [('binarizer_{}'.format(t), Binarizer(threshold=t,
                                                         n_jobs=-1))
                    for t in bin_thresholds]
    filtrations = [('height_{}'.format(d),
                    HeightFiltration(direction=d, n_jobs=-1))
                   for d in directions]
    filtrations += [('erosion_{}'.format(i),
                     ErosionFiltration(n_iterations=i, n_jobs=-1))
                    for i in n_iterations]
    filtrations += [('dilation_{}'.format(i),
                     DilationFiltration(n_iterations=i, n_jobs=-1))
                    for i in n_iterations]

    # Make pipelines
    cubical_lower = ('cubical', CubicalPersistence(n_jobs=-1))

    partial_pipeline_steps = []
    partial_pipeline_steps.append([cubical_lower])
    partial_pipeline_steps.append([('inverter', Inverter(n_jobs=-1)),
                                   cubical_lower])

    for b, f in itertools.product(binned_steps, filtrations):
        partial_pipeline_steps.append(
            [b, f, ('cubical', CubicalPersistence(n_jobs=-1))])

    feature_pipelines = []
    for s, f in itertools.product(partial_pipeline_steps, features):
        feature_pipelines.append(Pipeline(s + [f]))

    return feature_pipelines
Example #7
def test_da_transform(metric, metric_params, order, n_jobs):
    n_expected_columns = n_homology_dimensions if order is None else 1

    da = Amplitude(metric=metric, metric_params=metric_params, order=order,
                   n_jobs=n_jobs)
    X_res = da.fit_transform(X1)
    assert X_res.shape == (X1.shape[0], n_expected_columns)

    # X_fit != X_transform
    da = Amplitude(metric=metric, metric_params=metric_params, order=order,
                   n_jobs=n_jobs)
    X_res = da.fit(X1).transform(X2)
    assert X_res.shape == (X2.shape[0], n_expected_columns)
Example #8
#
# What if we suspect that the way in which the **correlations** between the variables evolve over time can help forecast the target ``y``? This is a common situation in neuroscience, where each variable could be data from a single EEG sensor, for instance.
#
# ``giotto-tda`` exposes a ``PearsonDissimilarity`` transformer which creates a 2D dissimilarity matrix from each window in ``X_sw``, and stacks them together into a single 3D object. This is the correct format (and information content!) for a typical topological transformer in ``gtda.homology``. See also [Topological feature extraction from graphs](https://github.com/giotto-ai/giotto-tda/blob/master/examples/persistent_homology_graphs.ipynb) for an in-depth look. Finally, we can extract simple scalar features using a selection of transformers in ``gtda.diagrams``.

# In[6]:

from gtda.time_series import PearsonDissimilarity
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import Amplitude

PD = PearsonDissimilarity()
X_pd = PD.fit_transform(X_sw)
VR = VietorisRipsPersistence(metric="precomputed")
X_vr = VR.fit_transform(X_pd)  # "precomputed" required on dissimilarity data
Ampl = Amplitude()
X_a = Ampl.fit_transform(X_vr)
X_vr

# Notice that we are not acting on ``y`` above. We are simply creating features from each window using topology! *Note*: it's two features per window because we used the default value for ``homology_dimensions`` in ``VietorisRipsPersistence``, not because we had two variables in the time series initially!
#
# We can now put this all together into a ``giotto-tda`` ``Pipeline`` which combines both the sliding window transformation on ``X`` and resampling of ``y`` with the feature extraction from the windows on ``X``.
#
# *Note*: while we could import the ``Pipeline`` class and use its constructor, we use the convenience function ``make_pipeline`` instead, which is a drop-in replacement for [scikit-learn's](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html).
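#
# A minimal sketch of such a pipeline is shown below; it is not part of the original notebook cell. It assumes that ``SW`` is the ``SlidingWindow`` transformer and that ``X``, ``y`` are the raw time series and target defined in earlier cells, and it uses ``RandomForestRegressor`` purely as a placeholder final estimator.

# In[ ]:

from sklearn.ensemble import RandomForestRegressor
from gtda.pipeline import make_pipeline

# Sliding windows -> Pearson dissimilarities -> persistence diagrams -> amplitudes -> regressor.
# The giotto-tda pipeline takes care of resampling ``y`` to match the windows produced from ``X``.
pipe = make_pipeline(SW, PD, VR, Ampl, RandomForestRegressor())
pipe.fit(X, y)
y_pred = pipe.predict(X)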
Example #9
               ('Radial UC', img.RadialFiltration(center=np.array((13, 6)), n_jobs=num_jobs)),
               ('Radial UR', img.RadialFiltration(center=np.array((20, 6)), n_jobs=num_jobs)),
               ('Radial CL', img.RadialFiltration(center=np.array((6, 13)), n_jobs=num_jobs)),
               ('Radial C', img.RadialFiltration(center=np.array((13, 13)), n_jobs=num_jobs)),
               ('Radial CR', img.RadialFiltration(center=np.array((20, 13)), n_jobs=num_jobs)),
               ('Radial DL', img.RadialFiltration(center=np.array((6, 20)), n_jobs=num_jobs)),
               ('Radial DC', img.RadialFiltration(center=np.array((13, 20)), n_jobs=num_jobs)),
               ('Radial DR', img.RadialFiltration(center=np.array((20, 20)), n_jobs=num_jobs)),
               ('Density 2', img.DensityFiltration(radius=2, n_jobs=num_jobs)),
               ('Density 4', img.DensityFiltration(radius=4, n_jobs=num_jobs)),
               ('Density 6', img.DensityFiltration(radius=6, n_jobs=num_jobs)),
               ('Dilation', img.DilationFiltration(n_jobs=num_jobs)),
               ('Erosion', img.ErosionFiltration(n_jobs=num_jobs)),
               ('Signed distance', img.SignedDistanceFiltration(n_jobs=num_jobs)),
               ('Vietoris-Rips', img.ImageToPointCloud(n_jobs=num_jobs))]
vectorizations = [('Bottleneck', Amplitude(metric='bottleneck', order=None, n_jobs=num_jobs)),
                  ('Wasserstein L1', Amplitude(metric='wasserstein', metric_params={'p': 1}, order=None, n_jobs=num_jobs)),
                  ('Wasserstein L2', Amplitude(metric='wasserstein', metric_params={'p': 2}, order=None, n_jobs=num_jobs)),
                  ('Betti L1', Amplitude(metric='betti', metric_params={'p': 1}, order=None, n_jobs=num_jobs)),
                  ('Betti L2', Amplitude(metric='betti', metric_params={'p': 2}, order=None, n_jobs=num_jobs)),
                  ('Landscape L1 k=1', Amplitude(metric='landscape', metric_params={'p': 1, 'n_layers': 1}, order=None, n_jobs=num_jobs)),
                  ('Landscape L1 k=2', Amplitude(metric='landscape', metric_params={'p': 1, 'n_layers': 2}, order=None, n_jobs=num_jobs)),
                  ('Landscape L2 k=1', Amplitude(metric='landscape', metric_params={'p': 2, 'n_layers': 1}, order=None, n_jobs=num_jobs)),
                  ('Landscape L2 k=2', Amplitude(metric='landscape', metric_params={'p': 2, 'n_layers': 2}, order=None, n_jobs=num_jobs)),
                  ('Heat kernel L1 sigma=10', Amplitude(metric='heat', metric_params={'p': 1, 'sigma': 10}, order=None, n_jobs=num_jobs)),
                  ('Heat kernel L1 sigma=15', Amplitude(metric='heat', metric_params={'p': 1, 'sigma': 15}, order=None, n_jobs=num_jobs)),
                  ('Heat kernel L2 sigma=10', Amplitude(metric='heat', metric_params={'p': 2, 'sigma': 10}, order=None, n_jobs=num_jobs)),
                  ('Heat kernel L2 sigma=15', Amplitude(metric='heat', metric_params={'p': 2, 'sigma': 15}, order=None, n_jobs=num_jobs)),
                  ('Persistence entropy', PersistenceEntropy(n_jobs=num_jobs))]

if os.path.exists("train_labels.csv"):
Example #10
     [0, 1, 0.],
     [0, 0, 1.]],  # Expected bottleneck ampl: [1, 0]

    [[3, 3.5, 0.],
     [0, 0, 0.],
     [5, 9, 1.]]  # Expected bottleneck ampl: [1/4, 2]
])

X_bottleneck_res_exp = np.array([
    [1/2, 2],
    [1, 0],
    [1/4, 2]
    ])


@pytest.mark.parametrize('transformer', [PairwiseDistance(), Amplitude()])
def test_not_fitted(transformer):
    with pytest.raises(NotFittedError):
        transformer.transform(X1)


parameters_distance = [
    ('bottleneck', None),
    ('wasserstein', {'p': 2, 'delta': 0.1}),
    ('betti', {'p': 2.1, 'n_bins': 10}),
    ('landscape', {'p': 2.1, 'n_bins': 10, 'n_layers': 2}),
    ('silhouette', {'p': 2.1, 'power': 1.2, 'n_bins': 10}),
    ('heat', {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}),
    ('persistence_image',
     {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}),
    ('persistence_image',