def test_da_transform_bottleneck(metric, metric_params, order, n_jobs):
    """Amplitude on the bottleneck fixture must match the precomputed values."""
    amplitude = Amplitude(metric=metric, metric_params=metric_params,
                          order=order, n_jobs=n_jobs)
    result = amplitude.fit_transform(X_bottleneck)
    assert_almost_equal(result, X_bottleneck_res_exp)
def extract_topological_features(diagrams):
    """Compute one amplitude feature column per metric and stack them.

    For each of the five diagram metrics, fit an ``Amplitude`` transformer
    on ``diagrams`` and concatenate the resulting feature arrays along the
    column axis.
    """
    metrics = ['bottleneck', 'wasserstein', 'landscape', 'betti', 'heat']
    feature_blocks = [Amplitude(metric=metric).fit_transform(diagrams)
                      for metric in metrics]
    return np.concatenate(feature_blocks, axis=1)
def test_not_fitted():
    """Calling transform before fit must raise NotFittedError."""
    distance = PairwiseDistance()
    amplitude = Amplitude()
    with pytest.raises(NotFittedError):
        distance.transform(X_1)
    with pytest.raises(NotFittedError):
        amplitude.transform(X_1)
def test_da_transform(metric, metric_params, n_jobs):
    """Both fit_transform and fit().transform() yield a single-column output."""
    amplitude = Amplitude(metric=metric, metric_params=metric_params,
                          n_jobs=n_jobs)
    transformed = amplitude.fit_transform(X_1)
    assert transformed.shape == (X_1.shape[0], 1)

    # X_fit != X_transform
    amplitude = Amplitude(metric=metric, metric_params=metric_params,
                          n_jobs=n_jobs)
    transformed = amplitude.fit(X_1).transform(X_2)
    assert transformed.shape == (X_2.shape[0], 1)
def bettiAmplitude(img_file):
    """Pipeline: Cubical Persistence --> Amplitude of Betti Curve."""
    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # Blur the image to reduce noise; kernel_size is the width and height
    # of the blur kernel.
    kernel_size = 9
    img = cv2.blur(img, (kernel_size, kernel_size))
    # Wrap the single grayscale image in a (1, H, W) batch for gtda.
    batch = np.zeros((1, *img.shape))
    batch[0] = img
    pipeline = make_pipeline(CubicalPersistence(), Amplitude(metric='betti'))
    return pipeline.fit_transform(batch)
def pipeline1(images):
    """
    Binarizer --> Height / Erosion / Dilation filtrations
    --> Cubical Persistence --> Amplitude, Persistence Entropy.

    return: Array of pipelines
    """
    # Pipeline parameters
    bin_thresholds = [np.percentile(images[0], 93) / np.max(images[0])]
    directions = [np.array([np.cos(t), np.sin(t)])
                  for t in np.linspace(0, 2 * np.pi, 8)[:-1]]
    n_iterations = np.linspace(1, 21, 5).astype(int).tolist()

    features = [
        ('bottleneck', Amplitude(metric='bottleneck', n_jobs=-1)),
        ('PE', PersistenceEntropy(n_jobs=-1)),
    ]

    # Make filtrations
    binned_steps = [('binarizer_{}'.format(t), Binarizer(threshold=t, n_jobs=-1))
                    for t in bin_thresholds]
    filtrations = [('height_{}'.format(d), HeightFiltration(direction=d, n_jobs=-1))
                   for d in directions]
    filtrations += [('erosion_{}'.format(i), ErosionFiltration(n_iterations=i, n_jobs=-1))
                    for i in n_iterations]
    filtrations += [('dilation_{}'.format(i), DilationFiltration(n_iterations=i, n_jobs=-1))
                    for i in n_iterations]

    # Make pipelines: plain cubical, inverted cubical, and every
    # binarizer x filtration combination followed by cubical persistence.
    cubical_lower = ('cubical', CubicalPersistence(n_jobs=-1))
    partial_pipeline_steps = [
        [cubical_lower],
        [('inverter', Inverter(n_jobs=-1)), cubical_lower],
    ]
    partial_pipeline_steps += [
        [b, f, ('cubical', CubicalPersistence(n_jobs=-1))]
        for b, f in itertools.product(binned_steps, filtrations)
    ]

    # One pipeline per (steps, feature) pair.
    return [Pipeline(s + [f])
            for s, f in itertools.product(partial_pipeline_steps, features)]
def test_da_transform(metric, metric_params, order, n_jobs):
    """Output has one column per homology dimension, or one when reduced by ``order``."""
    expected_columns = 1 if order is not None else n_homology_dimensions
    amplitude = Amplitude(metric=metric, metric_params=metric_params,
                          order=order, n_jobs=n_jobs)
    result = amplitude.fit_transform(X1)
    assert result.shape == (X1.shape[0], expected_columns)

    # X_fit != X_transform
    amplitude = Amplitude(metric=metric, metric_params=metric_params,
                          order=order, n_jobs=n_jobs)
    result = amplitude.fit(X1).transform(X2)
    assert result.shape == (X2.shape[0], expected_columns)
# # What if we suspect that the way in which the **correlations** between the variables evolve over time can help forecast the target ``y``? This is a common situation in neuroscience, where each variable could be data from a single EEG sensor, for instance. # # ``giotto-tda`` exposes a ``PearsonDissimilarity`` transformer which creates a 2D dissimilarity matrix from each window in ``X_sw``, and stacks them together into a single 3D object. This is the correct format (and information content!) for a typical topological transformer in ``gtda.homology``. See also [Topological feature extraction from graphs](https://github.com/giotto-ai/giotto-tda/blob/master/examples/persistent_homology_graphs.ipynb) for an in-depth look. Finally, we can extract simple scalar features using a selection of transformers in ``gtda.diagrams``. # In[6]: from gtda.time_series import PearsonDissimilarity from gtda.homology import VietorisRipsPersistence from gtda.diagrams import Amplitude PD = PearsonDissimilarity() X_pd = PD.fit_transform(X_sw) VR = VietorisRipsPersistence(metric="precomputed") X_vr = VR.fit_transform(X_pd) # "precomputed" required on dissimilarity data Ampl = Amplitude() X_a = Ampl.fit_transform(X_vr) X_vr # Notice that we are not acting on ``y`` above. We are simply creating features from each window using topology! *Note*: it's two features per window because we used the default value for ``homology_dimensions`` in ``VietorisRipsPersistence``, not because we had two variables in the time series initially! # # We can now put this all together into a ``giotto-tda`` ``Pipeline`` which combines both the sliding window transformation on ``X`` and resampling of ``y`` with the feature extraction from the windows on ``X``. 
# # *Note*: while we could import the ``Pipeline`` class and use its constructor, we use the convenience function ``make_pipeline`` instead, which is a drop-in replacement for [scikit-learn's](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html).
#
# 请注意,我们没有对上面的 ``y`` 采取行动。我们只是在使用拓扑从每个窗口创建特征!*注意*:每个窗口有两个特征,是因为我们在 ``VietorisRipsPersistence`` 中使用了 ``homology_dimensions`` 的默认值,而不是因为我们最初在时间序列中有两个变量!
#
# 现在我们可以将所有这些放到 giotto-tda 的 ``Pipeline`` 中,它将 ``X`` 上的滑动窗口转换和 ``y`` 的重采样,与从 ``X`` 的窗口上提取特征的步骤结合在一起。
#
# *注意*:虽然我们可以导入 ``Pipeline`` 类并使用其构造函数,但我们使用便利函数 ``make_pipeline`` 代替,它是 [scikit-learn 同名函数](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html) 的直接替代品。
('Radial UC', img.RadialFiltration(center=np.array((13,6)), n_jobs=num_jobs)),\ ('Radial UR', img.RadialFiltration(center=np.array((20,6)), n_jobs=num_jobs)),\ ('Radial CL', img.RadialFiltration(center=np.array((6,13)), n_jobs=num_jobs)),\ ('Radial C', img.RadialFiltration(center=np.array((13,13)), n_jobs=num_jobs)),\ ('Radial CR', img.RadialFiltration(center=np.array((20,13)), n_jobs=num_jobs)),\ ('Radial DL', img.RadialFiltration(center=np.array((6,20)), n_jobs=num_jobs)),\ ('Radial DC', img.RadialFiltration(center=np.array((13,20)), n_jobs=num_jobs)),\ ('Radial DR', img.RadialFiltration(center=np.array((20,20)), n_jobs=num_jobs)),\ ('Density 2', img.DensityFiltration(radius=2, n_jobs=num_jobs)),\ ('Density 4', img.DensityFiltration(radius=4, n_jobs=num_jobs)),\ ('Density 6', img.DensityFiltration(radius=6, n_jobs=num_jobs)),\ ('Dilation', img.DilationFiltration(n_jobs=num_jobs)),\ ('Erosion', img.ErosionFiltration(n_jobs=num_jobs)),\ ('Signed distance', img.SignedDistanceFiltration(n_jobs=num_jobs)),\ ('Vietoris-Rips', img.ImageToPointCloud(n_jobs=num_jobs))] vectorizations = [('Bottleneck', Amplitude(metric='bottleneck', order=None, n_jobs=num_jobs)),\ ('Wasserstein L1', Amplitude(metric='wasserstein', metric_params={'p': 1}, order=None, n_jobs=num_jobs)),\ ('Wasserstein L2', Amplitude(metric='wasserstein', metric_params={'p': 2}, order=None, n_jobs=num_jobs)),\ ('Betti L1', Amplitude(metric='betti', metric_params={'p': 1}, order=None, n_jobs=num_jobs)),\ ('Betti L2', Amplitude(metric='betti', metric_params={'p': 2}, order=None, n_jobs=num_jobs)),\ ('Landscape L1 k=1', Amplitude(metric='landscape', metric_params={'p': 1, 'n_layers':1}, order=None, n_jobs=num_jobs)),\ ('Landscape L1 k=2', Amplitude(metric='landscape', metric_params={'p': 1, 'n_layers':2}, order=None, n_jobs=num_jobs)),\ ('Landscape L2 k=1', Amplitude(metric='landscape', metric_params={'p': 2, 'n_layers':1}, order=None, n_jobs=num_jobs)),\ ('Landscape L2 k=2', Amplitude(metric='landscape', 
metric_params={'p': 2, 'n_layers':2}, order=None, n_jobs=num_jobs)),\ ('Heat kernel L1 sigma=10', Amplitude(metric='heat', metric_params={'p': 1, 'sigma':10}, order=None, n_jobs=num_jobs)),\ ('Heat kernel L1 sigma=15', Amplitude(metric='heat', metric_params={'p': 1, 'sigma':15}, order=None, n_jobs=num_jobs)),\ ('Heat kernel L2 sigma=10', Amplitude(metric='heat', metric_params={'p': 2, 'sigma':10}, order=None, n_jobs=num_jobs)),\ ('Heat kernel L2 sigma=15', Amplitude(metric='heat', metric_params={'p': 2, 'sigma':15}, order=None, n_jobs=num_jobs)),\ ('Persistence entropy', PersistenceEntropy(n_jobs=num_jobs))] if os.path.exists("train_labels.csv"):
[0, 1, 0.], [0, 0, 1.]], # Expected bottleneck ampl: [1, 0] [[3, 3.5, 0.], [0, 0, 0.], [5, 9, 1.]] # Expected bottleneck ampl: [1/4, 2] ]) X_bottleneck_res_exp = np.array([ [1/2, 2], [1, 0], [1/4, 2] ]) @pytest.mark.parametrize('transformer', [PairwiseDistance(), Amplitude()]) def test_not_fitted(transformer): with pytest.raises(NotFittedError): transformer.transform(X1) parameters_distance = [ ('bottleneck', None), ('wasserstein', {'p': 2, 'delta': 0.1}), ('betti', {'p': 2.1, 'n_bins': 10}), ('landscape', {'p': 2.1, 'n_bins': 10, 'n_layers': 2}), ('silhouette', {'p': 2.1, 'power': 1.2, 'n_bins': 10}), ('heat', {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}), ('persistence_image', {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}), ('persistence_image',