Example 1
def test_vrp_transform(max_edge_length, infinity_values):
    vrp = VietorisRipsPersistence(max_edge_length=max_edge_length,
                                  infinity_values=infinity_values)
    # This is not generally true, it is only a way to obtain the res array
    # in this specific case
    X_res = X_vrp_res.copy()
    X_res[:, :, :2][X_res[:, :, :2] >= max_edge_length] = infinity_values
    assert_almost_equal(vrp.fit_transform(X), X_res)
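This test, like several later ones, references module-level fixtures (X, X_pc, X_vrp_res, X_vrp_exp) from giotto-tda's test module that the excerpts omit. A minimal, purely hypothetical stand-in for the inputs follows; the expected-output arrays are precomputed fixtures and are not reproduced here.

import numpy as np

# Hypothetical stand-ins for the omitted test fixtures; the real test module
# defines its own point clouds and the matching expected diagrams.
X_pc = [np.array([[0., 0.], [1., 1.], [0., 1.]])]  # one small point cloud
X = np.asarray(X_pc)                               # shape (1, 3, 2)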
Example 2
    def parallel_embed_(self, embedding):
        vr = VietorisRipsPersistence(
            metric='euclidean',
            homology_dimensions=self.homology_dimensions_,
            n_jobs=self.n_job)
        diagram_scaler = Scaler(n_jobs=self.n_job)
        persistence_diagrams = diagram_scaler.fit_transform(
            vr.fit_transform([embedding]))
        if self.filtering_:
            diagram_filter = Filtering(
                epsilon=0.1, homology_dimensions=self.filtering_dimensions_)
            persistence_diagrams = diagram_filter.fit_transform(
                persistence_diagrams)
        return persistence_diagrams[0]
Example 3
def vr_persistent_homology(patch_pc):
    homology_dimensions = (0, 1, 2)
    VR = VietorisRipsPersistence(
        metric="euclidean",
        max_edge_length=5,
        homology_dimensions=homology_dimensions,
        n_jobs=N_JOBS,
    )
    diagrams_VietorisRips = VR.fit_transform(np.asarray(patch_pc))
    VR.plot(diagrams_VietorisRips).show()
    BC = BettiCurve()
    X_betti_curves = BC.fit_transform(diagrams_VietorisRips)
    BC.plot(X_betti_curves).show()
    return diagrams_VietorisRips
Example 4
def computing_persistence_diagram(G, t=np.inf, homology_dimensions=(0, 1, 2)):
    """
    INPUT:
        G: a graph
        t: persistence threshold (passed as max_edge_length)
        homology_dimensions: homology dimensions to consider
    OUTPUT:
        diagrams: persistence diagrams computed by giotto-tda
    """

    dist_mat = computing_distance_matrix(G)
    vr = VietorisRipsPersistence(metric='precomputed', max_edge_length=t,
                                 homology_dimensions=homology_dimensions,
                                 n_jobs=-1)
    diagrams = vr.fit_transform(dist_mat.reshape(1, *dist_mat.shape))
    return diagrams
Example 5
def get_persistent_entropy(point_clouds):
    ''' Creates a Vietoris-Rips filtration and calculates persistent entropy.

        Returns
        -------
        List with the persistent entropy of the 0th homology group for each
        series
    '''
    vietorisrips_tr = VietorisRipsPersistence(
        metric='manhattan',
        homology_dimensions=_homology_dimensions,
        max_edge_length=_max_edge_length,
        n_jobs=_n_jobs,
    )
    diagrams = vietorisrips_tr.fit_transform(point_clouds)

    entropy_tr = PersistenceEntropy()
    features = entropy_tr.fit_transform(diagrams)

    return features
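The underscored names are module-level settings that the excerpt omits. Values consistent with the docstring (0th homology only) might look like this; they are assumptions, not the original configuration:

# Assumed module-level configuration (not from the original source)
_homology_dimensions = (0,)   # docstring mentions only the 0th homology group
_max_edge_length = 10
_n_jobs = -1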
Example 6
def extract_top_features(X, filtrations, vectorizations):
    """
    Extracts topological features from a MNIST-like dataset. 
    
    For each specified filtration and vectorization, features are extracted
    according to the pipeline:
    Filtration -> Persistence diagram -> Rescaling -> Vectorization.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 28, 28)
        A collection of greyscale images.
        
    filtrations : list of tuples (string, filtration)
        A list of filtrations.
        Assumptions: 1) The first filtration is 'Voxel', the second is
                        'Binary', and for both of them the pipeline is
                        to be run on the original greyscale images. For all
                        subsequent filtrations, the pipeline is to be run on
                        binarized images.
                     2) For all filtrations except 'Vietoris-Rips', the
                        corresponding diagram is the cubical persistence
                        diagram. For 'Vietoris-Rips', it's the Vietoris-Rips
                        persistence diagram.
                    
    vectorizations : list of tuples (string, vectorization)
        A list of vectorizations.
        
    Returns
    -------
    X_f : ndarray of shape (n_samples, n_features)
        Topological features for all images in X
        
    """
    # Put all vectorizations together for convenience
    vect_union = FeatureUnion(vectorizations, n_jobs=num_jobs)

    X_bin = img.Binarizer(threshold=0.4, n_jobs=num_jobs).fit_transform(X)

    X_f = np.array([]).reshape(X.shape[0], 0)
    current_time = [time.perf_counter()]
    for filt in filtrations:
        filt_features = make_pipeline(
            filt[1],
            VietorisRipsPersistence(n_jobs=num_jobs) if filt[0] == 'Vietoris-Rips'
            else CubicalPersistence(n_jobs=num_jobs),
            Scaler(n_jobs=num_jobs),
            vect_union).fit_transform(X)
        X_f = np.hstack((X_f, filt_features))
        print("{} complete: {} seconds".format(filt[0],
                                               elapsed_time(current_time)))
        if filt[0] == 'Binary':
            X = X_bin  # From now on, we only work with binarized images

    return X_f
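For reference, filtrations and vectorizations are lists of (name, transformer) pairs. A hedged illustration of compatible arguments (the project's actual choices are not shown; 'Voxel' is modelled here as an identity transformer so the greyscale images pass straight to CubicalPersistence):

import numpy as np
from sklearn.preprocessing import FunctionTransformer
from gtda.images import Binarizer, HeightFiltration
from gtda.diagrams import PersistenceEntropy, Amplitude

# Assumed example arguments, respecting the conventions in the docstring:
filtrations = [
    ('Voxel', FunctionTransformer()),                 # greyscale, unchanged
    ('Binary', Binarizer(threshold=0.4)),             # greyscale -> binary
    ('Height', HeightFiltration(direction=np.array([1, 0]))),
]
vectorizations = [
    ('entropy', PersistenceEntropy()),
    ('amplitude', Amplitude()),
]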
Example 7
def fpd_cluster(data,
                c,
                hom_dimension,
                metric='wasserstein',
                verbose=False,
                max_iter=10,
                frand='no',
                fuzzy=True):
    # Compute topological fuzzy clusters of a collection of point clouds
    #
    # INPUTS
    # data - collection of datasets (one point cloud per dataset)
    # c - number of clusters
    # hom_dimension - dimension of the persistence diagram to use
    #                 (0=connected components, 1=holes, 2=voids, etc.)
    # metric - distance between persistence diagrams used for clustering
    # verbose - True or False to give iteration information
    # max_iter - max number of iterations to compute
    # frand - optional Fuzzy RAND reference matrix
    # fuzzy - fuzzy clustering if True, hard clustering if False
    #
    # OUTPUTS
    # r - membership values
    # M - list of cluster centres
    # frand_indices - Fuzzy RAND index at each iteration (if a reference
    #                 matrix is given)

    VR = VietorisRipsPersistence(homology_dimensions=[hom_dimension])
    diagrams = VR.fit_transform(data)
    # diagrams = np.delete(diagrams, axis=2, obj=2)
    r, M = pd_fuzzy(diagrams,
                    c,
                    verbose,
                    max_iter,
                    frand=frand,
                    fuzzy=fuzzy,
                    metric=metric)

    return r, M
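A hypothetical call, assuming the pd_fuzzy helper that fpd_cluster delegates to is available; data can be any list of point clouds:

import numpy as np

# Illustrative input (assumed): a noisy circle and a noisy blob.
rng = np.random.default_rng(0)
angles = rng.uniform(0, 2 * np.pi, 100)
circle = np.column_stack([np.cos(angles), np.sin(angles)])
blob = 0.1 * rng.standard_normal((100, 2))

r, M = fpd_cluster([circle, blob], c=2, hom_dimension=1, verbose=True)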
Example 8
def get_pd_from_molecule(molecule_name, structures):
    """
    INPUT:
        molecule_name: name of the molecule as given in the structures file
        structures: structures file containing information (x, y, z
            coordinates) for all molecules

    OUTPUT:
        X_scaled: scaled persistence diagrams
    """
    m = structures[structures['molecule_name'] == molecule_name][['x', 'y', 'z']].to_numpy()
    m = m.reshape((1, m.shape[0], m.shape[1]))
    homology_dimensions = [0, 1, 2]
    persistenceDiagram = VietorisRipsPersistence(metric='euclidean',
                                                 homology_dimensions=homology_dimensions,
                                                 n_jobs=1)
    persistenceDiagram.fit(m)
    X_diagrams = persistenceDiagram.transform(m)

    diagram_scaler = diag.Scaler()
    diagram_scaler.fit(X_diagrams)
    X_scaled = diagram_scaler.transform(X_diagrams)

    return X_scaled
Example 9
def get_diagrams_torch(point_clouds, maxdim=1):
    # Calculates persistence diagrams from point clouds. The cost of the
    # calculation increases with the maximum homology dimension taken into
    # account.
    #
    # point_clouds - pytorch tensor of shape (n_samples, n_points, dim)
    # maxdim - maximum homology dimension
    #
    # Returns the tuple (diagrams_torch, diagrams_np, VR_persistence):
    # diagrams_torch - pytorch tensor of shape
    #   (n_samples, maxdim + 1, n_features, 2), where n_features is the
    #   maximum number of topological features across samples and the last
    #   axis holds [birth_scale, death_scale].
    # The last two elements of the tuple are needed only for plotting.
    homology_dimensions = tuple(range(maxdim + 1))
    VR_persistence = VietorisRipsPersistence(homology_dimensions=homology_dimensions)
    point_clouds_np = point_clouds.numpy()
    diagrams_np = VR_persistence.fit_transform(point_clouds_np)
    # Column of homology dimensions, used below to mask features by dimension
    hom_dims = diagrams_np[:, :, 2, np.newaxis]
    diagrams_torch = []
    for i in range(maxdim + 1):
        # Keep only the [birth, death] pairs whose homology dimension is i
        diagrams_fixed_Hdim = np.select([hom_dims == i], [diagrams_np[:, :, :2]])
        diagrams_torch.append(torch.FloatTensor(diagrams_fixed_Hdim[:, np.newaxis, :, :]))
    diagrams_torch = torch.cat(tuple(diagrams_torch), dim=1)
    return diagrams_torch, diagrams_np, VR_persistence
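An illustrative call (not from the original source):

import torch

# Eight random point clouds of 100 points in 3D, homology dimensions 0 and 1.
point_clouds = torch.randn(8, 100, 3)
diagrams_torch, diagrams_np, vr = get_diagrams_torch(point_clouds, maxdim=1)
print(diagrams_torch.shape)  # torch.Size([8, 2, n_features, 2])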
Example 10
    def _validate_k_fold_top(self, model, x_train, y_train, x_test, y_test):
        validation_quantities = []

        for k_min in self.k_mins:
            for k_max in self.k_maxs:
                for dist_percentage in self.dist_percentages:
                    print(
                        f"k_min, k_max, dist_percentage: {k_min}, {k_max}, {dist_percentage}"
                    )
                    pipeline_list = [
                        ('extract_subspaces',
                         SubSpaceExtraction(dist_percentage=dist_percentage,
                                            k_min=k_min,
                                            k_max=k_max,
                                            metric="euclidean",
                                            n_jobs=-1)),
                        ('compute_diagrams',
                         VietorisRipsPersistence(n_jobs=-1))
                    ]
                    top_pipeline = Pipeline(pipeline_list)

                    diagrams_train, _ = top_pipeline.fit_transform_resample(
                        x_train, y_train)

                    top_features_train = extract_topological_features(
                        diagrams_train)

                    x_train_model = np.concatenate(
                        [x_train, top_features_train], axis=1)
                    model.fit(x_train_model, y_train)

                    x_test_model = extract_features_for_prediction(
                        x_train, y_train, x_test, y_test, top_pipeline)

                    score = model.score(x_test_model, y_test)
                    output_dictionary = {
                        "k_min": k_min,
                        "k_max": k_max,
                        "dist_percentage": dist_percentage,
                        "score": score
                    }
                    validation_quantities.append(output_dictionary)

        return validation_quantities
Example 11
def test_vrp_transform():
    vrp = VietorisRipsPersistence()

    assert_almost_equal(vrp.fit_transform(X), X_vrp_res)
Example 12
def test_vrp_not_fitted():
    vrp = VietorisRipsPersistence()

    with pytest.raises(NotFittedError):
        vrp.transform(X)
Example 13
def test_vrp_params():
    metric = 'not_defined'
    vrp = VietorisRipsPersistence(metric=metric)

    with pytest.raises(ValueError):
        vrp.fit_transform(X)
Example 14
def test_vrp_low_infinity_values(X, metric):
    vrp = VietorisRipsPersistence(max_edge_length=0.001,
                                  metric=metric,
                                  infinity_values=-1)
    assert_almost_equal(vrp.fit_transform(X)[:, :, :2], np.zeros((1, 2, 2)))
Example 15
def test_vrp_list_of_arrays_different_size():
    X_2 = np.array([[0., 1.], [1., 2.]])
    vrp = VietorisRipsPersistence()
    assert_almost_equal(vrp.fit_transform([X_pc[0], X_2])[0], X_vrp_exp[0])
Example 16
from biopandas.mol2 import PandasMol2
import numpy as np
import pandas as pd
import warnings
import os
warnings.filterwarnings('ignore')
from concurrent import futures
from gtda.homology import VietorisRipsPersistence


npoints = 15

persistence = VietorisRipsPersistence(
    metric="euclidean",
    homology_dimensions=[0, 1, 2],
    collapse_edges=True,
    n_jobs=None,
)


def get_local_cloud(prot_res):
    # `df` is assumed to be a module-level DataFrame, indexed by protein,
    # holding residue rows with 'x', 'y', 'z' coordinate columns.
    prot, res = prot_res
    tempdf = df.loc[prot]
    center = tempdf.loc[res, ['x', 'y', 'z']].to_numpy()

    tempdf['dist'] = np.sqrt((tempdf['x'] - center[0])**2
                             + (tempdf['y'] - center[1])**2
                             + (tempdf['z'] - center[2])**2)

    localcloud = tempdf.nsmallest(npoints, 'dist')[['x', 'y', 'z']].to_numpy()
    return localcloud

get_local_cloud = np.vectorize(get_local_cloud, otypes=[np.ndarray])
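The excerpt ends before the transformer is applied; presumably the local clouds are stacked and fed to persistence.fit_transform. A minimal sketch with synthetic stand-in clouds (the real script builds them from df via get_local_cloud):

# Synthetic clouds with the expected shape (n_clouds, npoints, 3); the real
# script would obtain these from get_local_cloud over (protein, residue) pairs.
clouds = np.random.default_rng(0).standard_normal((4, npoints, 3))
diagrams = persistence.fit_transform(clouds)
print(diagrams.shape)  # (4, n_features, 3)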
Example 17
    def cross_validate(self, full_x, full_y, splitting_dates):
        train_split_date = splitting_dates[0]
        val_split_date = splitting_dates[1]
        end_date = splitting_dates[2]

        train_x = full_x[(full_x.date < train_split_date) |
                         (full_x.date >= end_date)]
        train_y = full_y[(full_x.date < train_split_date) |
                         (full_x.date >= end_date)]

        val_x = full_x[(full_x.date >= train_split_date)
                       & (full_x.date < val_split_date)]
        val_y = full_y[(full_x.date >= train_split_date)
                       & (full_x.date < val_split_date)]

        test_x = full_x[(full_x.date >= val_split_date)
                        & (full_x.date < end_date)]
        test_y = full_y[(full_x.date >= val_split_date)
                        & (full_x.date < end_date)]

        train_x.pop("date")
        val_x.pop("date")
        test_x.pop("date")

        train_x = train_x.values
        train_y = train_y.values
        val_x = val_x.values
        val_y = val_y.values
        test_x = test_x.values
        test_y = test_y.values

        print("START VALIDATING MODEL")
        models_cv = self._validate_k_fold_model(train_x, train_y, val_x, val_y)
        best_model_params = best_combination(models_cv)
        best_model_params.pop("score")
        best_model = RandomForestClassifier(**best_model_params)

        best_model.fit(train_x, train_y)

        score = best_model.score(test_x, test_y)
        print(f'score no_top {score}')
        print(f'best model parameters no_top {best_model_params}')

        print("START VALIDATING PARAMS")
        topo_cv = self._validate_k_fold_top(best_model, train_x, train_y,
                                            val_x, val_y)
        best_topo = best_combination(topo_cv)
        best_topo.pop("score")
        best_topo_pipeline_list = [
            ('extract_subspaces', SubSpaceExtraction(**best_topo)),
            ('compute_diagrams', VietorisRipsPersistence(n_jobs=-1))
        ]
        best_topo_pipeline = Pipeline(best_topo_pipeline_list)

        train_x_for_test = np.concatenate([train_x, val_x], axis=0)
        train_y_for_test = np.concatenate([train_y, val_y], axis=0)

        diagrams_train, _ = best_topo_pipeline.fit_transform_resample(
            train_x_for_test, train_y_for_test)

        print("EXTRACTING TOPOLOGICAL FEATURES TRAIN")
        top_features_train = extract_topological_features(diagrams_train)

        x_train_model = np.concatenate([train_x_for_test, top_features_train],
                                       axis=1)
        best_model.fit(x_train_model, train_y_for_test)

        print("EXTRACTING TOPOLOGICAL FEATURES TEST")
        x_test_model = extract_features_for_prediction(x_train_model,
                                                       train_y_for_test,
                                                       test_x, test_y,
                                                       best_topo_pipeline)

        score_top = best_model.score(x_test_model, test_y)

        val_x_with_topo = extract_features_for_prediction(
            train_x, train_y, val_x, val_y, best_topo_pipeline)

        print('START VALIDATING MODEL WITH OPTIMAL TOPOLOGY')
        model_config_with_topo = self._validate_k_fold_model(
            x_train_model, train_y, val_x_with_topo, val_y)
        best_model_config_with_topo = best_combination(model_config_with_topo)
        best_model_config_with_topo.pop('score')

        best_model_with_topo = RandomForestClassifier(
            **best_model_config_with_topo)
        best_model_with_topo.fit(x_train_model, train_y_for_test)

        score_best_topo_and_model = best_model_with_topo.score(
            x_test_model, test_y)
        print(f'score best model and topo_feat {score_best_topo_and_model}')

        return best_model_params, best_topo, best_model_config_with_topo, score, score_top, score_best_topo_and_model
Example 18
# Representing the circle in 3d with parametric equations.
circle = np.asarray([[np.sin(t), np.cos(t), 0] for t in range(400)])
plot_point_cloud(circle)

# Representing the sphere in 3d with parametric equations
sphere = np.asarray([[np.cos(s) * np.cos(t),
                      np.cos(s) * np.sin(t),
                      np.sin(s)] for t in range(20) for s in range(20)])
plot_point_cloud(sphere)

# Representing the torus in 3d with parametric equations
torus = np.asarray([[(2 + np.cos(s)) * np.cos(t), (2 + np.cos(s)) * np.sin(t),
                     np.sin(s)] for t in range(20) for s in range(20)])
plot_point_cloud(torus)

# Saving the results into an array
topological_spaces = np.asarray([circle, sphere, torus])

# The homology ranks we choose to consider
homology_dimensions = (0, 1, 2)
VR = VietorisRipsPersistence(metric='euclidean',
                             max_edge_length=10,
                             homology_dimensions=homology_dimensions)

# Array of persistence diagrams, one per point cloud in the input
diagrams = VR.fit_transform(topological_spaces)
print(f'diagrams.shape = {diagrams.shape}')

# Plotting the persistence diagram of the circle
plot_diagram(diagrams[0])
Example 19
def test_vrp_list_of_arrays():
    X_2 = np.array([[0., 1.], [1., 2.]])
    X_list = [X[0].copy(), X_2]
    vrp = VietorisRipsPersistence()
    vrp.fit(X_list)
Example 20
# ``X_sw`` is now a complicated-looking array, but it has a simple interpretation. Again, ``X_sw[i]`` is the ``i``-th window on ``X``, and it contains ``window_size`` samples from the original time series. This time, the samples are not scalars but 1D arrays.
#
# What if we suspect that the way in which the **correlations** between the variables evolve over time can help forecast the target ``y``? This is a common situation in neuroscience, where each variable could be data from a single EEG sensor, for instance.
#
# ``giotto-tda`` exposes a ``PearsonDissimilarity`` transformer which creates a 2D dissimilarity matrix from each window in ``X_sw``, and stacks them together into a single 3D object. This is the correct format (and information content!) for a typical topological transformer in ``gtda.homology``. See also [Topological feature extraction from graphs](https://github.com/giotto-ai/giotto-tda/blob/master/examples/persistent_homology_graphs.ipynb) for an in-depth look. Finally, we can extract simple scalar features using a selection of transformers in ``gtda.diagrams``.

# In[6]:

from gtda.time_series import PearsonDissimilarity
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import Amplitude

PD = PearsonDissimilarity()
X_pd = PD.fit_transform(X_sw)
VR = VietorisRipsPersistence(metric="precomputed")
X_vr = VR.fit_transform(X_pd)  # "precomputed" required on dissimilarity data
Ampl = Amplitude()
X_a = Ampl.fit_transform(X_vr)
X_vr

# Notice that we are not acting on ``y`` above. We are simply creating features from each window using topology! *Note*: it's two features per window because we used the default value for ``homology_dimensions`` in ``VietorisRipsPersistence``, not because we had two variables in the time series initially!
#
# We can now put this all together into a ``giotto-tda`` ``Pipeline`` which combines both the sliding window transformation on ``X`` and resampling of ``y`` with the feature extraction from the windows on ``X``.
#
# *Note*: while we could import the ``Pipeline`` class and use its constructor, we use the convenience function ``make_pipeline`` instead, which is a drop-in replacement for [scikit-learn's](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html).
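# The excerpt cuts off before the combined pipeline is built. Below is a
# sketch of what the note above describes, assuming ``X_sw`` came from a
# ``gtda.time_series.SlidingWindow`` transformer (the ``size`` and ``stride``
# values are placeholders, not the notebook's own):

from gtda.pipeline import make_pipeline
from gtda.time_series import SlidingWindow

SW = SlidingWindow(size=10, stride=1)  # placeholder window parameters
pipe = make_pipeline(SW, PD, VR, Ampl)
# gtda's Pipeline resamples ``y`` alongside the sliding-window transformation
X_feat, y_res = pipe.fit_transform_resample(X, y)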
Example 21
# Consider a specific example: a twisted circle built from two halves.
twistedcircle1 = np.asarray(
    [[np.sin(t),
      np.cos(t),
      np.cos(((np.arctan(np.cos(t) / np.sin(t)))) / 2)]
     for t in range(1, 400)])
twistedcircle2 = np.asarray(
    [[np.sin(t),
      np.cos(t), -np.cos(((np.arctan(np.cos(t) / np.sin(t)))) / 2)]
     for t in range(1, 400)])
twistedcircle = np.concatenate((twistedcircle1, twistedcircle2))
plot_point_cloud(twistedcircle)

# The homology ranks we choose to consider
homology_dimensions = (0, 1)
VR = VietorisRipsPersistence(metric='euclidean',
                             max_edge_length=10,
                             homology_dimensions=homology_dimensions)

# Creating persistence diagrams, one per point cloud in the input.
diagrams = VR.fit_transform([twistedcircle])
print(f'diagrams.shape = {diagrams.shape}')

# Plotting the persistence diagram of the twisted circle.
plot_diagram(diagrams[0])
Example 22
def get_cmap(n, name='hsv'):
    '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct
    RGB color; the keyword argument name must be a standard mpl colormap name.'''
    return plt.cm.get_cmap(name, n)


#cmap = get_cmap(1000000)
knots = []
for index, i in enumerate([4000, 5000, 6000, 7000]):
    for j in range(5):
        X = np.random.randn(i, 3) / 1000
        X[:, 0] += np.cos(np.arange(i) * 2 * np.pi / i)
        X[:, 1] += np.sin(np.arange(i) * 2 * np.pi / i)
        Z = TSNE(n_jobs=-1, init='random',
                 random_state=np.random.randint(5, 42)).fit_transform(X)
        plt.scatter(Z[:, 0], Z[:, 1], c=np.random.rand(3,))
        plt.show()
        knots.append(Z)
        
homology_dimensions = (0, 1)
VR = VietorisRipsPersistence(
    metric='euclidean', homology_dimensions=homology_dimensions)

# Array of persistence diagrams, one per point cloud in the input
diagrams = VR.fit_transform(knots)

PE = PersistenceEntropy()
F = PE.fit_transform(diagrams)

# AgglomerativeClustering has no fit_transform; cluster the entropy features
C = AgglomerativeClustering(n_clusters=5).fit(F)
print(C.labels_)


Example 23
def test_vrp_fit_transform_plot(hom_dims):
    VietorisRipsPersistence().fit_transform_plot(
        X, sample=0, homology_dimensions=hom_dims)
Example 24
def get_pipeline(top_feat_params):
    pipeline = Pipeline([('extract_point_clouds', SubSpaceExtraction(**top_feat_params)),
                         ('create_diagrams', VietorisRipsPersistence(n_jobs=-1))])
    return pipeline
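An illustrative call, with placeholder values mirroring SubSpaceExtraction's parameters from the cross-validation example above:

# Hypothetical parameter values; in the project they come from validation.
top_feat_params = {'dist_percentage': 0.1, 'k_min': 5, 'k_max': 15}
pipeline = get_pipeline(top_feat_params)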