Example #1
def extract_top_features(X, filtrations, vectorizations):
    """
    Extracts topological features from a MNIST-like dataset. 
    
    For each specified filtration and vectorization, features are extracted
    according to the pipeline:
    Filtration -> Persistence diagram -> Rescaling -> Vectorization.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 28, 28)
        A collection of greyscale images.
        
    filtrations : list of tuples (string, filtration)
        A list of filtrations.
        Assumptions: 1) The first filtration is 'Voxel', the second is
                        'Binary', and for both of them the pipeline is
                        to be run on the original greyscale images. For all
                        subsequent filtrations, the pipeline is to be run on
                        binarized images.
                     2) For all filtrations except 'Vietoris-Rips', the
                        corresponding diagram is the cubical persistence
                        diagram. For 'Vietoris-Rips', it is the Vietoris-Rips
                        persistence diagram.
                    
    vectorizations : list of tuples (string, vectorization)
        A list of vectorizations.
        
    Returns
    -------
    X_f : ndarray of shape (n_samples, n_features)
        Topological features for all images in X
        
    """
    # Put all vectorizations together for convenience
    vect_union = FeatureUnion(vectorizations, n_jobs=num_jobs)

    X_bin = img.Binarizer(threshold=0.4, n_jobs=num_jobs).fit_transform(X)

    X_f = np.array([]).reshape(X.shape[0], 0)
    current_time = [time.perf_counter()]
    for filt in filtrations:
        filt_features = make_pipeline(
            filt[1],
            VietorisRipsPersistence(n_jobs=num_jobs) if filt[0] == 'Vietoris-Rips'
            else CubicalPersistence(n_jobs=num_jobs),
            Scaler(n_jobs=num_jobs),
            vect_union).fit_transform(X)
        X_f = np.hstack((X_f, filt_features))
        print("{} complete: {} seconds".format(filt[0],
                                               elapsed_time(current_time)))
        if filt[0] == 'Binary':
            X = X_bin  # From now on, we only work with binarized images

    return X_f
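
# A hypothetical usage sketch (not part of the original example): it illustrates the
# (name, transformer) tuple format that ``extract_top_features`` expects. The specific
# filtrations, vectorizations, ``X_train`` and the module-level ``num_jobs`` are
# assumptions for illustration only.
from sklearn.preprocessing import FunctionTransformer
from gtda.images import Binarizer, HeightFiltration, RadialFiltration
from gtda.diagrams import PersistenceEntropy, Amplitude

filtrations = [
    ('Voxel', FunctionTransformer()),   # identity: run on the raw greyscale pixel values
    ('Binary', Binarizer(threshold=0.4, n_jobs=num_jobs)),
    ('Height', HeightFiltration(direction=np.array([1, 0]), n_jobs=num_jobs)),
    ('Radial', RadialFiltration(center=np.array([13, 13]), n_jobs=num_jobs)),
]
vectorizations = [
    ('entropy', PersistenceEntropy(n_jobs=num_jobs)),
    ('betti', Amplitude(metric='betti', n_jobs=num_jobs)),
]
X_features = extract_top_features(X_train, filtrations, vectorizations)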
Example #2
def bettiAmplitude(img_file):
    """
    Pipeline: Cubical Persistence --> Amplitude of Betti Curve
    """
    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # cv2.imread loads images in BGR order
    # blur the image to reduce noise
    figure_size = 9  # size of the blur kernel along the x and y axes
    img = cv2.blur(img, (figure_size, figure_size))

    shape = img.shape
    images = np.zeros((1, *shape))
    images[0] = img
    p = make_pipeline(CubicalPersistence(), Amplitude(metric='betti'))
    return p.fit_transform(images)
Example #3
def persistenceEntropy(img_file):
    """
    Pipeline: Cubical Persistence --> Persistence Entropy
    """
    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # cv2.imread loads images in BGR order
    # blur the image to reduce noise
    figure_size = 9  # size of the blur kernel along the x and y axes
    img = cv2.blur(img, (figure_size, figure_size))

    shape = img.shape
    images = np.zeros((1, *shape))
    images[0] = img
    p = make_pipeline(CubicalPersistence(), PersistenceEntropy())
    return p.fit_transform(images)
Example #4
def bettiCurve_pipe1(img_file):
    """
    Pipeline 1: Binarizer --> Height Filtration --> Cubical Persistence --> Betti Curve
    """
    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # cv2.imread loads images in BGR order
    # blur the image to reduce noise
    figure_size = 9  # size of the blur kernel along the x and y axes
    img = cv2.blur(img, (figure_size, figure_size))

    shape = img.shape
    images = np.zeros((1, *shape))
    images[0] = img
    bz = Binarizer(threshold=40 / 255)
    binned = bz.fit_transform(images)
    p = make_pipeline(HeightFiltration(direction=np.array([1, 1])),
                      CubicalPersistence(), BettiCurve(n_bins=50))
    return p.fit_transform(binned)
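
# A minimal sketch (not from the original project) showing how the three helpers above
# could be combined into one feature vector for a single image; ``'digit.png'`` is a
# hypothetical file path.
import numpy as np

features = np.hstack([
    bettiAmplitude('digit.png').ravel(),      # Betti-curve amplitude feature(s)
    persistenceEntropy('digit.png').ravel(),  # persistence entropy per homology dimension
    bettiCurve_pipe1('digit.png').ravel(),    # flattened sampled Betti curves
])
print(features.shape)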
# *Note*: we have not acted on ``y`` above. We are simply using topology to create features from each window! *Note*: there are two features per window because we used the default value of ``homology_dimensions`` in ``VietorisRipsPersistence``, not because we originally had two variables in the time series!
#
# We can now put all of this together into a giotto-tda ``Pipeline``, which combines the sliding-window transformation on ``X`` and the resampling of ``y`` with the extraction of features from the windows of ``X``.
#
# *Note*: while we could import the ``Pipeline`` class and use its constructor, we use the convenience function ``make_pipeline`` instead, which is a drop-in replacement for [scikit-learn's](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html).

# In[7]:

from sklearn import set_config
set_config(display='diagram')  # For HTML representations of pipelines

from gtda.pipeline import make_pipeline

pipe = make_pipeline(SW, PD, VR, Ampl)
pipe
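
# A hedged sketch of what this pipeline produces, assuming ``X`` and ``y`` are the time
# series and targets defined earlier in the notebook: ``gtda.pipeline.Pipeline`` also
# exposes resampling-aware variants of the usual scikit-learn methods, so the window
# features and the resampled targets can be obtained in one call.
X_features, y_resampled = pipe.fit_transform_resample(X, y)
print(X_features.shape, y_resampled.shape)  # one feature row and one target per sliding window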

# Finally, if we have a *regression* task on ``y``, we can append an estimator such as scikit-learn's ``RandomForestRegressor`` as the final step of the previous pipeline, and fit it!

# In[8]:

from sklearn.ensemble import RandomForestRegressor

RFR = RandomForestRegressor()

pipe = make_pipeline(SW, PD, VR, Ampl, RFR)
pipe
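
# A usage sketch, assuming ``X`` and ``y`` are the arrays defined earlier in the notebook:
# with the final estimator attached, the pipeline behaves like any scikit-learn regressor.
pipe.fit(X, y)
y_pred = pipe.predict(X)  # in practice, predict on held-out data rather than the training set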