def extract_top_features(X, filtrations, vectorizations):
    """
    Extracts topological features from a MNIST-like dataset.

    For each specified filtration and vectorization, features are extracted
    according to the pipeline:
    Filtration -> Persistence diagram -> Rescaling -> Vectorization.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 28, 28)
        A collection of greyscale images.

    filtrations : list of tuples (string, filtration)
        A list of filtrations.
        Assumptions:
            1) The first filtration is 'Voxel', the second is 'Binary', and
               for both of them the pipeline is to be run on the original
               greyscale images. For all subsequent filtrations, the
               pipeline is to be run on binarized images.
            2) For all filtrations except 'Vietoris-Rips', the corresponding
               diagram is the cubical persistence diagram. For
               'Vietoris-Rips', it's the Vietoris-Rips persistence diagram.

    vectorizations : list of tuples (string, vectorization)
        A list of vectorizations.

    Returns
    -------
    X_f : ndarray of shape (n_samples, n_features)
        Topological features for all images in X. If ``filtrations`` is
        empty, the result has shape (n_samples, 0).

    Notes
    -----
    Relies on the module-level names ``num_jobs`` and ``elapsed_time``,
    which are defined outside this function. Progress is reported on
    stdout after each filtration.
    """
    # Put all vectorizations together for convenience
    vect_union = FeatureUnion(vectorizations, n_jobs=num_jobs)

    X_bin = img.Binarizer(threshold=0.4, n_jobs=num_jobs).fit_transform(X)

    # Seed with an empty (n_samples, 0) block so the final concatenation
    # is well defined even when no filtrations are given.
    feature_blocks = [np.array([]).reshape(X.shape[0], 0)]
    current_time = [time.perf_counter()]

    for filt in filtrations:
        name, filtration = filt[0], filt[1]

        if name == 'Vietoris-Rips':
            persistence = VietorisRipsPersistence(n_jobs=num_jobs)
        else:
            persistence = CubicalPersistence(n_jobs=num_jobs)

        filt_features = make_pipeline(
            filtration,
            persistence,
            Scaler(n_jobs=num_jobs),
            vect_union,
        ).fit_transform(X)
        feature_blocks.append(filt_features)

        print("{} complete: {} seconds".format(name, elapsed_time(current_time)))

        if name == 'Binary':
            X = X_bin  # From now on, we only work with binarized images

    # Concatenate once at the end instead of re-copying the growing
    # feature matrix on every iteration.
    return np.hstack(feature_blocks)
def _load_blurred_gray(img_file, kernel_size=9):
    """
    Load an image, convert it to greyscale, blur it, and return it as a
    one-element batch.

    Parameters
    ----------
    img_file : str
        Path of the image file, passed to ``cv2.imread``.
    kernel_size : int, optional
        Side length (in pixels) of the square box-blur kernel.

    Returns
    -------
    images : ndarray of shape (1, height, width), dtype float64
        The blurred greyscale image with a leading sample axis.

    Raises
    ------
    cv2.error
        If the file cannot be read by OpenCV.
    """
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message rather than with an assertion inside cvtColor.
        raise cv2.error("could not read image file: {!r}".format(img_file))

    # cv2.imread yields channels in BGR order, so the BGR conversion code
    # is the one that applies the correct luminance weights.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # blur the image to reduce noise
    img = cv2.blur(img, (kernel_size, kernel_size))

    return np.asarray(img, dtype=np.float64)[np.newaxis, ...]


def bettiAmplitude(img_file):
    """
    Pipeline: Cubical Persistence --> Amplitude of Betti Curve

    Parameters
    ----------
    img_file : str
        Path of the image file to process.

    Returns
    -------
    The output of ``fit_transform`` of the pipeline on the single
    blurred greyscale image.

    Raises
    ------
    cv2.error
        If the image file cannot be read.
    """
    images = _load_blurred_gray(img_file)
    p = make_pipeline(CubicalPersistence(), Amplitude(metric='betti'))
    return p.fit_transform(images)


def persistenceEntropy(img_file):
    """
    Pipeline: Cubical Persistence --> Persistence Entropy

    Parameters
    ----------
    img_file : str
        Path of the image file to process.

    Returns
    -------
    The output of ``fit_transform`` of the pipeline on the single
    blurred greyscale image.

    Raises
    ------
    cv2.error
        If the image file cannot be read.
    """
    images = _load_blurred_gray(img_file)
    p = make_pipeline(CubicalPersistence(), PersistenceEntropy())
    return p.fit_transform(images)


def bettiCurve_pipe1(img_file):
    """
    Pipeline 1: Binarizer --> Height Filtration --> Cubical Persistence
    --> Betti Curve

    Parameters
    ----------
    img_file : str
        Path of the image file to process.

    Returns
    -------
    The output of ``fit_transform`` of the pipeline on the single
    binarized image.

    Raises
    ------
    cv2.error
        If the image file cannot be read.
    """
    images = _load_blurred_gray(img_file)

    # NOTE(review): 40 / 255 presumably corresponds to grey level 40 on a
    # 0-255 scale -- confirm against Binarizer's threshold semantics.
    bz = Binarizer(threshold=40 / 255)
    binned = bz.fit_transform(images)

    p = make_pipeline(
        HeightFiltration(direction=np.array([1, 1])),
        CubicalPersistence(),
        BettiCurve(n_bins=50),
    )
    return p.fit_transform(binned)
# *Note*: while we could import the ``Pipeline`` class and use its constructor, we use the convenience function ``make_pipeline`` instead, which is a drop-in replacement for [scikit-learn's](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html). # # 请注意,我们没有对上面的“y”采取行动。我们只是在使用拓扑从每个窗口创建特征! *注意*:每个窗口有两个特征,因为我们在“VietorisRipsPersistence”中使用了“ homology_dimensions”的默认值,而不是因为我们最初在时间序列中有两个变量! # # 现在我们可以将所有这些放到giotto-tda“Pipeline”中,它将“X”上的滑动窗口转换和“y”的重采样与从“X”的Windows窗口上提取的特征结合在一起。 # # *注意*:虽然我们可以导入“ Pipeline”类并使用其构造函数,但我们使用便利功能“ make_pipeline”代替,它是[scikit-learn's](https:// scikit-learn.org/stable/modules/generation/sklearn.pipeline.make_pipeline.html)。 # In[7]: from sklearn import set_config set_config(display='diagram') # For HTML representations of pipelines from gtda.pipeline import make_pipeline pipe = make_pipeline(SW, PD, VR, Ampl) pipe # Finally, if we have a *regression* task on ``y`` we can add a final estimator such as scikit-learn's ``RandomForestRegressor`` as a final step in the previous pipeline, and fit it! # # 最后,如果在y上有回归任务,我们可以添加最终估计量(例如scikit-learn的RandomForestRegressor)作为上一个管道中的最后一步,并将其拟合! # In[8]: from sklearn.ensemble import RandomForestRegressor RFR = RandomForestRegressor() pipe = make_pipeline(SW, PD, VR, Ampl, RFR) pipe