def _tda_vectorisations_pipeline(self):
    """Build the pipeline that maps chunks to a scaled union of TDA vectorisations.

    Steps, in order: ``Translate`` -> ``Extract`` (restricted to
    ``self.selected_keypoints``) -> ``Smoothing`` -> ``Flattening`` ->
    ``Persistence`` (``complex_='alpha'``, ``max_alpha_square=1``) ->
    ``Separator`` (finite points, no limit) -> ``Prominent`` -> ``Union`` of
    five vectorisations -> final ``Scaler``.

    Every member of the union ends with its own ``RobustScaler``, and one more
    ``RobustScaler`` is applied to the concatenated features.

    :return: the assembled, unfitted ``Pipeline``
    """

    def scaled(vectoriser):
        # Each plain vectorisation is a two-step pipeline: the TDA transformer
        # followed by its own RobustScaler.
        return Pipeline([("TDA", vectoriser), ("Scaler", RobustScaler())])

    # The persistence image needs the birth/persistence transform first, so
    # it gets a dedicated three-step pipeline instead of ``scaled``.
    persistence_image = Pipeline([
        ("Rotator", tda.DiagramPreprocessor(
            scaler=tda.BirthPersistenceTransform())),
        ("PersistenceImage", tda.PersistenceImage()),
        ("Scaler", RobustScaler()),
    ])

    # Chunk preparation and diagram computation/cleaning.
    steps = [
        ("Translate", TranslateChunks()),
        ("Extract", ExtractKeypoints(self.selected_keypoints)),
        ("Smoothing", SmoothChunks()),
        ("Flattening", FlattenTo3D()),
        ("Persistence", Persistence(max_alpha_square=1, complex_='alpha')),
        ("Separator", tda.DiagramSelector(limit=np.inf, point_type="finite")),
        ("Prominent", tda.ProminentPoints()),
    ]

    # Side-by-side vectorisations of the cleaned diagrams.
    vectorisations = FeatureUnion([
        ("PersistenceImage", persistence_image),
        ("Landscape", scaled(tda.Landscape(resolution=10))),
        ("TopologicalVector", scaled(tda.TopologicalVector())),
        ("Silhouette", scaled(tda.Silhouette())),
        ("BettiCurve", scaled(tda.BettiCurve())),
    ])
    steps.append(("Union", vectorisations))

    # Final scaling of the concatenated feature vector.
    steps.append(("Scaler", RobustScaler()))
    return Pipeline(steps)
def generate_PIs(raw_PDs, PI_dim=32, display_each_class=False):
    """Turn each persistence diagram into a persistence image.

    Every diagram is processed on its own: it is wrapped in a one-element
    batch, passed through the birth/persistence transform on columns 0 and 1,
    and vectorised with a ``PI_dim`` x ``PI_dim`` persistence image
    (bandwidth 1, weight ``x[1]``, range ``[0, 10, 0, 10]``).

    :param raw_PDs: iterable of persistence diagrams
    :param PI_dim: number of pixels per image side
    :param display_each_class: when truthy, show every eighth image
        (positions 7, 15, 23, ...) with matplotlib
    :return: ``np.asarray`` of the per-diagram ``fit_transform`` outputs
        (presumably shape ``(n, 1, PI_dim * PI_dim)`` - confirm); an empty
        array when ``raw_PDs`` is empty
    """

    def second_coordinate(point):
        # Weight of a point is its second coordinate after the transform.
        return point[1]

    images = []
    for position, diagram in enumerate(raw_PDs):
        rotator = tda.DiagramPreprocessor(
            use=True,
            scalers=[([0, 1], tda.BirthPersistenceTransform())],
        )
        rotated = rotator.fit_transform(np.asarray([diagram]))

        imager = tda.PersistenceImage(
            bandwidth=1.,
            weight=second_coordinate,
            im_range=[0, 10, 0, 10],
            resolution=[PI_dim, PI_dim],
        )
        image = imager.fit_transform(rotated)
        images.append(image)

        if display_each_class and position % 8 == 7:
            # Flip vertically so the image is drawn bottom-up.
            pixels = np.reshape(image[0], [PI_dim, PI_dim])
            plt.imshow(np.flip(pixels, 0))
            plt.show()

    return np.asarray(images)
import glob

# Every barcode text file in the resized-barcodes folder is loaded as one
# diagram.
txt_files = glob.glob("../Barcodes-resized/*.txt")

for i, path in enumerate(txt_files):
    print(path)

    # The first line of each file is skipped as a header.
    D = np.array(np.genfromtxt(path, skip_header=1))
    diags = [D]

    # NOTE(review): the transform targets columns 1 and 2, which suggests a
    # three-column file layout - confirm against the data files.
    diagsT = tda.DiagramPreprocessor(
        use=True,
        scalers=[([1, 2], tda.BirthPersistenceTransform())],
    ).fit_transform(diags)

    PI = tda.PersistenceImage(
        bandwidth=1.,
        weight=lambda x: x[1],
        im_range=[0, 10, 0, 10],
        resolution=[10, 10],
    )

    # NOTE(review): Persims is not created in this excerpt; it has to exist
    # with one row per file before this loop runs.
    Persims[i][:] = PI.fit_transform(diagsT)

# Numeric class labels 1..8, each repeated 10 times.
c = np.array([1, 2, 3, 4, 5, 6, 7, 8])
ccol = np.repeat(c, 10)

# Matching class names, repeated the same way.
cname = np.array(
    ["Apple", "Bell", "Bird", "Bottle", "Brick", "Children", "Key", "Rat"])
ccolnames = np.repeat(cname, 10)

from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.svm import SVC

# Feature matrix: the persistence images filled in above.
X = Persims
# Silhouette of the diagrams.
# NOTE(review): `pow` is called with a single argument, so it cannot be the
# builtin; presumably a weight-factory helper defined outside this excerpt.
SH = tda.Silhouette(resolution=1000, weight=pow(5))
S = SH.fit_transform(diags)
plt.plot(S[0])
plt.show()

# Betti curve of the diagrams.
BC = tda.BettiCurve(resolution=1000)
B = BC.fit_transform(diags)
plt.plot(B[0])
plt.show()

# Persistence image, computed on the birth/persistence-transformed diagrams.
diagsT = tda.DiagramPreprocessor(
    use=True,
    scaler=tda.BirthPersistenceTransform(),
).fit_transform(diags)
PI = tda.PersistenceImage(
    bandwidth=1.0,
    weight=arctan(1.0, 1.0),
    im_range=[0, 10, 0, 10],
    resolution=[100, 100],
)
I = PI.fit_transform(diagsT)
# The flat vector is reshaped to 100 x 100 and flipped vertically for display.
plt.imshow(np.flipud(np.reshape(I[0], (100, 100))))
plt.show()

# Draw the current diagram, then a second one, together with the diagonal.
plt.scatter(D[:, 0], D[:, 1])
D = np.array([[1.0, 5.0], [3.0, 6.0], [2.0, 7.0]])
plt.scatter(D[:, 0], D[:, 1])
plt.plot([0.0, 10.0], [0.0, 10.0])
plt.show()

# `diags` still refers to the earlier diagram; `diags2` holds the new one.
diags2 = [D]

SW = tda.SlicedWassersteinKernel(num_directions=10, bandwidth=1.0)
X = SW.fit(diags)
def dgms_vecs(self, **kwargs):
    """Vectorise the stored diagrams according to ``self.vec_type``.

    Supported values of ``self.vec_type``:

    * ``'pi'``  - persistence image of ``self.diags`` with fixed settings
      (bandwidth 1, weight ``x[1]``, range ``[0, 10, 0, 10]``, resolution
      100 x 100). The image-related keyword arguments are ignored here.
    * ``'pi_'`` - persistence image of ``self.diags`` configured from the
      ``bandwidth``, ``weight``, ``im_range`` and ``resolution`` kwargs.
    * ``'pl'``  - persistence landscape of ``self.diags`` configured from
      the ``num_landscapes`` and ``resolution`` kwargs.
    * ``'pervec'`` - ``coordinate`` vector of every diagram in
      ``self.dgms`` (accepts the ``dim`` kwarg), stacked along axis 0.

    Side effects: the raw keyword arguments are stored in ``self.param``
    and the formatted elapsed time in ``self.t``.

    :param kwargs: pass all kwargs here; each mode picks the keys it
        understands. ``keep_zero=True`` keeps all-zero columns in the result.
    :return: normalised np.array of shape (n_dgms, n_dim); all-zero columns
        are removed unless ``keep_zero=True`` was passed
    :raises ValueError: if ``self.vec_type`` is not one of the modes above
    """
    self.param = kwargs
    t1 = time.time()

    if self.vec_type == 'pi':
        diagsT = DiagramPreprocessor(
            use=True,
            scalers=[([0, 1], BirthPersistenceTransform())],
        ).fit_transform(self.diags)
        PI = PersistenceImage(bandwidth=1.,
                              weight=lambda x: x[1],
                              im_range=[0, 10, 0, 10],
                              resolution=[100, 100])
        res = PI.fit_transform(diagsT)

    elif self.vec_type == 'pi_':
        kwargs_ = filterdict(
            kwargs, ['bandwidth', 'weight', 'im_range', 'resolution'])
        diagsT = DiagramPreprocessor(
            use=True,
            scalers=[([0, 1], BirthPersistenceTransform())],
        ).fit_transform(self.diags)
        PI = PersistenceImage(**kwargs_)
        res = PI.fit_transform(diagsT)

    elif self.vec_type == 'pl':
        kwargs_ = filterdict(kwargs, ['num_landscapes', 'resolution'])
        LS = tda.Landscape(**kwargs_)
        res = LS.fit_transform(self.diags)

    elif self.vec_type == 'pervec':
        # Permutation vector, i.e. the histogram of coordinates of each dgm.
        # Filter into a separate dict: rebinding ``kwargs`` here used to hide
        # ``keep_zero`` from the check at the end of this method.
        kwargs_ = filterdict(kwargs, ['dim'])
        dgms = self.dgms
        # One concatenate call instead of re-copying the result per diagram.
        res = np.concatenate(
            [coordinate(dgm, **kwargs_) for dgm in dgms], axis=0)
        assert res.shape[0] == len(dgms)

    else:
        raise ValueError(
            'Unknown vec_type. You can only choose pi, pi_, pl or pervec')

    t2 = time.time()
    self.t = precision_format((t2 - t1), 1)

    vectors = normalize_(res, axis=self.axis)
    # Equality (not truthiness) is kept on purpose so that only True-like
    # values such as True/1 switch zero-column removal off, as before.
    if kwargs.get('keep_zero', None) == True:  # noqa: E712
        return vectors
    return rm_zerocol(vectors, cor_flag=False)
def sklearn_tda():
    """Run a small visual/printed tour of the sklearn_tda transformers.

    A four-point diagram is plotted and vectorised as a landscape, a
    silhouette, a Betti curve and a persistence image. It is then compared
    with a second, three-point diagram through three kernels and two
    distances, each result being printed.
    """

    def arctan(C, p):
        def weight(point):
            return C * np.arctan(np.power(point[1], p))
        return weight

    def persistence_to_the_fifth(point):
        return np.power(point[1] - point[0], 5)

    def plot_diagonal():
        plt.plot([0.0, 10.0], [0.0, 10.0])

    # First diagram, drawn against the diagonal.
    first = np.array([[0.0, 4.0], [1.0, 2.0], [3.0, 8.0], [6.0, 8.0]])
    plt.scatter(first[:, 0], first[:, 1])
    plot_diagonal()
    plt.show()
    diags = [first]

    # Landscape: the output vector is drawn as three consecutive
    # 1000-sample pieces.
    landscapes = tda.Landscape(resolution=1000).fit_transform(diags)
    for piece in range(3):
        plt.plot(landscapes[0][piece * 1000:(piece + 1) * 1000])
    plt.show()

    # Silhouette.
    silhouette = tda.Silhouette(resolution=1000,
                                weight=persistence_to_the_fifth)
    plt.plot(silhouette.fit_transform(diags)[0])
    plt.show()

    # Betti curve.
    betti = tda.BettiCurve(resolution=1000)
    plt.plot(betti.fit_transform(diags)[0])
    plt.show()

    # Persistence image on the birth/persistence-transformed diagram.
    rotated = tda.DiagramPreprocessor(
        use=True, scaler=tda.BirthPersistenceTransform()).fit_transform(diags)
    imager = tda.PersistenceImage(bandwidth=1.0,
                                  weight=arctan(1.0, 1.0),
                                  im_range=[0, 10, 0, 10],
                                  resolution=[100, 100])
    image = imager.fit_transform(rotated)
    plt.imshow(np.flip(np.reshape(image[0], [100, 100]), 0))
    plt.show()

    # Both diagrams on the same axes.
    plt.scatter(first[:, 0], first[:, 1])
    second = np.array([[1.0, 5.0], [3.0, 6.0], [2.0, 7.0]])
    plt.scatter(second[:, 0], second[:, 1])
    plot_diagonal()
    plt.show()
    diags2 = [second]

    # Kernels and distances between the two diagrams. Estimators are built
    # lazily so each one is constructed right before it is fitted.
    comparisons = (
        ("SW kernel is ",
         lambda: tda.SlicedWassersteinKernel(num_directions=10,
                                             bandwidth=1.0)),
        ("PWG kernel is ",
         lambda: tda.PersistenceWeightedGaussianKernel(
             bandwidth=1.0, weight=arctan(1.0, 1.0))),
        ("PSS kernel is ",
         lambda: tda.PersistenceScaleSpaceKernel(bandwidth=1.0)),
        ("Wasserstein-1 distance is ",
         lambda: tda.WassersteinDistance(wasserstein=1, delta=0.001)),
        ("sliced Wasserstein distance is ",
         lambda: tda.SlicedWassersteinDistance(num_directions=10)),
    )
    for label, build in comparisons:
        estimator = build()
        estimator.fit(diags)
        value = estimator.transform(diags2)
        print((label + str(value[0][0])))
# Betti curve of the diagrams.
BC = tda.BettiCurve(resolution=1000)
B = BC.fit_transform(diags)
plt.plot(B[0])
plt.show()


def linearWeight(x):
    """Return 1 when ``x[0] <= x[1]``, otherwise the ratio ``x[1] / x[0]``."""
    return 1 if x[0] <= x[1] else x[1] / x[0]


# Persistence image weighted by linearWeight, computed on the
# birth/persistence-transformed diagrams.
diagsT = tda.DiagramPreprocessor(
    use=True,
    scaler=tda.BirthPersistenceTransform(),
).fit_transform(diags)
PI = tda.PersistenceImage(
    bandwidth=1.0,
    weight=linearWeight,
    im_range=[0, 10, 0, 10],
    resolution=[100, 100],
)
I = PI.fit_transform(diagsT)
# The flat vector is reshaped to 100 x 100 and flipped vertically for display.
plt.imshow(np.flipud(np.reshape(I[0], (100, 100))))
plt.show()

# Draw the current diagram, then a second one, together with the diagonal.
plt.scatter(D[:, 0], D[:, 1])
D = np.array([[1.0, 5.0], [3.0, 6.0], [2.0, 7.0]])
plt.scatter(D[:, 0], D[:, 1])
plt.plot([0.0, 10.0], [0.0, 10.0])
plt.show()

# `diags` still refers to the earlier diagram; `diags2` holds the new one.
diags2 = [D]

SW = tda.SlicedWassersteinKernel(num_directions=10, bandwidth=1.0)
X = SW.fit(diags)