def cubical_persistence(images, title, plot_diagrams=False, betti_curves=False, scaled=False):
    """Compute cubical persistence diagrams for a batch of images, optionally
    rescaling them and plotting the diagrams and their Betti curves."""
    homology_dimensions = (0, 1, 2)
    cp = CubicalPersistence(
        homology_dimensions=homology_dimensions,
        coeff=2,
        periodic_dimensions=None,
        infinity_values=None,
        reduced_homology=True,
        n_jobs=N_JOBS,
    )
    diagrams_cubical_persistence = cp.fit_transform(images)
    if scaled:
        sc = Scaler(metric="bottleneck")
        diagrams_cubical_persistence = sc.fit_transform(
            diagrams_cubical_persistence)
    if plot_diagrams:
        fig = cp.plot(diagrams_cubical_persistence)
        fig.update_layout(title=title)
        fig.show()
    if betti_curves:
        BC = BettiCurve()
        X_betti_curves = BC.fit_transform(diagrams_cubical_persistence)
        fig = BC.plot(X_betti_curves)
        fig.update_layout(title=title)
        fig.show()
    if title is not None:
        print(f"Computed CP for {title}")
    return diagrams_cubical_persistence
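# Usage sketch (assumption, not from the original source): a minimal call of
# the `cubical_persistence` helper above on toy 3D volumes. The imports mirror
# the giotto-tda names it relies on, and N_JOBS stands in for the module's own
# constant; data and shapes are illustrative only.
import numpy as np
from gtda.homology import CubicalPersistence
from gtda.diagrams import Scaler, BettiCurve

N_JOBS = 1
demo_volumes = np.random.default_rng(0).random((2, 16, 16, 16))
demo_diagrams = cubical_persistence(demo_volumes, title="toy volumes", scaled=True)
# Each sample yields an array of (birth, death, homology dimension) triples.
print(demo_diagrams.shape)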
def get_cubical_persistence(patch):
    cp = CubicalPersistence(
        homology_dimensions=HOMOLOGY_DIMENSIONS,
        coeff=2,
        periodic_dimensions=None,
        infinity_values=None,
        reduced_homology=True,
        n_jobs=N_JOBS,
    )
    diagrams_cubical_persistence = cp.fit_transform(patch)
    return diagrams_cubical_persistence
def cubical_persistence(
    images, title, plot_diagrams=False, betti_curves=False, scaled=False
):
    # Note: title and the plotting/scaling flags are accepted but unused in
    # this variant; only the diagrams are computed and returned.
    homology_dimensions = (0, 1, 2)
    cp = CubicalPersistence(
        homology_dimensions=homology_dimensions,
        coeff=2,
        periodic_dimensions=None,
        infinity_values=None,
        reduced_homology=True,
        n_jobs=N_JOBS,
    )
    diagrams_cubical_persistence = cp.fit_transform(images)
    return diagrams_cubical_persistence
def cubical_persistence(patch):
    cp = CubicalPersistence(
        homology_dimensions=HOMOLOGY_DIMENSIONS,
        coeff=2,
        periodic_dimensions=None,
        infinity_values=None,
        reduced_homology=True,
        n_jobs=N_JOBS,
    )
    diagrams_cubical_persistence = cp.fit_transform(patch)
    sc = Scaler(metric="bottleneck")
    scaled_diagrams_cubical_persistence = sc.fit_transform(
        diagrams_cubical_persistence)
    print("Computed cubical persistence")
    return scaled_diagrams_cubical_persistence
def pipeline1(images):
    """
    Binarizer --> Height Filtration, Erosion Filtration, Dilation Filtration
    --> Cubical Persistence --> Amplitude, PersistenceEntropy

    return: list of feature-extraction pipelines
    """
    # Pipeline parameters
    bin_thresholds = [np.percentile(images[0], 93) / np.max(images[0])]
    directions = [
        np.array([np.cos(t), np.sin(t)])
        for t in np.linspace(0, 2 * np.pi, 8)[:-1]
    ]
    n_iterations = np.linspace(1, 21, 5).astype(int).tolist()
    features = [('bottleneck', Amplitude(metric='bottleneck', n_jobs=-1)),
                ('PE', PersistenceEntropy(n_jobs=-1))]

    # Make filtrations
    binned_steps = [('binarizer_{}'.format(t), Binarizer(threshold=t, n_jobs=-1))
                    for t in bin_thresholds]
    filtrations = [('height_{}'.format(d), HeightFiltration(direction=d, n_jobs=-1))
                   for d in directions]
    filtrations += [('erosion_{}'.format(i), ErosionFiltration(n_iterations=i, n_jobs=-1))
                    for i in n_iterations]
    filtrations += [('dilation_{}'.format(i), DilationFiltration(n_iterations=i, n_jobs=-1))
                    for i in n_iterations]

    # Make pipelines
    cubical_lower = ('cubical', CubicalPersistence(n_jobs=-1))
    partial_pipeline_steps = []
    partial_pipeline_steps.append([cubical_lower])
    partial_pipeline_steps.append([('inverter', Inverter(n_jobs=-1)), cubical_lower])
    for b, f in itertools.product(binned_steps, filtrations):
        partial_pipeline_steps.append(
            [b, f, ('cubical', CubicalPersistence(n_jobs=-1))])

    feature_pipelines = []
    for s, f in itertools.product(partial_pipeline_steps, features):
        feature_pipelines.append(Pipeline(s + [f]))
    return feature_pipelines
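# Usage sketch (assumption, not from the original source): build the pipeline
# collection on toy images and stack the outputs of the first two pipelines
# (plain cubical persistence followed by Amplitude / PersistenceEntropy) into
# one feature matrix. Imports mirror the giotto-tda and scikit-learn names
# `pipeline1` relies on; data and shapes are illustrative only.
import itertools

import numpy as np
from sklearn.pipeline import Pipeline
from gtda.homology import CubicalPersistence
from gtda.images import (Binarizer, HeightFiltration, ErosionFiltration,
                         DilationFiltration, Inverter)
from gtda.diagrams import Amplitude, PersistenceEntropy

demo_images = np.random.default_rng(1).random((4, 28, 28))
demo_pipelines = pipeline1(demo_images)
demo_features = np.hstack(
    [p.fit_transform(demo_images) for p in demo_pipelines[:2]]  # first two only, for speed
)
print(demo_features.shape)  # (n_samples, n_features)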
def bettiAmplitude(img_file):
    """
    Pipeline: Cubical Persistence --> Amplitude of Betti Curve
    """
    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # blur the image to reduce noise
    figure_size = 9  # the dimension of the x and y axes of the kernel
    img = cv2.blur(img, (figure_size, figure_size))
    shape = img.shape
    images = np.zeros((1, *shape))
    images[0] = img
    p = make_pipeline(CubicalPersistence(), Amplitude(metric='betti'))
    return p.fit_transform(images)
def persistenceEntropy(img_file):
    """
    Pipeline: Cubical Persistence --> Persistence Entropy
    """
    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # blur the image to reduce noise
    figure_size = 9  # the dimension of the x and y axes of the kernel
    img = cv2.blur(img, (figure_size, figure_size))
    shape = img.shape
    images = np.zeros((1, *shape))
    images[0] = img
    p = make_pipeline(CubicalPersistence(), PersistenceEntropy())
    return p.fit_transform(images)
def bettiCurve_pipe1(img_file):
    """
    Pipeline 1: Binarizer --> Height Filtration --> Cubical Persistence --> Betti Curve
    """
    img = cv2.imread(img_file)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # blur the image to reduce noise
    figure_size = 9  # the dimension of the x and y axes of the kernel
    img = cv2.blur(img, (figure_size, figure_size))
    shape = img.shape
    images = np.zeros((1, *shape))
    images[0] = img
    bz = Binarizer(threshold=40 / 255)
    binned = bz.fit_transform(images)
    p = make_pipeline(HeightFiltration(direction=np.array([1, 1])),
                      CubicalPersistence(),
                      BettiCurve(n_bins=50))
    return p.fit_transform(binned)
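# Usage sketch (assumption, not from the original source): write a synthetic
# greyscale image to disk and run the three single-image pipelines above on
# it. The file name and image content are illustrative; the imports mirror the
# OpenCV / giotto-tda names the functions rely on.
import cv2
import numpy as np
from sklearn.pipeline import make_pipeline
from gtda.images import Binarizer, HeightFiltration
from gtda.homology import CubicalPersistence
from gtda.diagrams import Amplitude, PersistenceEntropy, BettiCurve

demo_file = "demo_patch.png"
demo_img = (np.random.default_rng(2).random((64, 64)) * 255).astype(np.uint8)
cv2.imwrite(demo_file, demo_img)

print(bettiAmplitude(demo_file))          # amplitude of the Betti curve
print(persistenceEntropy(demo_file))      # persistence entropy per homology dimension
print(bettiCurve_pipe1(demo_file).shape)  # (1, n_homology_dimensions, 50) Betti curves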
def extract_top_features(X, filtrations, vectorizations):
    """
    Extracts topological features from a MNIST-like dataset.

    For each specified filtration and vectorization, features are extracted
    according to the pipeline:
    Filtration -> Persistence diagram -> Rescaling -> Vectorization.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 28, 28)
        A collection of greyscale images.

    filtrations : list of tuples (string, filtration)
        A list of filtrations.
        Assumptions:
        1) The first filtration is 'Voxel', the second is 'Binary', and for
           both of them the pipeline is to be run on the original greyscale
           images. For all subsequent filtrations, the pipeline is to be run
           on binarized images.
        2) For all filtrations except 'Vietoris-Rips', the corresponding
           diagram is the cubical persistence diagram. For 'Vietoris-Rips',
           it is the Vietoris-Rips persistence diagram.

    vectorizations : list of tuples (string, vectorization)
        A list of vectorizations.

    Returns
    -------
    X_f : ndarray of shape (n_samples, n_features)
        Topological features for all images in X.
    """
    # Put all vectorizations together for convenience
    vect_union = FeatureUnion(vectorizations, n_jobs=num_jobs)
    X_bin = img.Binarizer(threshold=0.4, n_jobs=num_jobs).fit_transform(X)
    X_f = np.array([]).reshape(X.shape[0], 0)
    current_time = [time.perf_counter()]
    for filt in filtrations:
        filt_features = make_pipeline(
            filt[1],
            VietorisRipsPersistence(n_jobs=num_jobs) if filt[0] == 'Vietoris-Rips'
            else CubicalPersistence(n_jobs=num_jobs),
            Scaler(n_jobs=num_jobs),
            vect_union).fit_transform(X)
        X_f = np.hstack((X_f, filt_features))
        print("{} complete: {} seconds".format(filt[0], elapsed_time(current_time)))
        if filt[0] == 'Binary':
            X = X_bin  # From now on, we only work with binarized images
    return X_f
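# Usage sketch (assumption, not from the original source): example
# `filtrations` and `vectorizations` lists following the docstring's
# conventions, applied to toy data. The original module defines its own
# `num_jobs`, `img` alias and `elapsed_time` helper; minimal stand-ins are
# provided here so the sketch runs on its own.
import time

import numpy as np
from sklearn.pipeline import FeatureUnion, make_pipeline
from sklearn.preprocessing import FunctionTransformer
from gtda import images as img
from gtda.homology import CubicalPersistence, VietorisRipsPersistence
from gtda.diagrams import Scaler, PersistenceEntropy, Amplitude

num_jobs = 1

def elapsed_time(t):
    # Stand-in for the original timing helper: seconds since t[0], then reset it.
    now = time.perf_counter()
    seconds, t[0] = now - t[0], now
    return round(seconds, 2)

demo_X = np.random.default_rng(3).random((6, 28, 28))
demo_filtrations = [
    ("Voxel", FunctionTransformer()),  # identity: raw greyscale values act as the filtration
    ("Binary", img.Binarizer(threshold=0.4, n_jobs=num_jobs)),
    ("Height", img.HeightFiltration(direction=np.array([1, 0]), n_jobs=num_jobs)),
]
demo_vectorizations = [
    ("PE", PersistenceEntropy(n_jobs=num_jobs)),
    ("WassAmp", Amplitude(metric="wasserstein", n_jobs=num_jobs)),
]
print(extract_top_features(demo_X, demo_filtrations, demo_vectorizations).shape)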
def test_cp_fit_transform_plot(hom_dims):
    CubicalPersistence().fit_transform_plot(
        X, sample=0, homology_dimensions=hom_dims)
def test_cp_not_fitted():
    cp = CubicalPersistence()
    with pytest.raises(NotFittedError):
        cp.transform(X)
n_widths = len(width_list)
n_widths_for_thread_attribution = 1  # TODO verify sklearn jobs
start = time.time()

# Note that padding should be the same so that output images always have the same size
feature_transformers = [
    PersistenceEntropy(n_jobs=1),
    Amplitude(metric='wasserstein', metric_params={'p': 2}, order=None, n_jobs=1),
    NumberOfPoints(n_jobs=1)
]
n_subimage_features = len(feature_transformers)

transformer = make_pipeline(
    CubicalPersistence(homology_dimensions=(0, 1, 2),
                       n_jobs=int(n_threads / n_widths_for_thread_attribution)),
    Filtering(epsilon=np.max(X) - 1, below=False),
    Scaler(),
    make_union(*feature_transformers,
               n_jobs=int((n_threads / n_widths_for_thread_attribution)
                          / n_subimage_features)))

# Batch decomposition to spare memory
X_features = []
masked_y = []
masked_subimages = []
for batch_offset in tqdm(range(0, X.shape[0], batch_size)):
    X_batch = X[batch_offset:batch_offset + batch_size]
    mask_batch = mask[batch_offset:batch_offset + batch_size]
    # apply mask on ground truth
    batch_y = y[batch_offset:batch_offset + batch_size]
def evaluate_topological_hemisphere_classification(
        X, y, x_mask, save_dir, experiment_name,
        features=['PersistenceEntropy', 'Amplitude', 'NumberOfPoints'],
        amplitude_metric='wasserstein',
        processing_filter=True,
        processing_scale=True,
        processing_scaler_metric='bottleneck',
        homology_dimensions=(0, 1, 2),
        inverse_input=True,
        model='LogisticRegression',
        n_subjects=None,
        n_threads=50,
        subsampling_factor=2,
        split_seed=42,
        verbose=True,
        save_input_features=False,
        save_output=False):
    # Create necessary directories to save data
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    experiment_save_dir = os.path.join(save_dir, experiment_name)
    if not os.path.exists(experiment_save_dir):
        os.mkdir(experiment_save_dir)
    else:
        # Avoid overwriting a previous run: fall back to a timestamped directory
        experiment_save_dir = f'{experiment_save_dir}_{time.strftime("%Y%m%d_%H%M%S")}'
        os.mkdir(experiment_save_dir)
    if save_output or save_input_features:
        pickle_dir = os.path.join(experiment_save_dir, 'pickled_data')
        if not os.path.exists(pickle_dir):
            os.mkdir(pickle_dir)

    lesion_presence_GT = np.any(y, axis=(1, 2, 3))

    # Reshape ct_inputs as it has 1 channel
    X = X.reshape(X.shape[:-1])

    if n_subjects is None:
        n_subjects = X.shape[0]

    # Apply brain masks
    X = (X[:n_subjects] * x_mask[:n_subjects])[range(n_subjects),
                                               ::subsampling_factor,
                                               ::subsampling_factor,
                                               ::subsampling_factor]
    y = lesion_presence_GT[:n_subjects]

    # Normalise data
    # Capping (threshold to 0-500 as values outside this range seem non relevant
    # to the vascular analysis)
    vmin = 0
    vmax = 500
    X[X < vmin] = vmin
    X[X > vmax] = vmax

    ## Feature Creation
    n_widths_for_thread_attribution = 1  # TODO verify sklearn jobs
    start = time.time()

    if inverse_input:
        X = invert_image(X)

    feature_transformers = []
    if 'PersistenceEntropy' in features:
        feature_transformers.append(PersistenceEntropy(n_jobs=1))
    if 'Amplitude' in features:
        feature_transformers.append(
            Amplitude(metric=amplitude_metric, order=None, n_jobs=1))
    if 'NumberOfPoints' in features:
        feature_transformers.append(NumberOfPoints(n_jobs=1))
    n_subimage_features = len(feature_transformers)

    processing_pipeline = [
        CubicalPersistence(homology_dimensions=homology_dimensions,
                           n_jobs=int(n_threads / n_widths_for_thread_attribution)),
        make_union(*feature_transformers,
                   n_jobs=int((n_threads / n_widths_for_thread_attribution)
                              / n_subimage_features))
    ]
    if processing_filter:
        processing_pipeline.insert(
            1, Filtering(epsilon=np.max(X) - 1, below=False))
    if processing_scale:
        processing_pipeline.insert(-1, Scaler(metric=processing_scaler_metric))
    transformer = make_pipeline(*processing_pipeline)

    X_features = transformer.fit_transform(X)
    n_features = X_features.shape[1]

    end = time.time()
    feature_creation_timing = end - start
    if verbose:
        print(f'Features ready after {feature_creation_timing} s')

    ## Feature Classification
    #### Create classifier
    start = time.time()
    if model == 'LogisticRegression':
        classifier = LogisticRegression(n_jobs=-1)
    elif model == 'RandomForestClassifier':
        classifier = RandomForestClassifier(n_estimators=10000, n_jobs=-1)
    else:
        raise Exception(f'Model {model} not known')

    #### Prepare dataset
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_features, y, test_size=0.3, random_state=split_seed)

    if save_input_features:
        pickle.dump(X_train, open(os.path.join(pickle_dir, 'X_train.p'), 'wb'))
        pickle.dump(X_valid, open(os.path.join(pickle_dir, 'X_valid.p'), 'wb'))
        pickle.dump(y_train, open(os.path.join(pickle_dir, 'y_train.p'), 'wb'))
        pickle.dump(y_valid, open(os.path.join(pickle_dir, 'y_valid.p'), 'wb'))

    #### Train classifier
    classifier.fit(X_train, y_train)

    #### Apply classifier
    valid_probas = classifier.predict_proba(X_valid)
    valid_predicted = classifier.predict(X_valid)
    train_probas = classifier.predict_proba(X_train)
    train_predicted = classifier.predict(X_train)

    if save_output:
        ### save classifier
        pickle.dump(
            classifier,
            open(os.path.join(pickle_dir, 'trained_classifier.p'), 'wb'))
        ### save predicted output
        pickle.dump(valid_probas,
                    open(os.path.join(pickle_dir, 'valid_probas.p'), 'wb'))
        pickle.dump(valid_predicted,
                    open(os.path.join(pickle_dir, 'valid_predicted.p'), 'wb'))
        pickle.dump(train_probas,
                    open(os.path.join(pickle_dir, 'train_probas.p'), 'wb'))
        pickle.dump(train_predicted,
                    open(os.path.join(pickle_dir, 'train_predicted.p'), 'wb'))

    #### Reconstruct output
    end = time.time()
    feature_classification_and_prediction_time = end - start

    ## Model (Features + Classifier) Evaluation
    train_acc = accuracy_score(train_predicted, y_train)
    valid_acc = accuracy_score(valid_predicted, y_valid)
    if verbose:
        print('Train Accuracy:', train_acc)
        print('Test Accuracy:', valid_acc)

    with open(os.path.join(experiment_save_dir, 'logs.txt'), "a") as log_file:
        log_file.write('Train Accuracy: %s\n' % train_acc)
        log_file.write('Test Accuracy: %s\n' % valid_acc)
        log_file.write('Feature Creation timing: %s\n' % feature_creation_timing)
        log_file.write('Feature Classification and Prediction timing: %s\n'
                       % feature_classification_and_prediction_time)

    ## Model feature analysis
    #### Model confusion matrix
    # confusion = confusion_matrix(y_valid, valid_predicted)
    # plt.imshow(confusion)
    # plt.savefig(os.path.join(experiment_save_dir, experiment_name + '_confusion_matrix.png'))
    #
    # #### Feature correlation
    # correlation = np.abs(np.corrcoef(X_train.T))
    # plt.imshow(correlation)
    # plt.savefig(os.path.join(experiment_save_dir, experiment_name + '_correlation_matrix.png'))

    return train_acc, valid_acc, n_features, feature_creation_timing, \
        feature_classification_and_prediction_time
im_filtration = radial_filtration.fit_transform(im_binarized)
radplot = radial_filtration.plot(im_filtration, colorscale="jet")
radplot.update_layout(template='plotly_dark')

# PLOTS
st.subheader("")
st.write(binplot)
st.subheader("Figure 1. Binarized Plot")
st.subheader("")
st.subheader("")
st.write(radplot)
st.subheader("Figure 2. Radial Filtration Plot")
st.subheader("")

# CUBICAL SIMPLICIAL COMPLEXES
cubical_persistence = CubicalPersistence(n_jobs=-1)
im_cubical = cubical_persistence.fit_transform(im_filtration)
cubplot = cubical_persistence.plot(im_cubical)
cubplot.update_layout(template='plotly_dark')
st.subheader("")
st.write(cubplot)
st.subheader("Figure 3. Cubical Simplicial Complex Persistence Diagram")
st.subheader("")
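# Standalone sketch (assumption, not from the original app): the chain the
# fragment above plots, i.e. binarize a greyscale image, apply a radial
# filtration, then compute cubical persistence. The image content and the
# filtration centre are illustrative only.
import numpy as np
from gtda.images import Binarizer, RadialFiltration
from gtda.homology import CubicalPersistence

demo_im = np.random.default_rng(5).random((1, 28, 28))
demo_binarized = Binarizer(threshold=0.4).fit_transform(demo_im)
demo_filtered = RadialFiltration(center=np.array([14, 14])).fit_transform(demo_binarized)
demo_diagrams = CubicalPersistence(n_jobs=-1).fit_transform(demo_filtered)
print(demo_diagrams.shape)  # (1, n_points, 3)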
def calculate_dgms(self):
    cubpers = CubicalPersistence(n_jobs=-1)
    self.dgms = cubpers.fit_transform(-self.sacs)
    return self.dgms
def test_cp_transform(periodic_dimensions, expected):
    cp = CubicalPersistence(periodic_dimensions=periodic_dimensions)
    assert_almost_equal(cp.fit_transform(X), expected)
def main():
    path_to_diags = "../data/collected_diagnoses_complete.json"
    with open(path_to_diags) as f:
        diagnoses = json.load(f)
    patients = list(diagnoses.keys())

    # Sort diagnoses key
    diagnoses = collections.OrderedDict(sorted(diagnoses.items()))

    # Where the data comes from
    data_dir = DOTENV_KEY2VAL["DATA_DIR"] + "/patch_91/"
    # Where the resulting distance matrices are saved.
    distance_dir = "/temporal_evolution/"
    utils.make_dir(DOTENV_KEY2VAL["GEN_DATA_DIR"] + distance_dir)

    distances_to_evaluate = [
        # "bottleneck",
        # "wasserstein",
        # "betti",
        "landscape",
        # "silhouette",
        # "heat",
        "persistence_image",
    ]

    # patients = ["sub-ADNI011S0023", "sub-ADNI029S0878"]
    # If we want to process multiple patients, we just throw them in a loop.
    for i, patient in tqdm(enumerate(patients), total=len(patients)):
        for distance in distances_to_evaluate:
            patches = []
            for mri in diagnoses[patient]:
                try:
                    patches.append(
                        np.load(
                            data_dir
                            + patient
                            + mri.replace("ses", "")
                            + "-MNI.npy"
                        )
                    )
                except FileNotFoundError:
                    pass
                    # print(
                    #     data_dir
                    #     + patient
                    #     + mri.replace("ses", "")
                    #     + "-MNI.npy"
                    #     + " not found"
                    # )
            # Stacking enables multiprocessing
            patches = np.stack(patches)
            cp = CubicalPersistence(
                homology_dimensions=HOMOLOGY_DIMENSIONS,
                coeff=2,
                periodic_dimensions=None,
                infinity_values=None,
                reduced_homology=True,
                n_jobs=-1,
            )
            diagrams_cubical_persistence = cp.fit_transform(patches)
            pl_dist = PairwiseDistance(
                metric=distance,
                metric_params=None,
                order=None,
                n_jobs=-1,
            )
            X_distance = pl_dist.fit_transform(diagrams_cubical_persistence)
            with open(
                DOTENV_KEY2VAL["GEN_DATA_DIR"]
                + distance_dir
                + f"patient_evolution_distance_data_patient_{patient}"
                f"_{distance}.npy",
                "wb",
            ) as f:
                np.save(f, X_distance)
def calculate_dgms(self, digits=64):
    sacs = self.sacs_.round(digits)
    cubpers = CubicalPersistence(infinity_values=1, n_jobs=-1)
    self.dgms_ = cubpers.fit_transform(-sacs)
    return self.dgms_
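# Sketch (assumption, not from this class): both `calculate_dgms` methods
# negate their input, the usual trick for obtaining superlevel-set rather than
# sublevel-set cubical persistence from giotto-tda. A standalone illustration
# on a toy array:
import numpy as np
from gtda.homology import CubicalPersistence

demo_sacs = np.random.default_rng(4).random((1, 32, 32))
demo_superlevel_dgms = CubicalPersistence(infinity_values=1, n_jobs=-1).fit_transform(-demo_sacs)
print(demo_superlevel_dgms.shape)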
def generate_sample_representations(paths_to_patches, labels):
    sample_rep_dir = DOTENV_KEY2VAL["GEN_FIGURES_DIR"] + "/sample_rep/"
    try:
        os.mkdir(sample_rep_dir)
    except OSError:
        print("Creation of the directory %s failed" % sample_rep_dir)
    else:
        print("Successfully created the directory %s " % sample_rep_dir)
    for i, path in enumerate(paths_to_patches):
        patch = np.load(path)
        cp = CubicalPersistence(
            homology_dimensions=(0, 1, 2),
            coeff=2,
            periodic_dimensions=None,
            infinity_values=None,
            reduced_homology=True,
            n_jobs=N_JOBS,
        )
        diagrams_cubical_persistence = cp.fit_transform(
            patch.reshape(1, 30, 36, 30)
        )
        for h_dim in HOMOLOGY_DIMENSIONS:
            cp.plot(
                diagrams_cubical_persistence,
                homology_dimensions=[h_dim],
            ).update_traces(
                marker=dict(size=10, color=HOMOLOGY_CMAP[h_dim]),
            ).write_image(
                sample_rep_dir
                + f"persistence_diagram_{labels[i]}_H_{h_dim}.png",
                scale=SCALE,
            )
        representation_names = [
            "Persistence landscape",
            "Betti curve",
            "Persistence image",
            "Heat kernel",
            "Silhouette",
        ]
        for j, rep in enumerate(representation_names):
            # Have not found a better way of doing this yet.
            if rep == "Persistence landscape":
                rep = PersistenceLandscape(
                    n_layers=N_LAYERS, n_bins=VEC_SIZE, n_jobs=N_JOBS
                )
            elif rep == "Betti curve":
                rep = BettiCurve()
            elif rep == "Persistence image":
                rep = PersistenceImage(
                    sigma=0.001, n_bins=VEC_SIZE, n_jobs=N_JOBS
                )
            elif rep == "Heat kernel":
                rep = HeatKernel(sigma=0.001, n_bins=VEC_SIZE, n_jobs=N_JOBS)
            elif rep == "Silhouette":
                rep = Silhouette(power=1.0, n_bins=VEC_SIZE, n_jobs=N_JOBS)
            vectorial_representation = rep.fit_transform(
                diagrams_cubical_persistence
            )
            if representation_names[j] in ["Persistence image", "Heat kernel"]:
                for h_dim in range(vectorial_representation.shape[1]):
                    plt.imshow(
                        vectorial_representation[0:, h_dim, :, :].reshape(
                            VEC_SIZE, VEC_SIZE
                        ),
                        cmap=(HOMOLOGY_CMAP[h_dim] + "s").capitalize(),
                    )
                    # plt.title(
                    #     f"{representation_names[j]} representation of a "
                    #     f"{labels[i]} patient in h_{image}"
                    # )
                    plt.savefig(
                        sample_rep_dir
                        + f"{representation_names[j].replace(' ', '_')}"
                        f"_{labels[i]}_h_{h_dim}.png",
                        bbox_inches="tight",
                    )
            else:
                rep.plot(vectorial_representation).update_layout(
                    title=None,
                    margin=dict(l=0, r=0, b=0, t=0, pad=4),
                ).write_image(
                    sample_rep_dir
                    + f"{representation_names[j].replace(' ', '_')}"
                    f"_{labels[i]}.png",
                    scale=SCALE,
                )
        print(f"Done plotting {labels[i]} sample")
def main():
    path_to_diags = "../data/collected_diagnoses_complete.json"
    patients = ["sub-ADNI005S0223"]
    progr = ["cn_mci_ad"]
    with open(path_to_diags) as f:
        diagnoses = json.load(f)

    # Sort diagnoses key
    diagnoses = collections.OrderedDict(sorted(diagnoses.items()))

    # Where the data comes from
    data_dir = DOTENV_KEY2VAL["DATA_DIR"] + "/patch_91/"
    # Where the figures are saved
    temporal_progression_dir = "/temporal_evolution/"
    utils.make_dir(
        DOTENV_KEY2VAL["GEN_FIGURES_DIR"] + temporal_progression_dir
    )
    # Where the resulting distance matrices are saved.
    time_series_dir = "/temporal_evolution/"
    utils.make_dir(DOTENV_KEY2VAL["GEN_DATA_DIR"] + temporal_progression_dir)

    # If we want to process multiple patients, we just throw them in a loop.
    for i, patient in enumerate(patients):
        print(
            "Processing longitudinal data for "
            + patient
            + " with progression pattern "
            + progr[i]
        )
        patches = []
        for mri in diagnoses[patient]:
            try:
                patches.append(
                    np.load(
                        data_dir + patient + mri.replace("ses", "") + "-MNI.npy"
                    )
                )
            except FileNotFoundError:
                print(
                    data_dir
                    + patient
                    + mri.replace("ses", "")
                    + "-MNI.npy"
                    + " not found"
                )
        # Stacking enables multiprocessing
        patches = np.stack(patches)
        cp = CubicalPersistence(
            homology_dimensions=HOMOLOGY_DIMENSIONS,
            coeff=2,
            periodic_dimensions=None,
            infinity_values=None,
            reduced_homology=True,
            n_jobs=-1,
        )
        diagrams_cubical_persistence = cp.fit_transform(patches)
        pl_dist = PairwiseDistance(
            metric="landscape", metric_params=None, order=None, n_jobs=-1
        )
        X_distance = pl_dist.fit_transform(diagrams_cubical_persistence)
        with open(
            DOTENV_KEY2VAL["GEN_DATA_DIR"]
            + time_series_dir
            + f"distance_data_patient_{patient}_{progr[i]}_landscape.npy",
            "wb",
        ) as f:
            np.save(f, X_distance)