"""Embed a Swiss roll sample into two dimensions with spectral embedding."""
import time

from sklearn.manifold import SpectralEmbedding

from src.datasets.datasets import SwissRoll, Spheres
from src.utils.plots import plot_2Dscatter


def main():
    """Sample the Swiss roll, time the spectral embedding, and show the result.

    The measured interval covers both the construction of the
    ``SpectralEmbedding`` estimator and the ``fit_transform`` call.
    """
    points, point_colors = SwissRoll().sample(n_samples=2560)

    t_begin = time.time()
    reducer = SpectralEmbedding(n_components=2, n_jobs=1, n_neighbors=90)
    points_2d = reducer.fit_transform(points)
    t_finish = time.time()

    print('It took: {}'.format(t_finish - t_begin))

    # Nothing is written to disk (path_to_save=None); the figure is only shown.
    plot_2Dscatter(
        data=points_2d,
        labels=point_colors,
        path_to_save=None,
        title=None,
        show=True,
    )


if __name__ == "__main__":
    main()
for v in itertools.product(*values): ret_i = dict(root_path=self.root_path, global_register=self.global_register, verbose=self.verbose) for kc, kc_v in zip(key_chains, v): tmp = ret_i for k in kc[:-1]: if k not in tmp: tmp[k] = {} tmp = tmp[k] tmp[kc[-1]] = kc_v ret.append(ConfigWC(**ret_i)) return ret placeholder_config_wc = ConfigWC(dataset=SwissRoll(), sampling_kwargs=dict(n_samples=2560), batch_size=64, wc_kwargs=dict(), eval_size=0.2, n_jobs=1, seed=1, global_register='', root_path='', verbose=False)
"""Scratch check: reload a stored label tensor and print a ConfigWC uid."""
import os

import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

from src.data_preprocessing.witness_complex_offline.config import ConfigWC
from src.datasets.datasets import SwissRoll

if __name__ == "__main__":
    n_samples = 5

    # Reference labels for the comparison below.  The definition used to be
    # commented out, so `torch.eq(labels_loaded, labels)` raised a NameError.
    labels = torch.from_numpy(np.array(range(n_samples)))

    path = '/Users/simons/PycharmProjects/MT-VAEs-TDA/output/tests'
    labels_file = os.path.join(path, 'labels_tensor.pt')

    # The file is presumably produced once with
    #     torch.save(labels, labels_file)
    # NOTE(review): confirm the stored tensor was created from the same
    # `range(n_samples)` labels, otherwise the comparison is meaningless.
    labels_loaded = torch.load(labels_file)

    # Element-wise equality between the reloaded and the in-memory labels.
    print(torch.eq(labels_loaded, labels))

    # NOTE(review): arguments are positional; confirm they still line up with
    # the current ConfigWC field order.
    config = ConfigWC(SwissRoll(), dict(), dict(), 1, 1, 'global_bla',
                      'root_bla')
    print(config.uid)
"""Time ``WitnessComplex.compute_metric_optimized`` on a Swiss roll sample."""
import random
import time

import torch
from sklearn.metrics import pairwise_distances

from src.datasets.datasets import SwissRoll
from src.topology.witness_complex import WitnessComplex

if __name__ == "__main__":
    N_WITNESSES = 512
    N_LANDMARKS = 32

    landmark_dist = torch.ones(N_LANDMARKS, N_LANDMARKS) * 1000000

    dataset = SwissRoll()
    X_witnesses, _ = dataset.sample(n_samples=N_WITNESSES)

    # Landmarks are a random subset of the witness points.  `random` was used
    # here without being imported, which raised a NameError.
    ind_l = random.sample(range(N_WITNESSES), N_LANDMARKS)
    X_landmarks = X_witnesses[ind_l, :]

    witness_complex1 = WitnessComplex(X_landmarks, X_witnesses)

    for n_jobs in [1]:
        # Fresh complex per setting so every timing starts from the same state.
        witness_complex1 = WitnessComplex(X_landmarks, X_witnesses)

        start = time.time()
        # Pass the loop value: the call used to hard-code n_jobs=-1 while the
        # report below printed `n_jobs`, so the label and the measurement
        # disagreed.
        witness_complex1.compute_metric_optimized(n_jobs=n_jobs)
        end = time.time()

        print('{} jobs --- Time needed: {}'.format(n_jobs, end - start))

    witness_complex2 = WitnessComplex(X_landmarks, X_witnesses)
"""Witness-complex grid configurations for the Swiss roll (with and without noise)."""
from scripts.ssc.wc_offline.config_libraries.global_register_definitions import (
    PATH_GR_SWISSROLL_EULER,
    PATH_GR_SWISSROLL_NOISE_EULER,
)
from src.data_preprocessing.witness_complex_offline.config import ConfigWC_Grid
from src.datasets.datasets import SwissRoll

# Noise-free Swiss roll: 4 batch sizes x 10 seeds.
_nonoise_grid = dict(
    dataset=[SwissRoll()],
    sampling_kwargs=[dict(n_samples=2560)],
    batch_size=[64, 128, 256, 512],
    wc_kwargs=[dict()],
    eval_size=[0.2],
    n_jobs=[2],
    seed=[36, 3851, 2570, 4304, 1935, 7954, 5095, 5310, 1577, 3288],
    global_register=PATH_GR_SWISSROLL_EULER,
    root_path='/cluster/home/schsimo/MT/output/WitnessComplexes/SwissRoll/nonoise',
    verbose=True,
)
swissroll_nonoise = ConfigWC_Grid(**_nonoise_grid)

# Same grid layout, sampled with noise=0.05 and its own seed list, register
# and output directory.
_noise005_grid = dict(
    dataset=[SwissRoll()],
    sampling_kwargs=[dict(n_samples=2560, noise=0.05)],
    batch_size=[64, 128, 256, 512],
    wc_kwargs=[dict()],
    eval_size=[0.2],
    n_jobs=[2],
    seed=[6973, 5305, 6233, 1503, 3947, 1425, 3391, 2941, 1218, 7946],
    global_register=PATH_GR_SWISSROLL_NOISE_EULER,
    root_path='/cluster/home/schsimo/MT/output/WitnessComplexes/SwissRoll/noise',
    verbose=True,
)
swissroll_noise005 = ConfigWC_Grid(**_noise005_grid)
"""Set up a t-SNE model next to a small Swiss roll sample."""
from sklearn.manifold import TSNE

from src.datasets.datasets import SwissRoll

if __name__ == "__main__":
    # Default-parameter t-SNE estimator; it is only constructed here.
    dataset = SwissRoll()

    # 100 points together with their labels.
    data, labels = dataset.sample(
        n_samples=100,
    )

    model = TSNE()
"""Sample Swiss roll witnesses and landmarks for witness-complex visualisation."""
import random

import torch
import numpy as np
import matplotlib.pyplot as plt

from scripts.ssc.pairings_visualization.utils_definitions import make_plot
from src.datasets.datasets import SwissRoll
from src.topology.witness_complex import WitnessComplex

if __name__ == "__main__":
    dataset_sampler = SwissRoll()

    N_WITNESSES = 2048  # total number of witness points (landmarks included)
    n_samples = 128     # number of landmark points

    path_to_save = '/Users/simons/PycharmProjects/MT-VAEs-TDA/output/visualisation_nnsys/wc{}_w{}/'.format(
        n_samples, N_WITNESSES)

    N_sim = 100
    ks = [1]

    ntot = int(len(ks) * N_sim)
    counter = 1

    for seed in [30]:
        # The landmark count used to be hard-coded as 128 in both calls.
        # Using `n_samples` keeps the sampled sizes consistent with the value
        # that already names the output directory.
        witnesses, color_ = dataset_sampler.sample(
            (N_WITNESSES - n_samples), seed=seed)
        # The landmark seed stays fixed at 30, as in the original.
        landmarks, color = dataset_sampler.sample(n_samples, seed=30)

        # Landmarks are appended so that they also act as witnesses.
        witnesses_all = np.vstack((witnesses, landmarks))
"""Plot the zero-filtration edges of a tangential complex built on a Swiss roll."""
import gudhi

from scripts.ssc.persistence_pairings_visualization.utils_definitions import make_plot
from src.datasets.datasets import SwissRoll

if __name__ == "__main__":
    n_points = 2048
    seed = 13

    dataset_sampler = SwissRoll()
    samples, color = dataset_sampler.sample(n_points, seed=seed)

    # Keyword spelling (`intrisic_dim`) is reproduced exactly as the call
    # was written.
    tc = gudhi.TangentialComplex(intrisic_dim=1, points=samples)
    tc.compute_tangential_complex()
    simplex_tree = tc.create_simplex_tree()

    print(simplex_tree.get_skeleton(1))

    # Walk the 1-skeleton in order of increasing filtration value and keep
    # every edge (two vertices) whose filtration value equals 0.
    pairings = []
    for entry in sorted(simplex_tree.get_skeleton(1), key=lambda t: t[1]):
        vertices = entry[0]
        if len(vertices) != 2 or entry[1] != 0:
            continue
        print(vertices)
        pairings.append(vertices)

    make_plot(samples, pairings, color, name='witness_TEST')
tSNE_see = 672 tSNE_path = '/Users/simons/MT_data/sync/euler_sync_scratch/schsimo/output/tsne_swissroll_2/SwissRoll-n_samples2560-tSNE--n_jobs1-perplexity50-seed672-017a5cba/train_latents.npz' eval_models_dict = {'UMAP': UMAP_path, 'tSNE': tSNE_path} eval_seeds = {'UMAP': UMAP_seed, 'tSNE': tSNE_see} metrics = ['RRE', 'Trust', 'Cont', 'IsoX', 'IsoZ', 'IsoXlist', 'IsoZlist'] # sample data for model_name, path in eval_models_dict.items(): # load WC-AE print('START: {}'.format(model_name)) n_samples = 2560 dataset = SwissRoll() X_eval, labels = dataset.sample(n_samples=n_samples, seed=eval_seeds[model_name]) X_eval, X_val, y_train, y_val, = train_test_split( X_eval, X_eval, test_size=0.2, random_state=eval_seeds[model_name]) Z_eval = np.load(path)['latents'] # evaluate for multiple ks, what? -> Cont, Trust, ll-RMSE, K ks = [15, 30, 45] #ks = [int(k) for k in np.linspace(15,150,10)] # eval = Multi_Evaluation(model=model) # ev_result = eval.get_multi_evals(data=X_eval, latent=Z_eval, labels=Y_eval, ks=ks) ev_result = CompPerformMetrics(X_eval, Z_eval, ks=ks, dataset='norm') print('Done')
ax = fig.gca(projection='3d') plt_obs = ax.scatter(data_obs[:, 0], data_obs[:, 1], data_obs[:, 2], c='tab:cyan') plt_out = ax.scatter(X_out, Y_out, Z_out, c='tab:orange') ax.set_title( str(4 * N_obs) + '-sampling of the necklace with ' + str(N_out) + ' outliers') ax.legend((plt_obs, plt_out), ('data', 'outliers'), loc='lower left') return data if __name__ == "__main__": dataset_sampler = SwissRoll() name = '128_noise4_DTM' data, color = dataset_sampler.sample(128, noise=0.4) st = DTMFiltration(data, m=0.01, p=10, dimension_max=1) st.persistence() pers_pairs = st.persistence_pairs() print(pers_pairs) pairings = np.array([[pers_pairs[0][1][0], pers_pairs[0][1][1]]]) for pair in pers_pairs[1:-1]: pairings = np.vstack((pairings, np.array([[pair[1][0], pair[1][1]]]))) make_plot(data, pairings, color, name=name) # name = '128_noise4_reg' # make_data(data, color, name = name)
wcae = '/Users/simons/MT_data/sync/euler_sync_scratch/schsimo/output/WCAE_swissroll_nonoise/SwissRoll-n_samples2560-seed1935-Autoencoder_MLP_topoae-32-32-lr1_100-bs256-nep1000-rlw1-tlw8192-mepush_active21_20-k3-rmax10-seed1935-1edc2a73/' eval_models_dict = { 'TopoAE64': topoae_64, 'TopoAE128': topoae_128, 'TopoAE256': topoae_256, 'WCTopoAE64': wctopoae_64, 'WCTopoAE128': wctopoae_128, 'WCTopoAE256': wctopoae_256, } eval_models_dict = {'wcae': wcae} # sample data n_samples = 2560 manifold = SwissRoll() data_manifold, data, labels = manifold.sample_manifold(n_samples=n_samples, seed=1) model_names = [] values = [] for model_name, path in eval_models_dict.items(): # load WC-AE model_kwargs = dict(input_dim=3, latent_dim=2, size_hidden_layers=[32, 32]) autoencoder = Autoencoder_MLP_topoae(**model_kwargs) model = WitnessComplexAutoencoder(autoencoder) state_dict = torch.load(os.path.join(path, 'model_state.pth')) model.load_state_dict(state_dict)
"""Draw Swiss roll witnesses and pick random landmark subsets from them."""
import math
import os
import random

from scripts.ssc.visualization.demo_kNN_kwc import annulus
from src.datasets.datasets import SwissRoll
from src.topology.witness_complex import WitnessComplex

if __name__ == "__main__":
    path = '/Users/simons/PycharmProjects/MT-VAEs-TDA/output/TDA/SwissRoll'
    # Alternative output location:
    # path = '/Users/simons/PycharmProjects/MT-VAEs-TDA/output/TDA/DoubleAnnulus'

    # Instantiated once; the original created the same sampler twice in a row.
    dataset = SwissRoll()
    n_w = 512  # number of witness points

    # Inner/outer radii of two annuli and the ratio of their areas.
    small = [0.5, 0.8]
    large = [1, 1.3]

    area_l = math.pi * (large[1] ** 2 - large[0] ** 2)
    area_s = math.pi * (small[1] ** 2 - small[0] ** 2)

    sample_ratio = area_l / area_s

    for seed in [22]:
        for n_l in [64, 128]:
            X_w, w_ = dataset.sample(n_w, seed=seed)

            # Landmarks: n_l witnesses chosen at random, with their labels.
            # NOTE(review): `random` is not seeded here, so the subset differs
            # between runs even for a fixed `seed` — confirm this is intended.
            ind = random.sample(range(n_w), n_l)
            X_l, l_ = X_w[ind, :], w_[ind]
if path_root is not None: print(type(pairings[0])) print(type(data)) path_pairings = '{}pairings_{}.npy'.format(path_root, name) path_data = '{}data_{}.npy'.format(path_root, name) path_color = '{}color_{}.npy'.format(path_root, name) np.save(path_pairings, pairings[0]) np.save(path_data, data) np.save(path_color, color) return data, pairings[0], color if __name__ == "__main__": dataset_sampler = SwissRoll() n_samples_array = [128] tot_count = len(n_samples_array) * 100 progress_count = 1 for n_samples in n_samples_array: path_to_save = '/Users/simons/PycharmProjects/MT-VAEs-TDA/output/visualisation_nnsys/final_pretty/vr/'.format( n_samples) for seed in [30]: print('{} out of {}'.format(progress_count, tot_count)) progress_count += 1 name = 'vr_ns{}_seed{}'.format(n_samples, seed) data, color = dataset_sampler.sample(n_samples, seed=seed) data, pairings, color = make_data(data, color, name=name) # path_pairings = '{}pairings_{}.npy'.format(PATH_ROOT_SWISSROLL, name)
#dataset = SwissRoll() # small = [0.5,0.8] # large = [1, 1.3] # # area_l = math.pi*(large[1]**2-large[0]**2) # area_s = math.pi*(small[1]**2-small[0]**2) # # sample_ratio = area_l/area_s # # infty_sign = tadasets.infty_sign(n=3000, noise=0.1) for seed in [11, 22, 33, 44]: for n in [16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128]: name_plot_vr = 'VR_SwissRoll_n{n}_seed{seed}.pdf'.format(n=n, seed=seed) computeVR(data=SwissRoll().sample(n, seed=seed)[0], path_to_save=os.path.join(path, name_plot_vr)) # df_an1 = annulus(int(sample_ratio*n), large[0], large[1], seed=seed) # # df_an2 = annulus(n, small[0], small[1], label=1, seed=(seed+12)) # # df_an = df_an1.append(df_an2, ignore_index=True) # # make_scatter(df_an, name='/annulus_manifold_ns{ns}_nl{nl}_s{s}'.format(ns=n, nl = int(sample_ratio*n), s=seed), # # base_path=path) # # data = df_an[['x','y']].to_numpy() # # # # name_plot = 'VR_ns{ns}_nl{nl}_seed{s}.pdf'.format(ns=n, nl=int(sample_ratio*n), s=seed) # data = tadasets.infty_sign(n=n, noise=noise) # labels = np.ones(n) # name_plot_m = 'manifold_infty_n{n}_noise{noise}.pdf'.format(n=n, noise=noise) # plot_2Dscatter(data,labels, path_to_save = os.path.join(path, name_plot_m), show = True) # name_plot_vr = 'VR_infty_n{n}_noise{noise}.pdf'.format(n=n, noise=noise)
wcomp = gudhi.WitnessComplex(nlt) simplex_tree = wcomp.create_simplex_tree(max_alpha_square=50) skeleton = simplex_tree.get_skeleton(1) pairs = [] for element in skeleton: if len(element[0]) == 2: if element[1] == 0: pairs.append(element[0]) print('Skeleton W-LIST: {}'.format(skeleton)) print('Pairs W-LIST: {}'.format(pairs)) dataset_sampler = SwissRoll() n_landmarks = (64+64) n_total = 4048*2*2*2*2 seed = 9 #landmarks_, color_ = dataset_sampler.sample(n_landmarks, seed = 9) witnesses_, color_ = dataset_sampler.sample(n_total, seed = seed) landmark_indices = random.sample(list(range(0,n_total)), n_landmarks) #landmarks_ = gudhi.pick_n_random_points(points=witnesses_, nb_points=n_landmarks) landmarks, color_landmarks = witnesses_[landmark_indices,:], color_[landmark_indices] witness_complex = gudhi.EuclideanWitnessComplex(witnesses=witnesses_, landmarks=landmarks) simplex_tree = witness_complex.create_simplex_tree(max_alpha_square = 100, limit_dimension=1)
# # count_pairings(n_samples, pairs_filtered) # make_plot(landmarks, pairs_filtered, color, name=name) n_samples_array = [32,48,64,96,128] n_witnesses_array = [256,512,1024] seeds = [10,13,20] n_samples_array = [64] n_witnesses_array = [512] seeds = [27] for n_witnesses in n_witnesses_array: for seed in seeds: for n_samples in n_samples_array: name = 'witness_ssc_corrected_nl{}_nw{}_seed{}'.format(n_samples, n_witnesses, seed) dataset_sampler = SwissRoll() n_landmarks = n_samples seed = seed landmarks, color = dataset_sampler.sample(n_landmarks, seed = seed) witnesses, _ = dataset_sampler.sample(n_witnesses, seed=(seed+17)) distances = wl_table(witnesses,landmarks) pairs = get_pairs_0(distances) pairs_filtered = get_persistence_pairs(pairs, n_samples) count_pairings(n_samples, pairs_filtered) make_plot(landmarks, pairs_filtered, color, name=name)
"""Smoke test: run Multi_Evaluation on a Swiss roll sample against itself."""
from src.datasets.datasets import SwissRoll
from src.evaluation.eval import Multi_Evaluation

dataset_sampler = SwissRoll()

data, label = dataset_sampler.sample(1000, seed=1)
# Second sample drawn with another seed.  Its labels get their own name: the
# original rebound `label` here, so the evaluation below received seed-1 data
# paired with seed-2 labels.
data2, label2 = dataset_sampler.sample(1000, seed=2)

evaluator = Multi_Evaluation(seed=1)
# The same array is passed as the first two arguments, as in the original.
ev_result = evaluator.get_multi_evals(data, data, label, ks=[5, 10, 15])

print(ev_result)
"""Repeatedly sample Swiss roll witnesses and landmark subsets for witness-complex plots."""
import random

import torch
import numpy as np

from scripts.ssc.persistence_pairings_visualization.utils_definitions import make_plot
from src.datasets.datasets import SwissRoll
from src.topology.witness_complex import WitnessComplex

if __name__ == "__main__":
    dataset_sampler = SwissRoll()

    N_WITNESSES = 2048  # witness points per simulation
    n_samples = 128     # landmark points per simulation

    path_to_save = '/Users/simons/PycharmProjects/MT-VAEs-TDA/output/visualisation_nnsys/wc{}_w{}/'.format(
        n_samples, N_WITNESSES)

    N_sim = 100
    ks = [1, 2, 3, 4, 6, 8, 12, 16]

    ntot = int(len(ks) * N_sim)
    counter = 1

    # Exactly N_sim distinct seeds from [1, 100000).  The original drew with
    # replacement and de-duplicated through a set, which could silently yield
    # fewer than N_sim simulations and disagree with `ntot`.
    seeds = random.sample(range(1, 100000), N_sim)

    for seed in seeds:
        witnesses, color_ = dataset_sampler.sample(N_WITNESSES, seed=seed)

        # Landmarks are a random subset of the witnesses, with their colours.
        ind = random.sample(range(N_WITNESSES), n_samples)
        landmarks, color = witnesses[ind, :], color_[ind]
# 'TopoAE128': topoae_128, # 'WCTopoAE128': wctopoae_128, # # } # eval_models_dict = { # 'TopoAE64' : topoae_64 # } # set metrices to evaluate #metrics = ['K_min', 'K_max','K_avg','llrme','continuity','trustworthiness'] #metrics = ['Trust', 'Cont', 'LGD', 'K_min', 'K_max', 'K_avg'] metrics = ['RRE', 'Trust', 'Cont', 'IsoX', 'IsoZ', 'IsoXlist', 'IsoZlist'] # sample data n_samples = 2560 dataset = SwissRoll() data, labels = dataset.sample(n_samples=n_samples, seed=1) for model_name, path in eval_models_dict.items(): # load WC-AE print('START: {}'.format(model_name)) model_kwargs = dict(input_dim=3, latent_dim=2, size_hidden_layers=[32, 32]) autoencoder = Autoencoder_MLP_topoae(**model_kwargs) model = WitnessComplexAutoencoder(autoencoder) state_dict = torch.load(os.path.join(path, 'model_state.pth')) model.load_state_dict(state_dict) model.eval() dataset_test = TensorDataset(torch.Tensor(data), torch.Tensor(labels))
if k not in tmp: tmp[k] = {} tmp = tmp[k] tmp[kc[-1]] = kc_v ret.append(Config_Competitors(**ret_i)) return ret placeholder_config_competitors = Config_Competitors( model_class=tSNE, model_kwargs=dict(), dataset=SwissRoll(), sampling_kwargs={'n_samples': [2560]}, eval=[ ConfigEval( active=True, evaluate_on=None, save_eval_latent=True, save_train_latent=True, online_visualization=False, k_min=5, k_max=20, k_step=5, ) ], uid='uid', verbose=False,
"""Set up a timing grid over witness and landmark counts on the Swiss roll."""
import random
import time
import uuid

import pandas as pd
import numpy as np

from src.datasets.datasets import SwissRoll
from src.topology.witness_complex import WitnessComplex

if __name__ == "__main__":
    # Short random tag: first four characters of a UUID4.
    unique_id = str(uuid.uuid4())[:4]
    df_timing = pd.DataFrame()
    df_timing_data = []

    n_witnesses = [512, 1024, 2048]
    # n_landmarks = [int(i) for i in np.logspace(5, 9, num=5, base=2.0)]
    n_landmarks = [8, 16, 32, 64]

    # One pool of points, as large as the biggest witness set; every
    # configuration below indexes into it.
    dataset = SwissRoll()
    n_pool = max(n_witnesses)
    data, _ = dataset.sample(n_samples=n_pool)

    for n_w in n_witnesses:
        ind_w = random.sample(range(n_pool), n_w)
        X_witnesses = data[ind_w, :]

        for n_l in n_landmarks:
            # NOTE(review): landmark indices are drawn over the whole pool,
            # not over the n_w selected witnesses — confirm this is intended.
            ind_l = random.sample(range(n_pool), n_l)