# NOTE(review): SOURCE arrived whitespace-mangled; line breaks below are
# reconstructed. This fragment begins mid-cell: `currtime`, `scaler`, `X_test`,
# `X_train`, `seed`, `n_components`, `sequencing_df`, `index_train`,
# `index_test`, `cc`, `pairplot`, `PCA`, and `SparseComponentAnalysis` are all
# defined in earlier, unseen cells.
print(f"{time.time() - currtime:.3f} elapsed to scale and center data.")
# Apply the scaler (already fit on the training split) to the held-out split.
X_test = scaler.transform(X_test)

#%%
# Fit PCA on the training split and project both splits.
np.random.seed(seed)
currtime = time.time()
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train)
print(f"{time.time() - currtime:.3f} elapsed to fit PCA model.")
X_test_pca = pca.transform(X_test)

#%%
# Fit sparse component analysis (SCA) with fixed hyperparameters for
# comparison with PCA above. `seed + 2` just decorrelates this cell's
# randomness from the PCA cell's.
np.random.seed(seed + 2)
currtime = time.time()
sca = SparseComponentAnalysis(n_components=n_components, max_iter=30, gamma=50, verbose=10)
X_train_sca = sca.fit_transform(X_train)
print(f"{time.time() - currtime:.3f} elapsed for SCA.")
X_test_sca = sca.transform(X_test)

#%%
# Build a color palette keyed by neuron type, and pull train/test labels from
# the row indices. NOTE(review): assumes `sequencing_df.index`, `index_train`,
# and `index_test` are MultiIndexes with a "Neuron_type" level — TODO confirm.
neuron_types = sequencing_df.index.get_level_values("Neuron_type").values
neuron_type_palette = dict(zip(np.unique(neuron_types), cc.glasbey_light))
y_train = index_train.get_level_values(level="Neuron_type").values
y_test = index_test.get_level_values(level="Neuron_type").values
# Pair plot of the first 4 SCA dimensions, colored by neuron type.
# NOTE(review): this call is truncated here — its remaining keyword arguments
# continue beyond this fragment.
pg = pairplot(X_train_sca[:, :4], labels=y_train, palette=neuron_type_palette,
# NOTE(review): whitespace-mangled fragment; it begins mid-`if/else` inside a
# hyperparameter sweep loop (the condition choosing the "PCA" label, and the
# surrounding loop headers, are outside this view). Indentation below is
# reconstructed relative to the fragment, not to the unseen enclosing loops.
    # Label the run: SCA with the current gamma, or "PCA" for the unpenalized case.
    method = "PCA"
else:
    method = "SCA"
print(
    f"method = {method}, n_components = {n_components}, gamma = {gamma}"
)
print()
# Record this parameter combination so results can be keyed by it below.
curr_params = (method, n_components, gamma)
params.append(curr_params)

# fit model
currtime = time.time()
model = SparseComponentAnalysis(
    n_components=n_components,
    max_iter=max_iter,
    gamma=gamma,
    verbose=10,
    tol=tol,
)
S_train = model.fit_transform(X_train)
train_time = time.time() - currtime
print(f"{train_time:.3f} elapsed to train model.")
# Project the held-out split with the trained model.
S_test = model.transform(X_test)

# save model fit, keyed by (method, n_components, gamma)
models_by_params[curr_params] = model
S_train_by_params[curr_params] = S_train
S_test_by_params[curr_params] = S_test

# save metrics
#%% U_thresh = soft_threshold(U_rot, gamma=100) pairplot(U_thresh, alpha=0.2) #%% #%% currtime = time.time() pca = PCA(n_components=n_components) X_pca = pca.fit_transform(X_train) print(f"{time.time() - currtime:.3f} elapsed to fit PCA model.") #%% currtime = time.time() sca = SparseComponentAnalysis(n_components=n_components, max_iter=max_iter, gamma=gamma) X_sca = sca.fit_transform(X_train) print(f"{time.time() - currtime:.3f} elapsed to fit SCA model.") #%% max_iter = 5 gammas = [n_components, 100, 500, np.sqrt(X_train.shape[1]) * n_components, np.inf] models_by_gamma = {} Xs_by_gamma = {} for i, gamma in enumerate(gammas): print(f"Gamma = {gamma}...") if gamma == np.inf: _max_iter = 0 else: _max_iter = max_iter currtime = time.time()
# Monte Carlo comparison of the Python SCA implementation against an R
# implementation (`sca_R`) across embedding dimensions k. `sample_data`, `d`,
# `proportion_variance_explained`, `SparseComponentAnalysis`, and `sca_R` are
# defined in unseen parts of the file.
center = True
scale = False  # NOTE(review): declared but never read within this fragment
max_iter = 1
# Dimensions to evaluate: every 4th value from 2 up to (at most) d.
k_range = np.arange(2, d + 1, 4)
n_replicates = 400
rows = []
for i in range(n_replicates):
    # Fresh synthetic dataset per replicate.
    X = sample_data()
    if center:
        X -= np.mean(X, axis=0)
    for k in k_range:
        # Penalty scales linearly with the number of components —
        # NOTE(review): rationale for the 2.5 factor not visible here.
        gamma = k * 2.5
        sca = SparseComponentAnalysis(n_components=k, gamma=gamma, max_iter=max_iter, tol=0)
        Z_hat_sca = sca.fit_transform(X)
        # Loadings matrix (features x components) from the fitted model.
        Y_hat_sca = sca.components_.T
        pve = proportion_variance_explained(X, Y_hat_sca)
        rows.append({
            "replicate": i,
            "k": k,
            "pve": pve,
            "method": "SCA",
            "n_nonzero": np.count_nonzero(Y_hat_sca),
        })
        # NOTE(review): this call is truncated — its remaining arguments
        # continue beyond this fragment.
        Z_hat_r, Y_hat_r, outs = sca_R(
            X,
# 500, # int(np.sqrt(X_train.shape[1]) * n_components), np.inf, ] gammas = [float(g) for g in gammas] models_by_gamma = {} Xs_by_gamma = {} for i, gamma in enumerate(gammas): print(f"Gamma = {gamma}...") if gamma == np.inf: _max_iter = 0 else: _max_iter = max_iter currtime = time.time() sca = SparseComponentAnalysis(n_components=n_components, max_iter=_max_iter, gamma=gamma, verbose=10) X_sca = sca.fit_transform(X_train) print(f"{time.time() - currtime:.3f} elapsed.") models_by_gamma[gamma] = sca Xs_by_gamma[gamma] = X_sca print() #%% rows = [] for gamma, model in models_by_gamma.items(): explained_variance_ratio = model.explained_variance_ratio_ for k, ev in enumerate(explained_variance_ratio): n_nonzero = np.count_nonzero(model.components_[:k + 1]) rows.append({