# that is held fixed across all paramter combinations. For a fixed # sparsity, the exact imposed sparsity profile may vary depending # on the block size of the covariance matrix. However, we use the # blocks as a seed for the shuffling that is done, so that for a fixed # sparsity and block size, all beta vectors should be identical betawidth = [0.1, np.inf, -1] beta_dict = [] for i, bw in enumerate(betawidth): beta_dict.append({ 'betawidth': bw, 'beta': gen_beta2(n_features, n_features, 1, bw, seed=betaseed) }) ##### Common parameters held fixed across all jobs ########## comm_params = { 'cov_params': cov_params[0], 'cov_type': 'interpolation', 'n_features': n_features, # n/p ratio # 'sparsity': sparsity, 'est_score': 'BIC', 'reps': 20, 'stability_selection': [1.0], 'n_boots_sel': 25, 'n_boots_est': 25, 'betadict': beta_dict,
# on the block size of the covariance matrix. However, we use the # blocks as a seed for the shuffling that is done, so that for a fixed # sparsity and block size, all beta vectors should be identical betawidth = [-1, np.inf] beta_dict = [] for i, bw in enumerate(betawidth): # NOTE THE DISTRIBUTION beta_dict.append({ 'betawidth': bw, 'beta': gen_beta2(n_features, n_features, 1, bw, seed=betaseed, distribution='normal') }) ##### Common parameters held fixed across all jobs ########## comm_params = { 'sparsity': sparsity, 'cov_type': 'interpolation', 'n_features': n_features, # n/p ratio # 'np_ratio': [4], 'est_score':
def __call__(self, task_tuple):
    """Generate one synthetic regression problem and fit the requested estimator.

    Parameters
    ----------
    task_tuple : tuple
        ``(cov_param_idx, rep, algorithm)`` where ``cov_param_idx`` indexes
        into ``self.cov_params``, ``rep`` labels the repetition, and
        ``algorithm`` is an integer code:
        0 = LassoCV, 1 = UoI_Lasso, 2 = SCAD (PycassoCV), 3 = MCP (PycassoCV).

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the codes 0-3.

    Side effects
    ------------
    Appends the true (sparsified) coefficient vector to ``self.beta``, the
    fitted coefficients to ``self.beta_hat``, and ``(cov_param_idx, rep,
    algorithm)`` to ``self.task_signature``.
    """
    cov_param_idx = task_tuple[0]
    rep = task_tuple[1]
    algorithm = task_tuple[2]

    n_features = self.n_features
    n_samples = self.n_samples

    # Fixed seed so the dense beta is identical across every task; it is
    # sparsified below, seeded by the block size, so that a given
    # (sparsity, block size) pair always yields the same beta.
    # NOTE(review): is this procedure reproducible on subsequent runs?
    # (question carried over from the original comment)
    beta = gen_beta2(n_features, n_features, sparsity=1, betawidth=-1,
                     seed=1234)
    cov_param = self.cov_params[cov_param_idx]
    sigma = gen_covariance(n_features, cov_param['correlation'],
                           cov_param['block_size'], cov_param['L'],
                           cov_param['t'])
    beta_ = sparsify_beta(beta, cov_param['block_size'], sparsity=0.25,
                          seed=cov_param['block_size'])

    t0 = time.time()
    X, X_test, y, y_test, ss = gen_data(n_samples, n_features, kappa=5,
                                        covariance=sigma, beta=beta_)

    # Standardize the design matrix and center the response so the
    # intercept-free fits below are well posed.
    X = StandardScaler().fit_transform(X)
    y -= np.mean(y)

    if algorithm == 0:
        lasso = LassoCV(fit_intercept=False, cv=5)
        lasso.fit(X, y.ravel())
        beta_hat = lasso.coef_
    elif algorithm == 1:
        uoi = UoI_Lasso(fit_intercept=False, estimation_score='r2')
        uoi.fit(X, y)
        beta_hat = uoi.coef_
    elif algorithm == 2:
        scad = PycassoCV(penalty='scad', fit_intercept=False, nfolds=5,
                         n_alphas=100)
        scad.fit(X, y)
        beta_hat = scad.coef_
    elif algorithm == 3:
        mcp = PycassoCV(penalty='mcp', fit_intercept=False, nfolds=5,
                        n_alphas=100)
        mcp.fit(X, y)
        beta_hat = mcp.coef_
    else:
        # Previously an unknown code fell through every branch and crashed
        # with a confusing NameError on beta_hat; fail fast instead.
        raise ValueError('Unknown algorithm code: %d' % algorithm)

    self.beta.append(beta_)
    self.beta_hat.append(beta_hat)
    self.task_signature.append((cov_param_idx, rep, algorithm))
    print('call successful, algorithm %d took %f seconds' % (algorithm, time.time() - t0))