def per_sample_subsets(X, nsubsets, ncell_per_subset, k_init=False,
                       n_presample=2000):
    """Build ``nsubsets`` subsamples of ``X`` and stack them, transposed.

    Parameters
    ----------
    X : 2-D array
        Input data; ``X.shape[1]`` is used as the per-subset feature count
        (``nmark``).  Presumably rows are cells and columns are markers --
        confirm against callers.
    nsubsets : int
        Number of subsets to generate.
    ncell_per_subset : int
        Number of rows requested for each subset.
    k_init : bool, optional
        If false (default), each subset comes straight from
        ``random_subsample(X, ncell_per_subset)``.  If true, a pool of
        ``n_presample`` rows is drawn with ``random_subsample`` first and
        then reduced with ``kmeans_subsample(..., random_state=i)``.
    n_presample : int, optional
        Size of the intermediate pool used when ``k_init`` is true.
        Defaults to 2000, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nsubsets, nmark, ncell_per_subset)``; entry ``i``
        holds the transpose of the i-th subsample.
    """
    nmark = X.shape[1]
    Xres = np.zeros((nsubsets, nmark, ncell_per_subset))

    for i in range(nsubsets):
        if k_init:
            # Draw a random pool first, then let the k-means helper pick
            # the final rows; the subset index doubles as the seed.
            X_pool = random_subsample(X, n_presample)
            X_i = kmeans_subsample(X_pool, ncell_per_subset, random_state=i)
        else:
            X_i = random_subsample(X, ncell_per_subset)
        # Each helper result is stored transposed, i.e. as
        # (nmark, ncell_per_subset).
        Xres[i] = X_i.T
    return Xres
def per_sample_biased_subsets(X, x_ctrl, nsubsets, ncell_final, to_keep, ratio_biased):
    """Build subsets that mix a biased draw with a plain random draw.

    NOTE(review): this file contains a second definition with the same name
    after this one; if both live at module level the later one replaces this
    one at import time -- confirm which is intended and drop the other.

    Parameters
    ----------
    X : 2-D array
        Input data; ``X.shape[1]`` is used as the per-subset feature count.
    x_ctrl : object
        Passed through unchanged to ``outlier_subsample`` (presumably the
        control/reference sample -- confirm).
    nsubsets : int
        Number of subsets to generate.
    ncell_final : int
        Total number of rows in each subset.
    to_keep : object
        Passed through unchanged to ``outlier_subsample``.
    ratio_biased : float
        Fraction of ``ncell_final`` taken from the biased draw; the count is
        truncated with ``int()`` and the remainder is drawn at random.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nsubsets, nmark, ncell_final)``; within each
        subset the biased rows are stacked before the random rows, then the
        stack is transposed.
    """
    nmark = X.shape[1]
    Xres = np.empty((nsubsets, nmark, ncell_final))
    nc_biased = int(ratio_biased * ncell_final)
    nc_unbiased = ncell_final - nc_biased

    for i in range(nsubsets):
        # Progress output.  Written as a call so it parses on Python 3 and
        # prints the same text on Python 2.
        print(i)
        x_unbiased = random_subsample(X, nc_unbiased)
        # The outlier pool is refreshed only every 100 subsets and reused in
        # between; i == 0 always triggers it, so the names are bound before
        # first use.
        if (i % 100) == 0:
            x_outlier, outlierness = outlier_subsample(X, x_ctrl, to_keep)
        x_biased = weighted_subsample(x_outlier, outlierness, nc_biased)
        Xres[i] = np.vstack([x_biased, x_unbiased]).T
    return Xres
def per_sample_biased_subsets(X, x_ctrl, nsubsets, ncell_final, to_keep, ratio_biased):
    """Generate subsets combining weighted (biased) and random rows of ``X``.

    For every subset, ``int(ratio_biased * ncell_final)`` rows come from
    ``weighted_subsample`` applied to the current output of
    ``outlier_subsample(X, x_ctrl, to_keep)``, and the remaining rows come
    from ``random_subsample(X, ...)``.  The two parts are stacked (biased
    rows first) and stored transposed.

    Returns an array of shape ``(nsubsets, X.shape[1], ncell_final)``.
    """
    n_features = X.shape[1]
    subsets = np.empty((nsubsets, n_features, ncell_final))
    n_from_biased = int(ratio_biased * ncell_final)
    n_from_random = ncell_final - n_from_biased

    for subset_idx in range(nsubsets):
        random_part = random_subsample(X, n_from_random)

        # Recompute the outlier pool on every 100th subset (including the
        # very first one) and keep reusing it until the next refresh.
        if subset_idx % 100 == 0:
            pool, pool_weights = outlier_subsample(X, x_ctrl, to_keep)

        biased_part = weighted_subsample(pool, pool_weights, n_from_biased)
        stacked = np.vstack([biased_part, random_part])
        subsets[subset_idx] = stacked.T

    return subsets