def sig_test1(shape, iters=20):
    """
    Collect sigclust p-values over repeatedly generated random datasets.

    :Parameters:
        shape : tuple
            Shape of each randomly generated dataset; only the first two
            entries are used (rows, columns)
        iters : int
            Number of datasets to generate and test

    :Returns:
        NumPy array of length iters holding the first element of each
        sigclust result (the p-value), in run order
    """
    p_values = np.zeros(iters)
    for run in range(iters):
        # np.random.rand draws every entry uniformly from [0, 1).
        sample = np.random.rand(shape[0], shape[1])
        outcome = sigclust(sample, verbose=False)
        p_values[run] = outcome[0]
    return p_values
def sig_test_1(shape=(50, 50), iters=20):
    """
    Run sigclust on randomly generated datasets and print results.

    Each iteration draws a fresh matrix with np.random.rand (entries
    uniform on [0, 1)), runs sigclust on it and records the first element
    of the result (the p-value).  Afterwards the mean and standard
    deviation of the collected p-values are printed and the mean is
    checked to lie strictly between .01 and .98.

    :Parameters:
        shape : tuple
            Shape of randomly generated datasets
        iters : int
            Number of iterations

    :Raises:
        AssertionError
            If the mean p-value is not below .98 or not above .01.  With
            iters == 0 the mean is NaN, so the check fails as well.
    """
    result = np.zeros(iters)
    for i in range(iters):
        print("Simulating random data of shape %r..." % str(shape))
        X = np.random.rand(shape[0], shape[1])
        print("Running sigclust on generated data...")
        result[i] = sigclust(X)[0]

    mu = np.mean(result)
    sig = np.std(result)
    print("The set of %d p-values had\nmean: %f and\n"
          "standard deviation: %f\n" % (iters, mu, sig))

    # NOTE: the statement form `assert cond, msg` is required here.
    # Writing `assert(cond, msg)` asserts a non-empty tuple, which is
    # always truthy, so the check could never fail.
    assert mu < .98, (
        "Warning: High average p-value for input matrices of random "
        "normal data.")
    assert mu > .01, (
        "Warning: Low average p-value for input matrices of random "
        "normal data.")
def sig_test_1(shape=(50, 50), iters=20):
    """
    Run sigclust on randomly generated datasets and print results.

    Every iteration generates a new matrix via np.random.rand (entries
    uniform on [0, 1)), feeds it to sigclust and stores the first element
    of the returned value (the p-value).  Once all iterations are done,
    the mean and standard deviation of the p-values are printed and the
    mean is required to lie strictly between .01 and .98.

    :Parameters:
        shape : tuple
            Shape of randomly generated datasets
        iters : int
            Number of iterations

    :Raises:
        AssertionError
            If the mean p-value is not below .98 or not above .01.  With
            iters == 0 the mean is NaN, so the check fails as well.
    """
    result = np.zeros(iters)
    for i in range(iters):
        print("Simulating random data of shape %r..." % str(shape))
        X = np.random.rand(shape[0], shape[1])
        print("Running sigclust on generated data...")
        result[i] = sigclust(X)[0]

    mu = np.mean(result)
    sig = np.std(result)
    print("The set of %d p-values had\nmean: %f and\n"
          "standard deviation: %f\n" % (iters, mu, sig))

    # NOTE: `assert (cond, msg)` tests a two-element tuple, which is always
    # truthy; the condition and message must be separated by a comma at
    # statement level for the check to be live.
    assert mu < .98, (
        "Warning: High average p-value for input matrices of random "
        "normal data.")
    assert mu > .01, (
        "Warning: Low average p-value for input matrices of random "
        "normal data.")
def RSC(file, rids=True, verbose=True, scale=False):
    """
    Interactively re-run sigclust, removing the smaller cluster each round.

    The data matrix and its id column are loaded with get_mat.  Each round
    runs sigclust on the current matrix, prints the p-value and the two
    cluster sizes, and asks whether to drop the smaller cluster and run
    again.  Entering 'n' ends the loop; any other answer removes the
    smaller cluster's rows and starts the next round.

    :Parameters:
        file
            Data source passed straight through to get_mat
        rids : bool
            Forwarded to get_mat as its rids argument
        verbose : bool
            Forwarded to sigclust
        scale : bool
            Forwarded to sigclust

    :Returns:
        None; all results are printed.
    """
    rid_col, X = get_mat(file, rids=rids)
    while True:
        p, clust = sigclust(X, verbose=verbose, scale=scale)
        print("p-value: %f" % p)

        # Assumes clust holds 0/1 labels, so the sum is the size of the
        # "second" (label 1) cluster -- TODO confirm against sigclust.
        s = sum(clust)
        n_samps = X.shape[0]
        print("The clusters have sizes %d, %d" % (n_samps - s, s))

        in0 = input("Remove all points in smallest cluster and re-run sigclust? (Enter 'n' to terminate.):")
        # Compare by value: `is` tests object identity, which for strings
        # is an implementation detail and not a reliable equality check.
        if in0.strip() == 'n':
            break

        # On a tie the first (label 0) cluster is the one removed.
        sec_small = s < (n_samps / 2)
        print("Removing %s cluster (of size %d)." %
              ("SECOND" if sec_small else "FIRST",
               s if sec_small else n_samps - s))

        f_clust = clust.astype(bool)
        if sec_small:
            to_remove = np.where(f_clust)[0]
        else:
            to_remove = np.where(~f_clust)[0]

        print("Now removing samples with the following indices:")
        print(to_remove)
        print("These samples correspond to the following rev ids:")
        rem_rids = rid_col[to_remove]
        print(rem_rids)

        # Shrink the id column together with the data so that row i of X
        # and entry i of rid_col keep referring to the same sample in the
        # following rounds.
        X = np.delete(X, to_remove, axis=0)
        rid_col = np.delete(rid_col, to_remove, axis=0)
subpop_ind = popul_ind[subpop_bool] # Sizes of two populations BIG = ids.shape[0] SMALL = subpop_ind.shape[0] # The incoming data restricted # to the subpopulation ids_sub = ids[subpop_bool] features_sub = features[subpop_bool, :] labels_sub = labels[subpop_bool] # First we cluster the whole population print("Computing p-value for population.") p, clust = sigclust(features, mc_iters=ITERS_B) print("p-value for population: %f" % p) clust0_bool = clust == 0 clust0_ind = popul_ind[clust0_bool] clust1_bool = ~clust0_bool clust1_ind = popul_ind[clust1_bool] ss_of_clust0_bool = clust0_bool & subpop_bool ss_of_clust1_bool = clust1_bool & subpop_bool """ The clusters of the whole population determine two subsets (their respective intersections with the subpopulation)"""
subpop_ind = popul_ind[subpop_bool] #Sizes of two populations BIG = ids.shape[0] SMALL = subpop_ind.shape[0] # The incoming data restricted # to the subpopulation ids_sub = ids[subpop_bool] features_sub = features[subpop_bool,:] labels_sub = labels[subpop_bool] #First we cluster the whole population print("Computing p-value for population.") p, clust = sigclust(features, mc_iters = ITERS_B) print("p-value for population: %f" % p) clust0_bool = clust == 0 clust0_ind = popul_ind[clust0_bool] clust1_bool = ~clust0_bool clust1_ind = popul_ind[clust1_bool] """ The clusters of the whole population determine two subsets (their respective intersections with the subpopulation)""" ss_of_clust0_bool = clust0_bool & subpop_bool