コード例 #1
0
ファイル: read.py プロジェクト: halfak/sigclust
def sig_test1(shape, iters = 20):
    """
    Collect sigclust p-values over repeatedly simulated random matrices.

    :Parameters:
        shape : sequence
            ``shape[0]`` and ``shape[1]`` give the number of rows and
            columns of each simulated matrix.
        iters : int
            Number of matrices to simulate and test.

    :Returns:
        Array of length ``iters`` whose entry ``k`` is the first element
        of the value returned by ``sigclust`` on the k-th matrix.
    """
    p_values = np.zeros(iters)
    for trial in range(iters):
        n_rows, n_cols = shape[0], shape[1]
        sample = np.random.rand(n_rows, n_cols)
        outcome = sigclust(sample, verbose=False)
        p_values[trial] = outcome[0]
    return p_values
コード例 #2
0
ファイル: sigclust_tests.py プロジェクト: aetilley/sigclust
def sig_test_1(shape=(50, 50), iters=20):
    """
    Run sigclust on randomly generated datasets and print results.

    Each iteration simulates one matrix with ``np.random.rand``, runs
    ``sigclust`` on it and records the first element of the result as the
    p-value.  After all iterations the mean and standard deviation of the
    p-values are printed and the mean is checked against fixed bounds.

    :Parameters:
        shape : tuple
            Shape of randomly generated datasets
        iters : int
            Number of iterations (must be at least 1)

    :Raises:
        ValueError
            If ``iters`` is smaller than 1, since no statistics can be
            computed from zero p-values.
        AssertionError
            If the mean p-value is not strictly between .01 and .98.
    """
    if iters < 1:
        raise ValueError("iters must be at least 1")

    result = np.zeros(iters)
    for i in range(iters):
        print("Simulating random data of shape %r..." % str(shape))
        X = np.random.rand(shape[0], shape[1])
        print("Running sigclust on generated data...")
        result[i] = sigclust(X)[0]

    # Summarise once, after every slot of `result` has been filled;
    # computing this inside the loop only averaged in the untouched zeros.
    mu = np.mean(result)
    sig = np.std(result)
    print("The set of %d p-values had\nmean: %f and\n"
          "standard deviation: %f\n" %
          (iters, mu, sig))

    # `assert (cond, msg)` tests a non-empty tuple and can never fail, so
    # the condition and the message must be separated by a bare comma.
    assert mu < .98, (
        "Warning: High average p-value for input matrices of random "
        "normal data.")
    assert mu > .01, (
        "Warning: Low average p-value for input matrices of random "
        "normal data.")
コード例 #3
0
ファイル: sigclust_tests.py プロジェクト: aetilley/sigclust
def sig_test_1(shape=(50, 50), iters=20):
    """
    Run sigclust on randomly generated datasets and print results.

    Every iteration draws one matrix via ``np.random.rand``, passes it to
    ``sigclust`` and stores the first element of the returned value as a
    p-value.  Once all iterations are done, the mean and standard
    deviation of the p-values are printed and the mean is sanity-checked.

    :Parameters:
        shape : tuple
            Shape of randomly generated datasets
        iters : int
            Number of iterations (must be at least 1)

    :Raises:
        ValueError
            If ``iters`` is smaller than 1, since there would be no
            p-values to summarise.
        AssertionError
            If the mean p-value is not strictly between .01 and .98.
    """
    if iters < 1:
        raise ValueError("iters must be at least 1")

    result = np.zeros(iters)
    for i in range(iters):
        print("Simulating random data of shape %r..." % str(shape))
        X = np.random.rand(shape[0], shape[1])
        print("Running sigclust on generated data...")
        result[i] = sigclust(X)[0]

    # The statistics only make sense on the fully populated array, so they
    # are computed after the loop rather than on every pass.
    mu = np.mean(result)
    sig = np.std(result)
    print("The set of %d p-values had\nmean: %f and\n"
          "standard deviation: %f\n" % (iters, mu, sig))

    # Written as `assert cond, msg`: the parenthesised `assert (cond, msg)`
    # form asserts a two-element tuple, which is always truthy.
    assert mu < .98, (
        "Warning: High average p-value for input matrices of random "
        "normal data.")
    assert mu > .01, (
        "Warning: Low average p-value for input matrices of random "
        "normal data.")
コード例 #4
0
ファイル: read.py プロジェクト: halfak/sigclust
def RSC(file, rids=True, verbose = True, scale = False):
    """
    Interactively re-run sigclust, discarding the smaller cluster each round.

    The data matrix and its id column are loaded with ``get_mat``.  Each
    round runs ``sigclust`` on the remaining rows, prints the p-value and
    the two cluster sizes, and asks whether to continue.  Unless the user
    answers ``n``, the rows of the smaller cluster are removed and the loop
    repeats on what is left.

    :Parameters:
        file
            Passed through unchanged to ``get_mat``.
        rids : bool
            Passed through to ``get_mat`` as its ``rids`` argument.
        verbose : bool
            Passed through to ``sigclust``.
        scale : bool
            Passed through to ``sigclust``.

    The function returns nothing; it ends when the user enters ``n``.
    """
    rid_col, X = get_mat(file, rids = rids)
    while True:
        p, clust = sigclust(X, verbose = verbose, scale = scale)
        print("p-value: %f" % p)

        # `clust` is summed to obtain the size of the second cluster, so it
        # is presumably a 0/1 label per row of X -- confirm against sigclust.
        s = sum(clust)
        n_samps = X.shape[0]
        print("The clusters have sizes %d, %d" %
              (n_samps - s, s))
        in0 = input("Remove all points in smallest cluster and re-run sigclust?  (Enter 'n' to terminate.):")

        # Compare by value: `is` tests object identity, which is not
        # guaranteed to hold for two equal strings.
        if in0.strip() == 'n':
            break

        # On a tie the first cluster is the one that gets removed.
        sec_small = s < (n_samps / 2)
        print("Removing %s cluster (of size %d)." %
              ("SECOND" if sec_small else "FIRST",
               s if sec_small else n_samps - s))

        f_clust = clust.astype(bool)
        if sec_small:
            to_remove = np.where(f_clust)[0]
        else:
            to_remove = np.where(~f_clust)[0]
        print("Now removing samples with the following indices:")
        print(to_remove)
        print("These samples correspond to the following rev ids:")
        rem_rids = rid_col[to_remove]

        print(rem_rids)

        X = np.delete(X, to_remove, axis = 0)
        # Drop the same rows from the id column; otherwise the indices
        # reported in later rounds would point at the wrong ids.
        rid_col = np.delete(rid_col, to_remove, axis = 0)
0
# Script fragment: cluster the whole population with sigclust, then relate
# the two resulting clusters to a subpopulation selected by `subpop_bool`.
# `popul_ind`, `subpop_bool`, `ids`, `features`, `labels` and `ITERS_B`
# are defined outside this fragment.

# Entries of popul_ind selected by subpop_bool
# (presumably the population indices of the subpopulation -- confirm
# against where popul_ind is built).
subpop_ind = popul_ind[subpop_bool]

# Sizes of the two populations: BIG is the length of `ids` along its
# first axis, SMALL the number of entries selected into subpop_ind.
BIG = ids.shape[0]
SMALL = subpop_ind.shape[0]

# The incoming data restricted
# to the subpopulation
ids_sub = ids[subpop_bool]
features_sub = features[subpop_bool, :]
labels_sub = labels[subpop_bool]

# First we cluster the whole population
print("Computing p-value for population.")

# sigclust returns a p-value and a per-sample cluster assignment;
# ITERS_B is forwarded as its `mc_iters` argument.
p, clust = sigclust(features, mc_iters=ITERS_B)

print("p-value for population:  %f" % p)
# Split the samples by cluster label: label 0 versus everything else.
clust0_bool = clust == 0
clust0_ind = popul_ind[clust0_bool]

clust1_bool = ~clust0_bool
clust1_ind = popul_ind[clust1_bool]


# Element-wise AND of each cluster mask with the subpopulation mask:
# the members of each cluster that also belong to the subpopulation.
ss_of_clust0_bool = clust0_bool & subpop_bool
ss_of_clust1_bool = clust1_bool & subpop_bool
"""
The clusters of the whole population
 determine two subsets (their respective
intersections with the subpopulation)"""
コード例 #6
0
ファイル: similarity.py プロジェクト: halfak/sigclust
# Script fragment: cluster the whole population with sigclust, then relate
# the resulting clusters to a subpopulation selected by `subpop_bool`.
# `popul_ind`, `subpop_bool`, `ids`, `features`, `labels` and `ITERS_B`
# are defined outside this fragment.

# Entries of popul_ind selected by subpop_bool
# (presumably the population indices of the subpopulation -- confirm
# against where popul_ind is built).
subpop_ind = popul_ind[subpop_bool]

# Sizes of the two populations: BIG is the length of `ids` along its
# first axis, SMALL the number of entries selected into subpop_ind.
BIG = ids.shape[0]
SMALL = subpop_ind.shape[0]

# The incoming data restricted
# to the subpopulation
ids_sub = ids[subpop_bool]
features_sub = features[subpop_bool,:]
labels_sub = labels[subpop_bool]

# First we cluster the whole population
print("Computing p-value for population.")

# sigclust returns a p-value and a per-sample cluster assignment;
# ITERS_B is forwarded as its `mc_iters` argument.
p, clust = sigclust(features,
                    mc_iters = ITERS_B)

print("p-value for population:  %f" % p)
# Split the samples by cluster label: label 0 versus everything else.
clust0_bool = clust == 0
clust0_ind = popul_ind[clust0_bool]

clust1_bool = ~clust0_bool
clust1_ind = popul_ind[clust1_bool]



"""
The clusters of the whole population
 determine two subsets (their respective 
intersections with the subpopulation)"""
# Element-wise AND of the cluster-0 mask with the subpopulation mask:
# the members of cluster 0 that also belong to the subpopulation.
ss_of_clust0_bool = clust0_bool & subpop_bool