Esempio n. 1
0
def clustermodel(fcovs, fmeth, model,
                 # clustering args
                 max_dist=200, linkage='complete', rho_min=0.32,
                 min_clust_size=1,
                 merge_linkage=None,
                 max_merge_dist=0,
                 counts=False,
                 sep="\t",
                 X=None, X_locs=None, X_dist=None,
                 weights=None,
                 outlier_sds=None,
                 combine=False, bumping=False, betareg=False,
                 gee_args=(), skat=False,
                 png_path=None):
    """Cluster the features built from `fmeth` and yield one result per cluster.

    This is a generator, so nothing below runs until the first item is
    requested.  The pipeline is:

    1. ``feature_gen(fmeth, rho_min=rho_min, weights=weights)`` produces the
       feature iterable.
    2. ``mclust`` groups those features; it receives `max_dist`, `linkage`,
       `merge_linkage` and `max_merge_dist`.
    3. Clusters with fewer than `min_clust_size` members are dropped.
    4. The surviving clusters go to ``clustermodelgen`` together with
       `fcovs`, `model`, `sep`, `X`, `X_locs`, `X_dist`, `outlier_sds`,
       `combine`, `bumping`, `betareg`, `gee_args`, `skat`, `counts` and
       `png_path`; every item it produces is re-yielded unchanged.

    Raises:
        AssertionError: on first iteration when `min_clust_size` is below 1
            (only while assertions are enabled).
    """
    # `weights` is handed to feature_gen here and is not passed on separately
    # to the later stages.
    features = feature_gen(fmeth, rho_min=rho_min, weights=weights)
    assert min_clust_size >= 1

    clustering_opts = dict(
        max_dist=max_dist,
        linkage=linkage,
        merge_linkage=merge_linkage,
        max_merge_dist=max_merge_dist,
    )
    all_clusters = mclust(features, **clustering_opts)
    # Lazily discard clusters that are too small.
    kept_clusters = (clust for clust in all_clusters
                     if len(clust) >= min_clust_size)

    modelling_opts = dict(
        sep=sep,
        X=X, X_locs=X_locs, X_dist=X_dist,
        outlier_sds=outlier_sds,
        combine=combine, bumping=bumping, betareg=betareg,
        gee_args=gee_args, skat=skat, counts=counts, png_path=png_path,
    )
    for result in clustermodelgen(fcovs, kept_clusters, model,
                                  **modelling_opts):
        yield result
Esempio n. 2
0
# Path to the methylation input that is later passed to feature_gen().
methylation_file = '../../../crystal/tests/meth.txt.gz'
# Model formula handed to crystal.model_clusters further down.
formula = 'methylation ~ age + gender'
# Name of the coefficient handed to crystal.model_clusters further down.
coef = 'gender'

# NOTE(review): `covariates_file` is not assigned anywhere in the visible part
# of this snippet; it has to be defined before this line — confirm.
covs = pd.read_csv(covariates_file)


def feature_gen(fname):
    """Yield one ``crystal.Feature`` for every row of `fname` after the first.

    Rows come from ``ts.reader(fname, header=False)``.  The first row is
    skipped (presumably a header line — confirm against the input file).
    For every other row the first column must look like ``chrom:pos``; the
    remaining columns are converted to floats, wrapped in a NumPy array and
    passed through ``crystal.logit`` before being stored on the feature.

    Raises:
        ValueError: if the first column does not split into exactly two
            parts on ``":"``, or a position/value cannot be converted.
    """
    for i, d in enumerate(ts.reader(fname, header=False)):
        if i == 0:
            continue
        chrom, pos = d[0].split(":")
        # Build a real list first: on Python 3 map() returns an iterator and
        # np.array() would wrap it as a 0-d object array instead of a float
        # vector.  On Python 2 the result is identical to the old code.
        values = np.array([float(v) for v in d[1:]])
        yield crystal.Feature(chrom, int(pos), crystal.logit(values))


# Group the features from the methylation file into clusters.
cluster_iter = mclust(feature_gen(methylation_file), max_dist=100, max_skip=0)

# One tab-separated output line per modelled cluster, preceded by a header.
fmt = "{chrom}\t{start}\t{end}\t{p:.4g}\t{coef:.3f}\t{n_sites:d}"
print(ts.fmt2header(fmt))

modeled = crystal.model_clusters(
    cluster_iter, covs, formula, coef,
    model_fn=crystal.zscore_cluster, n_cpu=1)
for i, c in enumerate(modeled):
    print(fmt.format(**c))
    # Keep scanning until a cluster passes all three thresholds.
    if not (c['p'] < 1e-3 and abs(c['coef']) > 0.2 and c['n_sites'] > 3):
        continue
    # Plot the first qualifying cluster, save the figure and stop.
    crystal.plot.spaghetti_plot(c, covs)
    plt.savefig('/tmp/figure-1.eps')
    break
Esempio n. 3
0
    return r


if __name__ == "__main__":

    def feature_gen(fname):
        """Yield one ``Feature`` for every row of `fname` after the first.

        Rows come from ``ts.reader(fname, header=False)``; the first row is
        skipped (presumably a header line — confirm).  The first column of
        each remaining row must look like ``chrom:pos``; all other columns
        are converted to floats and stored on the feature as a list.

        Raises:
            ValueError: if the first column does not split into exactly two
                parts on ``":"``, or a position/value cannot be converted.
        """
        for i, toks in enumerate(ts.reader(fname, header=False)):
            if i == 0:
                continue
            chrom, pos = toks[0].split(":")
            # Pass a real list rather than map(): on Python 3 map() is a
            # one-shot iterator with no len()/indexing, whereas on Python 2
            # this is exactly the list the old code produced.
            values = [float(tok) for tok in toks[1:]]
            yield Feature(chrom, int(pos), values)

    # fmt = "{chrom}\t{start}\t{end}\t{n_probes}\t{p:5g}\t{t:.4f}\t{coef:.4f}\t{var}"
    # print ts.fmt2header(fmt)

    # Lazily keep only the clusters that hold more than two features.
    clust_iter = (
        clust
        for clust in mclust(feature_gen(sys.argv[1]), max_dist=400, max_skip=1)
        if len(clust) > 2
    )

    # Covariate table; the "id" column is a copy of "StudyID".
    df = pd.read_table("meth.clin.txt")
    df["id"] = df["StudyID"]
    formula = "methylation ~ asthma + age + gender"

    # Fixed seed so repeated runs draw the same random numbers.
    np.random.seed(10)
    ntrue = nfalse = 10

    # NOTE(review): clust_iter is a one-shot generator shared by every call
    # below; what the later methods receive depends on how much of it
    # evaluate_method consumes — confirm this is intended.
    results = []
    for fn in (bump_cluster, liptak_cluster, zscore_cluster,
               gee_cluster, mixed_model_cluster):
        outcome = evaluate_method(clust_iter, df, formula, "asthma", fn, ntrue, nfalse)
        results.append(outcome)
Esempio n. 4
0
    return r


if __name__ == "__main__":

    def feature_gen(fname):
        """Yield one ``Feature`` for every row of `fname` after the first.

        Rows come from ``ts.reader(fname, header=False)``; the first row is
        skipped (presumably a header line — confirm).  The first column of
        each remaining row must look like ``chrom:pos``; all other columns
        are converted to floats and stored on the feature as a list.

        Raises:
            ValueError: if the first column does not split into exactly two
                parts on ``":"``, or a position/value cannot be converted.
        """
        for i, toks in enumerate(ts.reader(fname, header=False)):
            if i == 0:
                continue
            chrom, pos = toks[0].split(":")
            # Pass a real list rather than map(): on Python 3 map() is a
            # one-shot iterator with no len()/indexing, whereas on Python 2
            # this is exactly the list the old code produced.
            values = [float(tok) for tok in toks[1:]]
            yield Feature(chrom, int(pos), values)

    #fmt = "{chrom}\t{start}\t{end}\t{n_probes}\t{p:5g}\t{t:.4f}\t{coef:.4f}\t{var}"
    #print ts.fmt2header(fmt)

    # Lazily keep only the clusters that hold more than two features.
    clust_iter = (
        clust
        for clust in mclust(feature_gen(sys.argv[1]), max_dist=400, max_skip=1)
        if len(clust) > 2
    )

    # Covariate table; the 'id' column is a copy of 'StudyID'.
    df = pd.read_table('meth.clin.txt')
    df['id'] = df['StudyID']
    formula = "methylation ~ asthma + age + gender"

    # Fixed seed so repeated runs draw the same random numbers.
    np.random.seed(10)
    ntrue = nfalse = 10

    # NOTE(review): clust_iter is a one-shot generator shared by every call
    # below; what the later methods receive depends on how much of it
    # evaluate_method consumes — confirm this is intended.
    results = []
    for fn in (bump_cluster, liptak_cluster, zscore_cluster):
        outcome = evaluate_method(clust_iter, df, formula, 'asthma', fn,
                                  ntrue, nfalse)
        results.append(outcome)
Esempio n. 5
0
# Path of the CSV file loaded into `covs` below.
covariates_file = "../../../crystal/tests/covs.csv"
# Path to the methylation input that is later passed to feature_gen().
methylation_file = "../../../crystal/tests/meth.txt.gz"
# Model formula handed to crystal.model_clusters further down.
formula = "methylation ~ age + gender"
# Name of the coefficient handed to crystal.model_clusters further down.
coef = "gender"

# Covariate table; also passed to crystal.plot.spaghetti_plot further down.
covs = pd.read_csv(covariates_file)


def feature_gen(fname):
    """Yield one ``crystal.Feature`` for every row of `fname` after the first.

    Rows come from ``ts.reader(fname, header=False)``.  The first row is
    skipped (presumably a header line — confirm against the input file).
    For every other row the first column must look like ``chrom:pos``; the
    remaining columns are converted to floats, wrapped in a NumPy array and
    passed through ``crystal.logit`` before being stored on the feature.

    Raises:
        ValueError: if the first column does not split into exactly two
            parts on ``":"``, or a position/value cannot be converted.
    """
    for i, d in enumerate(ts.reader(fname, header=False)):
        if i == 0:
            continue
        chrom, pos = d[0].split(":")
        # Build a real list first: on Python 3 map() returns an iterator and
        # np.array() would wrap it as a 0-d object array instead of a float
        # vector.  On Python 2 the result is identical to the old code.
        values = np.array([float(v) for v in d[1:]])
        yield crystal.Feature(chrom, int(pos), crystal.logit(values))


# Group the features from the methylation file into clusters.
cluster_iter = mclust(feature_gen(methylation_file), max_dist=100, max_skip=0)


# One tab-separated output line per modelled cluster, preceded by a header.
fmt = "{chrom}\t{start}\t{end}\t{p:.4g}\t{coef:.3f}\t{n_sites:d}"
print(ts.fmt2header(fmt))

modeled = crystal.model_clusters(
    cluster_iter,
    covs,
    formula,
    coef,
    model_fn=crystal.zscore_cluster,
    n_cpu=1,
)
for i, c in enumerate(modeled):
    print(fmt.format(**c))
    # Keep scanning until a cluster passes all three thresholds.
    if not (c["p"] < 1e-3 and abs(c["coef"]) > 0.2 and c["n_sites"] > 3):
        continue
    # Plot the first qualifying cluster, save the figure and stop.
    crystal.plot.spaghetti_plot(c, covs)
    plt.savefig("/tmp/figure-1.eps")
    break