Example #1
0
def main():
    util.enable_reversecomplement()

    args = loadargs()
    models = loadmodels(args)
    trdata = None
    tedata = None
    tfids = load_tfids(args)

    for tfid in tfids:
        if "calib" in args.steps:
            print "-------------- calib:", tfid, "--------------"
            trdata = load_traindata(tfid, args)
            util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib)

        if "train" in args.steps:
            print "-------------- train:", tfid, "--------------"
            trdata = load_traindata(tfid, args)
            util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial)

    if "test" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_metrics(tedata, "test", args.finaldir)

    if "report" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_featuremaps(tedata, args.finaldir, args.reportdir, maxrows=100000)
        util.save_report(args.finaldir, args.reportdir, tfids)
Example #2
0
def main():
    """Run the calib/train/test/report/chip steps listed in ``args.steps``.

    The TF groups are walked twice: first to calibrate and train on each
    group's ``train_fold``, then to test and build feature maps on each
    group's ``test_fold``.  Before testing, every group's id list is
    narrowed to the ids that have a ``model.pkl`` under
    ``<args.outdir>/final/<id>/``.  The summary report and performance
    tables are written once, over the ids of all groups.
    """
    util.enable_reversecomplement()

    args = loadargs()
    models = loadmodels(args)
    tfgroups = load_tfgroups(args)
    util.globals.flags.push("normalize_targets", True)

    # Pass 1: calibration and training on the training fold.
    for tfgroup in tfgroups:
        trdata = None
        if len(tfgroup["ids"]) == 0:
            print("No TFs to train on microarray %s" % tfgroup["train_fold"])
            continue

        if "calib" in args.steps:
            # trdata is passed back into the loader; presumably this lets
            # it reuse data that is already loaded -- confirm against
            # load_pbmdata.
            trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True)
            util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=True)

        if "train" in args.steps:
            trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True)
            util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial, metric_key="pearson.r")

    # Pass 2: testing and per-group feature maps on the test fold.
    for tfgroup in tfgroups:
        tedata = None

        # Keep only the ids whose trained model file exists on disk.  The
        # filtered list is written back into the group, so the report and
        # chip steps below see the narrowed ids as well.
        trained_ids = []
        for tfid in tfgroup["ids"]:
            if os.path.exists(args.outdir + "/final/" + tfid + "/model.pkl"):
                trained_ids.append(tfid)
            else:
                print("WARNING: did not find model for %s, skipping" % tfid)
        tfgroup["ids"] = trained_ids

        if len(tfgroup["ids"]) == 0:
            # Name the fold the test data is actually drawn from.
            print("No TFs to test on microarray %s" % tfgroup["test_fold"])
            continue

        if "test" in args.steps:
            tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False)
            save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"], args)

        if "report" in args.steps:
            tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False)
            util.save_featuremaps(tedata, args.finaldir, args.reportdir)

    if "report" in args.steps:
        # Flatten the per-group id lists into one new list.
        all_tfids = [tfid for tfgroup in tfgroups for tfid in tfgroup["ids"]]
        save_report(args.finaldir, args.reportdir, all_tfids, index_metric="pearson")
        save_pbm_performance_table(args, all_tfids)

    if "chip" in args.steps:
        all_tfids = [tfid for tfgroup in tfgroups for tfid in tfgroup["ids"]]
        save_chip_performance_table(args, all_tfids)
Example #3
0
def main():
    util.enable_reversecomplement()

    args = loadargs()
    models = loadmodels(args)
    trdata = None
    tedata = None
    tfids = load_tfids(args)

    for tfid in tfids:

        if "calib" in args.steps:
            print "-------------- calib:", tfid, "--------------"
            set_motif_lengths(args, models, tfid)
            trdata = load_traindata(tfid, args)
            util.calibrate(models,
                           trdata,
                           args.calibdir,
                           nfold=args.nfold,
                           ncalib=args.ncalib,
                           allfolds=False)

        if "train" in args.steps:
            print "-------------- train:", tfid, "--------------"
            set_motif_lengths(args, models, tfid)
            trdata = load_traindata(tfid, args)
            util.train(models,
                       trdata,
                       args.calibdir,
                       args.finaldir,
                       nfold=1,
                       ntrial=args.ntrial)

    if "test" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_metrics(tedata, "test", args.finaldir)

    if "report" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_featuremaps(tedata, args.finaldir, args.reportdir)
        util.save_report(args.finaldir, args.reportdir, tfids)
Example #4
0
def main():
    """Run the calib/train/test/report steps selected in ``args.steps``.

    Two global target flags are enabled up front.  Each step then runs at
    most once, in the fixed order calib, train, test, report.
    """
    args = loadargs()
    models = loadmodels(args)
    train_set = None

    # clamp_targets: clamp the top .995 percentile of target values, to
    #   avoid extremely large targets (e.g. 00038/MBNL suffers from this).
    # normalize_targets: make the targets have unit variance.
    for flag_name in ("clamp_targets", "normalize_targets"):
        util.globals.flags.push(flag_name, True)

    if "calib" in args.steps:
        train_set = load_traindata(train_set, args)
        util.calibrate(models, train_set, args.calibdir,
                       nfold=args.nfold, ncalib=args.ncalib, allfolds=True)

    if "train" in args.steps:
        # train_set from the "calib" step, if any, is passed back to the
        # loader.
        train_set = load_traindata(train_set, args)
        util.train(models, train_set, args.calibdir, args.finaldir,
                   nfold=1, ntrial=args.ntrial)

    if "test" in args.steps:
        save_test_predictions(args)

    if "report" in args.steps:
        save_pfms(args)
        # Only the first element of the returned pair is used.
        chunk_ids, _ = get_chunktargets(args)
        report_ids = sorted(chunk_ids)
        util.save_report(args.finaldir, args.reportdir, report_ids,
                         index_metric="pearson", rna=True)
Example #5
0
def main():
    """Run the calib/train/test/report/chip steps listed in ``args.steps``.

    The TF groups are processed in two passes.  The first pass calibrates
    and trains on each group's ``train_fold``.  The second pass narrows
    each group's ids to those with a ``model.pkl`` under
    ``<args.outdir>/final/<id>/`` and then tests and saves feature maps on
    the group's ``test_fold``.  Afterwards the report and the performance
    tables are written once, for the ids of all groups combined.
    """
    util.enable_reversecomplement()

    args = loadargs()
    models = loadmodels(args)
    tfgroups = load_tfgroups(args)
    util.globals.flags.push("normalize_targets", True)

    # Pass 1: calibration and training on the training fold.
    for tfgroup in tfgroups:
        trdata = None
        if len(tfgroup["ids"]) == 0:
            print("No TFs to train on microarray %s" % tfgroup["train_fold"])
            continue

        if "calib" in args.steps:
            # trdata is fed back into the loader; presumably so data that
            # is already loaded can be reused -- confirm against
            # load_pbmdata.
            trdata = load_pbmdata(trdata,
                                  tfgroup["ids"],
                                  tfgroup["train_fold"],
                                  args,
                                  remove_probe_bias=True)
            util.calibrate(models,
                           trdata,
                           args.calibdir,
                           nfold=args.nfold,
                           ncalib=args.ncalib,
                           allfolds=True)

        if "train" in args.steps:
            trdata = load_pbmdata(trdata,
                                  tfgroup["ids"],
                                  tfgroup["train_fold"],
                                  args,
                                  remove_probe_bias=True)
            util.train(models,
                       trdata,
                       args.calibdir,
                       args.finaldir,
                       nfold=1,
                       ntrial=args.ntrial,
                       metric_key="pearson.r")

    # Pass 2: testing and per-group feature maps on the test fold.
    for tfgroup in tfgroups:
        tedata = None

        # Drop ids that have no trained model on disk.  The filtered list
        # replaces the group's ids, so the report and chip steps below use
        # the narrowed ids too.
        trained_ids = []
        for tfid in tfgroup["ids"]:
            if os.path.exists(args.outdir + "/final/" + tfid + "/model.pkl"):
                trained_ids.append(tfid)
            else:
                print("WARNING: did not find model for %s, skipping" % tfid)
        tfgroup["ids"] = trained_ids

        if len(tfgroup["ids"]) == 0:
            # Name the fold the test data is actually drawn from.
            print("No TFs to test on microarray %s" % tfgroup["test_fold"])
            continue

        if "test" in args.steps:
            tedata = load_pbmdata(tedata,
                                  tfgroup["ids"],
                                  tfgroup["test_fold"],
                                  args,
                                  remove_probe_bias=False)
            save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"],
                                  args)

        if "report" in args.steps:
            tedata = load_pbmdata(tedata,
                                  tfgroup["ids"],
                                  tfgroup["test_fold"],
                                  args,
                                  remove_probe_bias=False)
            util.save_featuremaps(tedata, args.finaldir, args.reportdir)

    if "report" in args.steps:
        # Flatten the per-group id lists into one new list.
        all_tfids = [tfid for tfgroup in tfgroups for tfid in tfgroup["ids"]]
        save_report(args.finaldir,
                    args.reportdir,
                    all_tfids,
                    index_metric="pearson")
        save_pbm_performance_table(args, all_tfids)

    if "chip" in args.steps:
        all_tfids = [tfid for tfgroup in tfgroups for tfid in tfgroup["ids"]]
        save_chip_performance_table(args, all_tfids)