def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) trdata = None tedata = None tfids = load_tfids(args) for tfid in tfids: if "calib" in args.steps: print "-------------- calib:", tfid, "--------------" trdata = load_traindata(tfid, args) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib) if "train" in args.steps: print "-------------- train:", tfid, "--------------" trdata = load_traindata(tfid, args) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial) if "test" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_metrics(tedata, "test", args.finaldir) if "report" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_featuremaps(tedata, args.finaldir, args.reportdir, maxrows=100000) util.save_report(args.finaldir, args.reportdir, tfids)
def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) tfgroups = load_tfgroups(args) util.globals.flags.push("normalize_targets", True) for tfgroup in tfgroups: trdata = None if len(tfgroup["ids"]) == 0: print "No TFs to train on microarray %s"%tfgroup["train_fold"] continue if "calib" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=True) if "train" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial, metric_key="pearson.r") for tfgroup in tfgroups: tedata = None newids = [] for id in tfgroup["ids"]: if os.path.exists(args.outdir+"/final/"+id+"/model.pkl"): newids.append(id) else: print "WARNING: did not find model for %s, skipping" % id tfgroup["ids"] = newids if len(tfgroup["ids"]) == 0: print "No TFs to test on microarray %s"%tfgroup["train_fold"] continue if "test" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"], args) if "report" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) util.save_featuremaps(tedata, args.finaldir, args.reportdir) if "report" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups],[]) save_report(args.finaldir, args.reportdir, all_tfids, index_metric="pearson") save_pbm_performance_table(args, all_tfids) if "chip" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups],[]) save_chip_performance_table(args, all_tfids)
def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) trdata = None tedata = None tfids = load_tfids(args) for tfid in tfids: if "calib" in args.steps: print "-------------- calib:", tfid, "--------------" set_motif_lengths(args, models, tfid) trdata = load_traindata(tfid, args) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=False) if "train" in args.steps: print "-------------- train:", tfid, "--------------" set_motif_lengths(args, models, tfid) trdata = load_traindata(tfid, args) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial) if "test" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_metrics(tedata, "test", args.finaldir) if "report" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_featuremaps(tedata, args.finaldir, args.reportdir) util.save_report(args.finaldir, args.reportdir, tfids)
def main():
    """RNA pipeline: trains on chunked targets (no reverse-complement),
    with target clamping/normalization enabled via global flags."""
    args = loadargs()
    models = loadmodels(args)
    training_set = None

    # Clamp the top .995 percentile of target values, to avoid extremely
    # large targets (e.g. 00038/MBNL suffers from this).
    util.globals.flags.push("clamp_targets", True)
    # Make the targets have unit variance.
    util.globals.flags.push("normalize_targets", True)

    if "calib" in args.steps:
        training_set = load_traindata(training_set, args)
        util.calibrate(models, training_set, args.calibdir,
                       nfold=args.nfold, ncalib=args.ncalib, allfolds=True)

    if "train" in args.steps:
        training_set = load_traindata(training_set, args)
        util.train(models, training_set, args.calibdir, args.finaldir,
                   nfold=1, ntrial=args.ntrial)

    if "test" in args.steps:
        save_test_predictions(args)

    if "report" in args.steps:
        save_pfms(args)
        chunk_ids, _ = get_chunktargets(args)
        util.save_report(args.finaldir, args.reportdir, sorted(chunk_ids),
                         index_metric="pearson", rna=True)
def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) tfgroups = load_tfgroups(args) util.globals.flags.push("normalize_targets", True) for tfgroup in tfgroups: trdata = None if len(tfgroup["ids"]) == 0: print "No TFs to train on microarray %s" % tfgroup["train_fold"] continue if "calib" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=True) if "train" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial, metric_key="pearson.r") for tfgroup in tfgroups: tedata = None newids = [] for id in tfgroup["ids"]: if os.path.exists(args.outdir + "/final/" + id + "/model.pkl"): newids.append(id) else: print "WARNING: did not find model for %s, skipping" % id tfgroup["ids"] = newids if len(tfgroup["ids"]) == 0: print "No TFs to test on microarray %s" % tfgroup["train_fold"] continue if "test" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"], args) if "report" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) util.save_featuremaps(tedata, args.finaldir, args.reportdir) if "report" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups], []) save_report(args.finaldir, args.reportdir, all_tfids, index_metric="pearson") save_pbm_performance_table(args, all_tfids) if "chip" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups], []) save_chip_performance_table(args, all_tfids)