def main(ddir):
    """Plot one tree visualisation per EC number found in the scores of ``ddir``.

    Reads the tree adjacency list and the augmented-format scores through
    ``common``, regroups the scores from ``scores[species][ec]`` into
    ``ecscores[ec][species]``, and for every EC number (in sorted order) passes
    the string returned by ``tree_to_dot`` to ``plot``. Output goes to
    ``<ddir>/<common.EC_VISUALS_DIR>`` with the EC number as the file name.

    The largest ``bscore`` seen over all (species, EC) pairs is handed to every
    ``tree_to_dot`` call.

    Raises:
        OSError: if the output directory cannot be created and does not
            already exist.
    """
    tree = common.read_tree_adj_list(ddir)
    scores, all_ecs = common.read_scores(ddir, augmented_format=True)

    # Invert the species -> EC mapping and track the global maximum bscore.
    ecscores = {}
    max_blast = 0
    for species in scores:
        for ec in scores[species]:
            score = scores[species][ec]
            ecscores.setdefault(ec, {})[species] = score
            blast = score.bscore
            if blast > max_blast:
                max_blast = blast

    outdir = "%s/%s" % (ddir, common.EC_VISUALS_DIR)
    try:
        os.mkdir(outdir)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # missing parent, ...) is a real failure and must not be hidden.
        if not os.path.isdir(outdir):
            raise

    for ec in sorted(ecscores):
        s = tree_to_dot(tree, ecscores[ec], max_blast, ec)
        outfn = "%s" % (ec)
        plot(s, outdir, outfn)
def main(ddir):
    """Plot one tree visualisation per EC number found in the scores of ``ddir``.

    Reads the tree adjacency list and the augmented-format scores through
    ``common``, regroups the scores from ``scores[species][ec]`` into
    ``ecscores[ec][species]``, and for every EC number (in sorted order) passes
    the string returned by ``tree_to_dot`` to ``plot``. Output goes to
    ``<ddir>/<common.EC_VISUALS_DIR>`` with the EC number as the file name.

    The largest ``bscore`` seen over all (species, EC) pairs is handed to every
    ``tree_to_dot`` call.

    Raises:
        OSError: if the output directory cannot be created and does not
            already exist.
    """
    tree = common.read_tree_adj_list(ddir)
    scores, all_ecs = common.read_scores(ddir, augmented_format=True)

    # Invert the species -> EC mapping and track the global maximum bscore.
    ecscores = {}
    max_blast = 0
    for species in scores:
        for ec in scores[species]:
            score = scores[species][ec]
            ecscores.setdefault(ec, {})[species] = score
            blast = score.bscore
            if blast > max_blast:
                max_blast = blast

    outdir = "%s/%s" % (ddir, common.EC_VISUALS_DIR)
    try:
        os.mkdir(outdir)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # missing parent, ...) is a real failure and must not be hidden.
        if not os.path.isdir(outdir):
            raise

    for ec in sorted(ecscores):
        s = tree_to_dot(tree, ecscores[ec], max_blast, ec)
        outfn = "%s" % (ec)
        plot(s, outdir, outfn)
def roc(ddir, mdir, remove_partial=FILTER_PARTIAL_ECS):
    """Compute ROC curves for all models in ``mdir`` against the scores in ``ddir``.

    Args:
        ddir: directory passed to ``common.read_scores``.
        mdir: directory passed to ``common.read_models``.
        remove_partial: forwarded to ``common.read_models``; when true the EC
            numbers read from the scores are additionally run through
            ``filter_partial_ecs``.

    The union of the model ECs and the (possibly filtered) score ECs is handed
    to ``compute_roc_curves`` together with the models and scores.
    """
    # Single-argument print(...) behaves the same as the print statement.
    print("Reading models...")
    models, model_ecs = common.read_models(mdir, remove_partial)

    print("Reading reaction scores...")
    scores, score_ecs = common.read_scores(ddir, True)
    if remove_partial:
        score_ecs = filter_partial_ecs(score_ecs)
    ec_universe = model_ecs.union(score_ecs)

    print("Computing ROC curves...")
    compute_roc_curves(models, scores, ec_universe)
def roc(ddir, mdir, remove_partial=FILTER_PARTIAL_ECS):
    """Compute ROC curves for all models in ``mdir`` against the scores in ``ddir``.

    Args:
        ddir: directory passed to ``common.read_scores``.
        mdir: directory passed to ``common.read_models``.
        remove_partial: forwarded to ``common.read_models``; when true the EC
            numbers read from the scores are additionally run through
            ``filter_partial_ecs``.

    The union of the model ECs and the (possibly filtered) score ECs is handed
    to ``compute_roc_curves`` together with the models and scores.
    """
    # Single-argument print(...) behaves the same as the print statement.
    print("Reading models...")
    models, model_ecs = common.read_models(mdir, remove_partial)

    print("Reading reaction scores...")
    scores, score_ecs = common.read_scores(ddir, True)
    if remove_partial:
        score_ecs = filter_partial_ecs(score_ecs)
    ec_universe = model_ecs.union(score_ecs)

    print("Computing ROC curves...")
    compute_roc_curves(models, scores, ec_universe)
def roc_one(ddir, modelfn, species, outdir, remove_partial=FILTER_PARTIAL_ECS):
    """Write the ROC curve files for a single species and plot them.

    Args:
        ddir: project directory; scores are read from it and the output
            directory ``<ddir>/<outdir>`` is created inside it.
        modelfn: path of the model file handed (as an open file) to
            ``common.read_model``.
        species: key under which the model is stored and base name of every
            output file.
        outdir: name of the output directory relative to ``ddir``.
        remove_partial: forwarded to ``common.read_model`` and
            ``compute_roc_curve``; when true the EC numbers read from the
            scores are also passed through ``filter_partial_ecs``.

    One file ``<ddir>/<outdir>/<species>.<suffix>`` is written per entry of
    the table below, then ``plot_single`` is called for "pdf" and "png".

    Raises:
        OSError: if the output directory cannot be created and does not
            already exist.
    """
    roc_dir = "%s/%s" % (ddir, outdir)
    try:
        os.mkdir(roc_dir)
    except OSError:
        # Only an already existing directory is acceptable.
        if not os.path.isdir(roc_dir):
            raise

    # Close the model file deterministically instead of leaking the handle.
    model_file = open(modelfn)
    try:
        model = common.read_model(model_file, remove_partial)
    finally:
        model_file.close()

    scores, data_ecs = common.read_scores(ddir, True)
    if remove_partial:
        data_ecs = filter_partial_ecs(data_ecs)
    all_ecs = model.union(data_ecs)
    models = {species: model}

    # (output file suffix, score name string, seventh argument) for each
    # compute_roc_curve call.
    # NOTE(review): the meaning of the 1 / None argument is defined by
    # compute_roc_curve and is not visible here.
    curves = (
        ("full", "pscore", 1),
        ("naive", "npscore", 1),
        ("blast", "bscore", None),
        ("gtg", "gscore", 1),
        ("blasttree", "btscore", 1),
        ("gtgtree", "gtscore", 1),
    )
    for suffix, score_name, bound in curves:
        of = open("%s/%s.%s" % (roc_dir, species, suffix), "w")
        try:
            compute_roc_curve(models, scores, all_ecs, of, species,
                              score_name, bound, remove_partial)
        finally:
            # Release the handle even when the computation fails.
            of.close()

    plot_single(ddir, species, outdir, "pdf")
    plot_single(ddir, species, outdir, "png")
    # Deletes *.Rout files from the current working directory; presumably
    # these are left behind by the plotting step - TODO confirm.
    subprocess.call("rm *.Rout", shell=True)
def roc_one(ddir, modelfn, species, outdir, remove_partial=FILTER_PARTIAL_ECS):
    """Write the ROC curve files for a single species and plot them.

    Args:
        ddir: project directory; scores are read from it and the output
            directory ``<ddir>/<outdir>`` is created inside it.
        modelfn: path of the model file handed (as an open file) to
            ``common.read_model``.
        species: key under which the model is stored and base name of every
            output file.
        outdir: name of the output directory relative to ``ddir``.
        remove_partial: forwarded to ``common.read_model`` and
            ``compute_roc_curve``; when true the EC numbers read from the
            scores are also passed through ``filter_partial_ecs``.

    One file ``<ddir>/<outdir>/<species>.<suffix>`` is written per entry of
    the table below, then ``plot_single`` is called for "pdf" and "png".

    Raises:
        OSError: if the output directory cannot be created and does not
            already exist.
    """
    roc_dir = "%s/%s" % (ddir, outdir)
    try:
        os.mkdir(roc_dir)
    except OSError:
        # Only an already existing directory is acceptable.
        if not os.path.isdir(roc_dir):
            raise

    # Close the model file deterministically instead of leaking the handle.
    model_file = open(modelfn)
    try:
        model = common.read_model(model_file, remove_partial)
    finally:
        model_file.close()

    scores, data_ecs = common.read_scores(ddir, True)
    if remove_partial:
        data_ecs = filter_partial_ecs(data_ecs)
    all_ecs = model.union(data_ecs)
    models = {species: model}

    # (output file suffix, score name string, seventh argument) for each
    # compute_roc_curve call.
    # NOTE(review): the meaning of the 1 / None argument is defined by
    # compute_roc_curve and is not visible here.
    curves = (
        ("full", "pscore", 1),
        ("naive", "npscore", 1),
        ("blast", "bscore", None),
        ("gtg", "gscore", 1),
        ("blasttree", "btscore", 1),
        ("gtgtree", "gtscore", 1),
    )
    for suffix, score_name, bound in curves:
        of = open("%s/%s.%s" % (roc_dir, species, suffix), "w")
        try:
            compute_roc_curve(models, scores, all_ecs, of, species,
                              score_name, bound, remove_partial)
        finally:
            # Release the handle even when the computation fails.
            of.close()

    plot_single(ddir, species, outdir, "pdf")
    plot_single(ddir, species, outdir, "png")
    # Deletes *.Rout files from the current working directory; presumably
    # these are left behind by the plotting step - TODO confirm.
    subprocess.call("rm *.Rout", shell=True)
def compute_reaction_scores(ddir, r2ec, ec2r, target_species, remove_partial=False):
    """Write one reaction-score file per species under ``ddir``.

    For each species in the scores read from ``ddir`` a file
    ``<ddir>/<common.REACTION_SCORE_DIR>/<species>`` is written. ECs that are
    not keys of ``ec2r`` are written immediately as ``?<TAB>ec<TAB>score``.
    For every reaction reachable through ``ec2r`` the score with the highest
    ``pscore`` is kept and written as ``reaction<TAB>ec1,ec2,...<TAB>score``,
    reactions in sorted order.

    Args:
        ddir: project directory.
        r2ec: not used by this function; kept for interface compatibility.
        ec2r: mapping from EC number to an iterable of reactions.
        target_species: container of species to process, or ``None`` to
            process every species.
        remove_partial: when true, EC numbers containing "-" are skipped
            (and reported on stdout).

    Returns:
        A list of all species keys in the scores, including species that were
        skipped because of ``target_species``.

    Raises:
        OSError: if the output directory cannot be created and does not
            already exist.
    """
    scores, all_ecs = common.read_scores(ddir, True)

    outdir = "%s/%s" % (ddir, common.REACTION_SCORE_DIR)
    try:
        os.mkdir(outdir)
    except OSError:
        # Only an already existing directory is acceptable.
        if not os.path.isdir(outdir):
            raise

    print("Computing reaction scores...")
    for species in scores:
        if target_species is not None and species not in target_species:
            continue
        print(" %s" % (species))
        sfn = "%s/%s" % (outdir, species)
        o = open(sfn, "w")
        try:
            rscores = {}  # reaction -> best score seen so far
            rec = {}      # reaction -> set of ECs mapped to it
            for ec in scores[species]:
                if remove_partial and "-" in ec:
                    print("reco-dir.py: Discarding reaction score for partial EC number: %s" % (ec,))
                    continue
                score = scores[species][ec]
                if ec not in ec2r:
                    o.write("?\t%s\t%s\n" % (ec, score))
                    continue
                for r in ec2r[ec]:
                    if r not in rscores or score.pscore > rscores[r].pscore:
                        rscores[r] = score
                    # NOTE(review): every EC mapped to r is recorded, not only
                    # the best-scoring one - confirm against the original
                    # indentation, which was lost in this copy of the file.
                    rec.setdefault(r, set()).add(ec)
            for r in sorted(rscores):
                # Sort the ECs so the output does not depend on set ordering.
                o.write("%s\t%s\t%s\n" % (r, ",".join(sorted(rec[r])), rscores[r]))
        finally:
            # Release the handle even if scoring or writing fails.
            o.close()
    return list(scores.keys())
def plot_reaction_scores(ddir):
    """Plot the scores of every species as PDF and PNG.

    Reads the augmented-format scores from ``ddir`` and, for each species in
    sorted order, writes one line per score to a temporary file and calls
    ``splot`` on it twice ("pdf" and "png") with the plot path
    ``<ddir>/<common.REACTION_SCORE_PLOT_DIR>/<species>``. The species name is
    echoed to stdout as progress output.

    Raises:
        OSError: if the plot directory cannot be created and does not
            already exist.
    """
    scores, all_ecs = common.read_scores(ddir, augmented_format=True)

    plotdir = "%s/%s" % (ddir, common.REACTION_SCORE_PLOT_DIR)
    try:
        os.mkdir(plotdir)
    except OSError:
        # Only an already existing directory is acceptable.
        if not os.path.isdir(plotdir):
            raise

    for species in sorted(scores):
        sys.stdout.write("%s " % (species))
        plotfn = "%s/%s" % (plotdir, species)
        # Write through the temporary file's own handle rather than reopening
        # it by name, and close it explicitly so it is removed right away.
        tf = tempfile.NamedTemporaryFile(mode="w")
        try:
            for ec in scores[species]:
                rscore = scores[species][ec]
                tf.write("%s\n" % (rscore))
            # splot reads the file by name, so the data must be on disk first.
            tf.flush()
            splot(tf.name, plotfn, "pdf")
            splot(tf.name, plotfn, "png")
        finally:
            tf.close()
def plot_reaction_scores(ddir):
    """Plot the scores of every species as PDF and PNG.

    Reads the augmented-format scores from ``ddir`` and, for each species in
    sorted order, writes one line per score to a temporary file and calls
    ``splot`` on it twice ("pdf" and "png") with the plot path
    ``<ddir>/<common.REACTION_SCORE_PLOT_DIR>/<species>``. The species name is
    echoed to stdout as progress output.

    Raises:
        OSError: if the plot directory cannot be created and does not
            already exist.
    """
    scores, all_ecs = common.read_scores(ddir, augmented_format=True)

    plotdir = "%s/%s" % (ddir, common.REACTION_SCORE_PLOT_DIR)
    try:
        os.mkdir(plotdir)
    except OSError:
        # Only an already existing directory is acceptable.
        if not os.path.isdir(plotdir):
            raise

    for species in sorted(scores):
        sys.stdout.write("%s " % (species))
        plotfn = "%s/%s" % (plotdir, species)
        # Write through the temporary file's own handle rather than reopening
        # it by name, and close it explicitly so it is removed right away.
        tf = tempfile.NamedTemporaryFile(mode="w")
        try:
            for ec in scores[species]:
                rscore = scores[species][ec]
                tf.write("%s\n" % (rscore))
            # splot reads the file by name, so the data must be on disk first.
            tf.flush()
            splot(tf.name, plotfn, "pdf")
            splot(tf.name, plotfn, "png")
        finally:
            tf.close()
def compute_reaction_scores(ddir, r2ec, ec2r, target_species, remove_partial=False):
    """Write one reaction-score file per species under ``ddir``.

    For each species in the scores read from ``ddir`` a file
    ``<ddir>/<common.REACTION_SCORE_DIR>/<species>`` is written. ECs that are
    not keys of ``ec2r`` are written immediately as ``?<TAB>ec<TAB>score``.
    For every reaction reachable through ``ec2r`` the score with the highest
    ``pscore`` is kept and written as ``reaction<TAB>ec1,ec2,...<TAB>score``,
    reactions in sorted order.

    Args:
        ddir: project directory.
        r2ec: not used by this function; kept for interface compatibility.
        ec2r: mapping from EC number to an iterable of reactions.
        target_species: container of species to process, or ``None`` to
            process every species.
        remove_partial: when true, EC numbers containing "-" are skipped
            (and reported on stdout).

    Returns:
        A list of all species keys in the scores, including species that were
        skipped because of ``target_species``.

    Raises:
        OSError: if the output directory cannot be created and does not
            already exist.
    """
    scores, all_ecs = common.read_scores(ddir, True)

    outdir = "%s/%s" % (ddir, common.REACTION_SCORE_DIR)
    try:
        os.mkdir(outdir)
    except OSError:
        # Only an already existing directory is acceptable.
        if not os.path.isdir(outdir):
            raise

    print("Computing reaction scores...")
    for species in scores:
        if target_species is not None and species not in target_species:
            continue
        print(" %s" % (species))
        sfn = "%s/%s" % (outdir, species)
        o = open(sfn, "w")
        try:
            rscores = {}  # reaction -> best score seen so far
            rec = {}      # reaction -> set of ECs mapped to it
            for ec in scores[species]:
                if remove_partial and "-" in ec:
                    print("reco-dir.py: Discarding reaction score for partial EC number: %s" % (ec,))
                    continue
                score = scores[species][ec]
                if ec not in ec2r:
                    o.write("?\t%s\t%s\n" % (ec, score))
                    continue
                for r in ec2r[ec]:
                    if r not in rscores or score.pscore > rscores[r].pscore:
                        rscores[r] = score
                    # NOTE(review): every EC mapped to r is recorded, not only
                    # the best-scoring one - confirm against the original
                    # indentation, which was lost in this copy of the file.
                    rec.setdefault(r, set()).add(ec)
            for r in sorted(rscores):
                # Sort the ECs so the output does not depend on set ordering.
                o.write("%s\t%s\t%s\n" % (r, ",".join(sorted(rec[r])), rscores[r]))
        finally:
            # Release the handle even if scoring or writing fails.
            o.close()
    return list(scores.keys())
#!/usr/bin/env python
# Collects the reaction scores of a project directory and the per-organism
# values from the *.ecscores files of a second directory.
# Usage: <script> <project dir> <dir with .ecscores>
import sys
import os
import datetime

import common

ddir = sys.argv[1]      # project dir
scoredir = sys.argv[2]  # dir with .ecscores

# Output file; deliberately left open here.
# NOTE(review): nothing is written to it in this part of the script -
# presumably later code writes to and closes it; confirm.
o = open("%s/%s" % (ddir, common.FULL_REACTION_SCORE_FILE), "w")

rscores, ecs = common.read_scores(ddir)
org_abbr, org_long = common.read_organisms(ddir)

fns = os.listdir(scoredir)
scores = {}
for fn in fns:
    if fn.endswith(".ecscores"):
        # The file name without its ".ecscores" suffix is looked up in org_abbr.
        org = org_abbr[fn[:-len(".ecscores")]]
        scores[org] = {}
        f = open("%s/%s" % (scoredir, fn))
        try:
            for s in f:
                if s.startswith("#"):
                    continue
                # Seven tab-separated columns: the EC number and six values.
                ec, bs, bs1, bs2, gs, gs1, gs2 = s.strip().split("\t")
                scores[org][ec] = (bs, bs1, bs2, gs, gs1, gs2)
                ecs.add(ec)
        finally:
            # Close each input file instead of leaking one handle per file.
            f.close()

# Union of the keys of rscores and of scores, as a list.
keys = set(rscores.keys())
keys.update(set(scores.keys()))
keys = list(keys)