def main():
    """Parse the command line and run the reconstruction.

    Positional arguments read from ``sys.argv``:

      1. program dir (contains the EC list file ``common.FILE_EC_MAP``)
      2. KEGG dir
      3. data dir
      4. accept parameter (float)
      5. reject parameter (float)
      6. optional comma-separated species list; omitted means ``None``
      7. optional ``"yes"`` to set ``odir_param_labeled``
      8. optional ``"yes"`` to set ``remove_partial``

    Any value other than the literal ``"yes"`` for arguments 7 and 8 leaves
    the corresponding flag ``False``.  The parsed values are handed to
    ``do_reconstruction`` unchanged.

    Exits with a usage message when fewer than the five required
    arguments are supplied.
    """
    argv = sys.argv
    if len(argv) < 6:
        sys.exit(
            "Usage: %s <program_dir> <kegg_dir> <data_dir> <accept> <reject> "
            "[species1,species2,...] [odir_param_labeled: yes|no] "
            "[remove_partial: yes|no]" % (argv[0],)
        )

    cdir = argv[1]  # program dir (with ec-list.txt)
    kdir = argv[2]  # kegg dir
    ddir = argv[3]  # data dir
    param_accept = float(argv[4])
    param_reject = float(argv[5])

    species = argv[6].split(",") if len(argv) > 6 else None
    odir_param_labeled = len(argv) > 7 and argv[7] == "yes"
    remove_partial = len(argv) > 8 and argv[8] == "yes"

    print("Reading EC list...")
    # Close the EC map as soon as it has been parsed instead of leaking the handle.
    with open("%s/%s" % (cdir, common.FILE_EC_MAP)) as ec_map_f:
        ec2r, r2ec = common.read_ec_list(ec_map_f)

    do_reconstruction(cdir, kdir, ddir, r2ec, ec2r, param_accept,
                      param_reject, species, odir_param_labeled,
                      remove_partial)
def main(inputdir, ecfile):
    """Write the EC-to-EC adjacency graph of a reconstructed network.

    Two EC numbers are connected when a reaction annotated with the first
    and a *different* reaction annotated with the second share at least one
    metabolite (as substrate or product of either reaction).

    Inputs:
      * ``ecfile`` -- EC list parsed by ``common.read_ec_list``.
      * ``STOICHIOMETRY`` -- parsed by ``common.read_stoichiometry``; its
        ``reactions`` mapping supplies ``substrates`` / ``products``.
      * ``<inputdir>/<common.NETWORK_REACTION_FILE>`` -- the reconstruction.
        Each reaction id is reduced to the part before the first ``"_"``
        and any ``"#"`` in that part is replaced by ``"_"``.
      * ``COFACTORS`` -- cofactor ids parsed by ``common.read_set``.

    Output: ``<inputdir>/<common.FILE_EC_GRAPH>``, tab-separated, one line
    per ordered EC pair: EC1, EC2, comma-separated shared metabolites
    (sorted, so the file is reproducible) and a 1/0 flag telling whether all
    shared metabolites are cofactors.  Pairs connected only through
    cofactors are omitted when ``REMOVE_COFACTORS`` is true.
    """
    with open(ecfile) as ec_f:
        ec2r, r2ec = common.read_ec_list(ec_f)
    with open(STOICHIOMETRY) as stoich_f:
        reactions = common.read_stoichiometry(stoich_f).reactions
    with open("%s/%s" % (inputdir, common.NETWORK_REACTION_FILE)) as reco_f:
        reco = common.read_reconstruction(reco_f)
        # Consume the reconstruction while the file is still open in case
        # the reader yields its entries lazily.
        base_reactions = set(
            r.split("_")[0].replace("#", "_") for r in reco)
    with open(COFACTORS) as cof_f:
        cofactors = common.read_set(cof_f)

    # Bipartite reaction <-> metabolite adjacency; keys are ids of both kinds.
    links = {}
    for r in base_reactions:
        if r not in reactions:
            continue
        rxn = reactions[r]
        links.setdefault(r, {})
        for side in (rxn.substrates, rxn.products):
            for m in side:
                links.setdefault(m, {})[r] = 1
                links[r][m] = 1

    # ecg[ec1][ec2] = set of metabolites shared by reactions of ec1 and ec2.
    ecg = {}
    for r in links:
        if r not in r2ec:
            continue  # metabolite node, or reaction without EC annotation
        for m in links[r]:
            if m not in links:
                continue
            for r2 in links[m]:
                if r2 == r or r2 not in r2ec:
                    continue
                for e1 in r2ec[r]:
                    neighbours = ecg.setdefault(e1, {})
                    for e2 in r2ec[r2]:
                        neighbours.setdefault(e2, set()).add(m)

    with open("%s/%s" % (inputdir, common.FILE_EC_GRAPH), "w") as o:
        o.write("#Output of \"%s\" on %s\n"
                % (" ".join(sys.argv), datetime.datetime.now()))
        o.write("#Cofactors: %s\n" % (",".join(sorted(cofactors))))
        o.write("#EC1 EC2 SharedMetabolites AllCofactors\n")
        for ec1 in sorted(ecg):
            for ec2 in sorted(ecg[ec1]):
                shared = ecg[ec1][ec2]
                cofactor = 1 if all(m in cofactors for m in shared) else 0
                if cofactor and REMOVE_COFACTORS:
                    continue
                o.write("%s\t%s\t%s\t%s\n"
                        % (ec1, ec2, ",".join(sorted(shared)), cofactor))
def main(rdir, dir_prefix, ref_model_fn, ofile, remove_partial):
    """Score reconstructed EC sets against a reference model; write ROC data.

    Parameters:
      rdir           -- directory holding one result sub-directory per run.
      dir_prefix     -- only sub-directories whose name starts with this
                        prefix are scored.  The rest of the name is split on
                        "-": field 0 is the accept parameter, field 1 the
                        reject parameter.  ``PLOT_PARAM`` (``PARAM_ACCEPT``
                        or ``PARAM_REJECT``) selects which one keys the row.
                        NOTE(review): a value written like ``1e-05`` would be
                        split incorrectly -- confirm the naming scheme.
      ref_model_fn   -- reference model, one EC number per line.
      ofile          -- output table (tab-separated).
      remove_partial -- when true, EC numbers containing "-" are ignored in
                        the reference, the results and the EC universe.

    Each result is read from ``<rdir>/<subdir>/<common.NETWORK_EC_FILE>``:
    lines starting with "#" are skipped, the first whitespace-separated
    token is the EC number, and "?" entries are ignored.  Sub-directories
    whose result file cannot be opened are reported on stderr and skipped.

    TP, FP and FN each carry a pseudo-count of 1.  The AUC column is a
    running trapezoid sum that starts at (FPR, TPR) = (1, 1), visits the
    rows in ascending parameter order, and is closed at (0, 0) by the final
    "NA" row.

    Raises:
      AssertionError    -- ``PLOT_PARAM`` is neither known constant.
      ZeroDivisionError -- a rate has a zero denominator (re-raised after
                           printing the offending sub-directory).
    """
    ref_ecs = set()
    with open(ref_model_fn) as ref_model_f:
        for line in ref_model_f:
            ec = line.strip()
            if not ec:
                continue  # a blank line is not an EC number
            if remove_partial and "-" in ec:
                continue
            ref_ecs.add(ec)

    with open(common.FILE_EC_MAP) as ec_map_f:
        ec2r, _ = common.read_ec_list(ec_map_f)
    all_ecs = set(ec2r.keys())
    if remove_partial:
        # Filter (not map): drop partial ECs so TN is counted over the same
        # universe as the reference and result sets.
        all_ecs = set(ec for ec in all_ecs if "-" not in ec)

    results = {}
    for fna in os.listdir(rdir):
        if not fna.startswith(dir_prefix):
            continue
        sys.stdout.write("Processing %s\n" % (fna))

        params = fna[len(dir_prefix):].split("-")
        if PLOT_PARAM == PARAM_ACCEPT:
            param = float(params[0])  # accept param
        elif PLOT_PARAM == PARAM_REJECT:
            param = float(params[1])  # reject param
        else:
            print("Unknown parameter %s" % (PLOT_PARAM,))
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("Unknown parameter %s" % (PLOT_PARAM,))

        try:
            ec_f = open("%s/%s/%s" % (rdir, fna, common.NETWORK_EC_FILE))
        except IOError:
            sys.stderr.write("Unable to open %s\n" % (fna))
            continue

        res_ecs = set()
        with ec_f:
            for line in ec_f:
                if line.startswith("#"):
                    continue
                fields = line.split()
                if not fields:
                    continue  # blank line
                ec = fields[0]
                if ec == "?":
                    continue
                if remove_partial and "-" in ec:
                    continue
                res_ecs.add(ec)

        tp = len(ref_ecs.intersection(res_ecs)) + 1
        fp = len(res_ecs.difference(ref_ecs)) + 1
        fn = len(ref_ecs.difference(res_ecs)) + 1
        # "+ 3" cancels the three pseudo-counts contained in tp, fp and fn.
        tn = len(all_ecs) - tp - fp - fn + 3
        try:
            tpr = 1.0 * tp / (tp + fn)
            fpr = 1.0 * fp / (fp + tn)
            prec = 1.0 * tp / (tp + fp)
            recall = 1.0 * tp / (tp + fn)
            f1 = 2 * prec * recall / (prec + recall)
        except ZeroDivisionError:
            print("Cannot process %s" % (fna))
            raise
        results[param] = (tp, fp, fn, tn, tpr, fpr, f1)

    pfpr = ptpr = 1.0
    auc = 0.0
    with open(ofile, "w") as of:
        of.write("#Param TP FP FN TN TPR FPN F1 AUC\n")
        for param in sorted(results):
            tp, fp, fn, tn, tpr, fpr, f1 = results[param]
            # Rectangle below the new point plus the triangle up to the
            # previous point, i.e. one trapezoid slice.
            auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
            print("%s %s %s %s" % (fpr, tpr, auc, f1))
            of.write("%.10f\t%d\t%d\t%d\t%d\t%f\t%f\t%s\t%f\n"
                     % (param, tp, fp, fn, tn, tpr, fpr, f1, auc))
            pfpr = fpr
            ptpr = tpr

        # Close the curve at the origin.
        tpr = 0.0
        fpr = 0.0
        auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
        f1 = 0.0
        of.write("NA\tNA\tNA\tNA\tNA\t%f\t%f\t%f\t%f\n"
                 % (tpr, fpr, f1, auc))
def main(inputdir):
    """Write the EC-to-EC adjacency graph of a reconstructed network.

    Two EC numbers are connected when a reaction annotated with the first
    and a *different* reaction annotated with the second share at least one
    metabolite (as substrate or product of either reaction).

    Inputs:
      * ``common.FILE_EC_MAP`` -- EC list parsed by ``common.read_ec_list``.
      * ``STOICHIOMETRY`` -- parsed by ``common.read_stoichiometry``; its
        ``reactions`` mapping supplies ``substrates`` / ``products``.
      * ``<inputdir>/<common.NETWORK_REACTION_FILE>`` -- the reconstruction.
        Each reaction id is reduced to the part before the first ``"_"``.
      * ``COFACTORS`` -- cofactor ids parsed by ``common.read_set``.

    Output: ``<inputdir>/<common.FILE_EC_GRAPH>``, tab-separated, one line
    per ordered EC pair: EC1, EC2, comma-separated shared metabolites
    (sorted, so the file is reproducible) and a 1/0 flag telling whether all
    shared metabolites are cofactors.  Pairs connected only through
    cofactors are omitted when ``REMOVE_COFACTORS`` is true.
    """
    with open(common.FILE_EC_MAP) as ec_f:
        ec2r, r2ec = common.read_ec_list(ec_f)
    with open(STOICHIOMETRY) as stoich_f:
        reactions = common.read_stoichiometry(stoich_f).reactions
    with open("%s/%s" % (inputdir, common.NETWORK_REACTION_FILE)) as reco_f:
        reco = common.read_reconstruction(reco_f)
        # Consume the reconstruction while the file is still open in case
        # the reader yields its entries lazily.
        base_reactions = set(r.split("_")[0] for r in reco)
    with open(COFACTORS) as cof_f:
        cofactors = common.read_set(cof_f)

    # Bipartite reaction <-> metabolite adjacency; keys are ids of both kinds.
    links = {}
    for r in base_reactions:
        if r not in reactions:
            continue
        rxn = reactions[r]
        links.setdefault(r, {})
        for side in (rxn.substrates, rxn.products):
            for m in side:
                links.setdefault(m, {})[r] = 1
                links[r][m] = 1

    # ecg[ec1][ec2] = set of metabolites shared by reactions of ec1 and ec2.
    ecg = {}
    for r in links:
        if r not in r2ec:
            continue  # metabolite node, or reaction without EC annotation
        for m in links[r]:
            if m not in links:
                continue
            for r2 in links[m]:
                if r2 == r or r2 not in r2ec:
                    continue
                for e1 in r2ec[r]:
                    neighbours = ecg.setdefault(e1, {})
                    for e2 in r2ec[r2]:
                        neighbours.setdefault(e2, set()).add(m)

    with open("%s/%s" % (inputdir, common.FILE_EC_GRAPH), "w") as o:
        o.write("#Output of \"%s\" on %s\n"
                % (" ".join(sys.argv), datetime.datetime.now()))
        o.write("#Cofactors: %s\n" % (",".join(sorted(cofactors))))
        o.write("#EC1 EC2 SharedMetabolites AllCofactors\n")
        for ec1 in sorted(ecg):
            for ec2 in sorted(ecg[ec1]):
                shared = ecg[ec1][ec2]
                cofactor = 1 if all(m in cofactors for m in shared) else 0
                if cofactor and REMOVE_COFACTORS:
                    continue
                o.write("%s\t%s\t%s\t%s\n"
                        % (ec1, ec2, ",".join(sorted(shared)), cofactor))
#!/usr/bin/env python import sys import common kdir = sys.argv[1] # kegg dir ofn = sys.argv[2] # output file o = open(ofn, "w") ec2r, r2ec = common.read_ec_list(open(common.FILE_EC_MAP)) target_species = sys.argv[3].split(",") # comma-separated list of species or "-" for all f = open("%s/enzyme" % (kdir)) geneson = 0 ec2gene = {} for s in f: if s[0] != " ": geneson = 0 if geneson == 0: if s.startswith("ENTRY"): ec = s.strip().split()[2] ec2gene[ec] = {} elif s.startswith("GENES"): geneson = 1 species = s[12:15] if target_species == "-" or species in target_species: ec2gene[ec][species] = set(s[16:].strip().split()) elif s.startswith("///"): #print ec
def main(rdir, dir_prefix, ref_model_fn, ofile, remove_partial):
    """Score reconstructed EC sets against a reference model; write ROC data.

    Parameters:
      rdir           -- directory holding one result sub-directory per run.
      dir_prefix     -- only sub-directories whose name starts with this
                        prefix are scored.  The rest of the name is split on
                        "-": field 0 is the accept parameter, field 1 the
                        reject parameter.  ``PLOT_PARAM`` (``PARAM_ACCEPT``
                        or ``PARAM_REJECT``) selects which one keys the row.
                        NOTE(review): a value written like ``1e-05`` would be
                        split incorrectly -- confirm the naming scheme.
      ref_model_fn   -- reference model, one EC number per line.
      ofile          -- output table (tab-separated).
      remove_partial -- when true, EC numbers containing "-" are ignored in
                        the reference, the results and the EC universe.

    Each result is read from ``<rdir>/<subdir>/<common.NETWORK_EC_FILE>``:
    lines starting with "#" are skipped, the first whitespace-separated
    token is the EC number, and "?" entries are ignored.  Sub-directories
    whose result file cannot be opened are reported on stderr and skipped.

    TP, FP and FN each carry a pseudo-count of 1.  The AUC column is a
    running trapezoid sum that starts at (FPR, TPR) = (1, 1), visits the
    rows in ascending parameter order, and is closed at (0, 0) by the final
    "NA" row.

    Raises:
      AssertionError    -- ``PLOT_PARAM`` is neither known constant.
      ZeroDivisionError -- a rate has a zero denominator (re-raised after
                           printing the offending sub-directory).
    """
    ref_ecs = set()
    with open(ref_model_fn) as ref_model_f:
        for line in ref_model_f:
            ec = line.strip()
            if not ec:
                continue  # a blank line is not an EC number
            if remove_partial and "-" in ec:
                continue
            ref_ecs.add(ec)

    with open(common.FILE_EC_MAP) as ec_map_f:
        ec2r, _ = common.read_ec_list(ec_map_f)
    all_ecs = set(ec2r.keys())
    if remove_partial:
        # Filter (not map): drop partial ECs so TN is counted over the same
        # universe as the reference and result sets.
        all_ecs = set(ec for ec in all_ecs if "-" not in ec)

    results = {}
    for fna in os.listdir(rdir):
        if not fna.startswith(dir_prefix):
            continue
        sys.stdout.write("Processing %s\n" % (fna))

        params = fna[len(dir_prefix):].split("-")
        if PLOT_PARAM == PARAM_ACCEPT:
            param = float(params[0])  # accept param
        elif PLOT_PARAM == PARAM_REJECT:
            param = float(params[1])  # reject param
        else:
            print("Unknown parameter %s" % (PLOT_PARAM,))
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("Unknown parameter %s" % (PLOT_PARAM,))

        try:
            ec_f = open("%s/%s/%s" % (rdir, fna, common.NETWORK_EC_FILE))
        except IOError:
            sys.stderr.write("Unable to open %s\n" % (fna))
            continue

        res_ecs = set()
        with ec_f:
            for line in ec_f:
                if line.startswith("#"):
                    continue
                fields = line.split()
                if not fields:
                    continue  # blank line
                ec = fields[0]
                if ec == "?":
                    continue
                if remove_partial and "-" in ec:
                    continue
                res_ecs.add(ec)

        tp = len(ref_ecs.intersection(res_ecs)) + 1
        fp = len(res_ecs.difference(ref_ecs)) + 1
        fn = len(ref_ecs.difference(res_ecs)) + 1
        # "+ 3" cancels the three pseudo-counts contained in tp, fp and fn.
        tn = len(all_ecs) - tp - fp - fn + 3
        try:
            tpr = 1.0 * tp / (tp + fn)
            fpr = 1.0 * fp / (fp + tn)
            prec = 1.0 * tp / (tp + fp)
            recall = 1.0 * tp / (tp + fn)
            f1 = 2 * prec * recall / (prec + recall)
        except ZeroDivisionError:
            print("Cannot process %s" % (fna))
            raise
        results[param] = (tp, fp, fn, tn, tpr, fpr, f1)

    pfpr = ptpr = 1.0
    auc = 0.0
    with open(ofile, "w") as of:
        of.write("#Param TP FP FN TN TPR FPN F1 AUC\n")
        for param in sorted(results):
            tp, fp, fn, tn, tpr, fpr, f1 = results[param]
            # Rectangle below the new point plus the triangle up to the
            # previous point, i.e. one trapezoid slice.
            auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
            print("%s %s %s %s" % (fpr, tpr, auc, f1))
            of.write("%.10f\t%d\t%d\t%d\t%d\t%f\t%f\t%s\t%f\n"
                     % (param, tp, fp, fn, tn, tpr, fpr, f1, auc))
            pfpr = fpr
            ptpr = tpr

        # Close the curve at the origin.
        tpr = 0.0
        fpr = 0.0
        auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
        f1 = 0.0
        of.write("NA\tNA\tNA\tNA\tNA\t%f\t%f\t%f\t%f\n"
                 % (tpr, fpr, f1, auc))