Esempio n. 1
0
def main():
    """Parse the command line and run the reconstruction.

    Positional arguments read from ``sys.argv``:

    1. program dir (with ec-list.txt, i.e. ``common.FILE_EC_MAP``)
    2. kegg dir
    3. data dir
    4. accept parameter (parsed as float)
    5. reject parameter (parsed as float)
    6. optional: comma-separated species list; ``None`` when absent
    7. optional: "yes" sets ``odir_param_labeled``; anything else is False
    8. optional: "yes" sets ``remove_partial``; anything else is False

    Raises:
        IndexError: if fewer than five arguments are given.
        ValueError: if the accept/reject parameters are not valid floats.
    """
    args = sys.argv

    cdir = args[1]  # program dir (with ec-list.txt)
    kdir = args[2]  # kegg dir
    ddir = args[3]  # data dir
    param_accept = float(args[4])
    param_reject = float(args[5])

    species = args[6].split(",") if len(args) > 6 else None
    odir_param_labeled = len(args) > 7 and args[7] == "yes"
    remove_partial = len(args) > 8 and args[8] == "yes"

    # Single-argument parenthesised form prints identically on Python 2
    # and also parses on Python 3.
    print("Reading EC list...")
    # Close the EC map explicitly instead of leaking the handle.
    with open("%s/%s" % (cdir, common.FILE_EC_MAP)) as ec_file:
        ec2r, r2ec = common.read_ec_list(ec_file)

    do_reconstruction(cdir, kdir, ddir, r2ec, ec2r, param_accept, param_reject,
                      species, odir_param_labeled, remove_partial)
Esempio n. 2
0
def main():
    """Command-line entry point: read arguments, load the EC map, reconstruct.

    ``sys.argv`` layout:
        [1] program dir (with ec-list.txt, i.e. ``common.FILE_EC_MAP``)
        [2] kegg dir
        [3] data dir
        [4] accept parameter (float)
        [5] reject parameter (float)
        [6] optional comma-separated species list (``None`` if omitted)
        [7] optional "yes" to set ``odir_param_labeled``
        [8] optional "yes" to set ``remove_partial``

    Raises:
        IndexError: if any of the five mandatory arguments is missing.
        ValueError: if argument 4 or 5 cannot be parsed as a float.
    """
    argv = sys.argv
    argc = len(argv)

    cdir, kdir, ddir = argv[1], argv[2], argv[3]
    param_accept = float(argv[4])
    param_reject = float(argv[5])

    species = None
    if argc > 6:
        species = argv[6].split(",")

    # Optional switches are enabled only by the literal string "yes".
    odir_param_labeled = argc > 7 and argv[7] == "yes"
    remove_partial = argc > 8 and argv[8] == "yes"

    print("Reading EC list...")
    # Use a context manager so the EC map file is closed once it is parsed.
    ec_map_path = "%s/%s" % (cdir, common.FILE_EC_MAP)
    with open(ec_map_path) as ec_map:
        ec2r, r2ec = common.read_ec_list(ec_map)

    do_reconstruction(cdir, kdir, ddir, r2ec, ec2r, param_accept,
                      param_reject, species, odir_param_labeled,
                      remove_partial)
def main(inputdir, ecfile):
    """Write the EC-to-EC graph of a reconstructed network.

    Two EC numbers are connected when a reaction annotated with the first
    and a different reaction annotated with the second share at least one
    metabolite (as substrate or product). One line is written per ordered
    EC pair, listing the shared metabolites and a 0/1 flag telling whether
    all of them are cofactors.

    Args:
        inputdir: directory containing ``common.NETWORK_REACTION_FILE``; the
            graph is written into the same directory as
            ``common.FILE_EC_GRAPH``.
        ecfile: path of the EC list file parsed by ``common.read_ec_list``.
    """
    # Every input is read inside a ``with`` block so no handle is leaked.
    with open(ecfile) as f:
        ec2r, r2ec = common.read_ec_list(f)
    with open(STOICHIOMETRY) as f:
        reactions = common.read_stoichiometry(f).reactions
    with open("%s/%s" % (inputdir, common.NETWORK_REACTION_FILE)) as f:
        # Consume the reconstruction while the file is still open.
        # Keep the part before the first "_" and turn "#" into "_"
        # (presumably "#" encodes underscores in reaction ids -- TODO confirm).
        R = set()
        for r in common.read_reconstruction(f):
            R.add(r.split("_")[0].replace("#", "_"))
    with open(COFACTORS) as f:
        cofactors = common.read_set(f)

    # Undirected reaction <-> metabolite adjacency; reaction ids and
    # metabolite ids share the same dictionary.
    E = {}
    for r in R:
        if r not in reactions:
            continue
        reaction = reactions[r]
        E.setdefault(r, {})
        for side in (reaction.substrates, reaction.products):
            for m in side:
                E.setdefault(m, {})[r] = 1
                E[r][m] = 1

    # ecg[ec1][ec2] = set of metabolites shared by reactions of ec1 and ec2.
    ecg = {}
    for r in E:
        if r not in r2ec:
            continue  # not a reaction with an EC annotation
        for m in E[r]:
            for r2 in E.get(m, ()):
                if r2 == r or r2 not in r2ec:
                    continue
                for e1 in r2ec[r]:
                    row = ecg.setdefault(e1, {})
                    for e2 in r2ec[r2]:
                        row.setdefault(e2, set()).add(m)

    with open("%s/%s" % (inputdir, common.FILE_EC_GRAPH), "w") as o:
        o.write("#Output of \"%s\" on %s\n" % (" ".join(sys.argv), datetime.datetime.now()))
        o.write("#Cofactors: %s\n" % (",".join(sorted(cofactors))))
        o.write("#EC1 EC2 SharedMetabolites AllCofactors\n")
        for ec1 in sorted(ecg):
            for ec2 in sorted(ecg[ec1]):
                shared = ecg[ec1][ec2]
                # 1 when every shared metabolite is a cofactor, else 0.
                cofactor = int(all(m in cofactors for m in shared))
                if cofactor and REMOVE_COFACTORS:
                    continue
                # Sort the metabolites so the output is reproducible rather
                # than following arbitrary set iteration order.
                o.write("%s\t%s\t%s\t%s\n" % (ec1, ec2, ",".join(sorted(shared)), cofactor))
Esempio n. 4
0
def main(rdir, dir_prefix, ref_model_fn, ofile, remove_partial):
    """Score reconstructions against a reference model and write ROC data.

    Every entry of ``rdir`` whose name starts with ``dir_prefix`` is treated
    as one reconstruction run. The rest of the name is split on "-": the
    first field is the accept parameter and the second the reject parameter;
    ``PLOT_PARAM`` selects which one keys the results. For each run the EC
    numbers in ``common.NETWORK_EC_FILE`` are compared with the reference
    set and TP/FP/FN/TN, TPR, FPR, F1 and a running AUC are written to
    ``ofile``, one line per parameter value in increasing order.

    Args:
        rdir: directory containing the per-parameter result directories.
        dir_prefix: name prefix identifying result directories.
        ref_model_fn: file with one reference EC number per line.
        ofile: path of the output table.
        remove_partial: if true, EC numbers containing "-" are ignored in
            the reference set, the result sets and the EC universe.

    Raises:
        AssertionError: if ``PLOT_PARAM`` is neither ``PARAM_ACCEPT`` nor
            ``PARAM_REJECT``.
        ZeroDivisionError: re-raised if a rate cannot be computed.
    """
    ref_ecs = set()
    with open(ref_model_fn) as ref_model_f:
        for s in ref_model_f:
            ec = s.strip()
            if remove_partial and "-" in ec:
                continue
            ref_ecs.add(ec)

    with open(common.FILE_EC_MAP) as f:
        ec2r, r2ec = common.read_ec_list(f)
    all_ecs = set(ec2r.keys())
    if remove_partial:
        # Filter the universe; the previous map() call produced a list of
        # booleans of the same length, so partial ECs were never removed.
        all_ecs = set(ec for ec in all_ecs if "-" not in ec)

    results = {}
    for fna in os.listdir(rdir):
        if not fna.startswith(dir_prefix):
            continue
        sys.stdout.write("Processing %s\n" % (fna))
        params = fna[len(dir_prefix):]
        if PLOT_PARAM == PARAM_ACCEPT:
            param = float(params.split("-")[0])  # accept param
        elif PLOT_PARAM == PARAM_REJECT:
            param = float(params.split("-")[1])  # reject param
        else:
            print("Unknown parameter %s" % (PLOT_PARAM,))
            # Explicit raise: a bare assert would vanish under "python -O".
            raise AssertionError("Unknown parameter %s" % (PLOT_PARAM,))

        try:
            f = open("%s/%s/%s" % (rdir, fna, common.NETWORK_EC_FILE))
        except IOError:
            sys.stderr.write("Unable to open %s\n" % (fna))
            continue
        res_ecs = set()
        with f:
            for s in f:
                if s.startswith("#"):
                    continue
                fields = s.split()
                if not fields:
                    continue  # blank line: nothing to record
                ec = fields[0]
                if ec == "?":
                    continue
                if remove_partial and "-" in ec:
                    continue
                res_ecs.add(ec)

        # tp, fp and fn each carry a +1 pseudocount; the "+ 3" in tn cancels
        # those three increments again.
        tp = len(ref_ecs.intersection(res_ecs)) + 1
        fp = len(res_ecs.difference(ref_ecs)) + 1
        fn = len(ref_ecs.difference(res_ecs)) + 1
        tn = len(all_ecs) - tp - fp - fn + 3

        try:
            tpr = 1.0 * tp / (tp + fn)
            fpr = 1.0 * fp / (fp + tn)

            prec = 1.0 * tp / (tp + fp)
            recall = 1.0 * tp / (tp + fn)

            f1 = 2 * prec * recall / (prec + recall)
        except ZeroDivisionError:
            print("Cannot process %s" % (fna))
            raise

        results[param] = (tp, fp, fn, tn, tpr, fpr, f1)

    # The curve is integrated with trapezoids, starting at
    # (FPR, TPR) = (1, 1) and closed at (0, 0) after the last parameter.
    pfpr = ptpr = 1.0
    auc = 0.0

    with open(ofile, "w") as of:
        of.write("#Param TP FP FN TN TPR FPN F1 AUC\n")
        for param in sorted(results):
            tp, fp, fn, tn, tpr, fpr, f1 = results[param]

            auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
            print("%s %s %s %s" % (fpr, tpr, auc, f1))

            of.write("%.10f\t%d\t%d\t%d\t%d\t%f\t%f\t%s\t%f\n" %
                     (param, tp, fp, fn, tn, tpr, fpr, f1, auc))
            pfpr = fpr
            ptpr = tpr

        tpr = 0.0
        fpr = 0.0
        auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
        f1 = 0.0
        of.write("NA\tNA\tNA\tNA\tNA\t%f\t%f\t%f\t%f\n" % (tpr, fpr, f1, auc))
Esempio n. 5
0
def main(inputdir):
    """Write the EC-to-EC graph of the reconstruction stored in ``inputdir``.

    Two EC numbers are connected when a reaction annotated with the first
    and a different reaction annotated with the second share at least one
    metabolite (as substrate or product). One line is written per ordered
    EC pair, listing the shared metabolites and a 0/1 flag telling whether
    all of them are cofactors.

    Args:
        inputdir: directory containing ``common.NETWORK_REACTION_FILE``; the
            graph is written into the same directory as
            ``common.FILE_EC_GRAPH``.
    """
    # Every input is read inside a ``with`` block so no handle is leaked.
    with open(common.FILE_EC_MAP) as f:
        ec2r, r2ec = common.read_ec_list(f)
    with open(STOICHIOMETRY) as f:
        reactions = common.read_stoichiometry(f).reactions
    with open("%s/%s" % (inputdir, common.NETWORK_REACTION_FILE)) as f:
        # Consume the reconstruction while the file is still open; only the
        # part of each id before the first "_" is kept.
        R = set()
        for r in common.read_reconstruction(f):
            R.add(r.split("_")[0])
    with open(COFACTORS) as f:
        cofactors = common.read_set(f)

    # Undirected reaction <-> metabolite adjacency; reaction ids and
    # metabolite ids share the same dictionary.
    E = {}
    for r in R:
        if r not in reactions:
            continue
        reaction = reactions[r]
        E.setdefault(r, {})
        for side in (reaction.substrates, reaction.products):
            for m in side:
                E.setdefault(m, {})[r] = 1
                E[r][m] = 1

    # ecg[ec1][ec2] = set of metabolites shared by reactions of ec1 and ec2.
    ecg = {}
    for r in E:
        if r not in r2ec:
            continue  # not a reaction with an EC annotation
        for m in E[r]:
            for r2 in E.get(m, ()):
                if r2 == r or r2 not in r2ec:
                    continue
                for e1 in r2ec[r]:
                    row = ecg.setdefault(e1, {})
                    for e2 in r2ec[r2]:
                        row.setdefault(e2, set()).add(m)

    with open("%s/%s" % (inputdir, common.FILE_EC_GRAPH), "w") as o:
        o.write("#Output of \"%s\" on %s\n" % (" ".join(sys.argv), datetime.datetime.now()))
        o.write("#Cofactors: %s\n" % (",".join(sorted(cofactors))))
        o.write("#EC1 EC2 SharedMetabolites AllCofactors\n")
        for ec1 in sorted(ecg):
            for ec2 in sorted(ecg[ec1]):
                shared = ecg[ec1][ec2]
                # 1 when every shared metabolite is a cofactor, else 0.
                cofactor = int(all(m in cofactors for m in shared))
                if cofactor and REMOVE_COFACTORS:
                    continue
                # Sort the metabolites so the output is reproducible rather
                # than following arbitrary set iteration order.
                o.write("%s\t%s\t%s\t%s\n" % (ec1, ec2, ",".join(sorted(shared)), cofactor))
Esempio n. 6
0
#!/usr/bin/env python

import sys

import common

# Script body: scan the "enzyme" file in the KEGG directory and collect, per
# EC entry, the genes listed on GENES lines, keyed by species code.
kdir = sys.argv[1]  # kegg dir
ofn = sys.argv[2]   # output file
o = open(ofn, "w")

ec2r, r2ec = common.read_ec_list(open(common.FILE_EC_MAP))

# NOTE(review): split(",") always returns a list, so the comparison
# `target_species == "-"` further down can never be true; "-" therefore does
# not select all species as the comment below describes -- confirm and fix.
target_species = sys.argv[3].split(",")  # comma-separated list of species or "-" for all

f = open("%s/enzyme" % (kdir))
geneson = 0    # 1 after a GENES line has been seen, reset on any line not starting with a space
ec2gene = {}   # ec -> {species code -> set of gene tokens}
for s in f:
    # A line that does not start with a space ends the current GENES section.
    if s[0] != " ":
        geneson = 0
    if geneson == 0:
        if s.startswith("ENTRY"):
            # The EC identifier is the third whitespace-separated token.
            ec = s.strip().split()[2]
            ec2gene[ec] = {}
        elif s.startswith("GENES"):
            geneson = 1
            # Characters 12..14 hold the species code; gene tokens start at 16.
            species = s[12:15]
            if target_species == "-" or species in target_species:
                ec2gene[ec][species] = set(s[16:].strip().split())
        elif s.startswith("///"):
            # NOTE(review): the body of this branch is not visible in this excerpt.
            #print ec
def main(rdir, dir_prefix, ref_model_fn, ofile, remove_partial):
    """Score reconstructions against a reference model and write ROC data.

    Every entry of ``rdir`` whose name starts with ``dir_prefix`` is treated
    as one reconstruction run. The rest of the name is split on "-": the
    first field is the accept parameter and the second the reject parameter;
    ``PLOT_PARAM`` selects which one keys the results. For each run the EC
    numbers in ``common.NETWORK_EC_FILE`` are compared with the reference
    set and TP/FP/FN/TN, TPR, FPR, F1 and a running AUC are written to
    ``ofile``, one line per parameter value in increasing order.

    Args:
        rdir: directory containing the per-parameter result directories.
        dir_prefix: name prefix identifying result directories.
        ref_model_fn: file with one reference EC number per line.
        ofile: path of the output table.
        remove_partial: if true, EC numbers containing "-" are ignored in
            the reference set, the result sets and the EC universe.

    Raises:
        AssertionError: if ``PLOT_PARAM`` is neither ``PARAM_ACCEPT`` nor
            ``PARAM_REJECT``.
        ZeroDivisionError: re-raised if a rate cannot be computed.
    """
    ref_ecs = set()
    with open(ref_model_fn) as ref_model_f:
        for s in ref_model_f:
            ec = s.strip()
            if remove_partial and "-" in ec:
                continue
            ref_ecs.add(ec)

    with open(common.FILE_EC_MAP) as f:
        ec2r, r2ec = common.read_ec_list(f)
    all_ecs = set(ec2r.keys())
    if remove_partial:
        # Filter the universe; the previous map() call produced a list of
        # booleans of the same length, so partial ECs were never removed.
        all_ecs = set(ec for ec in all_ecs if "-" not in ec)

    results = {}
    for fna in os.listdir(rdir):
        if not fna.startswith(dir_prefix):
            continue
        sys.stdout.write("Processing %s\n" % (fna))
        params = fna[len(dir_prefix):]
        if PLOT_PARAM == PARAM_ACCEPT:
            param = float(params.split("-")[0])  # accept param
        elif PLOT_PARAM == PARAM_REJECT:
            param = float(params.split("-")[1])  # reject param
        else:
            print("Unknown parameter %s" % (PLOT_PARAM,))
            # Explicit raise: a bare assert would vanish under "python -O".
            raise AssertionError("Unknown parameter %s" % (PLOT_PARAM,))

        try:
            f = open("%s/%s/%s" % (rdir, fna, common.NETWORK_EC_FILE))
        except IOError:
            sys.stderr.write("Unable to open %s\n" % (fna))
            continue
        res_ecs = set()
        with f:
            for s in f:
                if s.startswith("#"):
                    continue
                fields = s.split()
                if not fields:
                    continue  # blank line: nothing to record
                ec = fields[0]
                if ec == "?":
                    continue
                if remove_partial and "-" in ec:
                    continue
                res_ecs.add(ec)

        # tp, fp and fn each carry a +1 pseudocount; the "+ 3" in tn cancels
        # those three increments again.
        tp = len(ref_ecs.intersection(res_ecs)) + 1
        fp = len(res_ecs.difference(ref_ecs)) + 1
        fn = len(ref_ecs.difference(res_ecs)) + 1
        tn = len(all_ecs) - tp - fp - fn + 3

        try:
            tpr = 1.0 * tp / (tp + fn)
            fpr = 1.0 * fp / (fp + tn)

            prec = 1.0 * tp / (tp + fp)
            recall = 1.0 * tp / (tp + fn)

            f1 = 2 * prec * recall / (prec + recall)
        except ZeroDivisionError:
            print("Cannot process %s" % (fna))
            raise

        results[param] = (tp, fp, fn, tn, tpr, fpr, f1)

    # The curve is integrated with trapezoids, starting at
    # (FPR, TPR) = (1, 1) and closed at (0, 0) after the last parameter.
    pfpr = ptpr = 1.0
    auc = 0.0

    with open(ofile, "w") as of:
        of.write("#Param TP FP FN TN TPR FPN F1 AUC\n")
        for param in sorted(results):
            tp, fp, fn, tn, tpr, fpr, f1 = results[param]

            auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
            print("%s %s %s %s" % (fpr, tpr, auc, f1))

            of.write("%.10f\t%d\t%d\t%d\t%d\t%f\t%f\t%s\t%f\n" % (param, tp, fp, fn, tn, tpr, fpr, f1, auc))
            pfpr = fpr
            ptpr = tpr

        tpr = 0.0
        fpr = 0.0
        auc += (pfpr - fpr) * tpr + (pfpr - fpr) * (ptpr - tpr) / 2
        f1 = 0.0
        of.write("NA\tNA\tNA\tNA\tNA\t%f\t%f\t%f\t%f\n" % (tpr, fpr, f1, auc))