Exemple #1
0
def main(argv):
    """Plot p-value calibration figures for the result files named in argv[0].

    argv is expected to hold the command-line arguments without the program
    name (presumably sys.argv[1:] -- confirm against the caller): argv[0] is
    the path of a list file and the remaining entries are options.  Every
    non-blank line of the list file must hold at least two
    whitespace-separated fields: the path of a results file loaded with
    util.importer, and the series name handed to the plotting functions.

    Options:
        -e, --hidden PATTERN  switch to hidden-decoy mode and use PATTERN to
                              select the entries whose p-values are kept
        -d, --prefix PATTERN  substring marking decoy proteins
                              (default "random")
        -l, --label LABEL     prefix of the three PNG names passed to the
                              plotting functions; also appended to the
                              "#target " caption
        -t, --ties            see the NOTE(review) in the option loop
        -v, --verbose         print progress messages
        -h, --help            print usage and exit

    Exits through sys.exit: status 2 for bad arguments, status 1 when the
    list file, a listed results file or a list-file line is unusable, and
    status 0 after --help.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = True
    label = ""

    try:
        # Options follow the list file inside argv itself, so the function
        # does not depend on the global sys.argv.
        opts, _ = getopt.getopt(
            argv[1:], "e:d:vhl:t",
            ["hidden=", "prefix=", "verbose", "help", "label=", "ties"])
    except getopt.GetoptError as err:
        # Prints something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-t", "--ties"):
            # NOTE(review): -t/--ties takes no argument, so `a` is always ""
            # here and the flag turns the default (True) into a falsy value.
            # Confirm that switching ties off is the intended effect.
            ties = a
        else:
            # Unreachable unless the getopt specification above and this
            # chain get out of sync.
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = [
        "red", "blue", "yellow", "black", "brown", "pink", "cyan",
        "darkblue", "darkred"
    ]
    hits = []
    names = []
    ##READING ELEMENTS##
    with open(infile) as list_file:
        for line in list_file:
            words = line.split()
            if not words:
                # Blank or whitespace-only line: nothing to load.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if verbose:
                print("reading file " + str(prob_file))
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))

    pvalues = []
    pvalues2 = []
    for file_hits in hits:
        decoys = [(ele.score, ele.protein) for ele in file_hits
                  if decoy_pattern in ele.protein]
        if hidden_decoys:
            # Entries matching the decoy pattern are left out of the PEP
            # conversion; the hidden pattern selects which p-values are kept.
            pep_input = [ele for ele in file_hits
                         if decoy_pattern not in ele.protein]
            keep_pattern = hidden_pattern
            candidates = [(ele.score, ele.protein) for ele in file_hits
                          if hidden_pattern in ele.protein]
        else:
            pep_input = file_hits
            keep_pattern = decoy_pattern
            candidates = [(ele.score, ele.protein) for ele in file_hits
                          if decoy_pattern not in ele.protein]

        ##ESTIMATING PVALUES FROM PEP##
        from_pep = pep2pvalue([(ele.pep, ele.protein) for ele in pep_input],
                              ties)
        # Each returned row carries the value in its second-to-last slot and
        # the protein name in its last slot.
        from_pep = [row[-2] for row in from_pep if keep_pattern in row[-1]]
        pvalues.append(sorted(remove_zeroes(from_pep)))
        pvalues2.append(estimate_pvalues(candidates, decoys, decoy_pattern))

    if verbose:
        print("generatings plots")
    ##PLOTTING##
    util.plotHist(pvalues2, names, colors, "estimated $p$ value",
                  "#target " + label, label + "_pvalue_estimated.png")
    util.plot_pvalues_calibration(
        pvalues, names, colors, label + "_pvalue_calibration_from_pep.png")
    util.plot_pvalues_calibration(pvalues2, names, colors,
                                  label + "_pvalue_calibration.png")

    if verbose:
        print("plots generated")
Exemple #2
0
def main(argv):
    """Estimate q-values for the result files named in argv[0] and plot them.

    argv is expected to hold the command-line arguments without the program
    name (presumably sys.argv[1:] -- confirm against the caller): argv[0] is
    the path of a list file and the remaining entries are options.  Every
    non-blank line of the list file must hold at least two
    whitespace-separated fields: the path of a results file loaded with
    util.importer, and the series name handed to the plotting functions.

    Options:
        -e, --hidden PATTERN  hidden-decoy mode: entries matching the decoy
                              prefix are dropped and PATTERN is passed to
                              estimateFDR_hidden_decoys
        -d, --prefix PATTERN  substring marking decoy proteins
                              (default "random")
        -l, --label LABEL     prefix of the PNG names passed to the plotting
                              functions; also appended to the "#target "
                              caption
        -f, --fdr VALUE       threshold passed to the estimators and plots
                              (default 0.01)
        -r, --tdratio VALUE   target/decoy ratio passed to the estimators
                              (default 0.0)
        -i, --pi0 ARG         estimate pi0 per file instead of using 1.0; the
                              parser requires ARG but its value is not used
        -c, --countdecoys     pass countdecoys=False to the estimators
        -t, --ties            pass ties=True to the estimators
        -v, --verbose         print progress messages
        -h, --help            print usage and exit

    Exits through sys.exit: status 2 for bad arguments, status 1 when the
    list file, a listed results file or a list-file line is unusable, and
    status 0 after --help.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = False
    countdecoys = True
    label = ""
    fdr = 0.01
    tdratio = 0.0
    pi0 = False

    try:
        # Options follow the list file inside argv itself, so the function
        # does not depend on the global sys.argv.
        opts, _ = getopt.getopt(argv[1:], "e:d:vhl:f:i:r:tc", [
            "hidden=", "prefix=", "verbose", "help", "label=", "fdr=",
            "pi0=", "tdratio=", "ties", "countdecoys"
        ])
    except getopt.GetoptError as err:
        # Prints something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-f", "--fdr"):
            fdr = float(a)
        elif o in ("-r", "--tdratio"):
            tdratio = float(a)
            print("Using Target Decoy Ratio = " + str(tdratio))
        elif o in ("-i", "--pi0"):
            # The option is declared with a mandatory argument, but only its
            # presence matters; the declaration is kept so existing command
            # lines keep parsing.
            pi0 = True
            print("Using pi0 to adjust empirical q values")
        elif o in ("-c", "--countdecoys"):
            # The default is True, so the flag has to clear it to do what the
            # message below announces.
            countdecoys = False
            print("Not counting decoys when computing empirical q values")
        elif o in ("-t", "--ties"):
            ties = True
            print("Counting clusters as one")
        else:
            # Unreachable unless the getopt specification above and this
            # chain get out of sync.
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = [
        "red", "blue", "yellow", "black", "brown", "pink", "cyan",
        "darkblue", "darkred"
    ]
    hits = []
    names = []
    ##READING ELEMENTS##
    with open(infile) as list_file:
        for line in list_file:
            words = line.split()
            if not words:
                # Blank or whitespace-only line: nothing to load.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))
            if verbose:
                print("reading file " + str(prob_file))

    qvaluesEmps = []
    qvaluesEsts = []

    if hidden_decoys:
        print("Estimating qvalues for hidden decoys mode...")

    for index, file_hits in enumerate(hits):
        file_number = str(index + 1)
        if hidden_decoys:
            # Entries carrying the decoy prefix are removed; the hidden
            # pattern is what the estimators receive instead.
            file_hits = [ele for ele in file_hits
                         if decoy_pattern not in ele.protein]
            pattern = hidden_pattern
            estimator = estimateFDR_hidden_decoys
        else:
            pattern = decoy_pattern
            estimator = estimateQvalues

        ##ESTIMATING PI0##
        file_pi0 = 1.0
        if pi0:
            file_pi0 = estimatePi0(getPValues(file_hits, True, pattern))
            # An estimate outside [0, 1] is replaced by the neutral 1.0.
            if file_pi0 > 1.0 or file_pi0 < 0.0:
                file_pi0 = 1.0

        ##ESTIMATING QVALUEs##
        (empirical, estimated, ndecoys, ntargets, ndecoysEmp,
         ntargetEmp) = estimator(file_hits, file_pi0, pattern, False, ties,
                                 countdecoys, fdr, tdratio)
        qvaluesEmps.append(empirical)
        qvaluesEsts.append(estimated)

        if pi0:
            print("pi0 file " + file_number + " :" + str(file_pi0))
        print("elements with estimated qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargets))
        print("elements with empirical qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargetEmp))
        print("false positive elements with estimated qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoys))
        print("false positive elements with empirical qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoysEmp))

    if verbose:
        print("generatings plots")

    ##PLOTTING##
    util.plotHist(qvaluesEsts, names, colors, "estimated $q$ value",
                  "#target " + label, label + "_qvalue_estimated.png", fdr)
    util.plotHist(qvaluesEmps, names, colors, "empirical $q$ value",
                  "#target " + label, label + "_qvalue_empirical.png", fdr)
    # NOTE(review): the "_low_range" figure is drawn with 1.0 and the other
    # one with fdr; this looks possibly swapped -- confirm against
    # util.plotCorrelation before changing it.
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical_low_range.png", 1.0)
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical.png", fdr)

    if verbose:
        print("plots generated")
def main(argv):
    """Plot p-value calibration figures for the result files named in argv[0].

    argv is expected to hold the command-line arguments without the program
    name (presumably sys.argv[1:] -- confirm against the caller): argv[0] is
    the path of a list file and the remaining entries are options.  Every
    non-blank line of the list file must hold at least two
    whitespace-separated fields: the path of a results file loaded with
    util.importer, and the series name handed to the plotting functions.

    Options:
        -e, --hidden PATTERN  switch to hidden-decoy mode and use PATTERN to
                              select the entries whose p-values are kept
        -d, --prefix PATTERN  substring marking decoy proteins
                              (default "random")
        -l, --label LABEL     prefix of the three PNG names passed to the
                              plotting functions; also appended to the
                              "#target " caption
        -t, --ties            see the NOTE(review) in the option loop
        -v, --verbose         print progress messages
        -h, --help            print usage and exit

    Exits through sys.exit: status 2 for bad arguments, status 1 when the
    list file, a listed results file or a list-file line is unusable, and
    status 0 after --help.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = True
    label = ""

    try:
        # Options follow the list file inside argv itself, so the function
        # does not depend on the global sys.argv.
        opts, _ = getopt.getopt(
            argv[1:], "e:d:vhl:t",
            ["hidden=", "prefix=", "verbose", "help", "label=", "ties"])
    except getopt.GetoptError as err:
        # Prints something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-t", "--ties"):
            # NOTE(review): -t/--ties takes no argument, so `a` is always ""
            # here and the flag turns the default (True) into a falsy value.
            # Confirm that switching ties off is the intended effect.
            ties = a
        else:
            # Unreachable unless the getopt specification above and this
            # chain get out of sync.
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = ["red", "blue", "yellow", "black", "brown", "pink", "cyan",
              "darkblue", "darkred"]
    hits = []
    names = []
    ##READING ELEMENTS##
    with open(infile) as list_file:
        for line in list_file:
            words = line.split()
            if not words:
                # Blank or whitespace-only line: nothing to load.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if verbose:
                print("reading file " + str(prob_file))
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))

    pvalues = []
    pvalues2 = []
    for file_hits in hits:
        decoys = [(ele.score, ele.protein) for ele in file_hits
                  if decoy_pattern in ele.protein]
        if hidden_decoys:
            # Entries matching the decoy pattern are left out of the PEP
            # conversion; the hidden pattern selects which p-values are kept.
            pep_input = [ele for ele in file_hits
                         if decoy_pattern not in ele.protein]
            keep_pattern = hidden_pattern
            candidates = [(ele.score, ele.protein) for ele in file_hits
                          if hidden_pattern in ele.protein]
        else:
            pep_input = file_hits
            keep_pattern = decoy_pattern
            candidates = [(ele.score, ele.protein) for ele in file_hits
                          if decoy_pattern not in ele.protein]

        ##ESTIMATING PVALUES FROM PEP##
        from_pep = pep2pvalue([(ele.pep, ele.protein) for ele in pep_input],
                              ties)
        # Each returned row carries the value in its second-to-last slot and
        # the protein name in its last slot.
        from_pep = [row[-2] for row in from_pep if keep_pattern in row[-1]]
        pvalues.append(sorted(remove_zeroes(from_pep)))
        pvalues2.append(estimate_pvalues(candidates, decoys, decoy_pattern))

    if verbose:
        print("generatings plots")
    ##PLOTTING##
    util.plotHist(pvalues2, names, colors, "estimated $p$ value",
                  "#target " + label, label + "_pvalue_estimated.png")
    util.plot_pvalues_calibration(
        pvalues, names, colors, label + "_pvalue_calibration_from_pep.png")
    util.plot_pvalues_calibration(pvalues2, names, colors,
                                  label + "_pvalue_calibration.png")

    if verbose:
        print("plots generated")
def main(argv):
    """Estimate q-values for the result files named in argv[0] and plot them.

    argv is expected to hold the command-line arguments without the program
    name (presumably sys.argv[1:] -- confirm against the caller): argv[0] is
    the path of a list file and the remaining entries are options.  Every
    non-blank line of the list file must hold at least two
    whitespace-separated fields: the path of a results file loaded with
    util.importer, and the series name handed to the plotting functions.

    Options:
        -e, --hidden PATTERN  hidden-decoy mode: entries matching the decoy
                              prefix are dropped and PATTERN is passed to
                              estimateFDR_hidden_decoys
        -d, --prefix PATTERN  substring marking decoy proteins
                              (default "random")
        -l, --label LABEL     prefix of the PNG names passed to the plotting
                              functions; also appended to the "#target "
                              caption
        -f, --fdr VALUE       threshold passed to the estimators and plots
                              (default 0.01)
        -r, --tdratio VALUE   target/decoy ratio passed to the estimators
                              (default 0.0)
        -i, --pi0 ARG         estimate pi0 per file instead of using 1.0; the
                              parser requires ARG but its value is not used
        -c, --countdecoys     pass countdecoys=False to the estimators
        -t, --ties            pass ties=True to the estimators
        -v, --verbose         print progress messages
        -h, --help            print usage and exit

    Exits through sys.exit: status 2 for bad arguments, status 1 when the
    list file, a listed results file or a list-file line is unusable, and
    status 0 after --help.
    """
    if len(argv) < 1:
        sys.stderr.write("Error: Number of arguments incorrect\n")
        usage()
        sys.exit(2)

    hidden_decoys = False
    hidden_pattern = ""
    decoy_pattern = "random"
    verbose = False
    ties = False
    countdecoys = True
    label = ""
    fdr = 0.01
    tdratio = 0.0
    pi0 = False

    try:
        # Options follow the list file inside argv itself, so the function
        # does not depend on the global sys.argv.
        opts, _ = getopt.getopt(argv[1:], "e:d:vhl:f:i:r:tc", [
            "hidden=", "prefix=", "verbose", "help", "label=", "fdr=",
            "pi0=", "tdratio=", "ties", "countdecoys"
        ])
    except getopt.GetoptError as err:
        # Prints something like "option -a not recognized".
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-e", "--hidden"):
            hidden_decoys = True
            hidden_pattern = a
        elif o in ("-d", "--prefix"):
            decoy_pattern = a
        elif o in ("-l", "--label"):
            label = a
        elif o in ("-f", "--fdr"):
            fdr = float(a)
        elif o in ("-r", "--tdratio"):
            tdratio = float(a)
            print("Using Target Decoy Ratio = " + str(tdratio))
        elif o in ("-i", "--pi0"):
            # The option is declared with a mandatory argument, but only its
            # presence matters; the declaration is kept so existing command
            # lines keep parsing.
            pi0 = True
            print("Using pi0 to adjust empirical q values")
        elif o in ("-c", "--countdecoys"):
            # The default is True, so the flag has to clear it to do what the
            # message below announces.
            countdecoys = False
            print("Not counting decoys when computing empirical q values")
        elif o in ("-t", "--ties"):
            ties = True
            print("Counting clusters as one")
        else:
            # Unreachable unless the getopt specification above and this
            # chain get out of sync.
            raise AssertionError("unhandled option")

    if not os.path.isfile(argv[0]):
        sys.stderr.write("Error: file " + str(argv[0]) + " not found\n")
        sys.exit(1)
    infile = argv[0]

    colors = ["red", "blue", "yellow", "black", "brown", "pink", "cyan",
              "darkblue", "darkred"]
    hits = []
    names = []
    ##READING ELEMENTS##
    with open(infile) as list_file:
        for line in list_file:
            words = line.split()
            if not words:
                # Blank or whitespace-only line: nothing to load.
                continue
            if len(words) < 2:
                sys.stderr.write("Error: malformed line in " + str(infile) +
                                 ": " + line.strip() + "\n")
                sys.exit(1)
            prob_file = words[0]
            names.append(words[1])
            if not os.path.isfile(prob_file):
                sys.stderr.write("Error: file " + str(prob_file) +
                                 " not found\n")
                sys.exit(1)
            hits.append(util.importer(prob_file))
            if verbose:
                print("reading file " + str(prob_file))

    qvaluesEmps = []
    qvaluesEsts = []

    if hidden_decoys:
        print("Estimating qvalues for hidden decoys mode...")

    for index, file_hits in enumerate(hits):
        file_number = str(index + 1)
        if hidden_decoys:
            # Entries carrying the decoy prefix are removed; the hidden
            # pattern is what the estimators receive instead.
            file_hits = [ele for ele in file_hits
                         if decoy_pattern not in ele.protein]
            pattern = hidden_pattern
            estimator = estimateFDR_hidden_decoys
        else:
            pattern = decoy_pattern
            estimator = estimateQvalues

        ##ESTIMATING PI0##
        file_pi0 = 1.0
        if pi0:
            file_pi0 = estimatePi0(getPValues(file_hits, True, pattern))
            # An estimate outside [0, 1] is replaced by the neutral 1.0.
            if file_pi0 > 1.0 or file_pi0 < 0.0:
                file_pi0 = 1.0

        ##ESTIMATING QVALUEs##
        (empirical, estimated, ndecoys, ntargets, ndecoysEmp,
         ntargetEmp) = estimator(file_hits, file_pi0, pattern, False, ties,
                                 countdecoys, fdr, tdratio)
        qvaluesEmps.append(empirical)
        qvaluesEsts.append(estimated)

        if pi0:
            print("pi0 file " + file_number + " :" + str(file_pi0))
        print("elements with estimated qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargets))
        print("elements with empirical qvalue below " + str(fdr) +
              " in file " + file_number + " :" + str(ntargetEmp))
        print("false positive elements with estimated qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoys))
        print("false positive elements with empirical qvalue below " +
              str(fdr) + " in file " + file_number + " :" + str(ndecoysEmp))

    if verbose:
        print("generatings plots")

    ##PLOTTING##
    util.plotHist(qvaluesEsts, names, colors, "estimated $q$ value",
                  "#target " + label, label + "_qvalue_estimated.png", fdr)
    util.plotHist(qvaluesEmps, names, colors, "empirical $q$ value",
                  "#target " + label, label + "_qvalue_empirical.png", fdr)
    # NOTE(review): the "_low_range" figure is drawn with 1.0 and the other
    # one with fdr; this looks possibly swapped -- confirm against
    # util.plotCorrelation before changing it.
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical_low_range.png", 1.0)
    util.plotCorrelation(
        qvaluesEmps, qvaluesEsts, names, colors, "empirical $q$ value",
        "estimated $q$ value",
        label + "_qvalue_estimated_VS_qvalue_empirical.png", fdr)

    if verbose:
        print("plots generated")