コード例 #1
0
def main(args):
    """Compare kallisto and sailfish TPM estimates against the known truth.

    For each of the run directories ``1`` .. ``6`` under
    ``<pathname>/../sims/doi_10.1101_025387/<args.species>`` this reads
    ``truth.tsv``, ``abundance.tsv`` (kallisto) and ``quant.sf`` (sailfish),
    joins the three tables and then prints:

    * one line per method/correlation type listing the correlation between
      ``TPM_true`` and ``TPM_<method>`` for every run, and
    * the mean of the per-run medians and the median of the per-run means of
      the signed and absolute relative differences for each method.

    Args:
        args: Object with a ``species`` attribute; it is used as the last
            component of the simulation directory path.

    Notes:
        ``pathname``, ``os``, ``np``, ``ParsingUtils`` and ``AnalysisUtils``
        are expected to be provided at module level (not visible here).
        The parsed tables are presumably pandas DataFrames indexed by
        transcript -- TODO confirm against ``ParsingUtils``.
    """
    sims = os.path.sep.join([pathname, '..', 'sims', 'doi_10.1101_025387', args.species])

    rdict = {}
    corrs = {}
    # range() instead of xrange() so the loop runs on Python 2 and Python 3.
    for i in range(1, 7):
        path = os.path.sep.join([sims, str(i)])
        print("reading results from {}".format(path))
        tdf = ParsingUtils.readThreeColumnTruth(os.path.sep.join([path, "truth.tsv"]), "_true")
        kdf = ParsingUtils.readKallisto(os.path.sep.join([path, "abundance.tsv"]), "_kallisto")
        sdf = ParsingUtils.readSailfish(os.path.sep.join([path, "quant.sf"]), "_sailfish")
        df = tdf.join(kdf, rsuffix="_K").join(sdf, rsuffix="_S")

        for method in ["sailfish", "kallisto"]:
            for ct in ["spearman", "pearson"]:
                corr = df["TPM_true"].corr(df["TPM_{}".format(method)], method=ct)
                # One list per "<method>_<correlation type>", one entry per run.
                corrs.setdefault("{}_{}".format(method, ct), []).append(corr)

        rdict[i] = df

    # items() instead of iteritems(): available on both Python 2 and 3.
    corrStrs = sorted(
        "{}: {}".format(k, ','.join(["{:0.2}".format(c) for c in v]))
        for k, v in corrs.items())
    print('\n'.join(corrStrs))

    summaries = (("median", AnalysisUtils.getMedian), ("mean", AnalysisUtils.getMean))
    relDiffs = {}
    for df in rdict.values():
        rds = AnalysisUtils.relDiff("TPM_true", "TPM_sailfish", df, verbose=False)
        rdk = AnalysisUtils.relDiff("TPM_true", "TPM_kallisto", df, verbose=False)
        for method, rd in (("sailfish", rds), ("kallisto", rdk)):
            for summaryName, summaryFunc in summaries:
                signedKey = "{}_{}_{}".format(method, summaryName, "signed")
                absKey = "{}_{}_{}".format(method, summaryName, "abs")
                # Only the first element of relDiff's result is summarised.
                relDiffs.setdefault(signedKey, []).append(summaryFunc(rd[0]))
                relDiffs.setdefault(absKey, []).append(summaryFunc(rd[0].abs()))

    # Per-run medians are averaged; per-run means are reduced with the median.
    reports = (
        ("median", np.mean,
         "mean of medians of {} relative differences is :\n kallisto: {:0.2f}\n sailfish: {:0.2f}\n"),
        ("mean", np.median,
         "median of means of {} relative differences is :\n kallisto: {:0.2f}\n sailfish: {:0.2f}\n"),
    )
    for signedness in ["signed", "abs"]:
        for stat, aggregate, template in reports:
            print(template.format(
                signedness,
                aggregate(relDiffs["kallisto_{}_{}".format(stat, signedness)]),
                aggregate(relDiffs["sailfish_{}_{}".format(stat, signedness)])))
コード例 #2
0
def main():
    """Summarise kallisto and sailfish relative differences on the RSEM sims.

    For each of the run directories ``1`` .. ``20`` under
    ``<pathname>/../sims/rsem`` this reads ``truth.tsv``, ``abundance.tsv``
    (kallisto) and ``quant.sf`` (sailfish) and joins the three tables.  It
    then prints, for the signed and the absolute relative differences, the
    mean of the per-run medians and the median of the per-run means for each
    method.

    Notes:
        ``pathname``, ``os``, ``np``, ``ParsingUtils`` and ``AnalysisUtils``
        are expected to be provided at module level (not visible here).
        The parsed tables are presumably pandas DataFrames indexed by
        transcript -- TODO confirm against ``ParsingUtils``.
    """
    sims = os.path.sep.join([pathname, '..', 'sims', 'rsem'])

    rdict = {}
    # range() instead of xrange() so the loop runs on Python 2 and Python 3.
    for i in range(1, 21):
        path = os.path.sep.join([sims, str(i)])
        print("reading results from {}".format(path))
        tdf = ParsingUtils.readRSEMTruth(os.path.sep.join([path, "truth.tsv"]), "_true")
        kdf = ParsingUtils.readKallisto(os.path.sep.join([path, "abundance.tsv"]), "_kallisto")
        sdf = ParsingUtils.readSailfish(os.path.sep.join([path, "quant.sf"]), "_sailfish")
        rdict[i] = tdf.join(kdf, rsuffix="_K").join(sdf, rsuffix="_S")

    summaries = (("median", AnalysisUtils.getMedian), ("mean", AnalysisUtils.getMean))
    relDiffs = {}
    # values() instead of iteritems(): the run index is not needed here and
    # the call exists on both Python 2 and 3.
    for df in rdict.values():
        rds = AnalysisUtils.relDiff("TPM_true", "TPM_sailfish", df, verbose=False)
        rdk = AnalysisUtils.relDiff("TPM_true", "TPM_kallisto", df, verbose=False)
        for method, rd in (("sailfish", rds), ("kallisto", rdk)):
            for summaryName, summaryFunc in summaries:
                signedKey = "{}_{}_{}".format(method, summaryName, "signed")
                absKey = "{}_{}_{}".format(method, summaryName, "abs")
                # Only the first element of relDiff's result is summarised.
                relDiffs.setdefault(signedKey, []).append(summaryFunc(rd[0]))
                relDiffs.setdefault(absKey, []).append(summaryFunc(rd[0].abs()))

    # Per-run medians are averaged; per-run means are reduced with the median.
    reports = (
        ("median", np.mean,
         "mean of medians of {} relative differences is :\n kallisto: {:0.2f}\n sailfish: {:0.2f}\n"),
        ("mean", np.median,
         "median of means of {} relative differences is :\n kallisto: {:0.2f}\n sailfish: {:0.2f}\n"),
    )
    for signedness in ["signed", "abs"]:
        for stat, aggregate, template in reports:
            print(template.format(
                signedness,
                aggregate(relDiffs["kallisto_{}_{}".format(stat, signedness)]),
                aggregate(relDiffs["sailfish_{}_{}".format(stat, signedness)])))
コード例 #3
0
ファイル: fasta_parser.py プロジェクト: rhp4515/CB_Project_15
from Bio import SeqIO
import ParsingUtils

def parseFastaFormat():
    """Write the GC fraction of every record in the filtered FASTA to a CSV.

    Parses ``./../transcripts.filtered.fa`` with ``SeqIO.parse`` and writes
    one ``<record id>,<gc fraction>`` line per record to
    ``./../quant_data/gc_content.csv``.  The fraction is the number of G and
    C characters divided by the sequence length, rounded to four decimals.
    After all records are written, the number of distinct record ids is
    printed.

    G/C are counted case-insensitively so lower-case nucleotides are not
    silently excluded, and a zero-length record is reported as ``0.0``
    rather than raising ``ZeroDivisionError``.
    """
    wfile = "./../quant_data/gc_content.csv"
    seen_ids = set()
    # The context manager closes the CSV even if parsing or writing fails.
    with open(wfile, "w") as f:
        for seq_record in SeqIO.parse("./../transcripts.filtered.fa", "fasta"):
            seq = str(seq_record.seq).upper()
            count = seq.count("G") + seq.count("C")
            # Guard the division: an empty sequence has no GC content.
            gc_frac = round(float(count) / len(seq), 4) if seq else 0.0
            seen_ids.add(seq_record.id)
            f.write(seq_record.id + "," + str(gc_frac) + "\n")
    # Call form prints the same text under Python 2 and Python 3.
    print(len(seen_ids))

# Quick manual check of the RSEM truth parser: load the isoform results and
# print only the first item obtained by iterating over the parsed object.
f = ParsingUtils.readRSEMTruth("./../quant_data/rsem/rsem.new.quant.isoforms.results")
try:
    line = next(iter(f))
except StopIteration:
    # Nothing to iterate over, so nothing is printed.
    pass
else:
    print(line)