Beispiel #1
0
def _detect_rRNA(data):
    """Sum the counts assigned to rRNA genes for one sample.

    Reads the two-column, tab-separated count file referenced by ``data``
    and adds up the counts of every row whose ``id`` matches an rRNA gene
    reported by ``gtf.get_rRNA`` for the sample's GTF file.

    Returns a dict with the single key ``'rRNA'``.
    """
    gtf_file = dd.get_gtf_file(data)
    count_file = dd.get_count_file(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # NOTE(review): assumes element 0 of each feature is the gene id -- confirm
    # against gtf.get_rRNA.
    genes = [x[0] for x in rrna_features if x]
    count_table = pd.read_csv(count_file, sep="\t", names=["id", "counts"])
    # Match on the "id" column explicitly: the table has a default integer
    # index, so label-based lookup by gene id (the removed ``.ix`` indexer)
    # never selected the intended rows.
    is_rrna = count_table["id"].isin(genes)
    return {'rRNA': sum(count_table[is_rrna]["counts"])}
def _detect_rRNA(data, out_dir):
    """Compute (or reload) rRNA read metrics for a sample.

    Results are cached in ``rRNA_metrics.txt`` inside ``out_dir``; when that
    file already exists it is simply read back via ``_read_memoized_rrna``.
    Otherwise the quantification table (``data["quant"]["tsv"]`` or, failing
    that, ``quant.sf`` in the salmon directory) is loaded, the ``NumReads``
    of rRNA transcripts are summed and written out together with their
    fraction of all reads.

    Returns ``{'rRNA': "NA", "rRNA_rate": "NA"}`` without writing anything
    when there are no rRNA transcripts or no usable quantification file.
    """
    out_file = os.path.join(out_dir, "rRNA_metrics.txt")
    if utils.file_exists(out_file):
        return _read_memoized_rrna(out_file)

    gtf_file = dd.get_gtf_file(data)
    quant = tz.get_in(["quant", "tsv"], data)
    if not quant:
        salmon_dir = dd.get_salmon_dir(data)
        if salmon_dir:
            quant = os.path.join(salmon_dir, "quant.sf")
    logger.info("Calculating RNA-seq rRNA metrics for %s." % quant)

    rrna_ids = {feature[1] for feature in gtf.get_rRNA(gtf_file) if feature}
    if not rrna_ids or not quant or not utils.file_exists(quant):
        return {'rRNA': "NA", "rRNA_rate": "NA"}

    quant_table = pd.read_csv(quant, sep="\t")
    is_rrna = quant_table["Name"].isin(rrna_ids)
    rrna = sum([float(reads) for reads in quant_table[is_rrna]["NumReads"]])
    total = sum([float(reads) for reads in quant_table["NumReads"]])
    # Without any reads the rate is undefined; report it as "NA".
    rrna_rate = "NA" if total == 0 else float(rrna) / total

    with file_transaction(out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            for key, value in (("rRNA", rrna), ("rRNA_rate", rrna_rate)):
                out_handle.write(",".join([key, str(value)]) + "\n")
    return _read_memoized_rrna(out_file)
def _find_rRNA_genes(gtf_file, rrna_file):
    """Write the unique rRNA identifiers found in ``gtf_file`` to ``rrna_file``.

    The first element of every non-empty feature returned by
    ``gtf.get_rRNA`` is collected, de-duplicated and written one per line
    (no trailing newline). Returns ``rrna_file``.
    """
    print(gtf_file)
    unique_ids = {feature[0] for feature in gtf.get_rRNA(gtf_file) if feature}
    listing = "\n".join(unique_ids)
    with open(rrna_file, 'w') as handle:
        handle.write(listing)
    return rrna_file
Beispiel #4
0
def _detect_rRNA(data):
    """Sum the counts assigned to rRNA genes for one sample.

    Loads the tab-separated ``id``/``counts`` table referenced by ``data``
    and totals the counts of the rows whose ``id`` is one of the rRNA genes
    that ``gtf.get_rRNA`` reports for the sample's GTF file.

    Returns a dict with the single key ``'rRNA'``.
    """
    gtf_file = dd.get_gtf_file(data)
    count_file = dd.get_count_file(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # NOTE(review): assumes element 0 of each feature is the gene id -- confirm
    # against gtf.get_rRNA.
    genes = [x[0] for x in rrna_features if x]
    count_table = pd.read_csv(count_file, sep="\t", names=["id", "counts"])
    # ``DataFrame.ix`` no longer exists in current pandas, and the frame is
    # indexed by row number rather than gene id, so select rows through the
    # "id" column instead of by index label.
    is_rrna = count_table["id"].isin(genes)
    return {'rRNA': sum(count_table[is_rrna]["counts"])}
Beispiel #5
0
def _find_rRNA_genes(gtf_file, rrna_file):
    """Dump the distinct rRNA identifiers from ``gtf_file`` into ``rrna_file``.

    Takes element 0 of each non-empty feature yielded by ``gtf.get_rRNA``,
    removes duplicates, and writes the identifiers newline-separated.
    Returns the path that was written.
    """
    print(gtf_file)
    identifiers = set()
    for feature in gtf.get_rRNA(gtf_file):
        if feature:
            identifiers.add(feature[0])
    with open(rrna_file, 'w') as handle:
        handle.write("\n".join(identifiers))
    return rrna_file
Beispiel #6
0
def _detect_rRNA(data, out_dir):
    """Compute (or reload) rRNA read metrics for a sample.

    The metrics are memoized in ``<out_dir>/rRNA_metrics.txt``. If that file
    is missing, the quantification table is located (``data["quant"]["tsv"]``
    first, then ``quant/quant.sf`` under the salmon directory), the
    ``NumReads`` of rRNA transcripts are totalled, and the total plus its
    share of all reads are written as ``key,value`` lines.

    Returns ``{'rRNA': "NA", "rRNA_rate": "NA"}`` without writing anything
    when no rRNA transcripts or no readable quantification file is found;
    otherwise returns whatever ``_read_memoized_rrna`` reads back.
    """
    out_file = os.path.join(out_dir, "rRNA_metrics.txt")
    if utils.file_exists(out_file):
        return _read_memoized_rrna(out_file)

    gtf_file = dd.get_gtf_file(data)
    quant = tz.get_in(["quant", "tsv"], data)
    if not quant:
        salmon_dir = dd.get_salmon_dir(data)
        if salmon_dir:
            quant = os.path.join(salmon_dir, "quant", "quant.sf")
    logger.info("Calculating RNA-seq rRNA metrics for %s." % quant)

    rrna_ids = {feature[1] for feature in gtf.get_rRNA(gtf_file) if feature}
    if not rrna_ids or not quant or not utils.file_exists(quant):
        return {'rRNA': "NA", "rRNA_rate": "NA"}

    quant_table = pd.read_csv(quant, sep="\t")
    rrna_rows = quant_table[quant_table["Name"].isin(rrna_ids)]
    rrna = sum([float(reads) for reads in rrna_rows["NumReads"]])
    total = sum([float(reads) for reads in quant_table["NumReads"]])
    if total == 0:
        # No reads at all: the rate is undefined.
        rrna_rate = "NA"
    else:
        rrna_rate = float(rrna) / total

    metrics = (("rRNA", rrna), ("rRNA_rate", rrna_rate))
    with file_transaction(out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            for key, value in metrics:
                out_handle.write(",".join([key, str(value)]) + "\n")
    return _read_memoized_rrna(out_file)
Beispiel #7
0
def _detect_rRNA(data):
    """Report rRNA counts and their fraction of all counts for one sample.

    Reads the tab-separated ``id``/``counts`` table referenced by ``data``
    and sums the counts of rows whose ``id`` is an rRNA gene according to
    ``gtf.get_rRNA``.

    Returns a dict with string values under ``'rRNA'`` and ``'rRNA_rate'``.
    Both are ``"NA"`` when no rRNA genes are annotated; only the rate is
    ``"NA"`` when the table holds no counts at all.
    """
    gtf_file = dd.get_gtf_file(data)
    count_file = dd.get_count_file(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    genes = [x[0] for x in rrna_features if x]
    if not genes:
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    count_table = pd.read_csv(count_file, sep="\t", names=["id", "counts"])
    rrna = sum(count_table[count_table["id"].isin(genes)]["counts"])
    total = sum(count_table["counts"])
    if total == 0:
        # An empty or all-zero table would otherwise divide by zero
        # (ZeroDivisionError, or inf/nan with numpy integers).
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
Beispiel #8
0
def _detect_rRNA(data):
    """Report rRNA reads and their fraction of all reads for one sample.

    Loads the tidy sailfish table referenced by ``data``, keeps the rows for
    this sample, and sums ``numreads`` over the rRNA transcripts reported by
    ``gtf.get_rRNA``.

    Returns a dict with string values under ``'rRNA'`` and ``'rRNA_rate'``.
    Both are ``"NA"`` when no rRNA transcripts are annotated; only the rate
    is ``"NA"`` when the sample has no reads.
    """
    sample = dd.get_sample_name(data)
    gtf_file = dd.get_gtf_file(data)
    tidy_file = dd.get_sailfish_tidy(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # NOTE(review): assumes element 1 of each feature is the transcript id --
    # confirm against gtf.get_rRNA.
    transcripts = {x[1] for x in rrna_features if x}
    if not transcripts:
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    count_table = pd.read_csv(tidy_file, sep="\t")
    sample_table = count_table[count_table["sample"].isin([sample])]
    is_rrna = sample_table["id"].isin(transcripts)
    rrna = sum(float(n) for n in sample_table[is_rrna]["numreads"])
    total = sum(float(n) for n in sample_table["numreads"])
    if total == 0:
        # A sample with no rows or zero reads would raise ZeroDivisionError.
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
Beispiel #9
0
def _detect_rRNA(data):
    """Report rRNA reads and their fraction of all reads for one sample.

    Reads the tidy sailfish table referenced by ``data``, restricts it to
    the current sample, and totals ``numreads`` for the rRNA transcripts
    that ``gtf.get_rRNA`` reports for the sample's GTF file.

    Returns a dict with string values under ``'rRNA'`` and ``'rRNA_rate'``.
    Both are ``"NA"`` when no rRNA transcripts are annotated; only the rate
    is ``"NA"`` when the sample has no reads.
    """
    sample = dd.get_sample_name(data)
    gtf_file = dd.get_gtf_file(data)
    tidy_file = dd.get_sailfish_tidy(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # NOTE(review): assumes element 1 of each feature is the transcript id --
    # confirm against gtf.get_rRNA.
    transcripts = {x[1] for x in rrna_features if x}
    if not transcripts:
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    count_table = pd.read_csv(tidy_file, sep="\t")
    sample_table = count_table[count_table["sample"].isin([sample])]
    is_rrna = sample_table["id"].isin(transcripts)
    rrna = sum(float(n) for n in sample_table[is_rrna]["numreads"])
    total = sum(float(n) for n in sample_table["numreads"])
    if total == 0:
        # Guard the division: an absent sample or zero reads has no rate.
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
Beispiel #10
0
def _detect_rRNA(data):
    """Report rRNA reads and their fraction of all reads from sailfish output.

    Reads ``quant/quant.sf`` under the sample's sailfish directory and sums
    ``NumReads`` over the rRNA transcripts reported by ``gtf.get_rRNA``.

    Returns a dict with string values under ``'rRNA'`` and ``'rRNA_rate'``.
    Both are ``"NA"`` when there is no sailfish directory, no quantification
    file, or no annotated rRNA transcripts; only the rate is ``"NA"`` when
    the total read count is zero.
    """
    gtf_file = dd.get_gtf_file(data)
    sailfish_dir = dd.get_sailfish_dir(data)
    if not sailfish_dir:
        # os.path.join would raise TypeError on a missing directory.
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    quant = os.path.join(sailfish_dir, "quant", "quant.sf")
    rrna_features = gtf.get_rRNA(gtf_file)
    # NOTE(review): assumes element 1 of each feature is the transcript id --
    # confirm against gtf.get_rRNA.
    transcripts = {x[1] for x in rrna_features if x}
    if not (transcripts and utils.file_exists(quant)):
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    sample_table = pd.read_csv(quant, sep="\t")
    is_rrna = sample_table["Name"].isin(transcripts)
    rrna = sum(float(n) for n in sample_table[is_rrna]["NumReads"])
    # Compute the total once. On Python 3 ``map`` is a one-shot iterator, so
    # summing it a second time yields 0 and the division always failed.
    total = sum(float(n) for n in sample_table["NumReads"])
    if total == 0:
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
Beispiel #11
0
def _detect_rRNA(data):
    """Report rRNA reads and their fraction of all reads from salmon output.

    Reads ``quant/quant.sf`` under the sample's salmon directory and sums
    ``NumReads`` over the rRNA transcripts reported by ``gtf.get_rRNA``.

    Returns a dict with string values under ``'rRNA'`` and ``'rRNA_rate'``.
    Both are ``"NA"`` when there is no salmon directory, no quantification
    file, or no annotated rRNA transcripts; only the rate is ``"NA"`` when
    the total read count is zero.
    """
    gtf_file = dd.get_gtf_file(data)
    salmon_dir = dd.get_salmon_dir(data)
    if not salmon_dir:
        # os.path.join would raise TypeError on a missing directory.
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    quant = os.path.join(salmon_dir, "quant", "quant.sf")
    rrna_features = gtf.get_rRNA(gtf_file)
    # NOTE(review): assumes element 1 of each feature is the transcript id --
    # confirm against gtf.get_rRNA.
    transcripts = {x[1] for x in rrna_features if x}
    if not (transcripts and utils.file_exists(quant)):
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    sample_table = pd.read_csv(quant, sep="\t")
    is_rrna = sample_table["Name"].isin(transcripts)
    rrna = sum(float(n) for n in sample_table[is_rrna]["NumReads"])
    # Compute the total once. On Python 3 ``map`` is a one-shot iterator, so
    # the original's second ``sum`` saw an exhausted iterator and divided by 0.
    total = sum(float(n) for n in sample_table["NumReads"])
    if total == 0:
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}