def _detect_rRNA(data):
    """Sum the read counts assigned to rRNA genes for one sample.

    The GTF and count file are looked up from ``data`` through ``dd``; the
    count file is read as a headerless, tab-separated table with the columns
    ``id`` and ``counts``.

    Returns:
        dict with the single key ``'rRNA'`` holding the summed counts of the
        rows whose ``id`` is one of the rRNA identifiers.
    """
    gtf_file = dd.get_gtf_file(data)
    count_file = dd.get_count_file(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # The first element of each feature is used as the identifier matched
    # against the count table (presumably the gene id -- confirm in gtf).
    genes = {x[0] for x in rrna_features if x}
    count_table = pd.read_csv(count_file, sep="\t", names=["id", "counts"])
    # The ids live in the "id" column, not in the index, so select the rows
    # with a boolean mask rather than a label lookup. (The previously used
    # DataFrame.ix indexer no longer exists in pandas >= 1.0.)
    is_rrna = count_table["id"].isin(genes)
    return {'rRNA': sum(count_table.loc[is_rrna, "counts"])}
def _detect_rRNA(data, out_dir):
    """Compute, cache and return rRNA read metrics for a sample.

    The metrics are stored in ``rRNA_metrics.txt`` inside ``out_dir``; when
    that file already exists nothing is recomputed. The quantification table
    is taken from ``data["quant"]["tsv"]`` or, failing that, from
    ``quant.sf`` in the salmon directory.

    Returns:
        Whatever ``_read_memoized_rrna`` yields for the metrics file, or
        ``{'rRNA': "NA", "rRNA_rate": "NA"}`` (without writing the file) when
        there are no rRNA transcripts or no readable quantification file.
    """
    out_file = os.path.join(out_dir, "rRNA_metrics.txt")
    if utils.file_exists(out_file):
        return _read_memoized_rrna(out_file)

    gtf_file = dd.get_gtf_file(data)
    quant = tz.get_in(["quant", "tsv"], data)
    if not quant:
        salmon_dir = dd.get_salmon_dir(data)
        if salmon_dir:
            quant = os.path.join(salmon_dir, "quant.sf")
    logger.info("Calculating RNA-seq rRNA metrics for %s." % quant)

    # Second element of each rRNA feature is matched against the "Name"
    # column of the quantification table.
    rrna_ids = {feature[1] for feature in gtf.get_rRNA(gtf_file) if feature}
    inputs_ready = rrna_ids and quant and utils.file_exists(quant)
    if not inputs_ready:
        return {'rRNA': "NA", "rRNA_rate": "NA"}

    quant_table = pd.read_csv(quant, sep="\t")
    is_rrna = quant_table["Name"].isin(rrna_ids)
    rrna = sum(float(reads) for reads in quant_table[is_rrna]["NumReads"])
    total = sum(float(reads) for reads in quant_table["NumReads"])
    rrna_rate = "NA" if total == 0 else float(rrna) / total

    with file_transaction(out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            for label, value in (("rRNA", rrna), ("rRNA_rate", rrna_rate)):
                out_handle.write(",".join([label, str(value)]) + "\n")
    return _read_memoized_rrna(out_file)
def _find_rRNA_genes(gtf_file, rrna_file):
    """Write the identifiers of the rRNA features of a GTF file to disk.

    Args:
        gtf_file: annotation file handed to ``gtf.get_rRNA``.
        rrna_file: path of the text file to (over)write, one identifier per
            line with no trailing newline.

    Returns:
        ``rrna_file``.
    """
    rrna_features = gtf.get_rRNA(gtf_file)
    # The first element of each feature is the identifier written out;
    # a set drops duplicates.
    genes = {x[0] for x in rrna_features if x}
    with open(rrna_file, 'w') as outh:
        # Sorted so repeated runs produce identical files; set iteration
        # order for strings is not stable between interpreter runs.
        outh.write("\n".join(sorted(genes)))
    return rrna_file
def _detect_rRNA(data, out_dir):
    """Return rRNA read metrics for a sample, computing them at most once.

    Results are memoized in ``rRNA_metrics.txt`` under ``out_dir``. The
    quantification table comes from ``data["quant"]["tsv"]`` when present,
    otherwise from ``quant/quant.sf`` below the salmon directory.

    Returns:
        The result of ``_read_memoized_rrna`` on the metrics file, or
        ``{'rRNA': "NA", "rRNA_rate": "NA"}`` when no rRNA transcripts are
        found or the quantification file is unavailable (nothing is written
        in that case).
    """
    out_file = os.path.join(out_dir, "rRNA_metrics.txt")
    if utils.file_exists(out_file):
        # Already computed on an earlier run.
        return _read_memoized_rrna(out_file)

    gtf_file = dd.get_gtf_file(data)
    quant = tz.get_in(["quant", "tsv"], data)
    if not quant:
        salmon_dir = dd.get_salmon_dir(data)
        if salmon_dir:
            quant = os.path.join(salmon_dir, "quant", "quant.sf")
    logger.info("Calculating RNA-seq rRNA metrics for %s." % quant)

    features = gtf.get_rRNA(gtf_file)
    # Second element of each feature is matched against the "Name" column.
    rrna_names = {entry[1] for entry in features if entry}
    if not rrna_names or not quant or not utils.file_exists(quant):
        return {'rRNA': "NA", "rRNA_rate": "NA"}

    table = pd.read_csv(quant, sep="\t")
    rrna_rows = table[table["Name"].isin(rrna_names)]
    rrna = sum([float(value) for value in rrna_rows["NumReads"]])
    total_reads = sum([float(value) for value in table["NumReads"]])
    if total_reads == 0:
        rrna_rate = "NA"
    else:
        rrna_rate = float(rrna) / total_reads

    metrics = [("rRNA", rrna), ("rRNA_rate", rrna_rate)]
    with file_transaction(out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            for key, value in metrics:
                out_handle.write(",".join([key, str(value)]) + "\n")
    return _read_memoized_rrna(out_file)
def _detect_rRNA(data):
    """Report the rRNA read count and rRNA fraction for one sample.

    The count file found through ``dd`` is read as a headerless,
    tab-separated table with the columns ``id`` and ``counts``.

    Returns:
        dict with the string-valued keys ``'rRNA'`` (summed counts of the
        rRNA identifiers) and ``'rRNA_rate'`` (that sum divided by the total
        of all counts). Both are ``"NA"`` when the GTF yields no rRNA
        identifiers; the rate alone is ``"NA"`` when the total is zero.
    """
    gtf_file = dd.get_gtf_file(data)
    count_file = dd.get_count_file(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # First element of each feature is matched against the "id" column.
    genes = [x[0] for x in rrna_features if x]
    if not genes:
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    count_table = pd.read_csv(count_file, sep="\t", names=["id", "counts"])
    rrna = sum(count_table[count_table["id"].isin(genes)]["counts"])
    total = sum(count_table["counts"])
    if total == 0:
        # An empty or all-zero count table has no meaningful rate; dividing
        # would raise ZeroDivisionError or produce nan/inf.
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
def _detect_rRNA(data):
    """Report the rRNA read count and rRNA fraction for one sample.

    Reads the tab-separated tidy table found through
    ``dd.get_sailfish_tidy`` and keeps the rows whose ``sample`` column
    equals this sample's name; ``id`` and ``numreads`` are the other columns
    used.

    Returns:
        dict with the string-valued keys ``'rRNA'`` (summed ``numreads`` of
        the rRNA transcripts) and ``'rRNA_rate'`` (that sum divided by the
        sample's total ``numreads``). Both are ``"NA"`` when the GTF yields
        no rRNA transcripts; the rate alone is ``"NA"`` when the total is
        zero.
    """
    sample = dd.get_sample_name(data)
    gtf_file = dd.get_gtf_file(data)
    tidy_file = dd.get_sailfish_tidy(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # Second element of each feature is matched against the "id" column.
    transcripts = {x[1] for x in rrna_features if x}
    if not transcripts:
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    count_table = pd.read_csv(tidy_file, sep="\t")
    sample_table = count_table[count_table["sample"].isin([sample])]
    is_rrna = sample_table["id"].isin(transcripts)
    rrna = sum(float(reads) for reads in sample_table[is_rrna]["numreads"])
    total = sum(float(reads) for reads in sample_table["numreads"])
    if total == 0:
        # A sample with no rows or no reads would otherwise raise
        # ZeroDivisionError below.
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
def _detect_rRNA(data):
    """Compute rRNA read count and rRNA rate for a sample from a tidy table.

    The table located by ``dd.get_sailfish_tidy`` is read as tab-separated
    text; only rows whose ``sample`` column matches the sample name are
    considered, using the ``id`` and ``numreads`` columns.

    Returns:
        dict of strings: ``'rRNA'`` is the summed ``numreads`` over the rRNA
        transcripts and ``'rRNA_rate'`` is that sum over the sample's total
        ``numreads``. Both are ``"NA"`` if no rRNA transcripts are found in
        the GTF; only the rate is ``"NA"`` if the total is zero.
    """
    sample = dd.get_sample_name(data)
    gtf_file = dd.get_gtf_file(data)
    tidy_file = dd.get_sailfish_tidy(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    # Second element of each feature is matched against the "id" column.
    transcripts = {x[1] for x in rrna_features if x}
    if not transcripts:
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    count_table = pd.read_csv(tidy_file, sep="\t")
    sample_table = count_table[count_table["sample"].isin([sample])]
    rrna_rows = sample_table[sample_table["id"].isin(transcripts)]
    rrna = sum(map(float, rrna_rows["numreads"]))
    total = sum(map(float, sample_table["numreads"]))
    if total == 0:
        # Avoid ZeroDivisionError for samples without any rows or reads.
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
def _detect_rRNA(data):
    """Report the rRNA read count and rRNA fraction from sailfish output.

    Reads ``quant/quant.sf`` below the sailfish directory as a tab-separated
    table and uses its ``Name`` and ``NumReads`` columns.

    Returns:
        dict with the string-valued keys ``'rRNA'`` (summed ``NumReads`` of
        the rRNA transcripts) and ``'rRNA_rate'`` (that sum divided by the
        total ``NumReads``). Both are ``"NA"`` when there are no rRNA
        transcripts, no sailfish directory, or no quantification file; the
        rate alone is ``"NA"`` when the total is zero.
    """
    gtf_file = dd.get_gtf_file(data)
    sailfish_dir = dd.get_sailfish_dir(data)
    # Guard against a missing directory: os.path.join(None, ...) raises.
    quant = (os.path.join(sailfish_dir, "quant", "quant.sf")
             if sailfish_dir else None)
    rrna_features = gtf.get_rRNA(gtf_file)
    # Second element of each feature is matched against the "Name" column.
    transcripts = {x[1] for x in rrna_features if x}
    if not (transcripts and quant and utils.file_exists(quant)):
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    sample_table = pd.read_csv(quant, sep="\t")
    is_rrna = sample_table["Name"].isin(transcripts)
    rrna = sum(float(reads) for reads in sample_table[is_rrna]["NumReads"])
    # Sum once and reuse the value: on Python 3 ``map`` is a one-shot
    # iterator, so summing it a second time yields 0 and the division fails.
    total = sum(float(reads) for reads in sample_table["NumReads"])
    if total == 0:
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}
def _detect_rRNA(data):
    """Report the rRNA read count and rRNA fraction from salmon output.

    Reads ``quant/quant.sf`` below the salmon directory as a tab-separated
    table and uses its ``Name`` and ``NumReads`` columns.

    Returns:
        dict with the string-valued keys ``'rRNA'`` (summed ``NumReads`` of
        the rRNA transcripts) and ``'rRNA_rate'`` (that sum divided by the
        total ``NumReads``). Both are ``"NA"`` when there are no rRNA
        transcripts, no salmon directory, or no quantification file; the
        rate alone is ``"NA"`` when the total is zero.
    """
    gtf_file = dd.get_gtf_file(data)
    salmon_dir = dd.get_salmon_dir(data)
    # Guard against a missing directory: os.path.join(None, ...) raises.
    quant = (os.path.join(salmon_dir, "quant", "quant.sf")
             if salmon_dir else None)
    rrna_features = gtf.get_rRNA(gtf_file)
    # Second element of each feature is matched against the "Name" column.
    transcripts = {x[1] for x in rrna_features if x}
    if not (transcripts and quant and utils.file_exists(quant)):
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    sample_table = pd.read_csv(quant, sep="\t")
    rrna_rows = sample_table[sample_table["Name"].isin(transcripts)]
    rrna = sum(map(float, rrna_rows["NumReads"]))
    # Sum once and reuse the value: on Python 3 ``map`` is a one-shot
    # iterator, so summing it a second time yields 0 and the division fails.
    total = sum(map(float, sample_table["NumReads"]))
    if total == 0:
        return {'rRNA': str(rrna), 'rRNA_rate': "NA"}
    rrna_rate = float(rrna) / total
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}