def sample_summary(bam_file, data, out_dir):
    """Run RNA-SeQC on a single RNAseq sample, writing to specified output directory.

    The RNA-SeQC run is skipped when ``metrics.tsv`` already exists in
    ``out_dir``; the metrics file is parsed in either case.

    Args:
        bam_file: path of the BAM file to index and summarize.
        data: sample dictionary; the keys read here are ``"config"``,
            ``"sam_ref"`` and ``"name"`` (only its last element is used).
        out_dir: directory RNA-SeQC writes into; created if missing.

    Returns:
        Whatever ``_parse_rnaseqc_metrics`` returns for ``metrics.tsv``.
    """
    metrics_file = os.path.join(out_dir, "metrics.tsv")
    if not file_exists(metrics_file):
        config = data["config"]
        ref_file = data["sam_ref"]
        # Annotation lookups are keyed on the directory two levels above the
        # reference file.
        genome_dir = os.path.dirname(os.path.dirname(ref_file))
        gtf_file = config_utils.get_transcript_gtf(genome_dir)
        rna_file = config_utils.get_rRNA_sequence(genome_dir)
        sample_file = os.path.join(safe_makedir(out_dir), "sample_file.txt")
        _write_sample_id_file(data, bam_file, sample_file)
        runner = rnaseqc_runner_from_config(config)
        bam.index(bam_file, config)
        # The runner takes a "single end" flag, which is the negation of
        # what bam.is_paired reports; passing it through un-negated labelled
        # paired-end data as single-end and vice versa.
        single_end = not bam.is_paired(bam_file)
        runner.run(sample_file, ref_file, rna_file, gtf_file, out_dir,
                   single_end)
    return _parse_rnaseqc_metrics(metrics_file, data["name"][-1])
def sample_summary(samples):
    """Run RNA-SeQC once over a batch of samples.

    Configuration, working directory and reference are taken from the first
    sample only. Output goes to ``<work>/qc/rnaseqc``, which is created if
    missing.

    Args:
        samples: list of sample entries; each entry is indexed with ``[0]``
            to reach a dictionary with ``"config"``, ``"dirs"``,
            ``"sam_ref"`` and ``"work_bam"`` keys.

    Returns:
        The ``samples`` argument, unchanged.
    """
    # Nothing to summarize; avoid an IndexError on samples[0].
    if not samples:
        return samples

    first_sample = samples[0][0]
    config = first_sample["config"]
    work_dir = first_sample["dirs"]["work"]
    ref_file = first_sample["sam_ref"]
    # Annotation lookups are keyed on the directory two levels above the
    # reference file.
    genome_dir = os.path.dirname(os.path.dirname(ref_file))
    gtf_file = config_utils.get_transcript_gtf(genome_dir)
    rna_file = config_utils.get_rRNA_sequence(genome_dir)
    out_dir = safe_makedir(os.path.join(work_dir, "qc", "rnaseqc"))
    sample_file = os.path.join(out_dir, "sample_file.txt")
    _write_sample_id_file(samples, sample_file)
    _index_samples(samples)
    runner = rnaseqc_runner_from_config(config)
    # The runner takes a "single end" flag, which is the negation of what
    # is_paired reports. Only the first sample's BAM is inspected.
    single_end = not is_paired(first_sample["work_bam"])
    runner.run(sample_file, ref_file, rna_file, gtf_file, out_dir, single_end)
    return samples
def _run_rnaseqc_into(bam_file, data, run_dir):
    """Prepare inputs and run RNA-SeQC for one sample, writing into ``run_dir``."""
    settings = data["config"]
    reference = data["sam_ref"]
    # Annotation lookups are keyed on the directory two levels above the
    # reference file.
    genome_root = os.path.dirname(os.path.dirname(reference))
    transcripts_gtf = config_utils.get_transcript_gtf(genome_root)
    rrna_fasta = config_utils.get_rRNA_sequence(genome_root)
    id_file = os.path.join(safe_makedir(run_dir), "sample_file.txt")
    _write_sample_id_file(data, bam_file, id_file)
    rnaseqc = rnaseqc_runner_from_config(settings)
    bam.index(bam_file, settings)
    is_single_end = not bam.is_paired(bam_file)
    rnaseqc.run(id_file, reference, rrna_fasta, transcripts_gtf, run_dir,
                is_single_end)
    # The per-sample directory is large and not needed for just the report.
    shutil.rmtree(os.path.join(run_dir, data["description"]))


def sample_summary(bam_file, data, out_dir):
    """Summarize one RNA-seq sample with RNA-SeQC and return its parsed metrics.

    If ``metrics.tsv`` is already present in ``out_dir`` the run is skipped
    and the existing file is parsed. Otherwise RNA-SeQC runs inside the
    directory yielded by ``file_transaction(out_dir)`` (presumably a
    temporary location promoted to ``out_dir`` on success -- confirm against
    ``file_transaction``).

    Args:
        bam_file: path of the BAM file to index and summarize.
        data: sample dictionary; the keys read are ``"config"``,
            ``"sam_ref"``, ``"description"`` and ``"name"``.
        out_dir: final output directory for the RNA-SeQC results.

    Returns:
        Whatever ``_parse_rnaseqc_metrics`` returns for ``metrics.tsv``.
    """
    metrics_file = os.path.join(out_dir, "metrics.tsv")
    if file_exists(metrics_file):
        return _parse_rnaseqc_metrics(metrics_file, data["name"][-1])

    with file_transaction(out_dir) as tx_out_dir:
        _run_rnaseqc_into(bam_file, data, tx_out_dir)
    return _parse_rnaseqc_metrics(metrics_file, data["name"][-1])
def make_refflat(genome_dir):
    """Make a refFlat file for use with Picard from a GTF file.

    The GTF located by ``get_transcript_gtf(genome_dir)`` is converted with
    the external ``gtfToGenePred`` program, and each resulting record is
    written with its first column duplicated at the front. The output sits
    next to the GTF with a ``.refFlat`` extension; an existing file is
    reused rather than rebuilt.

    Args:
        genome_dir: genome directory handed to ``get_transcript_gtf``.

    Returns:
        Path of the refFlat file.

    Raises:
        subprocess.CalledProcessError: if ``gtfToGenePred`` exits non-zero.
    """
    gtf_file = get_transcript_gtf(genome_dir)
    base, _ = os.path.splitext(gtf_file)
    refflat_file = base + ".refFlat"
    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3.
    print("Making %s into a refFlat file named %s." % (gtf_file, refflat_file))
    if file_exists(refflat_file):
        print("%s already exists, skipping." % refflat_file)
        return refflat_file

    with tmpfile(dir=os.getcwd(), prefix="genepred") as tmp_file:
        # Argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through intact.
        subprocess.check_call(["gtfToGenePred", gtf_file, tmp_file])
        with open(tmp_file) as tmp_handle, \
                open(refflat_file, "w") as out_handle:
            for line in tmp_handle:
                # Drop the line terminator before splitting; otherwise it
                # stays on the last field and every record is followed by
                # an empty line once "\n" is appended below.
                record = line.rstrip("\r\n")
                if not record:
                    continue
                fields = record.split("\t")
                out_handle.write("\t".join([fields[0]] + fields) + "\n")
    return refflat_file