Example #1
0
def sample_summary(bam_file, data, out_dir):
    """Run RNA-SeQC on a single RNAseq sample, writing to specified output directory.

    The RNA-SeQC run is skipped when ``metrics.tsv`` already exists in
    ``out_dir``; in both cases the metrics file is parsed and the parsed
    result is returned.

    bam_file -- path to the BAM file for the sample
    data -- sample dictionary; the "config", "sam_ref" and "name" keys are
            read here
    out_dir -- output directory (passed through safe_makedir before use)
    """
    metrics_file = os.path.join(out_dir, "metrics.tsv")
    if not file_exists(metrics_file):
        config = data["config"]
        ref_file = data["sam_ref"]
        # Annotation files are looked up relative to the directory two
        # levels above the reference file.
        genome_dir = os.path.dirname(os.path.dirname(ref_file))
        gtf_file = config_utils.get_transcript_gtf(genome_dir)
        rna_file = config_utils.get_rRNA_sequence(genome_dir)
        sample_file = os.path.join(safe_makedir(out_dir), "sample_file.txt")
        _write_sample_id_file(data, bam_file, sample_file)
        runner = rnaseqc_runner_from_config(config)
        bam.index(bam_file, config)
        # is_paired reports paired-end data, so the single-end flag is its
        # negation; passing it through unnegated inverted the flag.
        single_end = not bam.is_paired(bam_file)
        runner.run(sample_file, ref_file, rna_file, gtf_file, out_dir, single_end)
    # The last element of data["name"] is handed to the metrics parser.
    return _parse_rnaseqc_metrics(metrics_file, data["name"][-1])
Example #2
0
def sample_summary(samples):
    """Run RNA-SeQC over a group of samples and return the samples unchanged.

    The configuration, work directory, reference file and the BAM used for
    the paired-end check are all taken from the first entry
    (``samples[0][0]``). Output is written under ``<work>/qc/rnaseqc``.

    samples -- sequence of sample entries; each entry is indexed with
               ``[0]`` to reach a dictionary carrying "config", "dirs",
               "sam_ref" and "work_bam"
    """
    if not samples:
        # Nothing to summarize; avoid an IndexError on samples[0].
        return samples

    sample_config = samples[0]
    config = sample_config[0]["config"]
    work_dir = sample_config[0]["dirs"]["work"]
    ref_file = sample_config[0]["sam_ref"]
    # Annotation files are looked up relative to the directory two levels
    # above the reference file.
    genome_dir = os.path.dirname(os.path.dirname(ref_file))
    gtf_file = config_utils.get_transcript_gtf(genome_dir)
    rna_file = config_utils.get_rRNA_sequence(genome_dir)

    out_dir = safe_makedir(os.path.join(work_dir, "qc", "rnaseqc"))
    sample_file = os.path.join(out_dir, "sample_file.txt")
    _write_sample_id_file(samples, sample_file)
    _index_samples(samples)
    runner = rnaseqc_runner_from_config(config)
    # is_paired reports paired-end data, so the single-end flag is its
    # negation; passing it through unnegated inverted the flag.
    single_end = not is_paired(sample_config[0]["work_bam"])
    runner.run(sample_file, ref_file, rna_file, gtf_file, out_dir, single_end)

    return samples
Example #3
0
def sample_summary(bam_file, data, out_dir):
    """Run RNA-SeQC on a single RNAseq sample, writing to specified output directory.

    When ``metrics.tsv`` is already present in ``out_dir`` the RNA-SeQC run
    is skipped. The metrics file is parsed and the parsed result returned
    either way.

    bam_file -- path to the BAM file for the sample
    data -- sample dictionary; the "config", "sam_ref" and "name" keys are
            read here
    out_dir -- output directory (passed through safe_makedir before use)
    """
    metrics_file = os.path.join(out_dir, "metrics.tsv")
    if not file_exists(metrics_file):
        config = data["config"]
        ref_file = data["sam_ref"]
        # Annotation files are looked up relative to the directory two
        # levels above the reference file.
        genome_dir = os.path.dirname(os.path.dirname(ref_file))
        gtf_file = config_utils.get_transcript_gtf(genome_dir)
        rna_file = config_utils.get_rRNA_sequence(genome_dir)
        sample_file = os.path.join(safe_makedir(out_dir), "sample_file.txt")
        _write_sample_id_file(data, bam_file, sample_file)
        runner = rnaseqc_runner_from_config(config)
        bam.index(bam_file, config)
        # The flag handed to the runner means "single-end", which is the
        # opposite of what is_paired reports, so it has to be negated.
        single_end = not bam.is_paired(bam_file)
        runner.run(sample_file, ref_file, rna_file, gtf_file, out_dir,
                   single_end)
    # The last element of data["name"] is handed to the metrics parser.
    return _parse_rnaseqc_metrics(metrics_file, data["name"][-1])
Example #4
0
def sample_summary(bam_file, data, out_dir):
    """Produce RNA-SeQC metrics for one RNAseq sample.

    If ``metrics.tsv`` is already present in ``out_dir`` it is parsed
    directly. Otherwise RNA-SeQC is run inside a file transaction on
    ``out_dir`` and the metrics file is parsed afterwards.
    """
    metrics_file = os.path.join(out_dir, "metrics.tsv")
    if file_exists(metrics_file):
        # Earlier results are available: parse them without re-running.
        return _parse_rnaseqc_metrics(metrics_file, data["name"][-1])

    with file_transaction(out_dir) as tx_out_dir:
        cfg = data["config"]
        reference = data["sam_ref"]
        # Annotation files are looked up two directory levels above the
        # reference file.
        genome_root = os.path.dirname(os.path.dirname(reference))
        transcripts = config_utils.get_transcript_gtf(genome_root)
        rrna = config_utils.get_rRNA_sequence(genome_root)
        tx_dir = safe_makedir(tx_out_dir)
        sample_sheet = os.path.join(tx_dir, "sample_file.txt")
        _write_sample_id_file(data, bam_file, sample_sheet)
        rnaseqc = rnaseqc_runner_from_config(cfg)
        bam.index(bam_file, cfg)
        paired = bam.is_paired(bam_file)
        rnaseqc.run(sample_sheet, reference, rrna, transcripts, tx_out_dir,
                    not paired)
        # The directory named after the sample description is large and
        # not needed for just the report, so drop it.
        bulky_dir = os.path.join(tx_out_dir, data["description"])
        shutil.rmtree(bulky_dir)
    return _parse_rnaseqc_metrics(metrics_file, data["name"][-1])
Example #5
0
def make_refflat(genome_dir):
    """
    makes a refflat file for use with Picard from a GTF file

    The GTF returned by get_transcript_gtf(genome_dir) is converted with the
    external ``gtfToGenePred`` program; each resulting row is then written
    with its first column duplicated in front. The output path is the GTF
    path with its extension replaced by ``.refFlat``. If that file already
    exists the conversion is skipped.

    Returns the path to the refFlat file.
    Raises subprocess.CalledProcessError when gtfToGenePred exits non-zero.
    """
    gtf_file = get_transcript_gtf(genome_dir)
    base, _ = os.path.splitext(gtf_file)
    refflat_file = base + ".refFlat"
    # Single-argument parenthesised print emits the same text under
    # Python 2 and also parses under Python 3.
    print("Making %s into a refFlat file named %s." % (gtf_file, refflat_file))
    if file_exists(refflat_file):
        print("%s already exists, skipping." % refflat_file)
        return refflat_file

    with tmpfile(dir=os.getcwd(), prefix="genepred") as tmp_file:
        # Argument list instead of a shell string so paths containing
        # spaces or shell metacharacters are passed through intact.
        subprocess.check_call(["gtfToGenePred", gtf_file, tmp_file])
        with open(tmp_file) as tmp_handle, open(refflat_file, "w") as out_handle:
            for line in tmp_handle:
                # Strip the line ending before splitting; otherwise the last
                # field keeps its newline and every record is followed by a
                # blank line once "\n" is appended below.
                record = line.rstrip("\r\n")
                if not record:
                    continue
                fields = record.split("\t")
                # Duplicate the first column as the new leading column.
                fields = [fields[0]] + fields
                out_handle.write("\t".join(fields) + "\n")
    return refflat_file
Example #6
0
def make_refflat(genome_dir):
    """
    makes a refflat file for use with Picard from a GTF file

    Runs the external ``gtfToGenePred`` program on the GTF returned by
    get_transcript_gtf(genome_dir), then writes every converted row with its
    first column repeated at the front. The result is stored next to the GTF
    with a ``.refFlat`` extension; an existing file is reused as-is.

    Returns the path to the refFlat file.
    Raises subprocess.CalledProcessError when gtfToGenePred exits non-zero.
    """
    gtf_file = get_transcript_gtf(genome_dir)
    base, _ = os.path.splitext(gtf_file)
    refflat_file = base + ".refFlat"
    # Single-argument parenthesised print emits the same text under
    # Python 2 and also parses under Python 3.
    print("Making %s into a refFlat file named %s." % (gtf_file, refflat_file))
    if file_exists(refflat_file):
        print("%s already exists, skipping." % refflat_file)
        return refflat_file

    with tmpfile(dir=os.getcwd(), prefix="genepred") as tmp_file:
        # Pass arguments as a list (no shell) so unusual characters in the
        # paths cannot be misinterpreted.
        subprocess.check_call(["gtfToGenePred", gtf_file, tmp_file])
        with open(tmp_file) as tmp_handle, open(refflat_file,
                                                "w") as out_handle:
            for line in tmp_handle:
                # The raw line still carries its newline; remove it before
                # splitting so the appended "\n" does not produce a blank
                # line after every record.
                record = line.rstrip("\r\n")
                if not record:
                    continue
                columns = record.split("\t")
                # Repeat the first column as the new leading column.
                out_handle.write("\t".join([columns[0]] + columns) + "\n")
    return refflat_file
Example #7
0
def sample_summary(bam_file, data, out_dir):
    """Collect RNA-SeQC metrics for a single RNAseq sample.

    RNA-SeQC is only run when ``out_dir`` does not yet hold a
    ``metrics.tsv``; the run happens inside a file transaction on
    ``out_dir``. The metrics file is then parsed and the result returned.
    """
    metrics_file = os.path.join(out_dir, "metrics.tsv")
    needs_run = not file_exists(metrics_file)
    if needs_run:
        with file_transaction(out_dir) as tx_out_dir:
            settings, reference = data["config"], data["sam_ref"]
            # Walk two directory levels up from the reference file to find
            # the directory the annotation lookups are based on.
            genome_base = os.path.dirname(reference)
            genome_base = os.path.dirname(genome_base)
            annotation_gtf = config_utils.get_transcript_gtf(genome_base)
            rrna_seqs = config_utils.get_rRNA_sequence(genome_base)
            id_file = os.path.join(safe_makedir(tx_out_dir),
                                   "sample_file.txt")
            _write_sample_id_file(data, bam_file, id_file)
            qc_runner = rnaseqc_runner_from_config(settings)
            bam.index(bam_file, settings)
            is_single = not bam.is_paired(bam_file)
            qc_runner.run(sample_file=id_file, ref_file=reference,
                          rna_file=rrna_seqs, gtf_file=annotation_gtf,
                          out_dir=tx_out_dir, single_end=is_single) \
                if False else \
                qc_runner.run(id_file, reference, rrna_seqs, annotation_gtf,
                              tx_out_dir, is_single)
            # Only the report is wanted, so remove the large directory
            # named after the sample description.
            shutil.rmtree(os.path.join(tx_out_dir, data["description"]))
    sample_name = data["name"][-1]
    return _parse_rnaseqc_metrics(metrics_file, sample_name)