Beispiel #1
0
    def _rnaseq_metrics(self, align_bam, gtf_file, rrna_file):
        metrics = self._check_metrics_file(align_bam, "rnaseq_metrics")
        if not file_exists(metrics):
            with file_transaction(metrics) as tx_metrics:
                picard_rnaseq_metrics(self._picard, align_bam, gtf_file, rrna_file, tx_metrics)

        return metrics
Beispiel #2
0
    def _rnaseq_metrics(self, align_bam, gtf_file, rrna_file):
        metrics = self._check_metrics_file(align_bam, "rnaseq_metrics")
        if not file_exists(metrics):
            with file_transaction(metrics) as tx_metrics:
                picard_rnaseq_metrics(self._picard, align_bam, gtf_file,
                                      rrna_file, tx_metrics)

        return metrics
Beispiel #3
0
 def __call__(self, in_file):
     out_file = self.out_file(in_file)
     if file_exists(out_file):
         return out_file
     out_file = picardrun.picard_rnaseq_metrics(self.picard, in_file,
                                                self.ref, self.ribo,
                                                out_file)
     return out_file
def main(config_file):
    if config_file:
        with open(config_file) as in_handle:
            config = yaml.load(in_handle)

    dirs = config["in_dir"]
    conditions = config["conditions"]
    glob_string = config["glob_string"]

    files = list(flatten([glob.glob(os.path.join(x, glob_string)) for x in dirs]))
    out_dir = config["dir"]["results"]
    safe_makedir(out_dir)

    curr_files = []
    for condition in conditions:
        condition_files = [x for x in files if condition in x]
        out_file = os.path.join(out_dir, condition + "_v2_v3.bam")
        print "Combining %s into %s." % (condition_files, out_file)
        sh.samtools.merge(list(flatten([out_file, condition_files])))
        #        bsub_call = list(flatten(["-q", "hsph", "-o", "out" + condition, "-e", "err" + condition, "samtools", "merge", out_file, condition_files]))
        #sh.bsub(bsub_call)
        sorted_prefix = remove_suffix(out_file) + ".sorted"
        sorted_file = sorted_prefix + ".bam"
        sh.samtools.sort(out_file, sorted_prefix)
        sh.samtools.index(sorted_file)
        mapped_file = append_stem(sorted_file, "mapped")
        sh.samtools.view(sorted_file, F=4, b=True, o=mapped_file)
        sh.samtools.index(mapped_file)

        # find the reads that don't intersect with the rrna
        in_file = mapped_file
        out_file = os.path.join(out_dir, condition + "_noribo" + "_v2_v3.bam")
        ribo = config["ribo"]
        print "Filtering %s for rRNA in %s into %s." % (in_file, ribo, out_file)
        sh.bedtools.intersect("-abam", in_file, "-v", "-b", ribo, _out=out_file)
        filtered_file = out_file

        print "Calculating RNASeq metrics on %s." % (out_file)
        in_file = out_file
        ref = blastn.prepare_ref_file(config["stage"]["new_coverage"]["ref"],
                                      config)
        ribo = config["stage"]["new_coverage"]["ribo"]
        picard = BroadRunner(config["program"]["picard"])
        out_dir = os.path.join(config["dir"]["results"], "new_coverage")
        safe_makedir(out_dir)
        out_file = replace_suffix(os.path.basename(in_file), "metrics")
        out_file = os.path.join(out_dir, out_file)
        metrics_file = picardrun.picard_rnaseq_metrics(picard, in_file, ref,
                                                       ribo, out_file)

        jelly_dir = os.path.join(config["dir"]["results"], "jellyfish")
        safe_makedir(jelly_dir)
        # convert the filtered file to fastq for jellyfish counting
        fastq_file = os.path.join(jelly_dir,
                                  os.path.basename(replace_suffix(filtered_file,
                                                                  "fastq")))
        sh.bam2fastx(filtered_file, fastq=True, _out=fastq_file)
        for mer in config["stage"]["jellyfish"]["mer_lengths"]:
            base, _ = os.path.splitext(os.path.basename(fastq_file))
            out_prefix = base + "_%dmer" % (mer)
            out_file = os.path.join(jelly_dir, out_prefix)
            if not file_exists(out_file):
                sh.jellyfish.count(fastq_file,
                                   config["stage"]["jellyfish"]["options"],
                                   m=mer, o=out_file)