Exemple #1
0
    def run(self):
        """Merge the per-chunk logs of submitted quiver jobs into one file.

        Creates an IceQuiver on self.root_dir, collects the
        submitted-quiver-jobs log path of each of the self.N chunks, and
        concatenates those logs into iceq.submitted_quiver_jobs_log.

        Raises:
            IOError: if nfs_exists() reports any chunk log as missing.
        """
        iceq = IceQuiver(root_dir=self.root_dir,
                         bas_fofn=None,
                         fasta_fofn=None,
                         sge_opts=None,
                         prog_name="ice_quiver_merge")

        # Close the log on every exit path: a missing chunk log or a
        # failed concatenation must not leave the log open.
        try:
            iceq.add_log(self.cmd_str())
            iceq.add_log("root_dir: {d}.".format(d=self.root_dir))
            iceq.add_log("Total number of chunks: N = {N}.".format(N=self.N))

            src = [
                iceq.submitted_quiver_jobs_log_of_chunk_i(i=i,
                                                          num_chunks=self.N)
                for i in range(self.N)
            ]
            # Check every input before writing anything to dst.
            for f in src:
                if not nfs_exists(f):
                    raise IOError("Log {f} ".format(f=f) +
                                  "of submitted quiver jobs does not exist.")

            dst = iceq.submitted_quiver_jobs_log

            iceq.add_log(
                "Collecting submitted quiver jobs from:\n{src}\nto {dst}."
                .format(src="\n".join(src), dst=dst))

            cat_files(src=src, dst=dst)
        finally:
            iceq.close_log()
Exemple #2
0
    def run(self):
        """Collect the submitted-quiver-jobs logs of all chunks into one log.

        An IceQuiver is created on self.root_dir; the log path of each of
        the self.N chunks is looked up, checked with nfs_exists(), and the
        logs are then concatenated into iceq.submitted_quiver_jobs_log.

        Raises:
            IOError: if the log of any chunk is reported missing.
        """
        iceq = IceQuiver(root_dir=self.root_dir, bas_fofn=None,
                         fasta_fofn=None, sge_opts=None,
                         prog_name="ice_quiver_merge")

        # The finally clause guarantees close_log() also runs when the
        # IOError below is raised or cat_files fails.
        try:
            iceq.add_log(self.cmd_str())
            iceq.add_log("root_dir: {d}.".format(d=self.root_dir))
            iceq.add_log("Total number of chunks: N = {N}.".format(N=self.N))

            src = [iceq.submitted_quiver_jobs_log_of_chunk_i(
                       i=i, num_chunks=self.N)
                   for i in range(self.N)]
            # Fail on the first missing chunk log, before dst is written.
            for f in src:
                if not nfs_exists(f):
                    raise IOError("Log {f} ".format(f=f) +
                                  "of submitted quiver jobs does not exist.")

            dst = iceq.submitted_quiver_jobs_log

            iceq.add_log(
                "Collecting submitted quiver jobs from:\n{src}\nto {dst}.".
                format(src="\n".join(src), dst=dst))

            cat_files(src=src, dst=dst)
        finally:
            iceq.close_log()
Exemple #3
0
    def test_cat_files(self):
        """Check cat_files with one source file and with two source files."""
        primers_fa = op.join(self.data_dir, "primers.fa")
        phmmer_fa = op.join(self.data_dir, "test_phmmer.fa")
        single_out = op.join(self.out_dir, "test_cat_1")
        double_out = op.join(self.out_dir, "test_cat_2")

        expected_double = op.join(self.stdout_dir, "test_cat_2")

        cat_files(src=[primers_fa], dst=single_out)
        cat_files(src=[primers_fa, phmmer_fa], dst=double_out)

        # The single-source output is compared with the source itself; the
        # two-source output is compared with the file kept in stdout_dir.
        comparisons = ((single_out, primers_fa),
                       (double_out, expected_double))
        for produced, expected in comparisons:
            self.assertTrue(filecmp.cmp(produced, expected))
Exemple #4
0
    def runChimeraDetector(self):
        """Run chimera detection on full-length reads and, when requested,
        on non-full-length reads, then assemble the combined outputs."""
        # Build the forward/reverse primers used for chimera detection.
        search_window = self.chimera_detection_opts.primer_search_window
        self._processPrimers(primer_fn=self.primer_fn,
                             window_size=search_window,
                             primer_out_fn=self.primer_chimera_fn,
                             revcmp_primers=True)

        # Full-length reads: split into non-chimeric (flnc) and chimeric
        # (flc) reads and record the counts in the summary.
        logging.info("Detect chimeric reads from trimmed full-length reads.")
        fl_result = self._detect_chimera(
            in_fasta=self._trimmed_fl_reads_fn,
            out_nc_fasta=self.out_flnc_fn,
            out_c_fasta=self.out_flc_fn,
            primer_report_fn=self._primer_report_fl_fn,
            out_dom=self.out_trimmed_fl_dom_fn,
            num_reads=self.summary.num_fl,
            job_name="fl")
        num_flnc, num_flc, num_flnc_bases, _unused = fl_result
        self.summary.num_flnc = num_flnc
        self.summary.num_flc = num_flc
        self.summary.num_flnc_bases = num_flnc_bases
        assert self.summary.num_fl == (self.summary.num_flnc +
                                       self.summary.num_flc)
        logging.info("Done with chimera detection on trimmed full-length "
                     "reads.")

        if self.chimera_detection_opts.detect_chimera_nfl is not True:
            # No detection on non-full-length reads: soft link
            # _trimmed_nfl_reads_fn as out_nfl_fn.
            ln(self._trimmed_nfl_reads_fn, self.out_nfl_fn)
            # out_all_reads_fn = out_flnc + out_nfl
            all_reads_src = [self.out_flnc_fn, self.out_nfl_fn]
            cat_files(src=all_reads_src, dst=self.out_all_reads_fn)
        else:
            # Non-full-length reads: split into nflnc and nflc reads; this
            # rewrites self._primer_report_nfl_fn.
            logging.info("Detect chimeric reads from trimmed non-full-length "
                         "reads.")
            nfl_result = self._detect_chimera(
                in_fasta=self._trimmed_nfl_reads_fn,
                out_nc_fasta=self.out_nflnc_fn,
                out_c_fasta=self.out_nflc_fn,
                primer_report_fn=self._primer_report_nfl_fn,
                out_dom=self.out_trimmed_nfl_dom_fn,
                num_reads=self.summary.num_nfl,
                job_name="nfl")
            num_nflnc, num_nflc, _unused, _unused2 = nfl_result
            self.summary.num_nflnc = num_nflnc
            self.summary.num_nflc = num_nflc
            assert self.summary.num_nfl == (self.summary.num_nflnc +
                                            self.summary.num_nflc)
            logging.info("Done with chimera detection on trimmed "
                         "non-full-length reads.")

            # out_nfl_fn = out_nflnc + out_nflc
            nfl_src = [self.out_nflnc_fn, self.out_nflc_fn]
            cat_files(src=nfl_src, dst=self.out_nfl_fn)
            # out_all_reads_fn = out_flnc + out_nflnc
            all_reads_src = [self.out_flnc_fn, self.out_nflnc_fn]
            cat_files(src=all_reads_src, dst=self.out_all_reads_fn)

        # Primer info of fl and nfl reads lives in two separate reports;
        # join them into the full report primer_report_fn.
        report_parts = [self._primer_report_fl_fn, self._primer_report_nfl_fn]
        cat_files(src=report_parts, dst=self.primer_report_fn)

        # Remove the intermediate per-category primer reports.
        intermediates = [self._primer_report_nfl_fn,
                         self._primer_report_fl_fn]
        self._cleanup(intermediates)
    def runChimeraDetector(self):
        """Detect chimeras in full-length reads and, if enabled, in
        non-full-length reads; then build the merged read and primer
        report files."""
        # Prepare forward/reverse primers for chimera detection.
        primer_kwargs = dict(
            primer_fn_forward=self.primer_fn_forward,
            primer_fn_reverse=self.primer_fn_reverse,
            window_size=self.chimera_detection_opts.primer_search_window,
            primer_out_fn=self.primer_chimera_fn,
            revcmp_primers=True)
        self._processPrimers(**primer_kwargs)

        # Chimera detection on full-length reads separates flnc reads
        # from flc reads.
        logging.info("Detect chimeric reads from trimmed full-length reads.")
        fl_counts = self._detect_chimera(
            in_fasta=self._trimmed_fl_reads_fn,
            out_nc_fasta=self.out_flnc_fn,
            out_c_fasta=self.out_flc_fn,
            primer_report_fn=self._primer_report_fl_fn,
            out_dom=self.out_trimmed_fl_dom_fn,
            num_reads=self.summary.num_fl,
            job_name="fl")
        flnc_count, flc_count, flnc_bases, _ignored = fl_counts
        self.summary.num_flnc = flnc_count
        self.summary.num_flc = flc_count
        self.summary.num_flnc_bases = flnc_bases
        assert self.summary.num_fl == (self.summary.num_flnc +
                                       self.summary.num_flc)
        logging.info(
            "Done with chimera detection on trimmed full-length reads.")

        if self.chimera_detection_opts.detect_chimera_nfl is not True:
            # Detection on nfl reads is off: soft link
            # _trimmed_nfl_reads_fn as out_nfl_fn, then make
            # out_all_reads_fn from out_flnc and out_nfl.
            ln(self._trimmed_nfl_reads_fn, self.out_nfl_fn)
            cat_files(dst=self.out_all_reads_fn,
                      src=[self.out_flnc_fn, self.out_nfl_fn])
        else:
            # Chimera detection on non-full-length reads separates nflnc
            # reads from nflc reads and rewrites the nfl primer report.
            logging.info(
                "Detect chimeric reads from trimmed non-full-length reads.")
            nfl_counts = self._detect_chimera(
                in_fasta=self._trimmed_nfl_reads_fn,
                out_nc_fasta=self.out_nflnc_fn,
                out_c_fasta=self.out_nflc_fn,
                primer_report_fn=self._primer_report_nfl_fn,
                out_dom=self.out_trimmed_nfl_dom_fn,
                num_reads=self.summary.num_nfl,
                job_name="nfl")
            nflnc_count, nflc_count, _ignored, _ignored2 = nfl_counts
            self.summary.num_nflnc = nflnc_count
            self.summary.num_nflc = nflc_count
            assert self.summary.num_nfl == (self.summary.num_nflnc +
                                            self.summary.num_nflc)
            logging.info(
                "Done with chimera detection on trimmed non-full-length reads.")

            # out_nfl_fn is out_nflnc_fn followed by out_nflc_fn.
            cat_files(dst=self.out_nfl_fn,
                      src=[self.out_nflnc_fn, self.out_nflc_fn])
            # out_all_reads_fn is out_flnc_fn followed by out_nflnc_fn.
            cat_files(dst=self.out_all_reads_fn,
                      src=[self.out_flnc_fn, self.out_nflnc_fn])

        # The fl and nfl primer reports are concatenated, fl first, to
        # form the full report primer_report_fn.
        cat_files(dst=self.primer_report_fn,
                  src=[self._primer_report_fl_fn, self._primer_report_nfl_fn])

        # The two partial reports are intermediate files; delete them.
        self._cleanup([self._primer_report_nfl_fn,
                       self._primer_report_fl_fn])
Exemple #6
0
    def runChimeraDetector(self):
        """Detect chimeras from trimmed full-length reads.

        If self.out_trimmed_reads_dom_fn already exists it is parsed as is.
        Otherwise the primers are processed, the trimmed full-length reads
        are split into chunks, and self._startPhmmers is run on the chunks
        to produce it; the chunk files are cleaned up at the end.

        Afterwards the chimera records are applied to the full-length
        reads, and the combined read file (out_all_reads_fn) and the
        combined primer report (primer_report_fn) are written.
        """
        logging.info("Start to detect chimeras from trimmed reads.")
        need_cleanup = True
        if os.path.exists(self.out_trimmed_reads_dom_fn):
            logging.info("Output already exists. Parsing {0}.".format(
                self.out_trimmed_reads_dom_fn))
            # No chunk files were created in this run, so nothing to remove.
            need_cleanup = False
        else:
            # Create forward/reverse primers for chimera detection.
            self._processPrimers(
                primer_fn=self.primer_fn,
                window_size=self.chimera_detection_opts.primer_search_window,
                primer_out_fn=self.primer_chimera_fn,
                revcmp_primers=True)

            # Only detect chimeras on full-length reads in order to save time
            num_fl = self.summary.num_fl
            num_chunks = max(min(num_fl, self.cpus), 1)
            # Clamp both values to at least 1: with zero full-length reads
            # the first ceil() yields 0 and the second division would raise
            # ZeroDivisionError. For num_fl > 0 the results are unchanged.
            reads_per_chunk = max(
                int(math.ceil(num_fl / float(num_chunks))), 1)
            num_chunks = max(
                int(math.ceil(num_fl / float(reads_per_chunk))), 1)

            self.chunked_trimmed_reads_fns = generateChunkedFN(
                self.out_dir, "in.trimmed.fa_split", num_chunks)

            self.chunked_trimmed_reads_dom_fns = generateChunkedFN(
                self.out_dir, "out.trimmed.hmmer_split", num_chunks)

            self._chunkReads(reads_fn=self._trimmed_fl_reads_fn,
                             reads_per_chunk=reads_per_chunk,
                             chunked_reads_fns=self.chunked_trimmed_reads_fns,
                             extract_front_back_only=False)

            self._startPhmmers(self.chunked_trimmed_reads_fns,
                               self.chunked_trimmed_reads_dom_fns,
                               self.out_trimmed_reads_dom_fn,
                               self.primer_chimera_fn,
                               self.pbmatrix_fn)

        suspicious_hits = self._getChimeraRecord(self.out_trimmed_reads_dom_fn,
                                                 self.chimera_detection_opts)

        # Only detect chimeras on full-length reads in order to save time.
        # The keyword name below is spelled as the callee declares it.
        self._updateChimeraInfo(suspicous_hits=suspicious_hits,
                                in_read_fn=self._trimmed_fl_reads_fn,
                                out_flnc_fn=self.out_flnc_fn,
                                out_flc_fn=self.out_flc_fn,
                                primer_report_fl_fn=self._primer_report_fl_fn)
        # full-length non-chimeric reads written to out_flnc.fa
        # non-full-length reads written to out_nfl.fa
        # primer info of fl reads reported to _primer_report_fl_fn
        # primer info of nfl reads reported to _primer_report_nfl_fn
        # Need to: (1) concatenate out_flnc and out_nfl to make
        #              out_all_reads_fn
        #          (2) concatenate _primer_report_fl_fn and
        #              _primer_report_nfl_fn to make primer_report_fn
        cat_files(src=[self.out_flnc_fn, self.out_nfl_fn],
                  dst=self.out_all_reads_fn)

        cat_files(src=[self._primer_report_fl_fn, self._primer_report_nfl_fn],
                  dst=self.primer_report_fn)

        if need_cleanup:
            self._cleanup(self.chunked_trimmed_reads_fns +
                          self.chunked_trimmed_reads_dom_fns)
        logging.info("Done with chimera detection.")
Exemple #7
0
    def runChimeraDetector(self):
        """Detect chimeras from trimmed full-length reads.

        When self.out_trimmed_reads_dom_fn is already on disk it is reused.
        Otherwise primers are processed, the trimmed full-length reads are
        chunked, and self._startPhmmers is run on the chunks to create it;
        in that case the chunk files are removed before returning.

        The chimera records are then applied to the full-length reads, and
        out_all_reads_fn and primer_report_fn are built by concatenation.
        """
        logging.info("Start to detect chimeras from trimmed reads.")
        need_cleanup = True
        if os.path.exists(self.out_trimmed_reads_dom_fn):
            logging.info("Output already exists. Parsing {0}.".format(
                self.out_trimmed_reads_dom_fn))
            # Reusing existing output: no chunk files to clean up later.
            need_cleanup = False
        else:
            # Create forward/reverse primers for chimera detection.
            self._processPrimers(
                primer_fn=self.primer_fn,
                window_size=self.chimera_detection_opts.primer_search_window,
                primer_out_fn=self.primer_chimera_fn,
                revcmp_primers=True)

            # Only detect chimeras on full-length reads in order to save time
            total_fl = self.summary.num_fl
            num_chunks = max(min(total_fl, self.cpus), 1)
            # If there are no full-length reads, ceil(0 / n) is 0 and the
            # next division would raise ZeroDivisionError; keep both the
            # chunk size and the chunk count at 1 or more. Results for
            # total_fl > 0 are the same as without the clamp.
            reads_per_chunk = max(
                1, int(math.ceil(total_fl / float(num_chunks))))
            num_chunks = max(
                1, int(math.ceil(total_fl / float(reads_per_chunk))))

            self.chunked_trimmed_reads_fns = generateChunkedFN(
                self.out_dir, "in.trimmed.fa_split", num_chunks)

            self.chunked_trimmed_reads_dom_fns = generateChunkedFN(
                self.out_dir, "out.trimmed.hmmer_split", num_chunks)

            self._chunkReads(reads_fn=self._trimmed_fl_reads_fn,
                             reads_per_chunk=reads_per_chunk,
                             chunked_reads_fns=self.chunked_trimmed_reads_fns,
                             extract_front_back_only=False)

            self._startPhmmers(self.chunked_trimmed_reads_fns,
                               self.chunked_trimmed_reads_dom_fns,
                               self.out_trimmed_reads_dom_fn,
                               self.primer_chimera_fn, self.pbmatrix_fn)

        suspicious_hits = self._getChimeraRecord(self.out_trimmed_reads_dom_fn,
                                                 self.chimera_detection_opts)

        # Only detect chimeras on full-length reads in order to save time.
        # The keyword is spelled "suspicous_hits" to match the callee.
        self._updateChimeraInfo(suspicous_hits=suspicious_hits,
                                in_read_fn=self._trimmed_fl_reads_fn,
                                out_flnc_fn=self.out_flnc_fn,
                                out_flc_fn=self.out_flc_fn,
                                primer_report_fl_fn=self._primer_report_fl_fn)
        # full-length non-chimeric reads written to out_flnc.fa
        # non-full-length reads written to out_nfl.fa
        # primer info of fl reads reported to _primer_report_fl_fn
        # primer info of nfl reads reported to _primer_report_nfl_fn
        # Need to: (1) concatenate out_flnc and out_nfl to make
        #              out_all_reads_fn
        #          (2) concatenate _primer_report_fl_fn and
        #              _primer_report_nfl_fn to make primer_report_fn
        cat_files(src=[self.out_flnc_fn, self.out_nfl_fn],
                  dst=self.out_all_reads_fn)

        cat_files(src=[self._primer_report_fl_fn, self._primer_report_nfl_fn],
                  dst=self.primer_report_fn)

        if need_cleanup:
            self._cleanup(self.chunked_trimmed_reads_fns +
                          self.chunked_trimmed_reads_dom_fns)
        logging.info("Done with chimera detection.")