Ejemplo n.º 1
0
    def run(self):
        """Assigning nfl reads to consensus isoforms and merge."""
        # Call ice_partial.py to create a pickle for each splitted nfl fasta
        self.createPickles()
        # Wait for pickles to be created, if SGE is used.
        self.waitForPickles(pickle_filenames=self.pickle_filenames,
                            done_filenames=self.done_filenames)
        # Combine all pickles to a big pickle file: nfl_all_pickle_fn.
        self.combinePickles(pickle_filenames=self.pickle_filenames,
                            out_pickle=self.nfl_all_pickle_fn)
        # Create symbolic link if necessary
        ln(self.nfl_all_pickle_fn, self.out_pickle)

        # Close log
        self.close_log()
Ejemplo n.º 2
0
    def run(self):
        """Assigning nfl reads to consensus isoforms and merge."""
        # Call ice_partial.py to create a pickle for each splitted nfl fasta
        self.createPickles()
        # Wait for pickles to be created, if SGE is used.
        self.waitForPickles(pickle_filenames=self.pickle_filenames,
                            done_filenames=self.done_filenames)
        # Combine all pickles to a big pickle file: nfl_all_pickle_fn.
        self.combinePickles(pickle_filenames=self.pickle_filenames,
                            out_pickle=self.nfl_all_pickle_fn)
        # Create symbolic link if necessary
        ln(self.nfl_all_pickle_fn, self.out_pickle)

        # Close log
        self.close_log()
Ejemplo n.º 3
0
    def run(self):
        """Check all quiver jobs are running, failed or done. Write high-quality
        consensus and low-quality consensus to all_quivered.good|bad.fa|fq.
        """
        job_stats = self.check_quiver_jobs_completion()
        self.add_log("quiver job status: {s}".format(s=job_stats))

        if self.use_sge is not True and job_stats != "DONE":
            self.add_log("quiver jobs were not submitted via sge, " +
                         "however are still incomplete. Please check.",
                         level=logging.ERROR)
            return -1
        elif self.use_sge is True:
            while job_stats != "DONE":
                self.add_log("Sleeping for 180 seconds.")
                sleep(180)
                job_stats = self.check_quiver_jobs_completion()
                if job_stats == "DONE":
                    break
                elif job_stats == "FAILED":
                    self.add_log("There are some failed jobs. Please check.",
                                 level=logging.ERROR)
                    return 1
                elif job_stats == "RUNNING":
                    self.add_log("There are jobs still running, waiting...",
                                 level=logging.INFO)
                    if self.quit_if_not_done is True:
                        return 0
                else:
                    msg = "Unable to recognize job_stats {s}".format(job_stats)
                    self.add_log(msg, logging.ERROR)
                    raise ValueError(msg)

        self.pickup_best_clusters(self.fq_filenames)

        self.add_log("Creating polished high quality consensus isoforms.")
        if self.hq_isoforms_fa is not None:
            ln(self.quivered_good_fa, self.hq_isoforms_fa)
        if self.hq_isoforms_fq is not None:
            ln(self.quivered_good_fq, self.hq_isoforms_fq)

        self.add_log("Creating polished low quality consensus isoforms.")
        if self.lq_isoforms_fa is not None:
            ln(self.quivered_bad_fa, self.lq_isoforms_fa)
        if self.lq_isoforms_fq is not None:
            ln(self.quivered_bad_fq, self.lq_isoforms_fq)

        if self.summary_fn is not None:
            self.write_summary(summary_fn=self.summary_fn,
                               isoforms_fa=self.final_consensus_fa,
                               hq_fa=self.hq_isoforms_fa,
                               lq_fa=self.lq_isoforms_fa)

        self.close_log()
Ejemplo n.º 4
0
    def run(self):
        """Check all quiver jobs are running, failed or done. Write high-quality
        consensus and low-quality consensus to all_quivered.good|bad.fa|fq.
        """
        job_stats = self.check_quiver_jobs_completion()
        self.add_log("quiver job status: {s}".format(s=job_stats))

        if self.use_sge is not True and job_stats != "DONE":
            self.add_log("quiver jobs were not submitted via sge, " +
                         "however are still incomplete. Please check.",
                         level=logging.ERROR)
            return -1
        elif self.use_sge is True:
            while job_stats != "DONE":
                self.add_log("Sleeping for 180 seconds.")
                sleep(180)
                job_stats = self.check_quiver_jobs_completion()
                if job_stats == "DONE":
                    break
                elif job_stats == "FAILED":
                    self.add_log("There are some failed jobs. Please check.",
                                 level=logging.ERROR)
                    return 1
                elif job_stats == "RUNNING":
                    self.add_log("There are jobs still running, waiting...",
                                 level=logging.INFO)
                    if self.quit_if_not_done is True:
                        return 0
                else:
                    msg = "Unable to recognize job_stats {s}".format(job_stats)
                    self.add_log(msg, logging.ERROR)
                    raise ValueError(msg)

        self.pickup_best_clusters(self.fq_filenames)

        self.add_log("Creating polished high quality consensus isoforms.")
        if self.hq_isoforms_fa is not None:
            ln(self.quivered_good_fa, self.hq_isoforms_fa)
        if self.hq_isoforms_fq is not None:
            ln(self.quivered_good_fq, self.hq_isoforms_fq)

        self.add_log("Creating polished low quality consensus isoforms.")
        if self.lq_isoforms_fa is not None:
            ln(self.quivered_bad_fa, self.lq_isoforms_fa)
        if self.lq_isoforms_fq is not None:
            ln(self.quivered_bad_fq, self.lq_isoforms_fq)

        if self.summary_fn is not None:
            self.write_summary(summary_fn=self.summary_fn,
                               isoforms_fa=self.final_consensus_fa,
                               hq_fa=self.hq_isoforms_fa,
                               lq_fa=self.lq_isoforms_fa)

        self.close_log()
Ejemplo n.º 5
0
    def runChimeraDetector(self):
        """Call chimera detection on full-length reads, and non-full-length
        reads if required."""
        # Create forward/reverse primers for chimera detection.
        self._processPrimers(
            primer_fn=self.primer_fn,
            window_size=self.chimera_detection_opts.primer_search_window,
            primer_out_fn=self.primer_chimera_fn,
            revcmp_primers=True)

        # Detect chimeras among full-length reads, separate flnc reads and
        # flc reads.
        logging.info("Detect chimeric reads from trimmed full-length reads.")
        (self.summary.num_flnc, self.summary.num_flc,
         self.summary.num_flnc_bases, _x) = \
            self._detect_chimera(in_fasta=self._trimmed_fl_reads_fn,
                                 out_nc_fasta=self.out_flnc_fn,
                                 out_c_fasta=self.out_flc_fn,
                                 primer_report_fn=self._primer_report_fl_fn,
                                 out_dom=self.out_trimmed_fl_dom_fn,
                                 num_reads=self.summary.num_fl,
                                 job_name="fl")
        assert(self.summary.num_fl == self.summary.num_flnc +
               self.summary.num_flc)
        logging.info("Done with chimera detection on trimmed full-length " +
                     "reads.")

        # Detect chimeras among non-full-length reads if required, separate
        # nflnc reads and nflc reads, rewrite self.primer_report_nfl_fn.
        if self.chimera_detection_opts.detect_chimera_nfl is True:
            logging.info("Detect chimeric reads from trimmed non-full-length " +
                         "reads.")
            (self.summary.num_nflnc, self.summary.num_nflc, _x, _y) = \
                self._detect_chimera(in_fasta=self._trimmed_nfl_reads_fn,
                                     out_nc_fasta=self.out_nflnc_fn,
                                     out_c_fasta=self.out_nflc_fn,
                                     primer_report_fn=self._primer_report_nfl_fn,
                                     out_dom=self.out_trimmed_nfl_dom_fn,
                                     num_reads=self.summary.num_nfl,
                                     job_name="nfl")
            assert(self.summary.num_nfl == self.summary.num_nflnc +
                   self.summary.num_nflc)
            logging.info("Done with chimera detection on trimmed " +
                         "non-full-length reads.")

            # Concatenate out_nflnc_fn and out_nflc_fn as out_nfl_fn
            cat_files(src=[self.out_nflnc_fn, self.out_nflc_fn],
                      dst=self.out_nfl_fn)
            # Concatenate out_flnc and out_nflnc to make out_all_reads_fn
            cat_files(src=[self.out_flnc_fn, self.out_nflnc_fn],
                      dst=self.out_all_reads_fn)

        else:
            # Soft link _trimmed_nfl_reads_fn as out_nfl_fn
            ln(self._trimmed_nfl_reads_fn, self.out_nfl_fn)
            # Concatenate out_flnc and out_nfl to make out_all_reads_fn
            cat_files(src=[self.out_flnc_fn, self.out_nfl_fn],
                      dst=self.out_all_reads_fn)

        # primer info of fl/nfl reads reported to _primer_report_fl_fn
        # and _primer_report_nfl_fn, concatenate them in order to make
        # a full report: primer_report_fn.
        cat_files(src=[self._primer_report_fl_fn, self._primer_report_nfl_fn],
                  dst=self.primer_report_fn)

        # Delete intermediate files.
        self._cleanup([self._primer_report_nfl_fn,
                       self._primer_report_fl_fn])
Ejemplo n.º 6
0
    def runChimeraDetector(self):
        """Call chimera detection on full-length reads, and non-full-length
        reads if required."""
        # Create forward/reverse primers for chimera detection.
        self._processPrimers(
            primer_fn_forward=self.primer_fn_forward,
            primer_fn_reverse=self.primer_fn_reverse,
            window_size=self.chimera_detection_opts.primer_search_window,
            primer_out_fn=self.primer_chimera_fn,
            revcmp_primers=True)

        # Detect chimeras among full-length reads, separate flnc reads and
        # flc reads.
        logging.info("Detect chimeric reads from trimmed full-length reads.")
        (self.summary.num_flnc, self.summary.num_flc,
         self.summary.num_flnc_bases, _x) = \
            self._detect_chimera(in_fasta=self._trimmed_fl_reads_fn,
                                 out_nc_fasta=self.out_flnc_fn,
                                 out_c_fasta=self.out_flc_fn,
                                 primer_report_fn=self._primer_report_fl_fn,
                                 out_dom=self.out_trimmed_fl_dom_fn,
                                 num_reads=self.summary.num_fl,
                                 job_name="fl")
        assert(self.summary.num_fl == self.summary.num_flnc +
               self.summary.num_flc)
        logging.info("Done with chimera detection on trimmed full-length " +
                     "reads.")

        # Detect chimeras among non-full-length reads if required, separate
        # nflnc reads and nflc reads, rewrite self.primer_report_nfl_fn.
        if self.chimera_detection_opts.detect_chimera_nfl is True:
            logging.info("Detect chimeric reads from trimmed non-full-length " +
                         "reads.")
            (self.summary.num_nflnc, self.summary.num_nflc, _x, _y) = \
                self._detect_chimera(in_fasta=self._trimmed_nfl_reads_fn,
                                     out_nc_fasta=self.out_nflnc_fn,
                                     out_c_fasta=self.out_nflc_fn,
                                     primer_report_fn=self._primer_report_nfl_fn,
                                     out_dom=self.out_trimmed_nfl_dom_fn,
                                     num_reads=self.summary.num_nfl,
                                     job_name="nfl")
            assert(self.summary.num_nfl == self.summary.num_nflnc +
                   self.summary.num_nflc)
            logging.info("Done with chimera detection on trimmed " +
                         "non-full-length reads.")

            # Concatenate out_nflnc_fn and out_nflc_fn as out_nfl_fn
            cat_files(src=[self.out_nflnc_fn, self.out_nflc_fn],
                      dst=self.out_nfl_fn)
            # Concatenate out_flnc and out_nflnc to make out_all_reads_fn
            cat_files(src=[self.out_flnc_fn, self.out_nflnc_fn],
                      dst=self.out_all_reads_fn)

        else:
            # Soft link _trimmed_nfl_reads_fn as out_nfl_fn
            ln(self._trimmed_nfl_reads_fn, self.out_nfl_fn)
            # Concatenate out_flnc and out_nfl to make out_all_reads_fn
            cat_files(src=[self.out_flnc_fn, self.out_nfl_fn],
                      dst=self.out_all_reads_fn)

        # primer info of fl/nfl reads reported to _primer_report_fl_fn
        # and _primer_report_nfl_fn, concatenate them in order to make
        # a full report: primer_report_fn.
        cat_files(src=[self._primer_report_fl_fn, self._primer_report_nfl_fn],
                  dst=self.primer_report_fn)

        # Delete intermediate files.
        self._cleanup([self._primer_report_nfl_fn,
                       self._primer_report_fl_fn])
Ejemplo n.º 7
0
    def run(self):
        """Call ICE to cluster consensus isoforms."""
        self.add_log("Start to run cluster.", level=logging.INFO)

        if self.ice_opts.targeted_isoseq:
            first_split = 1000
            self.ice_opts.flnc_reads_per_split = 10000
            self.add_log(
                "targeted_isoseq: further splitting JUST first split to 1000. Changing flnc_reads_per_split=10000."
            )
        else:
            first_split = None

        # Split flnc_fa into smaller files and save files to _flnc_splitted_fas.
        self.add_log(
            "Splitting {flnc} into ".format(flnc=self.flnc_fa)
            + "smaller files each containing {n} reads.".format(n=self.ice_opts.flnc_reads_per_split),
            level=logging.INFO,
        )

        self._flnc_splitted_fas = splitFasta(
            input_fasta=self.flnc_fa,
            reads_per_split=self.ice_opts.flnc_reads_per_split,
            out_dir=self.root_dir,
            out_prefix="input.split",
            first_split=first_split,
        )

        self.add_log("Splitted files are: " + "\n".join(self._flnc_splitted_fas), level=logging.INFO)

        firstSplit = self._flnc_splitted_fas[0]
        firstSplit_fq = firstSplit[: firstSplit.rfind(".")] + ".fastq"
        self.add_log(
            "Converting first split file {0} + {1} into fastq\n".format(firstSplit, self.ccs_fofn), level=logging.INFO
        )
        # Convert this into FASTQ
        ice_fa2fq(firstSplit, self.ccs_fofn, firstSplit_fq)

        # Set up probabbility and quality value model
        if self.ice_opts.use_finer_qv:
            self._setProbQV_ccs(self.ccs_fofn, firstSplit)
        else:
            self._setProbQV_fq(firstSplitFq=firstSplit_fq)

        # Initialize cluster by clique
        if os.path.exists(self.initPickleFN):
            self.add_log("Reading existing uc pickle: {0}".format(self.initPickleFN), level=logging.INFO)
            with open(self.initPickleFN) as f:
                uc = cPickle.load(f)
        else:
            self.add_log("Finding maximal cliques: initializing IceInit.", level=logging.INFO)
            self.iceinit = IceInit(
                readsFa=firstSplit,
                qver_get_func=self._probqv.get_smoothed,
                ice_opts=self.ice_opts,
                sge_opts=self.sge_opts,
                qvmean_get_func=self._probqv.get_mean,
            )
            uc = self.iceinit.uc

            # Dump uc to a file
            self.add_log("Dumping initial clusters to {f}".format(f=self.initPickleFN), level=logging.INFO)
            with open(self.initPickleFN, "w") as f:
                cPickle.dump(uc, f)

        # Run IceIterative.
        self.add_log("Iterative clustering: initializing IceIterative.", level=logging.INFO)
        # self.add_log("In Cluster. DEBUG: Calling Cluster with {0} {1} {2} ".format(self.bas_fofn, self.ccs_fofn, self.fasta_fofn))

        self.icec = IceIterative(
            fasta_filename=firstSplit,
            fasta_filenames_to_add=self._flnc_splitted_fas[1:],
            all_fasta_filename=self.flnc_fa,
            ccs_fofn=self.ccs_fofn,
            root_dir=self.root_dir,
            ice_opts=self.ice_opts,
            sge_opts=self.sge_opts,
            uc=uc,
            probQV=self._probqv,
            fastq_filename=firstSplit_fq,
            use_ccs_qv=self.ice_opts.use_finer_qv,
        )
        self.add_log("IceIterative log: {f}.".format(f=self.icec.log_fn))
        self.icec.run()
        self.add_log("IceIterative completed.", level=logging.INFO)

        # IceIterative done, write predicted (unplished) consensus isoforms
        # to an output fasta
        self.add_log("Creating a link to unpolished consensus isoforms.")
        ln(self.icec.final_consensus_fa, self.out_fa)

        # self.add_log("In Cluster. DEBUG: End Cluster with {0} {1} {2} ".format(self.bas_fofn, self.ccs_fofn, self.fasta_fofn))

        # Call quiver to polish predicted consensus isoforms.
        if self.ice_opts.quiver is not True:
            self.add_log("Creating a link to cluster report.", level=logging.INFO)
            ln(src=self.icec.report_fn, dst=self.report_fn)

            # Summarize cluster and write to summary_fn.
            self.write_summary(summary_fn=self.summary_fn, isoforms_fa=self.out_fa)
        else:  # self.ice_opts.quiver is True
            self.add_log("Polishing clusters: initializing IcePolish.", level=logging.INFO)
            # self.add_log("In Cluster. DEBUG: Calling Polish with {0} {1} {2} ".format(self.bas_fofn, self.ccs_fofn, self.fasta_fofn))
            self.pol = Polish(
                root_dir=self.root_dir,
                nfl_fa=self.nfl_fa,
                bas_fofn=self.bas_fofn,
                ccs_fofn=self.ccs_fofn,
                fasta_fofn=self.fasta_fofn,
                ice_opts=self.ice_opts,
                sge_opts=self.sge_opts,
                ipq_opts=self.ipq_opts,
                nfl_reads_per_split=self.nfl_reads_per_split,
            )
            self.add_log("IcePolish log: {f}.".format(f=self.pol.log_fn), level=logging.INFO)
            self.pol.run()
            self.add_log("IcePolish completed.", level=logging.INFO)

            # cluster report
            self.add_log("Creating a link to cluster report.", level=logging.INFO)
            ln(src=self.pol.iceq.report_fn, dst=self.report_fn)

            # Summarize cluster & polish and write to summary_fn.
            self.write_summary(
                summary_fn=self.summary_fn,
                isoforms_fa=self.out_fa,
                hq_fa=self.pol.icepq.quivered_good_fa,
                lq_fa=self.pol.icepq.quivered_bad_fa,
            )

        # Create log file.
        self.close_log()
        return 0
Ejemplo n.º 8
0
    def run(self):
        """Call ICE to cluster consensus isoforms."""
        self.add_log("Start to run cluster.", level=logging.INFO)

        #self.ice_opts.flnc_reads_per_split=1000 #FOR DEBUGGING, REMOVE LATER
        # Split flnc_fa into smaller files and save files to _flnc_splitted_fas.
        self.add_log("Splitting {flnc} into ".format(flnc=self.flnc_fa) +
                     "smaller files each containing {n} reads.".format(
                         n=self.ice_opts.flnc_reads_per_split),
                     level=logging.INFO)
        self._flnc_splitted_fas = splitFasta(
            input_fasta=self.flnc_fa,
            reads_per_split=self.ice_opts.flnc_reads_per_split,
            out_dir=self.root_dir,
            out_prefix="input.split")
        self.add_log("Splitted files are: " +
                     "\n".join(self._flnc_splitted_fas),
                     level=logging.INFO)

        firstSplit = self._flnc_splitted_fas[0]
        firstSplit_fq = firstSplit[:firstSplit.rfind('.')] + '.fastq'
        self.add_log("Converting first split file {0} + {1} into fastq\n".format(\
                firstSplit, self.ccs_fofn), level=logging.INFO)
        # Convert this into FASTQ
        ice_fa2fq(firstSplit, self.ccs_fofn, firstSplit_fq)

        # Set up probabbility and quality value model
        if self.ice_opts.use_finer_qv:
            self._setProbQV_ccs(self.ccs_fofn, firstSplit)
        else:
            self._setProbQV_fq(firstSplitFq=firstSplit_fq)

        # Initialize cluster by clique
        self.add_log("Finding maximal cliques: initializing IceInit.",
                     level=logging.INFO)
        self.iceinit = IceInit(readsFa=firstSplit,
                               qver_get_func=self._probqv.get_smoothed,
                               ice_opts=self.ice_opts,
                               sge_opts=self.sge_opts)
        uc = self.iceinit.uc

        # Dump uc to a file
        self.add_log(
            "Dumping initial clusters to {f}".format(f=self.initPickleFN),
            level=logging.INFO)
        with open(self.initPickleFN, 'w') as f:
            cPickle.dump(uc, f)

        # Run IceIterative.
        self.add_log("Iterative clustering: initializing IceIterative.",
                     level=logging.INFO)
        self.icec = IceIterative(
            fasta_filename=firstSplit,
            fasta_filenames_to_add=self._flnc_splitted_fas[1:],
            all_fasta_filename=self.flnc_fa,
            ccs_fofn=self.ccs_fofn,
            root_dir=self.root_dir,
            ice_opts=self.ice_opts,
            sge_opts=self.sge_opts,
            uc=uc,
            probQV=self._probqv,
            fastq_filename=firstSplit_fq,
            use_ccs_qv=self.ice_opts.use_finer_qv)
        self.add_log("IceIterative log: {f}.".format(f=self.icec.log_fn))
        self.icec.run()
        self.add_log("IceIterative completed.", level=logging.INFO)

        # IceIterative done, write predicted (unplished) consensus isoforms
        # to an output fasta
        self.add_log("Creating a link to unpolished consensus isoforms.")
        ln(self.icec.final_consensus_fa, self.out_fa)

        # Call quiver to polish predicted consensus isoforms.
        if self.ice_opts.quiver is not True:
            self.add_log("Creating a link to cluster report.",
                         level=logging.INFO)
            ln(src=self.icec.report_fn, dst=self.report_fn)

            # Summarize cluster and write to summary_fn.
            self.write_summary(summary_fn=self.summary_fn,
                               isoforms_fa=self.out_fa)
        else:  # self.ice_opts.quiver is True
            self.add_log("Polishing clusters: initializing IcePolish.",
                         level=logging.INFO)
            self.pol = Polish(root_dir=self.root_dir,
                              nfl_fa=self.nfl_fa,
                              bas_fofn=self.bas_fofn,
                              ccs_fofn=self.ccs_fofn,
                              fasta_fofn=self.fasta_fofn,
                              ice_opts=self.ice_opts,
                              sge_opts=self.sge_opts,
                              ipq_opts=self.ipq_opts,
                              nfl_reads_per_split=self.nfl_reads_per_split)
            self.add_log("IcePolish log: {f}.".format(f=self.pol.log_fn),
                         level=logging.INFO)
            self.pol.run()
            self.add_log("IcePolish completed.", level=logging.INFO)

            # cluster report
            self.add_log("Creating a link to cluster report.",
                         level=logging.INFO)
            ln(src=self.pol.iceq.report_fn, dst=self.report_fn)

            # Summarize cluster & polish and write to summary_fn.
            self.write_summary(summary_fn=self.summary_fn,
                               isoforms_fa=self.out_fa,
                               hq_fa=self.pol.icepq.quivered_good_fa,
                               lq_fa=self.pol.icepq.quivered_bad_fa)

        # Create log file.
        self.close_log()
        return 0
Ejemplo n.º 9
0
    def run(self):
        """Call ICE to cluster consensus isoforms."""
        self.add_log("Start to run cluster.", level=logging.INFO)

        # Split flnc_fa into smaller files and save files to _flnc_splitted_fas.
        self.add_log("Splitting {flnc} into ".format(flnc=self.flnc_fa) +
                     "smaller files each containing {n} reads.".format(
                     n=self.ice_opts.flnc_reads_per_split),
                     level=logging.INFO)
        self._flnc_splitted_fas = splitFasta(
            input_fasta=self.flnc_fa,
            reads_per_split=self.ice_opts.flnc_reads_per_split,
            out_dir=self.root_dir,
            out_prefix="input.split")
        self.add_log("Splitted files are: " +
                     "\n".join(self._flnc_splitted_fas),
                     level=logging.INFO)

        firstSplit = self._flnc_splitted_fas[0]
        # Set up probabbility and quality value model
        self._setProbQV(ccs_fofn=self.ccs_fofn, firstSplitFa=firstSplit)

        # Initialize cluster by clique
        # check if init.pickle already exists, if so, no need to run IceInit
        if os.path.exists(self.initPickleFN):
            self.add_log("{0} already exists. Reading to get uc.".format(self.initPickleFN), level=logging.INFO)
            with open(self.initPickleFN) as f:
                uc = cPickle.load(f)
        else:
            self.add_log("Finding maximal cliques.", level=logging.INFO)
            self.iceinit = IceInit(readsFa=firstSplit,
                      qver_get_func=self._probqv.get_smoothed,
                      ice_opts=self.ice_opts,
                      sge_opts=self.sge_opts)
            uc = self.iceinit.uc
            # Dump uc to a file
            self.add_log("Dumping initial clusters to {f}".format(
                         f=self.initPickleFN), level=logging.INFO)
            with open(self.initPickleFN, 'w') as f:
                cPickle.dump(uc, f)

        # Run IceIterative.
        self.add_log("Iteratively clustering.", level=logging.INFO)
        self.icec = IceIterative(
                fasta_filename=firstSplit,
                fasta_filenames_to_add=self._flnc_splitted_fas[1:],
                all_fasta_filename=self.flnc_fa,
                ccs_fofn=self.ccs_fofn,
                root_dir=self.root_dir,
                ice_opts=self.ice_opts,
                sge_opts=self.sge_opts,
                uc=uc,
                probQV=self._probqv)
        self.icec.run()
        clean_up_after_ICE(self.root_dir)

        # IceIterative done, write predicted (unplished) consensus isoforms
        # to an output fasta
        self.add_log("Creating a link to unpolished consensus isoforms.")
        ln(self.icec.final_consensus_fa, self.out_fa)

        # Call quiver to polish predicted consensus isoforms.
        if self.ice_opts.quiver is not True:
            self.add_log("Creating a link to cluster report.")
            ln(src=self.icec.report_fn, dst=self.report_fn)

            self.add_log("Writing a summary to {f}".format(f=self.summary_fn),
                         level=logging.INFO)
            self.writeSummary(fa=self.out_fa, summary_fn=self.summary_fn)
        else:  # self.ice_opts.quiver is True
            #TODO review code
            self.pol = Polish(root_dir=self.root_dir,
                         nfl_fa=self.nfl_fa,
                         bas_fofn=self.bas_fofn,
                         ccs_fofn=self.ccs_fofn,
                         hq_isoforms_fa=self.hq_isoforms_fa,
                         hq_isoforms_fq=self.hq_isoforms_fq,
                         lq_isoforms_fa=self.lq_isoforms_fa,
                         lq_isoforms_fq=self.lq_isoforms_fq,
                         ice_opts=self.ice_opts,
                         sge_opts=self.sge_opts)
            self.pol.run()

            # cluster report
            self.add_log("Creating a link to cluster report.")
            ln(src=self.pol.iceq.report_fn, dst=self.report_fn)

            # Write a summary.
            self.add_log("Writing a summary to {f}".format(f=self.summary_fn),
                         level=logging.INFO)
            self.writeSummary(fa=self.out_fa, summary_fn=self.summary_fn,
                              hq_fa=self.pol.icepq.quivered_good_fa,
                              lq_fa=self.pol.icepq.quivered_bad_fa)

        # Create log file.
        self.close_log()
        return 0
Ejemplo n.º 10
0
    def run(self):
        """Call ICE to cluster consensus isoforms."""
        self.add_log("Start to run cluster.", level=logging.INFO)

        # Split flnc_fa into smaller files and save files to _flnc_splitted_fas.
        self.add_log("Splitting {flnc} into ".format(flnc=self.flnc_fa) +
                     "smaller files each containing {n} reads.".format(
                         n=self.ice_opts.flnc_reads_per_split),
                     level=logging.INFO)
        self._flnc_splitted_fas = splitFasta(
            input_fasta=self.flnc_fa,
            reads_per_split=self.ice_opts.flnc_reads_per_split,
            out_dir=self.root_dir,
            out_prefix="input.split")
        self.add_log("Splitted files are: " +
                     "\n".join(self._flnc_splitted_fas),
                     level=logging.INFO)

        firstSplit = self._flnc_splitted_fas[0]
        # Set up probabbility and quality value model
        self._setProbQV(ccs_fofn=self.ccs_fofn, firstSplitFa=firstSplit)

        # Initialize cluster by clique
        # check if init.pickle already exists, if so, no need to run IceInit
        if os.path.exists(self.initPickleFN):
            self.add_log("{0} already exists. Reading to get uc.".format(
                self.initPickleFN),
                         level=logging.INFO)
            with open(self.initPickleFN) as f:
                uc = cPickle.load(f)
        else:
            self.add_log("Finding maximal cliques.", level=logging.INFO)
            self.iceinit = IceInit(readsFa=firstSplit,
                                   qver_get_func=self._probqv.get_smoothed,
                                   ice_opts=self.ice_opts,
                                   sge_opts=self.sge_opts)
            uc = self.iceinit.uc
            # Dump uc to a file
            self.add_log(
                "Dumping initial clusters to {f}".format(f=self.initPickleFN),
                level=logging.INFO)
            with open(self.initPickleFN, 'w') as f:
                cPickle.dump(uc, f)

        # Run IceIterative.
        self.add_log("Iteratively clustering.", level=logging.INFO)
        self.icec = IceIterative(
            fasta_filename=firstSplit,
            fasta_filenames_to_add=self._flnc_splitted_fas[1:],
            all_fasta_filename=self.flnc_fa,
            ccs_fofn=self.ccs_fofn,
            root_dir=self.root_dir,
            ice_opts=self.ice_opts,
            sge_opts=self.sge_opts,
            uc=uc,
            probQV=self._probqv)
        self.icec.run()
        clean_up_after_ICE(self.root_dir)

        # IceIterative done, write predicted (unplished) consensus isoforms
        # to an output fasta
        self.add_log("Creating a link to unpolished consensus isoforms.")
        ln(self.icec.final_consensus_fa, self.out_fa)

        # Call quiver to polish predicted consensus isoforms.
        if self.ice_opts.quiver is not True:
            self.add_log("Creating a link to cluster report.")
            ln(src=self.icec.report_fn, dst=self.report_fn)

            self.add_log("Writing a summary to {f}".format(f=self.summary_fn),
                         level=logging.INFO)
            self.writeSummary(fa=self.out_fa, summary_fn=self.summary_fn)
        else:  # self.ice_opts.quiver is True
            #TODO review code
            self.pol = Polish(root_dir=self.root_dir,
                              nfl_fa=self.nfl_fa,
                              bas_fofn=self.bas_fofn,
                              ccs_fofn=self.ccs_fofn,
                              hq_isoforms_fa=self.hq_isoforms_fa,
                              hq_isoforms_fq=self.hq_isoforms_fq,
                              lq_isoforms_fa=self.lq_isoforms_fa,
                              lq_isoforms_fq=self.lq_isoforms_fq,
                              ice_opts=self.ice_opts,
                              sge_opts=self.sge_opts)
            self.pol.run()

            # cluster report
            self.add_log("Creating a link to cluster report.")
            ln(src=self.pol.iceq.report_fn, dst=self.report_fn)

            # Write a summary.
            self.add_log("Writing a summary to {f}".format(f=self.summary_fn),
                         level=logging.INFO)
            self.writeSummary(fa=self.out_fa,
                              summary_fn=self.summary_fn,
                              hq_fa=self.pol.icepq.quivered_good_fa,
                              lq_fa=self.pol.icepq.quivered_bad_fa)

        # Create log file.
        self.close_log()
        return 0