def run(self):
    """Convert the input fofn to fasta and report the outcome as a status.

    Logs the basename of this script together with the value returned by
    get_version(), then calls convert_fofn_to_fasta with
    self.args.input_fofn as the input, self.args.fasta_fofn as the output
    filename and self.args.fasta_out_dir as the fasta output directory.
    force_overwrite is always passed as False.

    Returns:
        0 if the conversion call returned normally, 1 if it raised an
        Exception (the traceback is recorded with logging.exception).
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                            v=get_version()))
    args = self.args
    try:
        convert_fofn_to_fasta(fofn_filename=args.input_fofn,
                              out_filename=args.fasta_fofn,
                              fasta_out_dir=args.fasta_out_dir,
                              force_overwrite=False)
    except Exception:
        # Exception rather than a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit propagate instead of being
        # swallowed and reported as an ordinary conversion failure.
        logging.exception("Failed to convert fofn {f} to fasta.".
                          format(f=args.input_fofn))
        return 1
    return 0
 def run(self):
     """Convert the input fofn to fasta and report the outcome as a status.

     Logs the basename of this script together with the value returned by
     get_version(), then calls convert_fofn_to_fasta with
     self.args.input_fofn as the input, self.args.fasta_fofn as the output
     filename and self.args.fasta_out_dir as the fasta output directory.
     force_overwrite is always passed as False.

     Returns:
         0 if the conversion call returned normally, 1 if it raised an
         Exception (the traceback is recorded with logging.exception).
     """
     logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                             v=get_version()))
     args = self.args
     try:
         convert_fofn_to_fasta(fofn_filename=args.input_fofn,
                               out_filename=args.fasta_fofn,
                               fasta_out_dir=args.fasta_out_dir,
                               force_overwrite=False)
     except Exception:
         # Exception rather than a bare ``except:`` so that
         # KeyboardInterrupt and SystemExit propagate instead of being
         # swallowed and reported as an ordinary conversion failure.
         logging.exception("Failed to convert fofn {f} to fasta.".format(
             f=args.input_fofn))
         return 1
     return 0
Example #3
0
    def run(self):
        """
        Polish isoform clusters with non-full-length (nfl) reads.

        Overall workflow:
        First, split the nfl fasta file into smaller chunks, assign the
        nfl reads of each split fasta file to unpolished isoform
        clusters and merge all pickles into self.nfl_all_pickle_fn.
        Second, bin every 100 clusters and, for each bin, call blasr,
        samtoh5, loadPulses and cmph5tools to create cmp.h5 files, then
        call quiver to polish each isoform within the bin.
        Finally, pick up good isoform clusters whose QV errors are less
        than a threshold.
        Save all high quality isoforms to hq_isoforms_fa|fq if they are
        not None.
        Save all low quality isoforms to lq_isoforms_fa|fq if they are
        not None.

        Calls made directly by this method, in order:
        convert_fofn_to_fasta (skipped when bas_fofn is guessed to be
        BAM), splitFasta, DazzIDHandler.make_db, IceAllPartials.run,
        IceQuiver.run and IceQuiverPostprocess.run.
        """
        def log_info(text):
            """Forward text to self.add_log at INFO level."""
            self.add_log(text, level=logging.INFO)

        needs_fasta_fofn = (guess_file_format(self.bas_fofn) !=
                            FILE_FORMATS.BAM)
        if not needs_fasta_fofn:
            # BAM input: IceQuiver below is given fasta_fofn=None.
            self.fasta_fofn = None
        else:
            # Build the fasta fofn from bas_fofn, falling back to
            # nfl_dir/input.fasta.fofn when no path was set.
            log_info("Creating fasta fofn from bas/bax.h5/bam fofn")
            if self.fasta_fofn is None:
                self.fasta_fofn = op.join(self.nfl_dir, "input.fasta.fofn")
            self.add_log("fasta fofn={f}".format(f=self.fasta_fofn))
            convert_fofn_to_fasta(fofn_filename=self.bas_fofn,
                                  out_filename=self.fasta_fofn,
                                  fasta_out_dir=self.nfl_dir)

        # Break the nfl fasta into chunks; nfl_dir and the "input.split"
        # prefix are handed to splitFasta as output directory and prefix.
        reads_per_split = self.ice_opts.nfl_reads_per_split
        log_info("Splitting {nfl} into ".format(nfl=self.nfl_fa) +
                 "smaller files each containing {n} reads.".format(
                     n=reads_per_split))
        self._nfl_splitted_fas = splitFasta(input_fasta=self.nfl_fa,
                                            reads_per_split=reads_per_split,
                                            out_dir=self.nfl_dir,
                                            out_prefix="input.split")
        log_info("Splitted files are: " + "\n".join(self._nfl_splitted_fas))

        # Make the dazz DB for the final consensus fasta.
        dazz_ref = DazzIDHandler(input_filename=self.final_consensus_fa,
                                 converted=False)
        dazz_ref.make_db()
        log_info("Dazz DB made for: " + dazz_ref.dazz_filename)

        # Stage 1: IceAllPartials over every split nfl fasta.
        log_info("Initializing IceAllPartials.")
        self.icep = IceAllPartials(root_dir=self.root_dir,
                                   fasta_filenames=self._nfl_splitted_fas,
                                   ref_fasta=self.final_consensus_fa,
                                   out_pickle=self.nfl_all_pickle_fn,
                                   sge_opts=self.sge_opts,
                                   ccs_fofn=self.ccs_fofn)
        log_info("IceAllPartials log: {f}.".format(f=self.icep.log_fn))
        self.icep.run()
        log_info("IceAllPartials completed.")

        # Stage 2: IceQuiver.
        log_info("Initializing IceQuiver.")
        self.iceq = IceQuiver(root_dir=self.root_dir,
                              bas_fofn=self.bas_fofn,
                              fasta_fofn=self.fasta_fofn,
                              sge_opts=self.sge_opts,
                              tmp_dir=self.tmp_dir)
        log_info("IceQuiver log: {f}.".format(f=self.iceq.log_fn))
        self.iceq.run()
        log_info("IceQuiver finished.")

        # Stage 3: IceQuiverPostprocess.
        log_info("Initializing IceQuiverPostprocess.")
        self.icepq = IceQuiverPostprocess(root_dir=self.root_dir,
                                          use_sge=self.sge_opts.use_sge,
                                          quit_if_not_done=False,
                                          ipq_opts=self.ipq_opts)
        log_info("IceQuiverPostprocess log: {f}.".format(
            f=self.icepq.log_fn))
        self.icepq.run()
        log_info("IceQuiverPostprocess finished.")
Example #4
0
    def run(self):
        """
        Run the nfl-based polishing pipeline end to end.

        Overall workflow:
        First, split the non-full-length (nfl) fasta file into smaller
        chunks, assign the nfl reads of each split fasta file to
        unpolished isoform clusters and merge all pickles into
        self.nfl_all_pickle_fn.
        Second, bin every 100 clusters and, for each bin, call blasr,
        samtoh5, loadPulses and cmph5tools to create cmp.h5 files, then
        call quiver to polish each isoform within the bin.
        Finally, pick up good isoform clusters whose QV errors are less
        than a threshold.
        Save all high quality isoforms to hq_isoforms_fa|fq if they are
        not None.
        Save all low quality isoforms to lq_isoforms_fa|fq if they are
        not None.

        The objects created along the way are kept on the instance as
        self.icep, self.iceq and self.icepq.
        """
        info = logging.INFO

        if guess_file_format(self.bas_fofn) != FILE_FORMATS.BAM:
            # Non-BAM input: derive a fasta fofn from bas_fofn, using
            # nfl_dir/input.fasta.fofn when none was provided.
            self.add_log("Creating fasta fofn from bas/bax.h5/bam fofn",
                         level=info)
            if self.fasta_fofn is None:
                self.fasta_fofn = op.join(self.nfl_dir, "input.fasta.fofn")
            fofn_note = "fasta fofn={f}".format(f=self.fasta_fofn)
            self.add_log(fofn_note)
            convert_fofn_to_fasta(
                fofn_filename=self.bas_fofn,
                out_filename=self.fasta_fofn,
                fasta_out_dir=self.nfl_dir)
        else:
            self.fasta_fofn = None

        # Chunk the nfl fasta; splitFasta receives nfl_dir as out_dir and
        # "input.split" as out_prefix.
        chunk_size = self.ice_opts.nfl_reads_per_split
        split_note = "Splitting {nfl} into ".format(nfl=self.nfl_fa)
        split_note += "smaller files each containing {n} reads.".format(
            n=chunk_size)
        self.add_log(split_note, level=info)
        self._nfl_splitted_fas = splitFasta(
            input_fasta=self.nfl_fa,
            reads_per_split=chunk_size,
            out_dir=self.nfl_dir,
            out_prefix="input.split")
        self.add_log(
            "Splitted files are: " + "\n".join(self._nfl_splitted_fas),
            level=info)

        # Dazz DB for the final consensus fasta.
        consensus_db = DazzIDHandler(
            input_filename=self.final_consensus_fa,
            converted=False)
        consensus_db.make_db()
        self.add_log("Dazz DB made for: " + consensus_db.dazz_filename,
                     level=info)

        # IceAllPartials: handles every split nfl fasta file.
        self.add_log("Initializing IceAllPartials.", level=info)
        self.icep = IceAllPartials(root_dir=self.root_dir,
                                   fasta_filenames=self._nfl_splitted_fas,
                                   ref_fasta=self.final_consensus_fa,
                                   out_pickle=self.nfl_all_pickle_fn,
                                   sge_opts=self.sge_opts,
                                   ccs_fofn=self.ccs_fofn)
        partials_note = "IceAllPartials log: {f}.".format(f=self.icep.log_fn)
        self.add_log(partials_note, level=info)
        self.icep.run()
        self.add_log("IceAllPartials completed.", level=info)

        # IceQuiver.
        self.add_log("Initializing IceQuiver.", level=info)
        self.iceq = IceQuiver(
            root_dir=self.root_dir,
            bas_fofn=self.bas_fofn,
            fasta_fofn=self.fasta_fofn,
            sge_opts=self.sge_opts,
            tmp_dir=self.tmp_dir)
        quiver_note = "IceQuiver log: {f}.".format(f=self.iceq.log_fn)
        self.add_log(quiver_note, level=info)
        self.iceq.run()
        self.add_log("IceQuiver finished.", level=info)

        # IceQuiverPostprocess.
        self.add_log("Initializing IceQuiverPostprocess.", level=info)
        self.icepq = IceQuiverPostprocess(
            root_dir=self.root_dir,
            use_sge=self.sge_opts.use_sge,
            quit_if_not_done=False,
            ipq_opts=self.ipq_opts)
        post_note = "IceQuiverPostprocess log: {f}.".format(
            f=self.icepq.log_fn)
        self.add_log(post_note, level=info)
        self.icepq.run()
        self.add_log("IceQuiverPostprocess finished.", level=info)