Esempio n. 1
0
    def __init__(self,
                 root_dir,
                 fasta_filenames,
                 fastq_filenames,
                 ref_fasta,
                 out_pickle,
                 ice_opts,
                 sge_opts,
                 cpus,
                 tmp_dir=None):
        """
        Set up an IceAllPartials run: initialise the IceFiles base,
        validate the inputs, store the options and create the directory
        used for mapping nfl reads.

        root_dir --- ICE root output directory

        fasta_filenames --- a list of split nfl fasta files.

        fastq_filenames --- an iterable of fastq file paths, or None.
                            When not None, every path must exist.

        ref_fasta --- (unpolished) consensus isoforms

        out_pickle --- a pickle file with all nfl fasta reads

        ice_opts --- ICE options object, stored as self.ice_opts

        sge_opts --- params for SGE environment, including
            use_sge    : use SGE or not
            max_sge_jobs: maximum number of sub-jobs submitted
            unique_id  : unique qsub job id, important that this
                        DOES NOT CONFLICT!

        cpus --- number of CPUs to use per SGE job or per local job

        tmp_dir --- if not None, write temporary clusters, dazz, las
                    files to the given temporary directory

        Raises IOError if a path in fastq_filenames does not exist.
        """
        self.prog_name = "IceAllPartials"
        IceFiles.__init__(self,
                          prog_name=self.prog_name,
                          root_dir=root_dir,
                          tmp_dir=tmp_dir)

        self.fasta_filenames, self.ref_fasta = \
            self._validate_inputs(fasta_filenames=fasta_filenames,
                                  ref_fasta=ref_fasta)

        # Validate with an explicit raise rather than `assert`: assert
        # statements are removed when Python runs with -O, which would
        # silently skip this check.
        if fastq_filenames is not None:
            for fq in fastq_filenames:
                if not op.exists(fq):
                    raise IOError(
                        "Input fastq file {0} does not exist.".format(fq))

        self.fastq_filenames = fastq_filenames  # note: could be None

        self.out_pickle = out_pickle

        self.ice_opts = ice_opts
        self.sge_opts = sge_opts
        # number of CPUs to use per SGE job or per local job
        self.cpus = cpus

        # Log first, then create the nfl mapping directory.
        self.add_log("Making dir for mapping noFL reads: " + self.nfl_dir)
        mkdir(self.nfl_dir)

        self.add_log("input fasta files are: " +
                     ", ".join(self.fasta_filenames))
        self.add_log("temp pickle files are: " +
                     ", ".join(self.pickle_filenames))
        self.add_log("out pickle file is: " + self.out_pickle)
        # tmp_dir may be None, hence the str() conversion.
        self.add_log("temp directory is: " + str(self.tmp_dir))
Esempio n. 2
0
    def __init__(self,
                 root_dir,
                 nfl_fa,
                 bas_fofn,
                 ccs_fofn,
                 ice_opts,
                 sge_opts,
                 ipq_opts,
                 fasta_fofn=None,
                 tmp_dir=None):
        """
        Initialise an IcePolish run: set up the IceFiles base, store the
        inputs and options, log the ece settings and validate inputs.

        root_dir   --- IceFiles.root_dir, usually data/clusterOutDir
        nfl_fa     --- non-full-length reads in fasta,
                       e.g., isoseq_nfl.fasta (stored as a real path)
        bas_fofn   --- e.g. input.fofn of bas|bax.h5 files
        ccs_fofn   --- e.g. ccs.fofn of ccs files.
        ice_opts   --- options object; its ece_penalty and ece_min_len
                       are logged
        sge_opts   --- stored as self.sge_opts
        fasta_fofn --- passed through to IceFiles.__init__
        tmp_dir    --- passed through to IceFiles.__init__

        ipq_opts --- IceQuiverHQLQOptions
                     qv_trim_5: ignore QV of n bases in the 5' end
                     qv_trim_3: ignore QV of n bases in the 3' end
                     hq_quiver_min_accuracy: minimum allowed quiver
                                      accuracy to mark an isoform as
                                      high quality
                     hq_isoforms_fa|fq: polished, high quality consensus
                                        isoforms in fasta|q
                     lq_isoforms_fa|fq: polished, low quality consensus
                                        isoforms in fasta|q
        """
        base_kwargs = dict(prog_name="IcePolish",
                           root_dir=root_dir,
                           bas_fofn=bas_fofn,
                           ccs_fofn=ccs_fofn,
                           fasta_fofn=fasta_fofn,
                           tmp_dir=tmp_dir)
        IceFiles.__init__(self, **base_kwargs)

        self.nfl_fa = realpath(nfl_fa)
        self.ice_opts = ice_opts
        self.sge_opts = sge_opts
        self.ipq_opts = ipq_opts

        ece_values = (self.ice_opts.ece_penalty, self.ice_opts.ece_min_len)
        ece_msg = "ece_penalty: {0}, ece_min_len: {1}".format(*ece_values)
        self.add_log(ece_msg)

        # Placeholders for the sub-steps, in order:
        # IceAllPartials, IceQuiver, IceQuiverPostprocess.
        self.icep = self.iceq = self.icepq = None
        self._nfl_splitted_fas = None

        self.validate_inputs()
Esempio n. 3
0
    def __init__(self, root_dir, fasta_filenames, ref_fasta,
                 out_pickle, sge_opts, ccs_fofn=None, tmp_dir=None):
        """
        Set up an IceAllPartials run: initialise the IceFiles base,
        validate the inputs, store the options and create the directory
        used for mapping nfl reads.

        root_dir --- ICE root output directory

        fasta_filenames --- a list of split nfl fasta files.

        ref_fasta --- (unpolished) consensus isoforms

        out_pickle --- a pickle file with all nfl fasta reads

        sge_opts --- params for SGE environment, including
            use_sge    : use SGE or not
            max_sge_jobs: maximum number of sub-jobs submitted
            unique_id  : unique qsub job id, important that this
                        DOES NOT CONFLICT!
            blasr_nproc: blasr -nproc param, number of threads per cpu.

        ccs_fofn --- should be reads_of_insert.fofn or None

        tmp_dir --- if not None, write temporary clusters, dazz, las
                    files to the given temporary directory
        """
        self.prog_name = "IceAllPartials"
        IceFiles.__init__(self, prog_name=self.prog_name,
                          root_dir=root_dir, tmp_dir=tmp_dir)

        # _validate_inputs hands back the three values stored below.
        checked = self._validate_inputs(fasta_filenames=fasta_filenames,
                                        ref_fasta=ref_fasta,
                                        ccs_fofn=ccs_fofn)
        self.fasta_filenames, self.ref_fasta, self.ccs_fofn = checked

        self.out_pickle = out_pickle
        self.sge_opts = sge_opts

        # Log first, then create the nfl mapping directory.
        mkdir_msg = "Making dir for mapping noFL reads: " + self.nfl_dir
        self.add_log(mkdir_msg)
        mkdir(self.nfl_dir)

        fasta_listing = ", ".join(self.fasta_filenames)
        self.add_log("input fasta files are: " + fasta_listing)

        pickle_listing = ", ".join(self.pickle_filenames)
        self.add_log("temp pickle files are: " + pickle_listing)

        self.add_log("out pickle file is: " + self.out_pickle)

        # tmp_dir may be None, hence the str() conversion.
        tmp_dir_text = str(self.tmp_dir)
        self.add_log("temp directory is: " + tmp_dir_text)
Esempio n. 4
0
    def __init__(self, root_dir, fasta_filenames, fastq_filenames, ref_fasta,
                 out_pickle, ice_opts, sge_opts, cpus, tmp_dir=None):
        """
        Set up an IceAllPartials run: initialise the IceFiles base,
        validate the inputs, store the options and create the directory
        used for mapping nfl reads.

        root_dir --- ICE root output directory

        fasta_filenames --- a list of split nfl fasta files.

        fastq_filenames --- an iterable of fastq file paths, or None.
                            When not None, every path must exist.

        ref_fasta --- (unpolished) consensus isoforms

        out_pickle --- a pickle file with all nfl fasta reads

        ice_opts --- ICE options object, stored as self.ice_opts

        sge_opts --- params for SGE environment, including
            use_sge    : use SGE or not
            max_sge_jobs: maximum number of sub-jobs submitted
            unique_id  : unique qsub job id, important that this
                        DOES NOT CONFLICT!

        cpus --- number of CPUs to use per SGE job or per local job

        tmp_dir --- if not None, write temporary clusters, dazz, las
                    files to the given temporary directory

        Raises IOError if a path in fastq_filenames does not exist.
        """
        self.prog_name = "IceAllPartials"
        IceFiles.__init__(self, prog_name=self.prog_name, root_dir=root_dir,
                          tmp_dir=tmp_dir)

        self.fasta_filenames, self.ref_fasta = \
            self._validate_inputs(fasta_filenames=fasta_filenames,
                                  ref_fasta=ref_fasta)

        # Validate with an explicit raise rather than `assert`: assert
        # statements are removed when Python runs with -O, which would
        # silently skip this check.
        if fastq_filenames is not None:
            for fq in fastq_filenames:
                if not op.exists(fq):
                    raise IOError(
                        "Input fastq file {0} does not exist.".format(fq))

        self.fastq_filenames = fastq_filenames  # note: could be None

        self.out_pickle = out_pickle

        self.ice_opts = ice_opts
        self.sge_opts = sge_opts
        # number of CPUs to use per SGE job or per local job
        self.cpus = cpus

        # Log first, then create the nfl mapping directory.
        self.add_log("Making dir for mapping noFL reads: " + self.nfl_dir)
        mkdir(self.nfl_dir)

        self.add_log("input fasta files are: " +
                     ", ".join(self.fasta_filenames))
        self.add_log("temp pickle files are: " +
                     ", ".join(self.pickle_filenames))
        self.add_log("out pickle file is: " + self.out_pickle)
        # tmp_dir may be None, hence the str() conversion.
        self.add_log("temp directory is: " + str(self.tmp_dir))
Esempio n. 5
0
    def __init__(self, root_dir, nfl_fa, bas_fofn, ccs_fofn,
                 ice_opts, sge_opts, ipq_opts, fasta_fofn=None,
                 tmp_dir=None):
        """
        Initialise an IcePolish run: set up the IceFiles base, store the
        inputs and options, log the ece settings and validate inputs.

        root_dir   --- IceFiles.root_dir, usually data/clusterOutDir
        nfl_fa     --- non-full-length reads in fasta,
                       e.g., isoseq_nfl.fasta (stored as a real path)
        bas_fofn   --- e.g. input.fofn of bas|bax.h5 files
        ccs_fofn   --- e.g. ccs.fofn of ccs files.
        ice_opts   --- options object; its ece_penalty and ece_min_len
                       are logged
        sge_opts   --- stored as self.sge_opts
        fasta_fofn --- passed through to IceFiles.__init__
        tmp_dir    --- passed through to IceFiles.__init__

        ipq_opts --- IceQuiverHQLQOptions
                     qv_trim_5: ignore QV of n bases in the 5' end
                     qv_trim_3: ignore QV of n bases in the 3' end
                     hq_quiver_min_accuracy: minimum allowed quiver
                                      accuracy to mark an isoform as
                                      high quality
                     hq_isoforms_fa|fq: polished, high quality consensus
                                        isoforms in fasta|q
                     lq_isoforms_fa|fq: polished, low quality consensus
                                        isoforms in fasta|q
        """
        base_kwargs = dict(prog_name="IcePolish", root_dir=root_dir,
                           bas_fofn=bas_fofn, ccs_fofn=ccs_fofn,
                           fasta_fofn=fasta_fofn, tmp_dir=tmp_dir)
        IceFiles.__init__(self, **base_kwargs)

        self.nfl_fa = realpath(nfl_fa)
        self.ice_opts = ice_opts
        self.sge_opts = sge_opts
        self.ipq_opts = ipq_opts

        ece_values = (self.ice_opts.ece_penalty, self.ice_opts.ece_min_len)
        ece_msg = "ece_penalty: {0}, ece_min_len: {1}".format(*ece_values)
        self.add_log(ece_msg)

        # Placeholders for the sub-steps, in order:
        # IceAllPartials, IceQuiver, IceQuiverPostprocess.
        self.icep = self.iceq = self.icepq = None
        self._nfl_splitted_fas = None

        self.validate_inputs()
Esempio n. 6
0
    def __init__(self,
                 root_dir,
                 fasta_filenames,
                 ref_fasta,
                 out_pickle,
                 sge_opts,
                 ccs_fofn=None,
                 tmp_dir=None):
        """
        Set up an IceAllPartials run: initialise the IceFiles base,
        validate the inputs, store the options and create the directory
        used for mapping nfl reads.

        root_dir --- ICE root output directory

        fasta_filenames --- a list of split nfl fasta files.

        ref_fasta --- (unpolished) consensus isoforms

        out_pickle --- a pickle file with all nfl fasta reads

        sge_opts --- params for SGE environment, including
            use_sge    : use SGE or not
            max_sge_jobs: maximum number of sub-jobs submitted
            unique_id  : unique qsub job id, important that this
                        DOES NOT CONFLICT!
            blasr_nproc: blasr -nproc param, number of threads per cpu.

        ccs_fofn --- should be reads_of_insert.fofn or None

        tmp_dir --- if not None, write temporary clusters, dazz, las
                    files to the given temporary directory
        """
        self.prog_name = "IceAllPartials"
        IceFiles.__init__(self,
                          prog_name=self.prog_name,
                          root_dir=root_dir,
                          tmp_dir=tmp_dir)

        # _validate_inputs hands back the three values stored below.
        checked = self._validate_inputs(fasta_filenames=fasta_filenames,
                                        ref_fasta=ref_fasta,
                                        ccs_fofn=ccs_fofn)
        self.fasta_filenames, self.ref_fasta, self.ccs_fofn = checked

        self.out_pickle = out_pickle
        self.sge_opts = sge_opts

        # Log first, then create the nfl mapping directory.
        mkdir_msg = "Making dir for mapping noFL reads: " + self.nfl_dir
        self.add_log(mkdir_msg)
        mkdir(self.nfl_dir)

        fasta_listing = ", ".join(self.fasta_filenames)
        self.add_log("input fasta files are: " + fasta_listing)

        pickle_listing = ", ".join(self.pickle_filenames)
        self.add_log("temp pickle files are: " + pickle_listing)

        self.add_log("out pickle file is: " + self.out_pickle)

        # tmp_dir may be None, hence the str() conversion.
        tmp_dir_text = str(self.tmp_dir)
        self.add_log("temp directory is: " + tmp_dir_text)