コード例 #1
0
    def _validate_inputs(self, root_dir, N):
        """
        Check that all per-chunk inputs exist, return
        (splitted_pickles, out_pickle)

        For every chunk index in range(0, N), the pickle path and the DONE
        file path are obtained from an IceFiles object rooted at root_dir,
        and each path is checked with nfs_exists.

        Parameters:
          root_dir -- root directory handed to IceFiles.
          N -- number of chunks; indices 0 .. N-1 are validated.

        Returns:
          (splitted_pickles, out_pickle) -- the list of N per-chunk pickle
          paths, and icef.nfl_all_pickle_fn as the output pickle path.

        Raises:
          ValueError -- if any DONE file or pickle file does not exist.
          The message lists every missing file, one per line (DONE files
          first, then pickles).
        """
        # NOTE(review): icef is created with no_log_f=False and is never
        # closed in this method -- confirm whether IceFiles requires an
        # explicit close here.
        icef = IceFiles(prog_name="ice_partial_merge",
                        root_dir=root_dir, no_log_f=False)

        # root_dir/output/map_noFL/input.split_{0:03d}.fasta.partial_uc.pickle
        splitted_pickles = [icef.nfl_pickle_i(i) for i in range(0, N)]
        dones = [icef.nfl_done_i(i) for i in range(0, N)]

        # Check if inputs exist. Every missing file is collected so that the
        # error names all of them instead of only the last one found.
        missing = ["DONE file {f} does not exist.".format(f=done)
                   for done in dones if not nfs_exists(done)]
        missing.extend("Pickle file {f} does not exist.".format(f=pickle_fn)
                       for pickle_fn in splitted_pickles
                       if not nfs_exists(pickle_fn))

        if missing:
            raise ValueError("\n".join(missing))

        # root_dir/output/map_noFL/nfl.all.partial_uc.pickle
        out_pickle = icef.nfl_all_pickle_fn
        return (splitted_pickles, out_pickle)
コード例 #2
0
ファイル: IcePartialI.py プロジェクト: dayedepps/pbtranscript
    def _validate_inputs(self, root_dir, i, ccs_fofn, blasr_nproc, tmp_dir):
        """
        Check inputs, write $ICE_PARTIAL_PY i command to script_file
        and return (input_fasta, ref_fasta, out_pickle, done_file)
        for the i-th chunk of nfl reads.

        Parameters:
          root_dir -- root directory handed to IceFiles and to _cmd_str.
          i -- index of the chunk of nfl reads to validate.
          ccs_fofn, blasr_nproc, tmp_dir -- forwarded unchanged to
          self._cmd_str when building the command.

        Returns:
          (input_fasta, ref_fasta, out_pickle, done_file) -- paths obtained
          from IceFiles for chunk i.

        Raises:
          ValueError -- if the input fasta, the reference fasta, or the dazz
          db does not exist (checked in that order; the first missing one
          is reported).
        """
        icef = IceFiles(prog_name="ice_partial_{i}".format(i=i),
                        root_dir=root_dir,
                        no_log_f=False)

        try:
            # root_dir/output/final.consensus.fasta
            ref_fasta = icef.final_consensus_fa
            ref_dazz = icef.final_dazz_db

            # root_dir/output/map_noFL/input.split_{0:03d}.fasta
            input_fasta = icef.nfl_fa_i(i)

            # $input_fasta.partial_uc.pickle
            out_pickle = icef.nfl_pickle_i(i)

            # $input_fasta.partial_uc.pickle.DONE
            done_file = icef.nfl_done_i(i)

            # $input_fasta.partial_uc.sh
            script_file = icef.nfl_script_i(i)

            # Check if inputs exist.
            errMsg = ""
            if not nfs_exists(input_fasta):
                errMsg = (
                    "The {i}-th splitted non-full-length reads ".format(i=i) +
                    "fasta file {f} does not exist. ".format(f=input_fasta) +
                    "Please run $ICE_PARTIAL_PY split first.")
            elif not nfs_exists(ref_fasta):
                errMsg = ("The unpolished consensus isoforms fasta file " +
                          "{f} does not exist. ".format(f=ref_fasta) +
                          "Please make sure ICE is successfully done.")
            elif not nfs_exists(ref_dazz):
                errMsg = ("The dazz db " +
                          "{f} does not exist. ".format(f=ref_dazz) +
                          "Please make sure it is already built.")
            if errMsg:
                raise ValueError(errMsg)

            # Save cmd to script_file (any existing file is overwritten).
            cmd = self._cmd_str(root_dir=root_dir,
                                i=[i],
                                ccs_fofn=ccs_fofn,
                                blasr_nproc=blasr_nproc,
                                tmp_dir=tmp_dir)
            with open(script_file, 'w') as writer:
                writer.write(cmd + "\n")

            icef.add_log(
                "Writing CMD to: {script_file}".format(
                    script_file=script_file))
        finally:
            # Close the log on every exit path, including validation
            # failures, so the log is not left open when we raise.
            icef.close_log()

        return (input_fasta, ref_fasta, out_pickle, done_file)
コード例 #3
0
ファイル: IcePartialI.py プロジェクト: natechols/pbtranscript
    def _validate_inputs(self, root_dir, i, ccs_fofn, blasr_nproc, tmp_dir):
        """
        Check inputs, write $ICE_PARTIAL_PY i command to script_file
        and return (input_fasta, ref_fasta, out_pickle, done_file)
        for the i-th chunk of nfl reads.

        Parameters:
          root_dir -- root directory handed to IceFiles and to _cmd_str.
          i -- index of the chunk of nfl reads to validate.
          ccs_fofn, blasr_nproc, tmp_dir -- forwarded unchanged to
          self._cmd_str when building the command.

        Returns:
          (input_fasta, ref_fasta, out_pickle, done_file) -- paths obtained
          from IceFiles for chunk i.

        Raises:
          ValueError -- if the input fasta, the reference fasta, or the dazz
          db does not exist (checked in that order; the first missing one
          is reported).
        """
        icef = IceFiles(prog_name="ice_partial_{i}".format(i=i),
                        root_dir=root_dir, no_log_f=False)

        try:
            # root_dir/output/final.consensus.fasta
            ref_fasta = icef.final_consensus_fa
            ref_dazz = icef.final_dazz_db

            # root_dir/output/map_noFL/input.split_{0:03d}.fasta
            input_fasta = icef.nfl_fa_i(i)

            # $input_fasta.partial_uc.pickle
            out_pickle = icef.nfl_pickle_i(i)

            # $input_fasta.partial_uc.pickle.DONE
            done_file = icef.nfl_done_i(i)

            # $input_fasta.partial_uc.sh
            script_file = icef.nfl_script_i(i)

            # Check if inputs exist.
            errMsg = ""
            if not nfs_exists(input_fasta):
                errMsg = ("The {i}-th splitted non-full-length reads ".format(i=i) +
                          "fasta file {f} does not exist. ".format(f=input_fasta) +
                          "Please run $ICE_PARTIAL_PY split first.")
            elif not nfs_exists(ref_fasta):
                errMsg = ("The unpolished consensus isoforms fasta file " +
                          "{f} does not exist. ".format(f=ref_fasta) +
                          "Please make sure ICE is successfully done.")
            elif not nfs_exists(ref_dazz):
                errMsg = ("The dazz db " +
                          "{f} does not exist. ".format(f=ref_dazz) +
                          "Please make sure it is already built.")
            if errMsg:
                raise ValueError(errMsg)

            # Save cmd to script_file (any existing file is overwritten).
            cmd = self._cmd_str(root_dir=root_dir, i=[i],
                                ccs_fofn=ccs_fofn,
                                blasr_nproc=blasr_nproc,
                                tmp_dir=tmp_dir)
            with open(script_file, 'w') as writer:
                writer.write(cmd + "\n")

            icef.add_log("Writing CMD to: {script_file}".
                         format(script_file=script_file))
        finally:
            # Close the log on every exit path, including validation
            # failures, so the log is not left open when we raise.
            icef.close_log()

        return (input_fasta, ref_fasta, out_pickle, done_file)
コード例 #4
0
ファイル: IcePartialI.py プロジェクト: lpp1985/lpp_Script
    def _validate_inputs(self, root_dir, i, ccs_fofn, blasr_nproc, tmp_dir,
                         ref_fasta=None):
        """
        Check inputs, write $ICE_PARTIAL_PY i command to script_file
        and return (input_fasta, ref_fasta, out_pickle, done_file)
        for the i-th chunk of nfl reads.

        Parameters:
          root_dir -- root directory handed to IceFiles and to _cmd_str.
          i -- index of the chunk of nfl reads to validate.
          ccs_fofn, blasr_nproc, tmp_dir -- forwarded unchanged to
          self._cmd_str when building the command.
          ref_fasta -- accepted but currently ignored: the value is always
          replaced by icef.final_consensus_fa before it is used.

        Returns:
          (input_fasta, ref_fasta, out_pickle, done_file) -- paths obtained
          from IceFiles for chunk i.

        Raises:
          IOError -- if the input fasta, the reference fasta, or the dazz
          db does not exist (checked in that order; the first missing one
          is reported).
        """
        icef = IceFiles(prog_name="ice_partial_{i}".format(i=i),
                        root_dir=root_dir, no_log_f=False)

        try:
            # root_dir/output/final.consensus.fasta
            # (this overrides whatever was passed in as ref_fasta)
            ref_fasta = icef.final_consensus_fa
            ref_dazz = icef.final_dazz_db

            # root_dir/output/map_noFL/input.split_{0:03d}.fasta
            input_fasta = icef.nfl_fa_i(i)

            # $input_fasta.partial_uc.pickle
            out_pickle = icef.nfl_pickle_i(i)

            # $input_fasta.partial_uc.pickle.DONE
            done_file = icef.nfl_done_i(i)

            # $input_fasta.partial_uc.sh
            script_file = icef.nfl_script_i(i)

            # Check if inputs exist.
            errMsg = ""
            if not nfs_exists(input_fasta):
                errMsg = ("The {i}-th splitted non-full-length reads ".format(i=i) +
                          "fasta file {f} does not exist. ".format(f=input_fasta) +
                          "Please run $ICE_PARTIAL_PY split first.")
            elif not nfs_exists(ref_fasta):
                # ref_fasta --- root_dir/output/final.consensus.fasta
                # ref_dazz --- root_dir/output/final.consensus.dazz.fasta.db
                # ref_fasta and ref_dazz must exist if ICE has run
                # successfully in root_dir. If either one does not exist, it
                # means ICE has not successfully run in root_dir. Then we
                # have to throw an error message requiring users to copy the
                # root_dir/output directory manually, rather than providing
                # an option to overwrite ref_fasta and build ref_dazz,
                # because a race condition can happen when multiple
                # IcePartialI tasks start to run at the same time, which can
                # corrupt fasta and dazz db files and lead to unexpected
                # runtime errors.
                errMsg = ("The unpolished consensus isoforms fasta file " +
                          "{f} does not exist. ".format(f=ref_fasta) +
                          "Please make sure ICE is successfully done in root_dir, " +
                          "or copy ICE output directory (e.g., cluster_out/output) " +
                          "to {dst}".format(dst=op.dirname(ref_fasta)))
            elif not nfs_exists(ref_dazz):
                errMsg = ("The dazz db " +
                          "{f} does not exist. ".format(f=ref_dazz) +
                          "Please make sure it is already built.")
            if errMsg:
                raise IOError(errMsg)

            # Save cmd to script_file (any existing file is overwritten).
            cmd = self._cmd_str(root_dir=root_dir, i=[i],
                                ccs_fofn=ccs_fofn,
                                blasr_nproc=blasr_nproc,
                                tmp_dir=tmp_dir)
            with open(script_file, 'w') as writer:
                writer.write(cmd + "\n")

            icef.add_log("Writing CMD to: {script_file}".
                         format(script_file=script_file))
        finally:
            # Close the log on every exit path, including validation
            # failures, so the log is not left open when we raise.
            icef.close_log()

        return (input_fasta, ref_fasta, out_pickle, done_file)