def _validate_inputs(self, root_dir, i, ccs_fofn, blasr_nproc, tmp_dir):
    """
    Check inputs, write $ICE_PARTIAL_PY i command to script_file
    and return (input_fasta, ref_fasta, out_pickle, done_file)
    for the i-th chunk of nfl reads.

    Parameters:
      root_dir    -- ICE root directory; used to build the IceFiles object
                     from which every path below is derived.
      i           -- index of the nfl chunk to validate.
      ccs_fofn    -- forwarded unchanged to self._cmd_str.
      blasr_nproc -- forwarded unchanged to self._cmd_str.
      tmp_dir     -- forwarded unchanged to self._cmd_str.

    Returns:
      tuple (input_fasta, ref_fasta, out_pickle, done_file).

    Raises:
      ValueError -- if the i-th split fasta, the consensus fasta or the
                    dazz db is reported missing by nfs_exists.
    """
    icef = IceFiles(prog_name="ice_partial_{i}".format(i=i),
                    root_dir=root_dir, no_log_f=False)

    # The log is opened by the constructor (no_log_f=False); make sure it
    # is closed on every exit path, including validation failures.
    try:
        # root_dir/output/final.consensus.fasta
        ref_fasta = icef.final_consensus_fa
        ref_dazz = icef.final_dazz_db

        # root_dir/output/map_noFL/input.split_{0:03d}.fasta
        input_fasta = icef.nfl_fa_i(i)

        # $input_fasta.partial_uc.pickle
        out_pickle = icef.nfl_pickle_i(i)

        # $input_fasta.partial_uc.pickle.DONE
        done_file = icef.nfl_done_i(i)

        # $input_fasta.partial_uc.sh
        script_file = icef.nfl_script_i(i)

        # Check if inputs exist; only the first missing input is reported.
        errMsg = ""
        if not nfs_exists(input_fasta):
            errMsg = ("The {i}-th splitted non-full-length reads ".format(i=i) +
                      "fasta file {f} does not exist. ".format(f=input_fasta) +
                      "Please run $ICE_PARTIAL_PY split first.")
        elif not nfs_exists(ref_fasta):
            errMsg = ("The unpolished consensus isoforms fasta file " +
                      "{f} does not exist. ".format(f=ref_fasta) +
                      "Please make sure ICE is successfully done.")
        elif not nfs_exists(ref_dazz):
            errMsg = ("The dazz db " +
                      "{f} does not exist. ".format(f=ref_dazz) +
                      "Please make sure it is already built.")

        if errMsg:
            raise ValueError(errMsg)

        # Save cmd to script_file.
        cmd = self._cmd_str(root_dir=root_dir, i=[i],
                            ccs_fofn=ccs_fofn,
                            blasr_nproc=blasr_nproc,
                            tmp_dir=tmp_dir)

        with open(script_file, 'w') as writer:
            writer.write(cmd + "\n")

        icef.add_log(
            "Writing CMD to: {script_file}".format(script_file=script_file))
    finally:
        icef.close_log()

    return (input_fasta, ref_fasta, out_pickle, done_file)
def _validate_inputs(self, root_dir, i, ccs_fofn, blasr_nproc, tmp_dir):
    """
    Check inputs, write $ICE_PARTIAL_PY i command to script_file
    and return (input_fasta, ref_fasta, out_pickle, done_file)
    for the i-th chunk of nfl reads.

    Parameters:
      root_dir    -- ICE root directory handed to IceFiles, which supplies
                     all file paths used here.
      i           -- index of the nfl chunk being processed.
      ccs_fofn, blasr_nproc, tmp_dir
                  -- only passed through to self._cmd_str when building
                     the command line written to the script file.

    Returns:
      tuple (input_fasta, ref_fasta, out_pickle, done_file).

    Raises:
      ValueError -- when nfs_exists reports that the split fasta, the
                    consensus fasta, or the dazz db is missing.
    """
    icef = IceFiles(prog_name="ice_partial_{i}".format(i=i),
                    root_dir=root_dir, no_log_f=False)

    # Everything after construction runs under try/finally so that the
    # log opened above is closed even when validation raises.
    try:
        # root_dir/output/final.consensus.fasta
        ref_fasta = icef.final_consensus_fa
        ref_dazz = icef.final_dazz_db

        # root_dir/output/map_noFL/input.split_{0:03d}.fasta
        input_fasta = icef.nfl_fa_i(i)

        # $input_fasta.partial_uc.pickle
        out_pickle = icef.nfl_pickle_i(i)

        # $input_fasta.partial_uc.pickle.DONE
        done_file = icef.nfl_done_i(i)

        # $input_fasta.partial_uc.sh
        script_file = icef.nfl_script_i(i)

        # Check if inputs exist, in order: split fasta, consensus fasta,
        # dazz db. The first missing one determines the error message.
        errMsg = ""
        if not nfs_exists(input_fasta):
            errMsg = ("The {i}-th splitted non-full-length reads ".format(i=i) +
                      "fasta file {f} does not exist. ".format(f=input_fasta) +
                      "Please run $ICE_PARTIAL_PY split first.")
        elif not nfs_exists(ref_fasta):
            errMsg = ("The unpolished consensus isoforms fasta file " +
                      "{f} does not exist. ".format(f=ref_fasta) +
                      "Please make sure ICE is successfully done.")
        elif not nfs_exists(ref_dazz):
            errMsg = ("The dazz db " +
                      "{f} does not exist. ".format(f=ref_dazz) +
                      "Please make sure it is already built.")

        if errMsg:
            raise ValueError(errMsg)

        # Save cmd to script_file.
        cmd = self._cmd_str(root_dir=root_dir, i=[i],
                            ccs_fofn=ccs_fofn,
                            blasr_nproc=blasr_nproc,
                            tmp_dir=tmp_dir)

        with open(script_file, 'w') as writer:
            writer.write(cmd + "\n")

        icef.add_log("Writing CMD to: {script_file}".
                     format(script_file=script_file))
    finally:
        icef.close_log()

    return (input_fasta, ref_fasta, out_pickle, done_file)
def _validate_inputs(self, root_dir, i, ccs_fofn, blasr_nproc, tmp_dir,
                     ref_fasta=None):
    """
    Check inputs, write $ICE_PARTIAL_PY i command to script_file
    and return (input_fasta, ref_fasta, out_pickle, done_file)
    for the i-th chunk of nfl reads.

    Parameters:
      root_dir    -- ICE root directory handed to IceFiles, which supplies
                     all file paths used here.
      i           -- index of the nfl chunk being processed.
      ccs_fofn, blasr_nproc, tmp_dir
                  -- only passed through to self._cmd_str when building
                     the command line written to the script file.
      ref_fasta   -- accepted but ignored: the value is immediately
                     replaced by icef.final_consensus_fa (see the note on
                     race conditions in the body).

    Returns:
      tuple (input_fasta, ref_fasta, out_pickle, done_file).

    Raises:
      IOError -- when nfs_exists reports that the split fasta, the
                 consensus fasta, or the dazz db is missing.
    """
    icef = IceFiles(prog_name="ice_partial_{i}".format(i=i),
                    root_dir=root_dir, no_log_f=False)

    # Everything after construction runs under try/finally so that the
    # log opened above is closed even when validation raises.
    try:
        # root_dir/output/final.consensus.fasta
        ref_fasta = icef.final_consensus_fa
        ref_dazz = icef.final_dazz_db

        # root_dir/output/map_noFL/input.split_{0:03d}.fasta
        input_fasta = icef.nfl_fa_i(i)

        # $input_fasta.partial_uc.pickle
        out_pickle = icef.nfl_pickle_i(i)

        # $input_fasta.partial_uc.pickle.DONE
        done_file = icef.nfl_done_i(i)

        # $input_fasta.partial_uc.sh
        script_file = icef.nfl_script_i(i)

        # Check if inputs exist.
        errMsg = ""
        if not nfs_exists(input_fasta):
            errMsg = ("The {i}-th splitted non-full-length reads ".format(i=i) +
                      "fasta file {f} does not exist. ".format(f=input_fasta) +
                      "Please run $ICE_PARTIAL_PY split first.")
        elif not nfs_exists(ref_fasta):
            # ref_fasta --- root_dir/output/final.consensus.fasta
            # ref_dazz  --- root_dir/output/final.consensus.dazz.fasta.db
            # ref_fasta and ref_dazz must exist if ICE has run successfully
            # in root_dir. If either one does not exist, it means ICE has
            # not successfully run in root_dir. Then we have to throw an
            # error message requiring users to copy the root_dir/output
            # directory manually, rather than providing an option to
            # overwrite ref_fasta and build ref_dazz, because a race
            # condition can happen when multiple IcePartialI tasks start to
            # run at the same time, which can corrupt fasta and dazz db
            # files and lead to unexpected runtime errors.
            errMsg = ("The unpolished consensus isoforms fasta file " +
                      "{f} does not exist. ".format(f=ref_fasta) +
                      "Please make sure ICE is successfully done in root_dir, " +
                      "or copy ICE output directory (e.g., cluster_out/output) " +
                      "to {dst}".format(dst=op.dirname(ref_fasta)))
        elif not nfs_exists(ref_dazz):
            errMsg = ("The dazz db " +
                      "{f} does not exist. ".format(f=ref_dazz) +
                      "Please make sure it is already built.")

        if errMsg:
            raise IOError(errMsg)

        # Save cmd to script_file.
        cmd = self._cmd_str(root_dir=root_dir, i=[i],
                            ccs_fofn=ccs_fofn,
                            blasr_nproc=blasr_nproc,
                            tmp_dir=tmp_dir)

        with open(script_file, 'w') as writer:
            writer.write(cmd + "\n")

        icef.add_log("Writing CMD to: {script_file}".
                     format(script_file=script_file))
    finally:
        icef.close_log()

    return (input_fasta, ref_fasta, out_pickle, done_file)