def index_fasta(self):
    """Index the subread fasta files listed in self.fasta_fofn and
    return the resulting MetaSubreadFastaReader.
    """
    fasta_paths = get_files_from_fofn(self.fasta_fofn)
    self.add_log(
        "Indexing {0} fasta files, please wait.".format(len(fasta_paths)))
    reader = MetaSubreadFastaReader(fasta_paths)
    self.add_log("Fasta files indexing done.")
    return reader
def run(self):
    """Run quiver for ICE.

    Creates quivered_dir and quivered_log_dir, indexes the subread
    fasta files, loads the FL (final) and non-FL (partial) cluster
    pickles, writes a cluster -> FL/NonFL read report CSV, submits
    quiver jobs, and records the submitted jobs to a log file.

    Returns:
        0 on completion.
    """
    # Create directories: root_dir/quivered and root_dir/log_dir/quivered
    mkdir(self.quivered_dir)
    mkdir(self.quivered_log_dir)

    # Reuse index_fasta() rather than duplicating its body inline.
    d = self.index_fasta()

    self.add_log("Loading uc from {f}.".format(f=self.final_pickle_fn))
    # Use context managers so the pickle file handles are always closed
    # (the previous load(open(...)) leaked them), and open in binary
    # mode since pickle data may use a binary protocol.
    with open(self.final_pickle_fn, 'rb') as handle:
        a = load(handle)
    uc = a['uc']
    refs = a['refs']

    self.add_log("Loading partial uc from {f}.".
                 format(f=self.nfl_all_pickle_fn))
    with open(self.nfl_all_pickle_fn, 'rb') as handle:
        partial_uc = load(handle)['partial_uc']
    # defaultdict(list) so clusters with no non-FL reads map to [].
    partial_uc2 = defaultdict(list)
    partial_uc2.update(partial_uc)

    # Write report to quivered/cluster_report.FL_nonFL.csv
    self.add_log("Writing a csv report of cluster -> FL/NonFL reads to {f}".
                 format(f=self.report_fn), level=logging.INFO)
    self.write_report(uc=uc, partial_uc=partial_uc2,
                      report_fn=self.report_fn)

    # All clusters are currently kept; the commented-out filter would
    # keep only clusters with >1 FL read or >=10 non-FL reads.
    good = [x for x in uc]
    #[x for x in uc if len(uc[x]) > 1 or len(partial_uc2[x]) >= 10]
    keys = sorted(good)  # sort good keys (cluster ids)
    start = 0
    end = len(keys)
    submitted = []  # submitted jobs
    todo = []  # to-do jobs
    self.submit_quiver_jobs(d=d, uc=uc, partial_uc=partial_uc2, refs=refs,
                            keys=keys, start=start, end=end,
                            submitted=submitted, todo=todo,
                            use_sge=self.sge_opts.use_sge,
                            max_sge_jobs=self.sge_opts.max_sge_jobs,
                            quiver_nproc=self.sge_opts.quiver_nproc)

    # Persist submitted jobs as tab-separated pairs, one per line.
    with open(self.submitted_quiver_jobs_log, 'w') as f:
        f.write("\n".join(str(x[0]) + '\t' + str(x[1]) for x in submitted))

    self.close_log()
    return 0
def validate_inputs(self):
    """Validate input fofns, and root_dir, log_dir, tmp_dir, create
    quivered_dir and quivered_log_dir.

    Raises:
        IOError: if any required directory, fofn, listed fasta file,
            or pickle file is missing.
    """
    self.add_log("Validating inputs.")

    # Create directories: root_dir/quivered and root_dir/log_dir/quivered
    try:
        mkdir(self.quivered_dir)
        mkdir(self.quivered_log_dir)
    except OSError:
        # Multiple ice_quiver_i jobs may run at the same time and try to
        # mkdir, race condition may happen, so ignore OSError here.
        pass

    errMsg = ""
    if not nfs_exists(self.log_dir) or not op.isdir(self.log_dir):
        errMsg = "Log dir {l} is not an existing directory.".\
            format(l=self.log_dir)
    elif self.bas_fofn is None:
        errMsg = "Please specify bas_fofn (e.g. input.fofn)."
    elif not nfs_exists(self.bas_fofn):
        errMsg = "bas_fofn {f} ".format(f=self.bas_fofn) + \
                 "which contains bas/bax.h5 files does not exist."
    elif self.fasta_fofn is None:
        errMsg = "Please make sure ice_make_fasta_fofn has " + \
                 "been called, and specify fasta_fofn."
    elif not nfs_exists(self.fasta_fofn):
        errMsg = "Input fasta_fofn {f} does not exists.".\
            format(f=self.fasta_fofn)
    elif not nfs_exists(self.nfl_all_pickle_fn):
        #"output/map_noFL/noFL.ALL.partial_uc.pickle"
        errMsg = "Pickle file {f} ".format(f=self.nfl_all_pickle_fn) + \
                 "which assigns all non-full-length reads to isoforms " + \
                 "does not exist. Please check 'ice_partial.py *' are " + \
                 "all done."
    elif not nfs_exists(self.final_pickle_fn):
        errMsg = "Pickle file {f} ".format(f=self.final_pickle_fn) + \
                 "which assigns full-length non-chimeric reads to " + \
                 "isoforms does not exist."
    else:
        # BUGFIX: the per-file check used to live inside the branch
        # where fasta_fofn did NOT exist, so it read a missing fofn and
        # clobbered its error message, and never ran when the fofn was
        # present. Check the listed files only after everything else
        # validated, stopping at the first missing file.
        for fasta_file in get_files_from_fofn(self.fasta_fofn):
            if not nfs_exists(fasta_file):
                errMsg = "A file {f} in fasta_fofn does not exist.".\
                    format(f=fasta_file)
                break

    if errMsg != "":
        self.add_log(errMsg, level=logging.ERROR)
        raise IOError(errMsg)
def run(self):
    """Run quiver for ICE.

    Creates quivered_dir and quivered_log_dir, indexes the subread
    fasta files, loads the FL (final) and non-FL (partial) cluster
    pickles, writes a cluster -> FL/NonFL read report CSV, submits
    quiver jobs, and records the submitted jobs to a log file.

    Returns:
        0 on completion.
    """
    # Create directories: root_dir/quivered and root_dir/log_dir/quivered
    mkdir(self.quivered_dir)
    mkdir(self.quivered_log_dir)

    files = get_files_from_fofn(self.fasta_fofn)
    self.add_log(
        "Indexing {0} fasta files, please wait.".format(len(files)))
    d = MetaSubreadFastaReader(files)
    self.add_log("Fasta files indexing done.")

    self.add_log("Loading uc from {f}.".format(f=self.final_pickle_fn))
    # Use context managers so the pickle file handles are always closed
    # (the previous load(open(...)) leaked them), and open in binary
    # mode since pickle data may use a binary protocol.
    with open(self.final_pickle_fn, 'rb') as handle:
        a = load(handle)
    uc = a['uc']
    refs = a['refs']

    self.add_log(
        "Loading partial uc from {f}.".format(f=self.nfl_all_pickle_fn))
    with open(self.nfl_all_pickle_fn, 'rb') as handle:
        partial_uc = load(handle)['partial_uc']
    # defaultdict(list) so clusters with no non-FL reads map to [].
    partial_uc2 = defaultdict(list)
    partial_uc2.update(partial_uc)

    # Write report to quivered/cluster_report.FL_nonFL.csv
    self.add_log(
        "Writing a csv report of cluster -> FL/NonFL reads to {f}".format(
            f=self.report_fn), level=logging.INFO)
    self.write_report(uc=uc, partial_uc=partial_uc2,
                      report_fn=self.report_fn)

    # All clusters are currently kept; the commented-out filter would
    # keep only clusters with >1 FL read or >=10 non-FL reads.
    good = [x for x in uc]
    #[x for x in uc if len(uc[x]) > 1 or len(partial_uc2[x]) >= 10]
    keys = sorted(good)  # sort good keys (cluster ids)
    start = 0
    end = len(keys)
    submitted = []  # submitted jobs
    todo = []  # to-do jobs
    self.submit_quiver_jobs(d=d, uc=uc, partial_uc=partial_uc2, refs=refs,
                            keys=keys, start=start, end=end,
                            submitted=submitted, todo=todo,
                            use_sge=self.sge_opts.use_sge,
                            max_sge_jobs=self.sge_opts.max_sge_jobs,
                            quiver_nproc=self.sge_opts.quiver_nproc)

    # Persist submitted jobs as tab-separated pairs, one per line.
    with open(self.submitted_quiver_jobs_log, 'w') as f:
        f.write("\n".join(str(x[0]) + '\t' + str(x[1]) for x in submitted))

    self.close_log()
    return 0