Example #1
0
    def index_fasta(self):
        """Index the subread fasta files listed in fasta_fofn.

        Resolves the fasta file list from self.fasta_fofn, logs a message
        before and after indexing, and returns the MetaSubreadFastaReader
        built from those files.
        """
        fasta_files = get_files_from_fofn(self.fasta_fofn)
        self.add_log(
            "Indexing {0} fasta files, please wait.".format(len(fasta_files)))

        reader = MetaSubreadFastaReader(fasta_files)
        self.add_log("Fasta files indexing done.")
        return reader
Example #2
0
    def index_fasta(self):
        """Return a MetaSubreadFastaReader over the files in fasta_fofn.

        The file list is read from self.fasta_fofn; progress is reported
        through self.add_log before and after the reader is constructed.
        """
        subread_fastas = get_files_from_fofn(self.fasta_fofn)
        start_msg = "Indexing {0} fasta files, please wait.".format(
            len(subread_fastas))
        self.add_log(start_msg)

        indexed = MetaSubreadFastaReader(subread_fastas)
        self.add_log("Fasta files indexing done.")
        return indexed
Example #3
0
    def run(self):
        """Run quiver for ICE.

        Steps, in order:
          1. Create root_dir/quivered and root_dir/log_dir/quivered.
          2. Index the fasta files listed in self.fasta_fofn.
          3. Load 'uc' and 'refs' from self.final_pickle_fn and
             'partial_uc' from self.nfl_all_pickle_fn.
          4. Write the cluster -> FL/NonFL csv report to self.report_fn.
          5. Submit quiver jobs for all cluster ids (in sorted order) and
             record the submitted jobs in self.submitted_quiver_jobs_log.

        Returns:
            0 after the submitted-jobs log is written and the log closed.
        """
        # Create directories: root_dir/quivered and root_dir/log_dir/quivered
        mkdir(self.quivered_dir)
        mkdir(self.quivered_log_dir)

        files = get_files_from_fofn(self.fasta_fofn)
        msg = "Indexing {0} fasta files, please wait.".format(len(files))
        self.add_log(msg)

        d = MetaSubreadFastaReader(files)
        self.add_log("Fasta files indexing done.")

        self.add_log("Loading uc from {f}.".format(f=self.final_pickle_fn))
        # Use a context manager so the pickle file handle is always closed.
        with open(self.final_pickle_fn) as final_pickle:
            a = load(final_pickle)
        uc = a['uc']
        refs = a['refs']

        self.add_log("Loading partial uc from {f}.".
                     format(f=self.nfl_all_pickle_fn))
        with open(self.nfl_all_pickle_fn) as nfl_pickle:
            partial_uc = load(nfl_pickle)['partial_uc']
        # Clusters without an entry in partial_uc read as an empty list.
        partial_uc2 = defaultdict(list)
        partial_uc2.update(partial_uc)

        # Write report to quivered/cluster_report.FL_nonFL.csv
        self.add_log("Writing a csv report of cluster -> FL/NonFL reads to {f}".
                     format(f=self.report_fn), level=logging.INFO)
        self.write_report(uc=uc, partial_uc=partial_uc2,
                          report_fn=self.report_fn)

        # Every cluster id in uc is processed. A size-based filter
        # (len(uc[x]) > 1 or len(partial_uc2[x]) >= 10) was previously
        # applied here and is intentionally disabled.
        keys = sorted(uc)  # sorted cluster ids

        start = 0
        end = len(keys)

        submitted = []  # submitted jobs
        todo = []       # to-do jobs

        self.submit_quiver_jobs(d=d, uc=uc, partial_uc=partial_uc2,
                                refs=refs, keys=keys, start=start, end=end,
                                submitted=submitted, todo=todo,
                                use_sge=self.sge_opts.use_sge,
                                max_sge_jobs=self.sge_opts.max_sge_jobs,
                                quiver_nproc=self.sge_opts.quiver_nproc)

        # One line per submitted job: first and second fields, tab-separated.
        with open(self.submitted_quiver_jobs_log, 'w') as f:
            f.write("\n".join(str(x[0]) + '\t' + str(x[1]) for x in submitted))

        self.close_log()
        return 0
Example #4
0
    def validate_inputs(self):
        """Validate input fofns, and root_dir, log_dir, tmp_dir,
        create quivered_dir and quivered_log_dir.

        Checks are made in this order and stop at the first failure:
        log_dir, bas_fofn, fasta_fofn, every file listed in fasta_fofn,
        nfl_all_pickle_fn, final_pickle_fn.

        Raises:
            IOError: if any check fails; the message is also logged at
                ERROR level.
        """
        self.add_log("Validating inputs.")

        # Create directories: root_dir/quivered and root_dir/log_dir/quivered
        try:
            mkdir(self.quivered_dir)
            mkdir(self.quivered_log_dir)
        except OSError:
            # Multiple ice_quiver_i jobs may run at the same time and try to
            # mkdir, race condition may happen, so ignore OSError here.
            pass

        errMsg = ""

        if not nfs_exists(self.log_dir) or not op.isdir(self.log_dir):
            errMsg = "Log dir {l} is not an existing directory.".\
                format(l=self.log_dir)
        elif self.bas_fofn is None:
            errMsg = "Please specify bas_fofn (e.g. input.fofn)."
        elif not nfs_exists(self.bas_fofn):
            errMsg = "bas_fofn {f} ".format(f=self.bas_fofn) + \
                     "which contains bas/bax.h5 files does not exist."
        elif self.fasta_fofn is None:
            errMsg = "Please make sure ice_make_fasta_fofn has " + \
                     "been called, and specify fasta_fofn."
        elif not nfs_exists(self.fasta_fofn):
            errMsg = "Input fasta_fofn {f} does not exists.".\
                     format(f=self.fasta_fofn)
        else:
            # Only read the fofn once it is known to exist; the listed
            # files must be checked on this path, not when the fofn itself
            # is missing.
            for fasta_file in get_files_from_fofn(self.fasta_fofn):
                if not nfs_exists(fasta_file):
                    errMsg = "A file {f} in fasta_fofn does not exist.".\
                             format(f=fasta_file)
                    break

            if errMsg:
                pass  # a missing fasta file has already been reported
            elif not nfs_exists(self.nfl_all_pickle_fn):
                # "output/map_noFL/noFL.ALL.partial_uc.pickle"
                errMsg = "Pickle file {f} ".format(f=self.nfl_all_pickle_fn) + \
                         "which assigns all non-full-length reads to isoforms " + \
                         "does not exist. Please check 'ice_partial.py *' are " + \
                         "all done."
            elif not nfs_exists(self.final_pickle_fn):
                errMsg = "Pickle file {f} ".format(f=self.final_pickle_fn) + \
                         "which assigns full-length non-chimeric reads to " + \
                         "isoforms does not exist."

        if errMsg:
            self.add_log(errMsg, level=logging.ERROR)
            raise IOError(errMsg)
Example #5
0
    def validate_inputs(self):
        """Validate input fofns, and root_dir, log_dir, tmp_dir,
        create quivered_dir and quivered_log_dir.

        The first failing check (log_dir, bas_fofn, fasta_fofn, the files
        listed in fasta_fofn, nfl_all_pickle_fn, final_pickle_fn — in that
        order) is logged at ERROR level and raised.

        Raises:
            IOError: if any of the inputs is missing or invalid.
        """
        self.add_log("Validating inputs.")

        # Create directories: root_dir/quivered and root_dir/log_dir/quivered
        try:
            mkdir(self.quivered_dir)
            mkdir(self.quivered_log_dir)
        except OSError:
            # Multiple ice_quiver_i jobs may run at the same time and try to
            # mkdir, race condition may happen, so ignore OSError here.
            pass

        errMsg = ""

        if not nfs_exists(self.log_dir) or not op.isdir(self.log_dir):
            errMsg = "Log dir {l} is not an existing directory.".\
                format(l=self.log_dir)
        elif self.bas_fofn is None:
            errMsg = "Please specify bas_fofn (e.g. input.fofn)."
        elif not nfs_exists(self.bas_fofn):
            errMsg = "bas_fofn {f} ".format(f=self.bas_fofn) + \
                     "which contains bas/bax.h5 files does not exist."
        elif self.fasta_fofn is None:
            errMsg = "Please make sure ice_make_fasta_fofn has " + \
                     "been called, and specify fasta_fofn."
        elif not nfs_exists(self.fasta_fofn):
            errMsg = "Input fasta_fofn {f} does not exists.".\
                     format(f=self.fasta_fofn)
        else:
            # The fofn exists, so it can be read safely. Look for the first
            # listed fasta file that is missing before moving on to the
            # pickle checks.
            missing_fasta = None
            for fasta_file in get_files_from_fofn(self.fasta_fofn):
                if not nfs_exists(fasta_file):
                    missing_fasta = fasta_file
                    break

            if missing_fasta is not None:
                errMsg = "A file {f} in fasta_fofn does not exist.".\
                         format(f=missing_fasta)
            elif not nfs_exists(self.nfl_all_pickle_fn):
                # "output/map_noFL/noFL.ALL.partial_uc.pickle"
                errMsg = "Pickle file {f} ".format(f=self.nfl_all_pickle_fn) + \
                         "which assigns all non-full-length reads to isoforms " + \
                         "does not exist. Please check 'ice_partial.py *' are " + \
                         "all done."
            elif not nfs_exists(self.final_pickle_fn):
                errMsg = "Pickle file {f} ".format(f=self.final_pickle_fn) + \
                         "which assigns full-length non-chimeric reads to " + \
                         "isoforms does not exist."

        if errMsg:
            self.add_log(errMsg, level=logging.ERROR)
            raise IOError(errMsg)
Example #6
0
    def run(self):
        """Run quiver for ICE.

        Creates the quivered output and log directories, indexes the fasta
        files listed in self.fasta_fofn, loads 'uc'/'refs' from
        self.final_pickle_fn and 'partial_uc' from self.nfl_all_pickle_fn,
        writes the cluster report to self.report_fn, submits quiver jobs
        for all cluster ids in sorted order, and writes the submitted jobs
        to self.submitted_quiver_jobs_log.

        Returns:
            0 after the log has been closed.
        """
        # Create directories: root_dir/quivered and root_dir/log_dir/quivered
        mkdir(self.quivered_dir)
        mkdir(self.quivered_log_dir)

        files = get_files_from_fofn(self.fasta_fofn)
        msg = "Indexing {0} fasta files, please wait.".format(len(files))
        self.add_log(msg)

        d = MetaSubreadFastaReader(files)
        self.add_log("Fasta files indexing done.")

        self.add_log("Loading uc from {f}.".format(f=self.final_pickle_fn))
        # Read inside a `with` block so the file handle is not leaked.
        with open(self.final_pickle_fn) as pickle_in:
            a = load(pickle_in)
        uc = a['uc']
        refs = a['refs']

        self.add_log(
            "Loading partial uc from {f}.".format(f=self.nfl_all_pickle_fn))
        with open(self.nfl_all_pickle_fn) as pickle_in:
            partial_uc = load(pickle_in)['partial_uc']
        # Missing cluster ids default to an empty list.
        partial_uc2 = defaultdict(list)
        partial_uc2.update(partial_uc)

        # Write report to quivered/cluster_report.FL_nonFL.csv
        self.add_log(
            "Writing a csv report of cluster -> FL/NonFL reads to {f}".format(
                f=self.report_fn),
            level=logging.INFO)
        self.write_report(uc=uc,
                          partial_uc=partial_uc2,
                          report_fn=self.report_fn)

        # All cluster ids are used; the earlier filter
        # (len(uc[x]) > 1 or len(partial_uc2[x]) >= 10) is disabled.
        keys = sorted(uc)  # sorted cluster ids

        start = 0
        end = len(keys)

        submitted = []  # submitted jobs
        todo = []  # to-do jobs

        self.submit_quiver_jobs(d=d,
                                uc=uc,
                                partial_uc=partial_uc2,
                                refs=refs,
                                keys=keys,
                                start=start,
                                end=end,
                                submitted=submitted,
                                todo=todo,
                                use_sge=self.sge_opts.use_sge,
                                max_sge_jobs=self.sge_opts.max_sge_jobs,
                                quiver_nproc=self.sge_opts.quiver_nproc)

        # Each submitted entry is written as "<x[0]>\t<x[1]>", one per line.
        with open(self.submitted_quiver_jobs_log, 'w') as f:
            f.write("\n".join(str(x[0]) + '\t' + str(x[1]) for x in submitted))

        self.close_log()
        return 0