예제 #1
0
    def test_qsub_cmd(self):
        """Check the qsub command lines built by SgeOptions.qsub_cmd.

        Two configurations are covered: default options, and options with a
        custom queue / environment name combined with waiting and job
        dependencies.
        """
        script = "a.sh"

        # Default options: only the unique id is supplied.
        default_opts = SgeOptions(unique_id=100)
        default_cmd = default_opts.qsub_cmd(script, num_threads=1)
        self.assertEqual(
            default_cmd,
            "qsub -cwd -V -S /bin/bash -pe smp 1 -e /dev/null -o /dev/null a.sh")

        # Custom queue and environment name, synchronous submission and
        # a hold on three earlier jobs.
        custom_opts = SgeOptions(unique_id=100,
                                 sge_queue="my_sge_queue",
                                 sge_env_name="orte")
        custom_cmd = custom_opts.qsub_cmd(script,
                                          num_threads=1,
                                          wait_before_exit=True,
                                          depend_on_jobs=['1', '2', '3'])
        self.assertEqual(
            custom_cmd,
            "qsub -cwd -V -S /bin/bash -pe orte 1 -q my_sge_queue -sync y -hold_jid 1,2,3 -e /dev/null -o /dev/null a.sh")
예제 #2
0
    def run(self):
        """Execute ice_partial.py all|one|split|i|merge.

        Builds the runner object that matches ``self.args.subCommand``, logs
        its command string and runs it.

        Returns:
            0 on success; 1 if the sub-command is unknown or if building or
            running the runner raised an exception (the traceback is logged).
        """
        cmd = self.args.subCommand
        logging.info("Running {f} {cmd} v{v}.".format(f=op.basename(__file__),
                                                      cmd=cmd,
                                                      v=get_version()))
        # Stays empty until a runner has been built, so the failure message
        # below is still well-formed when construction itself fails.
        cmd_str = ""
        try:
            args = self.args
            if cmd == "all":
                sge_opts = SgeOptions(unique_id=args.unique_id,
                                      use_sge=args.use_sge,
                                      max_sge_jobs=args.max_sge_jobs,
                                      blasr_nproc=args.blasr_nproc)
                obj = IceAllPartials(
                    root_dir=args.root_dir,
                    fasta_filenames=args.fasta_filenames.split(','),
                    ref_fasta=args.ref_fasta,
                    out_pickle=args.out_pickle,
                    sge_opts=sge_opts,
                    ccs_fofn=args.ccs_fofn,
                    tmp_dir=args.tmp_dir)
            elif cmd == "one":
                # Only assign nfl reads in the given input_fasta file to isoforms
                obj = IcePartialOne(input_fasta=args.input_fasta,
                                    ref_fasta=args.ref_fasta,
                                    out_pickle=args.out_pickle,
                                    ccs_fofn=args.ccs_fofn,
                                    done_filename=args.done_filename,
                                    blasr_nproc=args.blasr_nproc,
                                    tmp_dir=args.tmp_dir)
            elif cmd == "split":
                obj = IcePartialSplit(root_dir=args.root_dir,
                                      nfl_fa=args.nfl_fa,
                                      N=args.N)
            elif cmd == "i":
                obj = IcePartialI(root_dir=args.root_dir,
                                  i=args.i,
                                  ccs_fofn=args.ccs_fofn,
                                  blasr_nproc=args.blasr_nproc,
                                  tmp_dir=args.tmp_dir)
            elif cmd == "merge":
                obj = IcePartialMerge(root_dir=args.root_dir, N=args.N)
            else:
                raise ValueError(
                    "Unknown command passed to {f}: {cmd}.".format(
                        f=op.basename(__file__), cmd=cmd))

            cmd_str = obj.cmd_str()
            logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
            obj.run()
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still propagate instead of being turned into a
            # plain return code.
            logging.exception("Exiting {cmd_str} with return code 1.".format(
                cmd_str=cmd_str))
            return 1
        return 0
예제 #3
0
def main(query_filename, target_filename, output_dir):
    """Run DalignerRunner for a query/target pair with SGE enabled.

    The runner is configured for non-FL, same-strand-only alignment of
    inputs that have not been converted yet, using SgeOptions(100).

    Returns:
        Whatever DalignerRunner.run(output_dir=...) returns.
    """
    runner_settings = dict(query_filename=query_filename,
                           target_filename=target_filename,
                           is_FL=False,
                           same_strand_only=True,
                           query_converted=False,
                           target_converted=False,
                           use_sge=True,
                           sge_opts=SgeOptions(100))
    runner = DalignerRunner(**runner_settings)
    return runner.run(output_dir=output_dir)
예제 #4
0
 def __init__(self, root_dir, subread_set, nproc):
     """Create the "tmp" directory under root_dir and initialise the base class.

     The base initialiser receives subread_set as bas_fofn, no fasta_fofn,
     and SGE options with SGE disabled where nproc is used for both
     blasr_nproc and quiver_nproc.
     """
     mkdir(op.join(root_dir, "tmp"))

     # SGE is switched off; nproc drives both blasr and quiver.
     local_sge_opts = SgeOptions(unique_id=12345,
                                 use_sge=False,
                                 max_sge_jobs=0,
                                 blasr_nproc=nproc,
                                 quiver_nproc=nproc)

     base = super(IceQuiverRTC, self)
     base.__init__(root_dir=root_dir,
                   bas_fofn=subread_set,
                   fasta_fofn=None,
                   sge_opts=local_sge_opts,
                   prog_name="IceQuiver")
예제 #5
0
    def test_run(self):
        """Test run(output_dir) on SGE (when qstat succeeds) and locally.

        After each run every file listed in the runner's las_filenames and
        la4ice_filenames must exist; after clean_run() none of them may
        exist any more.
        """
        def assert_outputs_exist():
            """Assert that every expected las / la4ice output file exists."""
            for filenames in (self.runner.las_filenames,
                              self.runner.la4ice_filenames):
                for filename in filenames:
                    # Single-argument print(...) emits the same text under
                    # Python 2 and Python 3.
                    print("Checking existance of " + filename)
                    self.assertTrue(op.exists(filename))

        def assert_outputs_removed():
            """Assert that every las / la4ice output file has been deleted."""
            for filenames in (self.runner.las_filenames,
                              self.runner.la4ice_filenames):
                for filename in filenames:
                    print("Checking %s has been removed.\n" % filename)
                    self.assertFalse(op.exists(filename))

        # The second item returned by backticks is compared with 0 to decide
        # whether SGE is usable -- presumably the exit status of `qstat`;
        # confirm against backticks().
        run_on_sge = (backticks('qstat')[1] == 0)

        if run_on_sge:
            self.runner.use_sge = True
            self.runner.sge_opts = SgeOptions(100)
            mknewdir(self.out_dir)
            self.runner.run(output_dir=self.out_dir)
            assert_outputs_exist()

        # Run locally
        self.runner.use_sge = False
        mknewdir(self.out_dir)
        self.runner.run(output_dir=self.out_dir)
        assert_outputs_exist()

        # clean all output
        self.runner.clean_run()
        assert_outputs_removed()
예제 #6
0
    def run(self):
        """Create the output directory and run DalignerRunner on the inputs.

        SGE options and runner settings are taken from self.args; the query
        and target are always passed as not yet converted.
        """
        script_name = op.basename(__file__)
        version = self.getVersion()
        logging.info("Running {f} v{v}.".format(f=script_name, v=version))

        opts = self.args
        mkdir(opts.output_dir)

        sge_settings = SgeOptions(
            unique_id=opts.unique_id,
            use_sge=opts.use_sge,
            max_sge_jobs=opts.max_sge_jobs,
            blasr_nproc=opts.blasr_nproc,
            sge_env_name=opts.sge_env_name,
            sge_queue=opts.sge_queue)

        runner = DalignerRunner(
            query_filename=opts.query_fasta,
            target_filename=opts.target_fasta,
            is_FL=opts.is_FL,
            same_strand_only=opts.same_strand_only,
            query_converted=False,
            target_converted=False,
            use_sge=opts.use_sge,
            sge_opts=sge_settings)
        runner.run(output_dir=opts.output_dir)
예제 #7
0
    def run(self):
        """Build the option objects from self.args and run Polish.

        Returns:
            0 when Polish finished; 1 when creating or running it raised an
            Exception (the message is logged and the traceback is printed).
        """
        script_name = op.basename(__file__)
        version = self.getVersion()
        logging.info("Running {f} v{v}.".format(f=script_name, v=version))

        opts = self.args

        sge_settings = SgeOptions(
            unique_id=opts.unique_id,
            use_sge=opts.use_sge,
            max_sge_jobs=opts.max_sge_jobs,
            quiver_nproc=opts.quiver_nproc,
            blasr_nproc=opts.blasr_nproc,
            sge_env_name=opts.sge_env_name,
            sge_queue=opts.sge_queue)

        hqlq_settings = IceQuiverHQLQOptions(
            hq_isoforms_fa=opts.hq_isoforms_fa,
            hq_isoforms_fq=opts.hq_isoforms_fq,
            lq_isoforms_fa=opts.lq_isoforms_fa,
            lq_isoforms_fq=opts.lq_isoforms_fq,
            qv_trim_5=opts.qv_trim_5,
            qv_trim_3=opts.qv_trim_3,
            hq_quiver_min_accuracy=opts.hq_quiver_min_accuracy)

        try:
            # ICE options are left at their defaults.
            polisher = Polish(
                root_dir=opts.root_dir,
                nfl_fa=opts.nfl_fa,
                bas_fofn=opts.bas_fofn,
                ccs_fofn=opts.ccs_fofn,
                sge_opts=sge_settings,
                ice_opts=IceOptions(),
                ipq_opts=hqlq_settings,
                tmp_dir=opts.tmp_dir)
            polisher.run()
        except Exception as err:
            logging.error(str(err))
            import traceback
            traceback.print_exc()
            return 1
        else:
            return 0
예제 #8
0
    def test_sge_job_runner(self):
        """Submit four short sleep commands through sge_job_runner.

        No "done" script is used: that variant is currently disabled.
        """
        cmds = ["sleep 5"] * 4
        script_files = [
            op.join(self.out_dir, "test_sge_job_runner_%s.sh" % idx)
            for idx, _ in enumerate(cmds)
        ]

        # Remove script files left over from an earlier run.
        for stale_script in script_files:
            backticks('rm %s' % stale_script)

        sge_opts = SgeOptions(100)
        job_ids = sge_job_runner(cmds,
                                 script_files=script_files,
                                 num_threads_per_job=1,
                                 sge_opts=sge_opts,
                                 qsub_try_times=1)
예제 #9
0
    def run(self):
        """Dispatch to the classify, cluster or subset sub-command.

        The runner object for the selected sub-command is built from
        self.args and then run.

        Returns:
            0 on success; 1 when the sub-command is unknown or when building
            or running the runner raised an Exception (traceback is logged).
        """
        cmd = self._subCommand
        try:
            args = self.args
            if cmd == 'classify':
                chimera_opts = ChimeraDetectionOptions(
                    min_seq_len=args.min_seq_len,
                    min_score=args.min_score,
                    min_dist_from_end=args.min_dist_from_end,
                    max_adjacent_hit_dist=args.max_adjacent_hit_dist,
                    primer_search_window=args.primer_search_window,
                    detect_chimera_nfl=args.detect_chimera_nfl)
                runner = Classifier(reads_fn=args.readsFN,
                                    out_dir=args.outDir,
                                    out_reads_fn=args.outReadsFN,
                                    primer_fn=args.primerFN,
                                    primer_report_fn=args.primerReportFN,
                                    summary_fn=args.summary_fn,
                                    cpus=args.cpus,
                                    change_read_id=True,
                                    opts=chimera_opts,
                                    out_flnc_fn=args.flnc_fa,
                                    out_nfl_fn=args.nfl_fa,
                                    ignore_polyA=args.ignore_polyA,
                                    reuse_dom=args.reuse_dom,
                                    ignore_empty_output=args.ignore_empty_output)
            elif cmd == 'cluster':
                ice_settings = IceOptions(
                    quiver=args.quiver,
                    use_finer_qv=args.use_finer_qv,
                    targeted_isoseq=args.targeted_isoseq,
                    flnc_reads_per_split=args.flnc_reads_per_split,
                    nfl_reads_per_split=args.nfl_reads_per_split,
                    num_clusters_per_bin=args.num_clusters_per_bin)
                sge_settings = SgeOptions(
                    unique_id=args.unique_id,
                    use_sge=args.use_sge,
                    max_sge_jobs=args.max_sge_jobs,
                    blasr_nproc=args.blasr_nproc,
                    quiver_nproc=args.quiver_nproc,
                    sge_queue=args.sge_queue,
                    sge_env_name=args.sge_env_name)
                hqlq_settings = IceQuiverHQLQOptions(
                    qv_trim_5=args.qv_trim_5,
                    qv_trim_3=args.qv_trim_3,
                    hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                    hq_isoforms_fa=args.hq_isoforms_fa,
                    hq_isoforms_fq=args.hq_isoforms_fq,
                    lq_isoforms_fa=args.lq_isoforms_fa,
                    lq_isoforms_fq=args.lq_isoforms_fq)
                runner = Cluster(root_dir=args.root_dir,
                                 flnc_fa=args.flnc_fa,
                                 nfl_fa=args.nfl_fa,
                                 bas_fofn=args.bas_fofn,
                                 ccs_fofn=args.ccs_fofn,
                                 out_fa=args.consensusFa,
                                 sge_opts=sge_settings,
                                 ice_opts=ice_settings,
                                 ipq_opts=hqlq_settings,
                                 report_fn=args.report_fn,
                                 summary_fn=args.summary_fn,
                                 output_pickle_file=args.pickle_fn,
                                 tmp_dir=args.tmp_dir)
            elif cmd == 'subset':
                subset_rules = SubsetRules(FL=args.FL,
                                           nonChimeric=args.nonChimeric)
                runner = ReadsSubsetExtractor(
                    inFN=args.readsFN,
                    outFN=args.outFN,
                    rules=subset_rules,
                    ignore_polyA=args.ignore_polyA,
                    printReadLengthOnly=args.printReadLengthOnly)
            else:
                raise PBTranscriptException(
                    cmd, "Unknown command passed to pbtranscript:" +
                    args.subName)

            # Every recognised sub-command ends by running its runner.
            runner.run()
        except Exception:
            logging.exception("Exiting pbtranscript with return code 1.")
            return 1
        return 0
예제 #10
0
 def run(self):
     """Execute ice_quiver.py all|i|merge|postprocess.

     Builds the runner object that matches ``self.args.subCommand``, logs
     its command string and runs it.

     Returns:
         0 on success; 1 if the sub-command is unknown or if building or
         running the runner raised an exception (the traceback is logged).
     """
     cmd = self.args.subCommand
     logging.info("Running {f} {cmd} v{v}.".format(f=op.basename(__file__),
                                                   cmd=cmd, v=get_version()))
     # Stays empty until a runner has been built, so the failure message
     # below is still well-formed when construction itself fails.
     cmd_str = ""
     try:
         args = self.args
         if cmd == "all":
             sge_opts = SgeOptions(unique_id=args.unique_id,
                                   use_sge=args.use_sge,
                                   max_sge_jobs=args.max_sge_jobs,
                                   blasr_nproc=args.blasr_nproc,
                                   quiver_nproc=args.quiver_nproc)
             ipq_opts = IceQuiverHQLQOptions(
                 hq_isoforms_fa=args.hq_isoforms_fa,
                 hq_isoforms_fq=args.hq_isoforms_fq,
                 lq_isoforms_fa=args.lq_isoforms_fa,
                 lq_isoforms_fq=args.lq_isoforms_fq,
                 qv_trim_5=args.qv_trim_5,
                 qv_trim_3=args.qv_trim_3,
                 hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)
             obj = IceQuiverAll(root_dir=args.root_dir,
                                bas_fofn=args.bas_fofn,
                                fasta_fofn=None,
                                sge_opts=sge_opts,
                                ipq_opts=ipq_opts,
                                tmp_dir=args.tmp_dir)
         elif cmd == "i":
             sge_opts = SgeOptions(unique_id=args.unique_id,
                                   use_sge=args.use_sge,
                                   max_sge_jobs=args.max_sge_jobs,
                                   blasr_nproc=args.blasr_nproc,
                                   quiver_nproc=args.quiver_nproc)
             obj = IceQuiverI(root_dir=args.root_dir, i=args.i, N=args.N,
                              bas_fofn=args.bas_fofn,
                              fasta_fofn=None,
                              sge_opts=sge_opts,
                              tmp_dir=args.tmp_dir)
         elif cmd == "merge":
             obj = IceQuiverMerge(root_dir=args.root_dir, N=args.N)
         elif cmd == "postprocess":
             ipq_opts = IceQuiverHQLQOptions(
                 hq_isoforms_fa=args.hq_isoforms_fa,
                 hq_isoforms_fq=args.hq_isoforms_fq,
                 lq_isoforms_fa=args.lq_isoforms_fa,
                 lq_isoforms_fq=args.lq_isoforms_fq,
                 qv_trim_5=args.qv_trim_5,
                 qv_trim_3=args.qv_trim_3,
                 hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)
             obj = IceQuiverPostprocess(root_dir=args.root_dir,
                                        ipq_opts=ipq_opts,
                                        use_sge=args.use_sge,
                                        quit_if_not_done=args.quit_if_not_done,
                                        summary_fn=args.summary_fn,
                                        report_fn=args.report_fn)
         else:
             raise ValueError("Unknown command passed to {f}: {cmd}.".
                              format(f=op.basename(__file__), cmd=cmd))
         cmd_str = obj.cmd_str()
         logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
         obj.run()
     except Exception:
         # Catch Exception rather than everything, so KeyboardInterrupt
         # and SystemExit still propagate instead of being turned into a
         # plain return code.
         logging.exception("Exiting {cmd_str} with return code 1.".
                           format(cmd_str=cmd_str))
         return 1
     return 0
예제 #11
0
def args_runner(args):
    """Run the full pipeline described by the parsed arguments.

    Steps, in order:
      (1) separate FLNC reads into bins,
      (2) run ICE/Quiver (Cluster) on every bin,
      (3) merge the polished isoforms from all bins,
      (4) map the HQ isoforms with GMAP and sort the alignments,
      (5) post-mapping analysis (collapse, count abundance, filter).

    Args:
        args: parsed command-line arguments; see the attributes read below.

    Returns:
        0 once all steps have completed.

    Raises:
        ValueError: if args.collapsed_filtered_fn has neither a FASTA
            (.fa/.fasta) nor a FASTQ (.fq/.fastq) extension. This is checked
            before any work starts.
    """
    logging.info("%s arguments are:\n%s\n", __file__, args)

    # sanity check arguments
    _sanity_check_args(args)

    # Decide the output format before doing any work, so a bad output file
    # name fails immediately instead of after clustering and mapping.
    out_isoforms = args.collapsed_filtered_fn
    if out_isoforms.endswith((".fa", ".fasta")):
        output_is_fastq = False
    elif out_isoforms.endswith((".fq", ".fastq")):
        output_is_fastq = True
    else:
        raise ValueError("Output file %s must be FASTA or FASTQ!" %
                         out_isoforms)

    # make option objects
    ice_opts = IceOptions(quiver=args.quiver,
                          use_finer_qv=args.use_finer_qv,
                          targeted_isoseq=args.targeted_isoseq,
                          ece_penalty=args.ece_penalty,
                          ece_min_len=args.ece_min_len,
                          flnc_reads_per_split=args.flnc_reads_per_split,
                          nfl_reads_per_split=args.nfl_reads_per_split)
    sge_opts = SgeOptions(unique_id=args.unique_id,
                          use_sge=args.use_sge,
                          max_sge_jobs=args.max_sge_jobs,
                          blasr_nproc=args.blasr_nproc,
                          quiver_nproc=args.quiver_nproc,
                          gcon_nproc=args.gcon_nproc,
                          sge_env_name=args.sge_env_name,
                          sge_queue=args.sge_queue)
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=args.qv_trim_5,
        qv_trim_3=args.qv_trim_3,
        hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)

    # (1) separate flnc reads into bins
    logging.info("Separating FLNC reads into bins.")
    tofu_f = TofuFiles(tofu_dir=args.tofu_dir)
    s = SeparateFLNCRunner(flnc_fa=args.flnc_fa,
                           root_dir=args.tofu_dir,
                           out_pickle=tofu_f.separate_flnc_pickle,
                           bin_size_kb=args.bin_size_kb,
                           bin_by_primer=args.bin_by_primer,
                           bin_manual=args.bin_manual,
                           max_base_limit_MB=args.max_base_limit_MB)
    s.run()

    flnc_files = SeparateFLNCBase.convert_pickle_to_sorted_flnc_files(
        tofu_f.separate_flnc_pickle)
    logging.info("Separated FLNC reads bins are %s", flnc_files)

    # (2) apply 'pbtranscript cluster' to each bin
    # run ICE/Quiver (the whole thing), providing the fasta_fofn
    logging.info("Running ICE/Polish on separated FLNC reads bins.")
    split_dirs = []
    for flnc_file in flnc_files:
        split_dir = op.join(realpath(op.dirname(flnc_file)), "cluster_out")
        mkdir(split_dir)
        split_dirs.append(split_dir)
        cur_out_cons = op.join(split_dir, "consensus_isoforms.fasta")

        ipq_f = IceQuiverPostprocess(root_dir=split_dir, ipq_opts=ipq_opts)
        if op.exists(ipq_f.quivered_good_fq):
            # This bin already has its HQ polished output; do not redo it.
            logging.warning("HQ polished isoforms %s already exist. SKIP!",
                            ipq_f.quivered_good_fq)
            continue

        logging.info("Running ICE/Quiver on %s", split_dir)
        rmpath(cur_out_cons)

        obj = Cluster(root_dir=split_dir,
                      flnc_fa=flnc_file,
                      nfl_fa=args.nfl_fa,
                      bas_fofn=args.bas_fofn,
                      ccs_fofn=args.ccs_fofn,
                      fasta_fofn=args.fasta_fofn,
                      out_fa=cur_out_cons,
                      sge_opts=sge_opts,
                      ice_opts=ice_opts,
                      ipq_opts=ipq_opts)

        if args.mem_debug:  # DEBUG
            from memory_profiler import memory_usage
            start_t = time.time()
            mem_usage = memory_usage(obj.run, interval=60)
            end_t = time.time()
            with open('mem_debug.log', 'a') as f:
                f.write("Running ICE/Quiver on {0} took {1} secs.\n".format(
                    split_dir, end_t - start_t))
                f.write("Maximum memory usage: {0}\n".format(max(mem_usage)))
                f.write("Memory usage: {0}\n".format(mem_usage))
        else:
            obj.run()

        if not args.keep_tmp_files:  # by default, delete all temporary files.
            # subprocess.call waits for `rm` to finish, so no child process
            # is left unreaped and the directories are gone before the next
            # step starts. A non-zero exit status from `rm` is still
            # ignored, as it was before.
            for stale_dir in (ipq_f.tmp_dir, ipq_f.quivered_dir):
                logging.info("Deleting %s", stale_dir)
                subprocess.call(['rm', '-rf', '%s' % stale_dir])

    # (3) merge polished isoform cluster from all bins
    logging.info("Merging isoforms from all bins to %s.", tofu_f.combined_dir)
    c = CombineRunner(combined_dir=tofu_f.combined_dir,
                      sample_name=get_sample_name(args.sample_name),
                      split_dirs=split_dirs,
                      ipq_opts=ipq_opts)
    c.run()
    if args.summary_fn is not None:
        ln(tofu_f.all_cluster_summary_fn, args.summary_fn)
    if args.report_fn is not None:
        ln(tofu_f.all_cluster_report_fn, args.report_fn)

    # (4) map HQ isoforms to GMAP reference genome
    map_isoforms_and_sort(input_filename=tofu_f.all_hq_fq,
                          sam_filename=tofu_f.sorted_gmap_sam,
                          gmap_db_dir=args.gmap_db,
                          gmap_db_name=args.gmap_name,
                          gmap_nproc=args.gmap_nproc)

    # (5) post mapping to genome analysis, including
    #     * collapse polished HQ isoform clusters into groups
    #     * count abundance of collapsed isoform groups
    #     * filter collapsed isoforms based on abundance info
    logging.info("Post mapping to genome analysis.")
    # The input isoforms use the same format as the requested output.
    in_isoforms = tofu_f.all_hq_fq if output_is_fastq else tofu_f.all_hq_fa

    post_mapping_to_genome_runner(in_isoforms=in_isoforms,
                                  in_sam=tofu_f.sorted_gmap_sam,
                                  in_pickle=tofu_f.hq_lq_prefix_dict_pickle,
                                  out_isoforms=args.collapsed_filtered_fn,
                                  out_gff=args.gff_fn,
                                  out_abundance=args.abundance_fn,
                                  out_group=args.group_fn,
                                  out_read_stat=args.read_stat_fn,
                                  min_aln_coverage=args.min_aln_coverage,
                                  min_aln_identity=args.min_aln_identity,
                                  min_flnc_coverage=args.min_flnc_coverage,
                                  max_fuzzy_junction=args.max_fuzzy_junction,
                                  allow_extra_5exon=args.allow_extra_5exon,
                                  min_count=args.min_count)

    return 0
예제 #12
0
 def test_sanity_check_sge(self):
     """IceUtils.sanity_check_sge must return a true value for SgeOptions(100)."""
     sge_opts = SgeOptions(100)
     check_passed = IceUtils.sanity_check_sge(sge_opts, self.outDir)
     self.assertTrue(check_passed)
예제 #13
0
 def tes_daligner_against_ref_use_sge(self):
     """Run the shared daligner_against_ref check with SGE enabled.

     NOTE(review): the method name starts with ``tes_``, so the default
     unittest loader (which collects ``test*`` methods) will not pick it
     up -- presumably disabled on purpose; confirm.
     """
     default_sge_opts = SgeOptions()
     self._test_daligner_against_ref(
         test_name="test_daligner_against_ref_use_sge",
         use_sge=True,
         sge_opts=default_sge_opts)