Esempio n. 1
0
    def run(self):
        """Execute ice_partial.py all|one|split|i|merge.

        The sub-command name is read from ``self.args.subCommand``. The
        matching task object is built from ``self.args``, its command string
        is logged, and its ``run()`` method is called.

        Returns 0 on success. Any exception, including the ``ValueError``
        raised for an unknown sub-command, is logged with its traceback and
        reported as a return value of 1.
        """
        cmd = self.args.subCommand
        banner = "Running {f} {cmd} v{v}.".format(
            f=op.basename(__file__), cmd=cmd, v=get_version())
        logging.info(banner)

        cmd_str = ""
        try:
            args = self.args

            # One lazy builder per sub-command: attributes of ``args`` are
            # only touched for the sub-command that was actually requested.
            def make_all():
                sge = SgeOptions(unique_id=args.unique_id,
                                 use_sge=args.use_sge,
                                 max_sge_jobs=args.max_sge_jobs,
                                 blasr_nproc=args.blasr_nproc)
                return IceAllPartials(
                    root_dir=args.root_dir,
                    fasta_filenames=args.fasta_filenames.split(','),
                    ref_fasta=args.ref_fasta,
                    out_pickle=args.out_pickle,
                    sge_opts=sge,
                    sa_file=args.sa_file,
                    ccs_fofn=args.ccs_fofn)

            def make_one():
                # Only assign nfl reads in the given input_fasta file to
                # isoforms.
                return IcePartialOne(
                    input_fasta=args.input_fasta,
                    ref_fasta=args.ref_fasta,
                    out_pickle=args.out_pickle,
                    sa_file=args.sa_file,
                    ccs_fofn=args.ccs_fofn,
                    done_filename=args.done_filename,
                    blasr_nproc=args.blasr_nproc,
                    use_finer_qv=args.use_finer_qv)

            def make_split():
                return IcePartialSplit(
                    root_dir=args.root_dir, nfl_fa=args.nfl_fa, N=args.N)

            def make_i():
                return IcePartialI(
                    root_dir=args.root_dir,
                    i=args.i,
                    ccs_fofn=args.ccs_fofn,
                    blasr_nproc=args.blasr_nproc)

            def make_merge():
                return IcePartialMerge(root_dir=args.root_dir, N=args.N)

            builders = {
                "all": make_all,
                "one": make_one,
                "split": make_split,
                "i": make_i,
                "merge": make_merge,
            }
            builder = builders.get(cmd)
            if builder is None:
                raise ValueError(
                    "Unknown command passed to {f}: {cmd}.".format(
                        f=op.basename(__file__), cmd=cmd))

            task = builder()
            cmd_str = task.cmd_str()
            logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
            task.run()
        except:
            # Catch-all on purpose: every failure becomes return code 1.
            failure = "Exiting {cmd_str} with return code 1.".format(
                cmd_str=cmd_str)
            logging.exception(failure)
            return 1
        return 0
Esempio n. 2
0
    def run(self):
        """Execute ice_partial.py all|one|split|i|merge.

        Builds the task object that matches ``self.args.subCommand`` from
        the parsed arguments, logs the task's command string and runs it.

        Returns 0 when the task finishes, or 1 when anything raises (the
        exception is logged together with its traceback).
        """
        script = op.basename(__file__)
        cmd = self.args.subCommand
        logging.info(
            "Running {f} {cmd} v{v}.".format(f=script, cmd=cmd,
                                             v=get_version()))
        cmd_str = ""
        try:
            opts = self.args
            if cmd == "all":
                cluster_opts = SgeOptions(
                    unique_id=opts.unique_id,
                    use_sge=opts.use_sge,
                    max_sge_jobs=opts.max_sge_jobs,
                    blasr_nproc=opts.blasr_nproc)
                task = IceAllPartials(
                    root_dir=opts.root_dir,
                    fasta_filenames=opts.fasta_filenames.split(','),
                    ref_fasta=opts.ref_fasta,
                    out_pickle=opts.out_pickle,
                    sge_opts=cluster_opts,
                    sa_file=opts.sa_file,
                    ccs_fofn=opts.ccs_fofn)
            elif cmd == "one":
                # Only assign nfl reads in the given input_fastq file to
                # isoforms.
                task = IcePartialOne(
                    input_fastq=opts.input_fastq,
                    ref_fasta=opts.ref_fasta,
                    out_pickle=opts.out_pickle,
                    sa_file=opts.sa_file,
                    ccs_fofn=opts.ccs_fofn,
                    done_filename=opts.done_filename,
                    blasr_nproc=opts.blasr_nproc,
                    use_finer_qv=opts.use_finer_qv)
            elif cmd == "split":
                task = IcePartialSplit(
                    root_dir=opts.root_dir, nfl_fa=opts.nfl_fa, N=opts.N)
            elif cmd == "i":
                task = IcePartialI(
                    root_dir=opts.root_dir,
                    i=opts.i,
                    ccs_fofn=opts.ccs_fofn,
                    blasr_nproc=opts.blasr_nproc)
            elif cmd == "merge":
                task = IcePartialMerge(root_dir=opts.root_dir, N=opts.N)
            else:
                raise ValueError(
                    "Unknown command passed to {f}: {cmd}.".format(
                        f=op.basename(__file__), cmd=cmd))

            cmd_str = task.cmd_str()
            logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
            task.run()
        except:
            # Catch-all on purpose: every failure becomes return code 1.
            logging.exception(
                "Exiting {cmd_str} with return code 1.".format(
                    cmd_str=cmd_str))
            return 1
        return 0
Esempio n. 3
0
 def run(self):
     """Run ``convert_fofn_to_fasta`` on the paths held in ``self.args``.

     Existing output is never force-overwritten (``force_overwrite=False``).

     Returns 0 on success. If an ``Exception`` is raised, its message is
     logged as an error and 1 is returned.
     """
     banner = "Running {f} v{v}.".format(f=op.basename(__file__),
                                         v=get_version())
     logging.info(banner)
     opts = self.args
     status = 0
     try:
         convert_fofn_to_fasta(
             fofn_filename=opts.input_fofn,
             out_filename=opts.fasta_fofn,
             fasta_out_dir=opts.fasta_out_dir,
             force_overwrite=False)
     except Exception as err:
         logging.error(str(err))
         status = 1
     return status
Esempio n. 4
0
 def run(self):
     """Build an ``IceQuiverMerge`` from ``self.args`` and run it.

     Returns 0 on success. Any exception is logged with its traceback and
     turned into a return value of 1.
     """
     script = op.basename(__file__)
     logging.info("Running {f} v{v}.".format(f=script, v=get_version()))
     cmd_str = ""
     try:
         opts = self.args
         merger = IceQuiverMerge(root_dir=opts.root_dir, N=opts.N)
         cmd_str = merger.cmd_str()
         merger.run()
     except:
         # Catch-all on purpose: every failure becomes return code 1.
         failure = "Exiting {cmd_str} with return code 1.".format(
             cmd_str=cmd_str)
         logging.exception(failure)
         return 1
     return 0
Esempio n. 5
0
 def run(self):
     """Merge step: construct ``IceQuiverMerge`` and call its ``run()``.

     ``root_dir`` and ``N`` come from ``self.args``. The return value is 0
     when the merge object runs without raising, otherwise 1 (the exception
     and traceback are written to the log).
     """
     banner = "Running {f} v{v}.".format(f=op.basename(__file__),
                                         v=get_version())
     logging.info(banner)
     cmd_str = ""
     try:
         args = self.args
         merge_task = IceQuiverMerge(root_dir=args.root_dir, N=args.N)
         cmd_str = merge_task.cmd_str()
         merge_task.run()
     except:
         # Catch-all on purpose: every failure becomes return code 1.
         logging.exception(
             "Exiting {cmd_str} with return code 1.".format(cmd_str=cmd_str))
         return 1
     else:
         return 0
Esempio n. 6
0
def main():
    """Main function, split a fasta into smaller chunks.

    Parses the command line with ``get_args()``, sets up DEBUG-level
    logging on this module's logger, logs the script name and version, and
    then hands the parsed options to ``splitFasta``.
    """
    import logging
    from pbtools.pbtranscript.__init__ import get_version
    logger = logging.getLogger(__name__)
    opts = get_args()
    from pbtools.pbtranscript.Utils import setup_log
    setup_log(alog=logger, level=logging.DEBUG)

    banner = "Running {f} v{v}.".format(f=op.basename(__file__),
                                        v=get_version())
    logger.info(banner)

    splitFasta(
        input_fasta=opts.input_fasta,
        reads_per_split=opts.reads_per_split,
        out_dir=opts.out_dir,
        out_prefix=opts.out_prefix)
Esempio n. 7
0
def main():
    """Main function, split a fasta into smaller chunks.

    Reads the options from ``get_args()``, configures this module's logger
    at DEBUG level, logs the running script and version, and calls
    ``splitFasta`` with the parsed values.
    """
    import logging
    from pbtools.pbtranscript.__init__ import get_version
    module_log = logging.getLogger(__name__)
    parsed = get_args()
    from pbtools.pbtranscript.Utils import setup_log
    setup_log(alog=module_log, level=logging.DEBUG)
    script = op.basename(__file__)
    module_log.info("Running {f} v{v}.".format(f=script, v=get_version()))

    # Collect the keyword arguments first so the call itself stays short.
    split_kwargs = dict(input_fasta=parsed.input_fasta,
                        reads_per_split=parsed.reads_per_split,
                        out_dir=parsed.out_dir,
                        out_prefix=parsed.out_prefix)
    splitFasta(**split_kwargs)
Esempio n. 8
0
def set_parser():
    """Set up and return argument parser.

    The parser takes three positionals (``input_fasta``, ``output_prefix``,
    ``consensus_id``), the integer options ``--nproc`` (default 8) and
    ``--maxScore`` (default -1000), and ``--version``/``-v``.
    """
    # Positional arguments, in the order they must appear on the command line.
    positionals = (
        ("input_fasta", "Input fasta filename"),
        ("output_prefix", "Output filename prefix (ex: g_consensus)"),
        ("consensus_id", "Consensus sequence ID name (ex: consensus)"),
    )
    # Integer-valued options: (flag, default, help text).
    int_options = (
        ("--nproc", 8, "Number of processes"),
        ("--maxScore", -1000, "blasr maxScore"),
    )

    parser = ArgumentParser()
    for name, description in positionals:
        parser.add_argument(name, help=description)
    for flag, default, description in int_options:
        parser.add_argument(flag, default=default, type=int,
                            help=description)
    version_text = '%(prog)s ' + get_version()
    parser.add_argument("--version", "-v",
                        action='version', version=version_text)
    return parser
Esempio n. 9
0
def set_parser():
    """Set up and return argument parser.

    Declares the positionals ``input_fasta``, ``output_prefix`` and
    ``consensus_id``, the integer options ``--nproc`` (default 8) and
    ``--maxScore`` (default -1000), plus a ``--version``/``-v`` action.
    """
    parser = ArgumentParser()
    add = parser.add_argument

    add("input_fasta", help="Input fasta filename")
    add("output_prefix", help="Output filename prefix (ex: g_consensus)")
    add("consensus_id", help="Consensus sequence ID name (ex: consensus)")

    add("--nproc", type=int, default=8, help="Number of processes")
    add("--maxScore", type=int, default=-1000, help="blasr maxScore")

    add("--version", "-v", action='version',
        version='%(prog)s ' + get_version())
    return parser
Esempio n. 10
0
    def run(self):
        """Execute ice_fa2fq.py.

        Reads ``in_fa``, ``ccs_fofn`` and ``out_fq`` from ``self.args``,
        validates the two inputs, records the command string and calls
        ``ice_fa2fq``.

        Returns 0 on success. Any exception is logged with its traceback
        and reported as a return value of 1.
        """
        script = op.basename(__file__)
        logging.info("Running {f} v{v}.".format(f=script, v=get_version()))
        cmd_str = ""
        try:
            args = self.args
            in_fa = args.in_fa
            ccs_fofn = args.ccs_fofn
            out_fq = args.out_fq

            self.validate_inputs(in_fa=in_fa, ccs_fofn=ccs_fofn)

            # Stored before the conversion so a failure message can show it.
            cmd_str = self.cmd_str(
                in_fa=in_fa, ccs_fofn=ccs_fofn, out_fq=out_fq)

            ice_fa2fq(in_fa=in_fa, ccs_fofn=ccs_fofn, out_fq=out_fq)
        except:
            # Catch-all on purpose: every failure becomes return code 1.
            failure = "Exiting {cmd} with return code 1.".format(cmd=cmd_str)
            logging.exception(failure)
            return 1
        return 0
Esempio n. 11
0
    def run(self):
        """Execute ice_fa2fq.py.

        Validates the inputs named in ``self.args``, builds the command
        string and calls ``ice_fa2fq`` with ``in_fa``, ``ccs_fofn`` and
        ``out_fq``.

        Returns 0 if everything ran, or 1 if anything raised (the exception
        is logged with its traceback).
        """
        banner = "Running {f} v{v}.".format(f=op.basename(__file__),
                                            v=get_version())
        logging.info(banner)
        cmd_str = ""
        try:
            opts = self.args
            # The same three values feed cmd_str() and ice_fa2fq().
            io_kwargs = dict(in_fa=opts.in_fa,
                             ccs_fofn=opts.ccs_fofn,
                             out_fq=opts.out_fq)

            self.validate_inputs(in_fa=io_kwargs["in_fa"],
                                 ccs_fofn=io_kwargs["ccs_fofn"])
            cmd_str = self.cmd_str(**io_kwargs)
            ice_fa2fq(**io_kwargs)
        except:
            # Catch-all on purpose: every failure becomes return code 1.
            logging.exception(
                "Exiting {cmd} with return code 1.".format(cmd=cmd_str))
            return 1
        else:
            return 0
Esempio n. 12
0
def tofu_wrap_main():
    """Command-line driver for tofu_wrap.

    Steps, in order:
      1. Separate the input flnc fasta (``args.flnc_fa``) into bins, by
         primer or by size.
      2. Make sure a fasta fofn is available: build it from
         ``args.bas_fofn`` when none was given, otherwise check that the
         fofn and every file listed in it exist.
      3. Run ``Cluster`` on each bin whose quivered HQ fastq does not exist
         yet.
      4. Combine the per-bin quivered HQ/LQ results under
         ``<root_dir>/combined`` and dump the HQ/LQ prefix dictionaries.
      5. Collapse the combined HQ results with ``run_collapse_sam``.
      6. Compute abundance with ``get_abundance``.
      7. Filter by count (minimum 5 when ``args.targeted_isoseq`` is set,
         otherwise 2) and then filter away subsets.

    Exits with status -1 when one of the argument sanity checks fails.

    Raises:
        Exception: if the supplied fasta fofn, or a file listed in it, does
            not exist.
    """
    parser = argparse.ArgumentParser(prog='tofu_wrap')
    add_cluster_arguments(parser, show_sge_env_name=True, show_sge_queue=True)

    parser.add_argument("--bin_size_kb", default=1, type=int,
                        help="Bin size by kb (default: 1)")
    parser.add_argument("--bin_manual", default=None,
                        help="Bin manual (ex: (1,2,3,5)), overwrites bin_size_kb")
    parser.add_argument("--bin_by_primer", default=False, action="store_true",
                        help="Instead of binning by size, bin by primer "
                             "(overwrites --bin_size_kb and --bin_manual)")
    parser.add_argument("--max_base_limit_MB", default=600, type=int,
                        help="Maximum number of bases per partitioned bin, "
                             "in MB (default: 600)")
    parser.add_argument("--gmap_name", default="hg19",
                        help="GMAP DB name (default: hg19)")
    parser.add_argument("--gmap_db",
                        default="/home/UNIXHOME/etseng/share/gmap_db_new/",
                        help="GMAP DB location (default: "
                             "/home/UNIXHOME/etseng/share/gmap_db_new/)")
    parser.add_argument("--output_seqid_prefix", type=str, default=None,
                        help="Output seqid prefix. If not given, a random ID "
                             "is generated")
    parser.add_argument("--mem_debug", default=False, action="store_true",
                        help=argparse.SUPPRESS)
    parser.add_argument("--max_fuzzy_junction", default=5, type=int,
                        help="Max fuzzy junction (default: 5 bp)")
    parser.add_argument("--version", action='version',
                        version='%(prog)s ' + str(get_version()))
    args = parser.parse_args()

    # DEBUG: memory_profiler is only needed (and imported) on request.
    if args.mem_debug:
        from memory_profiler import memory_usage

    # #################################################################
    # SANITY CHECKS
    if not args.quiver:
        sys.stderr.write("--quiver must be turned on for tofu_wrap. Quit.\n")
        sys.exit(-1)
    if args.nfl_fa is None:
        sys.stderr.write("--nfl_fa must be provided for tofu_wrap. Quit.\n")
        sys.exit(-1)
    if not os.path.exists(args.gmap_db):
        sys.stderr.write(
            "GMAP DB location not valid: {0}. Quit.\n".format(args.gmap_db))
        sys.exit(-1)
    if not os.path.exists(os.path.join(args.gmap_db, args.gmap_name)):
        sys.stderr.write(
            "GMAP name not valid: {0}. Quit.\n".format(args.gmap_name))
        sys.exit(-1)
    # #################################################################

    if args.output_seqid_prefix is None:
        tofu_prefix = binascii.b2a_hex(os.urandom(3))
    else:
        tofu_prefix = args.output_seqid_prefix

    ice_opts = IceOptions(quiver=args.quiver,
                          use_finer_qv=args.use_finer_qv,
                          targeted_isoseq=args.targeted_isoseq,
                          ece_penalty=args.ece_penalty,
                          ece_min_len=args.ece_min_len)
    sge_opts = SgeOptions(unique_id=args.unique_id,
                          use_sge=args.use_sge,
                          max_sge_jobs=args.max_sge_jobs,
                          blasr_nproc=args.blasr_nproc,
                          quiver_nproc=args.quiver_nproc,
                          gcon_nproc=args.gcon_nproc,
                          sge_env_name=args.sge_env_name,
                          sge_queue=args.sge_queue)
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=args.qv_trim_5,
        qv_trim_3=args.qv_trim_3,
        hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
        hq_isoforms_fa=args.hq_isoforms_fa,
        hq_isoforms_fq=args.hq_isoforms_fq,
        lq_isoforms_fa=args.lq_isoforms_fa,
        lq_isoforms_fq=args.lq_isoforms_fq)

    # ex: all_quivered_hq.100_30_0.99.fastq
    quiver_hq_filename = "all_quivered_hq.{0}_{1}_{2:.2f}.fastq".format(
        args.qv_trim_5, args.qv_trim_3, args.hq_quiver_min_accuracy)
    quiver_lq_filename = "all_quivered_lq.fastq"

    # (1) separate input flnc into size bins or primers
    if args.bin_by_primer:
        split_files = sep_flnc_by_primer(args.flnc_fa,
                                         os.path.abspath(args.root_dir))
    else:
        # NOTE(review): eval() executes arbitrary code taken from the command
        # line. Kept so existing invocations behave the same; consider
        # ast.literal_eval if only literal tuples such as "(1,2,3,5)" need
        # to be accepted.
        bin_manual = eval(args.bin_manual) if args.bin_manual is not None else None
        split_files = sep_flnc_by_size(args.flnc_fa, args.root_dir,
                                       bin_size_kb=args.bin_size_kb,
                                       bin_manual=bin_manual,
                                       max_base_limit_MB=args.max_base_limit_MB)
    sys.stderr.write("split input {0} into {1} bins\n".format(
        args.flnc_fa, len(split_files)))

    # (2) if fasta_fofn already is there, use it; otherwise make it first
    if args.quiver and args.fasta_fofn is None:
        sys.stderr.write("Making fasta_fofn now\n")
        nfl_dir = os.path.abspath(
            os.path.join(args.root_dir, "fasta_fofn_files"))
        if not os.path.exists(nfl_dir):
            os.makedirs(nfl_dir)
        args.fasta_fofn = os.path.join(nfl_dir, 'input.fasta.fofn')
        sys.stderr.write("fasta_fofn {0}\n".format(args.fasta_fofn))
        sys.stderr.write("nfl_dir {0}\n".format(nfl_dir))
        convert_fofn_to_fasta(fofn_filename=args.bas_fofn,
                              out_filename=args.fasta_fofn,
                              fasta_out_dir=nfl_dir,
                              cpus=args.blasr_nproc)
    else:
        if not os.path.exists(args.fasta_fofn):
            raise Exception(
                "fasta_fofn {0} does not exist!".format(args.fasta_fofn))
        # The context manager closes the fofn; the original leaked the handle.
        with open(args.fasta_fofn) as fofn:
            for line in fofn:
                fasta_path = line.strip()
                if len(fasta_path) > 0 and not os.path.exists(fasta_path):
                    raise Exception(
                        "File {0} does not exists in {1}".format(
                            fasta_path, args.fasta_fofn))

    # (3) run ICE/Quiver (the whole thing), providing the fasta_fofn
    split_dirs = []
    for cur_file in split_files:
        cur_dir = os.path.abspath(os.path.dirname(cur_file))
        split_dirs.append(cur_dir)
        cur_out_cons = os.path.join(cur_dir, args.consensusFa)

        # A bin whose HQ output already exists is skipped, not rerun.
        hq_quiver = os.path.join(cur_dir, quiver_hq_filename)
        if os.path.exists(hq_quiver):
            sys.stderr.write("{0} already exists. SKIP!\n".format(hq_quiver))
            continue
        sys.stderr.write("running ICE/Quiver on {0}\n".format(cur_dir))
        start_t = time.time()

        obj = Cluster(root_dir=cur_dir,
                      flnc_fa=cur_file,
                      nfl_fa=realpath(args.nfl_fa),
                      bas_fofn=realpath(args.bas_fofn),
                      ccs_fofn=realpath(args.ccs_fofn),
                      fasta_fofn=realpath(args.fasta_fofn),
                      out_fa=cur_out_cons,
                      sge_opts=sge_opts,
                      ice_opts=ice_opts,
                      ipq_opts=ipq_opts,
                      report_fn=args.report_fn,
                      summary_fn=args.summary_fn,
                      nfl_reads_per_split=args.nfl_reads_per_split)

        # DEBUG
        if args.mem_debug:
            mem_usage = memory_usage(obj.run, interval=60)
            end_t = time.time()
            with open('mem_debug.log', 'a') as f:
                f.write("Running ICE/Quiver on {0} took {1} secs.\n".format(
                    cur_dir, end_t - start_t))
                f.write("Maximum memory usage: {0}\n".format(max(mem_usage)))
                f.write("Memory usage: {0}\n".format(mem_usage))
        else:
            obj.run()

    combined_dir = os.path.join(args.root_dir, 'combined')
    if not os.path.exists(combined_dir):
        os.makedirs(combined_dir)
    # (4) combine quivered HQ/LQ results
    hq_filename, lq_filename, hq_pre_dict, lq_pre_dict = \
        combine_quiver_results(split_dirs, combined_dir, quiver_hq_filename,
                               quiver_lq_filename, tofu_prefix)
    pre_dict_path = os.path.join(combined_dir,
                                 'combined.hq_lq_pre_dict.pickle')
    # Binary mode: presumably ``dump`` is pickle's dump (confirm against the
    # file's imports), which needs a bytes stream on Python 3; 'wb' is
    # equally valid on Python 2.
    with open(pre_dict_path, 'wb') as f:
        dump({'HQ': hq_pre_dict, 'LQ': lq_pre_dict}, f)
    # (5) collapse quivered HQ results
    collapse_prefix_hq = run_collapse_sam(
        hq_filename, args.gmap_db, args.gmap_name,
        cpus=args.blasr_nproc,
        max_fuzzy_junction=args.max_fuzzy_junction,
        dun_merge_5_shorter=True)
    # (6) make abundance
    get_abundance(collapse_prefix_hq, hq_pre_dict, collapse_prefix_hq)
    # (7) run filtering & removing subsets in no5merge
    # Targeted runs use a minimum count of 5, all others a minimum of 2.
    min_count = 5 if args.targeted_isoseq else 2
    count_prefix = collapse_prefix_hq + '.min_fl_{0}'.format(min_count)
    run_filtering_by_count(collapse_prefix_hq, count_prefix,
                           min_count=min_count)
    run_filtering_away_subsets(count_prefix, count_prefix + '.filtered',
                               args.max_fuzzy_junction)
Esempio n. 13
0
 def getVersion(self):
     """Return the value of ``get_version()``."""
     version = get_version()
     return version
Esempio n. 14
0
def tofu_wrap_main():
    """Command-line driver for tofu_wrap.

    Steps, in order:
      1. Separate the input flnc fasta (``args.flnc_fa``) into bins, by
         primer or by size.
      2. Make sure a fasta fofn is available: build it from
         ``args.bas_fofn`` when none was given, otherwise check that the
         fofn and every file listed in it exist.
      3. Run ``Cluster`` on each bin whose quivered HQ fastq does not exist
         yet.
      4. Combine the per-bin quivered HQ/LQ results under
         ``<root_dir>/combined`` and dump the HQ/LQ prefix dictionaries.
      5. Collapse the combined HQ results with ``run_collapse_sam``.
      6. Compute abundance with ``get_abundance``.
      7. Filter by count with a minimum count of 2.

    Exits with status -1 when one of the argument sanity checks fails.

    Raises:
        Exception: if the supplied fasta fofn, or a file listed in it, does
            not exist.
    """
    parser = argparse.ArgumentParser(prog='tofu_wrap')
    add_cluster_arguments(parser, show_sge_env_name=True, show_sge_queue=True)

    parser.add_argument("--bin_size_kb", default=1, type=int,
                        help="Bin size by kb (default: 1)")
    parser.add_argument("--bin_manual", default=None,
                        help="Bin manual (ex: (1,2,3,5)), overwrites bin_size_kb")
    parser.add_argument("--bin_by_primer", default=False, action="store_true",
                        help="Instead of binning by size, bin by primer "
                             "(overwrites --bin_size_kb and --bin_manual)")
    parser.add_argument("--max_base_limit_MB", default=600, type=int,
                        help="Maximum number of bases per partitioned bin, "
                             "in MB (default: 600)")
    parser.add_argument("--gmap_name", default="hg19",
                        help="GMAP DB name (default: hg19)")
    parser.add_argument("--gmap_db",
                        default="/home/UNIXHOME/etseng/share/gmap_db_new/",
                        help="GMAP DB location (default: "
                             "/home/UNIXHOME/etseng/share/gmap_db_new/)")
    parser.add_argument("--output_seqid_prefix", type=str, default=None,
                        help="Output seqid prefix. If not given, a random ID "
                             "is generated")
    parser.add_argument("--mem_debug", default=False, action="store_true",
                        help=argparse.SUPPRESS)
    parser.add_argument("--max_fuzzy_junction", default=5, type=int,
                        help="Max fuzzy junction (default: 5 bp)")
    parser.add_argument("--version", action='version',
                        version='%(prog)s ' + str(get_version()))
    args = parser.parse_args()

    # DEBUG: memory_profiler is only needed (and imported) on request.
    if args.mem_debug:
        from memory_profiler import memory_usage

    # #################################################################
    # SANITY CHECKS
    if not args.quiver:
        sys.stderr.write("--quiver must be turned on for tofu_wrap. Quit.\n")
        sys.exit(-1)
    if args.nfl_fa is None:
        sys.stderr.write("--nfl_fa must be provided for tofu_wrap. Quit.\n")
        sys.exit(-1)
    if not os.path.exists(args.gmap_db):
        sys.stderr.write(
            "GMAP DB location not valid: {0}. Quit.\n".format(args.gmap_db))
        sys.exit(-1)
    if not os.path.exists(os.path.join(args.gmap_db, args.gmap_name)):
        sys.stderr.write(
            "GMAP name not valid: {0}. Quit.\n".format(args.gmap_name))
        sys.exit(-1)
    # #################################################################

    if args.output_seqid_prefix is None:
        tofu_prefix = binascii.b2a_hex(os.urandom(3))
    else:
        tofu_prefix = args.output_seqid_prefix

    ice_opts = IceOptions(quiver=args.quiver,
                          use_finer_qv=args.use_finer_qv,
                          targeted_isoseq=args.targeted_isoseq,
                          ece_penalty=args.ece_penalty,
                          ece_min_len=args.ece_min_len)
    sge_opts = SgeOptions(unique_id=args.unique_id,
                          use_sge=args.use_sge,
                          max_sge_jobs=args.max_sge_jobs,
                          blasr_nproc=args.blasr_nproc,
                          quiver_nproc=args.quiver_nproc,
                          gcon_nproc=args.gcon_nproc,
                          sge_env_name=args.sge_env_name,
                          sge_queue=args.sge_queue)
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=args.qv_trim_5,
        qv_trim_3=args.qv_trim_3,
        hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
        hq_isoforms_fa=args.hq_isoforms_fa,
        hq_isoforms_fq=args.hq_isoforms_fq,
        lq_isoforms_fa=args.lq_isoforms_fa,
        lq_isoforms_fq=args.lq_isoforms_fq)

    # ex: all_quivered_hq.100_30_0.99.fastq
    quiver_hq_filename = "all_quivered_hq.{0}_{1}_{2:.2f}.fastq".format(
        args.qv_trim_5, args.qv_trim_3, args.hq_quiver_min_accuracy)
    quiver_lq_filename = "all_quivered_lq.fastq"

    # (1) separate input flnc into size bins or primers
    if args.bin_by_primer:
        split_files = sep_flnc_by_primer(args.flnc_fa,
                                         os.path.abspath(args.root_dir))
    else:
        # NOTE(review): eval() executes arbitrary code taken from the command
        # line. Kept so existing invocations behave the same; consider
        # ast.literal_eval if only literal tuples such as "(1,2,3,5)" need
        # to be accepted.
        bin_manual = eval(args.bin_manual) if args.bin_manual is not None else None
        split_files = sep_flnc_by_size(args.flnc_fa, args.root_dir,
                                       bin_size_kb=args.bin_size_kb,
                                       bin_manual=bin_manual,
                                       max_base_limit_MB=args.max_base_limit_MB)
    sys.stderr.write("split input {0} into {1} bins\n".format(
        args.flnc_fa, len(split_files)))

    # (2) if fasta_fofn already is there, use it; otherwise make it first
    if args.quiver and args.fasta_fofn is None:
        sys.stderr.write("Making fasta_fofn now\n")
        nfl_dir = os.path.abspath(
            os.path.join(args.root_dir, "fasta_fofn_files"))
        if not os.path.exists(nfl_dir):
            os.makedirs(nfl_dir)
        args.fasta_fofn = os.path.join(nfl_dir, 'input.fasta.fofn')
        sys.stderr.write("fasta_fofn {0}\n".format(args.fasta_fofn))
        sys.stderr.write("nfl_dir {0}\n".format(nfl_dir))
        convert_fofn_to_fasta(fofn_filename=args.bas_fofn,
                              out_filename=args.fasta_fofn,
                              fasta_out_dir=nfl_dir,
                              cpus=args.blasr_nproc)
    else:
        if not os.path.exists(args.fasta_fofn):
            raise Exception(
                "fasta_fofn {0} does not exist!".format(args.fasta_fofn))
        # The context manager closes the fofn; the original leaked the handle.
        with open(args.fasta_fofn) as fofn:
            for line in fofn:
                fasta_path = line.strip()
                if len(fasta_path) > 0 and not os.path.exists(fasta_path):
                    raise Exception(
                        "File {0} does not exists in {1}".format(
                            fasta_path, args.fasta_fofn))

    # (3) run ICE/Quiver (the whole thing), providing the fasta_fofn
    split_dirs = []
    for cur_file in split_files:
        cur_dir = os.path.abspath(os.path.dirname(cur_file))
        split_dirs.append(cur_dir)
        cur_out_cons = os.path.join(cur_dir, args.consensusFa)

        # A bin whose HQ output already exists is skipped, not rerun.
        hq_quiver = os.path.join(cur_dir, quiver_hq_filename)
        if os.path.exists(hq_quiver):
            sys.stderr.write("{0} already exists. SKIP!\n".format(hq_quiver))
            continue
        sys.stderr.write("running ICE/Quiver on {0}\n".format(cur_dir))
        start_t = time.time()

        obj = Cluster(root_dir=cur_dir,
                      flnc_fa=cur_file,
                      nfl_fa=realpath(args.nfl_fa),
                      bas_fofn=realpath(args.bas_fofn),
                      ccs_fofn=realpath(args.ccs_fofn),
                      fasta_fofn=realpath(args.fasta_fofn),
                      out_fa=cur_out_cons,
                      sge_opts=sge_opts,
                      ice_opts=ice_opts,
                      ipq_opts=ipq_opts,
                      report_fn=args.report_fn,
                      summary_fn=args.summary_fn,
                      nfl_reads_per_split=args.nfl_reads_per_split)

        # DEBUG
        if args.mem_debug:
            mem_usage = memory_usage(obj.run, interval=60)
            end_t = time.time()
            with open('mem_debug.log', 'a') as f:
                f.write("Running ICE/Quiver on {0} took {1} secs.\n".format(
                    cur_dir, end_t - start_t))
                f.write("Maximum memory usage: {0}\n".format(max(mem_usage)))
                f.write("Memory usage: {0}\n".format(mem_usage))
        else:
            obj.run()

    combined_dir = os.path.join(args.root_dir, 'combined')
    if not os.path.exists(combined_dir):
        os.makedirs(combined_dir)
    # (4) combine quivered HQ/LQ results
    hq_filename, lq_filename, hq_pre_dict, lq_pre_dict = \
        combine_quiver_results(split_dirs, combined_dir, quiver_hq_filename,
                               quiver_lq_filename, tofu_prefix)
    pre_dict_path = os.path.join(combined_dir,
                                 'combined.hq_lq_pre_dict.pickle')
    # Binary mode: presumably ``dump`` is pickle's dump (confirm against the
    # file's imports), which needs a bytes stream on Python 3; 'wb' is
    # equally valid on Python 2.
    with open(pre_dict_path, 'wb') as f:
        dump({'HQ': hq_pre_dict, 'LQ': lq_pre_dict}, f)
    # (5) collapse quivered HQ results
    collapse_prefix_hq = run_collapse_sam(
        hq_filename, args.gmap_db, args.gmap_name,
        cpus=args.blasr_nproc,
        max_fuzzy_junction=args.max_fuzzy_junction)
    # (6) make abundance
    get_abundance(collapse_prefix_hq, hq_pre_dict, collapse_prefix_hq)
    # (7) run filtering
    run_filtering_by_count(collapse_prefix_hq,
                           collapse_prefix_hq + '.min_fl_2',
                           min_count=2)
Esempio n. 15
0
 def run(self):
     """Execute ice_quiver.py all|i|merge|postprocess.

     The sub-command named by ``self.args.subCommand`` selects which task
     object is built from ``self.args``. The task's command string is
     logged and its ``run()`` method is called.

     Returns 0 on success. Any exception, including the ``ValueError``
     raised for an unknown sub-command, is logged with its traceback and
     reported as a return value of 1.
     """
     cmd = self.args.subCommand
     banner = "Running {f} {cmd} v{v}.".format(
         f=op.basename(__file__), cmd=cmd, v=get_version())
     logging.info(banner)
     cmd_str = ""
     try:
         args = self.args

         # Lazy factories for the option bundles shared by several
         # sub-commands; ``args`` is only read when one is called.
         def new_sge_opts():
             return SgeOptions(unique_id=args.unique_id,
                               use_sge=args.use_sge,
                               max_sge_jobs=args.max_sge_jobs,
                               blasr_nproc=args.blasr_nproc,
                               quiver_nproc=args.quiver_nproc)

         def new_ipq_opts():
             return IceQuiverHQLQOptions(
                 hq_isoforms_fa=args.hq_isoforms_fa,
                 hq_isoforms_fq=args.hq_isoforms_fq,
                 lq_isoforms_fa=args.lq_isoforms_fa,
                 lq_isoforms_fq=args.lq_isoforms_fq,
                 qv_trim_5=args.qv_trim_5,
                 qv_trim_3=args.qv_trim_3,
                 hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)

         if cmd == "all":
             sge_opts = new_sge_opts()
             ipq_opts = new_ipq_opts()
             task = IceQuiverAll(root_dir=args.root_dir,
                                 bas_fofn=args.bas_fofn,
                                 fasta_fofn=args.fasta_fofn,
                                 sge_opts=sge_opts,
                                 ipq_opts=ipq_opts)
         elif cmd == "i":
             sge_opts = new_sge_opts()
             task = IceQuiverI(root_dir=args.root_dir,
                               i=args.i,
                               N=args.N,
                               bas_fofn=args.bas_fofn,
                               fasta_fofn=args.fasta_fofn,
                               sge_opts=sge_opts)
         elif cmd == "merge":
             task = IceQuiverMerge(root_dir=args.root_dir, N=args.N)
         elif cmd == "postprocess":
             ipq_opts = new_ipq_opts()
             task = IceQuiverPostprocess(
                 root_dir=args.root_dir,
                 ipq_opts=ipq_opts,
                 use_sge=args.use_sge,
                 quit_if_not_done=args.quit_if_not_done,
                 summary_fn=args.summary_fn,
                 report_fn=args.report_fn)
         else:
             raise ValueError(
                 "Unknown command passed to {f}: {cmd}.".format(
                     f=op.basename(__file__), cmd=cmd))

         cmd_str = task.cmd_str()
         logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
         task.run()
     except:
         # Catch-all on purpose: every failure becomes return code 1.
         failure = "Exiting {cmd_str} with return code 1.".format(
             cmd_str=cmd_str)
         logging.exception(failure)
         return 1
     return 0
Esempio n. 16
0
 def getVersion(self):
     """Return the string produced by ``get_version()``."""
     version_string = get_version()
     return version_string
Esempio n. 17
0
 def run(self):
     """Execute ice_quiver.py all|i|merge|postprocess.

     Builds the option bundles the requested sub-command needs, creates the
     matching task object, logs its command string and runs it.

     Returns 0 when the task finishes, or 1 when anything raises (the
     exception is logged together with its traceback).
     """
     script = op.basename(__file__)
     cmd = self.args.subCommand
     logging.info(
         "Running {f} {cmd} v{v}.".format(f=script, cmd=cmd,
                                          v=get_version()))
     cmd_str = ""
     try:
         opts = self.args
         if cmd not in ("all", "i", "merge", "postprocess"):
             raise ValueError(
                 "Unknown command passed to {f}: {cmd}.".format(
                     f=op.basename(__file__), cmd=cmd))

         # SGE options are needed by "all" and "i" only.
         if cmd in ("all", "i"):
             sge_opts = SgeOptions(
                 unique_id=opts.unique_id,
                 use_sge=opts.use_sge,
                 max_sge_jobs=opts.max_sge_jobs,
                 blasr_nproc=opts.blasr_nproc,
                 quiver_nproc=opts.quiver_nproc)

         # HQ/LQ options are needed by "all" and "postprocess" only.
         if cmd in ("all", "postprocess"):
             ipq_opts = IceQuiverHQLQOptions(
                 hq_isoforms_fa=opts.hq_isoforms_fa,
                 hq_isoforms_fq=opts.hq_isoforms_fq,
                 lq_isoforms_fa=opts.lq_isoforms_fa,
                 lq_isoforms_fq=opts.lq_isoforms_fq,
                 qv_trim_5=opts.qv_trim_5,
                 qv_trim_3=opts.qv_trim_3,
                 hq_quiver_min_accuracy=opts.hq_quiver_min_accuracy)

         if cmd == "all":
             task = IceQuiverAll(
                 root_dir=opts.root_dir,
                 bas_fofn=opts.bas_fofn,
                 fasta_fofn=opts.fasta_fofn,
                 sge_opts=sge_opts,
                 ipq_opts=ipq_opts)
         elif cmd == "i":
             task = IceQuiverI(
                 root_dir=opts.root_dir, i=opts.i, N=opts.N,
                 bas_fofn=opts.bas_fofn,
                 fasta_fofn=opts.fasta_fofn,
                 sge_opts=sge_opts)
         elif cmd == "merge":
             task = IceQuiverMerge(root_dir=opts.root_dir, N=opts.N)
         else:
             # Only "postprocess" is left after the membership check above.
             task = IceQuiverPostprocess(
                 root_dir=opts.root_dir,
                 ipq_opts=ipq_opts,
                 use_sge=opts.use_sge,
                 quit_if_not_done=opts.quit_if_not_done,
                 summary_fn=opts.summary_fn,
                 report_fn=opts.report_fn)

         cmd_str = task.cmd_str()
         logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
         task.run()
     except:
         # Catch-all on purpose: every failure becomes return code 1.
         logging.exception(
             "Exiting {cmd_str} with return code 1.".format(cmd_str=cmd_str))
         return 1
     return 0
Esempio n. 18
0
 def _logConfigs(self):
     """Log configuration.

     Overwrites ``self.configFN`` with three lines: the pbtranscript
     version, ``str(self.ice_opts)`` and ``str(self.sge_opts)``.
     """
     # Line buffering (1) replaces the original unbuffered mode (0): Python 3
     # raises ValueError for unbuffered text-mode files, while line buffering
     # is accepted by both Python 2 and 3. Every write below ends in a
     # newline, so each line is still flushed as it is written.
     with open(self.configFN, "w", 1) as f:
         f.write("pbtranscript " + get_version() + "\n")
         f.write(str(self.ice_opts) + "\n")
         f.write(str(self.sge_opts) + "\n")
Esempio n. 19
0
 def _logConfigs(self):
     """Log configuration.

     Writes the pbtranscript version, then the string forms of
     ``self.ice_opts`` and ``self.sge_opts``, one per line, to
     ``self.configFN`` (any existing content is replaced).
     """
     # buffering=1 (line-buffered) instead of 0: unbuffered I/O is only
     # allowed for binary files on Python 3, so the original call raised
     # ValueError there. Line buffering works on Python 2 and 3 and flushes
     # after each newline-terminated write.
     with open(self.configFN, 'w', 1) as f:
         f.write('pbtranscript ' + get_version() + "\n")
         f.write(str(self.ice_opts) + "\n")
         f.write(str(self.sge_opts) + "\n")
Esempio n. 20
0
 def getVersion(self):
     """Return the version string reported by ``get_version()``."""
     current_version = get_version()
     return current_version
Esempio n. 21
0
 def getVersion(self):
     """Look up the version via ``get_version()`` and return it."""
     result = get_version()
     return result
Esempio n. 22
0
 def getVersion(self):
     """Return the result of calling ``get_version()``."""
     ver = get_version()
     return ver
Esempio n. 23
0
 def getVersion(self):
     """Return version string, as given by ``get_version()``."""
     version_text = get_version()
     return version_text