Python runLSF Exemples, broad_helperFunctions.runLSF Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : run_build_DB_FromGTF_by_chr.py Projet : Tmacme/juncBASE

def main():
    """Split a GTF file by chromosome and build the database per chromosome.

    Two modes are selected on the command line:

    * ``--initialize``: read the GTF given with ``-g``, write one
      ``<tmp_dir>/<gtf prefix>_<chr>.gtf`` file per distinct value of the
      first tab-separated column, run ``SCRIPT --initialize -d <db_name>``
      and exit.
    * default: for every file in ``--tmp_dir`` whose name contains the GTF
      prefix (other than the GTF file itself), run ``SCRIPT`` on it, either
      locally or through ``runLSF`` when ``--LSF`` is given.

    ``--check`` only reports chromosomes for which ``checkChr`` returns a
    false value; it never launches a build.  ``--print_cmd`` prints the
    commands instead of running them.  ``--keep_temp`` is parsed but has no
    effect: temporary files are never removed.

    The function always terminates the process through ``sys.exit(0)``.
    """
    opt_parser = OptionParser()

    # Add Options. Required options should have default=None
    opt_parser.add_option("--initialize",
                          dest="initialize",
                          action="store_true",
                          help="""Will split up the gtf file into separate temp files
                                  and initalize the database.""",
                          default=False)
    opt_parser.add_option("--tmp_dir",
                          dest="tmp_dir",
                          type="string",
                          help="""Directory to place temporary files and to look
                                  for temporary files.""",
                          default=None)
    opt_parser.add_option("--keep_temp",
                          dest="keep_temp",
                          action="store_true",
                          help="""TEMP FILES ARE KEPT FOR NOW. Will keep the temporary gtf files. Default is
                                  to delete them.""",
                          default=False)
    opt_parser.add_option("-g",
                          dest="gtf_file",
                          type="string",
                          help="GTF annotation file.",
                          default=None)
    opt_parser.add_option("--use_gene_name",
                          dest="use_gene_name",
                          action="store_true",
                          help="""By default, the gene_id attribute will be used
                                  for the gene name used in the database, but
                                  the gene_name attribute can be used
                                  instead.""",
                          default=False)
    # NOTE: a "-f" option (genome FASTA, for storing exon and intron
    # sequences in the database) was drafted here but is currently disabled.
    opt_parser.add_option("-d",
                          dest="db_name",
                          type="string",
                          help="Name of the new database",
                          default=None)
    opt_parser.add_option("--sqlite_db_dir",
                          dest="sqlite_db_dir",
                          type="string",
                          help="Location to put sqlite database. Default=%s" % DB_DIR,
                          default=DB_DIR)
    opt_parser.add_option("-p",
                          dest="num_processes",
                          type="int",
                          help="""Will run getASEventReadCounts.py
                                  simultaneously with this many samples.
                                  Default=%d""" % DEF_NUM_PROCESSES,
                          default=DEF_NUM_PROCESSES)
    opt_parser.add_option("--LSF",
                          dest="run_lsf",
                          action="store_true",
                          help="""Will launch jobs on LSF. Default is running on
                                  local.""",
                          default=False)
    opt_parser.add_option("--force",
                          dest="force",
                          action="store_true",
                          help="""By default, will check for the existence of
                                  the final output before running commands. This
                                  option will force all runs.""",
                          default=False)
    opt_parser.add_option("--check",
                          dest="check",
                          action="store_true",
                          help="""Will check samples that are not done and print
                                  out which need to still be run""",
                          default=False)
    opt_parser.add_option("--print_cmd",
                          dest="print_cmd",
                          action="store_true",
                          help="""Will print commands that will be run, but will
                                  not run them. Used for debugging.""",
                          default=False)

    (options, args) = opt_parser.parse_args()

    # validate the command line arguments
    opt_parser.check_required("-g")
    opt_parser.check_required("--tmp_dir")
    opt_parser.check_required("-d")

    gtf_file_path = options.gtf_file
    tmp_dir = formatDir(options.tmp_dir)
    db_name = options.db_name
    sqlite_db_dir = options.sqlite_db_dir
    num_processes = options.num_processes
    run_lsf = options.run_lsf
    force = options.force
    check = options.check
    print_cmd = options.print_cmd

    # Temp files are named "<prefix>_<chr>.gtf", where <prefix> is the GTF
    # base name with its last dot-separated component removed.  Both modes
    # need the same prefix, so it is computed once here.
    gtf_file_name = gtf_file_path.split("/")[-1]
    gtf_file_prefix = ".".join(gtf_file_name.split(".")[:-1])

    ##############
    # INITIALIZE #
    ##############

    # If initializing: split the gtf file, initialize the database, exit.
    if options.initialize:
        chr2lines = {}

        with open(gtf_file_path) as gtf_file:
            for line in gtf_file:
                # Header/comment and blank lines carry no chromosome; using
                # them as a key would create a bogus temp file whose name
                # contains the whole line.
                if line.startswith("#") or not line.strip():
                    continue
                this_chr = line.split("\t")[0]
                updateDictOfLists(chr2lines, this_chr, line)

        for chr_name in chr2lines:
            tmp_chr_path = "%s/%s_%s.gtf" % (tmp_dir, gtf_file_prefix,
                                             chr_name)
            with open(tmp_chr_path, "w") as tmp_chr_file:
                tmp_chr_file.writelines(chr2lines[chr_name])

        # Now initialize the database
        # NOTE(review): --sqlite_db_dir is not forwarded here, unlike in the
        # build commands below -- confirm this is intended.
        cmd = "python %s " % SCRIPT
        cmd += "--initialize -d %s" % db_name
        os.system(cmd)

        sys.exit(0)

    ##################
    # BUILD DATABASE #
    ##################
    db = DB(sqlite_db_dir)

    # Use the gtf prefix to find the temp files; build the database from them.
    tmp_file_list = []
    for this_file in os.listdir(tmp_dir):
        if gtf_file_prefix in this_file and this_file != gtf_file_name:
            tmp_file_list.append(this_file)

    # Now run script for every chromosome file
    ctr = 0
    for tmp_file in tmp_file_list:

        this_chr = getChr(tmp_dir + "/" + tmp_file)

        if check or not force:
            # For now, just checks that records exist in the database. It is
            # better to force since it is difficult to really know if a
            # chromosome was built or not.
            chr_built = checkChr(db, db_name, this_chr)

            if check:
                # Check mode only reports; it must never launch a build,
                # even when combined with --force.
                if not chr_built:
                    print("Chromosome %s not built" % this_chr)
                continue

            if chr_built:
                # Not forcing and already built: nothing to do.
                continue

        ctr += 1

        cmd = "python %s " % SCRIPT
        cmd += "-g %s/%s " % (tmp_dir, tmp_file)
        cmd += "-d %s " % db_name

        if options.use_gene_name:
            cmd += "--use_gene_name "

        cmd += "--sqlite_db_dir %s" % sqlite_db_dir

        if print_cmd:
            print(cmd)
            continue

        if run_lsf:
            # The last argument is presumably the LSF queue -- confirm
            # against runLSF.
            runLSF(cmd,
                   "%s.build_DB.bsub.out" % this_chr,
                   this_chr + "build_DB",
                   "hour")
            continue

        # Crude throttle: every num_processes-th command runs in the
        # foreground (os.system blocks), the others are started in the
        # background and not waited for.
        if ctr % num_processes == 0:
            os.system(cmd)
        else:
            print(cmd)
            Popen(cmd, shell=True, executable=SHELL)

    # TODO: temp files are never removed, so --keep_temp has no effect yet.
    # Before deleting, check that exons are returned from the same
    # chromosome in the database.

    sys.exit(0)

Exemple #2

0

Afficher le fichier

Fichier : run_build_DB_FromGTF_by_chr.py Projet : noahpieta/juncBASE

def main():
    """Split a GTF file by chromosome and build the database per chromosome.

    Two modes are selected on the command line:

    * ``--initialize``: read the GTF given with ``-g``, write one
      ``<tmp_dir>/<gtf prefix>_<chr>.gtf`` file per distinct value of the
      first tab-separated column, run ``SCRIPT --initialize -d <db_name>``
      and exit.
    * default: for every file in ``--tmp_dir`` whose name contains the GTF
      prefix (other than the GTF file itself), run ``SCRIPT`` on it, either
      locally or through ``runLSF`` when ``--LSF`` is given.

    ``--check`` only reports chromosomes for which ``checkChr`` returns a
    false value; it never launches a build.  ``--print_cmd`` prints the
    commands instead of running them.  ``--keep_temp`` is parsed but has no
    effect: temporary files are never removed.

    The function always terminates the process through ``sys.exit(0)``.
    """
    opt_parser = OptionParser()

    # Add Options. Required options should have default=None
    opt_parser.add_option(
        "--initialize",
        dest="initialize",
        action="store_true",
        help="""Will split up the gtf file into separate temp files
                                  and initalize the database.""",
        default=False)
    opt_parser.add_option(
        "--tmp_dir",
        dest="tmp_dir",
        type="string",
        help="""Directory to place temporary files and to look
                                  for temporary files.""",
        default=None)
    opt_parser.add_option(
        "--keep_temp",
        dest="keep_temp",
        action="store_true",
        help=
        """TEMP FILES ARE KEPT FOR NOW. Will keep the temporary gtf files. Default is
                                  to delete them.""",
        default=False)
    opt_parser.add_option("-g",
                          dest="gtf_file",
                          type="string",
                          help="GTF annotation file.",
                          default=None)
    opt_parser.add_option(
        "--use_gene_name",
        dest="use_gene_name",
        action="store_true",
        help="""By default, the gene_id attribute will be used
                                  for the gene name used in the database, but
                                  the gene_name attribute can be used
                                  instead.""",
        default=False)
    # NOTE: a "-f" option (genome FASTA, for storing exon and intron
    # sequences in the database) was drafted here but is currently disabled.
    opt_parser.add_option("-d",
                          dest="db_name",
                          type="string",
                          help="Name of the new database",
                          default=None)
    opt_parser.add_option("--sqlite_db_dir",
                          dest="sqlite_db_dir",
                          type="string",
                          help="Location to put sqlite database. Default=%s" %
                          DB_DIR,
                          default=DB_DIR)
    opt_parser.add_option("-p",
                          dest="num_processes",
                          type="int",
                          help="""Will run getASEventReadCounts.py
                                  simultaneously with this many samples.
                                  Default=%d""" % DEF_NUM_PROCESSES,
                          default=DEF_NUM_PROCESSES)
    opt_parser.add_option(
        "--LSF",
        dest="run_lsf",
        action="store_true",
        help="""Will launch jobs on LSF. Default is running on
                                  local.""",
        default=False)
    opt_parser.add_option("--force",
                          dest="force",
                          action="store_true",
                          help="""By default, will check for the existence of
                                  the final output before running commands. This
                                  option will force all runs.""",
                          default=False)
    opt_parser.add_option(
        "--check",
        dest="check",
        action="store_true",
        help="""Will check samples that are not done and print
                                  out which need to still be run""",
        default=False)
    opt_parser.add_option(
        "--print_cmd",
        dest="print_cmd",
        action="store_true",
        help="""Will print commands that will be run, but will
                                  not run them. Used for debugging.""",
        default=False)

    (options, args) = opt_parser.parse_args()

    # validate the command line arguments
    opt_parser.check_required("-g")
    opt_parser.check_required("--tmp_dir")
    opt_parser.check_required("-d")

    gtf_file_path = options.gtf_file
    tmp_dir = formatDir(options.tmp_dir)
    db_name = options.db_name
    sqlite_db_dir = options.sqlite_db_dir
    num_processes = options.num_processes
    run_lsf = options.run_lsf
    force = options.force
    check = options.check
    print_cmd = options.print_cmd

    # Temp files are named "<prefix>_<chr>.gtf", where <prefix> is the GTF
    # base name with its last dot-separated component removed.  Both modes
    # need the same prefix, so it is computed once here.
    gtf_file_name = gtf_file_path.split("/")[-1]
    gtf_file_prefix = ".".join(gtf_file_name.split(".")[:-1])

    ##############
    # INITIALIZE #
    ##############

    # If initializing: split the gtf file, initialize the database, exit.
    if options.initialize:
        chr2lines = {}

        with open(gtf_file_path) as gtf_file:
            for line in gtf_file:
                # Header/comment and blank lines carry no chromosome; using
                # them as a key would create a bogus temp file whose name
                # contains the whole line.
                if line.startswith("#") or not line.strip():
                    continue
                this_chr = line.split("\t")[0]
                updateDictOfLists(chr2lines, this_chr, line)

        for chr_name in chr2lines:
            tmp_chr_path = "%s/%s_%s.gtf" % (tmp_dir, gtf_file_prefix,
                                             chr_name)
            with open(tmp_chr_path, "w") as tmp_chr_file:
                tmp_chr_file.writelines(chr2lines[chr_name])

        # Now initialize the database
        # NOTE(review): --sqlite_db_dir is not forwarded here, unlike in the
        # build commands below -- confirm this is intended.
        cmd = "python %s " % SCRIPT
        cmd += "--initialize -d %s" % db_name
        os.system(cmd)

        sys.exit(0)

    ##################
    # BUILD DATABASE #
    ##################
    db = DB(sqlite_db_dir)

    # Use the gtf prefix to find the temp files; build the database from them.
    tmp_file_list = []
    for this_file in os.listdir(tmp_dir):
        if gtf_file_prefix in this_file and this_file != gtf_file_name:
            tmp_file_list.append(this_file)

    # Now run script for every chromosome file
    ctr = 0
    for tmp_file in tmp_file_list:

        this_chr = getChr(tmp_dir + "/" + tmp_file)

        if check or not force:
            # For now, just checks that records exist in the database. It is
            # better to force since it is difficult to really know if a
            # chromosome was built or not.
            chr_built = checkChr(db, db_name, this_chr)

            if check:
                # Check mode only reports; it must never launch a build,
                # even when combined with --force.
                if not chr_built:
                    print("Chromosome %s not built" % this_chr)
                continue

            if chr_built:
                # Not forcing and already built: nothing to do.
                continue

        ctr += 1

        cmd = "python %s " % SCRIPT
        cmd += "-g %s/%s " % (tmp_dir, tmp_file)
        cmd += "-d %s " % db_name

        if options.use_gene_name:
            cmd += "--use_gene_name "

        cmd += "--sqlite_db_dir %s" % sqlite_db_dir

        if print_cmd:
            print(cmd)
            continue

        if run_lsf:
            # The last argument is presumably the LSF queue -- confirm
            # against runLSF.
            runLSF(cmd, "%s.build_DB.bsub.out" % this_chr,
                   this_chr + "build_DB", "hour")
            continue

        # Crude throttle: every num_processes-th command runs in the
        # foreground (os.system blocks), the others are started in the
        # background and not waited for.
        if ctr % num_processes == 0:
            os.system(cmd)
        else:
            print(cmd)
            Popen(cmd, shell=True, executable=SHELL)

    # TODO: temp files are never removed, so --keep_temp has no effect yet.
    # Before deleting, check that exons are returned from the same
    # chromosome in the database.

    sys.exit(0)

Exemple #3

0

Afficher le fichier

Fichier : runCufflinks.py Projet : Tmacme/juncBASE

def main():
    """Launch one Cufflinks run per sample listed in a tab-delimited file.

    Each non-blank line of the ``-i`` file must hold ``<sample id>\\t<bam>``.
    For every sample a ``<out_dir>/<sample id>_cufflinks`` directory is
    created if missing, and the sample is considered done when that
    directory holds a non-empty ``transcripts.gtf``.

    * ``--check`` only prints the samples that still need to run.
    * ``--force`` reruns samples that are already done.
    * With ``--num_processes`` the commands run locally (optionally under
      ``nice``); without it every command is submitted through ``runLSF``.
      The ``--LSF`` flag itself is parsed but not consulted.
    * ``--print_cmd`` prints the commands instead of running them.

    Exits with status 1 when the output directory does not exist, otherwise
    with status 0.
    """
    opt_parser = OptionParser()

    # Add Options. Required options should have default=None
    opt_parser.add_option(
        "-i",
        dest="input",
        type="string",
        help="""Tab-delimited file that specifies sample name
                                  and bam location""",
        default=None,
    )
    opt_parser.add_option(
        "--force",
        dest="force",
        action="store_true",
        help="""By default, will only run Cufflinks if no
                                  output file exists. This option forces the
                                  runs on every sample.""",
        default=False,
    )
    opt_parser.add_option(
        "--txt_ref", dest="txt_ref", type="string", help="Transcript reference used for assembly.", default=None
    )
    opt_parser.add_option(
        "--quantitate",
        dest="quantitate",
        action="store_true",
        help="""Will quantitate against reference transcript
                                  annotations instead of assembly""",
        default=False,
    )
    opt_parser.add_option(
        "--out_dir", dest="out_dir", type="string", help="Root output directory of cufflinks runs", default=None
    )
    opt_parser.add_option(
        "--check",
        dest="check",
        action="store_true",
        help="""Will check samples that are not done and print
                                  out which need to still be run""",
        default=False,
    )
    opt_parser.add_option(
        "--num_processes",
        dest="num_processes",
        type="int",
        help="""If running locally, indicate the number of
                                  processes to batch. Def=%d"""
        % DEF_NUM_PROCESSES,
        default=None,
    )
    opt_parser.add_option(
        "--nice", dest="nice", action="store_true", help="If running locally, run using nice", default=False
    )
    opt_parser.add_option("--LSF", dest="run_lsf", action="store_true", help="Run through LSF", default=None)
    opt_parser.add_option(
        "--print_cmd",
        dest="print_cmd",
        action="store_true",
        help="Print the commands to run, but do not run",
        default=False,
    )

    (options, args) = opt_parser.parse_args()

    # validate the command line arguments
    opt_parser.check_required("-i")
    opt_parser.check_required("--out_dir")
    opt_parser.check_required("--txt_ref")

    out_dir = formatDir(options.out_dir)
    if not os.path.exists(out_dir):
        print("Output directory does not exist: %s" % out_dir)
        opt_parser.print_help()
        sys.exit(1)

    num_processes = options.num_processes
    nice = options.nice
    quantitate = options.quantitate
    print_cmd = options.print_cmd
    force = options.force
    check = options.check

    ctr = 0
    with open(options.input) as sample_file:
        for line in sample_file:
            line = formatLine(line)

            # A blank line would otherwise break the two-field unpack below.
            if not line.strip():
                continue

            s_id, bam = line.split("\t")

            # Make subdir
            subdir = out_dir + "/" + s_id + "_cufflinks"
            if not os.path.exists(subdir):
                os.mkdir(subdir)

            # A sample is done when a non-empty transcripts.gtf exists;
            # getsize raises OSError when the file is missing.
            try:
                file_is_present = os.path.getsize(subdir + "/transcripts.gtf") > 0
            except OSError:
                file_is_present = False

            if check:
                if not file_is_present:
                    print("Need to run %s" % s_id)
                continue

            if file_is_present and not force:
                continue

            ctr += 1

            cmd = "%s -o %s " % (CUFF_EXEC, subdir)

            if quantitate:
                cmd += "-G %s " % options.txt_ref
            else:
                cmd += "-g %s " % options.txt_ref
            cmd += "-u %s" % bam

            if num_processes:
                # Local execution.
                if nice:
                    cmd = "nice " + cmd

                print(cmd)
                if print_cmd:
                    continue

                # Crude throttle: every num_processes-th command runs in
                # the foreground (os.system blocks), the others are started
                # in the background and not waited for.
                if ctr % num_processes == 0:
                    os.system(cmd)
                else:
                    Popen(cmd, shell=True, executable=SHELL)
            else:
                # No --num_processes given: submit through LSF.
                if print_cmd:
                    print(cmd)
                    continue

                tmp_file = "%s/tmp_cuff_%s.txt" % (os.curdir, s_id)
                # "week" is presumably the LSF queue -- confirm against
                # runLSF.
                runLSF(cmd, "%s.cufflinks.bsub.out" % s_id, "cuff_%s" % s_id, "week", tmp_file_name=tmp_file)

    sys.exit(0)

Exemple #4

0

Afficher le fichier

def main():
    """Launch one Cufflinks run per sample listed in a tab-delimited file.

    Each non-blank line of the ``-i`` file must hold ``<sample id>\\t<bam>``.
    For every sample a ``<out_dir>/<sample id>_cufflinks`` directory is
    created if missing, and the sample is considered done when that
    directory holds a non-empty ``transcripts.gtf``.

    * ``--check`` only prints the samples that still need to run.
    * ``--force`` reruns samples that are already done.
    * With ``--num_processes`` the commands run locally (optionally under
      ``nice``); without it every command is submitted through ``runLSF``.
      The ``--LSF`` flag itself is parsed but not consulted.
    * ``--print_cmd`` prints the commands instead of running them.

    Exits with status 1 when the output directory does not exist, otherwise
    with status 0.
    """
    opt_parser = OptionParser()

    # Add Options. Required options should have default=None
    opt_parser.add_option("-i",
                          dest="input",
                          type="string",
                          help="""Tab-delimited file that specifies sample name
                                  and bam location""",
                          default=None)
    opt_parser.add_option("--force",
                          dest="force",
                          action="store_true",
                          help="""By default, will only run Cufflinks if no
                                  output file exists. This option forces the
                                  runs on every sample.""",
                          default=False)
    opt_parser.add_option("--txt_ref",
                          dest="txt_ref",
                          type="string",
                          help="Transcript reference used for assembly.",
                          default=None)
    opt_parser.add_option("--quantitate",
                          dest="quantitate",
                          action="store_true",
                          help="""Will quantitate against reference transcript
                                  annotations instead of assembly""",
                          default=False)
    opt_parser.add_option("--out_dir",
                          dest="out_dir",
                          type="string",
                          help="Root output directory of cufflinks runs",
                          default=None)
    opt_parser.add_option(
        "--check",
        dest="check",
        action="store_true",
        help="""Will check samples that are not done and print
                                  out which need to still be run""",
        default=False)
    opt_parser.add_option("--num_processes",
                          dest="num_processes",
                          type="int",
                          help="""If running locally, indicate the number of
                                  processes to batch. Def=%d""" %
                          DEF_NUM_PROCESSES,
                          default=None)
    opt_parser.add_option("--nice",
                          dest="nice",
                          action="store_true",
                          help="If running locally, run using nice",
                          default=False)
    opt_parser.add_option("--LSF",
                          dest="run_lsf",
                          action="store_true",
                          help="Run through LSF",
                          default=None)
    opt_parser.add_option("--print_cmd",
                          dest="print_cmd",
                          action="store_true",
                          help="Print the commands to run, but do not run",
                          default=False)

    (options, args) = opt_parser.parse_args()

    # validate the command line arguments
    opt_parser.check_required("-i")
    opt_parser.check_required("--out_dir")
    opt_parser.check_required("--txt_ref")

    out_dir = formatDir(options.out_dir)
    if not os.path.exists(out_dir):
        print("Output directory does not exist: %s" % out_dir)
        opt_parser.print_help()
        sys.exit(1)

    num_processes = options.num_processes
    nice = options.nice
    quantitate = options.quantitate
    print_cmd = options.print_cmd
    force = options.force
    check = options.check

    ctr = 0
    with open(options.input) as sample_file:
        for line in sample_file:
            line = formatLine(line)

            # A blank line would otherwise break the two-field unpack below.
            if not line.strip():
                continue

            s_id, bam = line.split("\t")

            # Make subdir
            subdir = out_dir + "/" + s_id + "_cufflinks"
            if not os.path.exists(subdir):
                os.mkdir(subdir)

            # A sample is done when a non-empty transcripts.gtf exists;
            # getsize raises OSError when the file is missing.
            try:
                file_is_present = os.path.getsize(subdir +
                                                  "/transcripts.gtf") > 0
            except OSError:
                file_is_present = False

            if check:
                if not file_is_present:
                    print("Need to run %s" % s_id)
                continue

            if file_is_present and not force:
                continue

            ctr += 1

            cmd = "%s -o %s " % (CUFF_EXEC, subdir)

            if quantitate:
                cmd += "-G %s " % options.txt_ref
            else:
                cmd += "-g %s " % options.txt_ref
            cmd += "-u %s" % bam

            if num_processes:
                # Local execution.
                if nice:
                    cmd = "nice " + cmd

                print(cmd)
                if print_cmd:
                    continue

                # Crude throttle: every num_processes-th command runs in
                # the foreground (os.system blocks), the others are started
                # in the background and not waited for.
                if ctr % num_processes == 0:
                    os.system(cmd)
                else:
                    Popen(cmd, shell=True, executable=SHELL)
            else:
                # No --num_processes given: submit through LSF.
                if print_cmd:
                    print(cmd)
                    continue

                tmp_file = "%s/tmp_cuff_%s.txt" % (os.curdir, s_id)
                # "week" is presumably the LSF queue -- confirm against
                # runLSF.
                runLSF(cmd,
                       "%s.cufflinks.bsub.out" % s_id,
                       "cuff_%s" % s_id,
                       "week",
                       tmp_file_name=tmp_file)

    sys.exit(0)

Exemple #5

0

Afficher le fichier

def main():
    """Run ``SCRIPT`` once per chromosome, locally in batches or through LSF.

    The chromosomes come from ``getChr`` applied to the ``-i`` directory.
    The process first changes into the ``-d`` root directory, so the
    per-chromosome ``tmp_clusterASExons2_*`` output names in each command
    are relative to it.  With ``--run_LSF`` every command goes through
    ``runLSF``; otherwise commands are started locally.  Always exits with
    status 0.
    """
    parser = OptionParser()

    # Add Options. Required options should have default=None
    parser.add_option(
        "-d", dest="root_dir", type="string", default=None,
        help="""Root directory that contains subdirectoires
                                  with output from getASEventReadCounts""")
    parser.add_option(
        "-i", dest="input_dir", type="string", default=None,
        help="""Directory containing original input files to
                                  getASEventReadCounts.py. This is used to
                                  obtain the chromosome information.""")
    parser.add_option(
        "-s", dest="samples", type="string", default=None,
        help="""Comma separated list of the samples that will
                                  be used.  The order which they are given is
                                  the order in the output of the file.""")
    parser.add_option(
        "--lengthNorm", dest="lengthNorm", action="store_true", default=False,
        help="""Flag to indicate length normalization was
                                  done on the counts. Used for splitting the IR
                                  counts back into left and right counts""")
    parser.add_option(
        "--num_processes", dest="num_processes", type="int",
        default=DEF_NUM_PROCESSES,
        help="""Will run each chromosome in batches using this
                                  number of parallel processes. DEF=%d""" %
        DEF_NUM_PROCESSES)
    parser.add_option(
        "--run_LSF", dest="run_lsf", action="store_true", default=False,
        help="Will run everything through LSF")

    opts, _unused_args = parser.parse_args()

    # validate the command line arguments
    for required_flag in ("-d", "-i", "-s"):
        parser.check_required(required_flag)

    root_dir = formatDir(opts.root_dir)
    # Work from the root directory so that output files land there.
    os.chdir(root_dir)

    input_dir = formatDir(opts.input_dir)
    samples = opts.samples
    batch_size = opts.num_processes

    for position, this_chr in enumerate(getChr(input_dir), 1):
        pieces = [
            "python %s" % SCRIPT,
            "-d %s" % root_dir,
            "-o tmp_clusterASExons2_%s.out" % this_chr,
            "--left_intron tmp_clusterASExons2_%s_left_intron.out" % this_chr,
            "--right_intron tmp_clusterASExons2_%s_right_intron.out" % this_chr,
            "-s %s" % samples,
        ]
        if opts.lengthNorm:
            pieces.append("--lengthNorm")
        pieces.append("--which_chr %s" % this_chr)
        cmd = " ".join(pieces)

        if opts.run_lsf:
            job_name = samples.replace(",", "-") + "_" + this_chr
            runLSF(cmd, "%s.clusterASExons2.bsub.out" % this_chr, job_name,
                   "hour")
        elif position % batch_size == 0:
            # Every batch_size-th command runs in the foreground.
            os.system(cmd)
        else:
            # The others are started in the background and not waited for.
            print(cmd)
            Popen(cmd, shell=True, executable=SHELL)

    sys.exit(0)

Exemple #6

0

Afficher le fichier

Fichier : run_clusterASExons2_by_chr.py Projet : Tmacme/juncBASE

def main():
    """Run ``SCRIPT`` once per chromosome, locally in batches or through LSF.

    The chromosome list is obtained from ``getChr`` on the ``-i`` directory.
    The process changes into the ``-d`` root directory before building the
    commands, so the ``tmp_clusterASExons2_*`` output files named in each
    command are relative to it.  With ``--run_LSF`` each command is handed
    to ``runLSF``; otherwise it is started locally.  Always exits with
    status 0.
    """
    opt_parser = OptionParser()

    # Add Options. Required options should have default=None
    opt_parser.add_option("-d",
                          dest="root_dir",
                          type="string",
                          help="""Root directory that contains subdirectoires
                                  with output from getASEventReadCounts""",
                          default=None)
    opt_parser.add_option("-i",
                          dest="input_dir",
                          type="string",
                          help="""Directory containing original input files to
                                  getASEventReadCounts.py. This is used to
                                  obtain the chromosome information.""",
                          default=None)
    opt_parser.add_option("-s",
                          dest="samples",
                          type="string",
                          help="""Comma separated list of the samples that will
                                  be used.  The order which they are given is
                                  the order in the output of the file.""",
                          default=None)
    opt_parser.add_option("--lengthNorm",
                          dest="lengthNorm",
                          action="store_true",
                          help="""Flag to indicate length normalization was
                                  done on the counts. Used for splitting the IR
                                  counts back into left and right counts""",
                          default=False)
    opt_parser.add_option("--num_processes",
                          dest="num_processes",
                          type="int",
                          help="""Will run each chromosome in batches using this
                                  number of parallel processes. DEF=%d""" % DEF_NUM_PROCESSES,
                          default=DEF_NUM_PROCESSES)
    opt_parser.add_option("--run_LSF",
                          dest="run_lsf",
                          action="store_true",
                          help="Will run everything through LSF",
                          default=False)

    (options, args) = opt_parser.parse_args()

    # validate the command line arguments
    opt_parser.check_required("-d")
    opt_parser.check_required("-i")
    opt_parser.check_required("-s")

    root_dir = formatDir(options.root_dir)
    # Change to the root directory to make sure output files are put here
    os.chdir(root_dir)

    input_dir = formatDir(options.input_dir)

    samples = options.samples

    lengthNorm = options.lengthNorm

    num_processes = options.num_processes
    run_lsf = options.run_lsf

    chr_list = getChr(input_dir)

    ctr = 0
    for this_chr in chr_list:
        ctr += 1

        cmd = "python %s " % SCRIPT
        cmd += "-d %s " % root_dir
        cmd += "-o tmp_clusterASExons2_%s.out " % this_chr
        cmd += "--left_intron tmp_clusterASExons2_%s_left_intron.out " % this_chr
        cmd += "--right_intron tmp_clusterASExons2_%s_right_intron.out " % this_chr
        cmd += "-s %s " % samples

        if lengthNorm:
            cmd += "--lengthNorm "

        cmd += "--which_chr %s" % this_chr

        if run_lsf:
            # The last argument is presumably the LSF queue -- confirm
            # against runLSF.
            runLSF(cmd,
                   "%s.clusterASExons2.bsub.out" % this_chr,
                   samples.replace(",", "-") + "_" + this_chr,
                   "hour")
            continue

        # Crude throttle: every num_processes-th command runs in the
        # foreground (os.system blocks), the others are started in the
        # background and not waited for.
        if ctr % num_processes == 0:
            os.system(cmd)
        else:
            # Parenthesised single-argument form prints the same text under
            # Python 2 and is also valid Python 3.
            print(cmd)
            Popen(cmd, shell=True, executable=SHELL)

    sys.exit(0)