Beispiel #1
0
def SetupOptionsParser():  # {
    """Build the command-line parser for the Barnacle/TopHat-Fusion comparison.

    Returns:
        An OptionParser with its ``num_args`` attribute set to the number of
        positional arguments named in the usage string.
    """
    positional = ["LIB", "BARNACLE_FILE", "TOPHAT_FUSION_FILES"]
    summary = (
        "Compares a list of fusions predicted by Barnacle and a list "
        "predicted by TopHat-Fusion and returns which predictions are "
        "common to both, or unique to one or the other."
    )
    parser = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    parser.num_args = len(positional)
    # (flags, keyword settings) pairs, registered in the order they are listed.
    option_specs = [
        (
            ("--coord-buffer",),
            {
                "type": "int",
                "metavar": "N",
                "help": "Consider events identical if their coordinates are "
                "within Nbp of each other. [default: %default]",
            },
        ),
        (
            ("-f", "--force"),
            {
                "action": "store_true",
                "help": "Force filtering to take place, even if the output "
                "directory already exists.",
            },
        ),
        (
            ("-d", "--debug"),
            {
                "action": "store_true",
                "help": "Print debug information while the program runs.",
            },
        ),
    ]
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
    initial_values = {"coord_buffer": 1000, "force": False, "debug": False}
    parser.set_defaults(**initial_values)
    return parser
Beispiel #2
0
def SetupOptionsParser(): #{
  """Create the option parser for this script.

  The description is still an empty string and the positional-argument list
  is provisional (both carry TODO markers).

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  summary = "" #TODO
  positional = ["LIB", "BARNACLE_FILE"] #TODO
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  # All miscellaneous options are plain on/off switches.
  switches = (
    (("-f", "--force"),
     "Force filtering to take place, even if the output directory already "
     "exists."),
    (("-d", "--debug"),
     "Print debug information while the program runs."),
    (("--extreme-debug",),
     "Print extremely in-depth debug information while the program runs. "
     "Not recommended for large jobs."),
  )
  misc_opts = OptionGroup(parser, "Miscellaneous Options")
  for flags, help_text in switches:
    misc_opts.add_option(*flags, action="store_true", help=help_text)
  parser.add_option_group(misc_opts)
  initial_values = {"force": False, "debug": False, "extreme_debug": False}
  parser.set_defaults(**initial_values)
  return parser
Beispiel #3
0
def SetupOptionsParser(): #{
  """Build the option parser for the primer-design script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = [
    "LIB",
    "BARNACLE_FILE",
    "ALIGNMENTS_FILES",
    "CONTIG_SEQS_FILE",
    "TRANSCRIPT_SEQS_FILE",
    "TRANSCRIPTOME_HOSTNAME",
    "TRANSCRIPTOME_PORT",
    "GENOME_HOSTNAME",
    "GENOME_PORT",
  ]
  summary = ("Use Primer3 and gfPcr to create qPCR and sequence validation "
    "primers for the Barnacle events in the given file.")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  add = parser.add_option
  # Minimum primer-to-breakpoint distances, one option per primer kind.
  add("--qpcr-buffer", metavar="N", type="int",
      help="Ensure that qPCR primers are at least Nbp from the breakpoint. "
           "[default: %default]")
  add("--seq-buffer", metavar="N", type="int",
      help="Ensure that sequencing primers are at least Nbp from the "
           "breakpoint. [default: %default]")
  add("-f", "--force", action="store_true",
      help="Force filtering to take place, even if the output directory "
           "already exists.")
  add("-d", "--debug", action="store_true",
      help="Print debug information while the program runs.")
  initial_values = {
    "qpcr_buffer": 20,
    "seq_buffer": 70,
    "force": False,
    "debug": False,
  }
  parser.set_defaults(**initial_values)
  return parser
Beispiel #4
0
def SetupOptionsParser(): #{
  """Build the option parser for the exon-boundary matching script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["LIB", "BARNACLE_FILE", "GENES_FILE"]
  summary = ("Go through the group members in the input file and for each "
    "member check how many of its breakpoint coordinates match up with exon "
    "boundaries.")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  buffer_help = ("Count the breakpoint coordinate as matching an exon "
    "coordinate if it is within Nbp to either side of it. "
    "[default:%default]")
  parser.add_option("-b", "--buffer-size",
    metavar="N", type="int", help=buffer_help)
  # add option for additional annotations files?
  # The remaining options are on/off switches shown under their own heading.
  switches = [
    (("-f", "--force"),
     "Force filtering to take place, even if the output directory already "
     "exists."),
    (("-d", "--debug"),
     "Print debug information while the program runs."),
    (("--extreme-debug",),
     "Print extremely in-depth debug information while the program runs. "
     "Not recommended for large jobs."),
  ]
  misc_opts = OptionGroup(parser, "Miscellaneous Options")
  for flags, help_text in switches:
    misc_opts.add_option(*flags, action="store_true", help=help_text)
  parser.add_option_group(misc_opts)
  initial_values = {
    "buffer_size": 4,
    "force": False,
    "debug": False,
    "extreme_debug": False,
  }
  parser.set_defaults(**initial_values)
  return parser
Beispiel #5
0
def SetupOptionsParser(): #{
  """Build the option parser for the read-to-contig support script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["LIB", "BARNACLE_FILE", "READ_TO_CONTIG_FILE"]
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description="Calculates read-to-contig support for groups in the "
                "input file")
  parser.num_args = len(positional)
  add = parser.add_option
  add("--min-overlap", metavar="N", type="int",
      help="Require that reads overlap breakpoints by at least N bp. "
           "[default: %default]")
  add("--disable-profiling-timer", dest="dpt", action="store_true",
      help="Sometimes this script can hang when trying to spawn child "
           "processes, due to the kernel's profiling timer. Use this option "
           "to disable the profiling timer if the script seems to be "
           "hanging.")
  # No default is registered for the log file, so it parses as None.
  add("--log-file", metavar="FILE", dest="log_file_name",
      help="Log all messages in FILE")
  add("-d", "--debug", action="store_true",
      help="Print debug information while the program runs.")
  add("--extreme-debug", dest="extreme_debug", action="store_true",
      help="Print extremely in-depth debug information while the program "
           "runs. Not recommended for large jobs.")
  initial_values = {
    "min_overlap": 5,
    "dpt": False,
    "debug": False,
    "extreme_debug": False,
  }
  parser.set_defaults(**initial_values)
  return parser
Beispiel #6
0
def SetupOptionsParser(): #{
  """Build the option parser for the per-library prediction counting script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["LIBRARY_LIST", "LIBS_DIR"]
  summary = ("Counts number of predictions, number of recurrent predictions, "
    "number of genes with predictions, and number of genes with recurrent "
    "predictions for each library in the input list.")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  # --write-events and --count-only toggle the same destination.
  option_specs = [
    (("--write-events",),
     dict(action="store_true", dest="write_events",
          help="Write events to recurrent or library-specific output files. "
               "[default]")),
    (("--count-only",),
     dict(action="store_false", dest="write_events",
          help="Do not write events to recurrent or library-specific output "
               "files.")),
    (("-d", "--debug"),
     dict(action="store_true",
          help="Print debug information while the program runs.")),
  ]
  for flags, settings in option_specs:
    parser.add_option(*flags, **settings)
  # "force" gets a default even though no option in this parser sets it.
  initial_values = {"write_events": True, "force": False, "debug": False}
  parser.set_defaults(**initial_values)
  return parser
Beispiel #7
0
def SetupOptionsParser(): #{
  """Build the option parser for the pair-to-genome support script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  description_string = ("Calculates pair-to-genome support for groups in "
    "the input file")
  args = [ "BARNACLE_FILE", "PAIR_TO_GENOME_FILE", ]
  usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
  parser = OptionParser(description=description_string,
                        version="%prog " + VERSION,
                        usage=usage_string)
  parser.num_args = len(args)
  parser.add_option("--read-length",
                    type="int", metavar="N",
                    help="Each read is N bp long. [ default: %default ]")
  # metavar="N" makes the generated option synopsis match the "N bp" wording
  # of the help text (without it optparse prints FRAG_LEN).
  parser.add_option("--frag-len",
                    type="int", metavar="N",
                    help="The expected length of the mate-pair fragments "
                         "is N bp. [ default: %default ]")
  parser.add_option("--frag-fract",
                    type="float", metavar="F",
                    help="When looking for read-pairs spanning the genomic "
                         "event region, only count a pair as being "
                         "significantly different from the expected length if "
                         "the fractional difference between the observed and "
                         "expected fragment lengths is more than F. "
                         "[ default: %default ]")
  parser.add_option("--min-mapq",
                    type="int", metavar="N",
                    help="When looking for read-pairs spanning the genomic "
                         "event region, filter pairs with mapping quality "
                         "less than N. [ default: %default ]")
  parser.add_option("--max-sam-retries",
                    type="int", metavar="N",
                    help="If there is an error running a samtools view "
                         "command, retry the command a maximum of N times. "
                         "[ default: %default ]")
  parser.add_option("--disable-profiling-timer",
                    action="store_true", dest="dpt",
                    help="Sometimes this script can hang when trying to spawn "
                         "child processes, due to the kernel's profiling "
                         "timer. Use this option to disable the profiling "
                         "timer if the script seems to be hanging.")
  # No default is registered for the log file, so it parses as None.
  parser.add_option("--log-file",
                    dest="log_file_name", metavar="FILE",
                    help="Log all messages in FILE")
  parser.add_option("-d", "--debug",
                    action="store_true",
                    help="Print debug information while the program runs.")
  parser.add_option("--extreme-debug",
                    action="store_true", dest="extreme_debug",
                    help="Print extremely in-depth debug information while "
                      "the program runs. Not recommended for large jobs.")
  parser.set_defaults(read_length = 50,
                      frag_len    = 200,
                      frag_fract  = 0.10,
                      min_mapq    = 10,
                      max_sam_retries = 5,
                      dpt=False,
                      debug=False,
                      extreme_debug=False)
  return parser
Beispiel #8
0
def SetupOptionsParser(): #{
  """Build the option parser for the relative exon-coverage script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  description_string = ("Calculates maximum total coverage of exons involved "
      "in events, then compares that to event coverage to estimate relative "
      "coverage of event transcripts.")
  args = [ "LIB", "BARNACLE_FILE",  "P2G_FILE" ]
  usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
  parser = OptionParser(description=description_string,
                        version="%prog " + VERSION,
                        usage=usage_string)
  parser.num_args = len(args)
  parser.add_option("-r", "--read-length",
    type="int",
    help="The length of the reads in the paired-read to genome file. "
      "[default=%default].")
  parser.add_option("--min-overlap",
    type="int", metavar="N",
    help="Require that reads overlap positions by at least N bp. "
      "[default: %default]")
  # --allow-mismatches and --require-perfect toggle the same destination.
  parser.add_option("--allow-mismatches",
    action="store_false", dest="require_perfect",
    help="Allow gaps and mismatches in read-to-genome alignments. [default]")
  parser.add_option("--require-perfect",
    action="store_true", dest="require_perfect",
    help="Only count reads with perfect read-to-genome alignments.")
  parser.add_option("-e", "--max-edit-distance",
    type="int", metavar="N",
    help="Only count reads with an edit distance not greater than N. "
      "[default: %default]")
  misc_group = OptionGroup(parser, "Miscellaneous Options")
  misc_group.add_option("--disable-profiling-timer",
                    action="store_true", dest="dpt",
                    help="Sometimes this script can hang when trying to spawn "
                         "child processes, due to the kernel's profiling "
                         "timer. Use this option to disable the profiling "
                         "timer if the script seems to be hanging.")
  misc_group.add_option("-f", "--force",
                    action="store_true",
                    help="Force filtering to take place, even if the output "
                         "directory already exists.")
  misc_group.add_option("-d", "--debug",
                    action="store_true",
                    help="Print debug information while the program runs.")
  misc_group.add_option("--extreme-debug",
                    action="store_true", dest="extreme_debug",
                    help="Print extremely in-depth debug information while "
                      "the program runs. Not recommended for large jobs.")
  parser.add_option_group(misc_group)
  # require_perfect is defaulted explicitly so that it parses as False (the
  # documented "--allow-mismatches" default) instead of None.
  parser.set_defaults(read_length=75,
                      min_overlap=5,
                      require_perfect=False,
                      max_edit_distance=1,
                      dpt=False,
                      force=False,
                      debug=False,
                      extreme_debug=False)
  return parser
def SetupOptionsParser():  # {
    """Build the option parser for the exon-coordinate extraction script.

    Returns:
        An OptionParser with its ``num_args`` attribute set to the number of
        positional arguments named in the usage string.
    """
    positional = ["INPUT_FILE", "OUTPUT_DIR"]
    summary = (
        "Extract exon coordinates from a gene annotation file and create a "
        "file holding the exon coordinates formatted for the overlap code, "
        "with non-coding genes and UTRs marked."
    )
    parser = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    parser.num_args = len(positional)
    add = parser.add_option

    # The known annotation types are spliced into the help text.
    known_types = ", ".join(ANNOT_TYPES)
    annot_help = (
        "The type of annotations file being used for the gene names, "
        "e.g.: %s. " % known_types
    )
    annot_help += (
        "The code will try to automatically determine the annotations type "
        "if this option is not used."
    )
    add("--annotations-type", help=annot_help)

    # Each pair below toggles one shared destination on or off.
    add(
        "--filter-chromosomes",
        action="store_true",
        help="Only write out exons for genes on standard chromosomes: "
        "chr<I>, chrX, chrY, chrM (where <I> is any integer). [default]",
    )
    add(
        "--all-chromosomes",
        dest="filter_chromosomes",
        action="store_false",
        help="Write out exons for genes on all chromosomes.",
    )
    add(
        "--no-introns",
        dest="include_introns",
        action="store_false",
        help="Write only exon coordinates, not introns.",
    )
    add(
        "--introns",
        dest="include_introns",
        action="store_true",
        help="Write both exon and intron coordinates. [default]",
    )

    add(
        "-f",
        "--force",
        action="store_true",
        help="Force filtering to take place, overwriting the exon "
        "coordinates file if it already exists.",
    )
    add(
        "-d",
        "--debug",
        action="store_true",
        help="Print debug information while the program runs.",
    )
    add(
        "--extreme-debug",
        dest="extreme_debug",
        action="store_true",
        help="Print extremely in-depth debug information while the program "
        "runs. Not recommended for large jobs.",
    )
    initial_values = {
        "filter_chromosomes": True,
        "include_introns": True,
        "force": False,
        "debug": False,
        "extreme_debug": False,
    }
    parser.set_defaults(**initial_values)
    return parser
Beispiel #10
0
def SetupOptionsParser(): #{
  """Build the option parser for the unparsable-event cleanup script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["LIB", "BARNACLE_FILE"]
  summary = ("Makes a backup of the input file, and creates a new file with "
    "any unparsable events removed. Unparsable events are saved in their own "
    "file.")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  debug_help = "Print debug information while the program runs."
  parser.add_option("-d", "--debug", action="store_true", help=debug_help)
  parser.set_defaults(debug=False)
  return parser
Beispiel #11
0
def SetupOptionsParser(): #{
  """Build the option parser for this script.

  The description is still an empty string (marked TODO).

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  summary = "" #TODO
  positional = ["LIB", "BARNACLE_FILE", "GENES_FILE"]
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  parser.add_option("--event-buffer", metavar="N", type="int",
    help="For split events, check whether the breakpoint coordinates are "
         "within N bases of an exon. [default: %default]")
  # Everything after the buffer size is an on/off switch; a dest of None
  # lets optparse derive the destination from the long option name.
  switches = [
    ("--use-existing-coords", None, "use_existing_group_coords",
     "If a breakpoint coordinates file already exists, just use it rather "
     "than generating a new one."),
    ("--keep-coords-file", None, None,
     "After exon-overlap processing, do not remove the alignment-coordinates "
     "file that was produced."),
    ("--disable-profiling-timer", None, "dpt",
     "Sometimes this script can hang when trying to spawn child processes, "
     "due to the kernel's profiling timer. Use this option to disable the "
     "profiling timer if the script seems to be hanging."),
    ("--force", "-f", None,
     "Force filtering to take place, even if the output directory already "
     "exists."),
    ("--debug", "-d", None,
     "Print debug information while the program runs."),
    ("--extreme-debug", None, "extreme_debug",
     "Print extremely in-depth debug information while the program runs. "
     "Not recommended for large jobs."),
  ]
  for long_flag, short_flag, dest, help_text in switches:
    flags = [long_flag] if short_flag is None else [short_flag, long_flag]
    settings = {"action": "store_true", "help": help_text}
    if dest is not None:
      settings["dest"] = dest
    parser.add_option(*flags, **settings)
  initial_values = {
    "event_buffer": 5,
    "use_existing_group_coords": False,
    "keep_coords_file": False,
    "dpt": False,
    "force": False,
    "debug": False,
    "extreme_debug": False,
  }
  parser.set_defaults(**initial_values)
  return parser
Beispiel #12
0
def SetupOptionsParser():
  """Build the option parser for the script that checks ACF status per library.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["LIB_LIST_FILE", "DIR_HEAD"]
  summary = ("Runs the check ACF status script on all libraries in the input "
    "file.")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  # No default is registered for the log file, so it parses as None.
  parser.add_option("--log-file", metavar="FILE", dest="log_file_name",
    help="Log all messages in FILE")
  parser.add_option("-d", "--debug", action="store_true",
    help="Print debug information while the program runs.")
  parser.set_defaults(debug=False)
  return parser
Beispiel #13
0
def SetupOptionsParser(): #{
  """Build the option parser for the cluster job submission script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["JOB_NAME", "JOB_FILE", "CLUSTER_HEAD"]
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description="Submit jobs to cluster")
  parser.num_args = len(positional)
  # (flags, keyword settings) pairs, registered in the order they are listed.
  option_specs = [
    # -s and -m toggle the same "single" destination.
    (("-s", "--single"),
     dict(action="store_true",
          help="JOB_FILE is a single job [default].")),
    (("-m", "--multiple"),
     dict(action="store_false", dest="single",
          help="JOB_FILE is a list of jobs.")),
    (("--mem", "--memory"),
     dict(help='The memory requirement of the jobs [default:"%default"].')),
    # The help texts for hostname and queue say their defaults are read from
    # barnacle.cfg; no default is set in this parser.
    (("--hostname",),
     dict(help="The hostname(s) to submit to [default: read from "
               "barnacle.cfg].")),
    (("--queue",),
     dict(help="The queue(s) to submit to [default: read from "
               "barnacle.cfg].")),
    (("-w", "--wall-time"),
     dict(metavar="H:MM:SS",
          help="The maximum time to spend on the job.")),
    (("--email",),
     dict(help="E-mail status updates to the given email address")),
    (("--disable-profiling-timer",),
     dict(action="store_true", dest="dpt",
          help="Sometimes this script can hang when trying to spawn child "
               "processes, due to the kernel's profiling timer. Use this "
               "option to disable the profiling timer if the script seems "
               "to be hanging.")),
    (("-d", "--debug"),
     dict(action="store_true",
          help="Print debug information while the program runs.")),
  ]
  for flags, settings in option_specs:
    parser.add_option(*flags, **settings)
  initial_values = {"single": True, "mem": "1G", "dpt": False, "debug": False}
  parser.set_defaults(**initial_values)
  return parser
Beispiel #14
0
def SetupOptionsParser(): #{
  """Build the option parser for merging read-to-contig support results.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["LIB", "BARNACLE_FILE"]
  summary = ("Merges split read-to-contig support results file and "
    "integrates it into the chimeric transcript results")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  # The directory override is stored as "jobs_dir"; no default is set for it.
  parser.add_option("--r2c-dir", metavar="DIR", dest="jobs_dir",
    help="Use this directory for the read-to-contig support results, rather "
         "than the default.")
  parser.add_option("-d", "--debug", action="store_true",
    help="Print debug information while the program runs.")
  parser.set_defaults(debug=False)
  return parser
Beispiel #15
0
def SetupOptionsParser(): #{
  """Build the option parser for the primer-design script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = [
    "LIB",
    "BARNACLE_FILE",
    "ALIGNMENTS_FILES",
    "CONTIG_SEQS_FILE",
    "TRANSCRIPT_SEQS_FILE",
  ]
  summary = ("Use Primer3 and gfPcr to create qPCR and sequence validation "
    "primers for the Barnacle events in the given file.")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  # Both options are on/off switches that start out False.
  switches = (
    (("-f", "--force"),
     "Force filtering to take place, even if the output directory already "
     "exists."),
    (("-d", "--debug"),
     "Print debug information while the program runs."),
  )
  for flags, help_text in switches:
    parser.add_option(*flags, action="store_true", help=help_text)
  parser.set_defaults(**{"force": False, "debug": False})
  return parser
Beispiel #16
0
def SetupOptionsParser(): #{
  """Build the option parser for merging pair-to-genome support results.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["LIB", "BARNACLE_FILE"]
  summary = ("Merges split pair-to-genome support results and integrates "
    "them into the Barnacle results")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  timer_help = ("Sometimes this script can hang when trying to spawn child "
    "processes, due to the kernel's profiling timer. Use this option to "
    "disable the profiling timer if the script seems to be hanging.")
  debug_help = "Print debug information while the program runs."
  parser.add_option("--disable-profiling-timer",
    dest="dpt", action="store_true", help=timer_help)
  parser.add_option("-d", "--debug", action="store_true", help=debug_help)
  parser.set_defaults(**{"dpt": False, "debug": False})
  return parser
Beispiel #17
0
def SetupOptionsParser(): #{
  """Build the option parser for the Barnacle setup script.

  Returns an OptionParser whose ``num_args`` attribute holds the number of
  positional arguments listed in the usage string.
  """
  positional = ["CLUSTER_HEAD"]
  summary = ("Setup the Barnacle tool-suite: compile Gap Realigner "
    "binaries")
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description=summary)
  parser.num_args = len(positional)
  add = parser.add_option
  # setup_annots starts out True; --no-annots switches it off.
  add("--no-annots", dest="setup_annots", action="store_false",
      help="Skipping setting up annotation files.")
  add("-f", "--force", action="store_true",
      help="Force filtering to take place, even if the output directory "
           "already exists.")
  add("-d", "--debug", action="store_true",
      help="Print debug information while the program runs.")
  initial_values = {"setup_annots": True, "force": False, "debug": False}
  parser.set_defaults(**initial_values)
  return parser
def SetupOptionsParser(): #{
  """Build the option parser for the candidate-identification script.

  The script examines contig alignments for split or gapped alignments
  that may represent chimeric transcripts. Options are organised into
  ungrouped candidate-type switches plus the "Alignment Parsing",
  "Alignment Grouping", "Alignment Selection", "Split Candidate",
  "Gap Realignment" and "Miscellaneous" option groups.

  The number of expected positional arguments (ALIGNMENT_FILE and
  OUTPUT_DIR) is recorded on the returned parser as ``num_args``.

  Returns:
    OptionParser with every option registered and defaults set. Path-like
    options without an entry in set_defaults (gene_coords_path,
    ctg_seq_path, gap_realigner, gap_config, contig_set, log_file_name)
    default to None.
  """
  description_string = ("Examines contig alignments to find contigs "
    "resulting in interesting split or gapped alignments potentially "
    "representing chimeric transcripts.")
  args = [ "ALIGNMENT_FILE", "OUTPUT_DIR", ]
  usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
  parser = OptionParser(description=description_string,
                        version="%prog " + VERSION,
                        usage=usage_string)
  parser.num_args = len(args)
  # Paired on/off switches share a dest; the default is set at the bottom.
  parser.add_option("--split-candidates",
                    action="store_true", dest="check_split",
                    help="Check alignments for contigs matching split "
                         "candidate alignment signatures. [default]")
  parser.add_option("--no-split-candidates",
                    action="store_false", dest="check_split",
                    help="Do not check alignments for contigs matching split "
                         "candidate alignment signatures.")
  parser.add_option("--gap-candidates",
                    action="store_true", dest="check_gap",
                    help="Check alignments for contigs matching gapped "
                         "candidate alignment signatures. [default]")
  parser.add_option("--no-gap-candidates",
                    action="store_false", dest="check_gap",
                    help="Do not check alignments for contigs matching "
                         "gapped candidate alignment signatures.")
  alignment_parsing_grp = OptionGroup(parser, "Alignment Parsing Options")
  alignment_parsing_grp.add_option("-n", "--num-aligns",
                    type='int', dest="num_aligns", metavar="N",
                    help="For each contig extract from the alignments file "
                         "the N highest scoring alignments that pass the PID "
                         "filtering criteria used. [default: %default]")
  alignment_parsing_grp.add_option("-i", "--min-identity",
                    type='float', dest="min_identity", metavar="PID",
                    help="Extract from the alignments file only alignments "
                         "with a percent identity of at least PID. "
                         "[default: %default]")
  alignment_parsing_grp.add_option("--genes",
                    action="store_true", dest="add_gene_annotation",
                    help="Record the gene features overlapped by each "
                         "alignment [default]")
  alignment_parsing_grp.add_option("--no-genes",
                    action="store_false", dest="add_gene_annotation",
                    help="Do not record the gene features overlapped by any "
                         "alignments")
  alignment_parsing_grp.add_option("--gene-coords",
                    metavar="FILE", dest="gene_coords_path",
                    help="Determine whether the alignments overlap any gene "
                         "features using the coordinates in FILE")
  alignment_parsing_grp.add_option("--transcript-selection-buffer",
                    type="int", metavar="N",
                    help="When selecting which transcript(s) a contig "
                         "represents, select all transcripts that overlap "
                         "the alignments by amounts at most Nbp less than "
                         "the largest amount of overlap. [default: %default]")
  alignment_parsing_grp.add_option("--keep-coords-file",
                    action="store_true",
                    help="After gene feature-overlap processing, do not remove "
                         "the alignment-coordinates file that was produced.")
  parser.add_option_group(alignment_parsing_grp)
  alignment_grouping_grp = OptionGroup(parser, "Alignment Grouping Options")
  alignment_grouping_grp.add_option("--single-align",
                    type='float', dest="longest_single_align",
                    metavar="FRACTION",
                    help="If any single alignment for a contig uses at least "
                         "FRACTION of the total contig length, do not look "
                         "for any split or gapped alignments for that contig. "
                         "[default: %default]")
  alignment_grouping_grp.add_option("--merge-overlap",
                    type='float', dest="min_merge_overlap",
                    metavar="FRACTION",
                    help="If two alignments represent portions of the contig "
                         "that overlap by at least FRACTION of the length of "
                         "the shorter alignment, group the alignments "
                         "together. [default: %default]")
  parser.add_option_group(alignment_grouping_grp)
  alignment_selecting_grp = OptionGroup(parser, "Alignment Selection Options")
  alignment_selecting_grp.add_option("--smart-chooser",
                    action="store_false", dest="use_quick_chooser",
                    help="Use the smarter, but slower, choose best "
                         "alignments method [default]")
  alignment_selecting_grp.add_option("--quick-chooser",
                    action="store_true", dest="use_quick_chooser",
                    help="Use the simple, but faster, choose best alignments "
                         "method")
  alignment_selecting_grp.add_option("--maintain-pared-groups",
                    action="store_true", dest="maintain_pared_groups",
                    help="Use paring info to only add an alignment to a "
                         "group if the alignment will not later be pared; "
                         "only relevant when using smart chooser [default]")
  alignment_selecting_grp.add_option("--pare-after-grouping",
                    action="store_false", dest="maintain_pared_groups",
                    help="Do not pare alignments until all alignments for "
                         "the current contig have been grouped; only "
                         "relevant when using smart chooser")
  alignment_selecting_grp.add_option("--mm-min-score",
                    type='float', metavar="F", dest="mm_min_score_fract",
                    help="When deciding whether an alignment group "
                         "multi-maps, only use alignments with scores at "
                         "least F of the maximum score in the group "
                         "[default: %default]")
  alignment_selecting_grp.add_option("--mm-max-pid-diff",
                    type='float', metavar="F", dest="mm_max_pid_diff",
                    help="When deciding whether an alignment group "
                         "multi-maps, only use alignments with percent "
                         "identities at most F less than the maximum percent "
                         "identity in the group; only relevant when using "
                         "smart chooser [default: %default]")
  alignment_selecting_grp.add_option("--mm-max-pid-diff-gap",
                    type='float', metavar="F", dest="mm_max_pid_diff_gap",
                    help="When deciding whether the alignment group used to "
                         "look for gap candidates multi-maps, only use "
                         "alignments with percent identities at most F less "
                         "than the maximum percent identity in the group; "
                         "only relevant when using smart chooser [default: "
                         "%default]")
  alignment_selecting_grp.add_option("--min-score",
                    type='float', metavar="F", dest="min_score_fract",
                    help="When paring alignment groups, only keep alignments "
                         "with scores at least F of the maximum score "
                         "in the group [default: %default]")
  alignment_selecting_grp.add_option("--max-pid-diff",
                    type='float', metavar="F", dest="max_pid_diff",
                    help="When paring alignment groups, only keep alignments "
                         "with percent identities at most F less than the "
                         "maximum percent identity in the group; only "
                         "relevant when using smart chooser [default: "
                         "%default]")
  alignment_selecting_grp.add_option("--prefer-exons",
                    action="store_true",
                    help="When paring alignment groups, if any alignments in "
                         "the group overlap exons, discard all alignments "
                         "in the group that do not overlap any exon")
  alignment_selecting_grp.add_option("--prefer-spliced",
                    action="store_true",
                    help="When paring alignment groups, if any alignments in "
                         "the group are spliced, discard all unspliced "
                         "alignments in the group; only relevant when using "
                         "smart chooser")
  parser.add_option_group(alignment_selecting_grp)
  split_candidate_grp = OptionGroup(parser, "Split Candidate Options")
  split_candidate_grp.add_option("--ctg-rep",
                    type='float', dest="min_ctg_represented",
                    metavar="FRACTION",
                    help="Only report split alignments when the total amount "
                         "of the contig involved in the two alignments are at "
                         "least FRACTION of the total contig length. "
                         "[default: %default]")
  split_candidate_grp.add_option("--min-end-dup-fract",
                    type='float', dest="min_end_dup_fract",
                    metavar="F",
                    help="When labeling alignment topologies, use the "
                         "end-duplication label if the target regions "
                         "overlap by at least F of the smaller target "
                         "region [default: %default]")
  parser.add_option_group(split_candidate_grp)
  gap_realignment_grp = OptionGroup(parser, "Gap Realignment Options")
  # The help previously pointed at a "--use-gap" option, which this parser
  # does not define; the switch controlling gap processing here is
  # --gap-candidates / --no-gap-candidates.
  gap_realignment_grp.add_option("--ctg-file",
                    dest="ctg_seq_path", metavar="FILE",
                    help="Read in contig sequences from FILE. Otherwise, the "
                         "contig sequences file path is inferred from the "
                         "alignments path. This option is ignored if gap "
                         "candidates are not being checked (see "
                         "--gap-candidates).")
  gap_realignment_grp.add_option("--gap-realigner",
                    metavar="FILE",
                    help="Use FILE as the program to realign gap sequence of "
                         "gapped alignments")
  gap_realignment_grp.add_option("--gap-config",
                    metavar="FILE",
                    help="FILE is the configuration file for the gap "
                         "realigner.")
  gap_realignment_grp.add_option("--gap-min-size",
                    type='int', dest="min_gap_size", metavar="N",
                    help="When processing alignments, only realign gaps of "
                         "at least N bases [default: %default]")
  gap_realignment_grp.add_option("--gap-check-min-pid",
                    type='float', dest="min_gap_check_pid", metavar="F",
                    help="When processing alignments, only realign gaps "
                         "when the initial alignment has a percent identity "
                         "of at least F. [default: %default]")
  gap_realignment_grp.add_option("--gap-check-min-len",
                    type='float', dest="min_gap_check_len_fract", metavar="F",
                    help="When processing alignments, only realign gaps "
                         "when the initial alignment involves at least F of "
                         "the total contig length [default: %default]")
  gap_realignment_grp.add_option("--gap-check-min-score",
                    type='float', dest="min_gap_check_score_fract",
                    metavar="F",
                    help="When processing alignments, only realign gaps "
                         "when the initial alignment score is at least  F "
                         "of the total contig length [default: %default]")
  gap_realignment_grp.add_option("--gap-max-num-aligns",
                    type="int", metavar="N",
                    help="If a contig has more than N \"good\" alignments, "
                         "do not attempt gap realignment for any of them. "
                         "[default: %default]")
  gap_realignment_grp.add_option("--gap-min-identity",
                   type='float', dest="min_gap_pid", metavar="PID",
                   help="When realigning gaps, only report candidates when "
                        "the gap sequence aligns back to the rest of the "
                        "contig with a percent identity of at least PID "
                        "[default: %default]")
  gap_realignment_grp.add_option("--gap-min-fraction",
                   type='float', dest="min_gap_fract", metavar="F",
                   help="When realigning gaps, only report candidates when "
                        "at least F of the bases in the gap sequence align "
                        "back to the rest of the contig [default: %default]")
  gap_realignment_grp.add_option("--gap-max-len",
                  type="int", metavar="N",
                  help="Only run the gap-realigner on contigs shorter "
                    "than Nbp long. [default: %default]")
  gap_realignment_grp.add_option("--gap-debug",
                  action="store_true",
                  help="Use the debug option when calling the gap "
                       "realignment tool.")
  parser.add_option_group(gap_realignment_grp)
  misc_grp = OptionGroup(parser, "Miscellaneous Options")
  misc_grp.add_option("--no-mito",
                    action="store_true", dest="no_mito",
                    help="Do not report results involving mitochondrial DNA. "
                         "[default]")
  misc_grp.add_option("--allow-mito",
                    action="store_false", dest="no_mito",
                    help="Allow results involving mitochondrial DNA.")
  misc_grp.add_option("--output-psl",
                    action="store_true",
                    help="Output psl alignment lines for events found "
                         "[default]")
  misc_grp.add_option("--no-output-psl",
                    action="store_false", dest="output_psl",
                    help="Do not output psl alignment lines for events found")
  misc_grp.add_option("--contig-set",
                    help="Include the contig set name in the output file "
                         "names (e.g. main, adj, etc.)")
  misc_grp.add_option("--disable-profiling-timer",
                    action="store_true", dest="dpt",
                    help="Sometimes this script can hang when trying to spawn "
                         "child processes, due to the kernel's profiling "
                         "timer. Use this option to disable the profiling "
                         "timer if the script seems to be hanging.")
  misc_grp.add_option("-a", "--append",
                    action="store_true", dest="append",
                    help="Append to the output file, rather than "
                         "overwriting it.")
  misc_grp.add_option("--log-file",
                    dest="log_file_name", metavar="FILE",
                    help="Log all messages in FILE")
  misc_grp.add_option("-d", "--debug",
                    action="store_true", dest="debug",
                    help="Print debug information while the program runs.")
  misc_grp.add_option("--extreme-debug",
                    action="store_true", dest="extreme_debug",
                    help="Print extremely in-depth debug information while "
                      "the program runs. Not recommended for large jobs.")
  parser.add_option_group(misc_grp)
  # NOTE(review): min_gap_pid defaults to 0.95 while the other percent
  # identity defaults (min_identity, min_gap_check_pid) are 40.0 -- confirm
  # which scale the gap realigner expects.
  parser.set_defaults(check_split=True,
                      check_gap=True,
                      num_aligns=500,
                      min_identity=40.0,
                      add_gene_annotation=True,
                      transcript_selection_buffer=10,
                      keep_coords_file=False,
                      longest_single_align=0.999,
                      min_merge_overlap=0.8,
                      use_quick_chooser=False,
                      maintain_pared_groups=True,
                      mm_min_score_fract=0.8,
                      mm_max_pid_diff=1.0,
                      mm_max_pid_diff_gap=5.0,
                      min_score_fract=0.85,
                      max_pid_diff=0.5,
                      prefer_exons=False,
                      prefer_spliced=False,
                      min_ctg_represented=0.85,
                      min_end_dup_fract=0.80,
                      min_gap_size=4,
                      min_gap_check_pid=40.0,
                      min_gap_check_len_fract=0.4,
                      min_gap_check_score_fract=0.4,
                      gap_max_num_aligns=3,
                      min_gap_pid=0.95,
                      min_gap_fract=0.3,
                      gap_max_len=50000,
                      gap_debug=False,
                      no_mito=True,
                      output_psl=True,
                      dpt=False,
                      append=False,
                      debug=False,
                      extreme_debug=False)
  return parser
Beispiel #19
0
def SetupOptionsParser(): #{
  """Build the option parser for the hybrid read-support job submitter.

  Two ungrouped options (``--min-overlap``, ``--records-per-job``) are
  followed by a "Cluster Options" group and a "Miscellaneous Options"
  group. The number of expected positional arguments (LIB, BARNACLE_FILE,
  PAIR_TO_GENOME_FILE, READ_TO_CONTIG_FILE) is recorded on the returned
  parser as ``num_args``.

  Returns:
    OptionParser with all options registered and defaults set; options
    without an explicit default (cluster_head, wall_time, email) are None.
  """
  positional = ["LIB", "BARNACLE_FILE", "PAIR_TO_GENOME_FILE",
                "READ_TO_CONTIG_FILE"]
  parser = OptionParser(
    usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
    version="%prog " + VERSION,
    description="Create and submit parallel jobs for calculating "
                "hybrid read-support")
  parser.num_args = len(positional)

  def register(target, specs):
    # Add every (flags, keyword settings) pair to target, in listed order.
    for flags, settings in specs:
      target.add_option(*flags, **settings)

  register(parser, [
    (("--min-overlap",), dict(
      type="int", metavar="N",
      help="Require that reads overlap breakpoints by at "
           "least N bp. [default: %default]")),
    (("--records-per-job",), dict(
      type="int", metavar="N",
      help="Process N records in each job. [default=%default]")),
  ])

  cluster_opts = OptionGroup(parser, "Cluster Options")
  register(cluster_opts, [
    (("-c", "--cluster-head"), dict(
      dest="cluster_head",
      help="Cluster head node to submit to.")),
    (("--mem", "--memory"), dict(
      help="The memory requirement of the jobs "
           "[default:\"%default\"].")),
    (("--hostname",), dict(
      help="The hostname(s) to submit to "
           "[default:\"%default\"].")),
    (("--queue",), dict(
      help="The queue(s) to submit to [default:\"%default\"].")),
    (("-w", "--wall-time"), dict(
      metavar="H:MM:SS",
      help="The maximum time to spend on the job.")),
    (("--email",), dict(
      help="E-mail status updates on submitted jobs to the "
           "given email address")),
  ])
  parser.add_option_group(cluster_opts)

  misc_opts = OptionGroup(parser, "Miscellaneous Options")
  register(misc_opts, [
    (("--disable-profiling-timer",), dict(
      action="store_true", dest="dpt",
      help="Sometimes this script can hang when trying to spawn "
           "child processes, due to the kernel's profiling "
           "timer. Use this option to disable the profiling "
           "timer if the script seems to be hanging.")),
    (("-f", "--force"), dict(
      action="store_true",
      help="Force filtering to take place, even if the output "
           "directory already exists.")),
    (("-d", "--debug"), dict(
      action="store_true",
      help="Print debug information while the program runs.")),
    (("--extreme-debug",), dict(
      action="store_true", dest="extreme_debug",
      help="Print extremely in-depth debug information while "
           "the program runs. Not recommended for large jobs.")),
  ])
  parser.add_option_group(misc_opts)

  parser.set_defaults(
    min_overlap=5,
    records_per_job=1000,
    mem="3G",
    hostname="q*",
    queue="all.q",
    dpt=False,
    force=False,
    debug=False,
    extreme_debug=False)
  return parser
Beispiel #20
0
def SetupOptionsParser(): #{
  """Build the option parser for the candidate-group filtering script.

  Registers the filter thresholds and on/off switch pairs, plus a
  "Miscellaneous Options" group. Each on/off pair writes to a single dest
  whose default is set in the final set_defaults call. The number of
  expected positional arguments (LIB and BARNACLE_FILE) is recorded on the
  returned parser as ``num_args``.

  Returns:
    OptionParser with every option registered and defaults set;
    ``gene_names_path`` has no explicit default and is therefore None.
  """
  description_string = ("Applies the given filters to the candidate contig "
    "groups that Barnacle identified.")
  args = [ "LIB", "BARNACLE_FILE", ]
  usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
  parser = OptionParser(description=description_string,
                        version="%prog " + VERSION,
                        usage=usage_string)
  parser.num_args = len(args)
  parser.add_option("--read-to-contig-with-weak",
                    type="int", dest="read_to_ctg", metavar="N",
                    help="Fail groups with no contig with more than N "
                         "supporting read-to-contig alignments, unless they "
                         "pass the strong-only read-to-contig filter. [default: "
                         "%default]")
  parser.add_option("--read-to-contig",
                    type="int", dest="read_to_ctg_u", metavar="N",
                    help="Fail groups with no contig with more than N "
                         "strongly-supporting read-to-contig alignments, unless "
                         "they pass the with-weak read-to-contig filter. "
                         "[default: %default]")
  # "weak" is the off-switch for req_u_read_to_ctg. It was registered as
  # store_true, which made it behave exactly like --read-to-contig-strong.
  parser.add_option("--read-to-contig-weak",
                    action="store_false", dest="req_u_read_to_ctg",
                    help="Do not fail groups failing the unique "
                         "read-to-contig filter, unless they also fail the "
                         "total read-to-contig filter. [default]")
  parser.add_option("--read-to-contig-strong",
                    action="store_true", dest="req_u_read_to_ctg",
                    help="Fail groups failing the unique read-to-contig "
                         "filter, even if they would pass the total "
                         "read-to-contig filter.")
  parser.add_option("--pair-to-genome",
                    type="int", dest="pair_to_gen", metavar="N",
                    help="Fail groups with fewer than N supporting "
                         "read-pair-to-genome alignments. [default: %default]")
  parser.add_option("--filtered-pair-to-genome",
                    type="int", dest="pair_to_gen_f", metavar="N",
                    help="Fail groups with fewer than N mapq-filtered "
                         "supporting read-pair-to-genome alignments. "
                         "[default: %default]")
  parser.add_option("--ignore-intronic",
                    action="store_true", dest="ignore_intron",
                    help="Do not count read-pairs mapping to inferred "
                         "intronic locations when calculating "
                         "read-pair-to-genome support. [default]")
  parser.add_option("--count-intronic",
                    action="store_false", dest="ignore_intron",
                    help="Count read-pairs mapping to inferred intronic "
                         "locations when calculating read-pair-to-genome "
                         "support.")
  parser.add_option("--no-struct-RNA",
                    action="store_true", dest="no_rna",
                    help="Fail groups overlapping small structural RNA "
                         "regions. [default]")
  parser.add_option("--allow-struct-RNA",
                    action="store_false", dest="no_rna",
                    help="Do not fail groups overlapping small structural "
                         "RNA regions.")
  parser.add_option("--no-mitochondria",
                    action="store_true", dest="no_mito",
                    help="Fail groups involving mitochondrial DNA. "
                         "[default]")
  parser.add_option("--allow-mitochondria",
                    action="store_false", dest="no_mito",
                    help="Do not fail groups involving mitochondrial DNA.")
  parser.add_option("--max-num-groups",
                    type="int", dest="max_num_groups", metavar="N",
                    help="Fail groups with no contig appearing in fewer than "
                         "N groups. Set this to zero to disable this filter. "
                         "[default: %default]")
  # NOTE: the dest is spelled "reguire_multiple_aligns" (sic). It is left
  # unchanged because code outside this function may read that attribute.
  parser.add_option("--allow-single-aligner",
                    action="store_false", dest="reguire_multiple_aligns",
                    help="Do not fail groups based on the number of aligners "
                         "they were found with. [default]")
  parser.add_option("--require-multiple-aligners",
                    action="store_true", dest="reguire_multiple_aligns",
                    help="Fail groups that were not found with multiple "
                         "aligners.")
  parser.add_option("--min-identity",
                    type="float", dest="min_pid", metavar="F",
                    help="Fail groups with no contig that has greater than "
                         "F% percent identity for both alignments. [default: "
                         "%default]")
  parser.add_option("--length-sensitive-pid",
                    action="store_true", dest="adjust_pid",
                    help="Adjust the minimum percent identity to use "
                         "maximum number of mismatches for short alignment "
                         "blocks. [default]")
  parser.add_option("--absolute-pid",
                    action="store_false", dest="adjust_pid",
                    help="Use the minimum percent identity provided without "
                         "adjustment from maximum number of mismatches for "
                         "short alignment blocks.")
  parser.add_option("--max-mismatches",
                    type="int", metavar="N",
                    help="If --length-sensitive-pid is used, use the "
                         "minimum of the given minimum percent identity "
                         "value and the calculated percent identity of an "
                         "alignment with N mismatches. [default=%default]")
  parser.add_option("--no-homopolymers",
                    action="store_true", dest="no_runs",
                    help="Fail gapped events involving runs of a single "
                         "base. [default]")
  parser.add_option("--allow-homopolymers",
                    action="store_false", dest="no_runs",
                    help="Do not fail gapped events involving runs of a "
                         "single base.")
  parser.add_option("--soft-runs",
                    action="store_true", dest="soft_runs",
                    help="Allow a single divergent base when determining "
                         "whether a sequence will be considered a homopolymer "
                         "run. [default]")
  parser.add_option("--hard-runs",
                    action="store_false", dest="soft_runs",
                    help="Strictly require all bases to be identical for a "
                         "sequence to be considered a homopolymer run.")
  parser.add_option("--max-ctg-overlap",
                    type="int", dest="ctg_olap", metavar="N",
                    help="Fail paired-alignment candidates when the "
                         "contig coordinates of the alignments overlap by "
                         "more than Nbp. It makes sense for this value to be "
                         "approximately one read-length. [default: %default]")
  parser.add_option("--min-ctg-rep",
                    type="float", dest="ctg_rep", metavar="F",
                    help="Fail groups with no contig with the fraction of "
                         "its length represented by the alignments being at "
                         "least F. [default: %default]")
  parser.add_option("--no-multi-mapping",
                    action="store_true", dest="filter_multi_maps",
                    help="Fail groups with alignments that are flagged "
                         "as multi-mapping. [default]")
  parser.add_option("--allow-multi-mapping",
                    action="store_false", dest="filter_multi_maps",
                    help="Do not fail groups with alignments that are "
                         "flagged as multi-mapping.")
  parser.add_option("--no-repeats",
                    action="store_true", dest="filter_repeats",
                    help="Fail groups that are flagged as "
                         "overlapping repeat sequence.")
  parser.add_option("--allow-repeats",
                    action="store_false", dest="filter_repeats",
                    help="Do not fail groups that are flagged as "
                         "overlapping repeat sequence. [default]")
  parser.add_option("--no-polyA-events",
                    action="store_true", dest="filter_polyA",
                    help="Fail groups that are probably polyA "
                         "tails: single-base runs at the very "
                         "beginning or end of a contig. [default]")
  parser.add_option("--allow-polyA-events",
                    action="store_false", dest="filter_polyA",
                    help="Do not fail groups that are probably "
                         "polyA tails: single-base runs at the very "
                         "beginning or end of a contig.")
  parser.add_option("--ignore-topologies",
                    metavar="TOPOLOGIES",
                    help="Ignore groups classified as any of the topologies "
                         "in the comma-separated list TOPOLOGIES.")
  parser.add_option("--sort-by-r2cu",
                    action="store_true",
                    help="Output the groups sorted by their unique "
                         "read-to-contig coverage in descending order.")
  parser.add_option("-p", "--pretty",
                    action="store_true",
                    help="Print output in a more readable, but less easily "
                         "searched/parsed format.")
  # With no explicit dest, optparse derives "data_check" from the flag name.
  parser.add_option("--data-check",
                    action="store_true",
                    help="Perform some simple sanity checks on the groups to "
                         "ensure that the upstream code is not having any "
                         "obvious problems. [default]")
  parser.add_option("--no-data-check",
                    action="store_false", dest="data_check",
                    help="Presume that the input file is well-formatted.")
  parser.add_option("--no-split-out",
                    action="store_false", dest="split_out",
                    help="Do not output groups of different topologies to "
                         "separate output files (create only pass and fail "
                         "output files). [default]")
  parser.add_option("--split-out",
                    action="store_true", dest="split_out",
                    help="Output groups with different alignment topologies "
                         "to separate output files.")
  parser.add_option("--print-fails",
                    action="store_true", dest="print_fails",
                    help="Write failing groups to a failed groups file. "
                         "[default]")
  parser.add_option("--no-print-fails",
                    action="store_false", dest="print_fails",
                    help="Do not write failing groups to a failed groups "
                         "file.")
  parser.add_option("--filter-gene-names",
                    dest="gene_names_path", metavar="FILE",
                    help="Use only gene names found in FILE (file must have "
                         "one gene name per line).")
  parser.add_option("--recalculate-ctg-rep",
                    action="store_true",
                    help="Recalculate the contig representation fraction "
                         "for gap events. [default]")
  # The off-switch must clear recalculate_ctg_rep. It was store_true with no
  # dest, so it set an unrelated "no_recalculate_ctg_rep" attribute and left
  # recalculate_ctg_rep at its default of True.
  parser.add_option("--no-recalculate-ctg-rep",
                    action="store_false", dest="recalculate_ctg_rep",
                    help="Always use the reported contig representation "
                         "fraction.")
  parser.add_option("--robust-crc",
                    action="store_true", dest="robust_crc",
                    help="If a reasonable contig representation cannot be "
                         "calculated for a gap event, just fail that "
                         "candidate and display a warning rather than "
                         "raising an error. [default]")
  parser.add_option("--brittle-crc",
                    action="store_false", dest="robust_crc",
                    help="If a reasonable contig representation cannot be "
                         "calculated for a gap event, raise an error.")
  parser.add_option("--member-wise",
                    action="store_true", dest="member_wise",
                    help="At least one member of a group must pass all "
                         "filters for the whole group to pass. Only output "
                         "group members that pass all filters. [default]")
  parser.add_option("--group-wise",
                    action="store_false", dest="member_wise",
                    help="If any member of a group passes a filter, then the "
                         "whole group passes that filter.")
  misc_group = OptionGroup(parser, "Miscellaneous Options")
  misc_group.add_option("-f", "--force",
                    action="store_true",
                    help="Force filtering to take place, even if the output "
                         "directory already exists.")
  misc_group.add_option("-d", "--debug",
                    action="store_true",
                    help="Print debug information while the program runs.")
  misc_group.add_option("--extreme-debug",
                    action="store_true", dest="extreme_debug",
                    help="Print extremely in-depth debug information while "
                      "the program runs. Not recommended for large jobs.")
  parser.add_option_group(misc_group)
  parser.set_defaults(read_to_ctg=100,
                      read_to_ctg_u=5,
                      req_u_read_to_ctg=False,
                      pair_to_gen=0,
                      pair_to_gen_f=0,
                      ignore_intron=True,
                      no_rna=True,
                      no_mito=True,
                      max_num_groups=3,
                      reguire_multiple_aligns=False,
                      min_pid=99.0,
                      adjust_pid=True,
                      max_mismatches=1,
                      no_runs=True,
                      soft_runs=True,
                      ctg_olap=75,
                      ctg_rep=0.90,
                      filter_multi_maps=True,
                      filter_repeats=False,
                      filter_polyA=True,
                      ignore_topologies="gap-nontandem-inverted_duplication,gap-tandem-inverted_duplication,local-inversion",
                      sort_by_r2cu=False,
                      pretty=False,
                      # --data-check/--no-data-check write to "data_check";
                      # without this default it was None, not True.
                      data_check=True,
                      # Legacy name that no option writes to; kept in case
                      # code outside this function still reads it.
                      check_data=True,
                      split_out=False,
                      print_fails=True,
                      recalculate_ctg_rep=True,
                      robust_crc=True,
                      member_wise=True,
                      force=False,
                      debug=False,
                      extreme_debug=False)
  return parser
Beispiel #21
0
def SetupOptionsParser(): #{
    """Build the option parser for the read-to-contig job status checker.

    The usage string names one positional argument (JOBS_DIR); that count is
    stored on the parser as ``num_args``. Options are split between a
    "Resubmission Options" group and a "Miscellaneous Options" group.

    Returns:
        The configured OptionParser, with defaults already applied.
    """
    positional = ["JOBS_DIR"]
    parser = OptionParser(
        description="Checks status of read-to-contig support cluster jobs.",
        version="%prog " + VERSION,
        usage="%prog " + " ".join(positional) + " [ OPTIONS ]",
    )
    parser.num_args = len(positional)

    # --- options controlling resubmission of jobs ---
    resubmission = OptionGroup(parser, "Resubmission Options")
    resubmission.add_option(
        "-r", "--resubmit",
        action="store_true",
        help="Resubmit all incomplete jobs.",
    )
    resubmission.add_option(
        "-c", "--cluster-head",
        dest="cluster_head",
        help="Specify cluster head node if resubmitting jobs to cluster",
    )
    # No explicit dest: optparse derives it from the first long flag ("mem").
    resubmission.add_option(
        "--mem", "--memory",
        help='The memory requirement of the resubmitted jobs [default:"%default"].',
    )
    resubmission.add_option(
        "--hostname",
        help="The hostname(s) to resubmit to [default: read from barnacle.cfg].",
    )
    resubmission.add_option(
        "--queue",
        help="The queue(s) to resubmit to [default: read from barnacle.cfg].",
    )
    resubmission.add_option(
        "-w", "--wall-time",
        metavar="H:MM:SS",
        help="When resubmitting, use this value as the wall-time option.",
    )
    resubmission.add_option(
        "--email",
        help="E-mail status updates on resubmitted jobs to the given email address",
    )
    parser.add_option_group(resubmission)

    # --- logging / verbosity / workaround options ---
    miscellaneous = OptionGroup(parser, "Miscellaneous Options")
    miscellaneous.add_option(
        "--disable-profiling-timer",
        action="store_true",
        dest="dpt",
        help=(
            "Sometimes this script can hang when trying to spawn "
            "child processes, due to the kernel's profiling "
            "timer. Use this option to disable the profiling "
            "timer if the script seems to be hanging."
        ),
    )
    miscellaneous.add_option(
        "--log-file",
        dest="log_file_name",
        metavar="FILE",
        help="Log all messages in FILE",
    )
    miscellaneous.add_option(
        "-t", "--terse",
        action="store_true",
        help="Only write number complete and total number of jobs",
    )
    miscellaneous.add_option(
        "-q", "--quiet",
        action="store_true",
        help="Only write output to log-file, not to the screen.",
    )
    miscellaneous.add_option(
        "-d", "--debug",
        action="store_true",
        help="Print debug information while the program runs.",
    )
    miscellaneous.add_option(
        "--extreme-debug",
        action="store_true",
        dest="extreme_debug",
        help=(
            "Print extremely in-depth debug information while "
            "the program runs. Not recommended for large jobs."
        ),
    )
    parser.add_option_group(miscellaneous)

    parser.set_defaults(
        resubmit=False,
        mem="5G",
        dpt=False,
        terse=False,
        quiet=False,
        debug=False,
        extreme_debug=False,
    )
    return parser
Beispiel #22
0
def SetupOptionsParser(): #{
    """Build the option parser for the chimeric transcript simulator.

    Options are registered on the parser itself and in four option groups
    (fusion, partial tandem duplication, internal tandem duplication and
    miscellaneous). The number of positional arguments named in the usage
    string is stored on the parser as ``num_args``.

    Returns:
        The configured OptionParser, with defaults already applied.
    """
    positional = ["GENE_ANNOTATIONS_FILE", "GENOME_SEQUENCE_FILE", "OUTPUT_DIR"]
    parser = OptionParser(
        description="Produce sequences for simulated chimeric transcripts",
        version="%prog " + VERSION,
        usage="%prog " + " ".join(positional) + " [ OPTIONS ]",
    )
    parser.num_args = len(positional)

    def count(flag, text):
        # Spec for an integer-valued option shown with metavar N.
        return (flag,), {"type": "int", "metavar": "N", "help": text}

    def fraction(flag, text):
        # Spec for a float-valued option shown with metavar F.
        return (flag,), {"type": "float", "metavar": "F", "help": text}

    # Each entry is (group title, option specs); a title of None means the
    # options are added directly to the parser rather than to a group.
    sections = [
        (None, [
            (("--skip-single-exon-genes",), {
                "action": "store_true",
                "dest": "skip_single",
                "help": "Do not use genes with only a single exon when "
                        "simulating events. [default]",
            }),
            (("--allow-single-exon-genes",), {
                "action": "store_false",
                "dest": "skip_single",
                "help": "Allow the use of genes with only a single exon "
                        "when simulating events.",
            }),
            (("--chr-filter",), {
                "help": "Only simulate events involving chromosomes in the "
                        "provided comma-delimited list.",
            }),
            count("--min-len",
                  "Ensure that each simulated transcript is at least Nbp long. "
                  "[default: %default]"),
        ]),
        ("Fusion Options", [
            count("--num-fusions",
                  "Simulate N fusion transcripts. [default: %default]"),
            fraction("--fract-inter",
                     "The fraction of fusions to be interchromosomal "
                     "(between genes on distinct chromosomes). "
                     "[default: %default]"),
            fraction("--fract-cont",
                     "The fraction of fusions to maintain continuous "
                     "transcription direction of both genes. "
                     "[default: %default]"),
            fraction("--fract-exon",
                     "The fraction of fusions to have their "
                     "breakpoints exactly at exon boundaries. "
                     "[default: %default]"),
            count("--min-len-fus",
                  "When the breakpoint is not being constrained "
                  "to exon edges, it must be at least Nbp away "
                  "from the edge of the transcript. "
                  "[default: %default]"),
        ]),
        ("Partial Tandem Duplication Options", [
            count("--num-ptds",
                  "Simulate N partial tandem duplication transcripts. "
                  "[default: %default]"),
            fraction("--fract-multi",
                     "The fraction of PTDs to involve a duplication "
                     "of multiple exons, rather than just one. "
                     "[default: %default]"),
            fraction("--fract-extra-ptd",
                     "The fraction of PTDs to involve extra sequence "
                     "between the copies of the duplicated sequence. "
                     "[default: %default]"),
            count("--min-extra-ptd",
                  "When extra sequence is inserted in PTDs, it is at "
                  "least Nbp long. [default: %default]"),
            count("--max-extra-ptd",
                  "When extra sequence is inserted in PTDs, it is at "
                  "most Nbp long. [default: %default]"),
        ]),
        ("Internal Tandem Duplication Options", [
            count("--num-itds",
                  "Simulate N internal tandem duplication "
                  "transcripts. [default: %default]"),
            count("--min-len-itd",
                  "For ITDs, the duplicated sequence must be at least "
                  "Nbp long. [default: %default]"),
            count("--max-len-itd",
                  "For ITDs, the duplicated sequence must be at most "
                  "Nbp long. [default: %default]"),
            fraction("--fract-extra-itd",
                     "The fraction of ITDs to involve extra sequence "
                     "between the copies of the duplicated sequence. "
                     "[default: %default]"),
            count("--min-extra-itd",
                  "When extra sequence is inserted in ITDs, it is at "
                  "least Nbp long. [default: %default]"),
            count("--max-extra-itd",
                  "When extra sequence is inserted in ITDs, it is at "
                  "most Nbp long. [default: %default]"),
        ]),
        ("Miscellaneous Options", [
            (("--seed",), {
                "type": "float",
                "help": "The seed to use to initialize the random number "
                        "generator. If no value is specified, current system "
                        "time is used.",
            }),
            (("-f", "--force"), {
                "action": "store_true",
                "help": "Force filtering to take place, even if the output "
                        "directory already exists.",
            }),
            (("-d", "--debug"), {
                "action": "store_true",
                "help": "Print debug information while the program runs.",
            }),
        ]),
    ]

    for title, specs in sections:
        target = parser if title is None else OptionGroup(parser, title)
        for flags, settings in specs:
            target.add_option(*flags, **settings)
        if title is not None:
            # Attach the group only after all of its options are registered.
            parser.add_option_group(target)

    parser.set_defaults(
        skip_single=True,
        min_len=200,
        num_fusions=0,
        fract_inter=0.5,
        fract_cont=0.75,
        fract_exon=0.9,
        min_len_fus=15,
        num_ptds=0,
        fract_multi=0.75,
        fract_extra_ptd=0.05,
        min_extra_ptd=1,
        max_extra_ptd=5,
        num_itds=0,
        min_len_itd=5,
        max_len_itd=100,
        fract_extra_itd=0.5,
        min_extra_itd=1,
        max_extra_itd=20,
        force=False,
        debug=False,
    )
    return parser
Beispiel #23
0
def SetupOptionsParser(): #{
    """Build the option parser for the event-prediction script.

    Registers fusion, PTD and ITD prediction option groups, a handful of
    parser-level options, and a miscellaneous group. The number of positional
    arguments named in the usage string (LIB, BARNACLE_FILE) is stored on the
    parser as ``num_args``.

    Returns:
        The configured OptionParser, with defaults already applied.
    """
    positional = ["LIB", "BARNACLE_FILE"]
    parser = OptionParser(
        description="Predict events of specific types",
        version="%prog " + VERSION,
        usage="%prog " + " ".join(positional) + " [ OPTIONS ]",
    )
    parser.num_args = len(positional)

    # ------------------------------------------------------------ fusions
    fusions = OptionGroup(parser, "Fusion Prediction Options")
    fusions.add_option(
        "--predict-fusions",
        action="store_true",
        dest="predict_fusions",
        help="Predict fusion events. [default]",
    )
    fusions.add_option(
        "--no-fusions",
        action="store_false",
        dest="predict_fusions",
        help="Do not predict fusion events",
    )
    fusions.add_option(
        "--with-gene-directions",
        action="store_true",
        dest="use_gene_directions",
        help=(
            "Only predict a fusion event if the transcription "
            "direction of the genes involved is continuous "
            "across the contig. [default]"
        ),
    )
    fusions.add_option(
        "--without-gene-directions",
        action="store_false",
        dest="use_gene_directions",
        help="Do not consider transcription direction of genes involved.",
    )
    # NOTE: the help text once also mentioned "and ITD realignment."; that
    # fragment is disabled in the original source.
    fusions.add_option(
        "--transcript-annotations",
        metavar="PATH",
        help=(
            "PATH is the path to a file containing gene "
            "transcript annotations. Used for gene directions."
        ),
    )
    fusions.add_option(
        "--use-conflicts",
        action="store_true",
        help=(
            "When a gene has conflicting directions in the "
            "annotations file, believe that the gene really "
            "does go both ways. [default]"
        ),
    )
    fusions.add_option(
        "--no-use-conflicts",
        action="store_false",
        dest="use_conflicts",
        help=(
            "Do not consider genes going in both directions. "
            "Raise an error if the annotations file contains "
            "conflicting directions."
        ),
    )
    fusions.add_option(
        "--ignore-conflicts",
        action="store_true",
        help=(
            "When a gene has conflicting directions in the "
            "annotations file, just ignore that gene, rather "
            "than halting the script."
        ),
    )
    fusions.add_option(
        "--include-introns",
        action="store_true",
        help=(
            "Predict fusions that overlap introns even if they "
            "do not overlap any coding regions."
        ),
    )
    fusions.add_option(
        "--include-nearby",
        action="store_true",
        help=(
            "Predict fusions that do not overlap genes if they "
            "are close to genes."
        ),
    )
    fusions.add_option(
        "--fusion-dup-filter",
        action="store_true",
        help=(
            "Do not predict a fusion when the contig could be "
            "explained by a duplication event instead. "
            "[default]"
        ),
    )
    fusions.add_option(
        "--no-fusion-dup-filter",
        action="store_false",
        dest="fusion_dup_filter",
        help=(
            "Do not check whether fusion contigs could be "
            "explained by duplication events instead."
        ),
    )
    fusions.add_option(
        "--min-unique-align",
        type="int",
        metavar="N",
        help=(
            "Require that at least Nbp of the contig aligns to "
            "only one of the fusion partners. [default: %default]"
        ),
    )
    fusions.add_option(
        "--min-exon-bounds",
        type="int",
        metavar="N",
        help=(
            "Require that N (0, 1, or 2) of the breakpoints "
            "match up with annotated exon boundaries. [default: "
            "%default]"
        ),
    )
    fusions.add_option(
        "--read-through",
        type="int",
        metavar="N",
        help=(
            "Consider colinear split alignments closer than Nbp "
            "to be read-through events. [default: %default]"
        ),
    )
    parser.add_option_group(fusions)
    # NOTE: a "--filter-gene-names" option (dest "gene_names_path") is
    # commented out in the original source and is not registered here.

    # --------------------------------------------------------------- PTDs
    ptds = OptionGroup(parser, "PTD Prediction Options")
    ptds.add_option(
        "--predict-PTDs",
        action="store_true",
        dest="predict_ptds",
        help="Predict PTD events. [default]",
    )
    ptds.add_option(
        "--no-PTDs",
        action="store_false",
        dest="predict_ptds",
        help="Do not predict PTD events",
    )
    parser.add_option_group(ptds)

    # --------------------------------------------------------------- ITDs
    itds = OptionGroup(parser, "ITD Prediction Options")
    itds.add_option(
        "--predict-ITDs",
        action="store_true",
        dest="predict_itds",
        help="Predict ITD events. [default]",
    )
    itds.add_option(
        "--no-ITDs",
        action="store_false",
        dest="predict_itds",
        help="Do not predict ITD events",
    )
    itds.add_option(
        "--allow-ITD-repeats",
        action="store_false",
        dest="filter_itd_repeats",
        help=(
            "Allow ITD event prediction for candidate "
            "contigs that overlap annotated repeats."
        ),
    )
    itds.add_option(
        "--remove-ITD-repeats",
        action="store_true",
        dest="filter_itd_repeats",
        help=(
            "Do not predict an ITD event if the candidate "
            "contig overlaps any annotated repeats. [default]"
        ),
    )
    itds.add_option(
        "--require-internal-gaps",
        action="store_true",
        dest="require_internal_gaps",
        help=(
            "Require that the duplicated sequence is internal "
            "to the contig for gapped ITD "
            "event predictions."
        ),
    )
    itds.add_option(
        "--allow-edge-gaps",
        action="store_false",
        dest="require_internal_gaps",
        help=(
            "Allow the duplicated sequence to be at the very "
            "edge of the contig for gapped partial exon "
            "duplication predictions. [default]"
        ),
    )
    itds.add_option(
        "--min-edge-gap-fraction",
        type="float",
        metavar="F",
        help=(
            "If the gap is at the very edge of a contig, "
            "require that the fraction of the gap involved "
            "in the duplication is at least F. This option "
            "is ignored if the --require-internal-gaps option "
            "is used. [default: %default]"
        ),
    )
    itds.add_option(
        "--allow-non-gap-ITDs",
        action="store_true",
        dest="allow_non_gap_itds",
        help=(
            "Look for ITDs in groups with junction duplication, "
            "end duplication, and non-colinear topologies "
            "as well as gap topologies."
        ),
    )
    itds.add_option(
        "--only-gap-ITDs",
        action="store_false",
        dest="allow_non_gap_itds",
        help=(
            "Only look for ITDs in groups with gap "
            "topologies [default]."
        ),
    )
    itds.add_option(
        "--exclude-non-coding",
        action="store_true",
        help=(
            "Do not predict ITD events in non-coding genes. "
            "[default]"
        ),
    )
    itds.add_option(
        "--allow-non-coding",
        action="store_false",
        dest="exclude_non_coding",
        help="Report ITD events predicted in non-coding genes.",
    )
    parser.add_option_group(itds)

    # ------------------------------------------------ parser-level options
    parser.add_option(
        "--get-breakpoint-exons",
        metavar="FILE",
        dest="breakpoint_exons",
        help=(
            "Use the exon coordinates in FILE to get the exon "
            "overlapped by the breakpoint in each event (used "
            "for ITDs)."
        ),
    )
    parser.add_option(
        "--event-buffer",
        type="int",
        metavar="N",
        help=(
            "For split events, check whether the breakpoint "
            "coordinates are within N bases of an exon. "
            "[default: %default]"
        ),
    )
    parser.add_option(
        "--use-existing-overlaps",
        action="store_true",
        help=(
            "If a breakpoint/transcripts overlap file already "
            "exists, just use it rather than generating a new "
            "one."
        ),
    )
    parser.add_option(
        "--transcript-sequences",
        metavar="PATH",
        dest="tran_seq_path",
        help=(
            "The path to a file containing transcript sequences "
            "to realign candidate contigs against, for avoiding "
            "false positives due to problems with contig to "
            "genome alignments."
        ),
    )
    parser.add_option(
        "--contig-sequences",
        metavar="PATH",
        dest="ctg_seq_path",
        help=(
            "The path to a file containing candidate contig "
            "sequences to realign to transcript sequences, for "
            "avoiding false positives due to problems with "
            "contig to genome alignments."
        ),
    )
    parser.add_option(
        "--use-existing-realigns",
        action="store_true",
        help=(
            "If contig-to-transcript realignment files already "
            "exist, just use them rather than generating new "
            "ones."
        ),
    )

    # ------------------------------------------------------ miscellaneous
    miscellaneous = OptionGroup(parser, "Miscellaneous Options")
    miscellaneous.add_option(
        "--read-length",
        type="int",
        metavar="N",
        help=(
            "The length of the sequenced reads (used for ITD full duplicate "
            "alignment filter). [default: %default]"
        ),
    )
    miscellaneous.add_option(
        "-p", "--pretty",
        action="store_true",
        help=(
            "Print more readable output file as well as "
            "standard data file."
        ),
    )
    miscellaneous.add_option(
        "--disable-profiling-timer",
        action="store_true",
        dest="dpt",
        help=(
            "Sometimes this script can hang when trying to spawn "
            "child processes, due to the kernel's profiling "
            "timer. Use this option to disable the profiling "
            "timer if the script seems to be hanging."
        ),
    )
    miscellaneous.add_option(
        "-f", "--force",
        action="store_true",
        help=(
            "Force prediction to take place, even if the output "
            "directory already exists."
        ),
    )
    miscellaneous.add_option(
        "-d", "--debug",
        action="store_true",
        help="Print debug information while the program runs.",
    )
    miscellaneous.add_option(
        "--extreme-debug",
        action="store_true",
        dest="extreme_debug",
        help=(
            "Print extremely in-depth debug information while "
            "the program runs. Not recommended for large jobs."
        ),
    )
    parser.add_option_group(miscellaneous)

    # NOTE: a "list_input=True" default is commented out in the original.
    parser.set_defaults(
        predict_fusions=True,
        use_gene_directions=True,
        use_conflicts=True,
        ignore_conflicts=False,
        include_introns=False,
        include_nearby=False,
        fusion_dup_filter=True,
        min_unique_align=5,
        min_exon_bounds=0,
        read_through=50000,
        predict_ptds=True,
        predict_itds=True,
        filter_itd_repeats=True,
        require_internal_gaps=False,
        min_edge_gap_fraction=0.80,
        allow_non_gap_itds=False,
        exclude_non_coding=True,
        event_buffer=5,
        use_existing_overlaps=False,
        use_existing_realigns=False,
        read_length=75,
        pretty=False,
        dpt=False,
        force=False,
        debug=False,
        extreme_debug=False,
    )
    return parser
Beispiel #24
0
def SetupOptionsParser(): #{
  """Build the option parser for the Barnacle support-job status checker.

  The usage string names two positional arguments (LIB_LIST_FILE and
  DIR_HEAD); that count is stored on the parser as ``num_args``.

  The default for ``lib_path_template`` is the path
  DIR_HEAD/%{lib}/Assembly/%{assembly_ver}/barnacle/%{barnacle_ver},
  joined with the platform's path separator.

  Returns:
    The configured OptionParser, with defaults already applied.
  """
  description_string = ("Checks the status of Barnacle support jobs submitted "
    "to the cluster.")
  args = [ "LIB_LIST_FILE", "DIR_HEAD", ]
  usage_string = ("%prog " + " ".join(args) + " [ OPTIONS ]\n"
    "LIB_LIST_FILE is a text file containing a list of library info lines, "
    "each line of the form \"lib_name,assembly_ver,barnacle_ver\" (e.g. "
    "\"A00001,assembler-1.2.1,1.1.0\").\nDIR_HEAD is the directory containing "
    "the subdirectories of the libraries in LIB_LIST_FILE.")
  parser = OptionParser(description=description_string,
                        version="%prog " + VERSION,
                        usage=usage_string)
  parser.num_args = len(args)
  parser.add_option("-c", "--check-cid",
                    action="store_true", dest="check_cid",
                    help="Check the status of the candidate identification "
                         "jobs as well as the support job.")
  parser.add_option("-p", "--check-p2g",
                    action="store_true", dest="check_p2g",
                    help="Check the status of the pair-to-genome support "
                         "jobs as well as the support job.")
  parser.add_option("-r", "--check-r2c",
                    action="store_true", dest="check_r2c",
                    help="Check the status of the read-to-contig support "
                         "jobs as well as the support job.")
  parser.add_option("-t", "--template",
                    metavar="TEMPLATE", dest="lib_path_template",
                    help="The template to use to construct the path to the "
                         "event data files for a library. \"DIR_HEAD\", "
                         "\"%{lib}\", \"%{assembly_ver}\", and "
                         "\"%{barnacle_ver}\" will be replaced with the "
                         "appropriate values from LIB_LIST_FILE. "
                         "[default: %default]")
  parser.add_option("--disable-profiling-timer",
                    action="store_true", dest="dpt",
                    help="Sometimes this script can hang when trying to spawn "
                         "child processes, due to the kernel's profiling "
                         "timer. Use this option to disable the profiling "
                         "timer if the script seems to be hanging.")
  parser.add_option("--log-file",
                    dest="log_file_name", metavar="FILE",
                    help="Log all messages in FILE")
  parser.add_option("--terse",
                    action="store_true", dest="terse",
                    help="When checking candidate identification, "
                         "pair-to-genome, or read-to-contig status, only "
                         "report number complete and total number of jobs.")
  parser.add_option("-q", "--quiet",
                    action="store_true",
                    help="Only write output to log-file, not to the screen.")
  parser.add_option("-d", "--debug",
                    action="store_true",
                    help="Print debug information while the program runs.")
  # os.path.join takes the path components as separate arguments. Passing a
  # single list (as this code previously did) never produces a joined path:
  # it raises TypeError on Python 3 and returns the list itself on Python 2.
  default_template = os.path.join("DIR_HEAD", "%{lib}", "Assembly",
                                  "%{assembly_ver}", "barnacle",
                                  "%{barnacle_ver}")
  parser.set_defaults(check_cid=False,
                      check_p2g=False,
                      check_r2c=False,
                      lib_path_template=default_template,
                      dpt=False,
                      terse=False,
                      quiet=False,
                      debug=False)
  return parser
Beispiel #25
0
def SetupOptionsParser():  # {
    """Build the option parser for the dwgsim read-simulation script.

    Options are registered on the parser itself and in two groups
    ("dwgsim Options" and "Miscellaneous Options"). The number of positional
    arguments named in the usage string is stored on the parser as
    ``num_args``.

    Returns:
        The configured OptionParser, with defaults already applied.
    """
    positional = ["WT_ANNOT", "WT_SEQ", "WT_COV", "EVENT_SEQ", "EVENT_COV"]
    parser = OptionParser(
        description="Use dwgsim to generate simulated reads for wildtype and event sequences.",
        version="%prog " + VERSION,
        usage="%prog " + " ".join(positional) + " [ OPTIONS ]",
    )
    parser.num_args = len(positional)

    # NOTE: a "-A/--cov-adjust" option (and its cov_adjust default) is
    # commented out in the original source and is not registered here.

    # Each entry is (group title, option specs); a title of None means the
    # options are added directly to the parser rather than to a group.
    layout = [
        (None, [
            (("-R", "--read-length"), {
                "type": "int",
                "metavar": "N",
                "help": "Simulate reads Nnt long. [default: %default]",
            }),
            (("-F", "--frag-length"), {
                "type": "int",
                "metavar": "N",
                "help": "Simulate read-pairs from fragments with a mean "
                        "length of Nnt. [default: %default]",
            }),
            (("--min-overlap",), {
                "type": "int",
                "metavar": "N",
                "help": "Ensure that for each event sequence, there is at "
                        "least one read generated that overlaps the event "
                        "split position by Nnt. [default: %default]",
            }),
            (("--chr-filter",), {
                "help": "Only simulate reads from genes on chromosomes in "
                        "the provided comma-delimited list.",
            }),
        ]),
        ("dwgsim Options", [
            (("--err-rate",), {
                "type": "float",
                "metavar": "F",
                "help": "The per-base rate of sequencing errors. "
                        "[default: %default]",
            }),
            (("--mut-rate",), {
                "type": "float",
                "metavar": "F",
                "help": "The rate of mutations. [default: %default]",
            }),
            (("--std-dev",), {
                "type": "float",
                "metavar": "F",
                "help": "The standard deviation of the fragment length, "
                        "defaults to dwgsim default if unused.",
            }),
            (("--extra-dwgsim",), {
                "help": "A string containing any extra options to be used "
                        "in the call to dwgsim.",
            }),
        ]),
        ("Miscellaneous Options", [
            (("--seed",), {
                "type": "float",
                "help": "The seed to use to initialize the random number "
                        "generator. If no value is specified, current system "
                        "time is used.",
            }),
            (("--disable-profiling-timer",), {
                "action": "store_true",
                "dest": "dpt",
                "help": "Sometimes this script can hang when trying to spawn "
                        "child processes, due to the kernel's profiling "
                        "timer. Use this option to disable the profiling "
                        "timer if the script seems to be hanging.",
            }),
            (("-f", "--force"), {
                "action": "store_true",
                "help": "Force filtering to take place, even if the output "
                        "directory already exists.",
            }),
            (("-d", "--debug"), {
                "action": "store_true",
                "help": "Print debug information while the program runs.",
            }),
            (("--extreme-debug",), {
                "action": "store_true",
                "dest": "extreme_debug",
                "help": "Print extremely in-depth debug information while "
                        "the program runs. Not recommended for large jobs.",
            }),
        ]),
    ]

    for title, specs in layout:
        target = parser if title is None else OptionGroup(parser, title)
        for flags, settings in specs:
            target.add_option(*flags, **settings)
        if title is not None:
            # Attach the group only after all of its options are registered.
            parser.add_option_group(target)

    defaults = {
        "read_length": 75,
        "frag_length": 200,
        "min_overlap": 5,
        "err_rate": 0.0037,
        "mut_rate": 0,
        "dpt": False,
        "force": False,
        "debug": False,
        "extreme_debug": False,
    }
    parser.set_defaults(**defaults)
    return parser