Beispiel #1
0
def parse_args(args):
    """
    Build the ``repmask`` argument parser and parse ``args``.

    :param args: list of command line argument strings to parse
    :return: the ``argparse.Namespace`` holding the parsed options

    ``truvari.setup_logging`` is called with the value of ``--debug``
    before the namespace is returned.
    """
    parser = argparse.ArgumentParser(prog="repmask", description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # --input is required and has no default; interpolating %(default)s in
    # its help would print a misleading "(None)".
    parser.add_argument("-i", "--input", type=str, required=True,
                        help="VCF to annotate")
    parser.add_argument("-o", "--output", type=str, default="/dev/stdout",
                        help="Output filename (%(default)s)")
    parser.add_argument("-e", "--executable", type=str, default="RepeatMasker",
                        help="Path to RepeatMasker (%(default)s)")
    parser.add_argument("-m", "--min-length", type=truvari.restricted_int, default=50,
                        help="Minimum size of entry to annotate (%(default)s)")
    parser.add_argument("-M", "--max-length", type=truvari.restricted_int, default=50000,
                        help="Maximum size of entry to annotate (%(default)s)")
    parser.add_argument("-t", "--threshold", type=truvari.restricted_float, default=.8,
                        help="Threshold for pct of allele covered (%(default)s)")
    parser.add_argument("-p", "--params", type=str, default=DEFAULTPARAMS,
                        help="Default parameter string to send to RepeatMasker (%(default)s)")
    parser.add_argument("-T", "--threads", type=truvari.restricted_int, default=os.cpu_count(),
                        help="Number of threads to use (%(default)s)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose logging")
    args = parser.parse_args(args)
    truvari.setup_logging(args.debug)
    return args
Beispiel #2
0
def parse_args(args):
    """Parse the command line options of the SURVIVOR VCF editing utility.

    Args:
        args: List of command line argument strings to parse.

    Returns:
        The argparse namespace holding the parsed options. Before it is
        returned, ``truvari.setup_logging`` is called with the ``--debug``
        flag.
    """
    cli = argparse.ArgumentParser(
        description="A VCF editing utility which adds ref and all sequences to a SURVIVOR fasta file.")
    add_option = cli.add_argument

    # All four file options are mandatory strings.
    add_option("--reference-fasta", "-r", type=str, required=True,
               help="Reference fasta file.")
    add_option("--survivor-insertions-fasta", "-i", type=str, required=True,
               help="Insertions fasta file from SURVIVOR.")
    add_option("--survivor-vcf-file", "-v", type=str, required=True,
               help="VCF file from SURVIVOR.")
    add_option("--output-vcf", "-o", type=str, required=True,
               help="Output path of edited VCF.")
    add_option("--debug", action="store_true", help="Verbose logging")

    options = cli.parse_args(args)
    truvari.setup_logging(options.debug)
    return options
Beispiel #3
0
def parse_args(args):
    """
    Parse the command line parameters of the ``divide`` command.

    Takes the list of argument strings and returns the parsed
    ``argparse.Namespace``. ``truvari.setup_logging(False)`` is called
    after parsing.
    """
    cli = argparse.ArgumentParser(prog="divide", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument("input", metavar="VCF", help="VCF to split")
    cli.add_argument("output", metavar="DIR", help="Output directory to save parts")
    cli.add_argument("-b", "--buffer", default=1000, type=truvari.restricted_int,
                     help="Buffer to make mini-clusters (%(default)s)")
    cli.add_argument("-m", "--min", default=100, type=truvari.restricted_int,
                     help="Minimum number of entries per-vcf (%(default)s)")
    # store_false: the ``no_compress`` attribute is True unless the flag is given.
    cli.add_argument("--no-compress", action="store_false",
                     help="Don't attempt to compress/index sub-VCFs")

    options = cli.parse_args(args)
    truvari.setup_logging(False)
    return options
Beispiel #4
0
def setup_outputs(args):
    """
    Configure logging, open the input VCF and create the output VCF writers.

    Returns a dict with the keys ``o_header`` and ``c_header`` (headers
    produced by ``edit_header`` and ``trubench.edit_header``),
    ``output_vcf`` and ``collap_vcf`` (VCFs opened for writing) and
    ``stats_box`` (zero-initialised counters).

    Exits with status 100 when ``args.hap`` is set and the output header
    does not hold exactly one sample.
    """
    truvari.setup_logging(args.debug)
    logging.info("Params:\n%s", json.dumps(vars(args), indent=4))

    source_vcf = pysam.VariantFile(args.input)
    out_header = edit_header(source_vcf)
    collap_header = trubench.edit_header(source_vcf)
    outputs = {"o_header": out_header, "c_header": collap_header}

    # Guard: bail out before any output file is created.
    sample_count = len(out_header.samples)
    if args.hap and sample_count != 1:
        logging.error("--hap mode requires exactly one sample. Found %d",
                      sample_count)
        sys.exit(100)

    outputs["output_vcf"] = pysam.VariantFile(args.output, 'w',
                                              header=out_header)
    outputs["collap_vcf"] = pysam.VariantFile(args.collapsed_output, 'w',
                                              header=collap_header)
    outputs["stats_box"] = dict(collap_cnt=0, kept_cnt=0, out_cnt=0,
                                consol_cnt=0)
    return outputs
Beispiel #5
0
def parseArgs(args):
    """
    Parse the command line arguments of ``surv_sim``.

    :param args: list of command line argument strings to parse
    :return: the parsed ``argparse.Namespace``

    After parsing, ``reference`` is converted to an absolute path, the
    suffix ``.svt`` is appended to ``output`` and ``setup_logging`` is
    called with the ``--debug`` flag.
    """
    parser = argparse.ArgumentParser(prog="surv_sim", description=inspect.getdoc(surv_sim_main),
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("reference", metavar="REF", type=str,
                        help="Reference file overwhich to simulate SVs")
    # nargs="?" makes the advertised default effective: argparse ignores
    # ``default`` on a positional that is not optional.
    parser.add_argument("output", metavar="OUT", type=str, nargs="?", default="output",
                        help="SVTeaser output basename (%(default)s)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose logging")
    parser.add_argument('--sv_regions', type=str,
                        help='Comma separated file containing (chr, region_start, region_end). \
                        For every row, an SV of length randint(50, mx_variation) is generated with the region \
                        specified by (chr, start, end).\
                        chr, start, end \
                        chr22, 1000, 20000 \
                        chr22, 50000, 80000', required=False)
    parser.add_argument('--num_sv_regions', type=int, default=10,
                        help='Alternatively to the csv file defined by --sv_regions, user can also \
                              provide number of regions to generate SVs for. The programme will randomly \
                              choose locations within the genome to introduce the SVs. --sv_regions will be given priority \
                              if both options are provided.',
                        required=False)
    parser.add_argument('--len_sv_region', type=int, default=10000,
                        help='The length of regions to create.',
                        required=False)
    args = parser.parse_args(args)
    args.reference = os.path.abspath(args.reference)
    args.output = args.output + ".svt"
    setup_logging(args.debug)
    return args
Beispiel #6
0
def parse_args(args):
    """
    Parse the command line parameters of the ``svinfo`` command.

    ``truvari.setup_logging()`` is called before the arguments are parsed;
    the parsed ``argparse.Namespace`` is returned.
    """
    cli = argparse.ArgumentParser(prog="svinfo", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument("-i", "--input", default="/dev/stdin", type=str,
                     help="VCF to annotate (stdin)")
    cli.add_argument("-o", "--output", default="/dev/stdout", type=str,
                     help="Output filename (stdout)")
    cli.add_argument("-m", "--minsize", default=50, type=truvari.restricted_int,
                     help="Minimum size of entry to annotate (%(default)s)")

    truvari.setup_logging()
    options = cli.parse_args(args)
    return options
Beispiel #7
0
def parse_args(args):
    """
    Parse the command line parameters of the ``vcf2df`` command.

    Returns the parsed ``argparse.Namespace``. When ``--sample`` is given
    it is turned into a list: split on commas if ``--multisample`` is
    set, otherwise a single-element list. ``truvari.setup_logging`` is
    called with the ``--debug`` flag before returning.
    """
    cli = argparse.ArgumentParser(prog="vcf2df", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    add_option = cli.add_argument

    add_option("vcf", metavar="VCF", help="VCF to parse")
    add_option("output", metavar="JL", help="Output joblib to save")
    add_option("-b", "--bench-dir", action="store_true",
               help="Input is a truvari bench directory")
    add_option("-i", "--info", action="store_true",
               help="Attempt to put the INFO fields into the dataframe")
    add_option("-f", "--format", action="store_true",
               help="Attempt to put the FORMAT fileds into the dataframe")
    add_option("-s", "--sample", default=None,
               help="SAMPLE name to parse when building columns for --format")
    add_option("-m", "--multisample", action="store_true",
               help=("Parse multiple samples. Splits -s by comma. Sample "
                     "column names will be flattened"))
    add_option("-S", "--skip-compression", action="store_true",
               help="Skip the attempt to optimize the dataframe's size")
    add_option("-c", "--compress", type=int, choices=range(9), default=3,
               help="Compression level for joblib (%(default)s)")
    add_option("--debug", action="store_true", help="Verbose logging")

    options = cli.parse_args(args)
    if options.sample:
        # Always hand a list of sample names to the caller.
        options.sample = (options.sample.split(',') if options.multisample
                          else [options.sample])
    truvari.setup_logging(options.debug)
    return options
Beispiel #8
0
def parse_args(args):
    """
    Parse the command line arguments of the ``remap`` command.

    :param args: list of command line argument strings to parse
    :return: the parsed ``argparse.Namespace``

    ``truvari.setup_logging`` is called with the ``--debug`` flag before
    the namespace is returned.
    """
    parser = argparse.ArgumentParser(
        prog="remap",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-i",
                        "--input",
                        default="/dev/stdin",
                        help="Input VCF (%(default)s)")
    parser.add_argument("-r",
                        "--reference",
                        required=True,
                        help="BWA indexed reference")
    parser.add_argument("-o",
                        "--output",
                        default="/dev/stdout",
                        help="Output VCF (%(default)s)")
    parser.add_argument(
        "-m",
        "--minlength",
        default=50,
        type=truvari.restricted_int,
        help="Smallest length of allele to remap (%(default)s)")
    parser.add_argument(
        "-t",
        "--threshold",
        type=truvari.restricted_float,
        default=.8,
        help="Threshold for pct of allele covered to consider hit (%(default)s)"
    )
    # The original help text ended with a stray extra ")".
    parser.add_argument(
        "-d",
        "--dist",
        type=truvari.restricted_int,
        default=10,
        help=("Minimum distance an alignment must be from a DEL's "
              "position to be considered (%(default)s)"))
    parser.add_argument(
        "-H",
        "--hits",
        type=truvari.restricted_int,
        default=0,
        help="Report top hits as chr:start-end.pct (max %(default)s)")
    parser.add_argument("--debug", action="store_true", help="Verbose logging")
    args = parser.parse_args(args)
    truvari.setup_logging(args.debug)
    return args
Beispiel #9
0
def parse_args(args):
    """
    Parse the command line parameters of the ``hompct`` command.

    Returns the parsed ``argparse.Namespace`` after calling
    ``truvari.setup_logging`` with the ``--debug`` flag.
    """
    cli = argparse.ArgumentParser(prog="hompct", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    add_option = cli.add_argument

    add_option("-i", "--input", required=True, type=str,
               help="Compressed, indexed VCF to annotate ")
    add_option("-o", "--output", default="/dev/stdout", type=str,
               help="Output filename (stdout)")
    add_option("-b", "--buffer", default=5000, type=truvari.restricted_int,
               help="Number of base-pairs up/dn-stream to query (%(default)s)")
    add_option("-m", "--minanno", default=50, type=truvari.restricted_int,
               help="Minimum size of event to annotate (%(default)s)")
    add_option("-M", "--maxgt", default=1, type=truvari.restricted_int,
               help="Largest event size to count for genotyping (%(default)s)")
    add_option("-c", "--mincount", default=0, type=truvari.restricted_int,
               help="Minimum number of genotyping events to report HOMPCT (%(default)s)")
    add_option("--debug", action="store_true", help="Verbose logging")

    options = cli.parse_args(args)
    truvari.setup_logging(options.debug)
    return options
Beispiel #10
0
def parse_args(args):
    """
    Parse the command line parameters of the ``segment`` command.

    Two positionals are accepted: the input VCF and the output VCF.
    Returns the parsed ``argparse.Namespace`` after calling
    ``truvari.setup_logging()``.
    """
    cli = argparse.ArgumentParser(
        prog="segment",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument("vcf", metavar="IN", help="VCF to parse")
    cli.add_argument("output", metavar="OUT", help="Output VCF")
    # Options currently disabled:
    # cli.add_argument("-m", "--min", default=10, type=int,
    # help="Minimum span of variants to segment")
    # cli.add_argument("--alter", action="store_true",
    # help="Add SEG Format field to all variants (false)")
    options = cli.parse_args(args)
    truvari.setup_logging()
    return options
Beispiel #11
0
def parse_args(args):
    """
    Parse the command line parameters of the ``dpcnt`` command.

    ``truvari.setup_logging()`` is called before the arguments are parsed;
    the parsed ``argparse.Namespace`` is returned.
    """
    cli = argparse.ArgumentParser(
        prog="dpcnt",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument("-i", "--input", default="/dev/stdin", type=str,
                     help="VCF to annotate (stdin)")
    cli.add_argument("-b", "--bins", default="0,5,10,15", type=str,
                     help="Coverage bins to bisect left the counts (%(default)s)")
    # store_false: the ``no_ad`` attribute is True unless the flag is given.
    cli.add_argument("--no-ad", action="store_false",
                     help="Skip adding ADCNT bins")
    cli.add_argument("-p", "--present", default=False, action="store_true",
                     help="Only count sites with present (non ./.) genotypes")
    cli.add_argument("-o", "--output", default="/dev/stdout", type=str,
                     help="Output filename (stdout)")

    truvari.setup_logging()
    options = cli.parse_args(args)
    return options
Beispiel #12
0
def parse_args(args):
    """
    Parse the command line parameters of the ``lcr`` command.

    ``truvari.setup_logging()`` is called before the arguments are parsed;
    the parsed ``argparse.Namespace`` is returned.
    """
    cli = argparse.ArgumentParser(prog="lcr", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument("-i", "--input", default="/dev/stdin", type=str,
                     help="VCF to annotate (stdin)")
    cli.add_argument("-o", "--output", default="/dev/stdout", type=str,
                     help="Output filename (stdout)")

    truvari.setup_logging()
    options = cli.parse_args(args)
    return options
Beispiel #13
0
def parse_args(args):
    """
    Parse the command line parameters of the ``bpovl`` command.

    Returns the parsed ``argparse.Namespace`` with an extra ``anno_psets``
    attribute: the ``PRESET_FMTS`` entry selected by ``--preset`` or,
    without a preset, the list
    ``[sequence, begin, end, one_based, comment]`` built from the
    individual options. ``truvari.setup_logging()`` is called before
    returning.
    """
    cli = argparse.ArgumentParser(
        prog="bpovl",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument("-i", "--input", default="/dev/stdin", type=str,
                     help="VCF to annotate (stdin)")
    cli.add_argument("-a", "--anno", required=True, type=str,
                     help="Tab-delimited annotation file")
    cli.add_argument("-o", "--output", required=True, type=str,
                     help="Output joblib DataFrame")
    cli.add_argument("--sizemin", default=50, type=truvari.restricted_int,
                     help="Minimum size of variant to annotate (%(default)s)")
    cli.add_argument("--spanmax", default=50000, type=truvari.restricted_int,
                     help="Maximum span of SVs to annotate (%(default)s)")

    anno_group = cli.add_argument_group("Annotation File Arguments")
    anno_group.add_argument("-p", "--preset", default=None, choices=PRESET_FMTS.keys(),
                            help=("Annotation format. This option overwrites "
                                  "-s, -b, -e, -c and -1 (%(default)s)"))
    anno_group.add_argument("-c", "--comment", default="#", type=str,
                            help="Skip lines started with character. (%(default)s)")
    anno_group.add_argument("-s", "--sequence", default=0, type=truvari.restricted_int,
                            help="Column of sequence/chromosome name. (%(default)s)")
    anno_group.add_argument("-b", "--begin", default=1, type=truvari.restricted_int,
                            help="Column of start chromosomal position. (%(default)s)")
    # The end column can be the same as the start column. [2]
    anno_group.add_argument("-e", "--end", default=2, type=truvari.restricted_int,
                            help="Column of end chromosomal position. (%(default)s)")
    anno_group.add_argument("-1", "--one-based", action='store_true',
                            help=("The position in the anno file is 1-based "
                                  "rather than 0-based. (%(default)s)"))

    options = cli.parse_args(args)
    if options.preset is None:
        # No preset: assemble the format description from the single options.
        options.anno_psets = [options.sequence, options.begin, options.end,
                              options.one_based, options.comment]
    else:
        options.anno_psets = PRESET_FMTS[options.preset]
    truvari.setup_logging()
    return options
Beispiel #14
0
def parse_args(args):
    """
    Parse the command line arguments of the ``grm`` command.

    Returns the parsed ``argparse.Namespace`` after calling
    ``truvari.setup_logging`` with the ``--debug`` flag.
    """
    cli = argparse.ArgumentParser(prog="grm", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    add_option = cli.add_argument

    add_option("-i", "--input", required=True, help="Input VCF")
    add_option("-r", "--reference", required=True,
               help="BWA indexed reference")
    add_option("-R", "--regions", default=None,
               help="Bed file of regions to parse (None)")
    add_option("-o", "--output", default="results.jl",
               help="Output dataframe (%(default)s)")
    add_option("-k", "--kmersize", type=truvari.restricted_int, default=50,
               help="Size of kmer to map (%(default)s)")
    add_option("-m", "--min-size", type=truvari.restricted_int, default=25,
               help="Minimum size of variants to map (%(default)s)")
    # Default thread count is whatever os.cpu_count() reports.
    add_option("-t", "--threads", type=truvari.restricted_int, default=os.cpu_count(),
               help="Number of threads (%(default)s)")
    add_option("--debug", action="store_true", help="Verbose logging")

    options = cli.parse_args(args)
    truvari.setup_logging(options.debug)
    return options
Beispiel #15
0
def parse_args(args):
    """
    Parse the command line parameters of the ``numneigh`` command.

    Returns the parsed ``argparse.Namespace`` after calling
    ``truvari.setup_logging`` with the ``--debug`` flag.
    """
    cli = argparse.ArgumentParser(prog="numneigh", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    add_option = cli.add_argument

    add_option("-i", "--input", default="/dev/stdin", type=str,
               help="VCF to annotate")
    add_option("-o", "--output", default="/dev/stdout", type=str,
               help="Output vcf (stdout)")
    add_option("-r", "--refdist", default=1000, type=truvari.restricted_int,
               help="Max reference location distance (%(default)s)")
    add_option("-s", "--sizemin", default=50, type=truvari.restricted_int,
               help="Minimum variant size to consider for annotation (%(default)s)")
    add_option("--passonly", default=False, action="store_true",
               help="Only count calls with FILTER == PASS")
    add_option("--debug", default=False, action="store_true",
               help="Verbose logging")

    options = cli.parse_args(args)
    truvari.setup_logging(options.debug)
    return options
Beispiel #16
0
def parseArgs(args):
    """
    Parse the command line arguments of ``sim_reads``.

    Accepts the working directory positional plus the simulation options
    and returns the parsed ``argparse.Namespace``. ``setup_logging()`` is
    called after parsing.
    """
    cli = argparse.ArgumentParser(
        prog="sim_reads",
        description=inspect.getdoc(sim_reads_main),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument("workdir", metavar="DIR", type=str,
                     help="SVTeaser working directory")

    # Integer-valued options, registered in the order they appear in --help.
    integer_options = (
        ("--coverage", 30, "Depth of coverage to simulate (%(default)s)"),
        ("--read-len", 150, "Simulated read length (%(default)s)"),
        ("--mean-frag", 400, "Mean insert fragment length (%(default)s)"),
        ("--insert-sd", 50, "Insert fragment length standard deviation (%(default)s)"),
    )
    for flag, default_value, help_text in integer_options:
        cli.add_argument(flag, type=int, default=default_value, help=help_text)

    cli.add_argument("--seq-inst", type=str, default="HS25",
                     help="Sequencing instrument (%(default)s)")

    options = cli.parse_args(args)
    setup_logging()
    return options
Beispiel #17
0
def setup_outputs(args):
    """
    Create the output directory, configure logging and open every VCF.

    :param args: parsed arguments; reads ``output``, ``debug``, ``base``
                 and ``comp``
    :return: dict with the keys ``vcf_base``/``vcf_comp`` (input VCFs),
             ``n_base_header``/``n_comp_header`` (headers returned by
             ``edit_header``), ``tpb_out``, ``tpc_out``, ``fn_out``,
             ``fp_out`` (VCFs opened for writing inside ``args.output``)
             and ``stats_box`` (a new ``StatsBox``)
    :raises FileExistsError: from ``os.mkdir`` when ``args.output``
                             already exists
    """
    os.mkdir(args.output)
    truvari.setup_logging(
        args.debug,
        truvari.LogFileStderr(os.path.join(args.output, "log.txt")))
    logging.info("Params:\n%s", json.dumps(vars(args), indent=4))
    # Lazy %-style arguments, consistent with the call above.
    logging.info("Truvari version: %s", truvari.__version__)

    def open_writer(filename, header):
        """Open ``filename`` inside the output directory for writing."""
        return pysam.VariantFile(os.path.join(args.output, filename),
                                 'w',
                                 header=header)

    outputs = {}
    outputs["vcf_base"] = pysam.VariantFile(args.base)
    outputs["n_base_header"] = edit_header(outputs["vcf_base"])

    outputs["vcf_comp"] = pysam.VariantFile(args.comp)
    outputs["n_comp_header"] = edit_header(outputs["vcf_comp"])

    # Base-side files use the base header, call-side files the comp header.
    outputs["tpb_out"] = open_writer("tp-base.vcf", outputs["n_base_header"])
    outputs["tpc_out"] = open_writer("tp-call.vcf", outputs["n_comp_header"])

    outputs["fn_out"] = open_writer("fn.vcf", outputs["n_base_header"])
    outputs["fp_out"] = open_writer("fp.vcf", outputs["n_comp_header"])

    outputs["stats_box"] = StatsBox()
    return outputs
Beispiel #18
0
def parse_args(args):
    """
    Parse the command line parameters of the ``trf`` command.

    Returns the parsed ``argparse.Namespace`` after calling
    ``truvari.setup_logging`` with the ``--debug`` flag.
    """
    cli = argparse.ArgumentParser(prog="trf", description=__doc__,
                                  formatter_class=argparse.RawDescriptionHelpFormatter)
    add_option = cli.add_argument

    add_option("-i", "--input", required=True, type=str,
               help="VCF to annotate")
    add_option("-o", "--output", default="/dev/stdout", type=str,
               help="Output filename (stdout)")
    add_option("-e", "--executable", default="trf409.linux64", type=str,
               help="Path to tandem repeat finder (%(default)s)")
    add_option("-T", "--trf-params", default="3 7 7 80 5 40 500 -h -ngs", type=str,
               help="Default parameters to send to trf (%(default)s)")
    add_option("-s", "--simple-repeats", required=True, type=str,
               help="Simple repeats bed")
    add_option("-f", "--reference", required=True, type=str,
               help="Reference fasta file")
    add_option("-m", "--min-length", default=50, type=truvari.restricted_int,
               help="Minimum size of entry to annotate (%(default)s)")
    add_option("-M", "--max-length", default=10000, type=truvari.restricted_int,
               help="Maximum size of sequence to run through trf (%(default)s)")
    # Default thread count comes from multiprocessing.cpu_count().
    add_option("-t", "--threads", default=multiprocessing.cpu_count(),
               type=truvari.restricted_int,
               help="Number of threads to use (%(default)s)")
    add_option("-C", "--chunk-size", default=1, type=truvari.restricted_int,
               help="Size (in mbs) of reference chunks for parallelization (%(default)s)")
    add_option("--debug", action="store_true", help="Verbose logging")

    options = cli.parse_args(args)
    truvari.setup_logging(options.debug)
    return options