def parse_args(args):
    """
    Parse the ``repmask`` command line.

    Registers every option on a fresh ArgumentParser, parses ``args`` and
    passes the ``--debug`` flag to ``truvari.setup_logging`` before
    returning the parsed namespace.
    """
    cli = argparse.ArgumentParser(
        prog="repmask",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # (flags, keyword settings) pairs, registered in display order.
    option_table = [
        (("-i", "--input"),
         dict(type=str, required=True,
              help="VCF to annotate (%(default)s)")),
        (("-o", "--output"),
         dict(type=str, default="/dev/stdout",
              help="Output filename (%(default)s)")),
        (("-e", "--executable"),
         dict(type=str, default="RepeatMasker",
              help="Path to RepeatMasker (%(default)s)")),
        (("-m", "--min-length"),
         dict(type=truvari.restricted_int, default=50,
              help="Minimum size of entry to annotate (%(default)s)")),
        (("-M", "--max-length"),
         dict(type=truvari.restricted_int, default=50000,
              help="Maximum size of entry to annotate (%(default)s)")),
        (("-t", "--threshold"),
         dict(type=truvari.restricted_float, default=.8,
              help="Threshold for pct of allele covered (%(default)s)")),
        (("-p", "--params"),
         dict(type=str, default=DEFAULTPARAMS,
              help="Default parameter string to send to RepeatMasker (%(default)s)")),
        (("-T", "--threads"),
         dict(type=truvari.restricted_int, default=os.cpu_count(),
              help="Number of threads to use (%(default)s)")),
        (("--debug",),
         dict(action="store_true", help="Verbose logging")),
    ]
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    parsed = cli.parse_args(args)
    truvari.setup_logging(parsed.debug)
    return parsed
def parse_args(args):
    """Parse the options of the SURVIVOR VCF editing utility.

    Four required string options are registered (reference fasta,
    SURVIVOR insertions fasta, SURVIVOR VCF, output VCF) plus ``--debug``.
    Logging is configured through ``truvari.setup_logging`` after parsing.

    Returns:
        The parsed argparse namespace.
    """
    cli = argparse.ArgumentParser(
        description="A VCF editing utility which adds ref and all sequences to a SURVIVOR fasta file."
    )
    add_option = cli.add_argument

    add_option("--reference-fasta", "-r",
               type=str, required=True,
               help="Reference fasta file.")
    add_option("--survivor-insertions-fasta", "-i",
               type=str, required=True,
               help="Insertions fasta file from SURVIVOR.")
    add_option("--survivor-vcf-file", "-v",
               type=str, required=True,
               help="VCF file from SURVIVOR.")
    add_option("--output-vcf", "-o",
               type=str, required=True,
               help="Output path of edited VCF.")
    add_option("--debug", action="store_true", help="Verbose logging")

    parsed = cli.parse_args(args)
    truvari.setup_logging(parsed.debug)
    return parsed
def parse_args(args):
    """
    Parse the ``divide`` command line.

    Two positionals (input VCF, output directory) and three options are
    registered. Note that ``--no-compress`` uses ``store_false``, so the
    resulting ``no_compress`` attribute is True unless the flag is given.
    Logging is always set up with ``truvari.setup_logging(False)``.
    """
    cli = argparse.ArgumentParser(
        prog="divide",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    register = cli.add_argument

    # Positional arguments
    register("input", metavar="VCF", help="VCF to split")
    register("output", metavar="DIR", help="Output directory to save parts")

    # Optional arguments
    register("-b", "--buffer", default=1000, type=truvari.restricted_int,
             help="Buffer to make mini-clusters (%(default)s)")
    register("-m", "--min", default=100, type=truvari.restricted_int,
             help="Minimum number of entries per-vcf (%(default)s)")
    register("--no-compress", action="store_false",
             help="Don't attempt to compress/index sub-VCFs")

    parsed = cli.parse_args(args)
    truvari.setup_logging(False)
    return parsed
def setup_outputs(args):
    """
    Prepare logging, headers and output handles.

    Configures logging from ``args.debug``, logs the parameters as JSON,
    opens ``args.input`` and derives two headers from it (``edit_header``
    and ``trubench.edit_header``). When ``args.hap`` is set and the output
    header does not hold exactly one sample, an error is logged and the
    process exits with status 100.

    Returns a dict with keys ``o_header``, ``c_header``, ``output_vcf``,
    ``collap_vcf`` and ``stats_box`` (zero-initialised counters).
    """
    truvari.setup_logging(args.debug)
    logging.info("Params:\n%s", json.dumps(vars(args), indent=4))

    source_vcf = pysam.VariantFile(args.input)
    out_header = edit_header(source_vcf)
    collapsed_header = trubench.edit_header(source_vcf)

    sample_count = len(out_header.samples)
    if args.hap and sample_count != 1:
        logging.error("--hap mode requires exactly one sample. Found %d", sample_count)
        sys.exit(100)

    # Writers are only opened once the sample check has passed.
    return {
        "o_header": out_header,
        "c_header": collapsed_header,
        "output_vcf": pysam.VariantFile(args.output, 'w', header=out_header),
        "collap_vcf": pysam.VariantFile(args.collapsed_output, 'w',
                                        header=collapsed_header),
        "stats_box": {
            "collap_cnt": 0,
            "kept_cnt": 0,
            "out_cnt": 0,
            "consol_cnt": 0,
        },
    }
def parseArgs(args):
    """
    Argument parsing for ``surv_sim``.

    Parses ``args``, then post-processes the namespace:

    * ``reference`` is converted to an absolute path.
    * ``output`` gets the ``.svt`` suffix appended.
    * logging is configured through ``setup_logging`` using ``--debug``.

    Returns the parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(
        prog="surv_sim",
        description=inspect.getdoc(surv_sim_main),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("reference", metavar="REF", type=str,
                        help="Reference file overwhich to simulate SVs")
    # A positional without nargs is always required, so the declared default
    # (and the "(output)" shown in the help) could never take effect.
    # nargs="?" makes the positional optional; explicit values still work.
    parser.add_argument("output", metavar="OUT", type=str, nargs="?",
                        default="output",
                        help="SVTeaser output basename (%(default)s)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose logging")
    # Help texts are assembled with implicit string concatenation instead of
    # backslash continuations inside the literal; argparse collapses runs of
    # whitespace in argument help, so the rendered text is the same.
    parser.add_argument(
        '--sv_regions', type=str, required=False,
        help=('Comma separated file containing (chr, region_start, region_end). '
              'For every row, an SV of length randint(50, mx_variation) is generated with the region '
              'specified by (chr, start, end). '
              'chr, start, end '
              'chr22, 1000, 20000 '
              'chr22, 50000, 80000'))
    parser.add_argument(
        '--num_sv_regions', type=int, default=10, required=False,
        help=('Alternatively to the csv file defined by --sv_regions, user can also '
              'provide number of regions to generate SVs for. The programme will randomly '
              'choose locations within the genome to introduce the SVs. --sv_regions will be given priority '
              'if both options are provided.'))
    parser.add_argument('--len_sv_region', type=int, default=10000,
                        required=False,
                        help='The length of regions to create.')

    args = parser.parse_args(args)
    args.reference = os.path.abspath(args.reference)
    args.output = args.output + ".svt"
    setup_logging(args.debug)
    return args
def parse_args(args):
    """
    Parse the ``svinfo`` command line.

    Logging is configured with ``truvari.setup_logging()`` before the
    arguments are parsed; the parsed namespace is returned.
    """
    cli = argparse.ArgumentParser(
        prog="svinfo",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    option_table = (
        (("-i", "--input"),
         dict(type=str, default="/dev/stdin",
              help="VCF to annotate (stdin)")),
        (("-o", "--output"),
         dict(type=str, default="/dev/stdout",
              help="Output filename (stdout)")),
        (("-m", "--minsize"),
         dict(type=truvari.restricted_int, default=50,
              help="Minimum size of entry to annotate (%(default)s)")),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    truvari.setup_logging()
    parsed = cli.parse_args(args)
    return parsed
def parse_args(args):
    """
    Parse the ``vcf2df`` command line.

    After parsing, a provided ``--sample`` value is normalised to a list:
    split on commas when ``--multisample`` is set, otherwise wrapped as a
    single-element list. Logging is then configured from ``--debug``.
    """
    cli = argparse.ArgumentParser(
        prog="vcf2df",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = cli.add_argument

    add("vcf", metavar="VCF", help="VCF to parse")
    add("output", metavar="JL", help="Output joblib to save")
    add("-b", "--bench-dir", action="store_true",
        help="Input is a truvari bench directory")
    add("-i", "--info", action="store_true",
        help="Attempt to put the INFO fields into the dataframe")
    add("-f", "--format", action="store_true",
        help="Attempt to put the FORMAT fileds into the dataframe")
    add("-s", "--sample", default=None,
        help="SAMPLE name to parse when building columns for --format")
    add("-m", "--multisample", action="store_true",
        help=("Parse multiple samples. Splits -s by comma. Sample "
              "column names will be flattened"))
    add("-S", "--skip-compression", action="store_true",
        help="Skip the attempt to optimize the dataframe's size")
    add("-c", "--compress", type=int, choices=range(9), default=3,
        help="Compression level for joblib (%(default)s)")
    add("--debug", action="store_true", help="Verbose logging")

    parsed = cli.parse_args(args)

    # Downstream code gets a list of sample names whenever one was supplied.
    if parsed.sample:
        parsed.sample = (parsed.sample.split(',') if parsed.multisample
                         else [parsed.sample])

    truvari.setup_logging(parsed.debug)
    return parsed
def parse_args(args):
    """
    Parse the ``remap`` command line.

    Returns the parsed namespace after configuring logging from ``--debug``
    via ``truvari.setup_logging``.
    """
    cli = argparse.ArgumentParser(
        prog="remap",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    option_table = [
        (("-i", "--input"),
         dict(default="/dev/stdin", help="Input VCF (%(default)s)")),
        (("-r", "--reference"),
         dict(required=True, help="BWA indexed reference")),
        (("-o", "--output"),
         dict(default="/dev/stdout", help="Output VCF (%(default)s)")),
        (("-m", "--minlength"),
         dict(default=50, type=truvari.restricted_int,
              help="Smallest length of allele to remap (%(default)s)")),
        (("-t", "--threshold"),
         dict(type=truvari.restricted_float, default=.8,
              help="Threshold for pct of allele covered to consider hit (%(default)s)")),
        (("-d", "--dist"),
         dict(type=truvari.restricted_int, default=10,
              help=("Minimum distance an alignment must be from a DEL's "
                    "position to be considered (%(default)s))"))),
        (("-H", "--hits"),
         dict(type=truvari.restricted_int, default=0,
              help="Report top hits as chr:start-end.pct (max %(default)s)")),
        (("--debug",),
         dict(action="store_true", help="Verbose logging")),
    ]
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    parsed = cli.parse_args(args)
    truvari.setup_logging(parsed.debug)
    return parsed
def parse_args(args):
    """
    Parse the ``hompct`` command line.

    ``--input`` is required; all size/count options are validated with
    ``truvari.restricted_int``. Logging is configured from ``--debug``
    after parsing and the namespace is returned.
    """
    cli = argparse.ArgumentParser(
        prog="hompct",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = cli.add_argument
    bounded_int = truvari.restricted_int

    add("-i", "--input", type=str, required=True,
        help="Compressed, indexed VCF to annotate ")
    add("-o", "--output", type=str, default="/dev/stdout",
        help="Output filename (stdout)")
    add("-b", "--buffer", type=bounded_int, default=5000,
        help="Number of base-pairs up/dn-stream to query (%(default)s)")
    add("-m", "--minanno", type=bounded_int, default=50,
        help="Minimum size of event to annotate (%(default)s)")
    add("-M", "--maxgt", type=bounded_int, default=1,
        help="Largest event size to count for genotyping (%(default)s)")
    add("-c", "--mincount", type=bounded_int, default=0,
        help="Minimum number of genotyping events to report HOMPCT (%(default)s)")
    add("--debug", action="store_true", help="Verbose logging")

    parsed = cli.parse_args(args)
    truvari.setup_logging(parsed.debug)
    return parsed
def parse_args(args):
    """
    Parse the ``segment`` command line.

    Only two positionals are accepted: the input VCF and the output VCF.
    Logging is configured with ``truvari.setup_logging()`` after parsing.
    """
    cli = argparse.ArgumentParser(
        prog="segment",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for dest, placeholder, text in (
        ("vcf", "IN", "VCF to parse"),
        ("output", "OUT", "Output VCF"),
    ):
        cli.add_argument(dest, metavar=placeholder, help=text)

    # Currently disabled options, kept for reference:
    # cli.add_argument("-m", "--min", default=10, type=int,
    #                  help="Minimum span of variants to segment")
    # cli.add_argument("--alter", action="store_true",
    #                  help="Add SEG Format field to all variants (false)")

    parsed = cli.parse_args(args)
    truvari.setup_logging()
    return parsed
def parse_args(args):
    """
    Parse the ``dpcnt`` command line.

    ``--no-ad`` uses ``store_false``, so the ``no_ad`` attribute is True
    unless the flag is passed. Logging is configured with
    ``truvari.setup_logging()`` before the arguments are parsed.
    """
    cli = argparse.ArgumentParser(
        prog="dpcnt",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = cli.add_argument

    add("-i", "--input", default="/dev/stdin", type=str,
        help="VCF to annotate (stdin)")
    add("-b", "--bins", default="0,5,10,15", type=str,
        help="Coverage bins to bisect left the counts (%(default)s)")
    add("--no-ad", action="store_false",
        help="Skip adding ADCNT bins")
    add("-p", "--present", default=False, action="store_true",
        help="Only count sites with present (non ./.) genotypes")
    add("-o", "--output", default="/dev/stdout", type=str,
        help="Output filename (stdout)")

    truvari.setup_logging()
    parsed = cli.parse_args(args)
    return parsed
def parse_args(args):
    """
    Parse the ``lcr`` command line.

    Accepts an input and an output path (defaulting to stdin/stdout).
    Logging is configured with ``truvari.setup_logging()`` before parsing.
    """
    cli = argparse.ArgumentParser(
        prog="lcr",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for flags, fallback, text in (
        (("-i", "--input"), "/dev/stdin", "VCF to annotate (stdin)"),
        (("-o", "--output"), "/dev/stdout", "Output filename (stdout)"),
    ):
        cli.add_argument(*flags, type=str, default=fallback, help=text)

    truvari.setup_logging()
    parsed = cli.parse_args(args)
    return parsed
def parse_args(args):
    """
    Parse the ``bpovl`` command line.

    Besides the general options, an "Annotation File Arguments" group
    describes the layout of the annotation file. After parsing, the
    namespace gains ``anno_psets``: the ``PRESET_FMTS`` entry when
    ``--preset`` is given, otherwise the list
    ``[sequence, begin, end, one_based, comment]`` from the individual
    options. Logging is configured with ``truvari.setup_logging()``.
    """
    cli = argparse.ArgumentParser(
        prog="bpovl",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    bounded_int = truvari.restricted_int

    # General options
    cli.add_argument("-i", "--input", default="/dev/stdin", type=str,
                     help="VCF to annotate (stdin)")
    cli.add_argument("-a", "--anno", required=True, type=str,
                     help="Tab-delimited annotation file")
    cli.add_argument("-o", "--output", required=True, type=str,
                     help="Output joblib DataFrame")
    cli.add_argument("--sizemin", default=50, type=bounded_int,
                     help="Minimum size of variant to annotate (%(default)s)")
    cli.add_argument("--spanmax", default=50000, type=bounded_int,
                     help="Maximum span of SVs to annotate (%(default)s)")

    # Annotation file layout
    layout = cli.add_argument_group("Annotation File Arguments")
    layout.add_argument("-p", "--preset", default=None,
                        choices=PRESET_FMTS.keys(),
                        help=("Annotation format. This option overwrites "
                              "-s, -b, -e, -c and -1 (%(default)s)"))
    layout.add_argument("-c", "--comment", default="#", type=str,
                        help="Skip lines started with character. (%(default)s)")
    layout.add_argument("-s", "--sequence", default=0, type=bounded_int,
                        help="Column of sequence/chromosome name. (%(default)s)")
    layout.add_argument("-b", "--begin", default=1, type=bounded_int,
                        help="Column of start chromosomal position. (%(default)s)")
    # The end column can be the same as the start column. [2]
    layout.add_argument("-e", "--end", default=2, type=bounded_int,
                        help="Column of end chromosomal position. (%(default)s)")
    layout.add_argument("-1", "--one-based", action='store_true',
                        help=("The position in the anno file is 1-based "
                              "rather than 0-based. (%(default)s)"))

    parsed = cli.parse_args(args)

    # A preset replaces all of the individual layout options.
    if parsed.preset is None:
        parsed.anno_psets = [parsed.sequence, parsed.begin, parsed.end,
                             parsed.one_based, parsed.comment]
    else:
        parsed.anno_psets = PRESET_FMTS[parsed.preset]

    truvari.setup_logging()
    return parsed
def parse_args(args):
    """
    Parse the ``grm`` command line.

    ``--input`` and ``--reference`` are required; ``--threads`` defaults
    to ``os.cpu_count()``. Logging is configured from ``--debug`` after
    parsing and the namespace is returned.
    """
    cli = argparse.ArgumentParser(
        prog="grm",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    option_table = [
        (("-i", "--input"),
         dict(required=True, help="Input VCF")),
        (("-r", "--reference"),
         dict(required=True, help="BWA indexed reference")),
        (("-R", "--regions"),
         dict(default=None, help="Bed file of regions to parse (None)")),
        (("-o", "--output"),
         dict(default="results.jl", help="Output dataframe (%(default)s)")),
        (("-k", "--kmersize"),
         dict(default=50, type=truvari.restricted_int,
              help="Size of kmer to map (%(default)s)")),
        (("-m", "--min-size"),
         dict(default=25, type=truvari.restricted_int,
              help="Minimum size of variants to map (%(default)s)")),
        (("-t", "--threads"),
         dict(default=os.cpu_count(), type=truvari.restricted_int,
              help="Number of threads (%(default)s)")),
        (("--debug",),
         dict(action="store_true", help="Verbose logging")),
    ]
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    parsed = cli.parse_args(args)
    truvari.setup_logging(parsed.debug)
    return parsed
def parse_args(args):
    """
    Parse the ``numneigh`` command line.

    Returns the parsed namespace after configuring logging from ``--debug``
    via ``truvari.setup_logging``.
    """
    cli = argparse.ArgumentParser(
        prog="numneigh",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = cli.add_argument

    # Input / output
    add("-i", "--input", default="/dev/stdin", type=str,
        help="VCF to annotate")
    add("-o", "--output", default="/dev/stdout", type=str,
        help="Output vcf (stdout)")

    # Thresholds
    add("-r", "--refdist", default=1000, type=truvari.restricted_int,
        help="Max reference location distance (%(default)s)")
    add("-s", "--sizemin", default=50, type=truvari.restricted_int,
        help="Minimum variant size to consider for annotation (%(default)s)")

    # Switches
    add("--passonly", default=False, action="store_true",
        help="Only count calls with FILTER == PASS")
    add("--debug", default=False, action="store_true",
        help="Verbose logging")

    parsed = cli.parse_args(args)
    truvari.setup_logging(parsed.debug)
    return parsed
def parseArgs(args):
    """
    Parse the ``sim_reads`` command line.

    One positional (the SVTeaser working directory) and five read
    simulation options are registered. ``setup_logging()`` is called
    after parsing and the namespace is returned.
    """
    cli = argparse.ArgumentParser(
        prog="sim_reads",
        description=inspect.getdoc(sim_reads_main),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("workdir", metavar="DIR", type=str,
                     help="SVTeaser working directory")

    # (flag, type, default, help) for each simulation option.
    simulation_options = (
        ("--coverage", int, 30,
         "Depth of coverage to simulate (%(default)s)"),
        ("--read-len", int, 150,
         "Simulated read length (%(default)s)"),
        ("--mean-frag", int, 400,
         "Mean insert fragment length (%(default)s)"),
        ("--insert-sd", int, 50,
         "Insert fragment length standard deviation (%(default)s)"),
        ("--seq-inst", str, "HS25",
         "Sequencing instrument (%(default)s)"),
    )
    for flag, caster, fallback, text in simulation_options:
        cli.add_argument(flag, type=caster, default=fallback, help=text)

    parsed = cli.parse_args(args)
    setup_logging()
    return parsed
def setup_outputs(args):
    """
    Create the output directory and every handle the run writes to.

    ``args.output`` is created with ``os.mkdir``; logging is configured
    from ``args.debug`` with a ``truvari.LogFileStderr`` on ``log.txt``
    inside that directory, and the parameters and Truvari version are
    logged. The base and comparison VCFs are opened, their headers are
    passed through ``edit_header``, and four VCF writers are opened in
    the output directory.

    Returns a dict with keys ``vcf_base``, ``n_base_header``, ``vcf_comp``,
    ``n_comp_header``, ``tpb_out``, ``tpc_out``, ``fn_out``, ``fp_out``
    and ``stats_box``.
    """
    out_dir = args.output
    os.mkdir(out_dir)

    log_sink = truvari.LogFileStderr(os.path.join(out_dir, "log.txt"))
    truvari.setup_logging(args.debug, log_sink)
    logging.info("Params:\n%s", json.dumps(vars(args), indent=4))
    logging.info(f"Truvari version: {truvari.__version__}")

    def open_writer(filename, header):
        # Open a VCF for writing inside the output directory.
        return pysam.VariantFile(os.path.join(out_dir, filename), 'w',
                                 header=header)

    base_vcf = pysam.VariantFile(args.base)
    base_header = edit_header(base_vcf)
    comp_vcf = pysam.VariantFile(args.comp)
    comp_header = edit_header(comp_vcf)

    return {
        "vcf_base": base_vcf,
        "n_base_header": base_header,
        "vcf_comp": comp_vcf,
        "n_comp_header": comp_header,
        "tpb_out": open_writer("tp-base.vcf", base_header),
        "tpc_out": open_writer("tp-call.vcf", comp_header),
        "fn_out": open_writer("fn.vcf", base_header),
        "fp_out": open_writer("fp.vcf", comp_header),
        "stats_box": StatsBox(),
    }
def parse_args(args):
    """
    Parse the ``trf`` command line.

    ``--input``, ``--simple-repeats`` and ``--reference`` are required;
    ``--threads`` defaults to ``multiprocessing.cpu_count()``. Logging is
    configured from ``--debug`` after parsing and the namespace is
    returned.
    """
    cli = argparse.ArgumentParser(
        prog="trf",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    option_table = [
        (("-i", "--input"),
         dict(type=str, required=True, help="VCF to annotate")),
        (("-o", "--output"),
         dict(type=str, default="/dev/stdout",
              help="Output filename (stdout)")),
        (("-e", "--executable"),
         dict(type=str, default="trf409.linux64",
              help="Path to tandem repeat finder (%(default)s)")),
        (("-T", "--trf-params"),
         dict(type=str, default="3 7 7 80 5 40 500 -h -ngs",
              help="Default parameters to send to trf (%(default)s)")),
        (("-s", "--simple-repeats"),
         dict(type=str, required=True, help="Simple repeats bed")),
        (("-f", "--reference"),
         dict(type=str, required=True, help="Reference fasta file")),
        (("-m", "--min-length"),
         dict(type=truvari.restricted_int, default=50,
              help="Minimum size of entry to annotate (%(default)s)")),
        (("-M", "--max-length"),
         dict(type=truvari.restricted_int, default=10000,
              help="Maximum size of sequence to run through trf (%(default)s)")),
        (("-t", "--threads"),
         dict(type=truvari.restricted_int,
              default=multiprocessing.cpu_count(),
              help="Number of threads to use (%(default)s)")),
        (("-C", "--chunk-size"),
         dict(type=truvari.restricted_int, default=1,
              help="Size (in mbs) of reference chunks for parallelization (%(default)s)")),
        (("--debug",),
         dict(action="store_true", help="Verbose logging")),
    ]
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    parsed = cli.parse_args(args)
    truvari.setup_logging(parsed.debug)
    return parsed