def CalcCharacterCount(self):
    """Calculates the count of different character types in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check`` and ``mvfanalysis.calc_character_count``, and
    returns an empty string.
    """
    argparser = MvfArgumentParser()
    # Shared MVF options first, then the two command-specific ones.
    argparser.addarg_mvf()
    argparser.addarg_out()
    argparser.addarg_contig_ids()
    argparser.addarg_contig_labels()
    argparser.addarg_sample_indices()
    argparser.addarg_sample_labels()
    argparser.addarg_mincoverage()
    argparser.addarg_windowsize()
    argparser.add_argument(
        "--base-match", "--basematch",
        help="String of bases to match (i.e. numerator).",
    )
    argparser.add_argument(
        "--base-total", "--basetotal",
        help="String of bases for total (i.e. denominator).",
    )

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    mvfanalysis.calc_character_count(parsed)
    return ""
def CalcDstatCombinations(self):
    """Calculates all D-statistics for all combinations of specified
    taxa in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check`` and ``mvfanalysis.calc_dstat_combinations``, and
    returns an empty string.
    """
    argparser = MvfArgumentParser()
    argparser.addarg_mvf()
    argparser.addarg_out()
    # Sample selectors are registered with nmin=3.
    argparser.addarg_sample_indices(nmin=3)
    argparser.addarg_sample_labels(nmin=3)
    argparser.addarg_outgroup_indices()
    argparser.addarg_outgroup_labels()
    argparser.addarg_contig_ids()
    argparser.addarg_contig_labels()

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    mvfanalysis.calc_dstat_combinations(parsed)
    return ""
def CalcPatternCount(self):
    """Counts biallelic site patterns (AB-patterns) for specified
    combinations of taxa in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check``, attaches a ``qprint`` callable built from
    ``args.quiet`` and ``self.time0``, runs
    ``mvfanalysis.calc_pattern_count``, and returns an empty string.
    """
    argparser = MvfArgumentParser()
    argparser.addarg_mvf()
    argparser.addarg_out()
    argparser.addarg_sample_indices()
    argparser.addarg_sample_labels()
    argparser.addarg_windowsize()
    argparser.addarg_mincoverage()
    argparser.add_argument("--output-lists", action="store_true")

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    parsed.qprint = make_qprint(parsed.quiet, self.time0)
    mvfanalysis.calc_pattern_count(parsed)
    return ""
def CalcAllCharacterCountPerSample(self):
    """Calculates the count of different character types in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check``, attaches a ``qprint`` callable built from
    ``args.quiet`` and ``self.time0``, runs
    ``mvfanalysis.calc_all_character_count_per_sample``, and returns an
    empty string.
    """
    argparser = MvfArgumentParser()
    argparser.addarg_mvf()
    argparser.addarg_out()
    argparser.addarg_contig_ids()
    argparser.addarg_contig_labels()
    argparser.addarg_sample_indices()
    argparser.addarg_sample_labels()
    argparser.addarg_mincoverage()
    argparser.addarg_windowsize()

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    parsed.qprint = make_qprint(parsed.quiet, self.time0)
    mvfanalysis.calc_all_character_count_per_sample(parsed)
    return ""
def ConvertMVF2Phylip(self):
    """Converts an MVF file to a Phylip file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check`` and ``mvf2phy``, and returns an empty string.
    """
    argparser = MvfArgumentParser()
    argparser.addarg_mvf()
    # The output path is mandatory here and is normalised to an
    # absolute path by argparse via ``type=os.path.abspath``.
    argparser.add_argument(
        "--out",
        required=True,
        type=os.path.abspath,
        help="Output Phylip file.",
    )
    argparser.addarg_regions()
    argparser.add_argument(
        "--label-type", "--labeltype",
        default='short',
        choices=('long', 'short'),
        help="Long labels with all metadata or short ids",
    )
    argparser.add_argument(
        "--output-data", "--outputdata",
        choices=("dna", "rna", "prot"),
        help="Output dna, rna or prot data.",
    )
    argparser.addarg_sample_indices()
    argparser.addarg_sample_labels()
    argparser.add_argument(
        "--buffer",
        default=100000,
        type=int,
        help="size (bp) of write buffer for each sample",
    )
    argparser.add_argument(
        "--temp_dir", "--tempdir",
        default=".",
        help="directory to write temporary fasta files",
    )
    # NOTE(review): the concatenated help text below reads
    # "RAxMLformatting" (no space between the first two fragments).
    argparser.add_argument(
        "--partition",
        action="store_true",
        help=("Output a CSV partitions file with RAxML"
              "formatting for use in partitioned "
              "phylogenetic methods."),
    )

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    mvf2phy(parsed)
    return ""
def ConvertMVF2Fasta(self):
    """Converts an MVF file to a FASTA file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check``, attaches a ``qprint`` callable built from
    ``args.quiet`` and ``self.time0``, runs ``mvf2fasta``, and returns
    an empty string.
    """
    def generate_argparser():
        """Generate argparse parser
        """
        parser = MvfArgumentParser()
        parser.addarg_mvf()
        parser.add_argument("--out", type=os.path.abspath,
                            help="Output path of FASTA file.",
                            required=True)
        parser.addarg_regions()
        parser.addarg_sample_indices()
        parser.addarg_sample_labels()
        parser.add_argument(
            "--label-type", "--labeltype",
            choices=('long', 'short'), default='long',
            help=("Long labels with all metadata or short ids"))
        parser.add_argument("--output-data", "--outputdata",
                            choices=("dna", "rna", "prot"),
                            help="Output dna, rna or prot data.")
        parser.add_argument(
            "--buffer", type=int, default=10,
            help="size (Mbp) of write buffer for each sample")
        parser.add_argument(
            "--temp_dir", "--tempdir", default=".",
            help="directory to write temporary fasta files")
        parser.add_argument("--gene-mode", action="store_true")
        return parser

    parser = generate_argparser()
    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return parser
    args = parser.parse_args(self.arguments[1:])
    # Run the argument check BEFORE the conversion, as the other
    # commands in this file do; previously it ran only after
    # mvf2fasta() had already done its work.
    mutex_check(args)
    args.qprint = make_qprint(args.quiet, self.time0)
    mvf2fasta(args)
    return ''
def CalcPairwiseDistances(self):
    """Calculates pairwise sequence distances for combinations of
    specified taxa in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check``, attaches a ``qprint`` callable built from
    ``args.quiet`` and ``self.time0``, runs
    ``mvfanalysis.calc_pairwise_distances``, and returns an empty string.

    NOTE(review): another method named ``CalcPairwiseDistances`` appears
    later in this file; if both live in the same class body, the later
    definition replaces this one.
    """
    argparser = MvfArgumentParser()
    argparser.addarg_mvf()
    argparser.addarg_out()
    # Sample selectors are registered with nmin=2.
    argparser.addarg_sample_indices(nmin=2)
    argparser.addarg_sample_labels(nmin=2)
    argparser.addarg_windowsize()
    argparser.addarg_mincoverage()
    argparser.add_argument(
        "--data-type", "--datatype",
        choices=("dna", "prot"),
        help=("Data type to compare."
              "(This option is only needed for codon "
              " MVF files, others will default.)"),
    )
    argparser.add_argument(
        "--ambig",
        choices=("random2", "random3"),
        help=("By default, ambiguous nucleotides are "
              "excluded. This option will include "
              "sets of ambiguous characters by "
              "randomly choosing one of the options "
              "for: RYMKWS ('random2') or "
              "RYMKWS+BDHV ('random3')"),
    )
    argparser.add_argument(
        "--emit-counts",
        action="store_true",
        help=("output additional file that presents "
              "the raw counts of pairwise patterns for "
              "each sample pair tested for each window"),
    )

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    parsed.qprint = make_qprint(parsed.quiet, self.time0)
    mvfanalysis.calc_pairwise_distances(parsed)
    return ""
def CalcSampleCoverage(self):
    """Counts per-contig coverage for specified sample columns
    in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check`` and ``mvfanalysis.calc_sample_coverage``, and
    returns an empty string.
    """
    argparser = MvfArgumentParser()
    argparser.addarg_mvf()
    argparser.addarg_out()
    argparser.addarg_contig_ids()
    argparser.addarg_contig_labels()
    argparser.addarg_sample_indices()
    argparser.addarg_sample_labels()

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    mvfanalysis.calc_sample_coverage(parsed)
    return ""
def CalcPairwiseDistances(self):
    """Calculates pairwise sequence distances for combinations of
    specified taxa in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check``, attaches a ``qprint`` callable built from
    ``args.quiet`` and ``self.time0``, runs
    ``mvfanalysis.calc_pairwise_distances``, and returns an empty string.

    NOTE(review): this file defines ``CalcPairwiseDistances`` twice. In
    a class body the later definition (this one) is the one that takes
    effect, so it registers the same options and sets the same
    ``qprint`` attribute as the earlier definition instead of silently
    dropping ``--data-type``, ``--ambig`` and ``--emit-counts``.
    """
    def generate_argparser():
        """Generate argparse parser
        """
        parser = MvfArgumentParser()
        parser.addarg_mvf()
        parser.addarg_out()
        parser.addarg_sample_indices(nmin=2)
        parser.addarg_sample_labels(nmin=2)
        parser.addarg_windowsize()
        parser.addarg_mincoverage()
        parser.add_argument("--data-type", "--datatype",
                            choices=("dna", "prot"),
                            help=("Data type to compare."
                                  "(This option is only needed for codon "
                                  " MVF files, others will default.)"))
        parser.add_argument("--ambig", choices=("random2", "random3"),
                            help=("By default, ambiguous nucleotides are "
                                  "excluded. This option will include "
                                  "sets of ambiguous characters by "
                                  "randomly choosing one of the options "
                                  "for: RYMKWS ('random2') or "
                                  "RYMKWS+BDHV ('random3')"))
        parser.add_argument(
            "--emit-counts", action="store_true",
            help=("output additional file that presents "
                  "the raw counts of pairwise patterns for "
                  "each sample pair tested for each window"))
        return parser

    parser = generate_argparser()
    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return parser
    args = parser.parse_args(self.arguments[1:])
    mutex_check(args)
    # The earlier definition hands the analysis a qprint callable;
    # provide it here too so both definitions drive it identically.
    args.qprint = make_qprint(args.quiet, self.time0)
    mvfanalysis.calc_pairwise_distances(args)
    return ''
def PlotChromoplot(self):
    """Plot a Chromoplot from an MVF file for all combinations
    of the specified samples.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check`` and ``plot_chromoplot``, and returns an empty string.
    """
    # Imported locally, as in the original, so the plotting module is
    # only loaded when this command is used.
    from pylib.mvfchromoplot import plot_chromoplot, Pallette

    def generate_argparser():
        """Generate argparse parser
        """
        # The palette supplies the permitted colour names for
        # --empty-mask and --colors.
        pallette = Pallette()
        parser = MvfArgumentParser()
        parser.addarg_mvf()
        parser.add_argument("--out-prefix", "--outprefix",
                            help="Output prefix (not required).")
        parser.addarg_sample_indices(nmin=3)
        parser.addarg_sample_labels(nmin=3)
        parser.addarg_outgroup_indices(nmin=1)
        parser.addarg_outgroup_labels(nmin=1)
        parser.addarg_windowsize()
        # The help texts of the next two options were swapped (the
        # labels option described ids and vice versa).
        parser.add_argument(
            "--contig-labels", "--contiglabels", nargs=1,
            help=("Enter the labels of one or more contigs in the "
                  "order they will appear in the chromoplot (as "
                  "comma-separated list)"
                  "(defaults to all ids in order present in MVF)"))
        parser.add_argument(
            "--contig-ids", "--contigids", "--contigs", nargs=1,
            help=("Enter the ids of one or more contigs in the "
                  "order they will appear in the chromoplot (as "
                  "comma-separated list)"
                  "(defaults to all ids in order present in MVF)"))
        # argparse applies %-formatting to help strings, so a literal
        # percent sign has to be written as "%%".
        parser.add_argument(
            "--majority", action="store_true",
            help=("Plot only 100%% shading in the majority track "
                  " rather than shaded proportions in all tracks."))
        parser.add_argument(
            "--info-track", "--infotrack", action="store_true",
            help=("Include an additional coverage information "
                  "track that will show empty, uninformative, "
                  "and informative loci. (Useful for "
                  "ranscriptomes/RAD or other reduced sampling."))
        parser.add_argument("--empty-mask", "--emptymask",
                            choices=pallette.colornames,
                            default="none",
                            help="Mask empty regions with this color.")
        parser.add_argument(
            "--yscale", default=20, type=int,
            help=("Height (in number of pixels) for each track"))
        parser.add_argument(
            "--xscale", default=1, type=int,
            help="Width (in number of pixels) for each window")
        parser.add_argument("--colors", nargs=3,
                            choices=pallette.colornames,
                            help="three colors to use for chromoplot")
        parser.add_argument(
            "--plot-type", "--plottype",
            choices=["graph", "image"], default="image",
            help=("PNG image (default) or graph via matplotlib "
                  "(experimental)"))
        return parser

    parser = generate_argparser()
    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return parser
    args = parser.parse_args(self.arguments[1:])
    mutex_check(args)
    plot_chromoplot(args)
    return ''
def InferTree(self):
    """Infer phylogenies for various windows or contigs in an MVF file.

    Returns the configured argument parser when ``self.selfdoc`` is True.
    Otherwise parses ``self.arguments[1:]``, passes the result through
    ``mutex_check`` and ``infer_window_tree``, and returns an empty
    string.
    """
    argparser = MvfArgumentParser()

    # Shared MVF selection options.
    argparser.addarg_mvf()
    argparser.addarg_out()
    argparser.addarg_sample_indices()
    argparser.addarg_sample_labels()
    argparser.addarg_contig_ids()
    argparser.addarg_contig_labels()
    argparser.addarg_windowsize()

    # Rooting and output-shaping options.
    argparser.add_argument(
        "--raxml-outgroups", "--raxmloutgroups",
        help=("Comma-separated list of outgroup "
              "taxon labels to use in RAxML."),
    )
    argparser.add_argument(
        "--root-with", "--rootwith",
        help=("Comma-separated list of taxon labels "
              "to root trees with after RAxML"),
    )
    argparser.add_argument(
        "--output-contig-labels", "--outputcontiglabels",
        action="store_true",
        help=("Output will use contig labels "
              "instead of id numbers."),
    )
    argparser.add_argument(
        "--output-empty", "--outputempty",
        action="store_true",
        help=("Include entries of windows "
              "with no data in output."),
    )

    # All three spellings store into ``choose_allele``.
    argparser.add_argument(
        "--choose-allele", "--chooseallele", "--hapmode",
        dest="choose_allele",
        default="none",
        choices=[
            "none", "randomone", "randomboth",
            "major", "minor", "majorminor",
        ],
        help=("Chooses how heterozygous alleles are "
              "handled. (none=no splitting (default); "
              "randomone=pick one allele randomly "
              "(recommended); randomboth=pick two alleles "
              "randomly, but keep both; major=pick the "
              "more common allele; minor=pick the less "
              "common allele; majorminor= pick the major in "
              "'a' and minor in 'b'"),
    )

    # Alignment filtering thresholds.
    argparser.add_argument(
        "--min-sites", "--minsites",
        default=100,
        type=int,
        help="minimum number of sites ",
    )
    argparser.add_argument(
        "--min-seq-coverage", "--minseqcoverage",
        default=0.1,
        type=float,
        help=("proportion of total alignment a sequence"
              "must cover to be retianed [0.1]"),
    )
    argparser.add_argument(
        "--min-depth", "--mindepth",
        default=4,
        type=int,
        help=("minimum number of alleles per site"),
    )

    # RAxML invocation options.
    argparser.add_argument(
        "--bootstrap",
        type=int,
        help=("turn on rapid bootstrapping for RAxML and "
              "perform specified number of replicates"),
    )
    argparser.add_argument(
        "--raxml-model", "--raxmlmodel",
        default="GTRGAMMA",
        help=("choose RAxML model"),
    )
    argparser.add_argument(
        "--raxml-path", "--raxmlpath",
        default="raxml",
        help="RAxML path for manual specification.",
    )
    argparser.add_argument(
        "--raxml-opts", "--raxmlopts",
        default="",
        help=("specify additional RAxML arguments as a "
              "double-quotes encased string"),
    )
    argparser.add_argument(
        "--duplicate-seq", "--duplicateseq",
        default="dontuse",
        choices=["dontuse", "keep", "remove"],
        help=("dontuse=remove duplicate sequences prior to "
              "RAxML tree inference, then add them to the "
              "tree manually as zero-branch-length sister "
              "taxa; keep=keep in for RAxML tree inference "
              "(may cause errors for RAxML); "
              "remove=remove entirely from alignment"),
    )

    # Temporary-file locations.
    argparser.add_argument(
        "--temp-dir", "--tempdir",
        default='./raxmltemp',
        type=os.path.abspath,
        help=("Temporary directory path"),
    )
    argparser.add_argument(
        "--temp-prefix", "--tempprefix",
        default="mvftree",
        help=("Temporary file prefix"),
    )

    # Self-documentation mode: hand back the parser without running.
    if self.selfdoc is True:
        return argparser

    parsed = argparser.parse_args(self.arguments[1:])
    mutex_check(parsed)
    infer_window_tree(parsed)
    return ""