#!/usr/bin/env python
# Command-line script: declares the options for merging the variants of a
# VCF file into a fasta file, then opens both inputs.

__author__ = 'Chris Mitchell'

import sys

from pythomics.templates import CustomParser
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

description = """ This script will incorporate the variants in a given VCF file into a specified fasta file. """

# The parser is assembled at module level; main() only has to parse.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to incorporate changes into.")
parser.add_out(help="The file to write resulting fasta file to.")
parser.add_vcf()


def main():
    """Parse the command line and open the fasta and VCF inputs."""
    args = parser.parse_args()

    # Input sources.
    # NOTE(review): reads ``args.file`` although the option is registered via
    # add_fasta(); confirm the destination name CustomParser actually uses.
    file_name = args.file
    vcf = args.vcf

    # Variant-selection switches.
    # NOTE(review): the two "no_*" options are negated here; whether that is
    # right depends on how add_vcf() declares them -- confirm.
    snps = not args.no_snps
    homs = not args.no_homozygous
    hets = args.heterozygous
    dels = args.dels
    ins = args.ins

    # Presumably turns a 1-based individual number into a 0-based index --
    # TODO confirm against add_vcf().
    individual = args.individual - 1

    fasta_file = fasta.FastaIterator(file_name)
    vcf_file = gp.VCFIterator(vcf)
# Command-line script: declares the options for building a fasta file from a
# GFF file (optionally applying VCF variants) and parses them in main().
import sys
import argparse
import operator

from pythomics.templates import CustomParser
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

description = """ This script will incorporate the a given GFF file into a specified fasta file. It can also incorporate variants given in a VCF file while generating this fasta file. """

# The parser is assembled at module level; main() only has to parse.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to reference.")
parser.add_out(help="The file to write resulting fasta file to.")

# --- GFF options -----------------------------------------------------------
gff_group = parser.add_argument_group('GFF file related options')
gff_group.add_argument(
    '--gff',
    help="The GFF file to use.",
    type=argparse.FileType('r'),
    required=True,
)
gff_group.add_argument(
    '--group-on',
    help="The key to group entries together by (such as transcript_id)",
    type=str,
    default='ID',
)
gff_group.add_argument(
    '--feature',
    help="The feature to use for fetching coordinates (such as CDS, does not apply with cufflinks flag).",
    type=str,
    default='',
)
gff_group.add_argument(
    '--cufflinks',
    help="If the gff file is in the standard cufflinks output",
    action='store_true',
    default=False,
)

# --- VCF options -----------------------------------------------------------
vcf_group = parser.add_argument_group('VCF file related options')
# NOTE(review): a plain argparse argument group has no ``add_vcf`` method;
# this relies on CustomParser returning a group object that provides it --
# confirm.
vcf_group.add_vcf()
vcf_group.add_argument(
    '--variants-only',
    help="Only output transcripts with variants.",
    action='store_true',
    default=False,
)

# --- Splice junction options -----------------------------------------------
splice_group = parser.add_argument_group('Splice Junction Options (if a variant falls over a exon-exon junction. Default is to ignore.)')
splice_group.add_argument(
    '--splice-partial',
    help="Partially splice variants (only include exonic portions of variant)",
    action='store_true',
    default=False,
)


def main():
    """Parse the command-line arguments declared above."""
    args = parser.parse_args()
#!/usr/bin/env python description = """ This script will trim N's from the ends of a fasta/fastq file so it can be aligned by tophat (which pukes if there are >5 N's. We remove them from the read ends only) """ import sys, re, os, gzip from itertools import izip from multiprocessing import Pool from pythomics.templates import CustomParser parser = CustomParser(description=description) parser.add_fasta() parser.add_read_pair() parser.add_out() parser.add_argument( '--min-len', help="The minimum read length reads must be after trimming.", type=int, default=25) parser.add_argument('--prefix', help="If using paired reads, this is the filename prefix.", type=str) parser.add_argument('--quality', help='If provided, remove qualities below a given score.', type=int, default=0) parser.add_argument('--chunk', help='How many reads to submit to each core.', type=int,
# Argument-parser setup for the peptide-to-protein matching script.
import argparse
import sys
import csv
import copy
import decimal
import itertools
import os
import operator

# Use the re2 bindings when they are installed; otherwise fall back to the
# standard library regex module under the same name.
try:
    import re2 as re
except ImportError:
    import re

from multiprocessing import Pool, Value
from collections import Counter

from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# ``description`` is not defined in this fragment; it is expected to already
# exist at module level when this line runs.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to match peptides against.")
parser.add_out(help="The name of the file you wish to create with results appended.")

# Optional side outputs; both default to the null device.
parser.add_argument(
    '--peptide-out',
    nargs='?',
    help="The file to write digested products to.",
    type=argparse.FileType('w'),
    default=os.devnull,
)
parser.add_argument(
    '--protein-out',
    nargs='?',
    help="The file to write grouped products to.",
    type=argparse.FileType('w'),
    default=os.devnull,
)
parser.add_argument(
    '--strict',
    help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
    action='store_true',
)
parser.add_delimited_file(cols=['--peptide-col'], col_default='Peptide')
parser.add_argument(
    '-r', '--regex',
    help="A perl regular expression determining which parts of the header to capture.",
    type=str,
)
parser.add_argument(
    '--inferred-name',
    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).",
    type=str,
    default='Proteins',
)
parser.add_argument(
    '--no-inference',
    help="Do not append proteins inferred from sequences.",
    action='store_true',
)
parser.add_argument(
    '--no-equality',
    help="Do not consider Leucine and Isoleucine equal for peptide mapping.",
    action='store_true',
)

# --- iBAQ options ----------------------------------------------------------
ibaq_group = parser.add_argument_group('iBAQ related options')
ibaq_group.add_argument(
    '--ibaq',
    help="Provide to append iBAQ values as well (requires protein inference).",
    action='store_true',
)
ibaq_group.add_argument(
    '--precursors',
    help="The column with precursor area (defaults to header lines containing 'Precursor').",
    type=str,
)
parser.add_column_function(
    '',
    col_argument='--ibaq-function',
    group=ibaq_group,
    col_help="The function to apply to groups of iBAQ values (for multiple peptide matches).",
    parent=False,
)
ibaq_group.add_argument(
    '--non-redundant',
    help="Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.",
    action='store_true',
)

parser.add_enzyme(help="The enzyme used to digest the sample.")
#!/usr/bin/env python description = """ This script will digest a given fasta file with the specified enzymes. Both protein and nucleotide fasta files are valid inputs, and when digesting fasta files, it is possible to create 6 frame as well as 3 frame translations. """ import argparse, sys, itertools from pythomics.templates import CustomParser import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta parser = CustomParser(description = description) parser.add_fasta() parser.add_argument('-t', '--type', help="The type of fasta file (default protein).", choices=['prot','nt'], type=str, default='prot') parser.add_argument('--frame', help="If using a nucleotide file, translate in how many frames?", choices=[1,3,6], type=int) parser.add_argument('--genome', help="Are we translating a genome? This will keep chromosome positions in the header.", action='store_true', default=False) parser.add_out() parser.add_enzyme() parser.add_argument('--unique', help="Only return unique peptides per cleavage", action='store_true', default=False) def main(): args = parser.parse_args() file_name = args.fasta enzyme_choice = args.enzyme digest_type = args.type digest_frame = args.frame digest_negative = False if digest_frame == 6:
import argparse, sys, csv, copy, decimal, itertools, os, operator try: import re2 as re except ImportError: import re from multiprocessing import Pool, Value from collections import Counter from pythomics.templates import CustomParser import pythomics.proteomics.config as config import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta from pythomics.utils import ColumnFunctions parser = CustomParser(description=description) parser.add_fasta(help="The fasta file to match peptides against.") parser.add_out( help="The name of the file you wish to create with results appended.") parser.add_argument('--peptide-out', nargs='?', help="The file to write digested products to.", type=argparse.FileType('w'), default=os.devnull) parser.add_argument('--protein-out', nargs='?', help="The file to write grouped products to.", type=argparse.FileType('w'), default=os.devnull) parser.add_argument( '--strict', help=
#!/usr/bin/env python
# Command-line script: declares the options for merging the variants of a
# VCF file into a fasta file, then opens both inputs.

__author__ = 'Chris Mitchell'

import sys

from pythomics.templates import CustomParser
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

description = """ This script will incorporate the variants in a given VCF file into a specified fasta file. """

# The parser is assembled at module level; main() only has to parse.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to incorporate changes into.")
parser.add_out(help="The file to write resulting fasta file to.")
parser.add_vcf()


def main():
    """Parse the command line and open the fasta and VCF inputs."""
    args = parser.parse_args()

    # Input sources.
    file_name = args.fasta
    vcf = args.vcf

    # Variant-selection switches.
    # NOTE(review): the two "no_*" options are used as-is (not negated);
    # whether that matches their meaning depends on how add_vcf() declares
    # them (store_true vs store_false) -- confirm.
    snps = args.no_snps
    homs = args.no_homozygous
    hets = args.heterozygous
    dels = args.dels
    ins = args.ins

    # Presumably turns a 1-based individual number into a 0-based index --
    # TODO confirm against add_vcf().
    individual = args.individual - 1

    fasta_file = fasta.FastaIterator(file_name)
    vcf_file = gp.VCFIterator(vcf)