#!/usr/bin/env python
# Command-line script built on the shared pythomics argument-parser template.

description = """ This script will digest a given protein fasta file with the specified enzymes and summarize how much of the proteome is covered, what residues are missed, and what isoforms can be uniquely identified. """

import sys
import copy
import re

import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
import pythomics.proteomics.config as config
from pythomics.templates import CustomParser

# Module-level parser so that main() only has to call parse_args().
parser = CustomParser(description=description)
parser.add_fasta()
parser.add_out()
parser.add_enzyme(
    help=(
        "Enzyme to use. Pass a list like \"trypsin lysc\" to use multiple enzymes. "
        "The order of enzymes will be the order of digestion if digesting in series."
    )
)
parser.add_argument(
    '--parallel',
    action='store_true',
    help="Should cleavages be done in parallel (default is serial digestion)?",
)


def main():
    # Script entry point: parse the command line, then read the digest settings.
    args = parser.parse_args()
    digest_min = args.min
#!/usr/bin/env python from __future__ import division, absolute_import __author__ = 'chris' description = """ This will search NCBI for domains in a protein fasta file. """ import sys import time import requests from pythomics.templates import CustomParser parser = CustomParser(description=description) parser.add_fasta() parser.add_out() parser.add_argument('--db', help='The database to search', default='cdd', choices=['cdd', 'pfam', 'smart', 'tigrfam', 'cog', 'kog']) def main(): args = parser.parse_args() files = {'queries': args.fasta} nbci_url = 'http://www.ncbi.nlm.nih.gov/Structure/bwrpsb/bwrpsb.cgi?' response = requests.post( '{nbci}tdata=hits&dmode=std&db={database}&compbasedadj=0&filter=true&evalue=0.0001&cddefl=true' .format(nbci=nbci_url, database=args.db),
""" import argparse, sys, csv, copy, decimal, itertools, os, operator try: import re2 as re except ImportError: import re from multiprocessing import Pool, Value from collections import Counter from pythomics.templates import CustomParser import pythomics.proteomics.config as config import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta from pythomics.utils import ColumnFunctions parser = CustomParser(description = description) parser.add_fasta(help="The fasta file to match peptides against.") parser.add_out(help="The name of the file you wish to create with results appended.") parser.add_argument('--peptide-out', nargs='?', help="The file to write digested products to.", type=argparse.FileType('w'), default=os.devnull) parser.add_argument('--protein-out', nargs='?', help="The file to write grouped products to.", type=argparse.FileType('w'), default=os.devnull) parser.add_argument('--strict', help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).', action='store_true') parser.add_delimited_file(cols=['--peptide-col'], col_default='Peptide') parser.add_argument('-r', '--regex', help="A perl regular expression determining which parts of the header to capture.", type=str) parser.add_argument('--inferred-name', help="The name you want to assign for protein inference (in case you are regexing for gene names or something).", type=str, default='Proteins') parser.add_argument('--no-inference', help="Do not append proteins inferred from sequences.", action='store_true') parser.add_argument('--no-equality', help="Do not consider Leucine and Isoleucine equal for peptide mapping.", action='store_true') ibaq_group = parser.add_argument_group('iBAQ related options') ibaq_group.add_argument('--ibaq', help="Provide to append iBAQ values as well (requires protein inference).", action='store_true') ibaq_group.add_argument('--precursors', help="The column with precursor area (defaults to header lines containing 
'Precursor').", type=str) parser.add_column_function('', col_argument='--ibaq-function', group=ibaq_group, col_help="The function to apply to groups of iBAQ values (for multiple peptide matches).", parent=False) ibaq_group.add_argument('--non-redundant', help="Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.", action='store_true')
#!/usr/bin/env python
__author__ = 'Chris Mitchell'

import sys

from pythomics.templates import CustomParser
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

description = """ This script will incorporate the variants in a given VCF file into a specified fasta file. """

# Command-line interface: reference fasta, output file and the VCF options.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to incorporate changes into.")
parser.add_out(help="The file to write resulting fasta file to.")
parser.add_vcf()


def main():
    # Script entry point: read the parsed options and open both input iterators.
    args = parser.parse_args()

    # NOTE(review): sibling scripts read `args.fasta` after parser.add_fasta();
    # confirm that `file` is the attribute this parser version provides.
    file_name = args.file
    vcf = args.vcf

    # Variant-type switches; the "no_*" options are negated into positive flags.
    snps = not args.no_snps
    dels = args.dels
    ins = args.ins
    homs = not args.no_homozygous
    hets = args.heterozygous

    # The command line counts individuals from 1; convert to a 0-based index.
    individual = args.individual - 1

    fasta_file = fasta.FastaIterator(file_name)
    vcf_file = gp.VCFIterator(vcf)
creating summary statistics for them. For instance, gene ids can be selected and their FPKM/iBAQ values combined. Also, features can be can be grouped into longer sequences with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged into LNGERPEPTIDE). """ import argparse, sys, re, csv, copy, decimal from pythomics.templates import CustomParser import pythomics.proteomics.config as config import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta from pythomics.utils import ColumnFunctions parser = CustomParser(description = description) parser.add_delimited_file(cols=['--group-on']) parser.add_out() parser.add_argument('--substring', help='If set, merge features by partial matches (such as collapsing peptides into larger peptides)', action='store_true') parser.add_column_function('--summary-col', col_help="The function to apply to grouped entries in modification columns.") parser.add_argument('--summary-col-delimiter', help="If the summary column has a delimiter, such as a ; for multiple proteins.") parser.add_argument('--strict', help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).', action='store_true') parser.add_argument('--merge', help='Merge together identical entries.', action='store_true') # parser.add_argument('--merge-columns', help="If set, columns of merged peptides will be combined.", action='store_true') # parser.add_argument('--merge-delimiter', help='The delimiter for column merges.', type=str, default=';') parser.add_argument('--case-sensitive', help="Treat peptides as case-sensitive (ie separate modified peptides)", action='store_true') def main(): args = parser.parse_args() peptide_colname = False try:
#!/usr/bin/env python description = """ This script will digest a given fasta file with the specified enzymes. Both protein and nucleotide fasta files are valid inputs, and when digesting fasta files, it is possible to create 6 frame as well as 3 frame translations. """ import argparse, sys, itertools from pythomics.templates import CustomParser import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta parser = CustomParser(description = description) parser.add_fasta() parser.add_argument('-t', '--type', help="The type of fasta file (default protein).", choices=['prot','nt'], type=str, default='prot') parser.add_argument('--frame', help="If using a nucleotide file, translate in how many frames?", choices=[1,3,6], type=int) parser.add_argument('--genome', help="Are we translating a genome? This will keep chromosome positions in the header.", action='store_true', default=False) parser.add_out() parser.add_enzyme() parser.add_argument('--unique', help="Only return unique peptides per cleavage", action='store_true', default=False) def main(): args = parser.parse_args() file_name = args.fasta enzyme_choice = args.enzyme digest_type = args.type digest_frame = args.frame digest_negative = False if digest_frame == 6:
#!/usr/bin/env python
__author__ = 'Chris Mitchell'

import sys

from pythomics.templates import CustomParser
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

description = """ This script will incorporate the variants in a given VCF file into a specified fasta file. """

# Command-line interface: reference fasta, output file and the VCF options.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to incorporate changes into.")
parser.add_out(help="The file to write resulting fasta file to.")
parser.add_vcf()


def main():
    # Script entry point: read the parsed options and open both input iterators.
    args = parser.parse_args()
    file_name = args.fasta
    vcf = args.vcf

    # NOTE(review): `no_snps` / `no_homozygous` are used without negation here;
    # this is only right if add_vcf() registers them so that they default to
    # True -- confirm against the template.
    snps = args.no_snps
    dels = args.dels
    ins = args.ins
    homs = args.no_homozygous
    hets = args.heterozygous

    # The command line counts individuals from 1; convert to a 0-based index.
    individual = args.individual - 1

    fasta_file = fasta.FastaIterator(file_name)
    vcf_file = gp.VCFIterator(vcf)
#!/usr/bin/env python from __future__ import division, absolute_import __author__ = "chris" description = """ This will search NCBI for domains in a protein fasta file. """ import sys import time import requests from pythomics.templates import CustomParser parser = CustomParser(description=description) parser.add_fasta() parser.add_out() parser.add_argument( "--db", help="The database to search", default="cdd", choices=["cdd", "pfam", "smart", "tigrfam", "cog", "kog"] ) def main(): args = parser.parse_args() files = {"queries": args.fasta} nbci_url = "http://www.ncbi.nlm.nih.gov/Structure/bwrpsb/bwrpsb.cgi?" response = requests.post( "{nbci}tdata=hits&dmode=std&db={database}&compbasedadj=0&filter=true&evalue=0.0001&cddefl=true".format( nbci=nbci_url, database=args.db ),
__author__ = 'chris'

description = """ This script will annotate a tab delimited text file with peptides with corresponding proteins present in an annotation file, and can also use this annotation to include iBAQ measures. """

import argparse, sys, re, csv, copy, decimal
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta

# ---------------------------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------------------------
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to match peptides against.")

# Optional report files; both fall back to standard output.
parser.add_argument(
    '--peptide_out',
    nargs='?',
    help="The file to write digested products to.",
    type=argparse.FileType('w'),
    default=sys.stdout,
)
parser.add_argument(
    '--protein_out',
    nargs='?',
    help="The file to write grouped products to.",
    type=argparse.FileType('w'),
    default=sys.stdout,
)
parser.add_delimited_file()
parser.add_argument(
    '-r', '--regex',
    help="A perl regular expression determining which parts of the header to capture.",
    type=str,
)

# This used to be action='store_false' with default=False, which left
# args.no_inference at False whether or not the flag was given.  With
# 'store_true' the value is False by default and True when the flag is passed.
parser.add_argument(
    '--no-inference',
    help="Do not append proteins inferred from sequences.",
    action='store_true',
    default=False,
)

group = parser.add_argument_group('iBAQ related options')
group.add_argument(
    '--ibaq',
    help="Provide to append iBAQ values as well (requires protein inference).",
    action='store_true',
    default=False,
)
group.add_argument(
    '--precursors',
    help="The column with precursor area (defaults to header lines containing 'Precursor').",
    type=int,
    default=None,
)
parser.add_enzyme()

# store_false with default=True: args.no_normalize is True unless the flag is
# given, in which case it becomes False.
group.add_argument(
    '--no-normalize',
    help="Don't normalize iBAQ to total intensity",
    action='store_false',
    default=True,
)
group.add_argument(
    '--case-sensitive',
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
    action='store_true',
    default=False,
)

protein_group = parser.add_argument_group('Protein Grouping Options')
protein_group.add_argument(
    '--unique-only',
    help="Only group proteins with unique peptides",
    action='store_true',
    default=False,
)
protein_group.add_argument(
    '--position',
    help="Write the position of the peptide matches.",
    action='store_true',
    default=False,
)
#!/usr/bin/env python
description = """ This script will accept a given nucleotide fasta file and output found ORFs. ORFs are annotated by which stop codon they are a part of. As in, ORF 3 is annotated as the 3rd sequence if the translated sequence is divided by stop codons. This is prevent ambiguity with differing minimum lengths of ORFs. """

from pythomics.templates import CustomParser
import sys, argparse
import pythomics.parsers.fasta as fasta

# ArgumentDefaultsHelpFormatter makes --help print each option's default.
parser = CustomParser(
    description=description,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_fasta()
parser.add_out()
parser.add_argument(
    '--min',
    help="Minimum ORF length in amino acids.",
    type=int,
    default=50,
)
parser.add_argument(
    '--both-strands',
    help="Search both strands for ORFs.",
    action='store_true',
    default=False,
)


def main():
    # Script entry point: translate every fasta record at offsets 0, 1 and 2.
    args = parser.parse_args()
    file_name = args.fasta
    orf_min = args.min
    fasta_file = fasta.FastaIterator(file_name)
    negative_strand = args.both_strands
    with args.out as o:
        for header, sequence in fasta_file:
            # range() instead of xrange(): same iteration on Python 2, and it
            # does not raise NameError on Python 3.
            for i in range(3):
                strand = '+'
                translation = fasta._translate(sequence[i:])
#!/usr/bin/env python __author__ = 'chris' description = """ This script will lookup features from one delimited file in another delimited file, and perform various operations on the found entries in the alternative file """ import sys, csv from pythomics.templates import CustomParser from pythomics.utils import ColumnFunctions parser = CustomParser(description = description) parser.add_delimited_file(files=['-a'], delimiter=['--adelim'], cols=['--acol'], header=['--aheader'], help="This is the file to lookup values from.") parser.add_delimited_file(files=['-b'], delimiter=['--bdelim'], cols=['--bcol'], header=['--bheader'], help="This is the file to lookup values in.") parser.add_argument('--blookup', help='The column to take entries from in file b.', type=str, default=1) parser.add_argument('--strict', help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).', action='store_true') parser.add_out() parser.add_argument('--function', help='The function to apply to found entries.', choices=['concat', 'mean', 'sum', 'median', 'var', 'std', 'count'], type=str, default='concat') parser.add_argument('--colname', help='The column name to give the new appended value. Defaults to function chosen', type=str, default='') parser.add_argument('--aregex', help='An optional regex pattern for matching columns in file a.', type=str, default='') parser.add_argument('--bregex', help='An optional regex pattern for matching columns in file b.', type=str, default='') def main(): args = parser.parse_args() a_colname, b_colname, bl_colname = False, False, False try: a_column = int(args.acol) a_column = a_column-1 if a_column > 0 else a_column except ValueError:
#!/usr/bin/env python description = """ This script will accept a given nucleotide fasta file and output found ORFs. ORFs are annotated by which stop codon they are a part of. As in, ORF 3 is annotated as the 3rd sequence if the translated sequence is divided by stop codons. This is prevent ambiguity with differing minimum lengths of ORFs. """ from pythomics.templates import CustomParser import sys, argparse import pythomics.parsers.fasta as fasta parser = CustomParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_fasta() parser.add_out() parser.add_argument('--min', help="Minimum ORF length in amino acids.", type=int, default=50) parser.add_argument('--both-strands', help="Search both strands for ORFs.", action='store_true') parser.add_argument( '--no-met-start', help="Output ORFs starting with amino acids other than MET", action='store_true') parser.add_argument('--from-met', help="Truncate leading amino acids up to MET",
#!/usr/bin/env python description = """ This script will digest a given fasta file with the specified enzymes. Both protein and nucleotide fasta files are valid inputs, and when digesting fasta files, it is possible to create 6 frame as well as 3 frame translations. """ import argparse, sys, itertools from pythomics.templates import CustomParser import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta parser = CustomParser(description=description) parser.add_fasta() parser.add_argument('-t', '--type', help="The type of fasta file (default protein).", choices=['prot', 'nt'], type=str, default='prot') parser.add_argument( '--frame', help="If using a nucleotide file, translate in how many frames?", choices=[1, 3, 6], type=int) parser.add_argument( '--genome', help= "Are we translating a genome? This will keep chromosome positions in the header.",
#!/usr/bin/env python
description = """ This script will accept a given nucleotide fasta file and output found ORFs. ORFs are annotated by which stop codon they are a part of. As in, ORF 3 is annotated as the 3rd sequence if the translated sequence is divided by stop codons. This is prevent ambiguity with differing minimum lengths of ORFs. """

from pythomics.templates import CustomParser
import sys
import argparse
import pythomics.parsers.fasta as fasta

# ArgumentDefaultsHelpFormatter makes --help print each option's default.
parser = CustomParser(
    description=description,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_fasta()
parser.add_out()
parser.add_argument(
    '--min',
    type=int,
    default=50,
    help="Minimum ORF length in amino acids.",
)

# Boolean switches, all off unless given on the command line.
parser.add_argument(
    '--both-strands',
    action='store_true',
    help="Search both strands for ORFs.",
)
parser.add_argument(
    '--no-met-start',
    action='store_true',
    help="Output ORFs starting with amino acids other than MET",
)
parser.add_argument(
    '--from-met',
    action='store_true',
    help="Truncate leading amino acids up to MET",
)
parser.add_argument(
    '--from-met-keep',
    action='store_true',
    help="Truncate leading amino acids up to MET, but keep the untruncated version as well.",
)


def main():
    # Script entry point: collect the parsed options and open the fasta input.
    args = parser.parse_args()
    file_name = args.fasta
    orf_min = args.min
    fasta_file = fasta.FastaIterator(file_name)
    negative_strand = args.both_strands
    no_met = args.no_met_start
#!/usr/bin/env python
description = """ This script will digest a given protein fasta file with the specified enzymes and summarize how much of the proteome is covered, what residues are missed, and what isoforms can be uniquely identified. """

import sys, copy, re
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
import pythomics.proteomics.config as config
from pythomics.templates import CustomParser

parser = CustomParser(description=description)
parser.add_fasta()
parser.add_out()

# The enzyme option is split on ',' in main(), so the help text must ask for a
# comma separated list (it previously said "command separated").
parser.add_enzyme(
    help="Enzyme to use. Pass a comma separated list (no spaces); "
         "the order of enzymes will be the order of digestion if digesting in series."
)
parser.add_argument(
    '--parallel',
    help="Should cleavages be done in parallel (default is serial digestion)?",
    action='store_true',
    default=False,
)
# store_true with default=True: args.series is True whether or not the flag is
# passed.  Kept as-is so existing command lines that pass --series still parse.
parser.add_argument(
    '--series',
    help="Should cleavages be done in series? (default)",
    action='store_true',
    default=True,
)


def main():
    # Script entry point: read the digest settings and set up the accumulators.
    args = parser.parse_args()
    digest_min = args.min
    digest_max = args.max
    # One enzyme name per comma separated entry, in the order given.
    enzymes = args.enzyme.split(',')
    peptides_found = {}
    retained = {}
    total = 0
from __future__ import division, absolute_import __author__ = 'chris' description = """ """ import sys import os import operator import argparse from pythomics.templates import CustomParser parser = CustomParser(description=description) group = parser.add_argument_group('Protein Inference File') group.add_argument( '--inference', help= "The protein inference file (your peptide file with gene/protein annotations). For multiple files, separate by spaces (must be in same order as mods).", nargs='+', type=argparse.FileType('r'), required=True) group.add_argument('--gene', help="The Gene column name", type=str, default='Gene') group.add_argument('--protein', help="The Protein column name", type=str,
#!/usr/bin/env python description = """ This produces a bam file corresponding to junctional regions in a given gtf file """ import sys import pysam from pythomics.genomics.parsers import GFFReader from pythomics.templates import CustomParser parser = CustomParser(description=description) parser.add_bam() parser.add_bam_out() parser.add_gff() def main(): args = parser.parse_args() samfile = pysam.Samfile(args.bam, 'rb') junctionreads = pysam.Samfile(args.out_bam, 'wb', template=samfile) id_tag = args.group_on chosen_feature = args.feature if args.cufflinks: gff = GFFReader(args.gff, preset='cufflinks') else: gff = GFFReader(args.gff, tag_map={'ID': id_tag, 'Parent': 'Parent'}) written = set([]) for feature_name, feature in gff.get_features(): try: children = feature.children
#!/usr/bin/env python description = """ This produces a bam file corresponding to junctional regions in a given gtf file """ import sys import pysam from pythomics.genomics.parsers import GFFReader from pythomics.templates import CustomParser parser = CustomParser(description = description) parser.add_bam() parser.add_bam_out() parser.add_gff() def main(): args = parser.parse_args() samfile = pysam.Samfile(args.bam, 'rb') junctionreads = pysam.Samfile(args.out_bam, 'wb', template=samfile) id_tag = args.group_on chosen_feature = args.feature if args.cufflinks: gff = GFFReader(args.gff, preset='cufflinks') else: gff = GFFReader(args.gff, tag_map={'ID': id_tag, 'Parent': 'Parent'}) written = set([]) for feature_name, feature in gff.get_features(): try: children = feature.children except AttributeError:
#!/usr/bin/env python
description = """ This script will digest a given protein fasta file with the specified enzymes and summarize how much of the proteome is covered, what residues are missed, and what isoforms can be uniquely identified. """

import sys
import copy
import re

import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
import pythomics.proteomics.config as config
from pythomics.templates import CustomParser

# Module-level parser so that main() only has to call parse_args().
parser = CustomParser(description=description)
parser.add_fasta()
parser.add_out()
parser.add_enzyme(
    help=(
        "Enzyme to use. Pass a list like \"trypsin lysc\" to use multiple enzymes. "
        "The order of enzymes will be the order of digestion if digesting in series."
    )
)
parser.add_argument(
    '--parallel',
    action='store_true',
    help="Should cleavages be done in parallel (default is serial digestion)?",
)


def main():
    # Script entry point: read the digest settings and set up the accumulators.
    args = parser.parse_args()
    digest_min = args.min
    digest_max = args.max
    # Used as parsed; no splitting is done on the enzyme option here.
    enzymes = args.enzyme

    # Empty containers and counter that start out unpopulated.
    peptides_found = {}
    retained = {}
    total = 0
    proteinMap = {}
#!/usr/bin/env python __author__ = 'chris' description = """ This script will lookup features from one delimited file in another delimited file, and perform various operations on the found entries in the alternative file """ import sys, csv from pythomics.templates import CustomParser from pythomics.utils import ColumnFunctions parser = CustomParser(description=description) parser.add_delimited_file(files=['-a'], delimiter=['--adelim'], cols=['--acol'], header=['--aheader'], help="This is the file to lookup values from.") parser.add_delimited_file(files=['-b'], delimiter=['--bdelim'], cols=['--bcol'], header=['--bheader'], help="This is the file to lookup values in.") parser.add_argument('--blookup', help='The column to take entries from in file b.', type=str, default=1) parser.add_argument( '--strict', help=
__author__ = 'chris'

description = """ This script will take a delimited file and collapse features together, such as scan numbers. It can also be used to group peptides into longer sequences with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged into LNGERPEPTIDE). """

import argparse
import sys
import re
import csv
import copy
import decimal

from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta

# Command-line interface: delimited input, output file and merge options.
parser = CustomParser(description=description)
parser.add_delimited_file()
parser.add_out()
parser.add_argument(
    '--substring',
    action='store_true',
    default=False,
    help='If set, merge features by partial matches (such as collapsing peptides into larger peptides)',
)
parser.add_argument(
    '--merge-columns',
    action='store_true',
    default=False,
    help="If set, columns of merged peptides will be combined.",
)
parser.add_argument(
    '--merge-delimiter',
    type=str,
    default=';',
    help='The delimiter for column merges.',
)
parser.add_argument(
    '--case-sensitive',
    action='store_true',
    default=False,
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
)


def main():
    # Script entry point: unpack the parsed options into local names.
    args = parser.parse_args()
    # The column is given 1-based on the command line; store it 0-based.
    peptide_column = args.col - 1
    tsv_file = args.tsv
    header_lines = args.header
    delimiter = args.delimiter
    peptide_join = args.substring
    col_delimiter = args.merge_delimiter
creating summary statistics for them. For instance, gene ids can be selected and their FPKM/iBAQ values combined. Also, features can be can be grouped into longer sequences with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged into LNGERPEPTIDE). """ import argparse, sys, re, csv, copy, decimal from pythomics.templates import CustomParser import pythomics.proteomics.config as config import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta from pythomics.utils import ColumnFunctions parser = CustomParser(description=description) parser.add_delimited_file(cols=['--group-on']) parser.add_out() parser.add_argument( '--substring', help= 'If set, merge features by partial matches (such as collapsing peptides into larger peptides)', action='store_true') parser.add_column_function( '--summary-col', col_help="The function to apply to grouped entries in modification columns." ) parser.add_argument( '--summary-col-delimiter', help= "If the summary column has a delimiter, such as a ; for multiple proteins."
""" import argparse, sys, csv, copy, decimal, itertools, os, operator try: import re2 as re except ImportError: import re from multiprocessing import Pool, Value from collections import Counter from pythomics.templates import CustomParser import pythomics.proteomics.config as config import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta from pythomics.utils import ColumnFunctions parser = CustomParser(description=description) parser.add_fasta(help="The fasta file to match peptides against.") parser.add_out( help="The name of the file you wish to create with results appended.") parser.add_argument('--peptide-out', nargs='?', help="The file to write digested products to.", type=argparse.FileType('w'), default=os.devnull) parser.add_argument('--protein-out', nargs='?', help="The file to write grouped products to.", type=argparse.FileType('w'), default=os.devnull) parser.add_argument( '--strict',
#!/usr/bin/env python description = """ This script will trim N's from the ends of a fasta/fastq file so it can be aligned by tophat (which pukes if there are >5 N's. We remove them from the read ends only) """ import sys, re, os, gzip from itertools import izip from multiprocessing import Pool from pythomics.templates import CustomParser parser = CustomParser(description=description) parser.add_fasta() parser.add_read_pair() parser.add_out() parser.add_argument( '--min-len', help="The minimum read length reads must be after trimming.", type=int, default=25) parser.add_argument('--prefix', help="If using paired reads, this is the filename prefix.", type=str) parser.add_argument('--quality', help='If provided, remove qualities below a given score.', type=int, default=0) parser.add_argument('--chunk', help='How many reads to submit to each core.', type=int,
from __future__ import division, absolute_import __author__ = 'chris' description = """ """ import sys import os import operator import argparse from pythomics.templates import CustomParser parser = CustomParser(description=description) group = parser.add_argument_group('Protein Inference File') group.add_argument('--inference', help="The protein inference file (your peptide file with gene/protein annotations). For multiple files, separate by spaces (must be in same order as mods).", nargs='+', type=argparse.FileType('r'), required=True) group.add_argument('--gene', help="The Gene column name", type=str, default='Gene') group.add_argument('--protein', help="The Protein column name", type=str, default='Protein') group.add_argument('--peptide', help="The Peptide column name", type=str, default='Peptide') group.add_argument('--quant', help="The name of quantification columns (such as Heavy/Light). Separate multiple columns by spaces", nargs='+', default=['Heavy/Light']) mods = parser.add_argument_group('Modification File') mods.add_argument('--mods', help="The modifications file (the file with sites, peptides). For multiple files, separate by spaces (must be in same order as inference).", nargs='+', type=argparse.FileType('r'), required=True) mods.add_argument('--site-protein', help="The mod file protein column name", type=str, default='Protein') parser.add_argument('--no-log2', help='Do not log2 normalize quantification values.', action='store_true') parser.add_argument('--no-median', help='Do not normalize quantification values by the median of the experiment.', action='store_true') parser.add_argument('--wp', help="The whole proteome inference file, if it exists. For multiple replicates, separate by spaces.", nargs='+', type=argparse.FileType('r')) parser.add_argument('--non-mod-norm', help='Normalize the data by the non-modified peptides.', action='store_true')
__author__ = 'Chris Mitchell' from pythomics.templates import CustomParser import sys import argparse import operator import pythomics.parsers.fasta as fasta import pythomics.genomics.parsers as gp description = """ This script will incorporate the a given GFF file into a specified fasta file. It can also incorporate variants given in a VCF file while generating this fasta file. """ parser = CustomParser(description = description) parser.add_fasta(help="The fasta file to reference.") parser.add_out(help="The file to write resulting fasta file to.") gff_group = parser.add_argument_group('GFF file related options') gff_group.add_argument('--gff', help="The GFF file to use.", type=argparse.FileType('r'), required=True) gff_group.add_argument('--group-on', help="The key to group entries together by (such as transcript_id)", type=str, default='ID') gff_group.add_argument('--feature', help="The feature to use for fetching coordinates (such as CDS, does not apply with cufflinks flag).", type=str, default='') gff_group.add_argument('--cufflinks', help="If the gff file is in the standard cufflinks output", action='store_true', default=False) vcf_group = parser.add_argument_group('VCF file related options') vcf_group.add_vcf() vcf_group.add_argument('--variants-only', help="Only output transcripts with variants.", action='store_true', default=False) splice_group = parser.add_argument_group('Splice Junction Options (if a variant falls over a exon-exon junction. Default is to ignore.)') splice_group.add_argument('--splice-partial', help="Partially splice variants (only include exonic portions of variant)", action='store_true', default=False) def main():
#!/usr/bin/env python
description = """ This script will trim N's from the ends of a fasta/fastq file so it can be aligned by tophat (which pukes if there are >5 N's. We remove them from the read ends only) """

import sys
import re
import os
import gzip
from itertools import izip
from multiprocessing import Pool

from pythomics.templates import CustomParser

# Command-line interface: inputs, output and trimming thresholds.
parser = CustomParser(description=description)
parser.add_fasta()
parser.add_read_pair()
parser.add_out()
parser.add_argument(
    '--min-len',
    type=int,
    default=25,
    help="The minimum read length reads must be after trimming.",
)
parser.add_argument(
    '--prefix',
    type=str,
    help="If using paired reads, this is the filename prefix.",
)
parser.add_argument(
    '--quality',
    type=int,
    default=0,
    help='If provided, remove qualities below a given score.',
)
parser.add_argument(
    '--chunk',
    type=int,
    default=1000,
    help='How many reads to submit to each core.',
)
# store_false: args.no_gzip is True unless the flag is given.
parser.add_argument(
    '--no-gzip',
    action='store_false',
    help='To disable compression with gzip.',
)
# parser.add_argument('--5partial-match', help='This will trim partial matches at the 3\' end of the sequence if there is a match of at least x nucleotides.', type=int, default=0)
# parser.add_argument('--seed-length', help='The seed length for a match.', type=int, default=0)
# parser.add_argument('--mismatches', help='The number of possible mismatches in a sequence.', type=int, default=3)

# Runs of N anchored at the start and at the end of a read, respectively.
start_trim = re.compile(r'^N+')
end_trim = re.compile(r'N+$')

# Declared at module level; nothing is assigned to these names here.
global quality_min
global quality_offset
global paired
global read_min