digesting fasta files, it is possible to create 6 frame as well as 3 frame translations. """ import argparse, sys, itertools from pythomics.templates import CustomParser import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta parser = CustomParser(description = description) parser.add_fasta() parser.add_argument('-t', '--type', help="The type of fasta file (default protein).", choices=['prot','nt'], type=str, default='prot') parser.add_argument('--frame', help="If using a nucleotide file, translate in how many frames?", choices=[1,3,6], type=int) parser.add_argument('--genome', help="Are we translating a genome? This will keep chromosome positions in the header.", action='store_true', default=False) parser.add_out() parser.add_enzyme() parser.add_argument('--unique', help="Only return unique peptides per cleavage", action='store_true', default=False) def main(): args = parser.parse_args() file_name = args.fasta enzyme_choice = args.enzyme digest_type = args.type digest_frame = args.frame digest_negative = False if digest_frame == 6: digest_negative = True digest_frame = 3 digest_min = args.min digest_max = args.max genome = args.genome
# ---------------------------------------------------------------------------
# Command-line interface.
#
# The statement order below is significant: argparse lists options in --help
# in the order they are registered, so it is kept exactly as before.
# ---------------------------------------------------------------------------

# Input fasta and the main output file.
parser.add_fasta(help="The fasta file to match peptides against.")
parser.add_out(
    help="The name of the file you wish to create with results appended.")

# Optional side outputs. argparse runs string defaults through `type`, so
# when an option is omitted the handle is opened on the null device.
parser.add_argument(
    '--peptide-out',
    nargs='?',
    help="The file to write digested products to.",
    type=argparse.FileType('w'),
    default=os.devnull)
parser.add_argument(
    '--protein-out',
    nargs='?',
    help="The file to write grouped products to.",
    type=argparse.FileType('w'),
    default=os.devnull)
parser.add_argument(
    '--strict',
    help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
    action='store_true')

# Delimited input file and the column holding the peptide sequence.
parser.add_delimited_file(cols=['--peptide-col'], col_default='Peptide')

# Protein inference options.
parser.add_argument(
    '-r', '--regex',
    help="A perl regular expression determining which parts of the header to capture.",
    type=str)
parser.add_argument(
    '--inferred-name',
    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).",
    type=str,
    default='Proteins')
parser.add_argument(
    '--no-inference',
    help="Do not append proteins inferred from sequences.",
    action='store_true')
parser.add_argument(
    '--no-equality',
    help="Do not consider Leucine and Isoleucine equal for peptide mapping.",
    action='store_true')

# iBAQ options.
ibaq_group = parser.add_argument_group('iBAQ related options')
ibaq_group.add_argument(
    '--ibaq',
    help="Provide to append iBAQ values as well (requires protein inference).",
    action='store_true')
ibaq_group.add_argument(
    '--precursors',
    help="The column with precursor area (defaults to header lines containing 'Precursor').",
    type=str)
parser.add_column_function(
    '',
    col_argument='--ibaq-function',
    group=ibaq_group,
    col_help="The function to apply to groups of iBAQ values (for multiple peptide matches).",
    parent=False)
ibaq_group.add_argument(
    '--non-redundant',
    help="Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.",
    action='store_true')
parser.add_enzyme(help="The enzyme used to digest the sample.")
ibaq_group.add_argument(
    '--normalize',
    help="Normalize iBAQ to total intensity of column (useful for comparing multiple samples).",
    action='store_true')

# Protein grouping options.
protein_group = parser.add_argument_group('Protein Grouping Options')
protein_group.add_argument(
    '--unique-only',
    help="Only group proteins with unique peptides",
    action='store_true')
protein_group.add_argument(
    '--position',
    help="Write the position of the peptide matches.",
    action='store_true')
protein_group.add_argument(
    '--case-sensitive',
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
    action='store_true')

# Peptide modification options.
mod_group = parser.add_argument_group('Peptide Modification Options')
mod_group.add_argument(
    '--mod-out',
    nargs='?',
    help="The file to write a modification-centric summary to.",
    type=argparse.FileType('w'),
    default=None)
mod_group.add_argument(
    '--modification-site',
    help="Write the position in the parent protein of the modification (requires case-sensitive and modifications being lower-cased).",
    action='store_true')
parser.add_column_function(
    '--mod-col',
    help="The column containing modification information.",
    group=mod_group)

# Motif options.
# The group is attached to the parser itself rather than nested inside
# `mod_group`: argparse never supported calling add_argument_group() on an
# argument group (deprecated since Python 3.11), and options registered on a
# nested group are left out of the --help listing. Option names and parsing
# are unaffected by this change.
motif_group = parser.add_argument_group('Motif Options')
motif_group.add_argument(
    '--motifs',
    help="Enable creation of motifs for each modification.",
    action='store_true')
motif_group.add_argument(
    '--motif-window',
    help="The width of the motif window (how many residues to go out from each modification).",
    type=int,
    default=10)
motif_group.add_argument(
    '--motif-unique',
    help="Only output motifs where the peptide mapping is unambiguous.",
    action='store_true')
motif_group.add_argument(
    '--motif-out',
    help="Where to save the file with motifs. Default: --out file with _motif suffix.",
    type=str)
help= "The column with precursor area (defaults to header lines containing 'Precursor').", type=str) parser.add_column_function( '', col_argument='--ibaq-function', group=ibaq_group, col_help= "The function to apply to groups of iBAQ values (for multiple peptide matches).", parent=False) ibaq_group.add_argument( '--non-redundant', help= "Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.", action='store_true') parser.add_enzyme(help="The enzyme used to digest the sample.") ibaq_group.add_argument( '--normalize', help= "Normalize iBAQ to total intensity of column (useful for comparing multiple samples).", action='store_true') protein_group = parser.add_argument_group('Protein Grouping Options') protein_group.add_argument('--unique-only', help="Only group proteins with unique peptides", action='store_true') protein_group.add_argument('--position', help="Write the position of the peptide matches.", action='store_true') protein_group.add_argument( '--case-sensitive', help="Treat peptides as case-sensitive (ie separate modified peptides)",
description = """ This script will digest a given protein fasta file with the specified enzymes and summarize how much of the proteome is covered, what residues are missed, and what isoforms can be uniquely identified. """

import sys, copy, re
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
import pythomics.proteomics.config as config
from pythomics.templates import CustomParser

# Command-line interface: input fasta, output file, enzyme selection, and a
# switch between serial (default) and parallel digestion.
parser = CustomParser(description = description)
parser.add_fasta()
parser.add_out()
parser.add_enzyme(
    help="Enzyme to use. Pass a list like \"trypsin lysc\" to use multiple enzymes. "
         "The order of enzymes will be the order of digestion if digesting in series.")
parser.add_argument(
    '--parallel',
    help="Should cleavages be done in parallel (default is serial digestion)?",
    action='store_true')


def main():
    # Read the parsed command-line options.
    args = parser.parse_args()
    digest_min = args.min
    digest_max = args.max
    enzymes = args.enzyme

    # Accumulators initialised empty here.
    peptides_found = {}
    retained = {}
    total = 0
    proteinMap = {}
    coverageMap = {}

    # Sorted list of the residue keys. sorted() is used instead of
    # keys() followed by .sort(): on Python 3 keys() returns a view object
    # that has no .sort() method, so the old form raised AttributeError.
    aas = sorted(config.RESIDUE_MASSES.keys())
description = """ This script will digest a given protein fasta file with the specified enzymes and summarize how much of the proteome is covered, what residues are missed, and what isoforms can be uniquely identified. """

import sys, copy, re
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
import pythomics.proteomics.config as config
from pythomics.templates import CustomParser

# Command-line interface: input fasta, output file, enzyme selection, and
# the serial/parallel digestion switches.
parser = CustomParser(description = description)
parser.add_fasta()
parser.add_out()
# The help text previously read "command separated list"; the value is split
# on ',' in main(), so the wording is corrected to "comma".
parser.add_enzyme(
    help="Enzyme to use. Pass a comma separated list (no spaces); "
         "the order of enzymes will be the order of digestion if digesting in series.")
# store_true already defaults to False, so no explicit default is needed.
parser.add_argument(
    '--parallel',
    help="Should cleavages be done in parallel (default is serial digestion)?",
    action='store_true')
# NOTE(review): store_true combined with default=True means args.series is
# True whether or not the flag is passed; presumably the serial/parallel
# choice is driven by --parallel alone -- confirm against the rest of main().
parser.add_argument(
    '--series',
    help="Should cleavages be done in series? (default)",
    action='store_true',
    default=True)


def main():
    # Read the parsed command-line options.
    args = parser.parse_args()
    digest_min = args.min
    digest_max = args.max
    # Enzymes arrive as one comma-separated string; split into a list.
    enzymes = args.enzyme.split(',')

    # Accumulators initialised empty here.
    peptides_found = {}
    retained = {}
    total = 0
    proteinMap = {}
    coverageMap = {}

    # NOTE(review): on Python 3 keys() returns a view, not a list; if later
    # code sorts or indexes `aas`, wrap this in sorted(...) -- confirm.
    aas = config.RESIDUE_MASSES.keys()
help="The type of fasta file (default protein).", choices=['prot', 'nt'], type=str, default='prot') parser.add_argument( '--frame', help="If using a nucleotide file, translate in how many frames?", choices=[1, 3, 6], type=int) parser.add_argument( '--genome', help= "Are we translating a genome? This will keep chromosome positions in the header.", action='store_true') parser.add_out() parser.add_enzyme() parser.add_argument('--unique', help="Only return unique peptides per cleavage", action='store_true') def main(): args = parser.parse_args() file_name = args.fasta enzyme_choice = args.enzyme enzyme_pattern = args.enzyme_pattern digest_type = args.type digest_frame = args.frame digest_negative = False if digest_frame == 6: digest_negative = True
and summarize how much of the proteome is covered, what residues are missed, and what isoforms can be uniquely identified. """ import sys, copy, re import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta import pythomics.proteomics.config as config from pythomics.templates import CustomParser parser = CustomParser(description=description) parser.add_fasta() parser.add_out() parser.add_enzyme( help= "Enzyme to use. Pass a list like \"trypsin lysc\" to use multiple enzymes. " "The order of enzymes will be the order of digestion if digesting in series." ) parser.add_argument( '--parallel', help="Should cleavages be done in parallel (default is serial digestion)?", action='store_true') def main(): args = parser.parse_args() digest_min = args.min digest_max = args.max enzymes = args.enzyme peptides_found = {} retained = {}