For instance, gene ids can be selected and their FPKM/iBAQ values combined. Also, features can be grouped into longer sequences with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged into LNGERPEPTIDE). """ import argparse, sys, re, csv, copy, decimal from pythomics.templates import CustomParser import pythomics.proteomics.config as config import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta from pythomics.utils import ColumnFunctions parser = CustomParser(description=description) parser.add_delimited_file(cols=['--group-on']) parser.add_out() parser.add_argument( '--substring', help= 'If set, merge features by partial matches (such as collapsing peptides into larger peptides)', action='store_true') parser.add_column_function( '--summary-col', col_help="The function to apply to grouped entries in modification columns." ) parser.add_argument( '--summary-col-delimiter', help= "If the summary column has a delimiter, such as a ; for multiple proteins." )
# NOTE(review): `description`, `argparse` and `os` are used below but are not
# defined in this span -- presumably they are set up earlier in the file.
import re
from multiprocessing import Pool, Value
from collections import Counter

from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# ---------------------------------------------------------------------------
# Command-line interface.  Statements are kept in their original order so the
# options are registered in the same sequence as before.
# ---------------------------------------------------------------------------
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to match peptides against.")
parser.add_out(
    help="The name of the file you wish to create with results appended."
)

# Optional secondary outputs; both default to os.devnull.
parser.add_argument(
    "--peptide-out",
    nargs="?",
    help="The file to write digested products to.",
    type=argparse.FileType("w"),
    default=os.devnull,
)
parser.add_argument(
    "--protein-out",
    nargs="?",
    help="The file to write grouped products to.",
    type=argparse.FileType("w"),
    default=os.devnull,
)
parser.add_argument(
    "--strict",
    help="For numeric operations, fail if types are incorrect (converting NA to a float for instance).",
    action="store_true",
)
parser.add_delimited_file(cols=["--peptide-col"], col_default="Peptide")
parser.add_argument(
    "-r",
    "--regex",
    help="A perl regular expression determining which parts of the header to capture.",
    type=str,
)
parser.add_argument(
    "--inferred-name",
    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).",
    type=str,
    default="Proteins",
)
parser.add_argument(
    "--no-inference",
    help="Do not append proteins inferred from sequences.",
    action="store_true",
)
parser.add_argument(
    "--no-equality",
    help="Do not consider Leucine and Isoleucine equal for peptide mapping.",
    action="store_true",
)

# iBAQ options live in their own argument group.
ibaq_group = parser.add_argument_group("iBAQ related options")
ibaq_group.add_argument(
    "--ibaq",
    help="Provide to append iBAQ values as well (requires protein inference).",
    action="store_true",
)
ibaq_group.add_argument(
    "--precursors",
    help="The column with precursor area (defaults to header lines containing 'Precursor').",
    type=str,
)
parser.add_column_function(
    "",
    col_argument="--ibaq-function",
    group=ibaq_group,
    col_help="The function to apply to groups of iBAQ values (for multiple peptide matches).",
    parent=False,
)
ibaq_group.add_argument(
    "--non-redundant",
    help="Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.",
    action="store_true",
)
parser.add_enzyme(help="The enzyme used to digest the sample.")
ibaq_group.add_argument(
    "--normalize",
    help="Normalize iBAQ to total intensity of column (useful for comparing multiple samples).",
    action="store_true",
)

# Protein grouping options, again in a dedicated argument group.
protein_group = parser.add_argument_group("Protein Grouping Options")
protein_group.add_argument(
    "--unique-only",
    help="Only group proteins with unique peptides",
    action="store_true",
)
protein_group.add_argument(
    "--position",
    help="Write the position of the peptide matches.",
    action="store_true",
)
protein_group.add_argument(
    "--case-sensitive",
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
    action="store_true",
)
__author__ = 'chris' description = """ This script will lookup features from one delimited file in another delimited file, and perform various operations on the found entries in the alternative file """ import sys, csv from pythomics.templates import CustomParser from pythomics.utils import ColumnFunctions parser = CustomParser(description=description) parser.add_delimited_file(files=['-a'], delimiter=['--adelim'], cols=['--acol'], header=['--aheader'], help="This is the file to lookup values from.") parser.add_delimited_file(files=['-b'], delimiter=['--bdelim'], cols=['--bcol'], header=['--bheader'], help="This is the file to lookup values in.") parser.add_argument('--blookup', help='The column to take entries from in file b.', type=str, default=1) parser.add_argument( '--strict', help= 'For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
For instance, gene ids can be selected and their FPKM/iBAQ values combined. Also, features can be grouped into longer sequences with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged into LNGERPEPTIDE). """ import argparse, sys, re, csv, copy, decimal from pythomics.templates import CustomParser import pythomics.proteomics.config as config import pythomics.proteomics.digest as digest import pythomics.parsers.fasta as fasta from pythomics.utils import ColumnFunctions parser = CustomParser(description = description) parser.add_delimited_file(cols=['--group-on']) parser.add_out() parser.add_argument('--substring', help='If set, merge features by partial matches (such as collapsing peptides into larger peptides)', action='store_true') parser.add_column_function('--summary-col', col_help="The function to apply to grouped entries in modification columns.") parser.add_argument('--summary-col-delimiter', help="If the summary column has a delimiter, such as a ; for multiple proteins.") parser.add_argument('--strict', help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).', action='store_true') parser.add_argument('--merge', help='Merge together identical entries.', action='store_true') # parser.add_argument('--merge-columns', help="If set, columns of merged peptides will be combined.", action='store_true') # parser.add_argument('--merge-delimiter', help='The delimiter for column merges.', type=str, default=';') parser.add_argument('--case-sensitive', help="Treat peptides as case-sensitive (ie separate modified peptides)", action='store_true') def main(): args = parser.parse_args() peptide_colname = False try: peptide_column = int(args.group_on)
# NOTE(review): `parser`, `argparse` and `os` are not defined in this span --
# presumably they are created/imported earlier in the file.

# Optional secondary outputs; both default to os.devnull.
parser.add_argument(
    "--peptide-out",
    nargs="?",
    help="The file to write digested products to.",
    type=argparse.FileType("w"),
    default=os.devnull,
)
parser.add_argument(
    "--protein-out",
    nargs="?",
    help="The file to write grouped products to.",
    type=argparse.FileType("w"),
    default=os.devnull,
)
parser.add_argument(
    "--strict",
    help="For numeric operations, fail if types are incorrect (converting NA to a float for instance).",
    action="store_true",
)

# Column holding the peptide; the default column name is 'Peptide'.
parser.add_delimited_file(cols=["--peptide-col"], col_default="Peptide")

parser.add_argument(
    "-r",
    "--regex",
    help="A perl regular expression determining which parts of the header to capture.",
    type=str,
)
parser.add_argument(
    "--inferred-name",
    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).",
    type=str,
    default="Proteins",
)
parser.add_argument(
    "--no-inference",
    help="Do not append proteins inferred from sequences.",
    action="store_true",
)
# Help text handed to the argument parser below (content kept verbatim).
description = """ This script will take a delimited file and collapse features together, such as scan numbers. It can also be used to group peptides into longer sequences with the --substring flag (ex: peptides LNGERPEPTIDE and ERPEPT will be merged into LNGERPEPTIDE). """

import argparse
import sys
import re
import csv
import copy
import decimal

from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta

# Command-line interface; options are registered in their original order.
parser = CustomParser(description=description)
parser.add_delimited_file()
parser.add_out()
parser.add_argument(
    "--substring",
    help="If set, merge features by partial matches (such as collapsing peptides into larger peptides)",
    action="store_true",
    default=False,
)
parser.add_argument(
    "--merge-columns",
    help="If set, columns of merged peptides will be combined.",
    action="store_true",
    default=False,
)
parser.add_argument(
    "--merge-delimiter",
    help="The delimiter for column merges.",
    type=str,
    default=";",
)
parser.add_argument(
    "--case-sensitive",
    help="Treat peptides as case-sensitive (ie separate modified peptides)",
    action="store_true",
    default=False,
)


def main():
    # NOTE(review): only the option-unpacking prologue of main() is visible
    # in this span; local names are left untouched because later code
    # presumably relies on them.
    args = parser.parse_args()

    # The column argument is shifted down by one -- presumably converting a
    # 1-based command-line value to a 0-based index; confirm against usage.
    peptide_column = args.col - 1
    tsv_file = args.tsv
    header_lines = args.header
    delimiter = args.delimiter
    peptide_join = args.substring
    col_delimiter = args.merge_delimiter
    merge_columns = args.merge_columns
#!/usr/bin/env python __author__ = 'chris' description = """ This script will lookup features from one delimited file in another delimited file, and perform various operations on the found entries in the alternative file """ import sys, csv from pythomics.templates import CustomParser from pythomics.utils import ColumnFunctions parser = CustomParser(description = description) parser.add_delimited_file(files=['-a'], delimiter=['--adelim'], cols=['--acol'], header=['--aheader'], help="This is the file to lookup values from.") parser.add_delimited_file(files=['-b'], delimiter=['--bdelim'], cols=['--bcol'], header=['--bheader'], help="This is the file to lookup values in.") parser.add_argument('--blookup', help='The column to take entries from in file b.', type=str, default=1) parser.add_argument('--strict', help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).', action='store_true') parser.add_out() parser.add_argument('--function', help='The function to apply to found entries.', choices=['concat', 'mean', 'sum', 'median', 'var', 'std', 'count'], type=str, default='concat') parser.add_argument('--colname', help='The column name to give the new appended value. Defaults to function chosen', type=str, default='') parser.add_argument('--aregex', help='An optional regex pattern for matching columns in file a.', type=str, default='') parser.add_argument('--bregex', help='An optional regex pattern for matching columns in file b.', type=str, default='') def main(): args = parser.parse_args() a_colname, b_colname, bl_colname = False, False, False try: a_column = int(args.acol) a_column = a_column-1 if a_column > 0 else a_column except ValueError: