Python parseの例、pyteomics.parser.parse Pythonの例

コード例 #1

0

ファイルを表示

 def test_tostring(self):
     """Check that ``tostring`` reverses ``parse`` for every simple sequence.

     Both the default call and the call with extra positional flags must
     reproduce the input string exactly.
     """
     for sequence in self.simple_sequences:
         flat = parser.parse(sequence, labels=uppercase)
         self.assertEqual(sequence, parser.tostring(flat))

         # Same round trip with both positional flags of ``parse`` switched
         # on and the positional flag of ``tostring`` switched off.
         flagged = parser.parse(sequence, True, True, labels=uppercase)
         self.assertEqual(sequence, parser.tostring(flagged, False))

コード例 #2

0

ファイルを表示

ファイル: test_parser.py プロジェクト: sailfish009/pyteomics

 def test_parse(self):
     """Exercise ``parser.parse`` on plain, terminal and modified sequences."""
     residues = ['P', 'E', 'P', 'T', 'I', 'D', 'E']

     # With split=True every residue is expected in its own tuple.
     self.assertEqual([(aa,) for aa in residues],
                      parser.parse('PEPTIDE', split=True))

     # By default the terminal group is expected to be absent from the result.
     self.assertEqual(residues, parser.parse('H-PEPTIDE'))

     # With show_unmodified_termini=True both termini are expected in the
     # result, whether or not they were spelled out in the input.
     with_termini = ['H-'] + residues + ['-OH']
     for seq in ('PEPTIDE', 'H-PEPTIDE', 'PEPTIDE-OH', 'H-PEPTIDE-OH'):
         self.assertEqual(
             with_termini, parser.parse(seq, show_unmodified_termini=True))

     # Multi-character labels must be recognised once they are listed.
     modified_labels = parser.std_labels + ['pS', 'oxM']
     self.assertEqual(['T', 'E', 'pS', 'T', 'oxM'],
                      parser.parse('TEpSToxM', labels=modified_labels))

     # Positional flags: the expected output is split into tuples and
     # carries both termini.
     expected_split = [
         ('H-', 'z', 'P'), ('E',), ('P',), ('z', 'T'),
         ('I',), ('D',), ('z', 'E', '-OH'),
     ]
     self.assertEqual(
         expected_split,
         parser.parse('zPEPzTIDzE', True, True,
                      labels=parser.std_labels + ['z']))

コード例 #3

0

ファイルを表示

ファイル: psmTheoretical.py プロジェクト: markmipt/DeMix

def calc_precursor_theoretical(seq, z):
    """Compute the theoretical mass and m/z of a peptide precursor.

    Parameters
    ----------
    seq
        Peptide sequence; it is converted with ``seqModX`` before parsing.
    z
        Charge forwarded to ``mass.calculate_mass`` for the m/z value.

    Returns
    -------
    tuple
        ``(parsed_sequence, mass, m/z)``, or ``(None, None, None)`` when any
        step of the calculation raises an ordinary exception.
    """
    try:
        parseq = parser.parse(seqModX(seq), labels=modLabels, show_unmodified_termini=True)
        theomass = mass.calculate_mass(parsed_sequence=parseq, aa_comp=composition)
        theomz = mass.calculate_mass(parsed_sequence=parseq, aa_comp=composition, charge=z)
    except Exception:
        # Best-effort calculation: a peptide that cannot be processed yields
        # an all-None result. ``Exception`` (not a bare ``except``) is caught
        # so that KeyboardInterrupt and SystemExit still propagate.
        return (None, None, None)
    return (parseq, theomass, theomz)

コード例 #4

0

ファイルを表示

ファイル: test_mass.py プロジェクト: sailfish009/pyteomics

 def test_isotopologues(self):
     """Check ``mass.isotopologues`` for every supported way of passing input.

     The same peptide is supplied positionally, as a sequence, as a parsed
     sequence, as a split sequence, as a formula and as a composition.
     Every returned state must be one of the two expected states and must
     have the matching abundance.
     """
     peptide = 'XYF'
     states = [
         {'F[6]': 1, 'A': 1, 'B': 1, 'D': 1, 'E': 1},
         {'F[7]': 1, 'A': 1, 'B': 1, 'D': 1, 'E': 1},
     ]
     abundances = [0.7, 0.3]
     kw_common = dict(elements_with_isotopes='F',
                      aa_comp=self.aa_comp,
                      mass_data=self.mass_data)
     kwlist = [
         {},
         {'sequence': 'XYF'},
         {'parsed_sequence':
              parser.parse('XYF', show_unmodified_termini=True)},
         {'split_sequence':
              parser.parse('XYF', show_unmodified_termini=True, split=True)},
         {'formula': 'ABDEF'},
         {'composition':
              mass.Composition(sequence='XYF', aa_comp=self.aa_comp)},
     ]
     # Only the first call passes the peptide positionally; the others rely
     # on the keyword arguments above.
     arglist = [(peptide, ), (), (), (), (), ()]
     for args, kw in zip(arglist, kwlist):
         kwargs = kw_common.copy()
         kwargs.update(kw)
         isotopologues = mass.isotopologues(*args, **kwargs)
         for state in isotopologues:
             # list.index raises ValueError instead of returning -1, so
             # membership is asserted explicitly to get a proper test
             # failure for an unexpected state.
             self.assertIn(state, states)
             i = states.index(state)
             self.assertAlmostEqual(
                 abundances[i],
                 mass.isotopic_composition_abundance(
                     state, aa_comp=self.aa_comp, mass_data=self.mass_data))

コード例 #5

0

ファイルを表示

ファイル: test_parser.py プロジェクト: sailfish009/pyteomics

 def test_isoforms_maxmods(self):
     """Check that ``max_mods`` bounds the number of changed positions.

     For 50 random peptides, every generated isoform must have as many
     positions as the parsed peptide and differ from it in at most
     ``max_mods`` of them.
     """
     for _ in range(50):
         length = random.randint(1, 10)
         max_mods = random.randint(1, 10)
         peptide = ''.join(
             [random.choice(self.labels) for _ in range(length)])
         isoforms = parser.isoforms(
             peptide,
             variable_mods=self.potential,
             labels=self.labels,
             max_mods=max_mods,
             format='split')
         unmodified = parser.parse(peptide, labels=self.extlabels, split=True)
         for isoform in isoforms:
             self.assertEqual(len(unmodified), len(isoform))
             changed = sum(a != b for a, b in zip(unmodified, isoform))
             self.assertLessEqual(changed, max_mods)

コード例 #6

0

ファイルを表示

ファイル: extractPeptidesFromFasta.py プロジェクト: saitomics/extractPeptidesFromFasta

def get_peptide_data(peptide):
    """Return a dict with the sequence, parsed sequence and mass of a peptide."""
    # The termini are kept in the parsed form for the mass calculation.
    parsed_sequence = parser.parse(peptide, show_unmodified_termini=True)
    return {
        'sequence': peptide,
        'parsed_sequence': parsed_sequence,
        'mass': mass.calculate_mass(parsed_sequence),
    }

コード例 #7

0

ファイルを表示

ファイル: utils.py プロジェクト: SimpleNumber/aa_stat

def apply_var_mods(seq, mods):
    """Prefix selected residues of ``seq`` with a signed modification tag.

    ``mods`` is indexed by the 0-based position in the parsed sequence; the
    value found there is rendered as a signed number without decimals
    inside curly braces and placed before the residue. The result is
    logged through ``internal`` and returned as a single string.
    """
    pieces = [
        ('{{{:+.0f}}}'.format(mods[pos]) + residue) if pos in mods else residue
        for pos, residue in enumerate(parser.parse(seq))
    ]
    seqout = ''.join(pieces)
    internal('%s + %s = %s', seq, mods, seqout)
    return seqout

コード例 #8

0

ファイルを表示

ファイル: test_parser.py プロジェクト: sailfish009/pyteomics

 def test_isoforms_len(self):
     """Check the length and the number of isoforms of random peptides.

     Every isoform must have the same length as the parsed peptide, and
     the isoform count must equal 3 to the number of 'A' residues times 2
     to the number of 'X' and 'C' residues plus the terminal matches (a
     leading 'N' and a trailing 'C').
     """
     for _ in range(50):
         length = random.randint(1, 10)
         peptide = ''.join(random.choice(self.labels) for _ in range(length))
         isoforms = list(parser.isoforms(
             peptide,
             variable_mods=self.potential,
             fixed_mods=self.constant,
             labels=self.labels))
         parsed = parser.parse(peptide, labels=self.extlabels)
         terminal_hits = (parsed[0] == 'N') + (parsed[-1] == 'C')
         for isoform in isoforms:
             self.assertEqual(
                 len(parsed), parser.length(isoform, labels=self.extlabels))
         two_state_sites = parsed.count('X') + parsed.count('C') + terminal_hits
         expected_count = (3 ** parsed.count('A')) * (2 ** two_state_sites)
         self.assertEqual(len(isoforms), expected_count)

コード例 #9

0

ファイルを表示

ファイル: psmTheoretical.py プロジェクト: wj-zhang/DeMix

def calc_precursor_theoretical(seq, z):
    """Compute the theoretical mass and m/z of a peptide precursor.

    Parameters
    ----------
    seq
        Peptide sequence; it is converted with ``seqModX`` before parsing.
    z
        Charge forwarded to ``mass.calculate_mass`` for the m/z value.

    Returns
    -------
    tuple
        ``(parsed_sequence, mass, m/z)``, or ``(None, None, None)`` when any
        step of the calculation raises an ordinary exception.
    """
    try:
        parseq = parser.parse(seqModX(seq),
                              labels=modLabels,
                              show_unmodified_termini=True)
        theomass = mass.calculate_mass(parsed_sequence=parseq,
                                       aa_comp=composition)
        theomz = mass.calculate_mass(parsed_sequence=parseq,
                                     aa_comp=composition,
                                     charge=z)
    except Exception:
        # Best-effort calculation: a peptide that cannot be processed yields
        # an all-None result. ``Exception`` (not a bare ``except``) is caught
        # so that KeyboardInterrupt and SystemExit still propagate.
        return (None, None, None)
    return (parseq, theomass, theomz)

コード例 #10

0

ファイルを表示

def _get_theoretical_peptide_fragments(peptide: str, types: str = 'by',
                                       max_charge: int = 1):
    """
    Get theoretical fragments for the given peptide.

    Parameters
    ----------
    peptide : str
        The peptide sequence for which the fragments will be generated.
    types : str, optional
        The fragment type. Can be any combination of 'a', 'b', 'c', 'x', 'y',
        and 'z' (the default is 'by', which means that b-ions and y-ions will
        be generated).
    max_charge : int, optional
        All fragments up to and including the given charge will be generated
        (the default is 1 to only generate singly-charged fragments).

    Returns
    -------
    list
        All fragments as (`FragmentAnnotation`, m/z) tuples sorted in
        ascending m/z order.
    """
    ions = []
    amino_acids = parser.parse(peptide)
    # Count residues on the parsed sequence, not on the raw string: a parsed
    # element may span several characters (e.g. a modified residue label) and
    # terminal groups written in the string are not residues, so
    # ``len(peptide)`` would give a wrong index for the C-terminal ions.
    num_residues = len(amino_acids)
    for i in range(1, num_residues):
        for ion_type in types:
            for charge in range(1, max_charge + 1):
                if ion_type in 'abc':
                    # N-terminal fragment made of the first ``i`` residues.
                    ions.append((
                        FragmentAnnotation(ion_type, i, charge),
                        mass.calculate_mass(sequence=''.join(amino_acids[:i]),
                                            ion_type=ion_type,
                                            charge=charge)))
                else:
                    # C-terminal fragment made of the remaining residues.
                    ions.append((
                        FragmentAnnotation(ion_type, num_residues - i, charge),
                        mass.calculate_mass(sequence=''.join(amino_acids[i:]),
                                            ion_type=ion_type,
                                            charge=charge)))
    return sorted(ions, key=operator.itemgetter(1))

コード例 #11

0

ファイルを表示

ファイル: main.web.py プロジェクト: AspirinCode/DIALib

def prepare_libraries(sequence, **kwargs):
    """Collect labels and masses of the modifications, then parse the sequence.

    The keyword arguments "static", "variable" and "Ytype", when given, are
    indexable collections of modification entries. Each entry contributes
    its "label" and "mass". Unless its "auto_allocation" value is "FALSE"
    or empty, the start of every match of its "regex" in ``sequence`` is
    appended to its "positions" list, which is created when missing. The
    entries are modified in place.

    Returns a tuple of the label list, the label-to-mass mapping and the
    sequence parsed with these labels in split form.
    """
    skip_values = {"FALSE", ""}
    labels = parser.std_labels[:]
    mod_mass = dict(mass.std_aa_mass)

    for group in ("static", "variable", "Ytype"):
        if group not in kwargs:
            continue
        for idx in range(len(kwargs[group])):
            entry = kwargs[group][idx]
            labels.append(entry["label"])
            mod_mass[entry["label"]] = entry["mass"]
            if entry["auto_allocation"] in skip_values:
                continue

            pattern = re.compile(entry["regex"])
            if "positions" not in entry:
                entry["positions"] = []
            for hit in pattern.finditer(sequence):
                entry["positions"].append(hit.start())

    parsed = parser.parse(sequence, labels=labels, split=True)
    return labels, mod_mass, parsed

コード例 #12

0

ファイルを表示

ファイル: pyteomicsTest.py プロジェクト: BJWiley233/Practical-Computer-Concepts-Files

# -*- coding: utf-8 -*-
"""
Created on Wed Feb 27 20:56:42 2019

@author: bjwil
"""

import pyteomics
from pyteomics import parser
# Try a few pyteomics.parser helpers. The return values are neither stored
# nor printed, so they are only visible when the lines are run interactively.
parser.is_modX('pTx')
parser.is_modX('K')
# Parse a sequence with split=True; the result is discarded here as well.
parser.parse('AcPEPTIDE', split=True)

コード例 #13

0

ファイルを表示

ファイル: example_fasta.py プロジェクト: wxlsummer/pyteomics

        'ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/'
        'knowledgebase/proteomes/YEAST.fasta.gz', 'yeast.fasta.gz')
    print 'Done!'

# Step 1: cleave every protein of the FASTA file with the trypsin rule and
# collect the resulting peptides in a set, so each sequence is kept once.
print 'Cleaving the proteins with trypsin...'
unique_peptides = set()
# NOTE(review): the object returned by gzip.open is never closed explicitly.
for description, sequence in fasta.read(gzip.open('yeast.fasta.gz')):
    new_peptides = parser.cleave(sequence, parser.expasy_rules['trypsin'])
    unique_peptides.update(new_peptides)
print 'Done, {0} sequences obtained!'.format(len(unique_peptides))

# One dict per peptide; the following steps add more keys to each dict.
peptides = [{'sequence': i} for i in unique_peptides]

# Step 2: store the parsed form (with termini) and the length of each peptide.
print 'Parsing peptide sequences...'
for peptide in peptides:
    peptide['parsed_sequence'] = parser.parse(peptide['sequence'],
                                              show_unmodified_termini=True)
    peptide['length'] = parser.length(peptide['parsed_sequence'])
print 'Done!'

# Keep only peptides of at most 100 residues.
peptides = [peptide for peptide in peptides if peptide['length'] <= 100]

# Step 3: charge at pH 2.0 rounded to an integer, then the mass and the m/z
# computed for that charge.
print 'Calculating the mass, charge and m/z...'
for peptide in peptides:
    peptide['charge'] = int(
        round(electrochem.charge(peptide['parsed_sequence'], pH=2.0)))
    peptide['mass'] = mass.calculate_mass(peptide['parsed_sequence'])
    peptide['m/z'] = mass.calculate_mass(peptide['parsed_sequence'],
                                         charge=peptide['charge'])
print 'Done!'

print 'Calculating the retention time...'

コード例 #14

0

ファイルを表示

ファイル: dataTransformation.py プロジェクト: NicolasHousset/R2TF

                    elif(df['Old'][parser_index] == "*"):
                        parser_index += 1
                    elif(df['Old'][parser_index] == ","):
                        parser_index += 1
            elif(c_term_parsing):
                if(df['Old'][parser_index]=='C'):
                    if(df['Old'][parser_index+1]=='O'):
                        if(df['Old'][parser_index+2]=='O'):
                            if(df['Old'][parser_index+3]=='H'):
                                parser_index += 4
                                df['New'][writer_index] = 'O'
                                df['New'][writer_index+1] = 'H'
                                writer_index += 1
                c_term_parsing = False
            else:
                df['New'][writer_index] = df['Old'][parser_index].lower()
                parser_index += 1
                writer_index += 1

        new_seq = ""
        for i in range(pep_length):
            new_seq += df['New'][i]
        df_reduced['New_Sequence'][indexing] = new_seq

    df_reduced.to_csv('/mnt/compomics/Nicolas/Python/R2TF/data/project_transformed'+str(test['projectid'][projIndex])+'.csv',
                      index=False,header=False)

from pyteomics import parser
# Parse a hand-written sequence with explicit termini, allowing modifications
# that are not listed among the known labels. The result is not stored.
new_seq_2 = "H-QpyrQSEEDLLLQDFSR-OH"
parser.parse(new_seq_2, allow_unknown_modifications=True)

コード例 #15

0

ファイルを表示

ファイル: Candidate_pep_for_a_mass_tolerance.py プロジェクト: changool/Proteomics_Ribosome_Profiling

        print "Warning! Command-line argument: %s not recognized. Exiting..." % opt
        sys.exit()

# Open the input file for reading.
# NOTE(review): the handle is not closed anywhere in the visible code.
inputfile01 = open(input_file, "r")
# outputfile1 = open(output_file,'w')

from pyteomics import parser
from pyteomics import mass


# gene_list = ['SAA1']
# gene_list = open(gene_list,'r')
counter = 0
errcounter = 0
# Reference peptide: parse it with its termini and compute its mass.
pepinput = "MALTSEYWIILR"
ps0 = parser.parse(pepinput, show_unmodified_termini=True)
referencemass = mass.calculate_mass(parsed_sequence=ps0)
mass_tolerance = 7  # unit: ppm
targetmass = 1422.730378
# Digest every record of the input file and gather all resulting peptides.
total_pep_list = []
for num, x in enumerate(SeqIO.parse(inputfile01, "fasta")):
    # Progress report every 10000 records.
    if num % 10000 == 0:
        print num
    # if num > 5000:
    #    break
    pro = str(x.seq)
    peplist = digest(pro, enzyme, missed_cleavage, min_pep_length, max_pep_length)
    if len(peplist) > 0:
        for p in peplist:
            total_pep_list.append(p)
# Remove duplicates. Despite its name the list is not sorted here: it is
# built directly from a set.
sort_list = list(set(total_pep_list))

コード例 #16

0

ファイルを表示

ファイル: test_parser.py プロジェクト: sailfish009/pyteomics

 def test_parse_simple(self):
     """Check that joining the parsed labels gives back each simple sequence."""
     for sequence in self.simple_sequences:
         rebuilt = ''.join(parser.parse(sequence, labels=uppercase))
         self.assertEqual(sequence, rebuilt)

コード例 #17

0

ファイルを表示

ファイル: test_mass.py プロジェクト: sailfish009/pyteomics

    def test_calculate_mass(self):
        """Check ``mass.calculate_mass`` against values built from the fixtures.

        Covers input given as a formula, a sequence and a parsed sequence,
        the average mass, the m/z of charged ions, and agreement between
        sequence and parsed-sequence input for random peptides.
        """
        data = self.mass_data
        comp_kw = dict(aa_comp=self.aa_comp, mass_data=data)

        # A formula, a sequence and a parsed sequence are all compared with
        # the same sum over the atoms of 'ABCDE'.
        self.assertEqual(
            mass.calculate_mass(formula='ABCDE', mass_data=data),
            sum(data[atom][0][0] for atom in 'ABCDE'))
        self.assertEqual(
            mass.calculate_mass(sequence='XYZ', **comp_kw),
            sum(data[atom][0][0] for atom in 'ABCDE'))
        self.assertEqual(
            mass.calculate_mass(
                parsed_sequence=['H-', 'X', 'Y', 'Z', '-OH'], **comp_kw),
            sum(data[atom][0][0] for atom in 'ABCDE'))

        # Average mass: sum, over every isotope entry except key 0, of the
        # product of the first two fields of the entry.
        expected_average = sum(
            data[atom][isotope][0] * data[atom][isotope][1]
            for atom in 'ABCDE'
            for isotope in data[atom]
            if isotope != 0)
        self.assertEqual(
            mass.calculate_mass(formula='ABCDE', average=True, mass_data=data),
            expected_average)

        # m/z of an ion for several charges.
        for charge in (1, 2, 3):
            ion_kw = dict(formula='ABCDE',
                          ion_type='M',
                          charge=charge,
                          mass_data=data)

            # Same value as writing the charge carriers into the formula.
            self.assertEqual(
                mass.calculate_mass(**ion_kw),
                mass.calculate_mass(formula='ABCDE' + 'H+%d' % (charge, ),
                                    mass_data=data))

            # Same value as adding the 'H+' mass per charge by hand and
            # dividing by the charge.
            neutral = mass.calculate_mass(formula='ABCDE', mass_data=data)
            self.assertEqual(
                mass.calculate_mass(**ion_kw),
                (neutral + data['H+'][0][0] * charge) / charge)

            # A formula that already contains the charge carriers must be
            # rejected when a charge is given as well.
            with self.assertRaises(auxiliary.PyteomicsError):
                mass.calculate_mass(formula='ABCDEH+%d' % charge,
                                    ion_type='M',
                                    charge=charge,
                                    mass_data=data)

        # Sanity check: sequence and parsed-sequence input must agree.
        for pep in self.random_peptides:
            shared_kw = dict(aa_comp=self.aa_comp,
                             mass_data=data,
                             ion_comp=self.ion_comp)
            from_sequence = mass.calculate_mass(sequence=pep, **shared_kw)
            parsed = parser.parse(
                pep, labels=['X', 'Y', 'Z'], show_unmodified_termini=True)
            from_parsed = mass.calculate_mass(
                parsed_sequence=parsed, **shared_kw)
            self.assertEqual(from_sequence, from_parsed)