Esempio n. 1
0
    def test_most_probable_isotopic_composition(self):
        """Check the most probable isotopic state returned for F formulas."""
        mass_data = self.mass_data
        # Expected probability of the 7 x F[6] / 3 x F[7] state; the factor
        # 120 equals the binomial coefficient C(10, 3).
        p_seven_three = (0.3)**3 * (0.7)**7 * 120

        single = mass.most_probable_isotopic_composition(formula='F',
                                                         mass_data=mass_data)
        expected_single = mass.Composition({'F[6]': 1, 'F[7]': 0},
                                           mass_data=mass_data)
        self.assertEqual(single, (expected_single, 0.7))

        ten = mass.most_probable_isotopic_composition(formula='F10',
                                                      mass_data=mass_data)
        expected_ten = mass.Composition({'F[6]': 7, 'F[7]': 3},
                                        mass_data=mass_data)
        self.assertEqual(ten, (expected_ten, p_seven_three))

        # Only F is expanded into isotopes; A keeps its plain symbol.
        mixed = mass.most_probable_isotopic_composition(
            formula='A20F10',
            elements_with_isotopes=['F'],
            mass_data=mass_data)
        expected_mixed = mass.Composition({'A': 20, 'F[6]': 7, 'F[7]': 3},
                                          mass_data=mass_data)
        self.assertEqual(mixed, (expected_mixed, p_seven_three))
Esempio n. 2
0
    def aion_composition(self, n):
        """Return the composition built from the first ``n`` residues.

        The composition of ``self.stripped_seq[:n]`` is reduced by one C,
        two H and two O (the "a ion" offset, going by the method name).
        Every entry of ``self.mods()`` whose key is below ``n`` is then
        applied: a modification string starting with ``'-'`` is subtracted,
        any other string is added.
        """
        modifications = self.mods()
        ion_comp = mass.Composition(self.stripped_seq[:n])

        for atom, loss in (('C', 1), ('H', 2), ('O', 2)):
            ion_comp[atom] -= loss

        for position in modifications:
            if not position < n:
                continue
            mod_formula = modifications[position]
            if mod_formula[0] == '-':
                # Leading '-' marks a loss: negate every atom count.
                delta = mass.Composition(formula=mod_formula[1:])
                delta = {atom: -delta[atom] for atom in delta}
            else:
                delta = mass.Composition(formula=mod_formula)
            for atom in delta:
                if atom in ion_comp:
                    ion_comp[atom] += delta[atom]
                else:
                    ion_comp[atom] = delta[atom]
        return ion_comp
Esempio n. 3
0
 def test_Composition_sum(self):
     """Adding two Composition objects sums their per-atom counts."""
     left = mass.Composition(sequence='XXY', aa_comp=self.aa_comp)
     right = mass.Composition(sequence='YZZ', aa_comp=self.aa_comp)
     expected = dict.fromkeys('ABCDE', 2)
     self.assertEqual(left + right, expected)
Esempio n. 4
0
def get_charges():
    """Return ``(charge, composition)`` pairs for charges 1, 2, 3, 4 and 0.

    Each composition is built from ``{"H": charge}``.
    """
    charges = (1, 2, 3, 4, 0)
    return [(z, mass.Composition({"H": z})) for z in charges]
Esempio n. 5
0
 def test_Composition_mul(self):
     """A Composition can be multiplied by an integer from either side."""
     expected = dict.fromkeys('ABCDE', 2)

     left_product = 2 * mass.Composition(sequence='XYZ',
                                         aa_comp=self.aa_comp)
     self.assertEqual(left_product, expected)

     right_product = mass.Composition(sequence='XYZ',
                                      aa_comp=self.aa_comp) * 2
     self.assertEqual(right_product, expected)
def read_compounds(filename,
                   separator="\t",
                   calculate=True,
                   lib_adducts=None,
                   filename_atoms=""):
    """Read a delimited compound table into a list of ordered records.

    Parameters
    ----------
    filename : path or file-like
        Passed to ``read_csv``. The table must provide the columns
        ``molecular_formula``, ``compound_id`` and ``compound_name``, plus
        ``exact_mass`` when ``calculate`` is false. ``retention_time`` (or
        ``rt``) and ``adduct`` are copied when present.
    separator : str
        Column separator handed to ``read_csv``.
    calculate : bool
        When true, the exact mass is computed from the formula using the
        mass data loaded from ``data/nist_database.txt`` next to this module
        and rounded to 6 decimals; otherwise the ``exact_mass`` column is
        used.
    lib_adducts : object, optional
        Adduct library exposing ``.lib[adduct]["mass"]``. When given (and
        ``calculate`` is true) that mass is added to the computed exact mass
        of rows that have an ``adduct`` column.
    filename_atoms : str
        Not used by this function.

    Returns
    -------
    list of collections.OrderedDict
        One record per row whose formula yields a non-empty composition.
        Rows with an empty composition are skipped and reported through
        ``warnings.warn``.
    """
    import warnings  # local import keeps this block self-contained

    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data',
            'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")
    records = []
    for index, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # The original built a Warning instance and discarded it, so
            # skipped rows were never reported.
            warnings.warn("{} Skipped".format(row))
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))
        sum_CHNOPS = sum(
            comp[e] for e in comp if e in ("C", "H", "N", "O", "P", "S"))
        # True when the formula contains no element outside C/H/N/O/P/S.
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())
        if calculate:
            record["exact_mass"] = round(
                pyteomics_mass.calculate_mass(formula=formula,
                                              mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)

        record["compound_id"] = row.compound_id
        record["compound_name"] = row.compound_name
        record["molecular_formula"] = composition_to_string(comp)

        if "retention_time" in df.columns:
            record["retention_time"] = row.retention_time
        elif "rt" in df.columns:
            record["retention_time"] = row.rt
        if "adduct" in df.columns:
            record["adduct"] = row.adduct
            if lib_adducts and calculate:
                record["exact_mass"] += lib_adducts.lib[row.adduct]["mass"]

        records.append(record)

    return records
Esempio n. 7
0
def test_composition(get_composition):
    """Check Composition construction from sequences.

    ``get_composition`` supplies items whose first element is a sequence
    and whose second element is the expected composition.
    """
    assert mass.Composition("ACDE")
    assert mass.Composition("A") + mass.Composition("C")

    expected_parsed = {'H': 22, 'C': 15, 'O': 8, 'N': 4, 'S': 1}
    assert mass.Composition(parsed_sequence="ACDE") == expected_parsed

    for data in get_composition:
        sequence, expected = data[0], data[1]
        assert mass.Composition(sequence) == expected
Esempio n. 8
0
 def test_Composition_sseq(self):
     """A Composition can be built from a split sequence."""
     groups = [('X', ), ('Y', ), ('Z', )]
     built = mass.Composition(split_sequence=groups, aa_comp=self.aa_comp)
     self.assertEqual(built, dict.fromkeys('ABC', 1))
Esempio n. 9
0
 def test_Composition_pseq(self):
     """A Composition can be built from a parsed sequence."""
     residues = ['X', 'Y', 'Z']
     built = mass.Composition(parsed_sequence=residues, aa_comp=self.aa_comp)
     self.assertEqual(built, dict.fromkeys('ABC', 1))
Esempio n. 10
0
def _formula_parser(formula, session):
    '''
    Parse a unimod formula composed of elements,
    isotopes, and other bricks.

    The formula is split on single spaces. Each token has the shape
    ``[isotope]name[(count)]``, e.g. ``H(2)``, ``13C(6)`` or ``O(-1)``;
    a missing count means 1.

    In order to look up a Brick's composition, this function must have access to a session.

    Returns a ``mass.Composition`` accumulating every token: tokens naming a
    known Brick contribute that brick's composition times the count, all
    other tokens contribute ``count`` atoms of the (isotope-qualified) name.
    '''
    # The isotope prefix must be greedy. With a lazy ``\d+?`` a token such as
    # "13C(2)" is split into isotope "1" and element "3C", because the
    # element class ``[^\(]+`` happily consumes the remaining digit.
    token_pattern = re.compile(
        r"(?P<isotope>\d+)?(?P<element>[^\(]+)(?:\((?P<count>-?\d+)\))?")

    composition = mass.Composition()
    for token in formula.split(" "):
        match = token_pattern.search(token)
        if match is None:
            # e.g. empty tokens produced by consecutive spaces
            continue
        isotope, element, count = match.group("isotope", "element", "count")
        count = 1 if count is None else int(count)
        if isotope is not None:
            name = mass._make_isotope_string(element, isotope)
        else:
            name = element
        is_brick = session.query(Brick).filter(Brick.brick == name).first()
        if is_brick is None:
            composition[name] += count
        else:
            composition += is_brick.composition * count
    return composition
Esempio n. 11
0
 def test_Composition_positional(self):
     """A Composition can be created from a single positional argument."""
     # Residue compositions extended with the modification entries.
     combined_comp = self.aa_comp.copy()
     combined_comp.update(self.mods)

     from_sequence = mass.Composition('aXbYZ', aa_comp=combined_comp)
     expected_sequence = {'A': 2, 'B': 2, 'C': 1, 'D': 1, 'E': 1}
     self.assertEqual(from_sequence, expected_sequence)

     from_formula = mass.Composition('AB2C3', mass_data=self.mass_data)
     expected_formula = {'A': 1, 'B': 2, 'C': 3}
     self.assertEqual(from_formula, expected_formula)
Esempio n. 12
0
def get_mods_composition(modifications):
    """Sum the compositions of a list of modifications.

    Parameters
    ----------
    modifications : list of str
        Modification titles, looked up with ``UNIMOD_MODS.by_title``.

    Returns
    -------
    pyteomics.mass.Composition
        Accumulated composition of every modification that was found.
        Titles whose lookup fails are logged as warnings and skipped.

    """
    # ???: Have the mass.Unimod() dict as parameter ?
    combined = mass.Composition()
    for title in modifications:
        try:
            delta = UNIMOD_MODS.by_title(title)["composition"]
            combined += delta
            # Checked element by element: a set comparison would not work
            # with elements written as isotopes.
            for atom in delta:
                if atom in USED_ELEMS:
                    continue
                log.warning(f"{atom} in ({title}) is not supported "
                            "in the computation of M0 and M1")

        except (KeyError, AttributeError, TypeError):
            log.warning(f"Unimod entry not found for : {title}")
    return combined
Esempio n. 13
0
def calculateMassUncertainty(processedSpectrum, weighted=False, dfOutput=True, show=False):
    """Average the m/z error attributed to each element.

    For every row, the error is ``formula_mz - observed_mz``. The row's
    formula (with one trailing ``'-'`` stripped, if present) is parsed into
    a composition, and the error is credited to every element it contains.
    Finally the credited errors are averaged per element.

    Parameters
    ----------
    processedSpectrum : mapping of columns (presumably a pandas DataFrame)
        Must provide ``"formula"``, ``"formula_mz"`` and ``"observed_mz"``.
    weighted : bool
        When true, each contribution is scaled by the element's share of the
        total atom count of the formula.
    dfOutput : bool
        When true return a DataFrame with columns ``Element`` and
        ``Uncertainty``; otherwise return the plain ``{element: mean}`` dict.
    show : bool
        When true, draw a bar chart of the averages on figure 1.
    """
    errors = processedSpectrum["formula_mz"] - processedSpectrum["observed_mz"]
    instance = pd.concat([processedSpectrum["formula"], errors],
                         axis=1, keys=["formula", "uncertainty"])

    contributions = {}
    for index, row in instance.iterrows():
        ion = row["formula"]
        # endswith() is safe on an empty string, unlike ion[-1].
        if ion.endswith('-'):
            ion = ion[:-1]
        comp = mass.Composition(formula=ion)
        error = row["uncertainty"]
        total = sum(comp.values())
        for element in comp.keys():
            factor = comp[element] / total if weighted else 1
            contributions.setdefault(element, []).append(error * factor)

    elements = {
        element: sum(values) / len(values)
        for element, values in contributions.items()
    }

    if show:
        # Materialise the dict views before handing them to matplotlib.
        plt.figure(1)
        plt.bar(list(elements.keys()), list(elements.values()))
        plt.ylabel('Error')
        plt.xlabel('Elements')
    if dfOutput:
        return pd.DataFrame(list(elements.items()),
                            columns=['Element', 'Uncertainty'])
    return elements
Esempio n. 14
0
def expand_isotopes(peptide, charge_states=(2, 3)):
    '''
    Convert peptide to DataFrame of isotopic peaks
    Input
        peptide - Series, should contain 'sequence', one 'z+' column for
            every z in charge_states, and one column per model listed in
            params.ion_models
        charge_states - iterable of charges to expand (immutable default, so
            the value cannot be shared and modified between calls)
    Return
        DataFrame with one row for each isotopic peak
        columns are:
            mz - m/z of ion
            ic - isotope probability times the peptide's 'z+' value
            ic_XX - ion abundance according to XX model ('ic' times the
                peptide's XX value)
            z  - charge
            sequence - peptide sequence
    '''
    # Element/count pairs concatenated into a formula string for IsoSpecPy.
    formula = ''.join([
        '{}{}'.format(x, y)
        for x, y in mass.Composition(peptide['sequence']).items()
    ])
    cluster = IsoSpecPy.IsoThreshold(formula=formula,
                                     threshold=0.005,
                                     absolute=True)
    mz0 = cluster.np_masses()
    int0 = cluster.np_probs()
    # The isotopic cluster is repeated once per charge state.
    mz = np.concatenate([get_ions(mz0, z) for z in charge_states])
    ic = np.concatenate(
        [int0 * peptide['{}+'.format(z)] for z in charge_states])
    charge = np.concatenate(
        [np.repeat(z, mz0.shape[0]) for z in charge_states])
    result = pd.DataFrame({'mz': mz, 'ic': ic, 'z': charge})
    result['sequence'] = peptide['sequence']
    for model in params.ion_models:
        result['ic_{}'.format(model)] = result['ic'] * peptide[model]

    return result
Esempio n. 15
0
 def test_Composition_formula(self):
     """A Composition can be built from a formula string."""
     # Minimal mass table: one entry (key 0) per element A..E.
     unit_mass_data = {atom: {0: (1.0, 1.0)} for atom in 'ABCDE'}
     parsed = mass.Composition(formula='ABCDE', mass_data=unit_mass_data)
     self.assertEqual(self.d, parsed)
Esempio n. 16
0
def get_mods():
    """Return ``(modification titles, expected composition)`` pairs.

    The third case adds the title ``"not_mod"`` and expects the same
    composition as the second; the last case is the empty list.
    """
    acetyl_phospho = {'H': 3, 'C': 2, 'O': 4, "P": 1}
    return [
        (["Oxidation"], mass.Composition({"O": 1})),
        (["Acetyl", "Phospho"], mass.Composition(acetyl_phospho)),
        (["Acetyl", "Phospho", "not_mod"], mass.Composition(acetyl_phospho)),
        ([], mass.Composition()),
    ]
Esempio n. 17
0
def test_calculate_mass(get_mass):
    """Check ``mass.calculate_mass`` on fixed inputs and on ``get_mass``.

    ``get_mass`` supplies items whose first element is a sequence and whose
    second element is the expected mass.
    """
    acde_mass = 436.12639936
    assert mass.calculate_mass("ACDE") == pytest.approx(acde_mass, REL)

    from_composition = mass.calculate_mass(mass.Composition("ACDE"))
    assert from_composition == pytest.approx(acde_mass, REL)

    from_parsed = mass.calculate_mass(parsed_sequence="ACDE")
    assert from_parsed == pytest.approx(418.115834, REL)

    assert mass.calculate_mass("A") == pytest.approx(89.04767846841, REL)

    for data in get_mass:
        sequence, expected = data[0], data[1]
        assert mass.calculate_mass(sequence) == pytest.approx(expected, REL)
Esempio n. 18
0
def test_convert_atom_C_to_X():
    """Check ``stfi.convert_atom_C_to_X`` on three sequences."""
    cases = (
        ("ACDE", {'H': 24, 'O': 9, 'N': 4, 'S': 1, 'X': 15}),
        ("PEPTIDE", {'H': 53, 'O': 15, 'N': 7, 'X': 34}),
        ("ACDEFGHIKLMNPQRSTVWY",
         {'H': 159, 'O': 30, 'N': 29, 'S': 2, 'X': 107}),
    )
    for sequence, atoms in cases:
        assert stfi.convert_atom_C_to_X(sequence) == mass.Composition(atoms)
Esempio n. 19
0
    def test_composition_objects_are_pickleable(self):
        """Compositions from every constructor form survive a pickle cycle."""
        unit_mass_data = {atom: {0: (1.0, 1.0)} for atom in 'ABCDE'}
        aa_comp = self.aa_comp
        # One object per construction route: dict, formula, sequence,
        # parsed sequence, split sequence.
        compositions = [
            mass.Composition(self.d, mass_data=self.mass_data),
            mass.Composition(formula='ABCDE', mass_data=unit_mass_data),
            mass.Composition(sequence='XYZ', aa_comp=aa_comp),
            mass.Composition(parsed_sequence=['X', 'Y', 'Z'],
                             aa_comp=aa_comp),
            mass.Composition(split_sequence=[('X', ), ('Y', ), ('Z', )],
                             aa_comp=aa_comp),
        ]

        for original in compositions:
            restored = pickle.loads(pickle.dumps(original))
            self.assertEqual(original, restored)
Esempio n. 20
0
 def composition(self):
     """Build a ``mass.Composition`` from the entries of ``self.elements``.

     Each entry provides an ``element`` symbol, optionally prefixed by
     isotope digits, and a ``count``. Symbols with a digit prefix are
     stored under the name from ``mass._make_isotope_string``.
     """
     symbol_pattern = r"(?P<isotope>\d*)?(?P<element>\S+)"
     result = mass.Composition()
     for relation in self.elements:
         isotope, element = re.search(symbol_pattern,
                                      relation.element).groups()
         if isotope == "":
             key = element
         else:
             key = mass._make_isotope_string(element, int(isotope))
         result[key] = relation.count
     return result
Esempio n. 21
0
def test_computation_isotopologue():
    """Check ``compute_M0_nl`` / ``compute_M1_nl`` for the sequence ACDE."""
    # Standard formula.
    composition = mass.Composition("ACDE")
    checks = (
        (stfi.compute_M0_nl, stfi.NATURAL_ABUNDANCE, 0.77662382),
        (stfi.compute_M0_nl, stfi.C12_ABUNDANCE, 0.911253268),
        (stfi.compute_M1_nl, stfi.NATURAL_ABUNDANCE, 0.1484942353),
        (stfi.compute_M1_nl, stfi.C12_ABUNDANCE, 0.0277650369575),
    )
    for compute, abundance, expected in checks:
        assert compute(composition, abundance) == pytest.approx(expected, REL)
Esempio n. 22
0
    def validate(self):
        """Return True when the sequence and its modifications are usable.

        The check fails when ``self.stripped_seq`` is the empty string, when
        it contains a character outside the 20 letters in ``allowed_chars``,
        or when any value of ``self.mods()`` cannot be turned into a
        ``mass.Composition``.
        """
        if self.stripped_seq == "":
            return False

        allowed_chars = 'ACDEFGHIKLMNPQRSTVWY'
        if any(char not in allowed_chars for char in self.stripped_seq):
            return False

        for mod in self.mods().values():
            try:
                mass.Composition(mod)
            except Exception:
                # Any parsing failure marks the peptide as invalid.
                # KeyboardInterrupt and SystemExit are no longer swallowed,
                # as they were by the previous bare ``except:``.
                return False

        return True
Esempio n. 23
0
def calculate_b_y_ion(sequence, ion_charge):
    """Return the b- and y-ion masses for every cleavage site of a sequence.

    The standard residue table is copied and its ``'C'`` entry is replaced
    by H8 C5 S1 O2 N2 (presumably carbamidomethylated cysteine - confirm).

    Returns a ``(b_ions, y_ions)`` pair of tuples. ``b_ions`` runs over the
    prefixes ``sequence[:1]`` .. ``sequence[:len-1]``; ``y_ions`` runs over
    the suffixes from the shortest to the longest.
    """
    aa_comp = dict(mass.std_aa_comp)
    aa_comp['C'] = mass.Composition({'H': 8, 'C': 5, 'S': 1, 'O': 2, 'N': 2})
    cut_sites = range(1, len(sequence))

    def fragment_mass(fragment, ion_type):
        return mass.calculate_mass(fragment,
                                   ion_type=ion_type,
                                   charge=ion_charge,
                                   aa_comp=aa_comp)

    b_ion = tuple(fragment_mass(sequence[:cut], 'b') for cut in cut_sites)
    # Walk the cut sites backwards so suffixes go from small to big.
    y_ion = tuple(
        fragment_mass(sequence[cut:], 'y') for cut in reversed(cut_sites))
    return (b_ion, y_ion)
Esempio n. 24
0
def get_charge_composition(charge):
    """Build the composition carried by a charge, counted as H only.

    Parameters
    ----------
    charge : int
        Peptide charge.

    Returns
    -------
    pyteomics.mass.Composition
        Composition holding ``charge`` hydrogen atoms.

    """
    return mass.Composition({"H": charge})
Esempio n. 25
0
def test_deprecated_computation_isotopologue():
    """Check ``compute_M0`` / ``compute_M1`` for the sequence ACDE.

    Both functions are first re-exposed on ``stfi`` from
    ``stfi.seq_to_first_iso`` and then called through ``stfi``.
    """
    composition = mass.Composition("ACDE")
    stfi.compute_M0 = stfi.seq_to_first_iso.compute_M0
    stfi.compute_M1 = stfi.seq_to_first_iso.compute_M1

    checks = (
        (stfi.compute_M0, stfi.NATURAL_ABUNDANCE, 0.77662382),
        (stfi.compute_M0, stfi.C12_ABUNDANCE, 0.911253268),
        (stfi.compute_M1, stfi.NATURAL_ABUNDANCE, 0.1484942353),
        (stfi.compute_M1, stfi.C12_ABUNDANCE, 0.0277650369575),
    )
    for compute, abundance, expected in checks:
        assert compute(composition, abundance) == pytest.approx(expected, REL)
Esempio n. 26
0
 def test_isotopologues(self):
     """Every input form of ``mass.isotopologues`` yields the known states.

     The peptide XYF is supplied six ways (positional sequence, ``sequence``,
     ``parsed_sequence``, ``split_sequence``, ``formula``, ``composition``).
     Each generated state must be one of ``states`` and its abundance must
     match the corresponding entry of ``abundances``.
     """
     peptide = 'XYF'
     states = [{
         'F[6]': 1,
         'A': 1,
         'B': 1,
         'D': 1,
         'E': 1
     }, {
         'F[7]': 1,
         'A': 1,
         'B': 1,
         'D': 1,
         'E': 1
     }]
     abundances = [0.7, 0.3]
     kw_common = dict(elements_with_isotopes='F',
                      aa_comp=self.aa_comp,
                      mass_data=self.mass_data)
     kwlist = [{}, {
         'sequence': 'XYF'
     }, {
         'parsed_sequence':
         parser.parse('XYF', show_unmodified_termini=True)
     }, {
         'split_sequence':
         parser.parse('XYF', show_unmodified_termini=True, split=True)
     }, {
         'formula': 'ABDEF'
     }, {
         'composition':
         mass.Composition(sequence='XYF', aa_comp=self.aa_comp)
     }]
     # Only the first case passes the peptide positionally.
     arglist = [(peptide, ), (), (), (), (), ()]
     for args, kw in zip(arglist, kwlist):
         kwargs = kw_common.copy()
         kwargs.update(kw)
         isotopologues = mass.isotopologues(*args, **kwargs)
         for state in isotopologues:
             # list.index() raises ValueError instead of returning -1, so
             # membership is asserted explicitly before the lookup.
             self.assertIn(state, states)
             i = states.index(state)
             self.assertAlmostEqual(
                 abundances[i],
                 mass.isotopic_composition_abundance(
                     state, aa_comp=self.aa_comp, mass_data=self.mass_data))
Esempio n. 27
0
def convert_atom_C_to_X(sequence):
    """Move the carbon count of a composition to the element X.

    Parameters
    ----------
    sequence : str or pyteomics.mass.Composition
        Sequence or composition.

    Returns
    -------
    pyteomics.mass.Composition
        New composition in which the ``"C"`` entry has been removed and its
        count (0 when absent) assigned to ``"X"``.

    """
    # Always work on a Composition built from the input.
    composition = mass.Composition(sequence)
    carbon_count = composition.pop("C", 0)
    composition["X"] = carbon_count
    return composition
Esempio n. 28
0
def HC_HNOPS_rules(molecular_formula):
    """Evaluate the H/C and N,O,P,S/C element-ratio rules for a formula.

    Parameters
    ----------
    molecular_formula : str
        Formula passed to ``pyteomics_mass.Composition``.

    Returns
    -------
    dict
        ``{"HC": 0 or 1, "NOPSC": 0 or 1}``.

        * ``HC`` is 1 when both C and H are present and ``0 < H/C < 6``.
        * ``NOPSC`` is 1 when every ratio N/C, O/C, P/C and S/C lies in
          ``[0, limit]`` with limits 4, 3, 2 and 3 respectively. A ratio
          counts as 0 when the element or carbon is absent.
    """
    composition = pyteomics_mass.Composition(molecular_formula)
    has_carbon = "C" in composition

    hc = 0
    if has_carbon and "H" in composition:
        hc_ratio = float(composition['H']) / float(composition['C'])
        if 0 < hc_ratio < 6:
            hc = 1

    # Upper bound of element/C for each heteroatom.
    limits = (('N', 4), ('O', 3), ('P', 2), ('S', 3))
    nopsc = 1
    for element, limit in limits:
        if element in composition and has_carbon:
            ratio = float(composition[element]) / float(composition['C'])
        else:
            ratio = 0.0
        if not 0.0 <= ratio <= limit:
            nopsc = 0

    return {"HC": hc, "NOPSC": nopsc}
Esempio n. 29
0
def read_molecular_formulae(filename,
                            separator="\t",
                            calculate=True,
                            filename_atoms=""):
    """Read a delimited table of molecular formulae into ordered records.

    Parameters
    ----------
    filename : path or file-like
        Passed to ``read_csv``. The table must provide a
        ``molecular_formula`` column, plus ``exact_mass`` when ``calculate``
        is false.
    separator : str
        Column separator handed to ``read_csv``.
    calculate : bool
        When true, the exact mass is computed from the formula using the
        mass data loaded from ``data/nist_database.txt`` next to this module
        and rounded to 6 decimals; otherwise the ``exact_mass`` column is
        used.
    filename_atoms : str
        Not used by this function.

    Returns
    -------
    list of collections.OrderedDict
        One record per row whose formula yields a non-empty composition,
        holding the composition, the CHNOPS flag, the exact mass, the
        results of ``HC_HNOPS_rules`` and ``lewis_senior_rules`` and the
        double bond equivalents. Rows with an empty composition are skipped
        and reported through ``warnings.warn``.
    """
    import warnings  # local import keeps this block self-contained

    if calculate:
        path_nist_database = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data',
            'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")
    records = []
    for index, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # The original built a Warning instance and discarded it, so
            # skipped rows were never reported.
            warnings.warn("{} Skipped".format(row))
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict(
            (k, comp[k]) for k in order_composition_by_hill(comp.keys()))
        sum_CHNOPS = sum(
            comp[e] for e in comp if e in ("C", "H", "N", "O", "P", "S"))
        # True when the formula contains no element outside C/H/N/O/P/S.
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())
        if calculate:
            record["exact_mass"] = round(
                pyteomics_mass.mass.calculate_mass(
                    formula=formula,
                    mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)
        record.update(HC_HNOPS_rules(formula))
        record.update(lewis_senior_rules(formula))
        record["double_bond_equivalents"] = double_bond_equivalents(
            record["composition"])
        records.append(record)

    return records
Esempio n. 30
0
def lewis_senior_rules(molecular_formula):
    """Evaluate the "lewis" and "senior" valence checks for a formula.

    The valence sum is taken over C, H, N, O, P and S only, using the
    valences in ``valence``.

    Returns ``{"lewis": 0 or 1, "senior": 0 or 1}``:

    * ``lewis`` is 1 when the valence sum is even.
    * ``senior`` is 1 when the valence sum is at least twice the total
      atom count minus one.
    """
    valence = {'C': 4, 'H': 1, 'N': 3, 'O': 2, 'P': 3, 'S': 2}

    composition = pyteomics_mass.Composition(molecular_formula)

    lewis_sum = sum(valence[element] * composition[element]
                    for element in valence if element in composition)
    senior_threshold = (sum(composition.values()) - 1) * 2

    return {
        "lewis": 1 if lewis_sum % 2 == 0 else 0,
        "senior": 1 if lewis_sum >= senior_threshold else 0,
    }