Ejemplo n.º 1
0
def create_simple_cam_isotopologue(test_id, test_dict):
    '''
    Check that two parameter sets yield the same isotopologue mass.

    One charge-2 ``pyqms.IsotopologueLibrary`` is built from
    ``test_dict['set1']`` and one from ``test_dict['set2']``. For every
    label percentile of the first formula in the first library, the first
    entry of the 'mass' list is compared with the entry at the same
    position in the second library.

    Args:
        test_id: identifier of the test case (not used in the body).
        test_dict (dict): keyword arguments for the two libraries, stored
            under the keys 'set1' and 'set2'.

    Raises:
        AssertionError: if the two masses differ by 1e-9 or more.
    '''
    print('''
        ol diff
    1868.9315502796
    1869.9285851746
    ''')
    lib_1 = pyqms.IsotopologueLibrary(charges=[2],
                                      verbose=False,
                                      **test_dict['set1'])
    lib_2 = pyqms.IsotopologueLibrary(charges=[2],
                                      verbose=False,
                                      **test_dict['set2'])
    formula_1 = list(lib_1.keys())[0]
    formula_2 = list(lib_2.keys())[0]
    # __oOo__
    for label_percentile in lib_1[formula_1]['env'].keys():
        print(lib_1.lookup['formula to molecule'][formula_1])
        print(lib_2.lookup['formula to molecule'][formula_2])
        masses_1 = lib_1[formula_1]['env'][label_percentile]['mass']
        for n, mass_1 in enumerate(masses_1):
            mass_2 = lib_2[formula_2]['env'][label_percentile]['mass'][n]
            print(mass_1)
            print(mass_2)
            # Compare the absolute difference: a signed difference would
            # let any mass_1 smaller than mass_2 pass, however far apart.
            assert abs(mass_1 - mass_2) < 0.000000001
            # Only the first mass of each label percentile is compared.
            break
Ejemplo n.º 2
0
def create_simple_cam_isotopologue(test_id, test_dict):
    """
    Check that two parameter sets yield the same isotopologue mass.

    One charge-2 ``pyqms.IsotopologueLibrary`` is built from
    ``test_dict["set1"]`` and one from ``test_dict["set2"]``. For every
    label percentile of the first formula in the first library, the first
    entry of the "mass" list is compared with the entry at the same
    position in the second library.

    Args:
        test_id: identifier of the test case (not used in the body).
        test_dict (dict): keyword arguments for the two libraries, stored
            under the keys "set1" and "set2".

    Raises:
        AssertionError: if the two masses differ by 1e-9 or more.
    """
    print("""
        ol diff
    1868.9315502796
    1869.9285851746
    """)
    lib_1 = pyqms.IsotopologueLibrary(charges=[2],
                                      verbose=False,
                                      **test_dict["set1"])
    lib_2 = pyqms.IsotopologueLibrary(charges=[2],
                                      verbose=False,
                                      **test_dict["set2"])
    formula_1 = list(lib_1.keys())[0]
    formula_2 = list(lib_2.keys())[0]
    # __oOo__
    for label_percentile in lib_1[formula_1]["env"].keys():
        print(lib_1.lookup["formula to molecule"][formula_1])
        print(lib_2.lookup["formula to molecule"][formula_2])
        masses_1 = lib_1[formula_1]["env"][label_percentile]["mass"]
        for n, mass_1 in enumerate(masses_1):
            mass_2 = lib_2[formula_2]["env"][label_percentile]["mass"][n]
            print(mass_1)
            print(mass_2)
            # Compare the absolute difference: a signed difference would
            # let any mass_1 smaller than mass_2 pass, however far apart.
            assert abs(mass_1 - mass_2) < 0.000000001
            # Only the first mass of each label percentile is compared.
            break
Ejemplo n.º 3
0
 def crash_test(self):
     """
     Building a library from the CRASH_TESTS parameters must exit.

     The constructor is expected to raise SystemExit with exit code 1.
     """
     crash_params = CRASH_TESTS["in"]["params"]
     with self.assertRaises(SystemExit) as system_exit_check:
         pyqms.IsotopologueLibrary(**crash_params)
     exit_code = system_exit_check.exception.code
     self.assertEqual(exit_code, 1)
def _recalc_isotopic_distribution( test_id, test_dict ):
    """
    Check the recalculated isotopic distribution of an enriched element.

    Args:
        test_id: identifier of the test case (not used in the body).
        test_dict (dict): provides 'enriched_element', 'target_percentile'
            and 'enriched_isotope' for the recalculation.

    Raises:
        AssertionError: if the target percentile is not among the
            (uniformed) abundances or the abundance sum check fails.
    """
    lib = pyqms.IsotopologueLibrary(
        molecules=['KLEINERTEST'], charges=[2], verbose=False
    )
    enriched_element = test_dict['enriched_element']
    target_percentile = test_dict['target_percentile']
    new_distribution = lib._recalc_isotopic_distribution(
        element=enriched_element,
        target_percentile=target_percentile,
        enriched_isotope=test_dict['enriched_isotope'],
    )
    print(new_distribution)
    print(lib.isotopic_distributions[enriched_element])
    # Percentiles and abundances are the same values, so one pass over the
    # (mass, abundance, isotope position) entries collects both.
    abundances = [
        abundance for _mass, abundance, _isoto_pos in new_distribution
    ]
    percentiles = {uniform(abundance) for abundance in abundances}
    sum_of_abundances = sum(abundances)
    # uniform() is said to round to 2 digits; since
    # _recalc_isotopic_distribution scales original and natural abundances
    # first, errors only occur in the lower abundances.
    assert uniform(target_percentile) in percentiles
    # NOTE(review): this check is one-sided, a sum above 1 always passes.
    assert 1 - sum_of_abundances <= sys.float_info.epsilon
Ejemplo n.º 5
0
def generic_check_fucntion(test_dict):
    """
    Compare the label percentiles of a library with the expected ones.

    Args:
        test_dict (dict): provides "input" (molecules),
            "metabolic_labels" and the expected "output".

    Raises:
        AssertionError: if a label percentile tuple is not expected or the
            number of tuples differs from the expectation.
    """
    library_kwargs = {
        "molecules": test_dict["input"],
        "charges": [2],
        "metabolic_labels": test_dict["metabolic_labels"],
    }
    lib = pyqms.IsotopologueLibrary(**library_kwargs)
    observed = lib.labled_percentiles
    # Every observed tuple must be expected ...
    unexpected = [
        tuple_2_check
        for tuple_2_check in observed
        if tuple_2_check not in test_dict["output"]
    ]
    assert not unexpected
    # ... and the counts must agree as well.
    assert len(observed) == len(test_dict["output"])
Ejemplo n.º 6
0
def _extend_kb_with_fixed_labels(test_id, test_dict):
    """
    Check the fixed label variations stored for the first input molecule.

    Args:
        test_id: identifier of the test case (not used in the body).
        test_dict (dict): library keyword arguments under ["in"]["params"]
            and the expected variations under ["out"]["formated_molecule"].

    Raises:
        AssertionError: if the sorted variations differ from the
            expectation.
    """
    library_params = test_dict["in"]["params"]
    lib_1 = pyqms.IsotopologueLibrary(**library_params)
    variations = lib_1.lookup["molecule fixed label variations"]
    print(variations)
    formula_1 = list(lib_1.keys())[0]
    # __oOo__
    lookup_key = library_params["molecules"][0]
    # The check does not depend on the label percentile; it is repeated
    # once per percentile of the first formula.
    for _label_percentile in lib_1[formula_1]["env"].keys():
        observed = sorted(variations[lookup_key])
        assert observed == test_dict["out"]["formated_molecule"]
def _extend_kb_with_fixed_labels( test_id, test_dict ):
    """
    Check the fixed label variations stored for the first input molecule.

    Args:
        test_id: identifier of the test case (not used in the body).
        test_dict (dict): library keyword arguments under ['in']['params']
            and the expected variations under ['out']['formated_molecule'].

    Raises:
        AssertionError: if the sorted variations differ from the
            expectation.
    """
    library_params = test_dict['in']['params']
    lib_1 = pyqms.IsotopologueLibrary(**library_params)
    variations = lib_1.lookup['molecule fixed label variations']
    print(variations)
    formula_1 = list(lib_1.keys())[0]
    # __oOo__
    lookup_key = library_params['molecules'][0]
    # The check does not depend on the label percentile; it is repeated
    # once per percentile of the first formula.
    for _label_percentile in lib_1[formula_1]['env'].keys():
        observed = sorted(variations[lookup_key])
        assert observed == test_dict['out']['formated_molecule']
Ejemplo n.º 8
0
def generic_check_fucntion(test_dict):
    """
    Check the molecules extended with fixed labels against the expectation.

    Args:
        test_dict (dict): provides 'input' (molecules), 'params',
            'fixed_labels' and the expected 'output'.

    Raises:
        AssertionError: if the sorted result differs from the sorted
            expected output.
    """
    library_kwargs = {
        'molecules': test_dict['input'],
        'charges': [2],
        'params': test_dict['params'],
        'fixed_labels': test_dict['fixed_labels'],
    }
    lib = pyqms.IsotopologueLibrary(**library_kwargs)
    function_output = lib._extend_molecules_with_fixed_labels(
        test_dict['input'])
    print(function_output)
    observed = sorted(function_output)
    expected = sorted(test_dict['output'])
    assert observed == expected
Ejemplo n.º 9
0
def transformation_spectrum(test_id, test_dict):
    """
    Check the output of ``_transform_spectrum`` for one test case.

    Args:
        test_id: identifier of the test case (not used in the body).
        test_dict (dict): provides the library 'params', the input
            spectrum under 'i' and the expected 'tmz_set' / 'tmz_lookup'
            under 'o'.

    Raises:
        AssertionError: if the transformed m/z set or the lookup differ
            from the expectation.
    """
    lib = pyqms.IsotopologueLibrary(molecules=['KLEINERTEST'], charges=[2])
    lib.params.update(test_dict['params'])
    tmz_set, tmz_lookup = lib._transform_spectrum(
        test_dict['i'], mz_range=None
    )
    expected = test_dict['o']
    assert sorted(tmz_set) == sorted(expected['tmz_set'])
    # Every transformed m/z must be present in the lookup, and its
    # expected lookup entry must equal the input.
    for tmz in tmz_set:
        assert tmz in tmz_lookup
        assert expected['tmz_lookup'][tmz] == test_dict['i']

    # The returned lookup must match the expected lookup entry by entry.
    for tmz, looked_up in tmz_lookup.items():
        assert looked_up == expected['tmz_lookup'][tmz]
def generic_check_fucntion(test_dict):
    """
    Compare the label percentiles of a library with the expected ones.

    Args:
        test_dict (dict): provides 'input' (molecules),
            'metabolic_labels' and the expected 'output'.

    Raises:
        AssertionError: if a label percentile tuple is not expected or the
            number of tuples differs from the expectation.
    """
    library_kwargs = {
        'molecules': test_dict['input'],
        'charges': [2],
        'metabolic_labels': test_dict['metabolic_labels'],
    }
    lib = pyqms.IsotopologueLibrary(**library_kwargs)
    print(lib.labled_percentiles)
    # Every observed tuple must be expected ...
    unexpected = [
        tuple_2_check
        for tuple_2_check in lib.labled_percentiles
        if tuple_2_check not in test_dict['output']
    ]
    assert not unexpected
    # ... and the counts must agree as well.
    assert len(lib.labled_percentiles) == len(test_dict['output'])
Ejemplo n.º 11
0
def main(mzml=None):
    """
    Template script for quantification using pyQms.

    Matches three BSA peptides at charges 2 to 5 against every MS1
    spectrum of the given mzML file and prints the collected results.

    Use e.g. the BSA1.mzML example file. Please download it first using
    'get_example_BSA_file.py'

    Usage:
        ./quantify_mzml.py  mzml_file

    Args:
        mzml (str): path to the mzML file.
    """
    library = pyqms.IsotopologueLibrary(
        molecules=["HLVDEPQNLIK", "YICDNQDTISSK", "DLGEEHFK"],
        charges=[2, 3, 4, 5],
        metabolic_labels=None,
        fixed_labels=None,
        verbose=True,
    )
    reader = pymzml.run.Reader(mzml)
    file_name = os.path.basename(mzml)
    results = None
    for spectrum in reader:
        # 'MS:1000016' is read for every spectrum and used as spec_rt.
        retention_time = spectrum.get("MS:1000016")
        if spectrum["ms level"] != 1:
            continue
        # match_all receives the previous results and returns the
        # updated ones.
        results = library.match_all(
            mz_i_list=spectrum.centroidedPeaks,
            file_name=file_name,
            spec_id=spectrum["id"],
            spec_rt=retention_time,
            results=results,
        )
    print(results)
Ejemplo n.º 12
0
def generate_molecule_isotopologue_lib(
    peak_properties: Dict[str, dict],
    charges: List[int] = None,
    trivial_names: Dict[str, str] = None,
):
    """Build a reduced isotopologue lookup for the given peaks.

    Args:
        peak_properties: maps a name to a dict that holds at least
            "chemical_formula" and "charge".
        charges: charge states passed to pyQms; defaults to ``[1]``.
        trivial_names: passed through to ``pyqms.IsotopologueLibrary``.

    Returns:
        dict: maps each name to ``{"mz": ..., "i": ...}`` taken from the
        unlabeled envelope of its formula. Names that share a chemical
        formula with an entry already present reuse that entry. Empty if
        ``peak_properties`` is empty.
    """
    logger.info("Generate Isotopolgue Library")
    start = time.time()
    if charges is None:
        charges = [1]

    # Group the names by chemical formula to find duplicates later.
    duplicate_formulas: Dict[str, List[str]] = {}
    for name, properties in peak_properties.items():
        formula_group = duplicate_formulas.setdefault(
            properties["chemical_formula"], []
        )
        formula_group.append(name)

    reduced_lib = {}
    if peak_properties:
        molecules = [
            properties["chemical_formula"]
            for properties in peak_properties.values()
        ]
        lib = pyqms.IsotopologueLibrary(
            molecules=molecules,
            charges=charges,
            verbose=False,
            trivial_names=trivial_names,
        )
        unlabeled = (("N", "0.000"), )
        # TODO fix to  support multiple charge states
        for mol in molecules:
            formula = lib.lookup["molecule to formula"][mol]
            data = lib[formula]["env"][unlabeled]
            for triv in lib.lookup["formula to trivial name"][formula]:
                charge = peak_properties[triv]["charge"]
                reduced_lib[triv] = {
                    "mz": data[charge]["mz"],
                    "i": data["relabun"],
                }

    # Names sharing a formula with an existing entry get the same entry.
    shared_entries = {}
    for mol, envelope in reduced_lib.items():
        formula = peak_properties[mol]["chemical_formula"]
        for triv in duplicate_formulas[formula]:
            if triv not in reduced_lib:
                shared_entries[triv] = envelope
    reduced_lib.update(shared_entries)
    logger.info(
        f"Generating IsotopologueLibrary took {(time.time() - start)/60} minutes"
    )
    return reduced_lib
Ejemplo n.º 13
0
def main(mzml=None):
    """
    Template script for quantification using pyQms.

    Matches three BSA peptides at charges 2 to 5 against every MS1
    spectrum of the given mzML file and pickles the collected results to
    '<mzml basename>_pyQms_results.pkl' in the current working directory.

    Use e.g. the BSA1.mzML example file. Please download it first using
    'get_example_BSA_file.py'

    Usage:
        ./quantify_mzml.py  mzml_file

    Args:
        mzml (str): path to the mzML file.
    """
    molecules = ['HLVDEPQNLIK', 'YICDNQDTISSK', 'DLGEEHFK']
    charges = [2, 3, 4, 5]
    metabolic_labels = None
    fixed_labels = None

    lib = pyqms.IsotopologueLibrary(
        molecules=molecules,
        charges=charges,
        metabolic_labels=metabolic_labels,
        fixed_labels=fixed_labels,
        # params           = params,
        verbose=True)
    run = pymzml.run.Reader(mzml,
                            extraAccessions=[('MS:1000016',
                                              ['value', 'unitName'])],
                            obo_version='1.1.0')
    mzml_basename = os.path.basename(mzml)
    results = None
    for spectrum in run:
        # The accession yields a (value, unit) pair; only the value is used.
        scan_time, _unit = spectrum.get('MS:1000016', (None, None))
        if spectrum['ms level'] == 1:
            results = lib.match_all(mz_i_list=spectrum.centroidedPeaks,
                                    file_name=mzml_basename,
                                    spec_id=spectrum['id'],
                                    spec_rt=scan_time,
                                    results=results)
    # Context manager so the pickle file is flushed and closed reliably.
    pkl_name = '{0}_pyQms_results.pkl'.format(mzml_basename)
    with open(pkl_name, 'wb') as pkl_file:
        pickle.dump(results, pkl_file)
    return
Ejemplo n.º 14
0
def main(args):
    '''
    Uses a given peptide and charge and calculates and outputs the isotope
    envelope. Further options include metabolic labels and fixed labels.

    usage:

        ./view_isotopologue_overview.py molecule [charge [metabolic labels] [fixed labels]]

    e.g.:

        ./view_isotopologue_overview.py EILCEWRRAR 3 "{'15N' :[0,0.1]}" "{'R' :['C(-6) 13C(6)',''],'C':['C(1)O(2)','']}"

    Minimally a peptide is required; the charge defaults to 1.

    Args:
        args: not used; all arguments are read from ``sys.argv``.

    Note:
        The label arguments must be Python literals (e.g. dicts as shown
        above). They are parsed with ``ast.literal_eval``, so arbitrary
        expressions on the command line are rejected instead of executed.
    '''
    # Local import: only this entry point needs the literal parser.
    import ast

    molecule = sys.argv[1]
    charges = [1]
    metabolic_labels = None
    fixed_labels = None
    if len(sys.argv) >= 3:
        charges = [int(sys.argv[2])]
    if len(sys.argv) >= 4:
        metabolic_labels = ast.literal_eval(sys.argv[3])
    if len(sys.argv) >= 5:
        fixed_labels = ast.literal_eval(sys.argv[4])

    lib = pyqms.IsotopologueLibrary(molecules=[molecule],
                                    charges=charges,
                                    metabolic_labels=metabolic_labels,
                                    fixed_labels=fixed_labels,
                                    params={'LOWER_MZ_LIMIT': 0})

    for formula in lib.keys():
        for charge in charges:
            lib.print_overview(formula, charge=charge)
Ejemplo n.º 15
0
def main(args):
    """
    Print the monoisotopic m/z of a peptide at a given charge, i.e.
    position 0 of the unlabeled isotope envelope.

    usage:

        ./get_monoisotopic_mz.py <molecule> <charge>

    e.g.:

        ./get_monoisotopic_mz.py EILCEWRRAR 3

    Args:
        args: not used; molecule and charge are read from ``sys.argv``.
    """
    peptide = sys.argv[1]
    charge_state = int(sys.argv[2])

    library = pyqms.IsotopologueLibrary(
        molecules=[peptide],
        charges=[charge_state],
        metabolic_labels=None,
        fixed_labels=None,
        verbose=False,
    )

    # Key of the envelope without metabolic labeling.
    unlabeled = (("N", "0.000"), )
    message = (
        "Peptide {0} with formula {1} has a monoisotopic m/z of {2} @ charge {3}"
    )
    for formula in library.keys():
        envelope = library[formula]["env"][unlabeled]
        monoisotopic_mz = envelope[charge_state]["mz"][0]
        print(message.format(peptide, formula, monoisotopic_mz, charge_state))
Ejemplo n.º 16
0
def main(ident_file=None, mzml_file=None):
    """

    Script to automatically parse `Ursgal`_ result files and quantify them
    via pyQms.

    For evidence files with molecules with Carbamidomethylation as fixed
    modification. These modifications will be stripped from the molecules.
    This is important if a metabolic label (like 15N) is applied. This
    ensures that the nitrogen pools of the peptides (which are 15N labeled)
    do not mix up with the nitrogen pool of the Carbamidomethylation (14N
    since introduced during sample preparation). Please refer to the
    documentation of :doc:`adaptors` for further information.

    `Ursgal`_ result files or files in `mzTab` format are read in and used for
    quantification of the BSA example file.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./parse_ident_file_and_quantify_with_carbamidomethylation.py <ident_file> <mzml_file>

    Args:
        ident_file (str): path to the evidence file.
        mzml_file (str): path to the mzML file; the result pickle
            '<mzml basename>_pyQms_results.pkl' is written next to it.

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    """

    # define the fixed label for Carbamidomethyl
    tmp_fixed_labels = {
        "C": [{
            "element_composition": {
                "O": 1,
                "H": 3,
                "14N": 1,
                "C": 2
            },
            "evidence_mod_name": "Carbamidomethyl",
        }]
    }

    formatted_fixed_labels, evidence_lookup, molecule_list = pyqms.adaptors.parse_evidence(
        fixed_labels=tmp_fixed_labels, evidence_files=[ident_file])

    params = {
        "molecules": molecule_list,
        "charges": [1, 2, 3, 4, 5],
        "metabolic_labels": {
            "15N": [0]
        },
        "fixed_labels": formatted_fixed_labels,
        "verbose": True,
        "evidences": evidence_lookup,
    }

    lib = pyqms.IsotopologueLibrary(**params)

    run = pymzml.run.Reader(mzml_file)
    out_folder = os.path.dirname(mzml_file)
    mzml_file_basename = os.path.basename(mzml_file)
    results = None
    for spectrum in run:
        try:
            # pymzML 2.0.0 style: scan_time is unpacked as (value, unit)
            scan_time, unit = spectrum.scan_time
            # The fallback below yields minutes, so only a value reported
            # in seconds needs converting. The previous check compared the
            # literal "unit" with "minute" and therefore never fired.
            if unit == "second":
                scan_time /= 60.0
        except Exception:
            # Fallback when scan_time is unavailable or not a pair:
            # scan time will be in seconds, convert to minutes
            scan_time = spectrum.get("MS:1000016") / 60.0
        if spectrum["ms level"] == 1:
            results = lib.match_all(
                mz_i_list=spectrum.centroidedPeaks,
                file_name=mzml_file_basename,
                spec_id=spectrum["id"],
                spec_rt=scan_time,
                results=results,
            )

    # Context manager so the pickle file is flushed and closed reliably.
    pkl_path = os.path.join(
        out_folder, "{0}_pyQms_results.pkl".format(mzml_file_basename))
    with open(pkl_path, "wb") as pkl_file:
        pickle.dump(results, pkl_file)
    return
Ejemplo n.º 17
0
Archivo: run.py Proyecto: JB-MS/SugarPy
    def quantify(
        self,
        molecule_name_dict=None,
        rt_window=None,
        ms_level=1,
        charges=None,
        params=None,
        pkl_name='',
        mzml_file=None,
        spectra=None,
        return_all=False,
        collect_precursor=False,
        force=False,
    ):
        '''
        Quantify a list of molecules in a given mzML file using pyQms.
        Quantification is done by default on MS1 level and can be specified
        for a retention time window.

        The quantification is skipped if ``pkl_name`` already exists, unless
        ``force``, ``return_all`` or ``collect_precursor`` is set.

        Keyword Arguments:
            molecule_name_dict (dict): contains for the molecules that should be quantified
                as hill notations (keys) a list of corresponding trivial names (values)
            rt_window (dict or sequence): optional argument to define a retention time
                window in which the molecules are quantified. Either a dict with the
                keys 'min' and 'max' or a (min, max) pair.
            ms_level: MS level for which quantification should be performed
            charges (list): list of charge states that are quantified
            params (dict): pyQms parameters (see pyQms manual for further information)
            pkl_name (str): name of the result pickle containing the pyQms results
            mzml_file (str): path to the mzML file used for the quantification
            spectra (list): optional list of spectrum IDs that should be quantified
            return_all (bool): if True, in addition to the results pkl, the IsotopologueLibrary
                as well as the spectrum peaks are returned. This should only be used for
                a single spectrum.
            collect_precursor (bool): if True, the selected precursors of all spectra
                with MS level >= 2 are collected as a dict that maps the precursor m/z
                (rounded to 3 digits) to a list of (retention time, spectrum ID) tuples
            force (bool): if True, the quantification is run even if pkl_name exists

        Returns:
            str: path to the results pickle; or, if return_all is set, the tuple
            (results, IsotopologueLibrary, peaks); or, if collect_precursor is set,
            the tuple (pkl_name, precursor lookup, IsotopologueLibrary)
        '''
        # quantify the shizznit
        print('[ SugarPy  ] Quantification for {0} molecules.'.format(
            len(molecule_name_dict)
        ))
        if not os.path.exists(pkl_name) \
                or force or return_all or collect_precursor:

            # Molecules are handed to pyQms as '+<formula>'.
            molecules = []
            trivial_names = {}
            for formula, names in molecule_name_dict.items():
                molecule = '+{0}'.format(formula)
                molecules.append(molecule)
                trivial_names[molecule] = names
            lib = pyqms.IsotopologueLibrary(
                molecules=molecules,
                charges=charges,
                metabolic_labels=None,
                fixed_labels=None,
                verbose=False,
                trivial_names=trivial_names,
                params=params
            )
            run = pymzml.run.Reader(
                mzml_file,
            )

            # Resolve the window once. Unpacking a dict would yield its
            # keys, so the documented {'min': ..., 'max': ...} form is read
            # by key; a (min, max) pair is still accepted.
            rt_min = rt_max = None
            if rt_window is not None:
                if isinstance(rt_window, dict):
                    rt_min = rt_window['min']
                    rt_max = rt_window['max']
                else:
                    rt_min, rt_max = rt_window

            results = None
            peaks = []
            precursor_to_rt_id = {}
            for n, spectrum in enumerate(run):
                if n % 100 == 0:
                    print(
                        '[ SugarPy  ] Processing spectrum number: {0}'.format(
                            n,
                        ),
                        end='\r'
                    )
                if collect_precursor and spectrum.ms_level >= 2:
                    rt = float(spectrum.scan_time_in_minutes())
                    selected_precursors = spectrum.selected_precursors
                    if selected_precursors is not None:
                        for precursor_dict in selected_precursors:
                            rounded_precursor_mz = round(
                                precursor_dict['mz'], 3)
                            precursor_to_rt_id.setdefault(
                                rounded_precursor_mz, []
                            ).append((rt, spectrum.ID))

                if spectrum.ms_level != ms_level:
                    continue
                if spectra is not None and spectrum.ID not in spectra:
                    continue
                rt = float(spectrum.scan_time_in_minutes())
                if rt_window is not None:
                    if rt < rt_min:
                        continue
                    if rt > rt_max:
                        # past the window: stop reading the file
                        break
                if return_all:
                    peaks.append(spectrum.peaks('centroided'))
                results = lib.match_all(
                    mz_i_list=spectrum.peaks('centroided'),
                    file_name=mzml_file,
                    spec_id=spectrum.ID,
                    spec_rt=rt,
                    results=results
                )

            # NOTE: results is still None here if no spectrum was matched.
            results.lookup['formula to evidences'] = {}
            formula_to_evidences = results.lookup['formula to evidences']
            for molecule, formula in results.lookup['molecule to formula'].items():
                molecule_entry = formula_to_evidences.setdefault(
                    formula, {}
                ).setdefault(molecule, {'trivial_names': []})
                names_for_formula = results.lookup[
                    'formula to trivial name'][formula]
                if len(names_for_formula) >= 2:
                    print('this should never happen')
                molecule_entry['trivial_names'] += names_for_formula[0]

            # Context manager so the pickle is flushed and closed before
            # it may be read back below.
            with open(pkl_name, 'wb') as pkl_file:
                pickle.dump(results, pkl_file)
        if return_all:
            # potential memory overkill
            with open(pkl_name, 'rb') as pkl_file:
                results = pickle.load(pkl_file)
            return results, lib, peaks
        elif collect_precursor:
            return pkl_name, precursor_to_rt_id, lib
        else:
            return pkl_name
Ejemplo n.º 18
0
def main(ident_file=None, mzml_file=None):
    '''

    Script to automatically parse `Ursgal`_ result files and quantify them
    via pyQms.

    For evidence files with molecules with Carbamidomethylation as fixed
    modification. These modifications will be stripped from the molecules.
    This is important if a metabolic label (like 15N) is applied. This
    ensures that the nitrogen pools of the peptides (which are 15N labeled)
    do not mix up with the nitrogen pool of the Carbamidomethylation (14N
    since introduced during sample preparation). Please refer to the
    documentation of :doc:`adaptors` for further information.

    `Ursgal`_ result files or files in `mzTab` format are read in and used for
    quantification of the BSA example file.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./parse_ident_file_and_quantify_with_carbamidomethylation.py <ident_file> <mzml_file>

    Args:
        ident_file (str): path to the evidence file.
        mzml_file (str): path to the mzML file; the result pickle
            '<mzml basename>_pyQms_results.pkl' is written next to it.

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    '''

    # define the fixed label for Carbamidomethyl
    tmp_fixed_labels = {
        'C' : [
            {
                'element_composition' : {'O': 1, 'H': 3, '14N': 1, 'C': 2},
                'evidence_mod_name': 'Carbamidomethyl'
            },
        ]
    }

    formatted_fixed_labels, evidence_lookup, molecule_list = pyqms.adaptors.parse_evidence(
        fixed_labels   = tmp_fixed_labels,
        evidence_files = [ ident_file ],
    )

    params = {
        'molecules'        : molecule_list,
        'charges'          : [1, 2, 3, 4, 5],
        'metabolic_labels' : {'15N' : [0, ]},
        'fixed_labels'     : formatted_fixed_labels,
        'verbose'          : True,
        'evidences'        : evidence_lookup
    }

    lib = pyqms.IsotopologueLibrary( **params )

    run = pymzml.run.Reader(
        mzml_file
    )
    out_folder         = os.path.dirname(mzml_file)
    mzml_file_basename = os.path.basename(mzml_file)
    results = None
    for spectrum in run:
        try:
            # pymzML 2.0.0 style
            scan_time = spectrum.scan_time
        except Exception:
            # Fallback when the attribute access fails; catching
            # Exception (not everything) keeps Ctrl-C and sys.exit working.
            # scan time will be in seconds
            scan_time = spectrum.get('MS:1000016')
        if spectrum['ms level'] == 1:
            results = lib.match_all(
                mz_i_list = spectrum.centroidedPeaks,
                file_name = mzml_file_basename,
                spec_id   = spectrum['id'],
                spec_rt   = scan_time,
                results   = results
            )

    # Context manager so the pickle file is flushed and closed reliably.
    pkl_path = os.path.join(
        out_folder,
        '{0}_pyQms_results.pkl'.format(mzml_file_basename)
    )
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(results, pkl_file)
    return
def main(mzml=None):
    """
    Example script for visualizing the m/z and intensity error, which is
    the basis for the scoring of the matches in pyQms.

    Uses spectrum 1165 of the BSA1.mzML example file. A subrange of the
    spectrum from m/z 400 to 500 is used.

    For every match, a plot is saved to the '../data' folder and the
    collected peak information is printed.

    Usage:
        ./visualize_scoring_information.py

    Note:
        This example does not require a reader to access MS spectra, since
        a simple peak list is used.

    Args:
        mzml: not used in the body.
    """

    peak_list = [
        (404.2492407565097, 2652.905029296875),
        (405.3003310237508, 4831.56103515625),
        (408.8403673369115, 23153.7109375),
        (409.17476109421705, 10182.2822265625),
        (409.5098740355617, 4770.97412109375),
        (411.17196124490727, 3454.364013671875),
        (413.26627826402705, 6861.84912109375),
        (419.3157903165357, 90201.5625),
        (420.2440507067882, 11098.4716796875),
        (420.31917273788645, 22288.9140625),
        (420.73825281590496, 8159.7099609375),
        (421.2406187369968, 3768.656494140625),
        (427.3787652898548, 5680.43212890625),
        (433.3316647490907, 8430.30859375),
        (434.705984428002, 25924.38671875),
        (435.2080179219357, 11041.2060546875),
        (443.6708762397708, 4081.282470703125),
        (443.69049198141124, 5107.13330078125),
        (443.6974813419733, 9135.3125),
        (443.7112735313511, 2517650.0),
        (443.7282222289076, 5571.26025390625),
        (443.7379762316008, 5227.4033203125),
        (444.1998579474954, 3021.341796875),
        (444.21248374593875, 1156173.75),
        (444.71384916266277, 336326.96875),
        (445.21533524843596, 58547.0703125),
        (445.71700965093, 4182.04345703125),
        (446.1200302053469, 93216.3359375),
        (447.09963627699824, 3806.537109375),
        (447.1169242266495, 59846.37109375),
        (447.3464079857604, 13170.9541015625),
        (448.11566395552086, 9294.5107421875),
        (448.3500303628631, 3213.052490234375),
        (452.1123280000919, 5092.0869140625),
        (461.1934526664677, 4022.537353515625),
        (462.1463969367603, 99732.5),
        (463.14561508666384, 24247.015625),
        (464.1433022096936, 20417.041015625),
        (465.1421080732791, 3222.4052734375),
        (470.1669593722212, 8621.81640625),
        (475.23989190282134, 3369.073974609375),
        (493.27465300375036, 2725.885986328125),
        (496.0077303201583, 8604.0830078125),
    ]
    banner = "{0:-^100}".format("Library generation")
    print(banner)
    lib = pyqms.IsotopologueLibrary(
        molecules=["DDSPDLPK"],
        charges=[2],
        metabolic_labels=None,
        fixed_labels=None,
        verbose=True,
    )
    print(banner)

    results = lib.match_all(
        mz_i_list=peak_list,
        file_name="BSA_test",
        spec_id=1165,
        spec_rt=29.10,
        results=None,
    )
    for match_key, i, entry in results.extract_results():
        plot = pymzml.plot.Factory()
        label_mz_error = []
        label_i_error = []
        measured_peaks = []
        matched_peaks = []
        peak_info = ddict(list)
        for peak in entry.peaks:
            (
                measured_mz,
                measured_intensity,
                relative_i,
                calculated_mz,
                calculated_intensity,
            ) = peak
            # Theoretical peaks without a measured counterpart are skipped.
            if measured_mz is None:
                continue
            # Theoretical intensity scaled to the measured amount.
            scaled_intensity = calculated_intensity * entry.scaling_factor
            measured_peaks.append((measured_mz, measured_intensity))
            matched_peaks.append((calculated_mz, scaled_intensity))

            # m/z error in ppm relative to the measured m/z
            mz_error = (measured_mz - calculated_mz) / (measured_mz * 1e-6)
            label_mz_error.append(
                (calculated_mz, "{0:5.3f} ppm m/z error".format(mz_error)))
            rel_i_error = (abs(measured_intensity - scaled_intensity) /
                           scaled_intensity)

            peak_info["measured peaks"].append(measured_mz)
            peak_info["theoretical peaks"].append(calculated_mz)
            peak_info["relative intensity"].append(relative_i)
            peak_info["scaled matched peaks"].append(scaled_intensity)
            peak_info["mz error"].append(mz_error)
            peak_info["i error"].append(rel_i_error)

            # The label shows the intensity error capped at 1; the
            # uncapped value was stored in peak_info above.
            capped_i_error = min(rel_i_error, 1)
            label_i_error.append(
                (calculated_mz,
                 "{0:5.3f} rel. intensity error".format(capped_i_error)))

        measured_mz_values = [pair[0] for pair in measured_peaks]
        mz_range = [min(measured_mz_values) - 1, max(measured_mz_values) + 1]
        peptide = results.lookup["formula to molecule"][match_key.formula][0]
        header = (
            "Formula: {0}; Peptide: {1}; Charge: {2}\n Amount: {3:1.3f}; Score: {4:1.3f}"
            .format(match_key.formula, peptide, match_key.charge,
                    entry.scaling_factor, entry.score))
        plot.newPlot(header=header, mzRange=mz_range)
        plot.add(measured_peaks, color=(0, 0, 0), style="sticks")
        plot.add(matched_peaks, color=(0, 200, 0), style="triangles")
        plot.add(label_mz_error, color=(255, 0, 0), style="label_x")
        plot.add(label_i_error, color=(255, 0, 0), style="label_x")

        plot_file = "Score_visualization_Peptide_{1}_Charge_{2}.xhtml".format(
            match_key.file_name, peptide, match_key.charge)
        plot_name = os.path.join(os.pardir, "data", plot_file)
        plot.save(filename=plot_name, mzRange=mz_range)
        print("Plotted file {0}".format(plot_name))
        print("Match info")
        for info_name, value_list in sorted(peak_info.items()):
            print(info_name)
            print("[{0}]".format(",".join(str(value) for value in value_list)))
            print()
    return
Ejemplo n.º 20
0
def generate_result_pickle(mzml_files,
                           fixed_labels,
                           molecules,
                           evidence_files,
                           min_charge,
                           max_charge,
                           label,
                           ms_level,
                           label_percentile,
                           evidence_score_field=None,
                           mz_score_percentile=0.4,
                           trivial_names=None,
                           pyqms_params=None,
                           verbose=True):
    """Match an isotopologue library against the spectra of mzML file(s).

    The evidence files are parsed first, then an isotopologue library is
    built for every charge from ``min_charge`` to ``max_charge`` (inclusive)
    and each spectrum whose MS level equals ``ms_level`` is matched against
    it. Iteration over a file stops at the spectrum with id ``'TIC'``.
    Scan times reported in seconds are converted to minutes; any unit other
    than second/minute triggers a warning and the value is used unchanged.

    Args:
        mzml_files: a single path (str) or a list of mzML file paths.
        fixed_labels, molecules, evidence_files, evidence_score_field:
            forwarded to ``pyqms.adaptors.parse_evidence``.
        min_charge, max_charge: inclusive charge range of the library.
        label, label_percentile: combined into the library's
            ``metabolic_labels`` as ``{label: label_percentile}``.
        ms_level: only spectra with this MS level are matched.
        mz_score_percentile: accepted but not used inside this function.
        trivial_names, pyqms_params, verbose: forwarded to the library.

    Returns:
        The results object accumulated by ``lib.match_all`` or None if no
        spectrum with the requested MS level was seen.
    """
    if isinstance(mzml_files, str):
        # a single path was given, wrap it so the file loop works
        mzml_files = [mzml_files]

    print('[ -ENGINE- ] Parse Evidences')
    parsed_evidence = pyqms.adaptors.parse_evidence(
        fixed_labels=fixed_labels,
        evidence_files=evidence_files,
        molecules=molecules,
        evidence_score_field=evidence_score_field)
    fixed_labels, evidences, molecules = parsed_evidence

    library_kwargs = dict(
        molecules=molecules,
        charges=list(range(min_charge, max_charge + 1)),
        params=pyqms_params,
        metabolic_labels={label: label_percentile},
        trivial_names=trivial_names,
        fixed_labels=fixed_labels,
        verbose=verbose,
        evidences=evidences,
    )
    print('[ -ENGINE- ] Set up Isotopolugue Library')
    lib = pyqms.IsotopologueLibrary(**library_kwargs)

    print('[ -ENGINE- ] Matching isotopologues to spectra ..')
    progress_template = ('[ -ENGINE- ] File : {0:^40} : '
                         'Processing spectrum {1}')
    # request value and unit of the scan start time accession
    scan_time_accession = ('MS:1000016', ['value', 'unitName'])
    results = None
    for mzml_file in mzml_files:
        run = pymzml.run.Reader(mzml_file,
                                obo_version='1.1.0',
                                extraAccessions=[scan_time_accession])
        mzml_file_basename = os.path.basename(mzml_file)

        for n, spec in enumerate(run):
            if spec['id'] == 'TIC':
                break
            if n % 100 == 0:
                # progress line, overwritten in place via carriage return
                print(progress_template.format(mzml_file_basename, n),
                      end='\r')

            scan_time, unit = spec.scan_time
            if unit == 'second':
                scan_time /= 60
            elif unit != 'minute':
                print('''
                    [Warning] The retention time unit is not recognized or not specified.
                    [Warning] It is assumed to be minutes and continues with that.
                ''')

            if spec['ms level'] == ms_level:
                results = lib.match_all(mz_i_list=spec.centroidedPeaks,
                                        file_name=mzml_file_basename,
                                        spec_id=spec['id'],
                                        spec_rt=scan_time,
                                        results=results)
        # terminate the carriage-return progress line of this file
        print()
    return results
Ejemplo n.º 21
0
        'input': [(999, 0.001), (1001, 0.001)],
        'output': 3
    },
    {
        'input': [(999, 0.001), (999, 0.001)],
        'output': 2
    },
    {
        'input': [(1006, 0.001), (1006, 0.001)],
        'output': 2
    },
]

# Molecule and charge state used to build the module-level library below.
MOLECULES = ['TEST']
CHARGES = [2]
# Shared library instance; checker_function() calls its _slice_list() method.
lib = pyqms.IsotopologueLibrary(molecules=MOLECULES, charges=CHARGES)


def extend_kb_with_fixed_labels_test():
    """Generator-style test: yield ``(checker_function, case)`` for every
    entry of the module-level TESTS list."""
    yield from ((checker_function, case) for case in TESTS)


def checker_function(test_dict):
    """Assert that slicing SPECTRUM with ``test_dict['input']`` returns as
    many entries as stated in ``test_dict['output']``."""
    # with a higher second tuple value, bisect selects the next position...
    sliced = lib._slice_list(SPECTRUM, test_dict['input'])
    assert len(sliced) == test_dict['output']


class TestResults(unittest.TestCase):
Ejemplo n.º 22
0
def main(ident_file=None, mzml_file=None):
    """
    Example script to demonstrate an (example) workflow from mzML files to
    peptide abundances. Will plot for every quantified peptide a matched
    isotopologue chromatogram (MIC). The plots include RT windows, maximum
    amount in RT window and identification RT(s).

    `Ursgal`_ result files or files in `mzTab`_ format are read in and used for
    quantification of the BSA example file.

    Args:
        ident_file (str): path to the evidence file. Required although it
            defaults to None. A name ending in ``mztab`` (case-insensitive)
            selects the score field ``search_engine_score[1]``, everything
            else selects ``PEP``. All output is written to the folder
            ``complete_BSA_quantification`` next to this file.
        mzml_file (str): path to the mzML file to quantify. Required although
            it defaults to None.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./complete_BSA_quantification.py <ident_file> <mzml_file>

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    Note:
        rpy2 is required for all plotting

    """

    # define the fixed label for Carbamidomethyl
    tmp_fixed_labels = {
        "C": [{
            "element_composition": {
                "O": 1,
                "H": 3,
                "14N": 1,
                "C": 2
            },
            "evidence_mod_name": "Carbamidomethyl",
        }]
    }
    if ident_file.upper().endswith("MZTAB"):
        evidence_score_field = "search_engine_score[1]"
    else:
        # this is the default value in the adaptor
        evidence_score_field = "PEP"

    print('Evidence score field "{0}" will be used.'.format(
        evidence_score_field))
    formatted_fixed_labels, evidence_lookup, molecule_list = pyqms.adaptors.parse_evidence(
        fixed_labels=tmp_fixed_labels,
        evidence_files=[ident_file],
        evidence_score_field=evidence_score_field,
    )

    params = {
        "molecules": molecule_list,
        "charges": [1, 2, 3, 4, 5],
        "metabolic_labels": {
            "15N": [0]
        },
        "fixed_labels": formatted_fixed_labels,
        "verbose": True,
        "evidences": evidence_lookup,
    }

    lib = pyqms.IsotopologueLibrary(**params)

    run = pymzml.run.Reader(mzml_file)
    mzml_file_basename = os.path.basename(mzml_file)
    results = None
    for spectrum in run:
        try:
            # pymzML 2.0.0 style
            scan_time = spectrum.scan_time
        except Exception:
            # Fallback for other pymzML versions; scan time will be in
            # seconds. ``Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            scan_time = spectrum.get("MS:1000016")
        if spectrum["ms level"] == 1:
            results = lib.match_all(
                mz_i_list=spectrum.centroidedPeaks,
                file_name=mzml_file_basename,
                spec_id=spectrum["id"],
                spec_rt=scan_time,
                results=results,
            )
    if results is None:
        # no MS1 spectrum was seen, so there is nothing to summarise or plot
        print("No MS1 spectra found in {0}, nothing to quantify.".format(
            mzml_file_basename))
        return
    # print(results)
    out_folder = os.path.join(os.path.dirname(ident_file),
                              "complete_BSA_quantification")
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(out_folder, exist_ok=True)
    print()
    print("All results go into folder: {0}".format(out_folder))
    rt_border_tolerance = 1
    quant_summary_file = os.path.join(
        out_folder, "complete_BSA_quantification_summary.xlsx")
    results.write_rt_info_file(
        output_file=quant_summary_file,
        list_of_csvdicts=None,
        trivial_name_lookup=None,
        rt_border_tolerance=rt_border_tolerance,
        update=True,
    )
    calculated_amounts = results.calc_amounts_from_rt_info_file(
        rt_info_file=quant_summary_file,
        rt_border_tolerance=rt_border_tolerance,
        calc_amount_function=None,  # calc_amount_function
    )
    # print(calculated_amounts)
    # (formula, charge) -> quantification summary of that line. A later line
    # with the same formula and charge replaces an earlier one.
    formula_charge_to_quant_info = {}
    for line_dict in calculated_amounts:
        quant_key = (line_dict["formula"], int(line_dict["charge"]))
        formula_charge_to_quant_info[quant_key] = {
            "rt": line_dict["max I in window (rt)"],
            "amount": line_dict["max I in window"],
            "rt start": line_dict["start (min)"],
            "rt stop": line_dict["stop (min)"],
            # entries are ';' separated; the part after '@' is parsed as RT
            "evidence_rts": [
                round(float(ev_string.split("@")[1]), 2)
                for ev_string in line_dict["evidences (min)"].split(";")
            ],
        }

    try:
        import rpy2  # noqa: F401 -- only probing whether plotting is possible
    except ImportError:
        # plotting is optional, silently skip it without rpy2
        return

    print(
        "Plotting results plot including RT windows, abundances and identifications"
    )
    for key in results.keys():
        short_key = (key.formula, key.charge)

        match_list = results[key]["data"]
        if len(match_list) < 15:
            # too few matches for a meaningful chromatogram
            continue
        file_name = os.path.join(
            out_folder,
            "MIC_2D_{0}_{1}.pdf".format(
                "_".join(results.lookup["formula to molecule"][key.formula]),
                key.charge,
            ),
        )
        graphics, _ = results.init_r_plot(file_name)
        quant_info = formula_charge_to_quant_info[short_key]

        # vertical lines: RT of maximum intensity and both RT window borders
        ablines = {
            key: [
                {
                    "v": quant_info["rt"],
                    "lty": 2
                },
                {
                    "v": quant_info["rt start"],
                    "lty": 2,
                    "col": "blue",
                },
                {
                    "v": quant_info["rt stop"],
                    "lty": 2,
                    "col": "blue",
                },
            ]
        }
        # print(quant_info)
        additional_legends = {
            key: [
                {
                    "x": quant_info["rt"],
                    "y": quant_info["amount"],
                    "text": "max intensity: {0:1.3e}".format(
                        quant_info["amount"]),
                    "pos": 3,  # above
                },
                {
                    "x": quant_info["rt start"],
                    "y": quant_info["amount"] / 2,
                    "text": "RT Window start",
                    "pos": 4,  # right
                    "col": "blue",
                },
                {
                    "x": quant_info["rt stop"],
                    "y": quant_info["amount"] / 2,
                    "text": "RT window stop",
                    "pos": 2,  # left,
                    "col": "blue",
                },
            ]
        }

        # one thin purple line plus rotated label per identification RT
        for evidence_rt in quant_info["evidence_rts"]:
            ablines[key].append({
                "v": evidence_rt,
                "lwd": 0.5,
                "col": "purple"
            })
            additional_legends[key].append({
                "x": evidence_rt,
                "y": 0,
                "lwd": 0.5,
                "col": "purple",
                "text": "MS2 ident",
                "pos": 4,
                "srt": 45,  # rotate label
            })

        results.plot_MICs_2D(
            [key],
            file_name=None,
            rt_window=None,
            i_transform=None,
            xlimits=[
                quant_info["rt start"] - 0.05,
                quant_info["rt stop"] + 0.05,
            ],
            additional_legends=additional_legends,
            title=None,
            zlimits=None,
            ablines=ablines,
            graphics=graphics,
        )
        print("Plottted {0}".format(file_name))

    return
Ejemplo n.º 23
0
def main(ident_file = None, mzml_file = None):
    """
    Example script to demonstrate an (example) workflow from mzML files to
    peptide abundances. Will plot for every quantified peptide a matched
    isotopologue chromatogram (MIC). The plots include RT windows, maximum
    amount in RT window and identification RT(s).

    `Ursgal`_ result files or files in `mzTab`_ format are read in and used for
    quantification of the BSA example file.

    Args:
        ident_file (str): path to the evidence file. Required although it
            defaults to None. A name ending in ``mztab`` (case-insensitive)
            selects the score field ``search_engine_score[1]``, everything
            else selects ``PEP``. All output is written to the folder
            ``complete_BSA_quantification`` next to this file.
        mzml_file (str): path to the mzML file to quantify. Required although
            it defaults to None.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./complete_BSA_quantification.py <ident_file> <mzml_file>

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    Note:
        rpy2 is required for all plotting

    """

    # define the fixed label for Carbamidomethyl
    tmp_fixed_labels = {
        'C' : [
            {
                'element_composition' : {'O': 1, 'H': 3, '14N': 1, 'C': 2},
                'evidence_mod_name': 'Carbamidomethyl'
            },
        ]
    }
    if ident_file.upper().endswith('MZTAB'):
        evidence_score_field  = 'search_engine_score[1]'
    else:
        # this is the default value in the adaptor
        evidence_score_field = 'PEP'

    print(
        'Evidence score field "{0}" will be used.'.format(
            evidence_score_field
        )
    )
    formatted_fixed_labels, evidence_lookup, molecule_list = pyqms.adaptors.parse_evidence(
        fixed_labels         = tmp_fixed_labels,
        evidence_files       = [ ident_file ],
        evidence_score_field = evidence_score_field
    )

    params = {
        'molecules'        : molecule_list,
        'charges'          : [1, 2, 3, 4, 5],
        'metabolic_labels' : {'15N' : [0, ]},
        'fixed_labels'     : formatted_fixed_labels,
        'verbose'          : True,
        'evidences'        : evidence_lookup
    }

    lib = pyqms.IsotopologueLibrary( **params )

    run = pymzml.run.Reader(
        mzml_file
    )
    mzml_file_basename = os.path.basename(mzml_file)
    results = None
    for spectrum in run:
        try:
            # pymzML 2.0.0 style
            scan_time = spectrum.scan_time
        except Exception:
            # Fallback for other pymzML versions; scan time will be in
            # seconds. ``Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            scan_time = spectrum.get('MS:1000016')
        if spectrum['ms level'] == 1:
            results = lib.match_all(
                mz_i_list = spectrum.centroidedPeaks,
                file_name = mzml_file_basename,
                spec_id   = spectrum['id'],
                spec_rt   = scan_time,
                results   = results
            )
    if results is None:
        # no MS1 spectrum was seen, so there is nothing to summarise or plot
        print(
            'No MS1 spectra found in {0}, nothing to quantify.'.format(
                mzml_file_basename
            )
        )
        return
    # print(results)
    out_folder = os.path.join(
        os.path.dirname(ident_file),
        'complete_BSA_quantification'
    )
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(out_folder, exist_ok=True)
    print()
    print('All results go into folder: {0}'.format(out_folder))
    rt_border_tolerance = 1
    quant_summary_file  = os.path.join(
        out_folder,
        'complete_BSA_quantification_summary.xlsx',
    )
    results.write_rt_info_file(
        output_file         = quant_summary_file,
        list_of_csvdicts    = None,
        trivial_name_lookup = None,
        rt_border_tolerance = rt_border_tolerance,
        update              = True
    )
    calculated_amounts = results.calc_amounts_from_rt_info_file(
        rt_info_file         = quant_summary_file,
        rt_border_tolerance  = rt_border_tolerance,
        calc_amount_function = None, # calc_amount_function
    )
    # print(calculated_amounts)
    # (formula, charge) -> quantification summary of that line. A later line
    # with the same formula and charge replaces an earlier one.
    formula_charge_to_quant_info = {}
    for line_dict in calculated_amounts:
        quant_key = (line_dict['formula'], int(line_dict['charge']))
        formula_charge_to_quant_info[quant_key] = {
            'rt'           : line_dict['max I in window (rt)'],
            'amount'       : line_dict['max I in window'],
            'rt start'     : line_dict['start (min)'],
            'rt stop'      : line_dict['stop (min)'],
            # entries are ';' separated; the part after '@' is parsed as RT
            'evidence_rts' : [
                round( float( ev_string.split('@')[1] ), 2 )
                for ev_string in line_dict['evidences (min)'].split(';')
            ],
        }

    try:
        import rpy2  # noqa: F401 -- only probing whether plotting is possible
    except ImportError:
        # plotting is optional, silently skip it without rpy2
        return

    print('Plotting results plot including RT windows, abundances and identifications')
    for key in results.keys():
        short_key = ( key.formula, key.charge )

        match_list = results[key]['data']
        if len(match_list) < 15:
            # too few matches for a meaningful chromatogram
            continue
        file_name = os.path.join(
            out_folder ,
            'MIC_2D_{0}_{1}.pdf'.format(
                '_'.join(
                    results.lookup['formula to molecule'][ key.formula ]
                ),
                key.charge,
            )
        )
        graphics, _ = results.init_r_plot(file_name)
        quant_info = formula_charge_to_quant_info[short_key]

        # vertical lines: RT of maximum intensity and both RT window borders
        ablines = {
            key : [
                {
                    'v'   : quant_info['rt'],
                    'lty' : 2
                },
                {
                    'v'   : quant_info['rt start'],
                    'lty' : 2,
                    'col' : 'blue'
                },
                {
                    'v'   : quant_info['rt stop'],
                    'lty' : 2,
                    'col' : 'blue'
                },
            ]
        }
        # print(quant_info)
        additional_legends = {
            key : [
                {
                    'x'    : quant_info['rt'],
                    'y'    : quant_info['amount'],
                    'text' : 'max intensity: {0:1.3e}'.format(
                        quant_info['amount'],
                    ),
                    'pos'  : 3 # above
                },
                {
                    'x'    : quant_info['rt start'],
                    'y'    : quant_info['amount'] / 2,
                    'text' : 'RT Window start',
                    'pos'  : 4, # right
                    'col'  : 'blue'
                },
                {
                    'x'    : quant_info['rt stop'],
                    'y'    : quant_info['amount'] / 2,
                    'text' : 'RT window stop',
                    'pos'  : 2, # left,
                    'col'  : 'blue'
                },
            ]
        }

        # one thin purple line plus rotated label per identification RT
        for evidence_rt in quant_info['evidence_rts']:
            ablines[key].append(
                {
                    'v'   : evidence_rt,
                    'lwd' : 0.5,
                    'col' : 'purple',
                }
            )
            additional_legends[key].append(
                {
                    'x'    : evidence_rt,
                    'y'    : 0,
                    'lwd'  : 0.5,
                    'col'  : 'purple',
                    'text' : 'MS2 ident',
                    'pos'  : 4,
                    'srt'  : 45 # rotate label
                }
            )

        results.plot_MICs_2D(
            [key],
            file_name          = None,
            rt_window          = None,
            i_transform        = None,
            xlimits            = [
                quant_info['rt start'] - 0.05,
                quant_info['rt stop'] + 0.05,
            ],
            additional_legends = additional_legends,
            title              = None,
            zlimits            = None,
            ablines            = ablines,
            graphics           = graphics
        )
        print(
            'Plottted {0}'.format(file_name)
        )

    return
Ejemplo n.º 24
0
#!/usr/bin/env python
# encoding: utf-8

import pyqms

# Library instance created once at import time for the tests of this module.
lib = pyqms.IsotopologueLibrary(molecules=["PAINLESS"], charges=[2])

# Test cases: every dict pairs an "input" list of (name, count) tuples with
# the expected "output", a list of lists of (name, index) tuples.
# NOTE(review): in all cases shown the expected output equals the Cartesian
# product of range(count) per name - presumably what the function under test
# generates; confirm against the test function that consumes TESTS.
TESTS = [
    {
        "input": [("X", 2), ("Y", 2)],
        "output": [
            [("X", 0), ("Y", 0)],
            [("X", 0), ("Y", 1)],
            [("X", 1), ("Y", 0)],
            [("X", 1), ("Y", 1)],
        ],
    },
    {
        "input": [("X", 1), ("Y", 1), ("Z", 1)],
        "output": [[("X", 0), ("Y", 0), ("Z", 0)]],
    },
    {
        "input": [("X", 1), ("Y", 2), ("Z", 1)],
        "output": [[("X", 0), ("Y", 0), ("Z", 0)],
                   [("X", 0), ("Y", 1), ("Z", 0)]],
    },
]


def generic_test():
    for test_dict in TESTS:
Ejemplo n.º 25
0
def generate_result_pickle(
    mzml_files,
    fixed_labels,
    molecules,
    evidence_files,
    min_charge,
    max_charge,
    label,
    ms_level,
    label_percentile,
    evidence_score_field=None,
    mz_score_percentile=0.4,
    trivial_names=None,
    pyqms_params=None,
    verbose=True,
):
    """Match an isotopologue library against the spectra of mzML file(s).

    Workflow: parse the evidence files, build an isotopologue library for
    the charges ``min_charge`` .. ``max_charge`` (inclusive) and match every
    spectrum whose MS level equals ``ms_level``. Reading a file stops at the
    spectrum with id ``"TIC"``. Scan times given in seconds are converted to
    minutes; for units other than second/minute a warning is printed and the
    value is used as is.

    Args:
        mzml_files: one mzML path (str) or a list of paths.
        fixed_labels, molecules, evidence_files, evidence_score_field:
            passed on to ``pyqms.adaptors.parse_evidence``.
        min_charge, max_charge: inclusive charge range of the library.
        label, label_percentile: become the library's ``metabolic_labels``
            as ``{label: label_percentile}``.
        ms_level: MS level of the spectra that are matched.
        mz_score_percentile: accepted but not used inside this function.
        trivial_names, pyqms_params, verbose: passed on to the library.

    Returns:
        The results object accumulated by ``lib.match_all`` or None if no
        spectrum with the requested MS level was seen.
    """
    if isinstance(mzml_files, str):
        # a lone path is wrapped so that the file loop below can iterate it
        mzml_files = [mzml_files]

    print("[ -ENGINE- ] Parse Evidences")
    evidence_triple = pyqms.adaptors.parse_evidence(
        fixed_labels=fixed_labels,
        evidence_files=evidence_files,
        molecules=molecules,
        evidence_score_field=evidence_score_field,
    )
    fixed_labels, evidences, molecules = evidence_triple

    charge_states = list(range(min_charge, max_charge + 1))
    library_settings = {
        "molecules": molecules,
        "charges": charge_states,
        "params": pyqms_params,
        "metabolic_labels": {label: label_percentile},
        "trivial_names": trivial_names,
        "fixed_labels": fixed_labels,
        "verbose": verbose,
        "evidences": evidences,
    }
    print("[ -ENGINE- ] Set up Isotopolugue Library")
    lib = pyqms.IsotopologueLibrary(**library_settings)

    print("[ -ENGINE- ] Matching isotopologues to spectra ..")
    status_line = "[ -ENGINE- ] File : {0:^40} : Processing spectrum {1}"
    results = None
    for mzml_file in mzml_files:
        mzml_file_basename = os.path.basename(mzml_file)
        run = pymzml.run.Reader(
            mzml_file,
            obo_version="1.1.0",
            # request value and unit of the scan start time accession
            extraAccessions=[("MS:1000016", ["value", "unitName"])],
        )

        for n, spec in enumerate(run):
            if spec["id"] == "TIC":
                break
            if n % 100 == 0:
                # progress output, rewritten in place via carriage return
                print(status_line.format(mzml_file_basename, n), end="\r")

            scan_time, unit = spec.scan_time
            if unit == "second":
                scan_time /= 60
            elif unit != "minute":
                print("""
                    [Warning] The retention time unit is not recognized or not specified.
                    [Warning] It is assumed to be minutes and continues with that.
                """)

            if spec["ms level"] == ms_level:
                results = lib.match_all(
                    mz_i_list=spec.centroidedPeaks,
                    file_name=mzml_file_basename,
                    spec_id=spec["id"],
                    spec_rt=scan_time,
                    results=results,
                )
        # finish the carriage-return progress line of this file
        print()
    return results
Ejemplo n.º 26
0
def main(ident_file=None, mzml_file=None):
    """
    Script to automatically parse `Ursgal`_ result files and quantify it via
    pyQms. Please refer to Documenation of :doc:`adaptors` for further
    information.

    `Ursgal`_ result files or files in `mzTab`_ format are read in and used for
    quantification of the BSA example file.

    The results are pickled to ``<mzml file name>_pyQms_results.pkl`` in the
    folder of the mzML file.

    Args:
        ident_file (str): path to the evidence file. Required although it
            defaults to None. A name ending in ``mztab`` (case-insensitive)
            selects the score field ``search_engine_score[1]``, everything
            else selects ``PEP``.
        mzml_file (str): path to the mzML file to quantify. Required although
            it defaults to None.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./parse_ident_file_and_quantify.py <ident_file> <mzml_file>

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    """

    if ident_file.upper().endswith('MZTAB'):
        evidence_score_field = 'search_engine_score[1]'
    else:
        # this is the default value in the adaptor
        evidence_score_field = 'PEP'

    print('Evidence score field "{0}" will be used.'.format(
        evidence_score_field))

    fixed_labels, evidences, molecules = pyqms.adaptors.parse_evidence(
        fixed_labels=None,
        evidence_files=[ident_file],
        evidence_score_field=evidence_score_field)

    params = {
        'molecules': molecules,
        'charges': [1, 2, 3, 4, 5],
        'metabolic_labels': {
            '15N': [0]
        },
        'fixed_labels': fixed_labels,
        'verbose': True,
        'evidences': evidences
    }

    lib = pyqms.IsotopologueLibrary(**params)

    run = pymzml.run.Reader(mzml_file)
    out_folder = os.path.dirname(mzml_file)
    mzml_file_basename = os.path.basename(mzml_file)
    results = None
    for spectrum in run:
        try:
            # pymzML 2.0.0 style
            scan_time = spectrum.scan_time
        except Exception:
            # Fallback for other pymzML versions; scan time will be in
            # seconds. ``Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            scan_time = spectrum.get('MS:1000016')
        if spectrum['ms level'] == 1:
            results = lib.match_all(mz_i_list=spectrum.centroidedPeaks,
                                    file_name=mzml_file_basename,
                                    spec_id=spectrum['id'],
                                    spec_rt=scan_time,
                                    results=results)

    pickle_path = os.path.join(
        out_folder, '{0}_pyQms_results.pkl'.format(mzml_file_basename))
    # the context manager guarantees the pickle is flushed and closed, even
    # if dumping fails half way
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(results, pickle_file)
    return
Ejemplo n.º 27
0
#!/usr/bin/env python3.4
# encoding: utf-8

import pyqms

# Library instance created once at import time: a single molecule
# ('PAINLESS') at charge state 2.
lib = pyqms.IsotopologueLibrary(
    molecules=[
        'PAINLESS',
    ],
    charges=[
        2,
    ],
)

TESTS = [
    {
        'input': [('X', 2), ('Y', 2)],
        'output': [[('X', 0), ('Y', 0)], [('X', 0), ('Y', 1)],
                   [('X', 1), ('Y', 0)], [('X', 1), ('Y', 1)]]
    },
    {
        'input': [('X', 1), ('Y', 1), ('Z', 1)],
        'output': [
            [('X', 0), ('Y', 0), ('Z', 0)],
        ]
    },
    {
        'input': [('X', 1), ('Y', 2), ('Z', 1)],
        'output': [
            [('X', 0), ('Y', 0), ('Z', 0)],
            [('X', 0), ('Y', 1), ('Z', 0)],
Ejemplo n.º 28
0
def transformation_mz(test_id, test_dict):
    """Check the size of what ``_transform_mz_to_set`` returns.

    A fresh library is created, its parameters are overridden with
    ``test_dict['params']`` and ``test_dict['i']`` is transformed. The length
    of the returned collection has to equal ``test_dict['o']``.
    ``test_id`` is not used inside this function.
    """
    library = pyqms.IsotopologueLibrary(charges=[2],
                                        molecules=['KLEINERTEST'])
    library.params.update(test_dict['params'])
    transformed = library._transform_mz_to_set(test_dict['i'])
    assert len(transformed) == test_dict['o']
Ejemplo n.º 29
0
def main(mzml=None):
    """
    Example script as template for the most basic usage of quantification
    using pyQms.

    Uses spectrum 1165 of the BSA1.mzML example file. A subrange of the
    spectrum from m/z 400 to 500 is used.

    Usage:
        ./basic_quantification_example.py

    Note:
        This example does not require a reader to access MS spectra, since a
        simple peak list is used. The ``mzml`` argument is accepted but not
        used.

    """
    # centers a title in a 100 character wide line of dashes
    banner = '{0:-^100}'.format

    # (m/z, intensity) pairs of the spectrum subrange
    peak_list = [
        (404.2492407565097, 2652.905029296875),
        (405.3003310237508, 4831.56103515625),
        (408.8403673369115, 23153.7109375),
        (409.17476109421705, 10182.2822265625),
        (409.5098740355617, 4770.97412109375),
        (411.17196124490727, 3454.364013671875),
        (413.26627826402705, 6861.84912109375),
        (419.3157903165357, 90201.5625),
        (420.2440507067882, 11098.4716796875),
        (420.31917273788645, 22288.9140625),
        (420.73825281590496, 8159.7099609375),
        (421.2406187369968, 3768.656494140625),
        (427.3787652898548, 5680.43212890625),
        (433.3316647490907, 8430.30859375),
        (434.705984428002, 25924.38671875),
        (435.2080179219357, 11041.2060546875),
        (443.6708762397708, 4081.282470703125),
        (443.69049198141124, 5107.13330078125),
        (443.6974813419733, 9135.3125),
        (443.7112735313511, 2517650.0),
        (443.7282222289076, 5571.26025390625),
        (443.7379762316008, 5227.4033203125),
        (444.1998579474954, 3021.341796875),
        (444.21248374593875, 1156173.75),
        (444.71384916266277, 336326.96875),
        (445.21533524843596, 58547.0703125),
        (445.71700965093, 4182.04345703125),
        (446.1200302053469, 93216.3359375),
        (447.09963627699824, 3806.537109375),
        (447.1169242266495, 59846.37109375),
        (447.3464079857604, 13170.9541015625),
        (448.11566395552086, 9294.5107421875),
        (448.3500303628631, 3213.052490234375),
        (452.1123280000919, 5092.0869140625),
        (461.1934526664677, 4022.537353515625),
        (462.1463969367603, 99732.5),
        (463.14561508666384, 24247.015625),
        (464.1433022096936, 20417.041015625),
        (465.1421080732791, 3222.4052734375),
        (470.1669593722212, 8621.81640625),
        (475.23989190282134, 3369.073974609375),
        (493.27465300375036, 2725.885986328125),
        (496.0077303201583, 8604.0830078125),
    ]

    print(banner('Library generation'))
    lib = pyqms.IsotopologueLibrary(
        molecules=['DDSPDLPK'],
        charges=[2],
        metabolic_labels=None,
        fixed_labels=None,
        verbose=True,
    )
    print(banner('Library generation'))

    results = lib.match_all(
        mz_i_list=peak_list,
        file_name='BSA_test',
        spec_id=1165,
        spec_rt=29.10,
        results=None,
    )

    print()
    print(banner('Results summary'))
    match_header = (
        'For Peptide {0} with formula {1} and charge {2} the following match could be made:'
    )
    amount_line = (
        '\tAmount {0:1.2f} (scaling_factor) was detected with a matching score of {1:1.2f}'
    )
    peak_line = '\t\t{0:1.6f} m/z @ {1:1.2e} intensity'
    for key in results.keys():
        # first molecule registered for this formula
        peptide = results.lookup['formula to molecule'][key.formula][0]
        print(match_header.format(peptide, key.formula, key.charge))
        for match in results[key]['data']:
            print(amount_line.format(match.scaling_factor, match.score))
            print('\tThe follwowing peaks have been matched:')
            # every peak entry has five fields, only the measured m/z and
            # the measured intensity are reported
            for obs_mz, obs_intensity, _rel_i, _calc_mz, _calc_i in match.peaks:
                print(peak_line.format(obs_mz, obs_intensity))
    print(banner('Results summary'))
    return
Ejemplo n.º 30
0
    # scaling times 3 and last peak is ignored with scaling of 0.
    (0.0, 0.0) : [( 1.1, 3, 0.9, 1, 1 ),   (0.55, 3, 0.2, 0.5, 1)],
    # bad match
    (0.5, 0.0) : [( 1, 0, 0.9, 1, 1 ),   (0.5, 0, 0.2, 0.5, 1)],
    # perfect mz match and no i match, thus no scaling and no i score
    (0.5, 5.0) : [( 1, 5, 0.9, 0.5, 1 ),   (0.5, 5, 0.2, 0.2, 1)],
    # # perfect i match and no mz match, thus scaling but no mz score
    (1.0, 6.0) : [( 1, 6, 1.0, 1, 1 ),   (0.5, 6, 1.0, 0.5, 1), (None, None, 0.0, 0.5, 0.2)],
    # scaling and score perfect - None are ignored
}


# Module-level library built once at import time; the MZ_SCORE_PERCENTILE
# parameter is overridden to 0.5.
# NOTE(review): presumably used by check_score / check_scaling, which are not
# visible here - confirm.
LIB = pyqms.IsotopologueLibrary(
    molecules = ['ELVISLIVES'],
    charges   = [2],
    verbose   = False,
    params    = {
        'MZ_SCORE_PERCENTILE' : 0.5,
    }
)


def score_test():
    """Generator-style test: yield ``(check_score, expected_score, peaks)``
    for every entry of MATCHED_PEAKS."""
    for expectation, peaks in MATCHED_PEAKS.items():
        # keys are (expected_score, expected_scaling) pairs
        score, _scaling = expectation
        yield check_score, score, peaks


def scaling_test():
    """Generator-style test: yield ``(check_scaling, expected_scaling,
    peaks)`` for every entry of MATCHED_PEAKS."""
    for expectation, peaks in MATCHED_PEAKS.items():
        # keys are (expected_score, expected_scaling) pairs
        _score, scaling = expectation
        yield check_scaling, scaling, peaks