Example #1
import os
from collections import Counter

import pyopenms


def test0():
    fh = pyopenms.MzXMLFile()
    here = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(here, "test2.mzXML").encode()

    class Consumer(object):
        def __init__(self):
            self.speclevels = []
            self.rts = []

        def consumeSpectrum(self, spec):
            self.speclevels.append(spec.getMSLevel())
            self.rts.append(spec.getRT())

        def consumeChromatogram(self, chromo):
            raise Exception(
                "should never be called as there are no chromatograms in the example file"
            )

        def setExpectedSize(self, num_specs, num_chromo):
            assert num_specs == 5, num_specs
            assert num_chromo == 0, num_chromo

        def setExperimentalSettings(self, exp):
            assert isinstance(exp, pyopenms.ExperimentalSettings)

    consumer = Consumer()
    fh.transform(path, consumer)
    cc = Counter(consumer.speclevels)
    assert set(cc.keys()) == set([1, 2])
    assert cc[1] == 2
    assert cc[2] == 3
    assert abs(min(consumer.rts) - 4200.76) < 0.01
    assert abs(max(consumer.rts) - 4202.03) < 0.01
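For comparison (not part of the original example), the same MS-level counts can be obtained without a streaming consumer by loading the whole file into an MSExperiment; a minimal sketch:

from collections import Counter

import pyopenms

def count_ms_levels(path):
    # Load the complete file into memory; for large files, prefer transform()
    # with a consumer (as in the example above), which streams spectra instead.
    exp = pyopenms.MSExperiment()
    pyopenms.MzXMLFile().load(path, exp)  # older pyopenms releases may expect a bytes path
    return Counter(spec.getMSLevel() for spec in exp.getSpectra())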
Example #2
def __get_mzml(self, file):
    b_content = file.bcore
    if self.ext == 'raw':
        self.tf = store_byte_in_tmp(
            b_content,
            prefix=self.fname,
            suffix='.RAW',
            directory=self.target_dir.absolute().as_posix()
        )
        self.cmd_msconvert = self.__build_cmd_msconvert()
        self.__run_cmd()
    elif self.ext == 'mzml':
        self.tf = store_byte_in_tmp(
            b_content,
            prefix=self.fname,
            suffix='.mzML',
            directory=self.target_dir.absolute().as_posix()
        )
    elif self.ext == 'mzxml':
        self.tf = store_byte_in_tmp(
            b_content,
            prefix=self.fname,
            suffix='.mzXML',
            directory=self.target_dir.absolute().as_posix()
        )
        exp = pyopenms.MSExperiment()
        pyopenms.MzXMLFile().load(self.tf.name, exp)
        target_path = self.__get_mzml_path().absolute().as_posix()
        pyopenms.MzMLFile().store(target_path, exp)
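The mzxml branch above is essentially a format conversion (store_byte_in_tmp, __build_cmd_msconvert and __get_mzml_path are helpers of the surrounding project and are not shown). As a standalone sketch, that conversion step could look like this:

import pyopenms

def convert_mzxml_to_mzml(src_path, dst_path):
    # Read the mzXML file into an in-memory experiment, then write it out as mzML.
    exp = pyopenms.MSExperiment()
    pyopenms.MzXMLFile().load(src_path, exp)
    pyopenms.MzMLFile().store(dst_path, exp)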
Example #3
def _getFileHandeler(iftype: FileType):
    if iftype == FileType.MZML:
        return pyopenms.MzMLFile()
    elif iftype == FileType.MZXML:
        return pyopenms.MzXMLFile()
    elif iftype == FileType.MS2:
        return MS2File()
    elif iftype == FileType.MGF:
        return MascotGenericFile()
    else:
        raise NotImplementedError('{} not implemented!'.format(iftype.value))
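A hypothetical call site for this dispatcher (FileType, MS2File and MascotGenericFile come from the surrounding project; the filename is a placeholder):

import pyopenms

fh = _getFileHandeler(FileType.MZML)   # returns pyopenms.MzMLFile() in this branch
exp = pyopenms.MSExperiment()
fh.load('sample.mzML', exp)            # placeholder filename
print(exp.getNrSpectra(), 'spectra loaded')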
Example #4
def get_openms_file_type(self, suffix):
    import pyopenms
    # Determine the file format and return the corresponding pyopenms file object
    if suffix.lower() == '.mzxml':
        return pyopenms.MzXMLFile()
    elif suffix.lower() == '.mzml':
        return pyopenms.MzMLFile()
    elif suffix.lower() == '.mzdata':
        return pyopenms.MzDataFile()
    else:
        print('Data format is not supported!!')
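A possible usage sketch, assuming the method lives on some reader instance (the surrounding class is not shown) and deriving the suffix with pathlib:

from pathlib import Path

import pyopenms

path = Path('data/sample.mzML')                # placeholder path
fh = reader.get_openms_file_type(path.suffix)  # 'reader' is a hypothetical instance of the surrounding class
if fh is not None:                             # the method falls through (returns None) for unsupported formats
    exp = pyopenms.MSExperiment()
    fh.load(str(path), exp)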
Example #5
def read_mzml_or_mzxml_impl(path, psms, theoretical, max_delta_ppm, filetype):
    assert filetype in ('mzml', 'mzxml')
    fh = po.MzMLFile() if filetype == 'mzml' else po.MzXMLFile()
    fh.setLogType(po.LogType.CMD)
    input_map = po.MSExperiment()
    fh.load(path, input_map)

    peaks_list = []
    for scan_id, modified_peptide, precursor_charge in psms.itertuples(
            index=None):
        peaks_list.append(
            psm_df(input_map, theoretical, max_delta_ppm, scan_id,
                   modified_peptide, precursor_charge))

    if len(peaks_list) > 0:
        reps = np.array([e[0] for e in peaks_list])
        transitions = pd.DataFrame({
            'fragment': np.concatenate([e[1] for e in peaks_list]),
            'product_mz': np.concatenate([e[2] for e in peaks_list]),
            'intensity': np.concatenate([e[3] for e in peaks_list]),
            'scan_id': np.repeat([e[4] for e in peaks_list], reps),
            'precursor_mz': np.repeat([e[5] for e in peaks_list], reps),
            'modified_peptide': np.repeat([e[6] for e in peaks_list], reps),
            'precursor_charge': np.repeat([e[7] for e in peaks_list], reps)
        })
        # Multiple peaks might be identically annotated, only use most intense
        transitions = transitions.groupby([
            'scan_id', 'modified_peptide', 'precursor_charge', 'precursor_mz',
            'fragment', 'product_mz'
        ])['intensity'].max().reset_index()
    else:
        transitions = pd.DataFrame({
            'scan_id': [],
            'modified_peptide': [],
            'precursor_charge': [],
            'precursor_mz': [],
            'fragment': [],
            'product_mz': [],
            'intensity': []
        })
    return transitions
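Because psms is iterated with itertuples(index=None), it is expected to be a DataFrame whose columns are, in order, scan_id, modified_peptide and precursor_charge; a small illustration (psm_df and theoretical are project helpers that are not shown, and the values below are made up):

import pandas as pd

psms = pd.DataFrame({
    'scan_id': [1521, 1688],             # hypothetical scan numbers
    'modified_peptide': ['PEPTIDEK', 'ELVISLIVESK'],
    'precursor_charge': [2, 2],
})
# transitions = read_mzml_or_mzxml_impl('run.mzML', psms, theoretical, 20, 'mzml')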
Example #6
def oms_ffmetabo_single_file(filename, max_peaks_per_file=5000):

    feature_map = oms.FeatureMap()
    mass_traces = []
    mass_traces_split = []
    mass_traces_filtered = []
    exp = oms.MSExperiment()
    peak_map = oms.PeakMap()
    options = oms.PeakFileOptions()
    
    options.setMSLevels([1])

    if filename.lower().endswith('.mzxml'):
        fh = oms.MzXMLFile()

    elif filename.lower().endswith('.mzml'):
        fh = oms.MzMLFile()
    else:
        assert False, filename

    fh.setOptions(options)

    # Peak map
    fh.load(filename, exp)

    #for chrom in exp.getChromatograms():
    #    peak_map.addChrom(chrom)

    for spec in exp.getSpectra():
        peak_map.addSpectrum(spec)

    mass_trace_detect = oms.MassTraceDetection()
    mass_trace_detect.run(peak_map, mass_traces, max_peaks_per_file)

    elution_peak_detection = oms.ElutionPeakDetection()
    elution_peak_detection.detectPeaks(mass_traces, mass_traces_split)

    feature_finding_metabo = oms.FeatureFindingMetabo()
    feature_finding_metabo.run(
                mass_traces_split,
                feature_map,
                mass_traces_filtered)

    feature_map.sortByOverallQuality()
    return feature_map
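A minimal usage sketch (placeholder filename; pyopenms is assumed to be imported as oms, as in the snippet):

feature_map = oms_ffmetabo_single_file('sample.mzML', max_peaks_per_file=5000)
print(feature_map.size(), 'features found')
for feature in feature_map:
    # Each Feature exposes retention time, m/z, intensity and an overall quality score.
    print(feature.getRT(), feature.getMZ(), feature.getIntensity(), feature.getOverallQuality())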
Example #7
def read_mzml_or_mzxml_impl(path, psms, theoretical, max_delta_ppm, filetype):
	assert filetype in ('mzml', 'mzxml')
	fh = po.MzMLFile() if filetype=='mzml' else po.MzXMLFile()
	fh.setLogType(po.LogType.CMD)
	input_map = po.MSExperiment()
	fh.load(path, input_map)

	peaks_list = []
	for ix, psm in psms.iterrows():
		scan_id = psm['scan_id']
		ionseries = theoretical[psm['modified_peptide']][psm['precursor_charge']]

		spectrum = input_map.getSpectrum(scan_id - 1)

		fragments = []
		product_mzs = []
		intensities = []
		for peak in spectrum:
			fragment, product_mz = annotate_mass(peak.getMZ(), ionseries, max_delta_ppm)
			if fragment is not None:
				fragments.append(fragment)
				product_mzs.append(product_mz)
				intensities.append(peak.getIntensity())

		peaks = pd.DataFrame({'fragment': fragments, 'product_mz': product_mzs, 'intensity': intensities})
		peaks['scan_id'] = scan_id
		peaks['precursor_mz'] = po.AASequence.fromString(po.String(psm['modified_peptide'])).getMonoWeight(po.Residue.ResidueType.Full, psm['precursor_charge']) / psm['precursor_charge'];
		peaks['modified_peptide'] = psm['modified_peptide']
		peaks['precursor_charge'] = psm['precursor_charge']

		# Baseline normalization to highest annotated peak
		max_intensity = np.max(peaks['intensity'])
		if max_intensity > 0:
			peaks['intensity'] = peaks['intensity'] * (10000 / max_intensity)

		peaks_list.append(peaks)

	if len(peaks_list) > 0:
		transitions = pd.concat(peaks_list)
		# Multiple peaks might be identically annotated, only use most intense
		transitions = transitions.groupby(['scan_id','modified_peptide','precursor_charge','precursor_mz','fragment','product_mz'])['intensity'].max().reset_index()
	else:
		transitions = pd.DataFrame({'scan_id': [], 'modified_peptide': [], 'precursor_charge': [], 'precursor_mz': [], 'fragment': [], 'product_mz': [], 'intensity': []})
	return(transitions)
Example #8
def testMxxxFile():
    """
    @tests:
     MzDataFile.__init__
     MzDataFile.endProgress
     MzDataFile.getLogType
     MzDataFile.load
     MzDataFile.setLogType
     MzDataFile.setProgress
     MzDataFile.startProgress
     MzDataFile.store
     MzMLFile.__init__
     MzMLFile.endProgress
     MzMLFile.getLogType
     MzMLFile.load
     MzMLFile.setLogType
     MzMLFile.setProgress
     MzMLFile.startProgress
     MzMLFile.store
     MzXMLFile.__init__
     MzXMLFile.endProgress
     MzXMLFile.getLogType
     MzXMLFile.load
     MzXMLFile.setLogType
     MzXMLFile.setProgress
     MzXMLFile.startProgress
     MzXMLFile.store
    """
    mse = pyopenms.MSExperiment()

    fh = pyopenms.MzDataFile()
    _testProgressLogger(fh)
    fh.store("test.mzData", mse)
    fh.load("test.mzData", mse)

    fh = pyopenms.MzMLFile()
    _testProgressLogger(fh)
    fh.store("test.mzML", mse)
    fh.load("test.mzML", mse)

    fh = pyopenms.MzXMLFile()
    _testProgressLogger(fh)
    fh.store("test.mzXML", mse)
    fh.load("test.mzXML", mse)
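The test above only round-trips an empty experiment through each writer; a small sketch of the same store/load round trip with one synthetic spectrum (the file name is arbitrary):

import numpy as np
import pyopenms

spec = pyopenms.MSSpectrum()
spec.setMSLevel(1)
spec.setRT(10.0)
spec.set_peaks((np.array([100.0, 200.0, 300.0]), np.array([10.0, 20.0, 5.0])))  # (m/z, intensities)

exp = pyopenms.MSExperiment()
exp.addSpectrum(spec)

pyopenms.MzXMLFile().store("roundtrip.mzXML", exp)
back = pyopenms.MSExperiment()
pyopenms.MzXMLFile().load("roundtrip.mzXML", back)
assert back.getNrSpectra() == 1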
Example #9
def readms(input_file):  # only mzML, mzData, or mzXML formats are supported
    #ms_format = re.search('\.\w+',input_file)
    import pyopenms
    ms_format = os.path.splitext(input_file)[1]
    #ms_format = ms_format.group()
    ms_format = ms_format.lower()
    msdata = pyopenms.MSExperiment()
    if ms_format == '.mzxml':
        file = pyopenms.MzXMLFile()
    elif ms_format == '.mzml':
        file = pyopenms.MzMLFile()
    elif ms_format == '.mzdata':
        file = pyopenms.MzDataFile()
    else:
        raise Exception('ERROR: %s has an unsupported format' % input_file)
    file.load(r'%s' % input_file, msdata)
    ms = []
    intensity = []
    rt = []
    for spectrum in msdata:
        if spectrum.getMSLevel() == 1:
            rt.append(spectrum.getRT())
            p_ms = []
            p_intensity = []
            for peak in spectrum:
                if peak.getIntensity() != 0:
                    p_ms.append(peak.getMZ())
                    p_intensity.append(peak.getIntensity())
            #print len(p_intensity)
            ms_index = np.argsort(-np.array(p_intensity))
            ms.append(np.array(p_ms)[ms_index])
            intensity.append(np.array(p_intensity)[ms_index])
            #scan+=1
    rt1 = np.array(rt)
    rt_mean_interval = np.mean(np.diff(rt1))
    #print  rt_mean_interval
    #rt_mean_interval = np.mean(rt1[1:]-rt1[:-1])
    #return ms,intensity,rt,scan,rt_max_interval
    return ms, intensity, rt, rt_mean_interval
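A minimal usage sketch (placeholder filename):

ms, intensity, rt, rt_mean_interval = readms('sample.mzML')
print('MS1 scans:', len(rt))
print('mean RT spacing:', rt_mean_interval)
# Peaks within each scan are sorted by descending intensity,
# so ms[i][0] and intensity[i][0] describe the base peak of scan i.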
Example #10
def main():

    parser = argparse.ArgumentParser(description="PeakPickerHiRes")
    parser.add_argument(
        "-in",
        action="store",
        type=str,
        dest="in_",
        metavar="input_file",
    )

    parser.add_argument(
        "-out",
        action="store",
        type=str,
        metavar="output_file",
    )

    parser.add_argument(
        "-ini",
        action="store",
        type=str,
        metavar="ini_file",
    )

    parser.add_argument(
        "-dict_ini",
        action="store",
        type=str,
        metavar="python_dict_ini_file",
    )

    parser.add_argument(
        "-write_ini",
        action="store",
        type=str,
        metavar="ini_file",
    )

    parser.add_argument(
        "-write_dict_ini",
        action="store",
        type=str,
        metavar="python_dict_ini_file",
    )

    args = parser.parse_args()

    run_mode = args.in_ is not None and args.out is not None\
                and (args.ini is not None or args.dict_ini is not None)
    write_mode = args.write_ini is not None or args.write_dict_ini is not None
    ok = run_mode or write_mode
    if not ok:
        parser.error("either specify -in, -out and -(dict_)ini to run the "
                     "peak picker,\nor -write_(dict_)ini to create a "
                     "standard ini file")

    defaults = pms.PeakPickerHiRes().getDefaults()
    if args.write_dict_ini or args.write_ini:
        if args.write_dict_ini:
            with open(args.write_dict_ini, "w") as fp:
                pprint.pprint(defaults.asDict(), stream=fp)
        if args.write_ini:
            defaults.store(args.write_ini)

    else:
        if args.ini:
            param = pms.Param()
            param.load(args.ini)
            defaults.update(param, False, False)
        elif args.dict_ini:
            with open(args.dict_ini, "r") as fp:
                try:
                    dd = eval(fp.read())
                except Exception:
                    raise Exception("could not parse %s" % args.dict_ini)
            defaults.updateFrom(dd)

        fh = pms.MzXMLFile()
        fh.setLogType(pms.LogType.CMD)
        input_map = pms.MSExperiment()
        fh.load(args.in_, input_map)

        run_peak_picker(input_map, defaults, args.out)
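A hedged sketch of exercising the -write_dict_ini path programmatically (the script and file names are placeholders):

import sys

sys.argv = ['peak_picker_script.py', '-write_dict_ini', 'defaults_dict.ini']
main()  # writes the PeakPickerHiRes defaults as a Python dict to defaults_dict.ini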
Example #11
def load_ms_file(ms_experiment, path_to_file):
    """
    Load an mzML / mzXML file into the given ms_experiment object, filtered to
    MS level 1, since only MS1 data is supported by the peak/feature finders.
    :param ms_experiment: oms.MSExperiment to fill (modified in place)
    :param path_to_file: path to the .mzML or .mzXML file
    :return: the filled ms_experiment
    """
    options = oms.PeakFileOptions()
    options.setMSLevels([1])  # MS1 is comparable to mcc-ims input
    # RuntimeError: FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data

    # support mzXML and mzML -file
    suffix = Path(path_to_file).suffix.lower()
    if suffix == ".mzxml":
        fh = oms.MzXMLFile()
    elif suffix == ".mzml":
        fh = oms.MzMLFile()
    else:
        raise UnsupportedGCMSFiletypeError(
            "Unsupported filetype. Only mzXML and mzML format is supported.")

    fh.setOptions(options)
    # load data into experiment

    fh.load(
        str(path_to_file),
        ms_experiment)  # if problems loading - let pyopenms error bubble up
    ms_experiment.updateRanges()

    # note: getKeys() fills the list that is passed in rather than returning a new one
    # k = []
    # ms_experiment[0].getKeys(k)
    # ms_experiment[1].getKeys(k)
    # ms_experiment[2].getKeys(k)
    # ms_experiment[3].getKeys(k)
    # list_functions(ms_experiment[0])
    #
    # ms_experiment[0].getAcquisitionInfo()#
    #
    # oms.CachedmzML.store("myCache.mzML", ms_experiment)
    #
    # # Now load data
    # cfile = oms.CachedmzML()
    # oms.CachedmzML.load("myCache.mzML", cfile)
    #
    # meta_data = cfile.getMetaData()
    # meta_data.metaRegistry()
    # help(meta_data)
    # list_functions(meta_data)
    # meta_data.getKeys(k)
    #
    #
    # k = []
    # list_functions(\
    #     ms_experiment[0].getDataProcessing()[0].getMetaValue(k))
    # print(k)
    # list_functions(fh.getOptions())
    # try centroided approach to pickPeaks
    #   if not centroided,
    #       apply smoothing and peak detection to form centroided data = peakMap / FeatureXML file
    return ms_experiment
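A minimal usage sketch, assuming pyopenms is imported as oms as in the snippet (placeholder path):

from pathlib import Path

exp = oms.MSExperiment()
load_ms_file(exp, Path('sample.mzML'))          # the experiment is filled in place and also returned
print(exp.getNrSpectra(), 'MS1 spectra loaded')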