def build_params():
    """Build the combined default parameter set for the metabo feature finder.

    The defaults of MassTraceDetection, ElutionPeakDetection and
    FeatureFindingMetabo are collected into one ``pyopenms.Param`` under the
    prefixes ``mtd_``, ``epdet_`` and ``ffm_``.  Settings that occur in more
    than one algorithm are removed from the per-algorithm sections and are
    exposed once under the ``common_`` prefix instead.

    Returns:
        A ``pyopenms.Param`` holding the four prefixed sections.
    """
    # (name, default value, description) of every shared setting.
    shared_settings = (
        ("noise_threshold_int", 10.0,
         "Intensity threshold below which peaks are regarded as noise."),
        ("chrom_peak_snr", 3.0,
         "Minimum signal-to-noise a mass trace should have"),
        ("chrom_fwhm", 5.0,
         "Expected chromatographic peak width (in seconds)."),
    )

    trace_defaults = pyopenms.MassTraceDetection().getDefaults()
    for shared_key in ("chrom_peak_snr", "noise_threshold_int"):
        trace_defaults.remove(shared_key)

    elution_defaults = pyopenms.ElutionPeakDetection().getDefaults()
    for shared_key in ("noise_threshold_int", "chrom_peak_snr", "chrom_fwhm"):
        elution_defaults.remove(shared_key)

    shared = pyopenms.Param()
    for name, value, description in shared_settings:
        shared.setValue(name, value, description)

    feature_defaults = pyopenms.FeatureFindingMetabo().getDefaults()
    feature_defaults.remove("chrom_fwhm")

    merged = pyopenms.Param()
    sections = (
        ("common_", shared),
        ("mtd_", trace_defaults),
        ("epdet_", elution_defaults),
        ("ffm_", feature_defaults),
    )
    for prefix, section in sections:
        merged.insert(prefix, section)
    return merged
def FeatureFindingMetabo1(mzfile):
    """Run the metabo feature-finding pipeline on an mzML file with defaults.

    The file is loaded into an ``MSExperiment`` and then passed through
    MassTraceDetection, ElutionPeakDetection and FeatureFindingMetabo, each
    configured with its own default parameters.

    Args:
        mzfile: Path of the mzML file to load.

    Returns:
        The ``pyopenms.FeatureMap`` filled by FeatureFindingMetabo.
    """
    experiment = pyopenms.MSExperiment()
    pyopenms.MzMLFile().load(mzfile, experiment)

    # Stage 1: mass traces from the raw experiment.
    trace_defaults = pyopenms.MassTraceDetection().getDefaults()
    trace_detector = pyopenms.MassTraceDetection()
    trace_detector.setParameters(trace_defaults)
    detected_traces = []
    trace_detector.run(experiment, detected_traces)

    # Stage 2: split the traces at elution peaks.
    elution_defaults = pyopenms.ElutionPeakDetection().getDefaults()
    peak_detector = pyopenms.ElutionPeakDetection()
    peak_detector.setParameters(elution_defaults)
    split_traces = []
    peak_detector.detectPeaks(detected_traces, split_traces)

    # Stage 3: assemble the split traces into features.
    finder_defaults = pyopenms.FeatureFindingMetabo().getDefaults()
    finder = pyopenms.FeatureFindingMetabo()
    finder.setParameters(finder_defaults)
    features = pyopenms.FeatureMap()
    finder.run(split_traces, features)
    return features
def getMassTraceMatchingMS2(exp: oms.MSExperiment, tol: float = 0.5) -> List[mzqc.QualityMetric]:
    """Classify where an MS2 scan was sampled relative to its mass trace.

    Mass traces are detected on ``exp``.  The MS2 spectra are then visited in
    order; for the first one whose first precursor m/z lies within ``tol`` of
    at least one trace centroid m/z, the trace with the closest centroid RT
    is selected and the scan's RT is compared against that trace.

    Args:
        exp: Experiment holding the spectra to analyse.
        tol: Absolute m/z tolerance handed to ``np.isclose`` when matching a
            precursor to trace centroids.

    Returns:
        One of four sampling categories for the first matchable MS2 scan:
        ``-2`` (outside the trace, before the centroid RT), ``-1`` (inside
        the trace, before the centroid RT), ``1`` (inside the trace, after
        the centroid RT), ``2`` (outside the trace, after the centroid RT).
        ``None`` when no mass trace was detected or no MS2 scan could be
        matched.

    NOTE(review): the return annotation promises a list of
    ``mzqc.QualityMetric`` but the function has always returned a plain int
    (or None).  The annotation is left untouched so the signature does not
    change; reconcile it with the intended metric output.
    """
    mts: List[oms.MassTrace] = []
    oms.MassTraceDetection().run(exp, mts, 0)  # since 2.5.0 with 3rd argument
    if not mts:
        # Without traces there is nothing to match against (and the 2-D
        # column indexing below would fail on an empty array).
        return None

    # One row per trace: [centroid m/z, centroid RT].
    mts_coord = np.array([[m.getCentroidMZ(), m.getCentroidRT()] for m in mts])

    for s in exp:
        if s.getMSLevel() != 2:
            continue

        precursors = s.getPrecursors()
        if not precursors:
            # An MS2 scan without precursor information cannot be matched.
            continue

        mz_matches = np.isclose(mts_coord[:, 0], precursors[0].getMZ(), atol=tol)
        match_idx = np.where(mz_matches)[0]  # indices into mts / mts_coord
        if match_idx.size == 0:
            # No trace within tolerance: try the next MS2 scan rather than
            # failing on argmin() of an empty array.
            continue

        scan_rt = s.getRT()
        rt_dist_per_match = np.abs(mts_coord[match_idx, 1] - scan_rt)
        closest_match_mt = mts[match_idx[rt_dist_per_match.argmin()]]

        # The RT extent of the trace is taken from its convex hull points.
        hull_rts = closest_match_mt.getConvexhull().getHullPoints()[:, 0]
        rt_1st = np.min(hull_rts)
        rt_last = np.max(hull_rts)
        rt_centr = closest_match_mt.getCentroidRT()

        if scan_rt > rt_centr:  # 'after' category
            return 2 if scan_rt > rt_last else 1
        # 'before' category
        return -2 if scan_rt < rt_1st else -1

    return None
def mt_detection(self):
    """Detect mass traces on ``self.centroid_input_map``.

    A fresh ``oms.MassTraceDetection`` is stored in ``self.mtd_process``, its
    defaults (with the overrides below applied) in ``self.mtd_params``, and
    the detected traces are collected in ``self.mass_traces``.
    """
    # Overrides applied on top of the algorithm defaults, in this order.
    overrides = (
        ('noise_threshold_int', 10.0),
        ('chrom_peak_snr', 3.0),
        ('mass_error_ppm', 20.0),
        ('reestimate_mt_sd', b'true'),
        ('quant_method', b'area'),
    )

    self.mass_traces = []
    self.mtd_process = oms.MassTraceDetection()
    self.mtd_params = self.mtd_process.getDefaults()
    for setting, value in overrides:
        self.mtd_params.setValue(setting, value)

    self.mtd_process.setParameters(self.mtd_params)
    self.mtd_process.run(self.centroid_input_map, self.mass_traces)
def oms_ffmetabo_single_file(filename, max_peaks_per_file=5000):
    """Run the OpenMS metabo feature finder on a single mzML/mzXML file.

    Only MS level 1 spectra are loaded.  The spectra are passed through
    MassTraceDetection, ElutionPeakDetection and FeatureFindingMetabo, and
    the resulting feature map is sorted by overall quality.

    Args:
        filename: Path of the input file; the extension (case-insensitive)
            must be ``.mzML`` or ``.mzXML``.
        max_peaks_per_file: Passed as the third argument to
            ``MassTraceDetection.run``.

    Returns:
        The ``oms.FeatureMap`` produced by FeatureFindingMetabo.

    Raises:
        ValueError: If ``filename`` has an unsupported extension.
    """
    # Validate first with a real exception: an ``assert`` is stripped under
    # ``python -O`` and would leave the file handler undefined further down.
    lowered = filename.lower()
    if not lowered.endswith(('.mzxml', '.mzml')):
        raise ValueError(
            "unsupported file type (expected .mzML or .mzXML): %r" % (filename,))

    options = oms.PeakFileOptions()
    options.setMSLevels([1])

    fh = oms.MzXMLFile() if lowered.endswith('.mzxml') else oms.MzMLFile()
    fh.setOptions(options)

    exp = oms.MSExperiment()
    fh.load(filename, exp)

    # Only the spectra are carried over into the peak map; chromatograms of
    # the loaded experiment are not copied.
    peak_map = oms.PeakMap()
    for spec in exp.getSpectra():
        peak_map.addSpectrum(spec)

    mass_traces = []
    oms.MassTraceDetection().run(peak_map, mass_traces, max_peaks_per_file)

    mass_traces_split = []
    oms.ElutionPeakDetection().detectPeaks(mass_traces, mass_traces_split)

    feature_map = oms.FeatureMap()
    mass_traces_filtered = []
    oms.FeatureFindingMetabo().run(
        mass_traces_split, feature_map, mass_traces_filtered)

    feature_map.sortByOverallQuality()
    return feature_map
def metaboFeatureFinder(peak_map, config_id=None, ms_level=None, **kw):
    """Run the OpenMS metabo feature finder on a peak map and tabulate features.

    The parameter set registered under ``config_id`` in ``metaboFFConfigs``
    (empty if there is no such entry) is updated with ``kw`` and handed to
    ``_ParamHandler.update_params``.  Mass trace detection, elution peak
    detection and feature finding are then run in sequence, and one table
    row is produced per convex hull of every detected feature.  Progress and
    the effective parameters are printed to stdout.

    Args:
        peak_map: The ``PeakMap`` to analyse.
        config_id: Key of a predefined configuration in ``metaboFFConfigs``.
        ms_level: MS level to extract.  If ``None`` the dominating peak map
            is used; otherwise the spectra of that level are extracted and
            relabelled as level 1 before detection.
        **kw: Individual parameter overrides.

    Returns:
        A ``Table`` with the columns feature_id, mz, mzmin, mzmax, rt, rtmin,
        rtmax, intensity, quality, fwhm, z, peakmap and source, plus the
        enumeration added by ``addEnumeration``.
    """
    from ..algorithm_configs import metaboFFConfigs

    config_params = dict()
    for key, __, params in metaboFFConfigs:
        if key == config_id:
            config_params = params.copy()
            break
    config_params.update(kw)

    assert isinstance(peak_map, PeakMap)
    import time

    # All output goes through single-argument print(...) calls: these give
    # the same text under Python 2 (print statement) and Python 3 (function).
    def info(fmtstr, *a):
        msg = fmtstr % a
        print("")
        print((" " + msg + " ").center(79, "="))
        print("")

    info("RUN FEATURE FINDER METABO")

    start_at = time.time()

    (mtd_params, epdet_params, ffm_params,
     all_params) = _ParamHandler.update_params(config_params)

    def dump_param(prefix, all_params=all_params):
        sub_params = all_params.copy(prefix)
        for k, v in sorted(sub_params.items()):
            label = ("%s " % (k, )).ljust(35, ".")
            print("%s %s" % (label, v))

    sections = (
        ("COMMON PARAMETERS", "common_"),
        ("PARAMS MASS TRACE DETECTION:", "mtd_"),
        ("PARAMS ELUTION PEAK DETECTION:", "epdet_"),
        ("PARAMS FEATURE FINDER METABO:", "ffm_"),
    )
    for heading, prefix in sections:
        print(heading)
        print("")
        dump_param(prefix)
        print("")

    # Sometimes when we run nosetest, the locale settings are set to
    # German. I can not explain why.
    # This causes a problem for the metabo feature finder from OpenMS,
    # which fails to read some config files containing numerical values
    # with a "." decimal point, which is not the decimal point for German
    # numbers.
    # So we set:
    locale.setlocale(locale.LC_NUMERIC, "C")

    mtd = pyopenms.MassTraceDetection()
    mtd.setParameters(mtd_params)
    mass_traces = []
    if ms_level is None:
        peak_map = peak_map.getDominatingPeakmap()
    else:
        peak_map = peak_map.extract(mslevelmin=ms_level, mslevelmax=ms_level)
        # The extracted spectra are relabelled as level 1 before detection.
        for spec in peak_map.spectra:
            spec.msLevel = 1

    info("%d SPECS OF LEVEL %d", len(peak_map), 1)
    mtd.run(peak_map.toMSExperiment(), mass_traces)
    info("FOUND %d MASS TRACES", len(mass_traces))

    rows = []
    splitted_mass_traces = []
    if mass_traces:
        epdet = pyopenms.ElutionPeakDetection()
        epdet.setParameters(epdet_params)
        splitted_mass_traces = []
        epdet.detectPeaks(mass_traces, splitted_mass_traces)

    # A non-empty split result implies the branch above ran, so ``epdet``
    # is defined whenever it is used below.
    if splitted_mass_traces:
        if epdet_params.getValue("width_filtering") == "auto":
            final_mass_traces = []
            epdet.filterByPeakWidth(splitted_mass_traces, final_mass_traces)
        else:
            final_mass_traces = splitted_mass_traces

        info("%d SPLITTED MASS TRACES AFTER ELUTION PEAK DETECTION",
             len(final_mass_traces))

        ffm = pyopenms.FeatureFindingMetabo()
        ffm.setParameters(ffm_params)
        feature_map = pyopenms.FeatureMap()
        ffm.run(final_mass_traces, feature_map)

        info("FOUND %d FEATURES", feature_map.size())

        # One row per convex hull; all hulls of a feature share its id.
        for i, feature in enumerate(feature_map):
            convex_hulls = feature.getConvexHulls()
            quality = feature.getOverallQuality()
            width = feature.getWidth()
            z = feature.getCharge()
            mz = feature.getMZ()
            rt = feature.getRT()
            intensity = feature.getIntensity()
            for convex_hull in convex_hulls:
                bb = convex_hull.getBoundingBox()
                rtmin, mzmin = bb.minPosition()
                rtmax, mzmax = bb.maxPosition()
                rows.append([
                    i, mz, mzmin, mzmax, rt, rtmin, rtmax, intensity,
                    quality, width, z
                ])

    tab = Table(
        [
            "feature_id", "mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax",
            "intensity", "quality", "fwhm", "z"
        ],
        [
            int, float, float, float, float, float, float, float, float,
            float, int
        ],
        [
            "%d", "%10.5f", "%10.5f", "%10.5f", formatSeconds, formatSeconds,
            formatSeconds, "%.2e", "%.2e", formatSeconds, "%d"
        ],
        rows)

    tab.addConstantColumn("peakmap", peak_map, PeakMap, None)

    def recalc(table, row, name):
        # Replace the feature m/z by the representing peak inside the hull's
        # bounding box; fall back to the box centre if there is none.
        mzmin = table.getValue(row, "mzmin")
        mzmax = table.getValue(row, "mzmax")
        rtmin = table.getValue(row, "rtmin")
        rtmax = table.getValue(row, "rtmax")
        pm = table.getValue(row, "peakmap")
        mz = pm.representingMzPeak(mzmin, mzmax, rtmin, rtmax)
        return mz if mz is not None else (mzmin + mzmax) / 2.0

    tab.replaceColumn("mz", recalc)

    src = peak_map.meta.get("source", "")
    tab.addConstantColumn("source", src)
    tab.addEnumeration()
    if src:
        tab.title = "metabo features from %s" % os.path.basename(src)
    else:
        tab.title = "metabo features"

    needed = time.time() - start_at
    minutes = int(needed / 60)
    seconds = round(needed - 60 * minutes)
    info("NEEDED %d MINUTES AND %d SECONDS", minutes, seconds)

    return tab
def __init__(self, **kwargs):
    """Initialise the entity around a new ``oms.MassTraceDetection``.

    A freshly created ``oms.MassTraceDetection`` instance is passed as the
    only positional argument to the parent initialiser; all keyword
    arguments are forwarded unchanged.
    """
    parent_init = super(MtdEntity, self).__init__
    parent_init(oms.MassTraceDetection(), **kwargs)