def pick_experiment(self, exp: ms.MSExperiment, peak_radius: int = 1,
                    window_radius: float = 0.015, pp_mode: str = 'int',
                    min_int_mult: float = 0.10,
                    strict: bool = True) -> ms.MSExperiment:
    """Peak picks an experiment.

    The spectra of `exp` are sorted in place before picking. Only spectra
    with MS level 1 are picked; spectra of any other level are skipped and
    do not appear in the returned experiment.

    Keyword arguments:
    exp: the experiment to peak pick (sorted in place)
    peak_radius: the minimum peak radius of a peak set
    window_radius: the maximum m/z window radius of a peak set
    pp_mode: the mode to use ('ltr' or 'int')
    min_int_mult: a multiplier to the maximum peak intensity in a set
        (for differentiating between signal and noise)
    strict: if False, allow a single increase in intensity in either
        direction

    Returns:
    a new experiment holding the peak picked MS1 spectra.
    """
    exp.sortSpectra()

    picked_exp = ms.MSExperiment()
    for spec in exp.getSpectra():
        if spec.getMSLevel() != 1:
            continue
        picked_exp.addSpectrum(
            self.pick_spectra(spec, peak_radius, window_radius, pp_mode,
                              min_int_mult, strict))
    return picked_exp
def getBasicQuality(exp: oms.MSExperiment, verbose: bool=False) -> mzqc.RunQuality:
    """
    getBasicQuality calculates the basic QualityMetrics from a mass spectrometry peak file and creates the related RunQuality object.

    Calculated basic QC metrics and proto-metrics necessary to calculate more elaborate QC metrics with additional data (e.g. ID).

    Parameters
    ----------
    exp : oms.MSExperiment
        The mass spectrometry peak file to calculate metrics from.
        Its spectra are sorted in place (``exp.sortSpectra()``).
    verbose : bool, optional
        switches on verbose logging, by default False (currently not used
        inside this function)

    Returns
    -------
    mzqc.RunQuality
        A RunQuality object containing the list of metrics calculated and metadata collected, ready for integration into a mzQC file object.

    Notes
    -----
    * Every MS2 spectrum is expected to carry at least one precursor; only
      the first precursor is evaluated.
    * The experiment is expected to contain at least one spectrum (the RT
      range is read from the first and last spectrum).
    * An empty spectrum contributes an intensity sum and maximum of 0.
    * MS2 spectra that occur before the first MS1 spectrum have no survey
      scan: their survey scan intensities and precursor error/intensity are
      NaN and their isolation window is treated as empty.
    """
    metrics: List[mzqc.QualityMetric] = list()

    instrument = exp.getInstrument()
    instr_srl: str = instrument.getMetaValue('instrument serial number') \
        if instrument.metaValueExists('instrument serial number') else 'unknown'  # MS:1000529 in mzML

    input_loc: str = exp.getExperimentalSettings().getLoadedFilePath()
    base_name: str = basename(input_loc)
    chksm: str = utils.sha256fromfile(input_loc)
    cmpltn: str = exp.getDateTime().get()

    meta: mzqc.MetaDataParameters = mzqc.MetaDataParameters(
        inputFiles=[
            mzqc.InputFile(name=base_name, location=input_loc,
                           fileFormat=mzqc.CvParameter("MS", "MS:1000584", "mzML format"),
                           fileProperties=[
                               mzqc.CvParameter(cvRef="MS",
                                                accession="MS:1000747",
                                                name="completion time",
                                                value=cmpltn
                                                ),
                               mzqc.CvParameter(cvRef="MS",
                                                accession="MS:1000569",
                                                name="SHA-256",
                                                value=chksm
                                                ),
                               mzqc.CvParameter(cvRef="MS",
                                                accession="MS:1000031",
                                                name="instrument model",
                                                value=instrument.getName()
                                                ),
                               mzqc.CvParameter(cvRef="MS",
                                                accession="MS:1000529",
                                                name="instrument serial number",
                                                value=instr_srl
                                                )
                               # TODO integrate parent location and checksum; they are available from
                               #   exp.getExperimentalSettings().getSourceFiles()[0] via
                               #   getNameOfFile() / getChecksum() / getChecksumType()
                               # id: MS:1002846 (Associated raw file URI) N.B. definition is PRIDE specific
                               # fitting checksum cv missing
                           ]
                           )
        ],
        analysisSoftware=[
            mzqc.AnalysisSoftware(cvRef="MS", accession="MS:1000752", name="TOPP software", version=oms.__version__, uri="openms.de")
        ]
    )

    # this is mighty important to sort by RT
    exp.sortSpectra()

    min_mz: float = sys.maxsize
    max_mz: float = 0
    mslevelcounts: Dict[int,int] = defaultdict(int)

    spectrum_acquisition_metrics_MS1: Dict[str,List[Any]] = defaultdict(list)
    spectrum_acquisition_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    spectrum_topn: Dict[str,List[Any]] = defaultdict(list)  # never filled here, emitted empty
    tandem_spectrum_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    trap_metrics_MS1: Dict[str,List[Any]] = defaultdict(list)
    trap_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    isolation_window_metrics: Dict[str,List[Any]] = defaultdict(list)
    tic_tab: Dict[str,List[Any]] = defaultdict(list)

    # ActivationMethod look-up dict (enum value -> attribute name)
    ams = {getattr(ActivationMethod, name): name
           for name in dir(ActivationMethod)
           if type(getattr(ActivationMethod, name)) is int}

    # State of the most recent MS1 (survey) scan. Until the first MS1 is seen
    # the survey scan is empty, so dependent MS2 values come out as NaN / 0
    # instead of raising on an unbound name.
    last_surveyscan_index: int = 0
    last_surveyscan_intensity = np.nan
    last_surveyscan_max = np.nan
    survey_mz = np.empty(0, dtype=np.float64)
    survey_int = np.empty(0, dtype=np.float64)

    for spin, spec in enumerate(exp):
        mslevel = spec.getMSLevel()
        mslevelcounts[mslevel] += 1

        iontraptime = utils.getTrapTime(spec)
        # fetch the peak arrays once; every get_peaks() call builds new arrays
        mzs, intensities = spec.get_peaks()
        # max() of an empty array raises, an empty spectrum has no signal
        intens_max = intensities.max() if intensities.size else 0.0
        intens_sum = intensities.sum()
        rt = spec.getRT()

        if mslevel == 1:
            last_surveyscan_index = spin
            last_surveyscan_intensity = intens_sum
            last_surveyscan_max = intens_max
            # cache the survey peaks for the MS2 scans that follow;
            # float64 keeps the values stored downstream JSON friendly
            survey_mz = np.asarray(mzs, dtype=np.float64)
            survey_int = np.asarray(intensities, dtype=np.float64)

            spectrum_acquisition_metrics_MS1['RT'].append(rt)
            spectrum_acquisition_metrics_MS1['SN'].append(noiseqc.getSN_medianmethod(spec))
            spectrum_acquisition_metrics_MS1['peakcount'].append(spec.size())
            spectrum_acquisition_metrics_MS1['int'].append(intens_sum.item())  # .item() for dtype to pytype

            trap_metrics_MS1['RT'].append(rt)
            trap_metrics_MS1['traptime'].append(iontraptime)

            tic_tab['RT'].append(rt)
            tic_tab['int'].append(intens_sum.item())  # .item() for dtype to pytype

        if mslevel == 2:
            precursor = spec.getPrecursors()[0]
            precursor_mz = precursor.getMZ()
            min_mz = min(min_mz, precursor_mz)
            max_mz = max(max_mz, precursor_mz)

            spectrum_acquisition_metrics_MS2['RT'].append(rt)
            spectrum_acquisition_metrics_MS2['SN'].append(noiseqc.getSN_medianmethod(spec))
            spectrum_acquisition_metrics_MS2['peakcount'].append(spec.size())
            spectrum_acquisition_metrics_MS2['int'].append(intens_sum.item())  # .item() for dtype to pytype
            spectrum_acquisition_metrics_MS2['native_id'].append(utils.spec_native_id(spec))

            # position of this MS2 after its survey scan
            rank = spin - last_surveyscan_index
            spectrum_acquisition_metrics_MS2['rank'].append(rank)

            trap_metrics_MS2['RT'].append(rt)
            trap_metrics_MS2['traptime'].append(iontraptime)
            trap_metrics_MS2['activation_method'].append(
                ams.get(next(iter(precursor.getActivationMethods()), None), 'unknown'))
            trap_metrics_MS2['activation_energy'].append(
                precursor.getMetaValue('collision energy')
                if precursor.metaValueExists('collision energy') else -1)

            # first survey peak at or above the precursor m/z (insertion point)
            precursor_index = np.searchsorted(survey_mz, [precursor_mz])[0]
            if precursor_index != survey_mz.shape[0]:
                precursor_err = precursor_mz - survey_mz[precursor_index]
                precursor_int = survey_int[precursor_index]
            else:
                # precursor lies above every survey peak (or there is none)
                precursor_err = np.nan
                precursor_int = np.nan

            tandem_spectrum_metrics_MS2['RT'].append(rt)
            tandem_spectrum_metrics_MS2['precursor_intensity'].append(precursor_int)  # TODO different from mzid->mzml getPrecursors[0].getIntensity() ? YES, latter one usually zero
            tandem_spectrum_metrics_MS2['precursor_error'].append(precursor_err)
            tandem_spectrum_metrics_MS2['precursor_mz'].append(precursor_mz)
            tandem_spectrum_metrics_MS2['precursor_c'].append(precursor.getCharge())
            tandem_spectrum_metrics_MS2['surveyscan_intensity_sum'].append(last_surveyscan_intensity)
            tandem_spectrum_metrics_MS2['surveyscan_intensity_max'].append(last_surveyscan_max)

            lower_offset = precursor.getIsolationWindowLowerOffset()
            upper_offset = precursor.getIsolationWindowUpperOffset()

            isolation_window_metrics['RT'].append(rt)
            isolation_window_metrics['isolation_target'].append(precursor_mz)  # https://github.com/OpenMS/OpenMS/blob/d17cc251fd0c4068eb253b03c9fb107897771fdc/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp#L1992
            isolation_window_metrics['isolation_lower'].append(lower_offset)
            isolation_window_metrics['isolation_upper'].append(upper_offset)

            lower = precursor_mz - lower_offset
            upper = precursor_mz + upper_offset

            # survey peaks inside the isolation window as rows of (m/z, intensity)
            in_window = np.logical_and(survey_mz >= lower, survey_mz <= upper)
            s = np.column_stack((survey_mz[in_window], survey_int[in_window]))
            isolation_window_metrics['peaks_in_window'].append(np.shape(s)[0])

            int_sort_desc = np.flip(np.argsort(s[:, 1]))
            ranked = s[int_sort_desc]
            if np.shape(s)[0] > 1:
                # NOTE(review): the trailing [0] binds to the denominator only,
                # so this stores every ranked intensity (except the lowest)
                # divided by the second highest one - not the pairwise ratios
                # top1&2, 2&3, ... the original comment describes. Confirm intent.
                isolation_window_metrics['int_ratio_ranked_peaks_in_window'].append(
                    ranked[:-1, 1] / ranked[1:, 1][0])
            else:
                isolation_window_metrics['int_ratio_ranked_peaks_in_window'].append(0)  # bigger is better, though best is 0

            isolation_window_metrics['summed_window_intensity'].append(np.sum(ranked[:, 1]))
            isolation_window_metrics['isolation_target_intensity'].append(precursor.getIntensity())

            # TODO this needs to go outside
            tol = 0.5
            if spec.metaValueExists('filter string'):
                filter_string = spec.getMetaValue('filter string')
                if 'FTMS' in filter_string:
                    tol = 0.05
                elif 'ITMS' in filter_string:
                    tol = 0.5
                elif 'QTOF' in filter_string:  # TOFMS, SQMS, TQMS, SectorMS
                    tol = 0.1

            # ms2 peaks directly from isolation window?
            # compares every window m/z against every MS2 m/z in one go
            unfragmented = np.any(
                np.isclose(s[:, 0][:, np.newaxis], np.asarray(mzs)[np.newaxis, :], atol=tol))
            isolation_window_metrics['peaks_in_window_in_ms2'].append(str(unfragmented))

    ## Spectra detail numbers
    for metric_name, table in (
            ("Spectrum acquisition metric values - MS1", spectrum_acquisition_metrics_MS1),
            ("Spectrum acquisition metric values - MS2", spectrum_acquisition_metrics_MS2),
            ("Spectra topn ranks", spectrum_topn),
            ("Tandem spectrum metric values - MS2", tandem_spectrum_metrics_MS2),
            ("Trap metric values - MS1", trap_metrics_MS1),
            ("Trap metric values - MS2", trap_metrics_MS2),
            ("isolation window metrics", isolation_window_metrics),
    ):
        metrics.append(
            mzqc.QualityMetric(cvRef="QC",
                               accession="QC:0000000",
                               name=metric_name,
                               value=table)
        )

    ## Spectra numbers
    for level, count in mslevelcounts.items():
        metrics.append(
            mzqc.QualityMetric(cvRef="QC",
                               accession="QC:0000000",
                               name="Number of MS{l} spectra".format(l=str(level)),
                               value=count)
        )

    chroms = exp.getChromatograms()
    metrics.append(
        mzqc.QualityMetric(cvRef="QC",
                           accession="QC:0000000",
                           name="Number of chromatograms",
                           value=len(chroms))
    )

    ## Ranges
    metrics.append(
        mzqc.QualityMetric(cvRef="QC",
                           accession="QC:0000000",
                           name="MZ aquisition range",
                           value=[min_mz, max_mz])
    )

    metrics.append(
        mzqc.QualityMetric(cvRef="QC",
                           accession="QC:0000000",
                           name="RT aquisition range",
                           value=[exp[0].getRT(), exp[exp.size()-1].getRT()])
    )

    # TIC
    metrics.append(
        mzqc.QualityMetric(cvRef="QC",
                           accession="QC:0000000",
                           name="Total ion current",
                           value=tic_tab)
    )

    # Chrom: only the first total ion current chromatogram is tabulated
    chrom_tab: Dict[str,List[Any]] = defaultdict(list)
    for t in chroms:
        if t.getChromatogramType() == oms.ChromatogramSettings.ChromatogramType.TOTAL_ION_CURRENT_CHROMATOGRAM:
            for chro_peak in t:
                chrom_tab['RT'].append(chro_peak.getRT())
                chrom_tab['int'].append(chro_peak.getIntensity())
            break

    metrics.append(
        mzqc.QualityMetric(cvRef="QC",
                           accession="QC:0000000",
                           name="Chromatogram",
                           value=chrom_tab)
    )
    # TODO is there a difference between TIC as defined in MS:1000235 and the chromatogram you get from TRP?? In MZML it says its a MS:1000235 (ion current detected in each of a series of mass spectra) but is it?
    # TODO consider collection of spectrum_native_id
    return mzqc.RunQuality(metadata=meta, qualityMetrics=metrics)
def store(self, ofname: str, peakMap: pyopenms.MSExperiment):
    """Write the MS2 spectra of `peakMap` to `ofname` as an MS2 text file.

    A header block is written first, followed by one record per MS2
    spectrum: the scan line, the scan info lines, the charge line and the
    list of ions. MS1 spectra are not written; they only supply the
    precursor scan number recorded for the MS2 spectra that follow them.

    `peakMap` is sorted in place if it is not sorted already. It must hold
    at least one spectrum, and every MS2 spectrum must have at least one
    precursor (only the first precursor is used).

    Keyword arguments:
    ofname: path of the file to write (created or overwritten)
    peakMap: the experiment to write
    """
    #sort peakMap if necessary
    if not peakMap.isSorted():
        peakMap.sortSpectra()
        peakMap.updateRanges()

    # collect the header values before the output file is touched
    first_spectrum = peakMap.getSpectrum(0)
    firstScan = self._getScan(first_spectrum.getNativeID().decode('utf-8'))
    dataType = first_spectrum.getType()
    dataType = 'Centroid' if dataType == 1 else 'Profile' if dataType == 2 else 'Unknown'
    lastScan = self._getScan(
        peakMap.getSpectrum(peakMap.getNrSpectra() - 1).getNativeID().decode('utf-8'))
    precursorFile = basename(ofname).replace('ms2', 'ms1')

    header = [
        ('CreationDate',
         datetime.datetime.now().strftime('%m/%d/%Y %I:%M:%S %p')),
        ('Extractor', 'msConvert'),
        ('ExtractorVersion', '0.1'),
        ('Comments', 'msConvert was written by Aaron Maurais, 2019'),
        ('ExtractorOptions', 'MS2'),
        ('AcquisitionMethod', 'Data-Dependent'),
        ('InstrumentType', 'Unknown'),
        ('ScanType', 'MS2'),
        ('DataType', dataType),
        ('FirstScan', firstScan),
        ('LastScan', lastScan),
    ]

    # the context manager closes the file even if writing a scan fails
    with open(ofname, 'w') as outF:
        #print header
        for key, value in header:
            outF.write(self._writeValue(MS2File._h_tag, key, value))

        #iterate through spectra
        preScan = 'Unknown'
        for scan in peakMap.getSpectra():
            if scan.getMSLevel() == 1:
                # remember the latest MS1 scan as precursor scan
                preScan = self._getScan(scan.getNativeID().decode('utf-8'))
            if scan.getMSLevel() == 2:
                #write header info
                curScan = self._getScan(scan.getNativeID().decode('utf-8'))
                precursors = scan.getPrecursors()
                preCharge = int(precursors[0].getCharge())
                preMZ = precursors[0].getMZ()

                #print scan line
                outF.write('{}\t{}\t{}\t{}\n'.format(MS2File._s_tag,
                                                     curScan.zfill(6),
                                                     curScan.zfill(6),
                                                     preMZ))

                #print scan info
                outF.write(
                    self._writeValue(MS2File._i_tag, 'RetTime', scan.getRT()))
                outF.write(
                    self._writeValue(MS2File._i_tag, 'PrecursorInt',
                                     precursors[0].getIntensity()))
                outF.write(
                    self._writeValue(MS2File._i_tag, 'IonInjectionTime',
                                     'Unknown'))
                ameth = ' '.join(
                    [MS2File._activationMethods[x]
                     for x in precursors[0].getActivationMethods()])
                if not ameth:
                    ameth = 'Unknown'
                outF.write(
                    self._writeValue(MS2File._i_tag, 'ActivationType', ameth))
                outF.write(
                    self._writeValue(MS2File._i_tag, 'PrecursorFile',
                                     precursorFile))
                outF.write(
                    self._writeValue(MS2File._i_tag, 'PrecursorScan', preScan))
                outF.write(
                    self._writeValue(MS2File._i_tag, 'InstrumentType',
                                     'Unknown'))

                #write z line
                #after charge, the M+H m/z for the ion is listed, so calculate that here
                outF.write(
                    self._writeValue(
                        MS2File._z_tag, preCharge,
                        (float(preMZ) * preCharge) -
                        (preCharge * MS2File._H_mass) + MS2File._H_mass))

                #write ions
                for ion in scan:
                    outF.write('{0:.4f} {1:.1f}\n'.format(
                        round(ion.getMZ(), 4), round(ion.getIntensity(), 1)))