def dist(spectrum1: sus.MsmsSpectrum, spectrum2: sus.MsmsSpectrum,
         method: str = 'dot'):
    """
    Compute a distance between two MS/MS spectra.

    Parameters
    ----------
    spectrum1 : sus.MsmsSpectrum
        The first spectrum. A shallow copy is intensity-scaled with
        ``scale_intensity(max_intensity=1)`` before the comparison.
    spectrum2 : sus.MsmsSpectrum
        The second spectrum, handled the same way as the first.
    method : str
        The distance to compute. Only 'dot' is implemented.

    Returns
    -------
    For 'dot': one minus the xQuest cross-correlation prescore of the two
    spectra. For any other method: 0.
    """
    if method != 'dot':
        return 0

    converted = []
    for spectrum in (spectrum1, spectrum2):
        scaled = copy.copy(spectrum).scale_intensity(max_intensity=1)
        openms_spectrum = pyopenms.MSSpectrum()
        openms_spectrum.set_peaks([scaled.mz, scaled.intensity])
        converted.append(openms_spectrum)

    # Third parameter of xCorrelationPrescore is bin size in Da.
    prescore = pyopenms.XQuestScores().xCorrelationPrescore(
        converted[0], converted[1], 0.005)
    return 1 - prescore
def handle_compressed_frame(allmz, allint, allim, mslevel, rtime, center, width):
    """Build one spectrum out of the per-scan arrays collected for a frame.

    The m/z, intensity and ion mobility chunks are concatenated. The ion
    mobility values are attached as a float data array named "Ion Mobility"
    and the spectrum is sorted by position before it is returned.

    Args:
        allmz: sequence of m/z arrays (one entry per scan).
        allint: sequence of intensity arrays (one entry per scan).
        allim: sequence of ion mobility value sequences (one entry per scan).
        mslevel: MS level stored on the spectrum; the precursor is only
            filled in when this equals 2.
        rtime: retention time stored on the spectrum.
        center: precursor m/z (used only for MS level 2).
        width: isolation width; half of it is stored as both the upper and
            the lower isolation window offset (used only for MS level 2).

    Returns:
        The assembled pyopenms.MSSpectrum.
    """
    merged_mz = np.concatenate(allmz)
    merged_intensity = np.concatenate(allint)
    merged_mobility = np.concatenate(allim)

    # Ion mobility travels alongside the peaks in a named float data array.
    mobility_array = pyopenms.FloatDataArray()
    mobility_array.setName("Ion Mobility")
    mobility_array.resize(len(merged_mz))
    for position, mobility in enumerate(merged_mobility):
        mobility_array[position] = mobility

    frame_spectrum = pyopenms.MSSpectrum()
    frame_spectrum.setMSLevel(mslevel)
    frame_spectrum.setRT(rtime)
    frame_spectrum.setFloatDataArrays([mobility_array])

    # A precursor is always attached; it stays default-initialised for MS1.
    precursor = pyopenms.Precursor()
    if mslevel == 2:
        half_width = width / 2.0
        precursor.setMZ(center)
        precursor.setIsolationWindowUpperOffset(half_width)
        precursor.setIsolationWindowLowerOffset(half_width)
    frame_spectrum.setPrecursors([precursor])

    frame_spectrum.set_peaks((merged_mz, merged_intensity))
    frame_spectrum.sortByPosition()
    return frame_spectrum
def _mergeSpectra(self, tmp):
    """
    Perform spectral merging
     - we pick one spectrum (the first one) as our reference and add
       all other data from the other spectra to it
     - we check that merged spectra have equal precursor m/z and same
       float array

    ``tmp`` is a non-empty sequence of spectra; each needs at least one
    precursor and at least one float data array. Only the first float data
    array of every spectrum is merged. The returned spectrum is a copy of
    ``tmp[0]`` holding the concatenated peaks, sorted by position.

    Raises AssertionError when a spectrum does not match the reference.
    """
    merge_spec = pyopenms.MSSpectrum(tmp[0])
    # Reuse the first float data array (name etc.) as an empty container.
    fda = tmp[0].getFloatDataArrays()[0]
    fda.clear()
    allmz = []
    allint = []
    for q in tmp:
        m, i = q.get_peaks()
        allmz.append(m)
        allint.append(i)

        # Sanity checks, precursors of merged spectra and float arrays need
        # to match. The difference is compared by absolute value: without
        # abs() a precursor that is *smaller* than the reference would
        # always pass, no matter how far off it is.
        assert abs(q.getPrecursors()[0].getMZ()
                   - merge_spec.getPrecursors()[0].getMZ()) < 1e-5
        assert len(q.getFloatDataArrays()) == len(merge_spec.getFloatDataArrays())
        assert q.getFloatDataArrays()[0].getName() == merge_spec.getFloatDataArrays()[0].getName()

        # TODO this is not very efficient, fix in pyOpenMS!
        for d in q.getFloatDataArrays()[0]:
            fda.push_back(d)

    mz = np.concatenate(allmz)
    intens = np.concatenate(allint)

    # create merge spec
    merge_spec.set_peaks((mz, intens))
    merge_spec.setFloatDataArrays([fda])
    merge_spec.sortByPosition()
    return merge_spec
def _write_spectra_mzml(filename: str, spectra: Iterable[sus.MsmsSpectrum]) \
        -> None:
    """
    Export spectra as MS level 2 spectra to an mzML file.

    Each spectrum's identifier becomes the native ID and its precursor m/z
    and charge are stored on a precursor. The optional attributes
    ``retention_time``, ``filename``, ``scan`` and ``cluster`` are exported
    when present (the latter three as byte-string meta values).

    Parameters
    ----------
    filename : str
        The mzML file name where the spectra will be written.
    spectra : Iterable[sus.MsmsSpectrum]
        The spectra to be written to the mzML file.
    """
    experiment = pyopenms.MSExperiment()
    progress = tqdm.tqdm(spectra, desc='Spectra written', unit='spectra')
    for spectrum in progress:
        precursor = pyopenms.Precursor()
        precursor.setMZ(spectrum.precursor_mz)
        precursor.setCharge(spectrum.precursor_charge)

        exported = pyopenms.MSSpectrum()
        exported.setMSLevel(2)
        exported.setNativeID(spectrum.identifier)
        exported.setPrecursors([precursor])
        exported.set_peaks([spectrum.mz, spectrum.intensity])

        if hasattr(spectrum, 'retention_time'):
            exported.setRT(spectrum.retention_time)
        # The file name is encoded as is; scan and cluster go through str().
        if hasattr(spectrum, 'filename'):
            exported.setMetaValue('filename', str.encode(spectrum.filename))
        for attribute in ('scan', 'cluster'):
            if hasattr(spectrum, attribute):
                exported.setMetaValue(
                    attribute, str.encode(str(getattr(spectrum, attribute))))

        experiment.addSpectrum(exported)

    pyopenms.MzMLFile().store(filename, experiment)
def toMSSpectrum(self):
    """Build a pyopenms.MSSpectrum from this object.

    Retention time, MS level, polarity ('0', '+' or '-'), the precursor
    (m/z, intensity) pairs and the peaks are transferred; ranges are updated
    before the spectrum is returned. An unknown polarity raises KeyError.
    """
    result = pyopenms.MSSpectrum()
    result.setRT(self.rt)
    result.setMSLevel(self.msLevel)

    settings = result.getInstrumentSettings()
    polarity_by_symbol = {
        '0': pyopenms.IonSource.Polarity.POLNULL,
        '+': pyopenms.IonSource.Polarity.POSITIVE,
        '-': pyopenms.IonSource.Polarity.NEGATIVE,
    }
    settings.setPolarity(polarity_by_symbol[self.polarity])
    result.setInstrumentSettings(settings)

    precursors = []
    for precursor_mz, precursor_intensity in self.precursors:
        precursor = pyopenms.Precursor()
        precursor.setMZ(precursor_mz)
        precursor.setIntensity(precursor_intensity)
        precursors.append(precursor)
    result.setPrecursors(precursors)

    if IS_PYOPENMS_2:
        # This branch hands the two columns over as separate arrays.
        result.set_peaks((self.peaks[:, 0], self.peaks[:, 1]))
    else:
        result.set_peaks(self.peaks)

    result.updateRanges()
    return result
def handle_stack(stack, mslevel):
    """Turn the text lines of one scan into a spectrum and add it to ``outexp``.

    Layout read from ``stack``:
      * ``stack[0]``: two whitespace-separated fields, the second one an
        integer scan number (parsed but not used further);
      * ``stack[1]``: three fields, the third one the retention time, which
        is multiplied by 60 (presumably minutes to seconds - confirm);
      * ``stack[3:]``: "m/z intensity" lines; blank lines and peaks whose
        intensity is not above zero are dropped.

    NOTE: ``mslevel`` is not used; the spectrum is always stored as MS
    level 1. If a value cannot be converted to float a message is printed
    and nothing is added.
    """
    scan_fields = stack[0].split()
    assert (len(scan_fields) == 2)
    int(scan_fields[1])  # the scan number has to be an integer

    rt_fields = stack[1].split()
    assert (len(rt_fields) == 3)
    rt = float(rt_fields[2]) * 60

    # Convert to mz/int pairs
    pairs = []
    for line in stack[3:]:
        if len(line.strip()) > 0 and float(line.split()[1]) > 0.0:
            pairs.append(line.split())

    try:
        mz = [float(fields[0]) for fields in pairs]
        intensity = [float(fields[1]) for fields in pairs]
    except ValueError:
        print("Could not convert", len(stack), "with pairs", len(pairs))
        return
    assert len(mz) == len(intensity)

    peaks = np.empty((len(mz), 2), dtype=np.float32)
    peaks[:, 0] = mz
    peaks[:, 1] = intensity

    spectrum = pyopenms.MSSpectrum()
    spectrum.set_peaks(peaks)
    spectrum.setRT(rt)
    spectrum.setMSLevel(1)
    outexp.addSpectrum(spectrum)
def testPeakTypeEstimator():
    """
    @tests:
     PeakTypeEstimator.__init__
     PeakTypeEstimator.estimateType
    """
    # Smoke test: estimating the type of an empty spectrum must not raise.
    estimator = pyopenms.PeakTypeEstimator()
    empty_spectrum = pyopenms.MSSpectrum()
    estimator.estimateType(empty_spectrum)
def fft_pick(spec, fft_low_cutoff, fft_high_cutoff):
    """Peak-pick a spectrum after FFT filtering it.

    ``spec`` is first passed through ``fft_filter_spec`` with the two
    cutoffs; the filtered spectrum is then picked with
    ``pyopenms.PeakPickerHiRes``. Returns the picked spectrum.
    """
    # filter spec
    smoothed = fft_filter_spec(spec, fft_low_cutoff, fft_high_cutoff)

    # apply pyopenms peakpicker on filtered spec
    picked = pyopenms.MSSpectrum()
    pyopenms.PeakPickerHiRes().pick(smoothed, picked)
    return picked
def convertToMSSpectrum(input_):
    """Copy the peaks of ``input_`` into a new pyopenms.MSSpectrum.

    ``input_`` is any iterable of objects offering ``getMZ()`` and
    ``getIntensity()``; for each of them a ``Peak1D`` with the same m/z and
    intensity is appended, in iteration order.
    """
    converted = pyopenms.MSSpectrum()
    for source_peak in input_:
        copied_peak = pyopenms.Peak1D()
        copied_peak.setMZ(source_peak.getMZ())
        copied_peak.setIntensity(source_peak.getIntensity())
        converted.push_back(copied_peak)
    return converted
def getSwathExperiment(nr_scans, nr_swathes, precusorsisolation):
    """Create a synthetic SWATH experiment.

    For every cycle ``spec_cnt`` in ``range(1, nr_scans)`` one MS1 spectrum
    (two peaks) is added, followed by ``nr_swathes`` MS2 spectra (three peaks
    each). Intensities follow a normal density over the cycle number.

    ``precusorsisolation`` selects how the isolation window offsets are
    written on the MS2 precursors:
      * "OpenSwath": lower/upper offset set to 400 + i * 25 / 425 + i * 25
      * "Pwiz": both offsets set to 12.5
      * "Missing": no offsets are set
    Any other value raises ``Exception``.
    """
    exp = pyopenms.MSExperiment()
    for spec_cnt in range(1, nr_scans):
        # MS1 spectrum of this cycle
        ms1_centre = nr_scans / 2
        ms1_intensity = norm.pdf((spec_cnt - ms1_centre) / 4.0)
        ms1_peaks = numpy.array(
            [[500.01, ms1_intensity * 3000], [510.05, ms1_intensity * 3000]],
            dtype=numpy.float32)

        ms1_spec = pyopenms.MSSpectrum()
        ms1_spec.setMSLevel(1)
        ms1_spec.setRT(spec_cnt * 10)
        ms1_spec.set_peaks(ms1_peaks)
        exp.addSpectrum(ms1_spec)

        # Swath 1: 500.01, 500.15, 500.25
        # Swath 2: 501.01, 501.15, 501.25
        # Swath 3: 502.01, 502.15, 502.25
        # Swath 4: 504.01, 504.15, 504.25
        # Swath 5: 505.01, 505.15, 505.25
        for i in range(nr_swathes):
            # shift the middle of the gaussian by one in each scan
            swath_centre = nr_scans / 2 + i
            # 20% higher intensity in each swath
            intensity = norm.pdf((spec_cnt - swath_centre) / 4.0) * 1.2

            if precusorsisolation == "OpenSwath":
                offsets = (400 + i * 25, 425 + i * 25)
            elif precusorsisolation == "Pwiz":
                offsets = (12.5, 12.5)
            elif precusorsisolation == "Missing":
                offsets = None
            else:
                raise Exception(
                    "precusorsisolation needs to be {Missing,Pwiz,OpenSwath}")

            prec = pyopenms.Precursor()
            if offsets is not None:
                prec.setIsolationWindowLowerOffset(offsets[0])
                prec.setIsolationWindowUpperOffset(offsets[1])
            prec.setMZ(400 + i * 25 + 12.5)

            swath_peaks = numpy.array(
                [[500.01 + i, intensity * 3000],
                 [500.15 + i, intensity * 3000 / 2.0],
                 [500.25 + i, intensity * 3000 / 3.0]],
                dtype=numpy.float32)

            spec = pyopenms.MSSpectrum()
            spec.setMSLevel(2)
            spec.setRT(spec_cnt * 10 + i + 1)
            spec.setPrecursors([prec])
            spec.set_peaks(swath_peaks)
            exp.addSpectrum(spec)
    return exp
def testMSSpectrum(self):
    """A peak pushed into an MSSpectrum comes back unchanged on iteration."""
    expected_mz = 500.0
    expected_intensity = 1e5

    peak = pyopenms.Peak1D()
    peak.setMZ(expected_mz)
    peak.setIntensity(expected_intensity)

    spectrum = pyopenms.MSSpectrum()
    spectrum.push_back(peak)

    # Unpacking also checks that exactly one peak is stored.
    [recovered] = list(spectrum)
    assert isinstance(recovered, pyopenms.Peak1D)
    assert recovered.getMZ() == expected_mz
    assert recovered.getIntensity() == expected_intensity
def main(options):
    """Peak-pick the continuous spectra of a file and store the result.

    The experiment is loaded from ``options.infile``. Spectra are grouped by
    the type returned by ``getSpectrumType``; a type counts as continuous
    when ``percentile75`` of its per-spectrum ``continousSpectrumCheck``
    results compares equal to True. Spectra of continuous types are picked
    with ``PeakPickerHiRes``, all others are copied unchanged. The result is
    written to ``options.outfile`` with compression enabled.

    Progress is reported with ``print()`` calls; the former Python 2 print
    statements were a SyntaxError on Python 3.
    """
    # generate fragmentationtype lookup
    lookup = {}
    methods = pyopenms.ActivationMethod()
    for attr in dir(methods):
        value = getattr(methods, attr)
        if isinstance(value, int):
            lookup[value] = attr

    print("loading MS Experiment ")
    exp = pyopenms.MSExperiment()
    fh = pyopenms.FileHandler()
    fh.loadExperiment(options.infile, exp)

    print("checking spectra types:")
    fragmentationTypes = {}
    for s in exp:
        typ = getSpectrumType(s, lookup)
        cont = continousSpectrumCheck(s)
        # Append in place instead of rebuilding the list for every spectrum.
        fragmentationTypes.setdefault(typ, []).append(cont)

    isContinousSpectrum = {}
    for typ in fragmentationTypes:
        check = percentile75(fragmentationTypes[typ])
        isContinousSpectrum[typ] = check
        # Explicit comparison kept on purpose: percentile75 is defined
        # elsewhere and may return something other than a plain bool.
        if check == True:
            print("\t" + typ + " has continous spectra data")
        else:
            print("\t" + typ + " has centroided spectra data")

    print("picking spectra")
    expNew = pyopenms.MSExperiment()
    picker = pyopenms.PeakPickerHiRes()
    for s in exp:
        typ = getSpectrumType(s, lookup)
        if isContinousSpectrum[typ] == True:
            newSpec = pyopenms.MSSpectrum()
            picker.pick(s, newSpec)
            expNew.addSpectrum(newSpec)
        else:
            expNew.addSpectrum(s)

    print("saving file to ", options.outfile)
    mzFile = pyopenms.MzMLFile()
    fileoptions = mzFile.getOptions()
    fileoptions.setCompression(True)
    mzFile.setOptions(fileoptions)
    mzFile.store(options.outfile, expNew)
    print("finished")
def testMSExperiment():
    """
    @tests:
     MSExperiment.__init__
     MSExperiment.getLoadedFilePath
     MSExperiment.getMaxMZ
     MSExperiment.getMaxRT
     MSExperiment.getMetaValue
     MSExperiment.getMinMZ
     MSExperiment.getMinRT
     MSExperiment.push_back
     MSExperiment.setLoadedFilePath
     MSExperiment.setMetaValue
     MSExperiment.size
     MSExperiment.sortSpectra
     MSExperiment.updateRanges
     MSExperiment.__eq__
     MSExperiment.__ge__
     MSExperiment.__getitem__
     MSExperiment.__gt__
     MSExperiment.__iter__
     MSExperiment.__le__
     MSExperiment.__lt__
     MSExperiment.__ne__
     MSExperiment.clearMetaInfo
     MSExperiment.getKeys
     MSExperiment.isMetaEmpty
     MSExperiment.metaValueExists
     MSExperiment.removeMetaValue
    """
    experiment = pyopenms.MSExperiment()
    _testMetaInfoInterface(experiment)

    experiment.updateRanges()
    experiment.sortSpectra(True)

    # All range accessors report floats, even for an empty experiment.
    range_getters = (experiment.getMaxRT, experiment.getMinRT,
                     experiment.getMaxMZ, experiment.getMinMZ)
    for getter in range_getters:
        assert isinstance(getter(), float)

    assert isinstance(experiment.getLoadedFilePath(), str)
    experiment.setLoadedFilePath("")

    # Container behaviour: size, indexing and iteration.
    assert experiment.size() == 0
    experiment.push_back(pyopenms.MSSpectrum())
    assert experiment.size() == 1
    assert experiment[0] is not None
    assert isinstance(list(experiment), list)

    # Comparison operators against itself.
    assert experiment == experiment
    assert not experiment != experiment
def run_list_conversions(self):
    """Repeatedly pass a 500-element precursor list into new spectra.

    500 spectra are created and kept alive in a list; memory usage is
    reported through ``show_mem`` on every 100th run.
    """
    precursors = 500 * [pyopenms.Precursor()]
    kept_spectra = []
    for run in range(500):
        if (run + 1) % 100 == 0:
            show_mem("%4d runs" % run)
        spectrum = pyopenms.MSSpectrum()
        # The list is set twice per spectrum, exactly as before
        # (presumably to also exercise replacing an existing list - confirm).
        for _ in range(2):
            spectrum.setPrecursors(precursors)
        kept_spectra.append(spectrum)
        del spectrum
    del kept_spectra
def set_spec_peaks(self):
    """Repeatedly call ``set_peaks`` with 10000-peak arrays on new spectra.

    1000 spectra are created and kept in a list; each receives the same two
    float32 columns three times. Memory usage is reported through
    ``show_mem`` on every 100th spectrum.
    """
    peak_data = np.zeros((10000, 2), dtype=np.float32)
    kept_spectra = []
    for count in range(1000):
        if (count + 1) % 100 == 0:
            show_mem("%4d specs processed" % count)
        spectrum = pyopenms.MSSpectrum()
        # Three consecutive assignments of the same data.
        for _ in range(3):
            spectrum.set_peaks((peak_data[:, 0], peak_data[:, 1]))
        kept_spectra.append(spectrum)

    # Only the loop variable is unbound here; the list keeps its entries.
    for held in kept_spectra:
        del held
    del peak_data
def fft_filter_spec(spec, fft_low_cutoff, fft_high_cutoff):
    """Band-pass filter the intensities of a spectrum in Fourier space.

    The intensity column is Fourier transformed, the coefficients
    ``[0, fft_low_cutoff]`` and ``[fft_high_cutoff, end)`` are zeroed, and
    the real part of the inverse transform replaces the intensities.

    Args:
        spec: spectrum whose ``get_peaks()`` returns an (n, 2) array holding
            m/z in column 0 and intensity in column 1.
        fft_low_cutoff: index of the last low coefficient that is zeroed.
        fft_high_cutoff: index of the first high coefficient that is zeroed.

    Returns:
        A new pyopenms.MSSpectrum with the original m/z values and the
        filtered intensities. It carries peaks only.
    """
    # The top-level callables scipy.fft()/scipy.ifft() no longer exist in
    # current SciPy (scipy.fft is a subpackage there, so calling it raises
    # TypeError). numpy.fft is what SciPy's top-level namespace exposed
    # under those names, so it is used directly. Imported locally so this
    # function does not depend on the file's import block.
    from numpy.fft import fft, ifft

    # get raw data
    peaks = spec.get_peaks()
    _, intensities = peaks.T  # also enforces the two-column layout

    # fft filter on intensities
    coefficients = fft(intensities)
    coefficients[:fft_low_cutoff + 1] = 0
    coefficients[fft_high_cutoff:] = 0
    peaks[:, 1] = ifft(coefficients).real

    # build pyopenms.MSSpectrum from filtered raw data
    new_spec = pyopenms.MSSpectrum()
    new_spec.set_peaks(peaks)
    return new_spec
def four_d_spectrum_to_experiment(spec):
    """Function that converts a 4D spectrum object which contains retention
    time, ion mobility, mass to charge, and intensity data, into a new
    experiment where the ion mobility becomes the retention time of each of
    its new spectra and vice versa.

    Peaks are sorted by (ion mobility, m/z, intensity) and one spectrum is
    emitted per distinct ion mobility value. The original retention time is
    stored for every peak in the first float data array of each new spectrum.

    Earlier behaviour that is fixed here: the group with the highest ion
    mobility was never written (the loop only flushed on a change of value),
    and a group whose ion mobility is 0.0 was skipped by a truthiness test.

    Args:
        spec (MSSpectrum): An OpenMS MSSpectrum object whose first float data
            array holds one ion mobility value per peak.

    Returns:
        MSExperiment: A new MSExperiment where each MSSpectrum object is a 3D
            spectrum from the original where the retention time and ion
            mobility has been swapped.
    """
    points = sorted(zip(spec.getFloatDataArrays()[0], *spec.get_peaks()),
                    key=lambda x: (x[0], x[1], x[2]))
    original_rt = spec.getRT()
    new_exp = ms.MSExperiment()

    def flush(ion_mobility, mzs, intensities):
        # Emit one spectrum for a finished ion mobility group.
        new_spec = ms.MSSpectrum()
        new_spec.setRT(ion_mobility)
        new_spec.set_peaks((mzs, intensities))
        rt_fda = ms.FloatDataArray()
        for _ in mzs:
            rt_fda.push_back(original_rt)
        new_spec.setFloatDataArrays([rt_fda])
        new_exp.addSpectrum(new_spec)

    new_mz, new_int = [], []
    curr_im = None
    for im, mz, intensity in points:
        if im != curr_im:
            # ``is not None`` rather than truthiness: 0.0 is a valid value.
            if curr_im is not None:
                flush(curr_im, new_mz, new_int)
            new_mz, new_int = [], []
            curr_im = im
        new_mz.append(mz)
        new_int.append(intensity)

    # The final group has no following value change, so flush it explicitly.
    if curr_im is not None:
        flush(curr_im, new_mz, new_int)

    return new_exp
def store_frame(frame_id, filename, q, verbose=False):
    """
    Store a single frame as an individual mzML file

    Note that this is the easiest way to visualize and process the data but
    involves a few hacks, namely storing the IM axis as the RT of each
    spectrum.

    Every scan of the frame becomes one spectrum whose RT is the 1/K0 value
    of that scan. The database connection ``conn`` and the data accessor
    ``td`` are taken from module scope.

    Args:
        frame_id: Id of the frame in the ``Frames`` table.
        filename: path of the mzML file to write.
        q: ignored - the name is rebound to the query cursor right away; the
            parameter is kept so existing callers keep working.
        verbose: print per-scan diagnostics.

    The diagnostics use ``print()`` calls; the former Python 2 print
    statements were a SyntaxError on Python 3.
    """
    # Get a projected mass spectrum:
    q = conn.execute(
        "SELECT NumScans FROM Frames WHERE Id={0}".format(frame_id))
    num_scans = q.fetchone()[0]

    # Get the mapping of the ion mobility axis
    scan_number_axis = np.arange(num_scans, dtype=np.float64)
    ook0_axis = td.scanNumToOneOverK0(frame_id, scan_number_axis)

    # Get a new MSExperiment from OpenMS
    e = pyopenms.MSExperiment()
    for k, scan in enumerate(td.readScans(frame_id, 0, num_scans)):
        index = np.array(scan[0], dtype=np.float64)
        mz = td.indexToMz(frame_id, index)
        intens = scan[1]

        # Store data in OpenMS Spectrum file -> each TOF push is an individual
        # spectrum and we set the "RT" to be the ion mobility dimension
        s = pyopenms.MSSpectrum()
        s.set_peaks((mz, intens))
        s.setRT(ook0_axis[k])
        e.addSpectrum(s)

        if verbose:
            print("scan ", k)
        if len(mz) > 0 and verbose:
            print(" len: ", len(mz), len(intens))
            print(" at ook0", ook0_axis[k])
            print(" len: ", mz, intens)
            for p in s:
                print(p.getMZ(), p.getIntensity())

    # Store file at designated position
    pyopenms.MzMLFile().store(filename, e)
def testMSSpectrum(self):
    """Round-trip one peak through an MSSpectrum and check its ranges."""
    expected_mz = 500.0
    expected_intensity = 1e5

    peak = pyopenms.Peak1D()
    peak.setMZ(expected_mz)
    peak.setIntensity(expected_intensity)

    spectrum = pyopenms.MSSpectrum()
    spectrum.push_back(peak)

    # Unpacking also checks that exactly one peak is stored.
    [recovered] = list(spectrum)
    assert isinstance(recovered, pyopenms.Peak1D)
    assert recovered.getMZ() == expected_mz
    assert recovered.getIntensity() == expected_intensity

    spectrum.updateRanges()
    range_getters = (spectrum.getMinMZ, spectrum.getMaxMZ,
                     spectrum.getMinIntensity, spectrum.getMaxIntensity)
    for getter in range_getters:
        assert isinstance(getter(), float)

    # With a single peak both intensity bounds equal that peak's intensity.
    assert spectrum.getMinIntensity() == expected_intensity
    assert spectrum.getMaxIntensity() == expected_intensity
target = read_csv_faster(config.target) source = read_csv_faster(config.source) super_spectrum_mz = [] for i in target['feature']: super_spectrum_mz.append(float(i)) mz = [] for i in source['feature']: mz.append(float(i)) spectrums = [] based_intensity = np.transpose(source['mz_exp']) for i in range(based_intensity.shape[0]): peak_intensity = based_intensity[i] spectrum = pyopenms.MSSpectrum() spectrum.set_peaks([mz, peak_intensity]) spectrum.sortByPosition() spectrums.append(spectrum) super_spectrum_mz = np.array(super_spectrum_mz) print(super_spectrum_mz.shape) aligned_intensities = [] aligner = pyopenms.SpectrumAlignment() target_spectrum = pyopenms.MSSpectrum() target_spectrum.set_peaks( [super_spectrum_mz, np.zeros_like(super_spectrum_mz)]) target_spectrum.sortByPosition() for spectrum in spectrums:
def store_frame(frame_id, td, conn, exp, verbose=False, compressFrame=True):
    """
    Convert a single frame into spectra and hand them to ``exp``.

    Note that there are two ways to store the data:
      (i) Multiple spectra per frame (for visualization), compressFrame is
      False. Each scan becomes its own spectrum; the ion mobility is stored
      as drift time on the precursor and all spectra of the frame share the
      frame's retention time.

      (ii) One spectrum per frame, compressFrame is True. This puts all peaks
      into a single spectrum (while storing the IM data in an extra array
      named "Ion Mobility"). This is more efficient for storage and allows
      analysis that is ignorant of the IM dimension.

    Args:
        frame_id: Id of the frame in the ``Frames`` table.
        td: data accessor providing ``scanNumToOneOverK0``, ``readScans``
            and ``indexToMz``.
        conn: database connection providing ``execute``.
        exp: consumer receiving every spectrum via ``consumeSpectrum``.
        verbose: print the detected MS level.
        compressFrame: select mode (ii) instead of mode (i).

    Frames with MsMsType 2 are stored as MS level 2 with precursor m/z and a
    symmetric isolation window; everything else is stored as MS level 1.
    """
    # Get a projected mass spectrum:
    q = conn.execute("SELECT NumScans, Time, Polarity, MsMsType FROM Frames WHERE Id={0}".format(frame_id))
    tmp = q.fetchone()
    num_scans = tmp[0]
    time = tmp[1]
    msms = int(tmp[3])

    center = -1
    width = -1
    mslevel = 1
    if msms == 2:
        q = conn.execute("SELECT TriggerMass, IsolationWidth, PrecursorCharge, CollisionEnergy FROM FrameMsMsInfo WHERE Frame={0}".format(frame_id))
        tmp = q.fetchone()
        center = float(tmp[0])
        width = float(tmp[1])
        mslevel = 2

    if verbose:
        # print() call: the Python 2 print statement does not parse on Python 3
        print("mslevel", mslevel, msms)

    # Get the mapping of the ion mobility axis
    scan_number_axis = np.arange(num_scans, dtype=np.float64)
    ook0_axis = td.scanNumToOneOverK0(frame_id, scan_number_axis)

    allmz = []
    allint = []
    allim = []

    # Traverse in reversed order to get low ion mobilities first
    for k, scan in reversed(list(enumerate(td.readScans(frame_id, 0, num_scans)))):
        index = np.array(scan[0], dtype=np.float64)
        mz = td.indexToMz(frame_id, index)
        intens = scan[1]
        drift_time = ook0_axis[k]

        if compressFrame:
            allmz.append(mz)
            allint.append(intens)
            # One drift time entry per peak (no comprehension variable that
            # could shadow the loop index k).
            allim.append([drift_time] * len(mz))
            continue

        # Store data in OpenMS Spectrum file -> each TOF push is an individual
        # spectrum and we store the ion mobility in the precursor. The frame
        # can be reconstructed by grouping all spectra with the same RT.
        s = pyopenms.MSSpectrum()
        s.setMSLevel(mslevel)
        s.set_peaks((mz, intens))
        s.setRT(time)
        p = pyopenms.Precursor()
        p.setDriftTime(drift_time)
        if msms == 2:
            p.setMZ(center)
            p.setIsolationWindowUpperOffset(width / 2.0)
            p.setIsolationWindowLowerOffset(width / 2.0)
        s.setPrecursors([p])
        exp.consumeSpectrum(s)

    if compressFrame:
        mz = np.concatenate(allmz)
        intens = np.concatenate(allint)
        ims = np.concatenate(allim)

        fda = pyopenms.FloatDataArray()
        fda.setName("Ion Mobility")
        fda.resize(len(mz))
        for k, val in enumerate(ims):
            fda[k] = val

        sframe = pyopenms.MSSpectrum()
        sframe.setMSLevel(mslevel)
        sframe.setRT(time)
        sframe.setFloatDataArrays([fda])
        p = pyopenms.Precursor()
        if msms == 2:
            p.setMZ(center)
            p.setIsolationWindowUpperOffset(width / 2.0)
            p.setIsolationWindowLowerOffset(width / 2.0)
        sframe.setPrecursors([p])
        sframe.set_peaks((mz, intens))
        sframe.sortByPosition()
        exp.consumeSpectrum(sframe)
def bin_spectrum(self, spec: ms.MSSpectrum) -> None:
    """Bins a single spectrum in two passes.

    Every point (index 1 = m/z, index 2 = intensity, index 3 = ion mobility)
    is assigned to one bin per pass. Pass 0 uses ``num_bins`` bins starting
    at ``im_start``; pass 1 uses ``num_bins + 1`` bins shifted to
    ``im_offset``. Within a bin, runs of points whose m/z is within epsilon
    of the first m/z of the run are merged into that first point with the
    summed intensity. One spectrum per non-empty bin is appended to
    ``self.exps[pass][bin]``.

    Keyword arguments:
    spec: the spectrum to bin
    """
    points = util.get_spectrum_points(spec)
    points.sort(key=itemgetter(3))  # Ascending IM

    bins_per_pass = (self.num_bins, self.num_bins + 1)
    buckets = [[[] for _ in range(count)] for count in bins_per_pass]

    for point in points:  # Assign points to bins
        mobility = point[3]

        first_idx = int((mobility - self.im_start) / self.bin_size)
        if first_idx >= self.num_bins:
            first_idx = self.num_bins - 1
        buckets[0][first_idx].append(point)

        second_idx = int((mobility - self.im_offset) / self.bin_size) + 1
        if mobility < self.im_offset:
            second_idx = 0
        elif second_idx > self.num_bins:
            second_idx = self.num_bins
        buckets[1][second_idx].append(point)

    # First pass (index 0), then second pass (index 1)
    for pass_idx, bin_count in enumerate(bins_per_pass):
        for bin_idx in range(bin_count):
            bucket = buckets[pass_idx][bin_idx]
            if not bucket:
                continue

            bucket.sort(key=itemgetter(1))  # Ascending m/z

            merged = []
            slice_head = bucket[0]
            slice_mz = slice_head[1]
            running_intensity = 0
            for point in bucket:
                if self.within_epsilon(slice_mz, point[1]):
                    running_intensity += point[2]
                    continue
                # Reached a new m/z slice: close the previous one
                closed = list(slice_head)  # Prevents aliasing
                closed[2] = running_intensity
                merged.append(closed)
                slice_head, slice_mz = point, point[1]
                running_intensity = point[2]

            # Take care of the last slice
            closed = list(slice_head)
            closed[2] = running_intensity
            merged.append(closed)

            columns = list(zip(*merged))

            # The final binned spectrum
            new_spec = ms.MSSpectrum()
            im_fda = ms.FloatDataArray()
            for mobility in columns[3]:
                im_fda.push_back(mobility)
            new_spec.setRT(spec.getRT())
            new_spec.set_peaks((list(columns[1]), list(columns[2])))
            new_spec.setFloatDataArrays([im_fda])
            self.exps[pass_idx][bin_idx].addSpectrum(new_spec)
def store_frame(frame_id, td, conn, exp, verbose=False, compressFrame=True):
    """
    Convert a single frame into spectra and hand them to ``exp``.

    Note that there are two ways to store the data:
      (i) Multiple spectra per frame (for visualization), compressFrame is
      False. Each scan becomes its own spectrum; the ion mobility is stored
      as drift time on the precursor and all spectra of the frame share the
      frame's retention time.

      (ii) One spectrum per frame, compressFrame is True. This puts all peaks
      into a single spectrum (while storing the IM data in an extra array).
      This is more efficient for storage and allows analysis that is ignorant
      of the IM dimension.

    Note that msms = 2 means that we have an MS2 scan whereas msms = 8 stands
    for pasef scan. For pasef frames in compressed mode the accumulated scans
    are emitted as a separate spectrum each time the traversal reaches the
    ScanNumBegin of the current PasefFrameMsMsInfo row.

    Args:
        frame_id: Id of the frame in the ``Frames`` table.
        td: data accessor providing ``scanNumToOneOverK0``, ``readScans``
            and ``indexToMz``.
        conn: database connection providing ``execute``.
        exp: consumer receiving every spectrum via ``consumeSpectrum``.
        verbose: print diagnostics.
        compressFrame: select mode (ii) instead of mode (i).

    NOTE(review): in the pasef branch ``scandata`` is advanced after every
    switch that does not happen at scan 0; if a switch occurs after the last
    row has been used this raises IndexError. Peaks accumulated after the
    last switch are not emitted for pasef frames. Both behaviours are
    unchanged here - confirm against real data.
    """
    # Get a projected mass spectrum:
    q = conn.execute(
        "SELECT NumScans, Time, Polarity, MsMsType FROM Frames WHERE Id={0}".
        format(frame_id))
    tmp = q.fetchone()
    num_scans = tmp[0]
    time = tmp[1]
    msms = int(tmp[3])

    center = -1
    width = -1
    next_scan_switch = -1  # -1: the frame is not split into several spectra
    mslevel = 1
    scan_data_it = 0

    # Check whether we have a MS2 or a PASEF scan
    if msms == 2:
        q = conn.execute(
            "SELECT TriggerMass, IsolationWidth, PrecursorCharge, CollisionEnergy FROM FrameMsMsInfo WHERE Frame={0}"
            .format(frame_id))
        tmp = q.fetchone()
        center = float(tmp[0])
        width = float(tmp[1])
        mslevel = 2
    elif msms == 8:
        q = conn.execute(
            "SELECT IsolationMz, IsolationWidth, ScanNumBegin, ScanNumEnd, CollisionEnergy FROM PasefFrameMsMsInfo WHERE Frame={0} ORDER BY IsolationMz ASC"
            .format(frame_id))
        scandata = q.fetchall()
        tmp = scandata[scan_data_it]
        center = float(tmp[0])
        width = float(tmp[1])
        next_scan_switch = int(tmp[2])
        mslevel = 2

    if verbose:
        # print() calls: Python 2 print statements do not parse on Python 3
        print("mslevel", mslevel, msms)

    # Get the mapping of the ion mobility axis
    scan_number_axis = np.arange(num_scans, dtype=np.float64)
    ook0_axis = td.scanNumToOneOverK0(frame_id, scan_number_axis)

    allmz = []
    allint = []
    allim = []

    # Traverse in reversed order to get low ion mobilities first
    for k, scan in reversed(
            list(enumerate(td.readScans(frame_id, 0, num_scans)))):
        index = np.array(scan[0], dtype=np.float64)
        mz = td.indexToMz(frame_id, index)
        intens = scan[1]
        drift_time = ook0_axis[k]

        if compressFrame:
            allmz.append(mz)
            allint.append(intens)
            allim.append([drift_time] * len(mz))  # one entry per peak

            # We have multiple MS2 spectra in each frame, we need to separate
            # them based on the information from PasefFrameMsMsInfo which
            # indicates the switch scan and the isolation parameter for each
            # quadrupole isolation.
            if next_scan_switch != -1 and next_scan_switch == k:
                if verbose:
                    print("Switch to new scan at", k, "with mapping", scandata)

                sframe = handle_compressed_frame(allmz, allint, allim,
                                                 mslevel, time, center, width)
                exp.consumeSpectrum(sframe)
                allmz = []
                allint = []
                allim = []
                if k == 0:
                    continue

                # Move on to the isolation parameters of the next window.
                scan_data_it += 1
                tmp = scandata[scan_data_it]
                center = float(tmp[0])
                width = float(tmp[1])
                next_scan_switch = int(tmp[2])
            continue

        # Store data in OpenMS Spectrum file -> each TOF push is an individual
        # spectrum and we store the ion mobility in the precursor. The frame
        # can be reconstructed by grouping all spectra with the same RT.
        s = pyopenms.MSSpectrum()
        s.setMSLevel(mslevel)
        s.set_peaks((mz, intens))
        s.setRT(time)
        p = pyopenms.Precursor()
        p.setDriftTime(drift_time)
        if mslevel == 2:
            p.setMZ(center)
            p.setIsolationWindowUpperOffset(width / 2.0)
            p.setIsolationWindowLowerOffset(width / 2.0)
        s.setPrecursors([p])
        exp.consumeSpectrum(s)

    # Frames that were not split are emitted as one spectrum at the end.
    if compressFrame and next_scan_switch == -1:
        sframe = handle_compressed_frame(allmz, allint, allim, mslevel, time,
                                         center, width)
        exp.consumeSpectrum(sframe)
def pick_spectra(self, spec: ms.MSSpectrum, peak_radius: int = 1,
                 window_radius: float = 0.015, pp_mode: str = 'int',
                 min_int_mult: float = 0.10,
                 strict: bool = True) -> ms.MSSpectrum:
    """Peak picks a single spectrum.

    Starting from a seed peak the algorithm walks left and right while the
    intensity keeps falling and the position stays within ``window_radius``
    of the seed. If at least ``peak_radius`` peaks were collected on both
    sides, the set is replaced by one peak carrying the summed intensity at
    the intensity-weighted mean position.

    Note: ``spec`` is sorted by position in place. The returned spectrum is
    MS level 1 and carries the retention time of ``spec``.

    Keyword arguments:
    spec: the spectrum to peak pick
    peak_radius: the minimum peak radius of a peak set
    window_radius: the maximum m/z window radius of a peak set
    pp_mode: the mode to use ('ltr' or 'int'); 'ltr' visits seeds from left
        to right, 'int' visits them by descending intensity
    min_int_mult: a multiplier to the maximum peak intensity in a set (for
        differentiating between signal and noise)
    strict: if False, allow a single increase in intensity in either direction

    Returns: the peak picked spectrum.
    """
    # The return annotation names the type; it used to be a call,
    # ``ms.MSSpectrum()``, which built a throw-away spectrum at definition
    # time and annotated the function with an instance.
    num_peaks = spec.size()
    spec.sortByPosition()

    peak_idx = []  # Intensity lookup table
    picked = [False] * num_peaks

    if pp_mode == 'int':
        for i in range(num_peaks):
            peak_idx.append([spec[i].getIntensity(), i])
        peak_idx = sorted(peak_idx, reverse=True)

    picked_spec = ms.MSSpectrum()
    picked_spec.setMSLevel(1)
    picked_spec.setRT(spec.getRT())

    for idx in range(num_peaks):  # Begin peak picking
        i = idx if pp_mode == 'ltr' else peak_idx[idx][1]
        if picked[i]:
            continue

        init_intensity = spec[i].getIntensity()
        total_intensity = spec[i].getIntensity()
        init_position = spec[i].getPos()
        left_picked, right_picked = 0, 0
        low_bound, high_bound = i, i

        sFlag = False  # Flag for when strict is False
        threshold = peak_radius

        for j in range(i - 1, -1, -1):  # Walk left
            if picked[j] or abs(spec[j].getPos() - init_position) > window_radius:
                break

            if spec[j].getIntensity() > spec[j + 1].getIntensity():
                if strict or sFlag or j + 1 == i:  # Don't start with an abnormal peak
                    break
                sFlag = True
                threshold += 1  # End the peak set with a lower peak ("increase peak_radius")

            total_intensity += spec[j].getIntensity()
            left_picked += 1
            low_bound -= 1

            if left_picked >= threshold and spec[j].getIntensity() <= init_intensity * min_int_mult:
                break

        if left_picked < threshold:
            continue

        sFlag = False
        threshold = peak_radius

        for j in range(i + 1, num_peaks):  # Walk right
            if picked[j] or abs(spec[j].getPos() - init_position) > window_radius:
                break

            if spec[j].getIntensity() > spec[j - 1].getIntensity():
                if strict or sFlag or j - 1 == i:
                    break
                sFlag = True
                threshold += 1

            total_intensity += spec[j].getIntensity()
            right_picked += 1
            high_bound += 1

            if right_picked >= threshold and spec[j].getIntensity() <= init_intensity * min_int_mult:
                break

        if right_picked < threshold:
            continue

        # Collapse the peak set into its intensity-weighted mean position.
        total_position = 0
        for j in range(low_bound, high_bound + 1):
            picked[j] = True
            if total_intensity != 0:
                total_position += spec[j].getPos() * (spec[j].getIntensity() / total_intensity)

        p = ms.Peak1D()
        p.setIntensity(total_intensity)
        p.setPos(total_position)
        picked_spec.push_back(p)

    return picked_spec
def testMSSpectrum():
    """
    @tests:
     MSSpectrum.clear
     MSSpectrum.clearMetaInfo
     MSSpectrum.findNearest
     MSSpectrum.getAcquisitionInfo
     MSSpectrum.getComment
     MSSpectrum.getDataProcessing
     MSSpectrum.getInstrumentSettings
     MSSpectrum.getKeys
     MSSpectrum.getMSLevel
     MSSpectrum.getMetaValue
     MSSpectrum.getName
     MSSpectrum.getNativeID
     MSSpectrum.getPeptideIdentifications
     MSSpectrum.getPrecursors
     MSSpectrum.getProducts
     MSSpectrum.getRT
     MSSpectrum.getSourceFile
     MSSpectrum.getType
     MSSpectrum.get_peaks
     MSSpectrum.intensityInRange
     MSSpectrum.isMetaEmpty
     MSSpectrum.metaValueExists
     MSSpectrum.push_back
     MSSpectrum.removeMetaValue
     MSSpectrum.setAcquisitionInfo
     MSSpectrum.setComment
     MSSpectrum.setDataProcessing
     MSSpectrum.setInstrumentSettings
     MSSpectrum.setMSLevel
     MSSpectrum.setMetaValue
     MSSpectrum.setName
     MSSpectrum.setNativeID
     MSSpectrum.setPeptideIdentifications
     MSSpectrum.setPrecursors
     MSSpectrum.setProducts
     MSSpectrum.setRT
     MSSpectrum.setSourceFile
     MSSpectrum.setType
     MSSpectrum.set_peaks
     MSSpectrum.size
     MSSpectrum.unify
     MSSpectrum.updateRanges
     MSSpectrum.__eq__
     MSSpectrum.__ge__
     MSSpectrum.__getitem__
     MSSpectrum.__gt__
     MSSpectrum.__le__
     MSSpectrum.__lt__
     MSSpectrum.__ne__
    """
    spectrum = pyopenms.MSSpectrum()
    _testMetaInfoInterface(spectrum)
    testSpectrumSetting(spectrum)

    # Scalar properties round-trip through their setters.
    spectrum.setRT(3.0)
    assert spectrum.getRT() == 3.0
    spectrum.setMSLevel(2)
    assert spectrum.getMSLevel() == 2
    spectrum.setName("spec")
    assert spectrum.getName() == "spec"

    # Container behaviour with a single peak.
    peak = pyopenms.Peak1D()
    peak.setMZ(1000.0)
    peak.setIntensity(200.0)
    spectrum.push_back(peak)
    assert spectrum.size() == 1
    assert spectrum[0] == peak

    spectrum.updateRanges()
    assert isinstance(spectrum.findNearest(0.0), int)

    # Comparison operators against itself.
    assert spectrum == spectrum
    assert not spectrum != spectrum

    peak_shape = spectrum.get_peaks().shape
    assert peak_shape == (1, 2), peak_shape
def _plot(self, ax, color, top=True, verificative=True, show_iden=True,
          highlight=True):
    """Draw this spectrum as a bar plot on ``ax``.

    The spectrum is sorted by m/z if necessary. With ``verificative`` the
    bars show the clipped, precursor-free, rank-transformed peaks; otherwise
    the binned m/z and intensity values.

    If an identification is available (``override_iden`` takes precedence):
      * ``show_iden`` writes its title near the top (``top=True``) or the
        bottom of the axes;
      * ``highlight`` generates a theoretical spectrum for the peptide and
        labels and recolours every bar whose m/z equals a theoretical ion
        m/z (truncated to an unsigned integer).

    Returns None.
    """
    target = self if self.is_sorted() else self.sort_by_mz()
    if verificative:
        prepared = target.clip().remove_precursor(copy=False).rank_transform(
            copy=False)
        mz, intensity = prepared.mz, prepared.intensity
    else:
        mz, intensity = target._binning(target.mz, target.intensity)
    rects = ax.bar(mz, intensity, width=1, color=color, linewidth=0,
                   snap=False)

    if not (show_iden or highlight):
        return

    if self.override_iden:
        identification = self.override_iden
    else:
        identification = target.get_identification()
    if not identification:
        return

    tpp_string = identification.to_tpp_string()
    title = identification.to_string(tpp_string)
    if self.override_iden:
        title = 'Overridden: ' + title

    if show_iden:
        ax.text(0.5, 0.9 if top else 0.1, title,
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=12)

    if not highlight:
        return

    tspec = om.MSSpectrum()
    try:
        aa_sequence = om.AASequence.fromString(tpp_string)
    except Exception:
        logging.info(
            'String "{}" cannot be understood by pyopenms.AASequence.'
            .format(tpp_string))
        aa_sequence = None
    if not aa_sequence:
        return

    Spectrum.tsg.getSpectrum(tspec, aa_sequence, 1, identification.charge)
    ions_mz = tspec.get_peaks()[0].astype(np.uint32)
    annotations = tspec.getStringDataArrays()[0]

    # Colour per ion series, keyed by the first letter of the ion name.
    series_colors = {
        'b': 'blue',
        'y': 'red',
        'a': 'green',
        'c': 'teal',
        'x': 'purple',
        'z': 'orange'
    }
    annotation_by_mz = {}
    for ion_idx in range(len(ions_mz)):
        annotation_by_mz[ions_mz[ion_idx]] = annotations[ion_idx]

    for bar_idx, bar_mz in enumerate(mz):
        if bar_mz not in annotation_by_mz:
            continue
        ion_name = annotation_by_mz[bar_mz].decode()
        ion_color = series_colors[ion_name[0]]
        bar = rects[bar_idx]
        bar_height = bar.get_height()
        # Place the label just beyond the end of the bar.
        label_x = bar.get_x() + bar.get_width() / 2.
        label_y = bar_height + 0.01 * (1 if bar_height > 0 else -1)
        ax.text(label_x, label_y, ion_name, fontsize=6, ha='center',
                va='center', color=ion_color)
        bar.set_color(ion_color)
    return
def load(self, ifname: str, peakMap: pyopenms.MSExperiment):
    """Read the spectra of an MGF file into ``peakMap``.

    Every ``BEGIN IONS`` ... ``END IONS`` block becomes one MS level 2
    spectrum with a single precursor. Recognised header keys:
      * ``TITLE``: a comma-separated token starting with ``scan=`` or
        ``scan_`` sets the native ID to ``scan=<number>``
      * ``PEPMASS``: precursor m/z and, optionally, intensity
      * ``CHARGE``: a single digit with an optional sign
      * ``RTINSECONDS``: retention time
    Other headers are ignored. Peak lines hold exactly two numbers.

    Compared with the earlier implementation:
      * the file handle is closed again (it used to leak);
      * blank lines inside a block are skipped instead of raising IndexError;
      * a block without peak lines, and header lines that do not have the
        form ``KEY=value`` with an all-capital key, no longer raise
        AttributeError;
      * numbers may be separated by any whitespace, not only a single blank.

    Args:
        ifname: path of the MGF file.
        peakMap: experiment whose spectra are replaced by the parsed ones.
    """
    with open(ifname, 'r') as inF:
        lines = inF.read().splitlines()

    curLine = 0
    nLines = len(lines)

    # generate spectrum list
    spectraList = list()
    while curLine < nLines:
        if lines[curLine] == 'BEGIN IONS':
            spectrum = pyopenms.MSSpectrum()
            spectrum.setMSLevel(2)
            precursor = pyopenms.Precursor()
            curLine += 1
            while curLine < nLines:
                line = lines[curLine]
                if line == 'END IONS':
                    # Block ended before any peak line was seen.
                    break

                # Slicing instead of indexing: an empty line yields '' and
                # falls through both tests below.
                firstChar = line[:1]
                if firstChar.isalpha():
                    match = re.search('^([A-Z]+)=(.+)$', line)
                    if match is None:
                        pass  # not a KEY=value header this parser knows
                    elif match.group(1) == 'TITLE':
                        titleData = match.group(2).split(',')
                        for s in titleData:
                            if re.search('^scan[_=]', s):
                                match = re.search('^scan[_=]([0-9]+)', s)
                                assert (len(match.groups()) == 1)
                                spectrum.setNativeID('scan={}'.format(
                                    match.group(1)))
                    elif match.group(1) == 'PEPMASS':
                        preMZ = [float(x) for x in match.group(2).split()]
                        assert (len(preMZ) <= 2)
                        precursor.setMZ(preMZ[0])
                        if len(preMZ) > 1:
                            precursor.setIntensity(preMZ[1])
                    elif match.group(1) == 'CHARGE':
                        match = re.search('^([0-9])[+-]{0,1}$',
                                          match.group(2))
                        assert (len(match.groups()) == 1)
                        precursor.setCharge(int(match.group(1)))
                    elif match.group(1) == 'RTINSECONDS':
                        spectrum.setRT(float(match.group(2)))
                elif firstChar.isnumeric():
                    # Peak list: runs up to END IONS (or the end of file).
                    while curLine < nLines and lines[curLine] != 'END IONS':
                        ion = [float(x) for x in lines[curLine].split()]
                        assert (len(ion) == 2)
                        ion_temp = pyopenms.Peak1D()
                        ion_temp.setMZ(ion[0])
                        ion_temp.setIntensity(ion[1])
                        spectrum.push_back(ion_temp)
                        curLine += 1
                    break
                curLine += 1
            spectrum.setPrecursors([precursor])
            spectraList.append(spectrum)
        curLine += 1
    peakMap.setSpectra(spectraList)
def _isolation_window(row):
    """Return (center, width, scan_start, scan_end) from an isolation window row."""
    return float(row[0]), float(row[1]), int(row[2]), int(row[3])


def store_frame(frame_id,
                td,
                conn,
                exp,
                verbose=False,
                compressFrame=True,
                keep_frames=False):
    """
    Store a single frame as an individual mzML file

    Note that there are two ways to store the data:
      (i) Multiple spectra per frame (for visualization), compressFrame is
      False. This is the easiest way to visualize and process the data but
      involves a few hacks, namely storing the IM axis as the RT of each
      spectrum.

      (ii) One spectrum per frame, compressFrame is True. This puts all peaks
      into a single spectrum (while storing the IM data in an extra array).
      This is more efficient for storage and allows analysis that is ignorant
      of the IM dimension.

      Note that msms = 2 means that we have an MS2 scan whereas msms = 8 stands
      for pasef scan. (New tdf 5.1 has msms = 9 for pasef scan)

    Parameters:
        frame_id: id of the frame; interpolated into the SQL statements.
        td: data accessor providing scanNumToOneOverK0, readScans and
            indexToMz for the frame.
        conn: database connection queried through ``conn.execute``.
        exp: consumer; every created spectrum is passed to
            ``exp.consumeSpectrum``.
        verbose: print progress information.
        compressFrame: merge the pushes of a frame (or of each isolation
            window) into one spectrum instead of writing one per push.
        keep_frames: with compressFrame, do not split the frame at the
            isolation window boundaries.

    Raises:
        Exception: if the frame was split into isolation windows and the
            number of spectra created differs from the number of windows.
    """
    # Get a projected mass spectrum:
    q = conn.execute(
        "SELECT NumScans, Time, Polarity, MsMsType FROM Frames WHERE Id={0}".
        format(frame_id))
    tmp = q.fetchone()
    num_scans = tmp[0]
    rtime = tmp[1]
    msms = int(tmp[3])

    center = -1
    width = -1
    next_scan_switch = -1
    mslevel = 1
    scan_data_it = 0
    in_scan = False
    scandata = None

    # Isolation window query per PASEF frame type
    # (new tdf 5.1 has pasef scan msms = 9)
    pasef_queries = {
        9:
        "SELECT IsolationMz, IsolationWidth, ScanNumBegin, ScanNumEnd, CollisionEnergy, Frame FROM DiaFrameMsMsWindows INNER JOIN DiaFrameMsMsInfo ON DiaFrameMsMsWindows.WindowGroup = DiaFrameMsMsInfo.WindowGroup WHERE Frame={0} ORDER BY ScanNumBegin DESC",
        8:
        "SELECT IsolationMz, IsolationWidth, ScanNumBegin, ScanNumEnd, CollisionEnergy FROM PasefFrameMsMsInfo WHERE Frame={0} ORDER BY ScanNumBegin DESC",
    }

    # Check whether we have a MS2 or a PASEF scan
    if msms == 2:
        q = conn.execute(
            "SELECT TriggerMass, IsolationWidth, PrecursorCharge, CollisionEnergy FROM FrameMsMsInfo WHERE Frame={0}"
            .format(frame_id))
        tmp = q.fetchone()
        center = float(tmp[0])
        width = float(tmp[1])
        mslevel = 2
    elif msms in pasef_queries:
        q = conn.execute(pasef_queries[msms].format(frame_id))
        scandata = q.fetchall()
        center, width, scan_start, scan_end = _isolation_window(
            scandata[scan_data_it])
        next_scan_switch = scan_end
        # Check if we already are in the new scan (if there is no
        # gap between scans, happens for diaPASEF):
        if next_scan_switch == num_scans:
            next_scan_switch = scan_start
            in_scan = True
        mslevel = 2
    # else: MS1, nothing to look up

    if verbose:
        print("Frame", frame_id, "mslevel", mslevel, msms,
              "contains nr scans:", num_scans, "and nr pasef scans",
              len(scandata) if scandata else -1)
        print("Scandata for PASEF:", scandata)

    if keep_frames:
        # Disable the window switching so the frame stays in one piece
        next_scan_switch = -1

    # Get the mapping of the ion mobility axis
    scan_number_axis = np.arange(num_scans, dtype=np.float64)
    ook0_axis = td.scanNumToOneOverK0(frame_id, scan_number_axis)

    nr_scans_created = 0
    allmz = []
    allint = []
    allim = []

    # Traverse in reversed order to get low ion mobilities first (and high scan times first)
    for k, scan in reversed(
            list(enumerate(td.readScans(frame_id, 0, num_scans)))):
        index = np.array(scan[0], dtype=np.float64)
        mz = td.indexToMz(frame_id, index)
        intens = scan[1]
        drift_time = ook0_axis[k]

        if compressFrame:
            allmz.append(mz)
            allint.append(intens)
            # One ion mobility value per peak of this push
            allim.append([drift_time] * len(mz))

            # We have multiple MS2 spectra in each frame, we need to separate
            # them based on the information from PasefFrameMsMsInfo which
            # indicates the switch scan and the isolation parameter for each
            # quadrupole isolation.
            if next_scan_switch >= 0 and next_scan_switch >= k:
                if verbose:
                    print("Switch to new scan at", k, "/", next_scan_switch,
                          "store scan of size", len(allmz))

                if in_scan:
                    # Only store spectrum when actually inside a scan, skip the "between scan" pushes
                    sframe = handle_compressed_frame(allmz, allint, allim,
                                                     mslevel, rtime, center,
                                                     width)
                    sframe.setNativeID("frame=%s_scan=%s" %
                                       (frame_id, next_scan_switch))
                    exp.consumeSpectrum(sframe)
                    nr_scans_created += 1

                allmz = []
                allint = []
                allim = []
                if k == 0:
                    continue

                if in_scan:
                    scan_data_it += 1

                    if scan_data_it >= len(scandata):
                        if verbose:
                            print(
                                "LEFT the last scan, nothing else to do here")
                        # -2: no more switches and no whole-frame spectrum
                        next_scan_switch = -2
                        continue

                    # Already prepare for next scan
                    center, width, scan_start, scan_end = _isolation_window(
                        scandata[scan_data_it])

                    in_scan = False
                    next_scan_switch = scan_end
                    if verbose:
                        print("LEAVING scan now, next scan starts at:",
                              next_scan_switch)

                    # Check if we already are in the new scan (if there is no
                    # gap between scans, happens for diaPASEF):
                    if k == next_scan_switch:
                        if verbose:
                            print("STARTING new scan immediately at", k, ":",
                                  center - width / 2.0, center + width / 2.0,
                                  "scan will end at:", next_scan_switch)
                        next_scan_switch = scan_start
                        in_scan = True
                else:
                    in_scan = True
                    next_scan_switch = scan_start
                    if verbose:
                        print("STARTING new scan at", k, ":",
                              center - width / 2.0, center + width / 2.0,
                              "scan will end at:", next_scan_switch)

            continue

        # Store data in OpenMS Spectrum file -> each TOF push is an individual
        # spectrum and we store the ion mobility in the precursor. The frame
        # can be reconstructed by grouping all spectra with the same RT.
        s = pyopenms.MSSpectrum()
        s.setMSLevel(mslevel)
        s.set_peaks((mz, intens))
        s.setRT(rtime)
        s.setNativeID("frame=%s spec %s" % (frame_id, k))
        p = pyopenms.Precursor()
        p.setDriftTime(drift_time)
        if mslevel == 2:
            p.setMZ(center)
            p.setIsolationWindowUpperOffset(width / 2.0)
            p.setIsolationWindowLowerOffset(width / 2.0)
        s.setPrecursors([p])
        exp.consumeSpectrum(s)

    # Store data compressed for cases where the whole frame represents a single spectrum (e.g. MS1)
    if compressFrame and next_scan_switch == -1:
        sframe = handle_compressed_frame(allmz, allint, allim, mslevel, rtime,
                                         center, width)
        sframe.setNativeID("frame=%s" % frame_id)
        exp.consumeSpectrum(sframe)
        nr_scans_created += 1

    # The one-spectrum-per-window invariant only holds when the frame was
    # actually split. Without compressFrame nothing is counted, and with
    # keep_frames exactly one spectrum is written, so checking in those
    # modes would raise after the data has already been stored.
    split_by_window = compressFrame and not keep_frames
    if (split_by_window and scandata is not None
            and nr_scans_created != len(scandata)):
        raise Exception(
            "Something went quite wrong here, we expected %s scans, but only created %s"
            % (len(scandata), nr_scans_created))