import sys

import pyopenms
import timsdata


def main():
    if len(sys.argv) < 3:
        raise RuntimeError("need arguments: tdf_directory output.mzML")

    analysis_dir = sys.argv[1]
    output_fname = sys.argv[2]

    if sys.version_info.major == 2:
        analysis_dir = unicode(analysis_dir)

    td = timsdata.TimsData(analysis_dir)
    conn = td.conn

    # Get total frame count:
    q = conn.execute("SELECT COUNT(*) FROM Frames")
    row = q.fetchone()
    N = row[0]
    print("Analysis has {0} frames.".format(N))

    # Store output
    if output_fname.lower().endswith("mzml"):
        consumer = pyopenms.PlainMSDataWritingConsumer(output_fname)

        # Compress output
        try:
            opt = consumer.getOptions()
            cfg = pyopenms.NumpressConfig()
            cfg.estimate_fixed_point = True
            cfg.numpressErrorTolerance = -1.0  # skip check, faster
            cfg.setCompression("linear")
            cfg.linear_fp_mass_acc = -1  # set the desired RT accuracy in seconds
            opt.setNumpressConfigurationMassTime(cfg)
            cfg = pyopenms.NumpressConfig()
            cfg.estimate_fixed_point = True
            cfg.numpressErrorTolerance = -1.0  # skip check, faster
            cfg.setCompression("slof")
            opt.setNumpressConfigurationIntensity(cfg)
            opt.setCompression(True)  # zlib compression
            consumer.setOptions(opt)
        except Exception:
            pass

    if output_fname.lower().endswith("sqmass"):
        consumer = pyopenms.MSDataSqlConsumer(output_fname)

    for frame_id in range(N):
        store_frame(frame_id + 1, td, conn, consumer, compressFrame=True)
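# store_frame() is referenced above but not included in this snippet. The
# following is a hypothetical sketch only, assuming Bruker's timsdata
# bindings expose readScans() and indexToMz() as in the vendor example; the
# real implementation also handles ion mobility and MS2 frames.
def store_frame_sketch(frame_id, td, conn, consumer, compressFrame=True):
    import numpy as np

    # Look up retention time and scan count for this frame
    rt, num_scans = conn.execute(
        "SELECT Time, NumScans FROM Frames WHERE Id = ?", (frame_id,)).fetchone()

    # Collapse all mobility scans of the frame into a single spectrum
    allmz, allint = [], []
    for indices, intensities in td.readScans(frame_id, 0, num_scans):
        allmz.append(td.indexToMz(frame_id, indices))
        allint.append(intensities)
    if not allmz:
        return

    mz = np.concatenate(allmz)
    inten = np.concatenate(allint).astype(float)
    order = np.argsort(mz)

    sp = pyopenms.MSSpectrum()
    sp.setRT(rt)
    sp.setMSLevel(1)
    sp.set_peaks((mz[order], inten[order]))
    consumer.consumeSpectrum(sp)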
import pyopenms
import diapysef.util


def get_consumer(output_fname):
    # Store output
    if output_fname.lower().endswith("mzml"):
        consumer = pyopenms.PlainMSDataWritingConsumer(output_fname)

        # Compress output
        try:
            opt = consumer.getOptions()
            diapysef.util.setCompressionOptions(opt)
            consumer.setOptions(opt)
        except Exception as e:
            print(e)
            print("Your version of pyOpenMS does not support any compression, "
                  "your files may get rather large")
    elif output_fname.lower().endswith("sqmass"):
        consumer = pyopenms.MSDataSqlConsumer(output_fname)
    else:
        raise Exception("Supported filenames: mzML and sqMass.")
    return consumer
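# Hypothetical usage of get_consumer() (file names are placeholders). Note
# that pyOpenMS consumers finalize the output file when the last reference
# is dropped, so releasing the consumer matters.
exp = pyopenms.MSExperiment()
pyopenms.MzMLFile().load("input.mzML", exp)  # placeholder input file

consumer = get_consumer("converted.sqMass")
for spec in exp:
    consumer.consumeSpectrum(spec)
del consumer  # flush and close the output file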
import pyopenms


class FilteringConsumer:
    """
    Consumer that forwards all calls to the internal consumer (after filtering)
    """

    def __init__(self, consumer):
        self._internal_consumer = consumer

    def setExperimentalSettings(self, s):
        self._internal_consumer.setExperimentalSettings(s)

    def setExpectedSize(self, a, b):
        self._internal_consumer.setExpectedSize(a, b)

    def consumeChromatogram(self, c):
        if c.getNativeID().find(filter_string) != -1:
            self._internal_consumer.consumeChromatogram(c)

    def consumeSpectrum(self, s):
        if s.getNativeID().find(filter_string) != -1:
            self._internal_consumer.consumeSpectrum(s)


###################################
# Do the actual work
###################################
# filter_string, infile and outfile are assumed to be defined earlier
# (e.g. parsed from sys.argv).
consumer = pyopenms.PlainMSDataWritingConsumer(outfile)
consumer = FilteringConsumer(consumer)

pyopenms.MzMLFile().transform(infile, consumer)
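# Design note: FilteringConsumer needs no pyOpenMS base class --
# MzMLFile().transform() only requires the four duck-typed methods above.
# The same pattern supports any predicate; a hypothetical variant matching
# native IDs against a regular expression:
import re


class RegexFilteringConsumer(FilteringConsumer):

    def __init__(self, consumer, pattern):
        FilteringConsumer.__init__(self, consumer)
        self._pattern = re.compile(pattern)

    def consumeChromatogram(self, c):
        if self._pattern.search(c.getNativeID()):
            self._internal_consumer.consumeChromatogram(c)

    def consumeSpectrum(self, s):
        if self._pattern.search(s.getNativeID()):
            self._internal_consumer.consumeSpectrum(s)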
def writeCombinedMzML(peptides, outputFile, templateFile, mzDelta, minIntensity):
    import copy

    import numpy as np
    import pyopenms

    print('Writing combined mzML file')

    # generator for template file spectra
    print('Opening template file')
    spectra = MZMLtoSpectrum(templateFile)

    # output_file = pyopenms.MzMLFile()
    # output_experiment = pyopenms.MSExperiment()
    # can't use usual writer - stores all data in memory before
    # writing to mzML at once ---> too memory intensive
    # use consumer to write spectra on the fly
    consumer = pyopenms.PlainMSDataWritingConsumer(outputFile)

    def replaceData(targetmz, mzs, ints, MZmin, MZmax, synthMZs, synthINTs,
                    minIntensity, peptide):

        # # 1 remove synth data below threshold
        # maparray = np.where(synthINTs > minIntensity)
        # synthMZs = synthMZs[maparray]
        # synthINTs = synthINTs[maparray]

        # check some data is still present
        if synthMZs.shape[0] < 1:
            # callers unpack three values, so return a zero signal as well
            return mzs, ints, 0

        noise = np.empty(0)
        maxSignal = 0
        maxSN = 0

        # section data into isotopes
        for i in range(20):
            i = float(i) / 2
            isotopeMin = MZmin + i - 0.05
            isotopeMax = MZmin + i + 0.3

            if isotopeMin > MZmax:
                break

            # get synthetic data chunks
            indexArray = np.where((synthMZs >= isotopeMin) & (synthMZs < isotopeMax))
            isotopemzs = synthMZs[indexArray]
            isotopeints = synthINTs[indexArray]

            if isotopemzs.shape[0] < 1:
                #print('no isotope peaks for peptide %s: mz: %s, rt: %s i-value: %s' % (peptide.index, peptide.newmz, peptide.newrt, i))
                #print('isotopeMin: %s, isotopeMax: %s' % (isotopeMin, isotopeMax))
                peptide.write = False
                continue

            # get experimental data within isotope boundaries
            minIsotope = np.min(isotopemzs)
            maxIsotope = np.max(isotopemzs)

            indexArray = np.where((mzs > minIsotope) & (mzs < maxIsotope))
            isotopeExptlMzs = mzs[indexArray]
            isotopeExptlInts = ints[indexArray]

            # match exptl data to nearest synth mz
            # and increment corresponding synth int
            for j in range(isotopeExptlMzs.shape[0]):
                eMz = isotopeExptlMzs[j]
                index = np.argmin(np.absolute(synthMZs - eMz))
                synthINTs[index] += isotopeExptlInts[j]

            overlap = np.where((mzs > minIsotope) & (mzs < maxIsotope))

            # get overlapping ints
            noise = ints[overlap]

            signal = np.max(isotopeints)
            if signal > maxSignal:
                maxSignal = signal

            mzs = np.delete(mzs, overlap)
            ints = np.delete(ints, overlap)

        mzs = np.concatenate((mzs, synthMZs))
        ints = np.concatenate((ints, synthINTs))

        sortmap = np.argsort(mzs)
        mzs = mzs[sortmap]
        ints = ints[sortmap]

        return mzs, ints, maxSignal

    print('writing spectra')
    for i, s in enumerate(spectra):
        if i % 100 == 0:
            print('Processing spectrum: %s, RT: %.2f' % (i, s.rt / 60))

        mzs = s.mzs
        ints = s.ints

        for p in peptides:
            if s.rt > p.RTmax or s.rt < p.RTmin:
                continue
            for rti, rt in enumerate(p.synthRTs):
                if rt == s.rt:  # be careful here
                    synthMZs = p.synthMZs
                    synthINTs1 = p.synthINTs[rti]
                    synthINTs2 = copy.deepcopy(synthINTs1)

                    # add light peak
                    mzs, ints, signal = replaceData(
                        p.newmz, mzs, ints, p.MZmin, p.MZmax,
                        synthMZs, synthINTs1, minIntensity, p)
                    if signal > p.lightsignal:
                        p.lightsignal = signal

                    # add heavy peak
                    mzs, ints, signal = replaceData(
                        p.newmz + mzDelta, mzs, ints,
                        p.MZmin + mzDelta, p.MZmax + mzDelta,
                        synthMZs + mzDelta, synthINTs2, minIntensity, p)
                    if signal > p.heavysignal:
                        p.heavysignal = signal

        new_spectrum = copy.deepcopy(s.original)
        new_spectrum.set_peaks((mzs, ints))

        # use consumer to write new spectrum
        consumer.consumeSpectrum(new_spectrum)

    # output_file.store(outputFile, output_experiment)
    print('done')
    return peptides
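# MZMLtoSpectrum() is not defined in this snippet. A minimal sketch of what
# it presumably does, assuming streaming access via
# pyopenms.OnDiscMSExperiment so the template run never has to fit in
# memory; the attribute names (rt, mzs, ints, original) follow the usage
# above.
import collections

import pyopenms

Spec = collections.namedtuple("Spec", ["rt", "mzs", "ints", "original"])


def MZMLtoSpectrum_sketch(fname):
    exp = pyopenms.OnDiscMSExperiment()
    exp.openFile(fname)
    for i in range(exp.getNrSpectra()):
        s = exp.getSpectrum(i)
        mzs, ints = s.get_peaks()
        yield Spec(rt=s.getRT(), mzs=mzs, ints=ints, original=s)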