def convert(self, input: str):
    """
    Store the spectra found in ``input`` in the configured database.

    ``MSMSFinder.locate`` is run over ``input`` with ``MSMinLevelFilter(2)``
    and every spectrum it reports is written as a
    ``MZMLMSMSSpectraRecord`` linked to the ``MZMLSampleRecord`` that has
    the same file name.

    :param input: source handed to ``MSMSFinder.locate`` as ``msmsSource``
    :return: None
    """
    finder = MSMSFinder()

    def callback(msms: PySpectrum, file_name: str):
        # every callback invocation runs inside its own atomic block
        with db.atomic():
            if msms is None:
                # no spectrum was supplied: hand the file to extract_record
                self.extract_record(file_name)
                return

            # sample row the new spectrum record will reference
            sample = MZMLSampleRecord.get(
                MZMLSampleRecord.file_name == file_name)

            try:
                base_peak = msms.highest_peaks(1)[0]
                raw_spectrum = msms.convert(msms, mode="raw").spectra

                # fall back to an empty mapping so the lookups below
                # resolve to their 0 defaults
                if len(msms.selected_precursors) > 0:
                    precursor = msms.selected_precursors[0]
                else:
                    precursor = {}

                scan_number = msms.index
                splash_key = Splash().splash(
                    Spectrum(raw_spectrum, SpectrumType.MS))

                MZMLMSMSSpectraRecord.create(
                    sample=sample,
                    msms=raw_spectrum,
                    rt=msms.scan_time[0],
                    splash=splash_key,
                    scan_number=scan_number,
                    level=msms.ms_level,
                    base_peak=base_peak[0],
                    base_peak_intensity=base_peak[1],
                    precursor=precursor.get('mz', 0),
                    precursor_intensity=precursor.get('i', 0),
                    precursor_charge=precursor.get('charge', 0),
                    ion_count=len(msms.peaks("centroided")))
            except IndexError:
                # not able to find the highest peak: the spectrum is
                # deliberately skipped and nothing is stored for it
                pass

    finder.locate(msmsSource=input, callback=callback,
                  filters=[MSMinLevelFilter(2)])
def test_encode_msms(source):
    """
    Convert every spectrum located in ``source`` and encode them in parallel.

    The spectra are first gathered into a list through the finder callback;
    afterwards ``DualEncoder.encode`` is dispatched once per collected item
    through joblib, using one job per CPU reported by ``multiprocessing``.

    :param source: passed to ``MSMSFinder.locate`` as ``msmsSource``
    """
    finder = MSMSFinder()
    encoder = DualEncoder(intensity_max=1000, min_mz=0, max_mz=2000,
                          directory="data/encoded")
    collected = []

    def callback(msms: Spectrum, file_name: str):
        # the callback may be handed ``None`` instead of a spectrum;
        # there is nothing to collect in that case
        if msms is None:
            return
        collected.append(msms.convert(msms))

    finder.locate(msmsSource=source, callback=callback,
                  filters=[MSMinLevelFilter(2)])

    from joblib import Parallel, delayed

    worker_count = multiprocessing.cpu_count()
    encode_task = delayed(encoder.encode)
    Parallel(n_jobs=worker_count)(encode_task(item) for item in collected)
def test_locate_with_msms_filter(source):
    """
    Check that ``MSMinLevelFilter(2)`` only lets spectra with an MS level
    above 1 reach the callback, and that at least one spectrum is reported.

    :param source: passed to ``MSMSFinder.locate`` as ``msmsSource``
    :return: None
    """
    finder = MSMSFinder()
    count = 0

    def callback(msms: Spectrum, file_name: str):
        nonlocal count
        # Other MSMSFinder callbacks in this code base explicitly handle a
        # ``None`` spectrum, so the finder can evidently call back without
        # one. Such a call is neither counted nor inspected: it would
        # otherwise raise AttributeError instead of exercising the filter.
        if msms is None:
            return

        count += 1
        assert msms.ms_level > 1

    finder.locate(msmsSource=source, callback=callback,
                  filters=[MSMinLevelFilter(2)])

    # only real spectra are counted, so this proves something was located
    assert count > 0
def test_convert(source):
    """
    Check that every spectrum located in ``source`` can be converted
    without raising, and that at least one spectrum is reported.

    :param source: passed to ``MSMSFinder.locate`` as ``msmsSource``
    :return: None
    """
    finder = MSMSFinder()
    count = 0

    def callback(msms: Spectrum, file_name: str):
        nonlocal count
        # Other MSMSFinder callbacks in this code base explicitly handle a
        # ``None`` spectrum, so the finder can evidently call back without
        # one; there is nothing to convert or count in that case.
        if msms is None:
            return

        count += 1
        # the result is not inspected; the conversion only has to succeed
        msms.convert(msms)

    finder.locate(msmsSource=source, callback=callback,
                  filters=[MSMinLevelFilter(2)])

    # only real spectra are counted, so this proves something was converted
    assert count > 0
def convert(self, input: str, output: str):
    """
    converts the given file to a csv file containing all msms information

    One ``;``-separated row is written per spectrum that
    ``MSMSFinder.locate`` reports for ``input`` with
    ``MSMinLevelFilter(2)``. Spectra for which no highest peak can be
    determined are left out of the file.

    :param input: source handed to ``MSMSFinder.locate`` as ``msmsSource``
    :param output: path of the csv file to write; existing content is
        replaced
    :return: None
    """
    finder = MSMSFinder()

    with open(output, "w+") as out:
        out.write("Level;Basepeak;Basepeak Intensity;Time;Splash;MSMS\n")

        def callback(msms: PySpectrum, file_name: str):
            # the finder may call back without a spectrum; nothing to write
            if msms is None:
                return

            try:
                highest = msms.highest_peaks(1)[0]
            except IndexError:
                # no highest peak available for this spectrum: skip the row
                # instead of aborting the export of the whole file
                return

            spectra = msms.convert(msms).spectra
            splash = Splash().splash(Spectrum(spectra, SpectrumType.MS))

            out.write("{};{};{};{};{};{}\n".format(
                msms.ms_level,
                highest[0],
                highest[1],
                msms.scan_time[0],
                splash,
                spectra))

        # must run while the file is still open: the callback writes to it
        finder.locate(msmsSource=input, callback=callback,
                      filters=[MSMinLevelFilter(2)])
min_mz=args.min_mz, max_mz=args.max_mz, directory="{}".format(args.destination)) data = [] def callback(msms: Spectrum, file_name: str): """ builds our data list :param msms: :param file_name: :return: """ data.append(msms.convert(msms)) try: finder.locate(msmsSource=file, callback=callback, filters=[MSMinLevelFilter(2)]) if len(data) > 0: from joblib import Parallel, delayed Parallel(n_jobs=multiprocessing.cpu_count())( delayed(encoder.encode)(spec=x, store_string=args.txt) for x in data) except ParseError: print("ignoring file: {}, due to format errors!".format(file)) print("processing was finished for {} files".format(counter))