def __init__(self, d_directory_location, analyzer='ICR', instrument_label='15T',
             auto_process=True, auto_noise=True, keep_profile=False):
    """Prepare a threaded reader for a Bruker Solarix transient experiment.

    Parameters
    ----------
    d_directory_location : str or pathlib.Path
        Experiment directory; it must contain a ``scan.xml`` file.
    analyzer : str
        Forwarded to ``LCMSBase``.
    instrument_label : str
        Forwarded to ``LCMSBase``.
    auto_process : bool
        Stored on the instance as ``self.auto_process``.
    auto_noise : bool
        Stored on the instance as ``self.auto_noise``.
    keep_profile : bool
        Stored on the instance as ``self.keep_profile``.

    Raises
    ------
    FileNotFoundError
        If ``d_directory_location`` does not exist.
    FileExistsError
        If the directory has no ``scan.xml``.
    """
    Thread.__init__(self)

    d_directory_location = Path(d_directory_location)
    if not d_directory_location.exists():
        raise FileNotFoundError("File does not exist: " + str(d_directory_location))

    self.scan_attr = d_directory_location / "scan.xml"
    if not self.scan_attr.exists():
        # The exception type is kept as FileExistsError because existing
        # callers may already catch it. The message is assembled from adjacent
        # literals so no continuation characters or indentation leak into it.
        raise FileExistsError(
            "%s does not seem to be a valid Solarix Mass Spectra Experiment, "
            "maybe an Imaging experiment? "
            "please use the ReadBruker_SolarixTransientImage class for Imaging dataset"
            % d_directory_location)

    self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label)
    self.auto_process = auto_process
    self.auto_noise = auto_noise
    self.keep_profile = keep_profile
def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown'):
    """
    Validate the CoreMS location and initialise the thread, the mass list
    reader and the LCMS container.

    # Parameters
    ----------
    file_location: text, pathlib.Path(), or s3path.S3Path
        Location of the ``.corems`` data. Plain text is wrapped in a
        pathlib.Path; pass an S3Path object explicitly when one is needed.
    analyzer: text
        Forwarded to LCMSBase.
    instrument_label: text
        Forwarded to LCMSBase.

    # Raises
    ----------
    FileNotFoundError: the location does not exist
    TypeError: the location suffix is not '.corems'
    """
    # only plain strings are promoted; Path-like objects are used untouched
    location = Path(file_location) if isinstance(file_location, str) else file_location

    if not location.exists():
        raise FileNotFoundError("%s not found" % location)

    if location.suffix != '.corems':
        raise TypeError("%s is not a valid CoreMS file" % location)

    Thread.__init__(self)
    ReadCoremsMasslist.__init__(self, location)

    # self.file_location is expected to be set by ReadCoremsMasslist.__init__
    self.lcms = LCMSBase(self.file_location,
                         analyzer=analyzer,
                         instrument_label=instrument_label)
def __init__(self, d_directory_location, analyzer='ICR', instrument_label='15T',
             auto_process=True, auto_noise=True, keep_profile=False):
    """
    Prepare a threaded reader for a Bruker Solarix transient experiment.

    # Parameters
    ----------
    d_directory_location: text, pathlib.Path(), or s3path.S3Path
        Experiment directory, it must contain a ``scan.xml`` file.
        Plain text is wrapped in a pathlib.Path; pass an S3Path object
        explicitly when one is needed.
    analyzer: text
        Forwarded to LCMSBase.
    instrument_label: text
        Forwarded to LCMSBase.
    auto_process, auto_noise, keep_profile: bool
        Stored on the instance under the same names.

    # Raises
    ----------
    FileNotFoundError: the directory does not exist
    FileExistsError: the directory has no ``scan.xml``
    """
    Thread.__init__(self)

    if isinstance(d_directory_location, str):
        # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
        d_directory_location = Path(d_directory_location)

    if not d_directory_location.exists():
        raise FileNotFoundError("File does not exist: " + str(d_directory_location))

    self.scan_attr = d_directory_location / "scan.xml"
    if not self.scan_attr.exists():
        # The exception type is kept as FileExistsError because existing
        # callers may already catch it. The message is assembled from adjacent
        # literals so no continuation characters or indentation leak into it.
        raise FileExistsError(
            "%s does not seem to be a valid Solarix Mass Spectra Experiment, "
            "maybe an Imaging experiment? "
            "please use the ReadBruker_SolarixTransientImage class for Imaging dataset"
            % d_directory_location)

    self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label)
    self.auto_process = auto_process
    self.auto_noise = auto_noise
    self.keep_profile = keep_profile
def __init__(self, file_location):
    """Initialise the thread, the HDF mass spectrum reader and the LCMS container.

    The keys of ``self.h5pydata`` are converted to integers and stored,
    in ascending order, as ``self.list_scans``.
    """
    Thread.__init__(self)
    ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)

    # self.file_location and self.h5pydata are expected to be provided by
    # ReadCoreMSHDF_MassSpectrum.__init__
    self.lcms = LCMSBase(self.file_location)

    scan_ids = [int(key) for key in list(self.h5pydata.keys())]
    scan_ids.sort()
    self.list_scans = scan_ids
def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown'):
    """Validate the CoreMS location and initialise the thread, reader and LCMS container.

    Parameters
    ----------
    file_location : str or path-like
        Location of the ``.corems`` data.
    analyzer : str
        Forwarded to ``LCMSBase``.
    instrument_label : str
        Forwarded to ``LCMSBase``.

    Raises
    ------
    FileNotFoundError
        If ``file_location`` does not exist.
    TypeError
        If the location suffix is not ``.corems``.
    """
    location = Path(file_location)

    # exists() must be *called*: the bare bound method is always truthy, which
    # previously made this check unreachable.
    if not location.exists():
        raise FileNotFoundError("%s not found" % file_location)

    if location.suffix != '.corems':
        raise TypeError("%s is not a valid CoreMS file" % file_location)

    Thread.__init__(self)
    ReadCoremsMasslist.__init__(self, file_location)

    # self.file_location is expected to be set by ReadCoremsMasslist.__init__
    self.lcms = LCMSBase(self.file_location,
                         analyzer=analyzer,
                         instrument_label=instrument_label)
def __init__(self, file_location):
    """Open the raw file through RawFileReaderAdapter and record its scan bounds.

    The first and last scan numbers are taken from the file's ``RunHeaderEx``.
    """
    Thread.__init__(self)

    raw_file = RawFileReaderAdapter.FileFactory(file_location)
    self.iRawDataPlus = raw_file

    # NOTE(review): the (0, 1) arguments presumably select the first MS
    # device -- confirm against the RawFileReader documentation
    self.res = raw_file.SelectInstrument(0, 1)

    self.lcms = LCMSBase(file_location)

    self._initial_scan_number = raw_file.RunHeaderEx.FirstSpectrum
    self._final_scan_number = raw_file.RunHeaderEx.LastSpectrum

    self.file_location = file_location
def __init__(self, file_location):
    """Open the raw file through the MSFileReader COM object and record its scan bounds.

    The load status is verified with ``check_load_success`` before the LCMS
    container is created; scans are numbered from 1 up to the value returned
    by ``get_scans_numbers``.
    """
    Thread.__init__(self)

    com_reader = CreateObject("MSFileReader.XRawfile")
    self.thermo_Library = com_reader
    com_reader.open(file_location)

    # status code of selecting the controller, checked right below
    self.res = com_reader.SetCurrentController(0, 1)
    self.check_load_success()

    self.LCMS = LCMSBase(file_location)

    self._initial_scan_number = 1
    self._final_scan_number = self.get_scans_numbers()

    self.file_location = file_location
def __init__(self, file_location, auto_process=True):
    """Open the analysis through the Bruker EDAL COM object and record its scan bounds.

    The load status is verified with ``check_load_sucess``; scans are numbered
    from 1 up to the value returned by ``get_scans_numbers``.
    """
    Thread.__init__(self)

    self.lcms = LCMSBase(file_location)

    # Set up the COM object interface
    com_analysis = CreateObject("EDAL.MSAnalysis")
    self.Bruker_Library = com_analysis
    self.res = com_analysis.Open(file_location)
    self.check_load_sucess()

    self._initial_scan_number = 1
    self._final_scan_number = self.get_scans_numbers()

    self.file_location = file_location
    self.auto_process = auto_process
class ReadCoreMSHDF_MassSpectra(ReadCoreMSHDF_MassSpectrum, Thread):
    '''Threaded reader that loads every scan stored in a CoreMS HDF file into an LCMS object'''

    def __init__(self, file_location):
        '''Initialise the thread, the HDF reader and the LCMS container.

        The keys of ``self.h5pydata`` are stored, as sorted integers, in ``self.list_scans``.
        '''
        Thread.__init__(self)
        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)

        self.lcms = LCMSBase(self.file_location)

        scan_ids = [int(key) for key in list(self.h5pydata.keys())]
        scan_ids.sort()
        self.list_scans = scan_ids

    def import_mass_spectra(self):
        '''Read every scan in ``self.list_scans`` and fill the LCMS object.

        Retention times and TICs are collected in scan order and assigned to
        the LCMS object once all scans have been added.
        '''
        retention_times = []
        tics = []

        for scan_id in self.list_scans:
            spectrum = self.get_mass_spectrum(scan_id)
            retention_times.append(spectrum.retention_time)
            tics.append(spectrum.tic)
            self.lcms.add_mass_spectrum(spectrum)

        self.lcms.retention_time = retention_times
        self.lcms.tic = tics
        self.lcms.scans_number = self.list_scans

    def run(self):
        '''creates the lcms obj'''
        self.import_mass_spectra()

    def get_lcms_obj(self):
        '''Return the LCMS object, raising if it evaluates as empty/falsy'''
        if not self.lcms:
            raise Exception("returning a empty lcms class")
        return self.lcms
def __init__(self, file_location, analyzer='ICR', instrument_label='21T', auto_process=True):
    '''
    Open a Booster HDF file and record the scans it contains.

    # Parameters
    ----------
    ## file_location : str, Path or S3Path
        file_location is full data path. Plain text is wrapped in a
        pathlib.Path; an S3Path is read fully into memory before being
        handed to h5py.
    ## analyzer : str
        Forwarded to LCMSBase and stored on the instance.
    ## instrument_label : str
        Forwarded to LCMSBase and stored on the instance.
    ## auto_process : bool
        Stored on the instance as ``self.auto_process``.
    '''
    Thread.__init__(self)

    if isinstance(file_location, str):
        # if obj is a string it defaults to create a Path obj, pass the S3Path if needed.
        # The converted object replaces the local so that it is what actually
        # reaches LCMSBase, h5py and self.file_location (previously the
        # conversion was overwritten further down and had no effect).
        file_location = Path(file_location)

    self.lcms = LCMSBase(file_location, analyzer=analyzer, instrument_label=instrument_label)

    if isinstance(file_location, S3Path):
        # read the remote object once and close the handle right away
        with file_location.open('rb') as remote_file:
            data = BytesIO(remote_file.read())
    else:
        data = file_location

    self.hdf_obj = h5py.File(data, 'r')

    self.list_scans = sorted(int(key) for key in self.hdf_obj.keys())

    self.initial_scan_number = self.list_scans[0]
    self.final_scan_number = self.list_scans[-1]

    self.file_location = file_location

    # honour the caller's choice instead of always forcing True
    self.auto_process = auto_process

    self.analyzer = analyzer
    self.instrument_label = instrument_label
def __init__(self, file_location, analyzer='ICR', instrument_label='21T', auto_process=True):
    '''Open a Booster HDF file and record the scans it contains.

    The keys of the HDF file are converted to integers and kept, sorted, in
    ``self.list_scans``; the first and last of them become the initial and
    final scan numbers.

    ``analyzer`` and ``instrument_label`` are forwarded to LCMSBase and stored
    on the instance; ``auto_process`` is stored as ``self.auto_process``.
    '''
    Thread.__init__(self)

    self.lcms = LCMSBase(file_location, analyzer=analyzer, instrument_label=instrument_label)

    self.hdf_obj = h5py.File(file_location, 'r')

    self.list_scans = sorted(int(key) for key in self.hdf_obj.keys())

    self.initial_scan_number = self.list_scans[0]
    self.final_scan_number = self.list_scans[-1]

    self.file_location = file_location

    # honour the caller's choice instead of always forcing True
    self.auto_process = auto_process

    self.analyzer = analyzer
    self.instrument_label = instrument_label
class ImportLCMSThermoMSFileReader(Thread):
    """
    Read spectra from a Thermo raw file (through RawFileReaderAdapter) and
    store them in a LC-MS class

    * Default behavior is to load all scans numbers

    * set initial_scan_number and final_scan_number to change it before calling start(), or run()
    """

    def __init__(self, file_location):
        """Open the raw file and record the first/last scan numbers from its run header."""
        Thread.__init__(self)

        self.iRawDataPlus = RawFileReaderAdapter.FileFactory(file_location)

        # NOTE(review): the (0, 1) arguments presumably select the first MS
        # device -- confirm against the RawFileReader documentation
        self.res = self.iRawDataPlus.SelectInstrument(0, 1)

        self.lcms = LCMSBase(file_location)

        self._initial_scan_number = self.iRawDataPlus.RunHeaderEx.FirstSpectrum

        self._final_scan_number = self.iRawDataPlus.RunHeaderEx.LastSpectrum

        self.file_location = file_location

    @property
    def initial_scan_number(self):
        """First scan number that will be imported."""
        return self._initial_scan_number

    @property
    def final_scan_number(self):
        """Last scan number (inclusive) that will be imported."""
        return self._final_scan_number

    def check_scan(self, scan):
        """Return True when ``scan`` lies within the first/last spectrum of the run header."""
        run_header = self.iRawDataPlus.RunHeaderEx
        return run_header.FirstSpectrum <= scan <= run_header.LastSpectrum

    # The class documentation tells users to change the scan bounds before
    # calling start()/run(); without setters that raised AttributeError.
    @initial_scan_number.setter
    def initial_scan_number(self, initial_scan_number):
        if self.check_scan(initial_scan_number):
            self._initial_scan_number = initial_scan_number
        else:
            raise Exception("startscan and finalscan should be between %s and %s" % (
                self.iRawDataPlus.RunHeaderEx.FirstSpectrum,
                self.iRawDataPlus.RunHeaderEx.LastSpectrum))

    @final_scan_number.setter
    def final_scan_number(self, final_scan_number):
        if self.check_scan(final_scan_number):
            self._final_scan_number = final_scan_number
        else:
            raise Exception("startscan and finalscan should be between %s and %s" % (
                self.iRawDataPlus.RunHeaderEx.FirstSpectrum,
                self.iRawDataPlus.RunHeaderEx.LastSpectrum))

    def run(self):
        '''thread will automatically process mass spectrum
        use the get_mass_spectra class to import without processing mass spectrum'''

        d_parameters = default_parameters(self.file_location)
        self._import_mass_spectra(d_parameters)

        # return self.lcms

    def get_mass_spectra(self, auto_process=True):
        """Import all scans synchronously and return the LCMS object."""
        d_parameters = default_parameters(self.file_location)
        self._import_mass_spectra(d_parameters, auto_process=auto_process)
        return self.lcms

    def check_load_success(self):
        """ 0 if successful; otherwise, see Error Codes on MSFileReader Manual

        Returns True (and sets ``self.break_it`` to False) when ``self.res``
        is 0, raises ImportError carrying the status code otherwise.
        """
        if self.res == 0:
            self.break_it = False
            return True

        raise ImportError(str(self.res))

    def get_filter_for_scan_num(self, scan_number):
        """Return the scan event string of ``scan_number`` split on whitespace.

        e.g.  ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']
        """
        scan_label = self.iRawDataPlus.GetScanEventStringForScanNumber(
            scan_number)

        return str(scan_label).split()

    def check_full_scan(self, scan_number):
        """Return True when the scan filter reports ScanMode 0 (FULL)."""
        # scan_filter.ScanMode 0 = FULL
        scan_filter = self.iRawDataPlus.GetFilterForScanNumber(scan_number)

        return scan_filter.ScanMode == 0

    def get_polarity_mode(self, scan_number):
        """Return 1 for positive and -1 for negative ion mode.

        The polarity symbol is the second token of the scan event string.
        Raises Exception when the symbol is neither "+" nor "-".
        """
        polarity_symbol = self.get_filter_for_scan_num(scan_number)[1]

        if polarity_symbol == "+":
            # return "POSITIVE_ION_MODE"
            return 1

        if polarity_symbol == "-":
            return -1

        raise Exception("Polarity Mode Unknown, please set it manually")

    def get_scan_header(self, scan):
        '''
        Get full dictionary of scan header meta data, i.e. AGC status, ion injection time, etc.
        '''
        header = self.iRawDataPlus.GetTrailerExtraInformation(scan)

        header_dic = {}
        for i in range(header.Length):
            header_dic[header.Labels[i]] = header.Values[i]

        return header_dic

    def get_data(self, scan, d_parameter, scan_type):
        """Return the m/z and abundance data of one scan as a dictionary.

        For ``scan_type == "Centroid"`` the centroid stream is read, resolving
        power and signal-to-noise are included, and the average noise estimate
        is written into ``d_parameter`` (mutated in place). For any other
        ``scan_type`` the segmented (profile) scan is read and only m/z and
        abundance are returned.
        """
        if scan_type == "Centroid":

            centroidStream = self.iRawDataPlus.GetCentroidStream(scan, False)

            noise = list(centroidStream.Noises)

            baselines = list(centroidStream.Baselines)

            rp = list(centroidStream.Resolutions)

            magnitude = list(centroidStream.Intensities)

            mz = list(centroidStream.Masses)

            # charge = scans_labels[5]
            # noise estimate: (noise - baseline) / 3, per peak
            array_noise_std = (numpy.array(noise) - numpy.array(baselines)) / 3
            l_signal_to_noise = numpy.array(magnitude) / array_noise_std

            # NOTE(review): both keys receive the same average -- confirm the
            # key names against what default_parameters()/MassSpecCentroid expect
            d_parameter["baselise_noise_std"] = numpy.average(array_noise_std)

            d_parameter["baselise_noise_std_std"] = numpy.average(
                array_noise_std)

            data_dict = {
                Labels.mz: mz,
                Labels.abundance: magnitude,
                Labels.rp: rp,
                Labels.s2n: l_signal_to_noise,
            }

        else:

            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan)

            profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                scan, scanStatistics)

            magnitude = list(profileStream.Intensities)

            mz = list(profileStream.Positions)

            data_dict = {
                Labels.mz: mz,
                Labels.abundance: magnitude,
            }

        return data_dict

    def is_profile_scan_for_scan_num(self, scan_number):
        """Return True when the scan statistics do not flag the scan as centroid."""
        scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
            scan_number)

        isCentroid = scanStatistics.IsCentroidScan

        return bool(not isCentroid)

    @staticmethod
    def _sort_sum_series(series):
        """Sort a Series by index and sum rows with exact matching indexes (m/z)."""
        return series.sort_index().groupby(level=0).sum()

    def get_summed_mass_spectrum(self, initial_scan_number, final_scan_number=None,
                                 auto_process=True, pd_method=True, pd_merge_n=100):
        """Sum the profile data of several scans into a single MassSpecProfile.

        Parameters
        ----------
        initial_scan_number : int or list of int
            First scan of the range, or an explicit list of scan numbers.
        final_scan_number : int, optional
            Last scan (inclusive); defaults to the final scan of the file.
            Ignored when ``initial_scan_number`` is a list.
        auto_process : bool
            Forwarded to MassSpecProfile.
        pd_method : bool
            Sum with pandas (True) or with a plain dictionary (False).
        pd_merge_n : int
            With ``pd_method``, intermediate data is merged every time the
            scan number is a multiple of this value.
        """
        d_params = default_parameters(self.file_location)

        # assumes scans is full scan or reduced profile scan

        d_params["label"] = Labels.thermo_profile

        if isinstance(initial_scan_number, list):

            d_params["polarity"] = self.get_polarity_mode(
                initial_scan_number[0])

            scanrange = initial_scan_number
        else:
            d_params["polarity"] = self.get_polarity_mode(initial_scan_number)

            if final_scan_number is None:
                final_scan_number = self._final_scan_number

            scanrange = range(initial_scan_number, final_scan_number + 1)

        if pd_method:

            # Series.append was removed in pandas 2.0; the per-scan series are
            # collected and combined with pd.concat instead.
            pending = []

            for scan_number in tqdm(scanrange):
                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)
                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                pending.append(pd.Series(index=list(segmentedScan.Positions),
                                         dtype='float64',
                                         data=list(segmentedScan.Intensities)))

                # this allows you to merge/sum the values earlier, however it slows down a lot
                # limited benefit unless running into memory issues
                # for complex data it is necessary to stop the iterations getting too slow
                if scan_number % pd_merge_n == 0:
                    pending = [self._sort_sum_series(pd.concat(pending))]

            if pending:
                big_df = self._sort_sum_series(pd.concat(pending))
            else:
                big_df = pd.Series(index=[], dtype='float64')

            data_dict = {
                Labels.mz: list(big_df.index.values),
                Labels.abundance: list(big_df.values),
            }
        else:
            all_mz = dict()

            for scan_number in tqdm(scanrange):

                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)

                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                len_data = segmentedScan.Positions.Length

                for i in range(len_data):

                    mz = segmentedScan.Positions[i]
                    abundance = segmentedScan.Intensities[i]

                    # only exactly equal m/z values are summed together
                    all_mz[mz] = all_mz.get(mz, 0) + abundance

            mz_all = sorted(all_mz)
            abun_all = [all_mz[mz] for mz in mz_all]

            data_dict = {
                Labels.mz: mz_all,
                Labels.abundance: abun_all,
            }

        print('Summed. Now Processing.')

        mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process)

        return mass_spec

    def _import_mass_spectra(self, d_params, auto_process=True):
        """Load every scan between the initial and final scan number into ``self.lcms``.

        Scans whose filter reports FULL scan mode are loaded as profile
        spectra, all other scans are loaded from the centroid stream.
        ``d_params`` is updated in place for every scan.
        """
        # if self.check_load_success():

        list_Tics = list()

        list_RetentionTimeSeconds = list()

        list_scans = list()

        for scan_number in range(self.initial_scan_number, self.final_scan_number + 1):

            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                scan_number)

            # NOTE(review): the profile label is also set for scans that end
            # up in the centroid branch below -- confirm this is intended
            d_params["label"] = Labels.thermo_profile

            d_params["polarity"] = self.get_polarity_mode(scan_number)

            d_params["rt"] = self.iRawDataPlus.RetentionTimeFromScanNumber(
                scan_number)

            d_params["scan_number"] = scan_number

            list_RetentionTimeSeconds.append(d_params.get("rt"))

            list_Tics.append(scanStatistics.TIC)

            list_scans.append(scan_number)

            if self.check_full_scan(scan_number):

                data_dict = self.get_data(scan_number, d_params, "Profile")

                print("loading profile scan number: ", scan_number)

                mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process)

                self.lcms.add_mass_spectrum(mass_spec)

            else:

                data_dict = self.get_data(scan_number, d_params, "Centroid")

                print("loading centroid scan number: ", scan_number)

                mass_spec = MassSpecCentroid(data_dict, d_params)

                self.lcms.add_mass_spectrum(mass_spec)

        # pool = multiprocessing.Pool(5)
        # result = pool.starmap(MassSpecCentroid, results)
        # for ms in result:
        # self.lcms.add_mass_spectrum(ms)

        # retention_time is assigned, like tic and scans_number (it used to be
        # called as if it were a method)
        self.lcms.retention_time = list_RetentionTimeSeconds
        self.lcms.tic = list_Tics
        self.lcms.scans_number = list_scans

    def get_lcms(self):
        """get_lc_ms_class method should only be used when using this class as a Thread,
        otherwise use the run() method to return the LCMS class

        When the LCMS object has no data for the initial scan yet, run() is
        invoked once before giving up with an Exception.
        """
        if self.lcms.get(self._initial_scan_number):
            return self.lcms

        self.run()

        if self.lcms.get(self._initial_scan_number):
            return self.lcms

        raise Exception("returning a empty LCMS class")

    def get_tic(self, plot=False):
        """
        Reads the TIC values for each scan from the Thermo headers
        Returns a pandas dataframe of Scans, TICs, and Times
        (Optionally) plots the TIC chromatogram; in that case the return
        value is the tuple (dataframe, figure).
        """
        first_scan = self._initial_scan_number
        final_scan = self._final_scan_number
        scanrange = range(first_scan, final_scan + 1)

        tics, times = [], []
        for scan in scanrange:
            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan)
            tics.append(scanStatistics.TIC)
            times.append(scanStatistics.StartTime)

        # building the frame from complete columns keeps them numeric
        ms_tic = pd.DataFrame({'TIC': tics, 'Time': times}, index=scanrange)

        if plot:
            import matplotlib.pyplot as plt  # maybe better in top of file?
            fig, ax = plt.subplots(figsize=(6, 3))
            ax.plot(ms_tic['Time'], ms_tic['TIC'])
            ax.set_xlabel('Time (min)')
            ax.set_ylabel('TIC')
            plt.show()
            return ms_tic, fig

        return ms_tic

    def get_best_scans_idx(self, stdevs=2, method='mean', plot=False):
        '''
        Method to determine the best scan indexes for selective co-addition
        Based on calculating the mean (default) of the TIC values
        and setting an upper limit above/below that within X standard deviations.
        Mean or median makes limited difference, it seems.
        Empirically, 1-2 stdevs enough to filter out the worst datapoints.
        Optionally, plot the TIC with horizontal lines for the standard dev cutoffs.

        Returns the list of scan numbers, or (figure, scans) when plot is True.
        Raises ValueError when method is neither 'mean' nor 'median'.
        '''
        tic = self.get_tic()

        if method == 'median':
            tic_center = tic['TIC'].median()
        elif method == 'mean':
            tic_center = tic['TIC'].mean()
        else:
            # an unknown method cannot produce a centre value, so fail with a
            # clear message instead of crashing while building the text
            raise ValueError("Method " + str(method) + " undefined")

        tic_std = tic['TIC'].std()

        lowerlimit = tic_center - (stdevs * tic_std)
        upperlimit = tic_center + (stdevs * tic_std)

        tic_filtered = tic[(tic['TIC'] > lowerlimit) & (tic['TIC'] < upperlimit)]
        scans = list(tic_filtered.index.values)

        if plot:
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(figsize=(8, 4))
            ax.plot(tic['Time'], tic['TIC'])
            ax.axhline(y=lowerlimit, c='r')
            ax.axhline(y=upperlimit, c='r')
            return fig, scans

        return scans
class ImportLCMSBrukerCompassXtract(Thread):
    '''Threaded reader that loads MS1 profile spectra from a Bruker analysis
    (through the EDAL COM interface) into an LCMS object.

    Set initial_scan_number and final_scan_number before calling start() or
    run() to restrict the range of scans that is imported.
    '''

    def __init__(self, file_location, auto_process=True):
        '''Open the analysis and record scan bounds (1 .. number of spectra).'''
        Thread.__init__(self)

        self.lcms = LCMSBase(file_location)

        # Set up the COM object interface
        self.Bruker_Library = CreateObject("EDAL.MSAnalysis")

        self.res = self.Bruker_Library.Open(file_location)

        self.check_load_sucess()

        self._initial_scan_number = 1

        self._final_scan_number = self.get_scans_numbers()

        self.file_location = file_location

        self.auto_process = auto_process

    @property
    def initial_scan_number(self):
        '''First scan number that will be imported.'''
        return self._initial_scan_number

    @property
    def final_scan_number(self):
        '''Last scan number (inclusive) that will be imported.'''
        return self._final_scan_number

    def check_scan(self, scan):
        '''Return True when ``scan`` does not exceed the number of spectra.'''
        scan_numbers = self.get_scans_numbers()
        return scan <= scan_numbers

    @initial_scan_number.setter
    def initial_scan_number(self, initial_scan_number):
        if self.check_scan(initial_scan_number):
            self._initial_scan_number = initial_scan_number
        else:
            raise Exception("startscan and finalscan should be less than %s"
                            % self.get_scans_numbers())

    @final_scan_number.setter
    def final_scan_number(self, final_scan_number):
        if self.check_scan(final_scan_number):
            self._final_scan_number = final_scan_number
        else:
            raise Exception("startscan and finalscan should be less than %s"
                            % self.get_scans_numbers())

    def get_scans_numbers(self):
        '''Return the number of spectra in the analysis.'''
        scan_numbers = self.Bruker_Library.MSSpectrumCollection.Count

        return scan_numbers

    def get_polarity_mode(self, spectrum):
        '''Return 1 when ``spectrum.Polarity`` is 0 and -1 when it is 1.

        Raises IOError for any other polarity value.
        '''
        polarity_symbol = spectrum.Polarity

        if polarity_symbol == 0:
            # return "POSITIVE_ION_MODE"
            return 1

        if polarity_symbol == 1:
            # return "NEGATIVE_ION_MODE"
            return -1

        raise IOError("Could not read mass spectrum polarity mode")

    def check_load_sucess(self):
        """ 0 if successful; otherwise, see Error Codes

        Sets ``self.break_it`` to False on success and raises ImportError
        carrying the status code otherwise.
        """
        if self.res == 0:
            self.break_it = False
        else:
            raise ImportError(str(self.res))

    def get_bruker_tics(self):
        '''Return the "SumIntensity" analysis data as an array.

        Falls back to ``[0]`` when the analysis does not provide it, in the
        same way get_bruker_retention_time does (previously this path failed
        with an unbound local variable).
        '''
        strAnalysisData = BSTR("SumIntensity")

        if self.Bruker_Library.HasAnalysisData(strAnalysisData):
            tics_array = self.Bruker_Library.GetAnalysisData(strAnalysisData)
            tics_array = array(tics_array)
        else:
            tics_array = [0]

        return tics_array

    def get_bruker_retention_time(self):
        '''Return the "RetentionTime" analysis data as an array, or ``[0]`` when absent.'''
        strAnalysisData = BSTR("RetentionTime")

        if self.Bruker_Library.HasAnalysisData(strAnalysisData):
            rt_array = self.Bruker_Library.GetAnalysisData(strAnalysisData)
            rt_array = array(rt_array)
        else:
            rt_array = [0]

        return rt_array

    @staticmethod
    def get_data(spectra, scan):
        """Return the profile m/z and abundance arrays of ``spectra[scan]``.

        Resolving power and signal-to-noise are not available here and are
        returned as None.
        """
        # massList set up later
        # retention_time = spectrum.RetentionTime

        spectrum = spectra[scan]

        is_profile = c_long(1)

        masslist = spectrum.GetMassIntensityValues(is_profile)

        # index_to_cut = self.find_index_of_mass(1200, masslist[0])

        data_dict = {
            Labels.mz: array(masslist[0]),
            Labels.abundance: array(masslist[1]),
            Labels.rp: None,
            Labels.s2n: None,
        }

        return data_dict

    def run(self):
        '''creates the lcms obj'''

        d_parameters = default_parameters(self.file_location)
        self._import_mass_spectra(d_parameters)

    def _import_mass_spectra(self, d_params):
        '''Load every MS1 scan between the initial and final scan number into ``self.lcms``.

        ``d_params`` is updated in place for every imported scan.
        '''
        spectra = self.Bruker_Library.MSSpectrumCollection

        list_rt = self.get_bruker_retention_time()

        list_Tics = self.get_bruker_tics()

        list_scans = list()

        for scan_number in range(self.initial_scan_number, self.final_scan_number + 1):

            # only spectra with MSMSStage == 1 are imported
            if spectra[scan_number].MSMSStage == 1:
                # this label needs to go inside a encapsulation class for consistence
                d_params["label"] = Labels.bruker_profile

                d_params["polarity"] = self.get_polarity_mode(
                    spectra[scan_number])

                # scan numbers start at 1, the retention time array at 0
                d_params["rt"] = list_rt[scan_number - 1]

                d_params["scan_number"] = scan_number

                list_scans.append(scan_number)

                data_dict = self.get_data(spectra, scan_number)

                mass_spec = MassSpecProfile(data_dict, d_params,
                                            auto_process=self.auto_process)

                # NOTE(review): processing is triggered here regardless of
                # self.auto_process -- confirm this is intended
                mass_spec.process_mass_spec()

                self.lcms.add_mass_spectrum(mass_spec)

        self.lcms.retention_time = list_rt
        self.lcms.tic = list_Tics
        self.lcms.scans_number = list_scans
        # return each_mass_spectrum

    def get_lcms_obj(self):
        """get_lc_ms_class method should only be used when using this class as a Thread,
        otherwise use the run() method to return the lcms class"""

        if self.lcms.get(self._initial_scan_number):
            return self.lcms

        raise Exception("returning a empty lcms class")
class ReadBruker_SolarixTransientMassSpectra(Thread):
    '''Threaded reader that turns every scan of a Bruker Solarix transient
    experiment into a mass spectrum and stores them in an LCMS object.
    '''

    def __init__(self, d_directory_location, analyzer='ICR', instrument_label='15T',
                 auto_process=True, auto_noise=True, keep_profile=False):
        '''Validate the experiment directory and create the LCMS container.

        Parameters
        ----------
        d_directory_location : str or pathlib.Path
            Experiment directory; it must contain a ``scan.xml`` file.
        analyzer, instrument_label : str
            Forwarded to LCMSBase.
        auto_process, auto_noise, keep_profile : bool
            Stored on the instance and forwarded to ``get_mass_spectrum`` of
            each transient.

        Raises
        ------
        FileNotFoundError
            If the directory does not exist.
        FileExistsError
            If the directory has no ``scan.xml``.
        '''
        Thread.__init__(self)

        d_directory_location = Path(d_directory_location)

        if not d_directory_location.exists():
            raise FileNotFoundError("File does not exist: " + str(d_directory_location))

        self.scan_attr = d_directory_location / "scan.xml"

        if not self.scan_attr.exists():
            # The exception type is kept as FileExistsError because existing
            # callers may already catch it. The message is assembled from
            # adjacent literals so no continuation characters or indentation
            # leak into it.
            raise FileExistsError(
                "%s does not seem to be a valid Solarix Mass Spectra Experiment, "
                "maybe an Imaging experiment? "
                "please use the ReadBruker_SolarixTransientImage class for Imaging dataset"
                % d_directory_location)

        self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label)

        self.auto_process = auto_process
        self.auto_noise = auto_noise
        self.keep_profile = keep_profile

    def get_scan_attr(self):
        '''Parse ``scan.xml`` and return ``{scan_number: (retention_time, tic)}``.

        Retention times come from the ``minutes`` elements, TICs from the
        ``tic`` elements and scan numbers from the ``count`` elements.
        '''
        from bs4 import BeautifulSoup

        # the handle is closed as soon as the document has been parsed
        with self.scan_attr.open() as scan_file:
            soup = BeautifulSoup(scan_file, 'xml')

        list_rt = [float(rt.text) for rt in soup.find_all('minutes')]
        list_tic = [float(tic.text) for tic in soup.find_all('tic')]
        list_scan = [int(scan.text) for scan in soup.find_all('count')]

        dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic)))

        return dict_scan_rt_tic

    def import_mass_spectra(self):
        '''Read every scan listed in ``scan.xml`` (ascending order) into ``self.lcms``.'''
        dict_scan_rt_tic = self.get_scan_attr()

        list_rt, list_tic = list(), list()

        list_scans = sorted(dict_scan_rt_tic)

        for scan_number in list_scans:

            mass_spec = self.get_mass_spectrum(scan_number)

            self.lcms.add_mass_spectrum(mass_spec)

            retention_time, tic = dict_scan_rt_tic[scan_number]
            list_rt.append(retention_time)
            list_tic.append(tic)

        self.lcms.retention_time = list_rt
        self.lcms.tic = list_tic
        self.lcms.scans_number = list_scans

    def get_mass_spectrum(self, scan_number):
        '''Return the mass spectrum computed from the transient of ``scan_number``.'''
        bruker_reader = ReadBrukerSolarix(self.lcms.file_location)

        bruker_transient = bruker_reader.get_transient(scan_number)

        mass_spec = bruker_transient.get_mass_spectrum(
            plot_result=False,
            auto_process=self.auto_process,
            keep_profile=self.keep_profile,
            auto_noise=self.auto_noise)

        return mass_spec

    def run(self):
        '''creates the lcms obj'''

        self.import_mass_spectra()

    def get_lcms_obj(self):
        '''Return the LCMS object, raising if it evaluates as empty/falsy.'''
        if self.lcms:
            return self.lcms

        raise Exception("returning a empty lcms class")
class ReadCoremsMassSpectraText(ReadCoremsMasslist, Thread):
    '''Threaded reader that loads the per-scan files of a ``.corems``
    location into an LCMS object.
    '''

    def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown'):
        """
        # Parameters
        ----------
        file_location: text, pathlib.Path(), or s3path.S3Path
            Location of the ``.corems`` data. Plain text is wrapped in a
            pathlib.Path; pass an S3Path object explicitly when one is needed.
        analyzer: text
            Forwarded to LCMSBase.
        instrument_label: text
            Forwarded to LCMSBase.

        # Raises
        ----------
        FileNotFoundError: the location does not exist
        TypeError: the location suffix is not '.corems'
        """
        if isinstance(file_location, str):
            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
            file_location = Path(file_location)

        if not file_location.exists():
            raise FileNotFoundError("%s not found" % file_location)

        if file_location.suffix != '.corems':
            raise TypeError("%s is not a valid CoreMS file" % file_location)

        Thread.__init__(self)

        ReadCoremsMasslist.__init__(self, file_location)

        self.lcms = LCMSBase(self.file_location,
                             analyzer=analyzer, instrument_label=instrument_label)

    def get_scans_filename(self):
        '''Return ``(scan_number_text, path)`` pairs sorted by integer scan number.

        The scan number is whatever follows 'scan' in the file stem.
        '''
        # NOTE: '[!.json]' is a character class, not a suffix filter: it
        # rejects any name whose last character is one of '.', 'j', 's', 'o'
        # or 'n', which is what excludes the '*.json' files here.
        all_other = self.file_location.glob('*_scan*[!.json]')

        scans_filepath = [(file_path_obj.stem.split('scan')[1], file_path_obj)
                          for file_path_obj in all_other]

        scans_filepath.sort(key=lambda m: int(m[0]))

        return scans_filepath

    def set_filepath_datatype_and_delimiter(self, file_path_obj):
        '''Point the reader at ``file_path_obj`` and configure how to parse it.

        ``self.data_type`` (and, for text formats, ``self.delimiter``) are
        chosen from the file suffix. Raises NotImplementedError for suffixes
        other than .pkl, .csv, .xlsx and .txt.
        '''
        self.file_location = file_path_obj

        suffix = file_path_obj.suffix

        # data_type is now assigned; the previous '==' comparisons were
        # no-ops that left the attribute unchanged
        if suffix == '.pkl':

            self.data_type = 'dataframe'

        elif suffix == '.csv':

            self.data_type = 'txt'
            self.delimiter = ','

        elif suffix == '.xlsx':

            self.data_type = 'excel'
            self.delimiter = ','

        elif suffix == '.txt':

            self.data_type = 'txt'
            self.delimiter = '\t'
            print('WARNING using tab as delimiter')

        else:
            raise NotImplementedError('%s data not yet supported ' % suffix)

    def import_mass_spectra(self):
        '''Read every scan file, in scan order, and fill the LCMS object.'''
        list_rt, list_tic, list_scan = list(), list(), list()

        for scan_number, file_path_obj in self.get_scans_filename():

            self.set_filepath_datatype_and_delimiter(file_path_obj)

            mass_spec = self.get_mass_spectrum(int(scan_number))

            list_scan.append(int(scan_number))

            list_rt.append(mass_spec.retention_time)

            list_tic.append(mass_spec.tic)

            self.lcms.add_mass_spectrum(mass_spec)

        self.lcms.retention_time = list_rt
        # the other readers in this file publish the TICs as ``lcms.tic``;
        # the former ``tic_list`` attribute is still set for existing users
        self.lcms.tic = list_tic
        self.lcms.tic_list = list_tic
        self.lcms.scans_number = list_scan

    def run(self):
        '''creates the lcms obj'''

        self.import_mass_spectra()

    def get_lcms_obj(self):
        '''Return the LCMS object, raising if it evaluates as empty/falsy.'''
        if self.lcms:
            return self.lcms

        raise Exception("returning a empty lcms class")
class ImportLCMSThermoMSFileReader(Thread):
    """
    Read FULL and PROFILE (it ignores all other scans) raw file data and store it return a LCMS class
    *  Default behavior is to load all scans numbers
    *  set start_scan_number and final_scan_number to change it before calling start(), or run()
    *  Noise threshold will break the mass_spec.process_mass_spec() if the method in the
       MassSpecSetting class is set to something other than Relative Abundance
       (it needs to be fixed to work with all methods)
    """

    def __init__(self, file_location):
        """Open the raw file through the MSFileReader COM object and record its scan bounds."""
        Thread.__init__(self)

        self.thermo_Library = CreateObject("MSFileReader.XRawfile")

        self.thermo_Library.open(file_location)

        # status code of selecting the controller, checked right below
        self.res = self.thermo_Library.SetCurrentController(0, 1)

        self.check_load_success()

        self.LCMS = LCMSBase(file_location)

        self._initial_scan_number = 1

        self._final_scan_number = self.get_scans_numbers()

        self.file_location = file_location

    @property
    def initial_scan_number(self):
        """First scan number that will be imported."""
        return self._initial_scan_number

    @property
    def final_scan_number(self):
        """Last scan number (inclusive) that will be imported."""
        return self._final_scan_number

    def check_scan(self, scan):
        """Return True when ``scan`` does not exceed the number of spectra."""
        scan_numbers = self.get_scans_numbers()
        return scan <= scan_numbers

    @initial_scan_number.setter
    def initial_scan_number(self, start_scan_number):
        if self.check_scan(start_scan_number):
            self._initial_scan_number = start_scan_number
        else:
            raise Exception(
                "startscan and finalscan should be less than %s"
                % self.get_scans_numbers()
            )

    @final_scan_number.setter
    def final_scan_number(self, final_scan_number):
        if self.check_scan(final_scan_number):
            self._final_scan_number = final_scan_number
        else:
            raise Exception(
                "startscan and finalscan should be less than %s"
                % self.get_scans_numbers()
            )

    def run(self):
        '''thread will automatically process mass spectrum
        use the get_mass_spectra class to import without processing mass spectrum'''

        d_parameters = default_parameters(self.file_location)
        self._import_mass_spectra(d_parameters)

        # return self.LCMS

    def get_mass_spectra(self, auto_process=True):
        """Import all scans synchronously and return the LCMS object."""
        d_parameters = default_parameters(self.file_location)
        self._import_mass_spectra(d_parameters, auto_process=auto_process)
        return self.LCMS

    def check_load_success(self):
        """ 0 if successful; otherwise, see Error Codes on MSFileReader Manual

        Returns True (and sets ``self.break_it`` to False) when ``self.res``
        is 0, raises ImportError carrying the status code otherwise.
        """
        if self.res == 0:
            self.break_it = False
            return True

        raise ImportError(str(self.res))

    def get_filter_for_scan_num(self, scan_number):
        """Return the filter string of ``scan_number`` split on whitespace.

        e.g.  ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']

        Raises IOError when the library reports an error code.
        """
        str_filter = BSTR(None)
        error = self.thermo_Library.GetFilterForScanNum(scan_number, byref(str_filter))
        if error:
            raise IOError(
                "scan %i GetFilterForScanNum error : %s" % (scan_number, str(error))
            )

        return str(str_filter.value).split()

    def check_full_scan(self, scan_number):
        """Return True when the fifth token of the filter string is "Full"."""
        scan_mode_symbol = self.get_filter_for_scan_num(scan_number)[4]

        return scan_mode_symbol == "Full"

    def get_polarity_mode(self, scan_number):
        """Return 1 for positive and -1 for negative ion mode.

        The polarity symbol is the second token of the filter string.
        Raises Exception when the symbol is neither "+" nor "-".
        """
        polarity_symbol = self.get_filter_for_scan_num(scan_number)[1]

        if polarity_symbol == "+":
            # return "POSITIVE_ION_MODE"
            return 1

        if polarity_symbol == "-":
            return -1

        raise Exception("Polarity Mode Unknown, please set it manually")

    def get_data(self, scan, d_parameter):
        """Return the label data of ``scan`` as a dictionary.

        Rows 0-4 of the label data are read as m/z, abundance, resolving
        power, baseline noise and noise. The average noise estimate is written
        into ``d_parameter`` (mutated in place).
        """
        scan = c_long(scan)
        pvarLabels = VARIANT()
        pvarFlags = VARIANT()

        self.thermo_Library.GetLabelData(pvarLabels, pvarFlags, scan)
        scans_labels = numpy.array(pvarLabels.value)

        mz = scans_labels[0]
        magnitude = scans_labels[1]
        rp = scans_labels[2]
        base_noise = scans_labels[3]
        noise = scans_labels[4]
        # charge = scans_labels[5]

        # noise estimate: (noise - baseline) / 3, per peak
        array_noise_std = (numpy.array(noise) - numpy.array(base_noise)) / 3
        l_signal_to_noise = numpy.array(magnitude) / array_noise_std

        # NOTE(review): both keys receive the same average -- confirm intent
        d_parameter["baselise_noise"] = numpy.average(array_noise_std)

        d_parameter["baselise_noise_std"] = numpy.average(array_noise_std)

        data_dict = {
            Labels.mz: mz,
            Labels.abundance: magnitude,
            Labels.rp: rp,
            Labels.s2n: l_signal_to_noise,
        }

        return data_dict

    def get_scans_numbers(self):
        """Return the number of spectra reported by the library."""
        nScans = c_long()
        self.thermo_Library.GetNumSpectra(nScans)

        return int(nScans.value)

    def get_ScanHeaderInfoForScanNum(self, scan_number):
        """Return ``(retention_time, tic)`` from the scan header of ``scan_number``.

        The remaining header fields are requested because the library call
        takes them as output arguments, but they are not returned.
        """
        nScanNumber = c_long(scan_number)
        nPackets = c_long(0)
        dRetantionTime = c_double(0.0)
        dLowMass = c_double(0.0)
        dHighMass = c_double(0.0)
        dTIC = c_double(0.0)
        dBasePeakMass = c_double(0.0)
        dBasePeakIntensity = c_double(0.0)
        nChannels = c_long(0)
        bUniformTime = c_long(False)
        dFrequency = c_double(0.0)
        self.thermo_Library.GetScanHeaderInfoForScanNum(
            nScanNumber,
            nPackets,
            dRetantionTime,
            dLowMass,
            dHighMass,
            dTIC,
            dBasePeakMass,
            dBasePeakIntensity,
            nChannels,
            bUniformTime,
            dFrequency,
        )

        return dRetantionTime.value, dTIC.value

    def is_profile_scan_for_scan_num(self, scan_number):
        """Return True when the library flags ``scan_number`` as a profile scan.

        Raises IOError when the library reports an error code.
        """
        IsProfileScan = c_long()
        error = self.thermo_Library.IsProfileScanForScanNum(
            c_long(scan_number), byref(IsProfileScan)
        )
        if error:
            # a single formatted message, as in get_filter_for_scan_num: with
            # two positional arguments IOError treats them as (errno, strerror)
            raise IOError(
                "scan %i IsProfileScanForScanNum error : %s" % (scan_number, str(error))
            )

        # print (IsProfileScan.value, bool(1))
        return bool(IsProfileScan.value)

    def _import_mass_spectra(self, d_params, auto_process=True):
        """Load every FULL, profile-mode scan in the selected range into ``self.LCMS``.

        ``d_params`` is updated in place for every imported scan.
        ``auto_process`` is accepted for interface compatibility; it is not
        used when building the centroid spectra.
        """
        # Each_Mass_Spectrum = namedtuple('each_mass_spectrum', ['mass_list', 'abundance_list', 'retention_time', 'scan_number', 'tic_number'])

        # raises ImportError when the file was not loaded correctly
        self.check_load_success()

        list_Tics = list()

        list_RetentionTimeSeconds = list()

        list_scans = list()

        # print(self.initial_scan_number, self.final_scan_number)
        for scan_number in range(self.initial_scan_number, self.final_scan_number + 1):
            # only import FULL scans and Profile Mode, it ignores all others
            if not self.check_full_scan(scan_number):
                continue

            if not self.is_profile_scan_for_scan_num(scan_number):
                continue

            d_params["label"] = Labels.thermo_centroid

            d_params["polarity"] = self.get_polarity_mode(scan_number)

            d_params["rt"], TIC = self.get_ScanHeaderInfoForScanNum(scan_number)

            d_params["scan_number"] = scan_number

            list_RetentionTimeSeconds.append(d_params.get("rt"))

            list_Tics.append(TIC)

            list_scans.append(scan_number)

            data_dict = self.get_data(scan_number, d_params)

            mass_spec = MassSpecCentroid(data_dict, d_params)

            self.LCMS.add_mass_spectrum(mass_spec)

        # pool = multiprocessing.Pool(5)
        # result = pool.starmap(MassSpecCentroid, results)
        # for ms in result:
        # self.LCMS.add_mass_spectrum(ms)

        self.LCMS.retention_time = list_RetentionTimeSeconds
        self.LCMS.set_tic_list(list_Tics)
        self.LCMS.set_scans_number_list(list_scans)

    def get_lcms(self):
        """get_lc_ms_class method should only be used when using this class as a Thread,
        otherwise use the run() method to return the LCMS class"""

        if self.LCMS.get(self._initial_scan_number):
            return self.LCMS

        raise Exception("returning a empty LCMS class")
class ReadHDF_BoosterMassSpectra(Thread):
    '''Threaded reader that loads every scan of a Booster HDF file into an LCMS object.'''

    def __init__(self, file_location, analyzer='ICR', instrument_label='21T', auto_process=True):
        '''Open the HDF file and record the scans it contains.

        The keys of the HDF file are converted to integers and kept, sorted,
        in ``self.list_scans``. ``analyzer`` and ``instrument_label`` are
        forwarded to LCMSBase and stored; ``auto_process`` is stored and later
        forwarded to MassSpecProfile.
        '''
        Thread.__init__(self)

        self.lcms = LCMSBase(file_location, analyzer=analyzer, instrument_label=instrument_label)

        self.hdf_obj = h5py.File(file_location, 'r')

        self.list_scans = sorted(int(key) for key in self.hdf_obj.keys())

        self.initial_scan_number = self.list_scans[0]

        self.final_scan_number = self.list_scans[-1]

        self.file_location = file_location

        # honour the caller's choice instead of always forcing True
        self.auto_process = auto_process

        self.analyzer = analyzer

        self.instrument_label = instrument_label

    def get_polarity(self, file_location, scan):
        '''Return -1 when the scan's ``r_h_polarity`` attribute is 'negative scan', else +1.

        ``file_location`` is kept for interface compatibility only: the
        attribute is read from the HDF file opened by the constructor, so the
        file is no longer reopened (and left open) for every scan.
        '''
        polarity = self.get_attr_data(scan, 'r_h_polarity')

        if polarity == 'negative scan':
            return -1

        return +1

    def get_attr_data(self, scan, attr_srt):
        '''Return attribute ``attr_srt`` of the dataset stored under ``str(scan)``.'''
        return self.hdf_obj[str(scan)].attrs[attr_srt]

    def import_mass_spectra(self, d_params):
        '''Read every scan in ``self.list_scans`` and fill the LCMS object.

        ``d_params`` is updated in place for every scan.
        '''
        list_rt, list_tic = list(), list()

        for scan_number in self.list_scans:

            # store the start time itself; list.append() returns None, which
            # previously became the "rt" value and a second list entry
            d_params["rt"] = self.get_attr_data(scan_number, 'r_h_start_time')

            d_params["scan_number"] = scan_number

            d_params['label'] = Labels.booster_profile

            d_params["polarity"] = self.get_polarity(self.file_location, scan_number)

            calibration_params = self.get_attr_data(scan_number, 'r_cparams')

            d_params["Aterm"] = calibration_params[0]

            d_params["Bterm"] = calibration_params[1]

            d_params['analyzer'] = self.analyzer

            d_params['instrument_label'] = self.instrument_label

            list_rt.append(d_params["rt"])

            list_tic.append(self.get_attr_data(scan_number, 'r_h_tic'))

            mass_spec = self.get_mass_spectrum(scan_number, d_params)

            self.lcms.add_mass_spectrum(mass_spec)

        self.lcms.retention_time = list_rt
        self.lcms.tic = list_tic
        self.lcms.scans_number = self.list_scans

    def get_mass_spectrum(self, scan, d_params):
        '''Return a MassSpecProfile built from rows 0 (m/z) and 1 (abundance) of the scan.

        Raises NotImplementedError when the dataset does not have exactly two
        rows (transient data is not supported yet).
        '''
        booster_data = self.hdf_obj[str(scan)]

        # value comparison: 'is not 2' tested object identity
        if booster_data.shape[0] != 2:

            raise NotImplementedError('opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum')

        data_dict = {
            Labels.mz: booster_data[0],
            Labels.abundance: booster_data[1],
            Labels.rp: None,
            Labels.s2n: None,
        }

        mass_spec = MassSpecProfile(data_dict, d_params, auto_process=self.auto_process)

        return mass_spec

    def run(self):
        '''creates the lcms obj'''

        d_parameters = default_parameters(self.file_location)
        self.import_mass_spectra(d_parameters)

    def get_lcms_obj(self):
        '''Return the LCMS object, raising when it has no data for the initial scan.'''
        if self.lcms.get(self.initial_scan_number):
            return self.lcms

        raise Exception("returning a empty lcms class")