Exemple #1
0
    def __init__(self,
                 d_directory_location,
                 analyzer='ICR',
                 instrument_label='15T',
                 auto_process=True,
                 auto_noise=True,
                 keep_profile=False):
        """Validate the experiment directory and create the LC-MS container.

        Parameters
        ----------
        d_directory_location : str or path-like
            Experiment directory; it is wrapped in ``pathlib.Path`` and must
            contain a ``scan.xml`` file.
        analyzer, instrument_label : str
            Forwarded positionally to ``LCMSBase``.
        auto_process, auto_noise, keep_profile : bool
            Stored on the instance unchanged.

        Raises
        ------
        FileNotFoundError
            If the directory does not exist.
        FileExistsError
            If ``scan.xml`` is missing from the directory.
        """
        Thread.__init__(self)

        d_directory_location = Path(d_directory_location)

        if not d_directory_location.exists():
            raise FileNotFoundError("File does not exist: " +
                                    str(d_directory_location))

        self.scan_attr = d_directory_location / "scan.xml"
        if not self.scan_attr.exists():
            # The exception type is kept for callers that already catch it.
            # The message is built from adjacent literals so it no longer
            # carries the indentation of backslash-continued source lines.
            raise FileExistsError(
                "%s does not seem to be a valid Solarix Mass Spectra Experiment, "
                "maybe an Imaging experiment? "
                "Please use the ReadBruker_SolarixTransientImage class for Imaging datasets"
                % d_directory_location)

        self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label)

        self.auto_process = auto_process
        self.auto_noise = auto_noise
        self.keep_profile = keep_profile
Exemple #2
0
    def __init__(self,
                 file_location,
                 analyzer='Unknown',
                 instrument_label='Unknown'):
        """Check the CoreMS location, then initialise Thread and ReadCoremsMasslist.

        Parameters
        ----------
        file_location : str, pathlib.Path or s3path.S3Path
            Location of the ``.corems`` data. A plain string is wrapped in
            ``pathlib.Path``; any other object is used as given.
        analyzer, instrument_label : str
            Forwarded as keyword arguments to ``LCMSBase``.

        Raises
        ------
        FileNotFoundError
            When ``file_location`` does not exist.
        TypeError
            When the suffix is not ``.corems``.
        """
        # Strings default to a local Path; pass an S3Path explicitly if needed.
        location = file_location
        if isinstance(location, str):
            location = Path(location)

        if not location.exists():
            raise FileNotFoundError("%s not found" % location)

        if location.suffix != '.corems':
            raise TypeError("%s is not a valid CoreMS file" % location)

        Thread.__init__(self)
        ReadCoremsMasslist.__init__(self, location)

        # self.file_location is read here; presumably it is set by
        # ReadCoremsMasslist.__init__ -- confirm against that class.
        self.lcms = LCMSBase(self.file_location,
                             analyzer=analyzer,
                             instrument_label=instrument_label)
Exemple #3
0
    def __init__(self, d_directory_location, analyzer='ICR', instrument_label='15T', 
                       auto_process=True, auto_noise=True, keep_profile=False):
        """Validate the experiment directory and create the LC-MS container.

        Parameters
        ----------
        d_directory_location : str, pathlib.Path or s3path.S3Path
            Experiment directory. A plain string is wrapped in
            ``pathlib.Path``; any other object is used as given. The
            directory must contain a ``scan.xml`` file.
        analyzer, instrument_label : str
            Forwarded positionally to ``LCMSBase``.
        auto_process, auto_noise, keep_profile : bool
            Stored on the instance unchanged.

        Raises
        ------
        FileNotFoundError
            If the directory does not exist.
        FileExistsError
            If ``scan.xml`` is missing from the directory.
        """
        Thread.__init__(self)

        if isinstance(d_directory_location, str):
            # Strings default to a local Path; pass an S3Path explicitly if needed.
            d_directory_location = Path(d_directory_location)

        if not d_directory_location.exists():
            raise FileNotFoundError("File does not exist: " + str(d_directory_location))

        self.scan_attr = d_directory_location / "scan.xml"

        if not self.scan_attr.exists():
            # The exception type is kept for callers that already catch it.
            # The message is built from adjacent literals so it no longer
            # carries the indentation of backslash-continued source lines.
            raise FileExistsError(
                "%s does not seem to be a valid Solarix Mass Spectra Experiment, "
                "maybe an Imaging experiment? "
                "Please use the ReadBruker_SolarixTransientImage class for Imaging datasets"
                % d_directory_location)

        self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label)

        self.auto_process = auto_process
        self.auto_noise = auto_noise
        self.keep_profile = keep_profile
Exemple #4
0
    def __init__(self, file_location):
        """Initialise the thread and the HDF reader, then index the scans.

        ``file_location`` is handed unchanged to
        ``ReadCoreMSHDF_MassSpectrum.__init__``.
        """
        Thread.__init__(self)
        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)

        # self.file_location and self.h5pydata are read below; presumably both
        # are set by ReadCoreMSHDF_MassSpectrum.__init__ -- confirm.
        self.lcms = LCMSBase(self.file_location)

        # Keys of the HDF data are converted to int and kept in ascending order.
        scan_ids = [int(key) for key in self.h5pydata.keys()]
        scan_ids.sort()
        self.list_scans = scan_ids
Exemple #5
0
 def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown'):
     """Check the CoreMS location, then initialise Thread and ReadCoremsMasslist.

     Parameters
     ----------
     file_location : str or path-like
         Location of the ``.corems`` data; it is passed on unchanged to
         ``ReadCoremsMasslist.__init__``.
     analyzer, instrument_label : str
         Forwarded as keyword arguments to ``LCMSBase``.

     Raises
     ------
     FileNotFoundError
         When ``file_location`` does not exist.
     TypeError
         When the suffix is not ``.corems``.
     """
     location = Path(file_location)

     # exists() must be *called*: the bare bound method is always truthy, so
     # the original check could never report a missing file.
     if not location.exists():
         raise FileNotFoundError("%s not found" % file_location)

     if location.suffix != '.corems':
         raise TypeError("%s is not a valid CoreMS file" % file_location)

     Thread.__init__(self)

     ReadCoremsMasslist.__init__(self, file_location)

     # self.file_location is read here; presumably it is set by
     # ReadCoremsMasslist.__init__ -- confirm against that class.
     self.lcms = LCMSBase(self.file_location, analyzer=analyzer, instrument_label=instrument_label)
Exemple #6
0
    def __init__(self, file_location):
        """Open the raw file and record its first and last spectrum numbers.

        ``file_location`` is passed to ``RawFileReaderAdapter.FileFactory``
        and to ``LCMSBase``, and is also kept on the instance.
        """
        Thread.__init__(self)

        self.file_location = file_location

        raw_file = RawFileReaderAdapter.FileFactory(file_location)
        self.iRawDataPlus = raw_file

        # The value returned by SelectInstrument(0, 1) is kept in self.res.
        self.res = raw_file.SelectInstrument(0, 1)

        self.lcms = LCMSBase(file_location)

        # Default scan range: first to last spectrum reported by the run header.
        run_header = raw_file.RunHeaderEx
        self._initial_scan_number = run_header.FirstSpectrum
        self._final_scan_number = run_header.LastSpectrum
Exemple #7
0
    def __init__(self, file_location):
        """Open the raw file through the ``MSFileReader.XRawfile`` COM object.

        ``file_location`` is opened with the COM object, passed to
        ``LCMSBase`` and kept on the instance. The scan range defaults to
        1 .. ``self.get_scans_numbers()``.
        """
        Thread.__init__(self)

        self.file_location = file_location

        reader = CreateObject("MSFileReader.XRawfile")
        self.thermo_Library = reader
        reader.open(file_location)

        # The value returned by SetCurrentController(0, 1) is kept in self.res
        # and inspected by check_load_success().
        self.res = reader.SetCurrentController(0, 1)
        self.check_load_success()

        # NOTE(review): this attribute is spelled ``LCMS`` while the other
        # readers in this file use ``lcms`` -- confirm which one callers expect.
        self.LCMS = LCMSBase(file_location)

        self._initial_scan_number = 1
        self._final_scan_number = self.get_scans_numbers()
Exemple #8
0
    def __init__(self, file_location, auto_process=True):
        """Open the analysis through the ``EDAL.MSAnalysis`` COM object.

        ``file_location`` is passed to ``LCMSBase`` and to the COM object's
        ``Open`` and kept on the instance; ``auto_process`` is stored
        unchanged. The scan range defaults to 1 .. ``self.get_scans_numbers()``.
        """
        Thread.__init__(self)

        self.file_location = file_location
        self.auto_process = auto_process

        self.lcms = LCMSBase(file_location)

        # Set up the COM object interface
        com_library = CreateObject("EDAL.MSAnalysis")
        self.Bruker_Library = com_library

        # The value returned by Open() is kept in self.res and inspected by
        # check_load_sucess().
        self.res = com_library.Open(file_location)
        self.check_load_sucess()

        self._initial_scan_number = 1
        self._final_scan_number = self.get_scans_numbers()
Exemple #9
0
class ReadCoreMSHDF_MassSpectra(ReadCoreMSHDF_MassSpectrum, Thread):
    """Thread that loads every scan of a CoreMS HDF file into an LCMSBase object."""

    def __init__(self, file_location):
        """Initialise both parent classes and index the scans.

        ``file_location`` is handed unchanged to
        ``ReadCoreMSHDF_MassSpectrum.__init__``.
        """
        Thread.__init__(self)
        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)

        # self.file_location and self.h5pydata are read below; presumably both
        # are set by ReadCoreMSHDF_MassSpectrum.__init__ -- confirm.
        self.lcms = LCMSBase(self.file_location)

        # Keys of the HDF data are converted to int and kept in ascending order.
        scan_ids = [int(key) for key in self.h5pydata.keys()]
        scan_ids.sort()
        self.list_scans = scan_ids

    def import_mass_spectra(self):
        """Read each scan in ``self.list_scans`` and fill ``self.lcms``.

        Every mass spectrum is added to ``self.lcms``; its retention time and
        TIC are collected and stored on ``self.lcms`` together with the scan
        numbers.
        """
        retention_times = []
        tics = []

        for scan_id in self.list_scans:
            spectrum = self.get_mass_spectrum(scan_id)
            retention_times.append(spectrum.retention_time)
            tics.append(spectrum.tic)
            self.lcms.add_mass_spectrum(spectrum)

        self.lcms.retention_time = retention_times
        self.lcms.tic = tics
        self.lcms.scans_number = self.list_scans

    def run(self):
        """Thread entry point: populate ``self.lcms``."""
        self.import_mass_spectra()

    def get_lcms_obj(self):
        """Return ``self.lcms``; raise ``Exception`` when it evaluates as false."""
        if not self.lcms:
            raise Exception("returning a empty lcms class")
        return self.lcms
Exemple #10
0
    def __init__(self,
                 file_location,
                 analyzer='ICR',
                 instrument_label='21T',
                 auto_process=True):
        """Open the HDF file and index the scans it contains.

        Parameters
        ----------
        file_location : str, pathlib.Path or s3path.S3Path
            Full data path. A plain string is wrapped in ``pathlib.Path``;
            an ``S3Path`` is read fully into memory before being opened.
        analyzer, instrument_label : str
            Forwarded to ``LCMSBase`` and stored on the instance.
        auto_process : bool
            Stored on the instance.

        Raises
        ------
        IndexError
            If the file has no top-level keys (``list_scans`` is empty).
        """
        Thread.__init__(self)

        if isinstance(file_location, str):
            # Strings default to a local Path; pass an S3Path explicitly if
            # needed. The original stored the Path on self only and then
            # overwrote it with the raw string, so the conversion was lost.
            file_location = Path(file_location)

        self.file_location = file_location

        # Honour the caller's choice (it used to be hard-coded to True).
        self.auto_process = auto_process

        self.analyzer = analyzer

        self.instrument_label = instrument_label

        self.lcms = LCMSBase(file_location,
                             analyzer=analyzer,
                             instrument_label=instrument_label)

        if isinstance(file_location, S3Path):
            # Read the remote object into memory and close the handle.
            with file_location.open('rb') as remote_file:
                data = BytesIO(remote_file.read())
        else:
            data = file_location

        self.hdf_obj = h5py.File(data, 'r')

        # Top-level keys are converted to int and kept in ascending order.
        self.list_scans = sorted(int(key) for key in self.hdf_obj.keys())

        self.initial_scan_number = self.list_scans[0]

        self.final_scan_number = self.list_scans[-1]
Exemple #11
0
    def __init__(self, file_location, analyzer='ICR', instrument_label='21T', auto_process=True):
        """Open the HDF file and index the scans it contains.

        Parameters
        ----------
        file_location
            Passed unchanged to ``LCMSBase`` and ``h5py.File`` and stored on
            the instance.
        analyzer, instrument_label : str
            Forwarded to ``LCMSBase`` and stored on the instance.
        auto_process : bool
            Stored on the instance.

        Raises
        ------
        IndexError
            If the file has no top-level keys (``list_scans`` is empty).
        """
        Thread.__init__(self)

        self.lcms = LCMSBase(file_location, analyzer=analyzer, instrument_label=instrument_label)

        self.hdf_obj = h5py.File(file_location, 'r')

        # Top-level keys are converted to int and kept in ascending order.
        self.list_scans = sorted(int(key) for key in self.hdf_obj.keys())

        self.initial_scan_number = self.list_scans[0]

        self.final_scan_number = self.list_scans[-1]

        self.file_location = file_location

        # Honour the caller's choice (it used to be hard-coded to True).
        self.auto_process = auto_process

        self.analyzer = analyzer

        self.instrument_label = instrument_label
Exemple #12
0
class ImportLCMSThermoMSFileReader(Thread):
    """     Read FULL mode spectra only from raw file data and store it return a LC-MS class
    *  Default behavior is to load all scans numbers

    *  set start_scan_number  and final_scan_number to change it before calling start(), or run()
    """
    def __init__(self, file_location):
        """Open the raw file and record its first and last spectrum numbers.

        ``file_location`` is passed to ``RawFileReaderAdapter.FileFactory``
        and to ``LCMSBase``, and is also kept on the instance.
        """
        Thread.__init__(self)

        self.file_location = file_location

        raw_file = RawFileReaderAdapter.FileFactory(file_location)
        self.iRawDataPlus = raw_file

        # The value returned by SelectInstrument(0, 1) is kept in self.res.
        self.res = raw_file.SelectInstrument(0, 1)

        self.lcms = LCMSBase(file_location)

        # Default scan range: first to last spectrum reported by the run header.
        run_header = raw_file.RunHeaderEx
        self._initial_scan_number = run_header.FirstSpectrum
        self._final_scan_number = run_header.LastSpectrum

    @property
    def initial_scan_number(self):
        return self._initial_scan_number

    @property
    def final_scan_number(self):
        return self._final_scan_number

    def run(self):
        """Thread entry point: import every scan into ``self.lcms``.

        Parameters come from ``default_parameters(self.file_location)`` and
        the import runs with its default ``auto_process=True``. Call
        ``get_mass_spectra(auto_process=False)`` to import without
        processing. Nothing is returned.
        """
        self._import_mass_spectra(default_parameters(self.file_location))

    def get_mass_spectra(self, auto_process=True):
        """Import the scans synchronously and return ``self.lcms``.

        ``auto_process`` is forwarded to ``_import_mass_spectra``.
        """
        self._import_mass_spectra(default_parameters(self.file_location),
                                  auto_process=auto_process)
        return self.lcms

    def check_load_success(self):
        """ 0 if successful; otherwise, see Error Codes on MSFileReader Manual """
        if self.res == 0:

            self.break_it = False
            return True
        else:

            raise ImportError(str(self.res))

    def get_filter_for_scan_num(self, scan_number):
        """Returns the closest matching run time that corresponds to scan_number for the current
        controller. This function is only supported for MS device controllers.
        e.g.  ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']
        """
        scan_label = self.iRawDataPlus.GetScanEventStringForScanNumber(
            scan_number)

        return str(scan_label).split()

    def check_full_scan(self, scan_number):
        # scan_filter.ScanMode 0 = FULL
        scan_filter = self.iRawDataPlus.GetFilterForScanNumber(scan_number)

        return scan_filter.ScanMode == 0

    def get_polarity_mode(self, scan_number):

        polarity_symbol = self.get_filter_for_scan_num(scan_number)[1]

        if polarity_symbol == "+":

            return 1
            # return "POSITIVE_ION_MODE"

        elif polarity_symbol == "-":

            return -1

        else:

            raise Exception("Polarity Mode Unknown, please set it manually")

    def get_scan_header(self, scan):
        '''
        Get full dictionary of scan header meta data, i.e. AGC status, ion injection time, etc.
        '''
        header = self.iRawDataPlus.GetTrailerExtraInformation(scan)
        header_dic = {}
        for i in numpy.arange(header.Length):
            header_dic.update({header.Labels[i]: header.Values[i]})
        return header_dic

    def get_data(self, scan, d_parameter, scan_type):
        """Read one scan and return its data dictionary.

        Parameters
        ----------
        scan
            Scan number handed to the raw file reader.
        d_parameter : dict
            Updated in place with the noise entries when ``scan_type`` is
            "Centroid"; left untouched otherwise.
        scan_type : str
            "Centroid" reads the centroid stream; any other value reads the
            segmented scan.

        Returns
        -------
        dict
            Keyed by ``Labels.mz`` and ``Labels.abundance``; centroid data
            also carries ``Labels.rp`` and ``Labels.s2n``.
        """
        if scan_type != "Centroid":
            stats = self.iRawDataPlus.GetScanStatsForScanNumber(scan)
            segmented = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                scan, stats)

            abundances = list(segmented.Intensities)
            mz_values = list(segmented.Positions)

            return {
                Labels.mz: mz_values,
                Labels.abundance: abundances,
            }

        stream = self.iRawDataPlus.GetCentroidStream(scan, False)

        noises = list(stream.Noises)
        baseline_values = list(stream.Baselines)
        resolving_powers = list(stream.Resolutions)
        abundances = list(stream.Intensities)
        mz_values = list(stream.Masses)

        # Per-peak noise estimate: (noise - baseline) / 3
        noise_std = (numpy.array(noises) - numpy.array(baseline_values)) / 3
        signal_to_noise = numpy.array(abundances) / noise_std

        mean_noise_std = numpy.average(noise_std)
        d_parameter["baselise_noise_std"] = mean_noise_std

        # NOTE(review): the "_std_std" entry is also an average, not a
        # standard deviation -- confirm this is intended.
        d_parameter["baselise_noise_std_std"] = numpy.average(noise_std)

        return {
            Labels.mz: mz_values,
            Labels.abundance: abundances,
            Labels.rp: resolving_powers,
            Labels.s2n: signal_to_noise,
        }

    def is_profile_scan_for_scan_num(self, scan_number):

        scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
            scan_number)

        isCentroid = scanStatistics.IsCentroidScan

        return bool(not isCentroid)

    def get_summed_mass_spectrum(self,
                                 initial_scan_number,
                                 final_scan_number=None,
                                 auto_process=True,
                                 pd_method=True,
                                 pd_merge_n=100):
        """Sum the segmented scans of a scan range into one profile spectrum.

        Abundances are added only where the m/z values match exactly.

        Parameters
        ----------
        initial_scan_number : int or list
            First scan of the range, or an explicit list of scan numbers (in
            which case ``final_scan_number`` is ignored).
        final_scan_number : int, optional
            Last scan of the range (inclusive); defaults to the last scan of
            the file.
        auto_process : bool
            Forwarded to ``MassSpecProfile``.
        pd_method : bool
            Sum with pandas when True, otherwise with a plain dictionary.
        pd_merge_n : int
            With the pandas method, the collected scans are merged each time
            the scan number is a multiple of this value.

        Returns
        -------
        MassSpecProfile
            Built from the summed data. The polarity is taken from the first
            scan of the range.
        """
        d_params = default_parameters(self.file_location)

        # assumes scans are full scans or reduced profile scans
        d_params["label"] = Labels.thermo_profile

        if isinstance(initial_scan_number, list):
            scanrange = initial_scan_number
            d_params["polarity"] = self.get_polarity_mode(scanrange[0])
        else:
            d_params["polarity"] = self.get_polarity_mode(initial_scan_number)

            if final_scan_number is None:
                final_scan_number = self._final_scan_number

            scanrange = range(initial_scan_number, final_scan_number + 1)

        if pd_method:

            def sort_sum(series):
                """Sort by m/z and sum the rows that share the same index."""
                return series.sort_index().groupby(level=0).sum()

            # Running total (once merged) followed by the scans not merged yet.
            # pd.concat replaces Series.append, which pandas 2.0 removed.
            pieces = []

            for scan_number in tqdm(scanrange):
                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)
                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                pieces.append(
                    pd.Series(index=list(segmentedScan.Positions),
                              dtype='float64',
                              data=list(segmentedScan.Intensities)))

                # Merging early keeps memory down for complex data, at the
                # cost of speed.
                if scan_number % pd_merge_n == 0:
                    pieces = [sort_sum(pd.concat(pieces))]

            if pieces:
                summed = sort_sum(pd.concat(pieces))
            else:
                # empty scan range: nothing to sum
                summed = pd.Series(index=[], dtype='float64')

            data_dict = {
                Labels.mz: list(summed.index.values),
                Labels.abundance: list(summed.values),
            }

        else:
            all_mz = dict()

            for scan_number in tqdm(scanrange):

                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)

                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                for i in range(segmentedScan.Positions.Length):

                    mz = segmentedScan.Positions[i]
                    abundance = segmentedScan.Intensities[i]

                    if mz in all_mz:
                        all_mz[mz] = all_mz[mz] + abundance
                    else:
                        all_mz[mz] = abundance

            mz_all = sorted(all_mz)
            abun_all = [all_mz[mz] for mz in mz_all]

            data_dict = {
                Labels.mz: mz_all,
                Labels.abundance: abun_all,
            }

        print('Summed. Now Processing.')

        mass_spec = MassSpecProfile(data_dict,
                                    d_params,
                                    auto_process=auto_process)

        return mass_spec

    def _import_mass_spectra(self, d_params, auto_process=True):
        """Import every scan of the current scan range into ``self.lcms``.

        Scans for which ``check_full_scan`` is true are loaded as
        ``MassSpecProfile``; all other scans are loaded as
        ``MassSpecCentroid``.

        Parameters
        ----------
        d_params : dict
            Parameter dictionary; its "label", "polarity", "rt" and
            "scan_number" entries are overwritten for each scan.
        auto_process : bool
            Forwarded to ``MassSpecProfile``.
        """
        list_Tics = []

        list_RetentionTimeSeconds = []

        list_scans = []

        for scan_number in range(self.initial_scan_number,
                                 self.final_scan_number + 1):

            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                scan_number)

            # NOTE(review): the profile label is applied to centroid scans as
            # well -- confirm that is intended.
            d_params["label"] = Labels.thermo_profile

            d_params["polarity"] = self.get_polarity_mode(scan_number)

            d_params["rt"] = self.iRawDataPlus.RetentionTimeFromScanNumber(
                scan_number)

            d_params["scan_number"] = scan_number

            list_RetentionTimeSeconds.append(d_params.get("rt"))

            list_Tics.append(scanStatistics.TIC)

            list_scans.append(scan_number)

            if self.check_full_scan(scan_number):

                data_dict = self.get_data(scan_number, d_params, "Profile")

                print("loading profile scan number: ", scan_number)

                mass_spec = MassSpecProfile(data_dict,
                                            d_params,
                                            auto_process=auto_process)

            else:

                data_dict = self.get_data(scan_number, d_params, "Centroid")

                print("loading centroid scan number: ", scan_number)

                mass_spec = MassSpecCentroid(data_dict, d_params)

            self.lcms.add_mass_spectrum(mass_spec)

        # retention_time is assigned like tic and scans_number below (and like
        # the other readers in this file do); the original called it instead.
        self.lcms.retention_time = list_RetentionTimeSeconds
        self.lcms.tic = list_Tics
        self.lcms.scans_number = list_scans

    def get_lcms(self):
        """get_lc_ms_class method should only be used when using this class as a Thread, 
        otherwise use the run() method to return the LCMS class"""

        if self.lcms.get(self._initial_scan_number):
            return self.lcms
        else:
            self.run()

            if self.lcms.get(self._initial_scan_number):

                return self.lcms
            else:
                raise Exception("returning a empty LCMS class")

    def get_tic(self, plot=False):
        """
        Reads the TIC values for each scan from the Thermo headers
        Returns a pandas dataframe of Scans, TICs, and Times
        (Optionally) plots the TIC chromatogram.
        """
        first_scan = self._initial_scan_number
        final_scan = self._final_scan_number
        scanrange = range(first_scan, final_scan + 1)

        ms_tic = pd.DataFrame(index=scanrange, columns=['TIC', 'Time'])
        for scan in scanrange:
            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan)
            ms_tic.loc[scan, 'TIC'] = scanStatistics.TIC
            ms_tic.loc[scan, 'Time'] = scanStatistics.StartTime

        if plot:
            import matplotlib.pyplot as plt  #maybe better in top of file?
            fig, ax = plt.subplots(figsize=(6, 3))
            ax.plot(ms_tic['Time'], ms_tic['TIC'])
            ax.set_xlabel('Time (min)')
            ax.set_ylabel('TIC')
            plt.show()
            return ms_tic, fig
        return ms_tic

    def get_best_scans_idx(self, stdevs=2, method='mean', plot=False):
        '''
        Method to determine the best scan indexes for selective co-addition
        Based on calculating the mean (default) of the TIC values
        and setting an upper limit above/below that within X standard deviations.
        Mean or median makes limited difference, it seems.
        Empirically, 1-2 stdevs enough to filter out the worst datapoints.
        Optionally, plot the TIC with horizontal lines for the standard dev cutoffs.
        '''
        tic = self.get_tic()

        if method == 'median':
            tic_median = tic['TIC'].median()
        elif method == 'mean':
            tic_median = tic['TIC'].mean()
        else:
            print("Method " + print(str(method)) + " undefined")

        tic_std = tic['TIC'].std()

        upperlimit = tic_median - (stdevs * tic_std)
        lowerlimit = tic_median + (stdevs * tic_std)

        tic_filtered = tic[(tic['TIC'] > upperlimit)
                           & (tic['TIC'] < lowerlimit)]
        scans = list(tic_filtered.index.values)

        if plot:
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(figsize=(8, 4))
            ax.plot(tic['Time'], tic['TIC'])
            ax.axhline(y=upperlimit, c='r')
            ax.axhline(y=lowerlimit, c='r')
            return fig, scans
        else:
            return scans
Exemple #13
0
class ImportLCMSBrukerCompassXtract(Thread):
    '''class docs'''
    def __init__(self, file_location, auto_process=True):
        """Open the analysis through the ``EDAL.MSAnalysis`` COM object.

        ``file_location`` is passed to ``LCMSBase`` and to the COM object's
        ``Open`` and kept on the instance; ``auto_process`` is stored
        unchanged. The scan range defaults to 1 .. ``self.get_scans_numbers()``.
        """
        Thread.__init__(self)

        self.file_location = file_location
        self.auto_process = auto_process

        self.lcms = LCMSBase(file_location)

        # Set up the COM object interface
        com_library = CreateObject("EDAL.MSAnalysis")
        self.Bruker_Library = com_library

        # The value returned by Open() is kept in self.res and inspected by
        # check_load_sucess().
        self.res = com_library.Open(file_location)
        self.check_load_sucess()

        self._initial_scan_number = 1
        self._final_scan_number = self.get_scans_numbers()

    @property
    def initial_scan_number(self):
        return self._initial_scan_number

    @property
    def final_scan_number(self):
        return self._final_scan_number

    def check_scan(self, scan):

        scan_numbers = self.get_scans_numbers()
        return scan <= scan_numbers

    @initial_scan_number.setter
    def initial_scan_number(self, initial_scan_number):
        if self.check_scan(initial_scan_number):
            self._initial_scan_number = initial_scan_number
        else:
            raise Exception("startscan and finalscan should be less than %s" %
                            self.get_scans_numbers())

    @final_scan_number.setter
    def final_scan_number(self, final_scan_number):

        if self.check_scan(final_scan_number):
            self._final_scan_number = final_scan_number
        else:
            raise Exception("startscan and finalscan should be less than %s" %
                            self.get_scans_numbers())

    def get_scans_numbers(self):

        scan_numbers = self.Bruker_Library.MSSpectrumCollection.Count

        return scan_numbers

    def get_polarity_mode(self, spectrum):

        polarity_symbol = spectrum.Polarity

        if polarity_symbol == 0:

            return 1
            # return "POSITIVE_ION_MODE"

        elif polarity_symbol == 1:

            return -1
            # return "NEGATIVE_ION_MODE"

        else:

            raise IOError("Could not read mass spectrum polarity mode")

    def check_load_sucess(self):
        """ 0 if successful; otherwise, see Error Codes """

        if self.res == 0:

            self.break_it = False

        else:

            raise ImportError(str(self.res))

    def get_bruker_tics(self):
        """Return the "SumIntensity" analysis data as an array.

        When the COM object reports no such data, ``[0]`` is returned, the
        same fallback ``get_bruker_retention_time`` uses. The original had no
        fallback and failed with an unbound local variable.
        """
        strAnalysisData = BSTR("SumIntensity")

        if not self.Bruker_Library.HasAnalysisData(strAnalysisData):
            return [0]

        return array(self.Bruker_Library.GetAnalysisData(strAnalysisData))

    def get_bruker_retention_time(self):
        """Return the "RetentionTime" analysis data as an array.

        ``[0]`` is returned when the COM object reports no such data.
        """
        data_name = BSTR("RetentionTime")

        if not self.Bruker_Library.HasAnalysisData(data_name):
            return [0]

        retention_times = self.Bruker_Library.GetAnalysisData(data_name)
        return array(retention_times)

    @staticmethod
    def get_data(spectra, scan):
        """Build the data dictionary of one spectrum of the collection.

        ``spectra[scan]`` is asked for its mass/intensity values with
        ``c_long(1)`` as the "is profile" argument. The first two items of
        the result become the ``Labels.mz`` and ``Labels.abundance`` arrays;
        ``Labels.rp`` and ``Labels.s2n`` are set to None.
        """
        profile_flag = c_long(1)

        mass_intensity = spectra[scan].GetMassIntensityValues(profile_flag)

        mz_values = array(mass_intensity[0])
        abundances = array(mass_intensity[1])

        return {
            Labels.mz: mz_values,
            Labels.abundance: abundances,
            Labels.rp: None,
            Labels.s2n: None,
        }

    def run(self):
        """Thread entry point: fill ``self.lcms`` using the default parameters.

        The parameters come from ``default_parameters(self.file_location)``.
        """
        self._import_mass_spectra(default_parameters(self.file_location))

    def _import_mass_spectra(self, d_params):
        """Import the spectra of the current scan range into ``self.lcms``.

        Only spectra whose ``MSMSStage`` equals 1 are imported.

        Parameters
        ----------
        d_params : dict
            Parameter dictionary; its "label", "polarity", "rt" and
            "scan_number" entries are overwritten for each imported scan.
        """
        collection = self.Bruker_Library.MSSpectrumCollection

        retention_times = self.get_bruker_retention_time()
        tics = self.get_bruker_tics()

        imported_scans = []

        for scan_number in range(self.initial_scan_number,
                                 self.final_scan_number + 1):

            if collection[scan_number].MSMSStage != 1:
                continue

            # this label needs to go inside an encapsulation class for consistency
            d_params["label"] = Labels.bruker_profile
            d_params["polarity"] = self.get_polarity_mode(
                collection[scan_number])
            # retention times are read at scan_number - 1
            d_params["rt"] = retention_times[scan_number - 1]
            d_params["scan_number"] = scan_number

            imported_scans.append(scan_number)

            spectrum_data = self.get_data(collection, scan_number)

            spectrum = MassSpecProfile(spectrum_data,
                                       d_params,
                                       auto_process=self.auto_process)

            # NOTE(review): processing is requested here regardless of
            # self.auto_process -- confirm that is intended.
            spectrum.process_mass_spec()

            self.lcms.add_mass_spectrum(spectrum)

        self.lcms.retention_time = retention_times
        self.lcms.tic = tics
        self.lcms.scans_number = imported_scans
        # return each_mass_spectrum

    def get_lcms_obj(self):
        """get_lc_ms_class method should only be used when using this class as a Thread, 
        otherwise use the run() method to return the lcms class"""

        if self.lcms.get(self._initial_scan_number):
            return self.lcms
        else:
            raise Exception("returning a empty lcms class")
Exemple #14
0
class ReadBruker_SolarixTransientMassSpectra(Thread):
    """Thread that builds an LCMSBase object from a Solarix experiment directory.

    Scan numbers, retention times and TIC values are read from the
    directory's ``scan.xml``; one mass spectrum per scan is obtained from the
    scan's transient.
    """

    def __init__(self,
                 d_directory_location,
                 analyzer='ICR',
                 instrument_label='15T',
                 auto_process=True,
                 auto_noise=True,
                 keep_profile=False):
        """Validate the experiment directory and create the LC-MS container.

        Parameters
        ----------
        d_directory_location : str or path-like
            Experiment directory; it is wrapped in ``pathlib.Path`` and must
            contain a ``scan.xml`` file.
        analyzer, instrument_label : str
            Forwarded positionally to ``LCMSBase``.
        auto_process, auto_noise, keep_profile : bool
            Stored on the instance and forwarded when each mass spectrum is
            built.

        Raises
        ------
        FileNotFoundError
            If the directory does not exist.
        FileExistsError
            If ``scan.xml`` is missing from the directory.
        """
        Thread.__init__(self)

        d_directory_location = Path(d_directory_location)

        if not d_directory_location.exists():
            raise FileNotFoundError("File does not exist: " +
                                    str(d_directory_location))

        self.scan_attr = d_directory_location / "scan.xml"
        if not self.scan_attr.exists():
            # The exception type is kept for callers that already catch it.
            # The message is built from adjacent literals so it no longer
            # carries the indentation of backslash-continued source lines.
            raise FileExistsError(
                "%s does not seem to be a valid Solarix Mass Spectra Experiment, "
                "maybe an Imaging experiment? "
                "Please use the ReadBruker_SolarixTransientImage class for Imaging datasets"
                % d_directory_location)

        self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label)

        self.auto_process = auto_process
        self.auto_noise = auto_noise
        self.keep_profile = keep_profile

    def get_scan_attr(self):
        """Parse ``scan.xml`` into ``{scan_number: (retention_time, tic)}``.

        Scan numbers come from the ``count`` elements, retention times from
        ``minutes`` and TIC values from ``tic``; the three lists are paired
        by position.
        """
        from bs4 import BeautifulSoup

        # Parse inside a context manager so the file handle is closed; the
        # original left it open.
        with self.scan_attr.open() as scan_file:
            soup = BeautifulSoup(scan_file, 'xml')

        list_rt = [float(rt.text) for rt in soup.find_all('minutes')]
        list_tic = [float(tic.text) for tic in soup.find_all('tic')]
        list_scan = [int(scan.text) for scan in soup.find_all('count')]

        return dict(zip(list_scan, zip(list_rt, list_tic)))

    def import_mass_spectra(self):
        """Load every scan listed in ``scan.xml`` into ``self.lcms``.

        Scans are processed in ascending scan-number order; retention times,
        TIC values and scan numbers are stored on ``self.lcms`` afterwards.
        """
        dict_scan_rt_tic = self.get_scan_attr()

        list_rt, list_tic = [], []

        list_scans = sorted(dict_scan_rt_tic)

        for scan_number in list_scans:

            mass_spec = self.get_mass_spectrum(scan_number)

            self.lcms.add_mass_spectrum(mass_spec)

            retention_time, tic = dict_scan_rt_tic[scan_number]

            list_rt.append(retention_time)

            list_tic.append(tic)

        self.lcms.retention_time = list_rt
        self.lcms.tic = list_tic
        self.lcms.scans_number = list_scans

    def get_mass_spectrum(self, scan_number):
        """Return the mass spectrum computed from the transient of ``scan_number``.

        A new ``ReadBrukerSolarix`` reader is created on
        ``self.lcms.file_location`` for each call; the instance's
        ``auto_process``, ``keep_profile`` and ``auto_noise`` settings are
        forwarded.
        """
        bruker_reader = ReadBrukerSolarix(self.lcms.file_location)

        bruker_transient = bruker_reader.get_transient(scan_number)

        mass_spec = bruker_transient.get_mass_spectrum(
            plot_result=False,
            auto_process=self.auto_process,
            keep_profile=self.keep_profile,
            auto_noise=self.auto_noise)

        return mass_spec

    def run(self):
        """Thread entry point: populate ``self.lcms``."""
        self.import_mass_spectra()

    def get_lcms_obj(self):
        """Return ``self.lcms``; raise ``Exception`` when it evaluates as false."""
        if self.lcms:
            return self.lcms
        else:
            raise Exception("returning a empty lcms class")
Exemple #15
0
class ReadCoremsMassSpectraText(ReadCoremsMasslist, Thread):
    """Threaded reader that rebuilds an LCMS object from a ``.corems`` location.

    Every per-scan file found under ``file_location`` is read through the
    inherited ``get_mass_spectrum`` and added to ``self.lcms``.
    """

    def __init__(self,
                 file_location,
                 analyzer='Unknown',
                 instrument_label='Unknown'):
        """
        Parameters
        ----------
        file_location: str, pathlib.Path(), or s3path.S3Path
            Location of the ``.corems`` data. A plain string is converted to
            a ``pathlib.Path``; pass an ``S3Path`` object explicitly if needed.
        analyzer: str
            Forwarded to ``LCMSBase``.
        instrument_label: str
            Forwarded to ``LCMSBase``.

        Raises
        ------
        FileNotFoundError
            If ``file_location`` does not exist.
        TypeError
            If ``file_location`` does not have the ``.corems`` suffix.
        """

        if isinstance(file_location, str):
            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
            file_location = Path(file_location)

        if not file_location.exists():
            raise FileNotFoundError("%s not found" % file_location)

        if not file_location.suffix == '.corems':
            raise TypeError("%s is not a valid CoreMS file" % file_location)

        Thread.__init__(self)

        ReadCoremsMasslist.__init__(self, file_location)

        self.lcms = LCMSBase(self.file_location,
                             analyzer=analyzer,
                             instrument_label=instrument_label)

    def get_scans_filename(self):
        """Return ``(scan_number_text, path)`` pairs sorted by scan number.

        The scan number is whatever follows the first ``'scan'`` in the file
        stem; it is kept as text in the tuple and only converted to ``int``
        for sorting.
        """

        # NOTE(review): '[!.json]' is a glob character class, i.e. it rejects
        # names whose LAST character is one of '.', 'j', 's', 'o', 'n'; it is
        # not a literal "does not end with .json" test.
        all_other = self.file_location.glob('*_scan*[!.json]')

        scans_filepath = [(file_path_obj.stem.split('scan')[1], file_path_obj)
                          for file_path_obj in all_other]

        scans_filepath.sort(key=lambda m: int(m[0]))

        return scans_filepath

    def set_filepath_datatype_and_delimiter(self, file_path_obj):
        """Point the reader at one scan file and configure how to parse it.

        Rebinds ``self.file_location`` to ``file_path_obj`` and sets
        ``self.data_type`` (and, except for ``.pkl``, ``self.delimiter``)
        from the file suffix.

        Raises
        ------
        NotImplementedError
            For any suffix other than ``.pkl``, ``.csv``, ``.xlsx``, ``.txt``.
        """

        self.file_location = file_path_obj

        suffix = file_path_obj.suffix

        # The original used '==' here, which compared and discarded the
        # result, so data_type was never actually updated.
        if suffix == '.pkl':
            self.data_type = 'dataframe'

        elif suffix == '.csv':
            self.data_type = 'txt'
            self.delimiter = ','

        elif suffix == '.xlsx':
            self.data_type = 'excel'
            self.delimiter = ','

        elif suffix == '.txt':
            self.data_type = 'txt'
            self.delimiter = '\t'
            print('WARNING using tab as delimiter')

        else:
            raise NotImplementedError('%s data not yet supported ' %
                                      file_path_obj.suffix)

    def import_mass_spectra(self):
        """Read every scan file and populate ``self.lcms``.

        Retention time and TIC are taken from each mass spectrum object and
        stored on ``self.lcms`` together with the scan numbers.
        """

        list_rt, list_tic, list_scan = [], [], []

        for scan_number, file_path_obj in self.get_scans_filename():

            self.set_filepath_datatype_and_delimiter(file_path_obj)

            mass_spec = self.get_mass_spectrum(int(scan_number))

            list_scan.append(int(scan_number))
            list_rt.append(mass_spec.retention_time)
            list_tic.append(mass_spec.tic)

            self.lcms.add_mass_spectrum(mass_spec)

        self.lcms.retention_time = list_rt
        # 'tic' (not 'tic_list') is the attribute the other readers in this
        # file assign on the LCMS object.
        self.lcms.tic = list_tic
        self.lcms.scans_number = list_scan

    def run(self):
        '''creates the lcms obj'''

        self.import_mass_spectra()

    def get_lcms_obj(self):
        """Return ``self.lcms``; raise ``Exception`` if it evaluates as falsy."""

        if self.lcms:
            return self.lcms

        raise Exception("returning a empty lcms class")
Exemple #16
0
class ImportLCMSThermoMSFileReader(Thread):
    """Read FULL, PROFILE scans from a Thermo raw file into an LCMS object.

    All other scan types are ignored.

    *  Default behavior is to load all scan numbers.

    *  Set ``initial_scan_number`` and ``final_scan_number`` to change the
       range before calling ``start()`` or ``run()``.

    *  Noise threshold will break ``mass_spec.process_mass_spec()`` if the
       method in the MassSpecSetting class is set to something other than
       Relative Abundance (it needs to be fixed to work with all methods).
    """

    def __init__(self, file_location):
        """Open ``file_location`` through the MSFileReader COM object.

        Raises
        ------
        ImportError
            If selecting the controller returns a non-zero status code.
        """

        Thread.__init__(self)

        self.thermo_Library = CreateObject("MSFileReader.XRawfile")

        self.thermo_Library.open(file_location)

        # status code of the controller selection; 0 means success
        self.res = self.thermo_Library.SetCurrentController(0, 1)

        self.check_load_success()

        self.LCMS = LCMSBase(file_location)

        self._initial_scan_number = 1

        self._final_scan_number = self.get_scans_numbers()

        self.file_location = file_location

    @property
    def initial_scan_number(self):
        """First scan number that will be imported."""
        return self._initial_scan_number

    @property
    def final_scan_number(self):
        """Last scan number (inclusive) that will be imported."""
        return self._final_scan_number

    def check_scan(self, scan):
        """Return True when ``scan`` does not exceed the number of spectra."""

        scan_numbers = self.get_scans_numbers()
        return scan <= scan_numbers

    @initial_scan_number.setter
    def initial_scan_number(self, start_scan_number):
        if self.check_scan(start_scan_number):
            self._initial_scan_number = start_scan_number
        else:
            raise Exception(
                "startscan and finalscan should be less than %s"
                % self.get_scans_numbers()
            )

    @final_scan_number.setter
    def final_scan_number(self, final_scan_number):

        if self.check_scan(final_scan_number):
            self._final_scan_number = final_scan_number
        else:
            raise Exception(
                "startscan and finalscan should be less than %s"
                % self.get_scans_numbers()
            )

    def run(self):
        """Thread entry point: import the scans into ``self.LCMS``.

        Nothing is returned; retrieve the result with ``get_lcms()`` or call
        ``get_mass_spectra()`` directly instead of running as a thread.
        """

        d_parameters = default_parameters(self.file_location)
        self._import_mass_spectra(d_parameters)

    def get_mass_spectra(self, auto_process=True):
        """Import the scans synchronously and return ``self.LCMS``.

        ``auto_process`` is passed on to ``_import_mass_spectra``.
        """

        d_parameters = default_parameters(self.file_location)
        self._import_mass_spectra(d_parameters, auto_process=auto_process)
        return self.LCMS

    def check_load_success(self):
        """Return True if ``self.res`` is 0, otherwise raise ``ImportError``.

        0 means successful; otherwise, see Error Codes on MSFileReader Manual.
        """
        if self.res == 0:

            self.break_it = False
            return True

        raise ImportError(str(self.res))

    def get_filter_for_scan_num(self, scan_number):
        """Return the scan filter string of ``scan_number`` split on whitespace.

        e.g.  ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']

        Raises
        ------
        IOError
            If the library call returns a non-zero error code.
        """
        str_filter = BSTR(None)
        error = self.thermo_Library.GetFilterForScanNum(scan_number, byref(str_filter))
        if error:
            raise IOError(
                "scan %i GetFilterForScanNum error : %s" % (scan_number, str(error))
            )

        return str(str_filter.value).split()

    def check_full_scan(self, scan_number):
        """Return True when the fifth filter token of the scan is ``"Full"``."""

        scan_mode_symbol = self.get_filter_for_scan_num(scan_number)[4]

        return scan_mode_symbol == "Full"

    def get_polarity_mode(self, scan_number):
        """Return 1 for a ``+`` scan and -1 for a ``-`` scan.

        The polarity symbol is the second token of the scan filter.

        Raises
        ------
        Exception
            If the symbol is neither ``+`` nor ``-``.
        """

        polarity_symbol = self.get_filter_for_scan_num(scan_number)[1]

        if polarity_symbol == "+":
            return 1

        if polarity_symbol == "-":
            return -1

        raise Exception("Polarity Mode Unknown, please set it manually")

    def get_data(self, scan, d_parameter):
        """Fetch the label data of ``scan`` and return it as a data dict.

        Also writes the noise estimates into ``d_parameter`` (mutated in
        place).

        Returns
        -------
        dict
            Keys ``Labels.mz``, ``Labels.abundance``, ``Labels.rp`` and
            ``Labels.s2n``.
        """

        scan = c_long(scan)
        pvarLabels = VARIANT()
        pvarFlags = VARIANT()

        self.thermo_Library.GetLabelData(pvarLabels, pvarFlags, scan)
        scans_labels = numpy.array(pvarLabels.value)

        # rows of the label array, in the order used below
        mz = scans_labels[0]
        magnitude = scans_labels[1]
        rp = scans_labels[2]
        base_noise = scans_labels[3]
        noise = scans_labels[4]
        # charge = scans_labels[5]

        array_noise_std = (numpy.array(noise) - numpy.array(base_noise)) / 3
        l_signal_to_noise = numpy.array(magnitude) / array_noise_std

        # NOTE(review): both keys receive the same average; confirm whether
        # "baselise_noise_std" was meant to be a spread rather than a mean.
        d_parameter["baselise_noise"] = numpy.average(array_noise_std)

        d_parameter["baselise_noise_std"] = numpy.average(array_noise_std)

        data_dict = {
            Labels.mz: mz,
            Labels.abundance: magnitude,
            Labels.rp: rp,
            Labels.s2n: l_signal_to_noise,
        }

        return data_dict

    def get_scans_numbers(self):
        """Return the number of spectra reported by the library as an int."""

        nScans = c_long()
        self.thermo_Library.GetNumSpectra(nScans)

        return int(nScans.value)

    def get_ScanHeaderInfoForScanNum(self, scan_number):
        """Return ``(retention_time, tic)`` from the header of ``scan_number``.

        The remaining header fields are requested because the library call
        takes them as output arguments, but they are discarded.
        """

        nScanNumber = c_long(scan_number)
        nPackets = c_long(0)
        dRetantionTime = c_double(0.0)
        dLowMass = c_double(0.0)
        dHighMass = c_double(0.0)
        dTIC = c_double(0.0)
        dBasePeakMass = c_double(0.0)
        dBasePeakIntensity = c_double(0.0)
        nChannels = c_long(0)
        bUniformTime = c_long(False)
        dFrequency = c_double(0.0)
        self.thermo_Library.GetScanHeaderInfoForScanNum(
            nScanNumber,
            nPackets,
            dRetantionTime,
            dLowMass,
            dHighMass,
            dTIC,
            dBasePeakMass,
            dBasePeakIntensity,
            nChannels,
            bUniformTime,
            dFrequency,
        )

        return dRetantionTime.value, dTIC.value

    def is_profile_scan_for_scan_num(self, scan_number):
        """Return True when the library flags ``scan_number`` as a profile scan.

        Raises
        ------
        IOError
            If the library call returns a non-zero error code.
        """

        IsProfileScan = c_long()
        error = self.thermo_Library.IsProfileScanForScanNum(
            c_long(scan_number), byref(IsProfileScan)
        )
        if error:
            # A single formatted message: passing (text, code) as two
            # arguments would be interpreted as (errno, strerror).
            raise IOError(
                "scan %i IsProfileScanForScanNum error : %s"
                % (scan_number, str(error))
            )

        return bool(IsProfileScan.value)

    def _import_mass_spectra(self, d_params, auto_process=True):
        """Import every FULL, PROFILE scan in the configured range.

        ``d_params`` is updated in place for each scan and handed to
        ``MassSpecCentroid`` together with the scan data.

        NOTE(review): ``auto_process`` is accepted but not used in this
        method; confirm whether it should be forwarded to the mass spectrum.
        """
        if not self.check_load_success():
            return

        list_tics = []
        list_retention_times = []
        list_scans = []

        for scan_number in range(
            self.initial_scan_number, self.final_scan_number + 1
        ):
            # only import FULL scans in Profile mode, all others are ignored
            if not self.check_full_scan(scan_number):
                continue

            if not self.is_profile_scan_for_scan_num(scan_number):
                continue

            d_params["label"] = Labels.thermo_centroid

            d_params["polarity"] = self.get_polarity_mode(scan_number)

            d_params["rt"], tic = self.get_ScanHeaderInfoForScanNum(
                scan_number
            )

            d_params["scan_number"] = scan_number

            list_retention_times.append(d_params.get("rt"))
            list_tics.append(tic)
            list_scans.append(scan_number)

            data_dict = self.get_data(scan_number, d_params)

            mass_spec = MassSpecCentroid(data_dict, d_params)

            self.LCMS.add_mass_spectrum(mass_spec)

        self.LCMS.retention_time = list_retention_times
        self.LCMS.set_tic_list(list_tics)
        self.LCMS.set_scans_number_list(list_scans)

    def get_lcms(self):
        """Return ``self.LCMS`` after the import has run.

        Intended for use after running this class as a Thread; otherwise
        call ``get_mass_spectra()``, which returns the LCMS object directly.

        Raises
        ------
        Exception
            If ``self.LCMS.get`` returns a falsy value for the initial scan
            number.
        """

        if self.LCMS.get(self._initial_scan_number):
            return self.LCMS

        raise Exception("returning a empty LCMS class")
Exemple #17
0
class ReadHDF_BoosterMassSpectra(Thread):
    """Threaded reader that builds an LCMS object from a Booster HDF5 file.

    Each top-level key of the HDF5 file is treated as a scan number; the
    scan's dataset holds the spectrum and its attributes hold the metadata
    read below (``r_h_start_time``, ``r_h_tic``, ``r_h_polarity``,
    ``r_cparams``).
    """

    def __init__(self, file_location, analyzer='ICR', instrument_label='21T', auto_process=True):
        """
        Parameters
        ----------
        file_location
            Location of the HDF5 file; passed to ``h5py.File`` and ``LCMSBase``.
        analyzer: str
            Stored and copied into the parameters of every mass spectrum.
        instrument_label: str
            Stored and copied into the parameters of every mass spectrum.
        auto_process: bool
            Forwarded to ``MassSpecProfile`` for every scan.
        """

        Thread.__init__(self)

        self.lcms = LCMSBase(file_location, analyzer=analyzer, instrument_label=instrument_label)

        self.hdf_obj = h5py.File(file_location, 'r')

        self.list_scans = sorted(int(key) for key in self.hdf_obj.keys())

        self.initial_scan_number = self.list_scans[0]

        self.final_scan_number = self.list_scans[-1]

        self.file_location = file_location

        # honour the caller's choice; this used to be hard-coded to True
        self.auto_process = auto_process

        self.analyzer = analyzer

        self.instrument_label = instrument_label

    def get_polarity(self, file_location, scan):
        """Return -1 when the scan's ``r_h_polarity`` is 'negative scan', else +1.

        ``file_location`` is kept for backward compatibility only: the
        attribute is read from the handle opened in ``__init__``, so the
        file is no longer reopened (and leaked) on every call.
        """

        # these attributes were set by the previous implementation; keep them
        self.h5pydata = self.hdf_obj
        self.scans = list(self.hdf_obj.keys())

        polarity = self.get_attr_data(scan, 'r_h_polarity')

        return -1 if polarity == 'negative scan' else +1

    def get_attr_data(self, scan, attr_srt):
        """Return attribute ``attr_srt`` of the dataset stored under ``str(scan)``."""

        return self.hdf_obj[str(scan)].attrs[attr_srt]

    def import_mass_spectra(self, d_params):
        """Read every scan in ``self.list_scans`` into ``self.lcms``.

        ``d_params`` is updated in place with the per-scan settings before
        each mass spectrum is created.
        """

        list_rt, list_tic = [], []

        for scan_number in self.list_scans:

            # store the value itself: list.append() returns None, which is
            # what ended up in d_params["rt"] (and in list_rt) before
            d_params["rt"] = self.get_attr_data(scan_number, 'r_h_start_time')

            d_params["scan_number"] = scan_number

            d_params['label'] = Labels.booster_profile

            d_params["polarity"] = self.get_polarity(self.file_location, scan_number)

            cparams = self.get_attr_data(scan_number, 'r_cparams')
            d_params["Aterm"] = cparams[0]
            d_params["Bterm"] = cparams[1]

            d_params['analyzer'] = self.analyzer

            d_params['instrument_label'] = self.instrument_label

            list_rt.append(d_params["rt"])

            list_tic.append(self.get_attr_data(scan_number, 'r_h_tic'))

            mass_spec = self.get_mass_spectrum(scan_number, d_params)

            self.lcms.add_mass_spectrum(mass_spec)

        self.lcms.retention_time = list_rt
        self.lcms.tic = list_tic
        self.lcms.scans_number = self.list_scans

    def get_mass_spectrum(self, scan, d_params):
        """Build a ``MassSpecProfile`` from the dataset stored for ``scan``.

        Row 0 of the dataset is used as m/z and row 1 as abundance.

        Raises
        ------
        NotImplementedError
            If the dataset's first dimension is not 2.
        """

        booster_data = self.hdf_obj[str(scan)]

        # value comparison; 'is not 2' tested object identity
        if booster_data.shape[0] != 2:

            raise NotImplementedError('opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum')

        data_dict = {
            Labels.mz: booster_data[0],
            Labels.abundance: booster_data[1],
            Labels.rp: None,
            Labels.s2n: None,
        }

        return MassSpecProfile(data_dict, d_params, auto_process=self.auto_process)

    def run(self):
        '''creates the lcms obj'''

        d_parameters = default_parameters(self.file_location)
        self.import_mass_spectra(d_parameters)

    def get_lcms_obj(self):
        """Return ``self.lcms`` once the initial scan is present in it.

        Raises
        ------
        Exception
            If ``self.lcms.get`` returns a falsy value for the initial scan
            number.
        """

        if self.lcms.get(self.initial_scan_number):
            return self.lcms

        raise Exception("returning a empty lcms class")