Example #1
0
    def mass_spec_factory(self, rt, datadict):
        """Build a low-resolution centroid mass spectrum for one retention time.

        Parameters
        ----------
        rt
            Retention time stored under the ``"rt"`` parameter.
        datadict : dict
            Must provide ``'scan_number'``, ``'mz'`` and ``'abundance'``
            sequences.  The first entry of ``'scan_number'`` selects the
            source scan in ``self._ms`` from which polarity, analyzer,
            instrument label and file name are copied.

        Returns
        -------
        MassSpecCentroidLowRes
            Spectrum created from the m/z-sorted data and the parameters.
        """
        scan_index = datadict['scan_number'][0]
        source_scan = self._ms[scan_index]

        # Sort the (m/z, abundance) pairs by m/z while keeping them paired.
        mz_list, abundance_list = zip(
            *sorted(zip(datadict['mz'], datadict['abundance'])))

        data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list}

        d_params = default_parameters(source_scan._filename)

        d_params["rt"] = rt

        d_params["scan_number"] = scan_index

        d_params['label'] = Labels.gcms_centroid

        d_params["polarity"] = source_scan.polarity

        d_params['analyzer'] = source_scan.analyzer

        d_params['instrument_label'] = source_scan.instrument_label

        # The path entry previously received the instrument label by mistake;
        # it must carry the file name of the source scan.
        d_params["filename_path"] = source_scan._filename

        return MassSpecCentroidLowRes(data_dict, d_params)
Example #2
0
    def get_output_parameters(self, polarity, scan_index=0):
        """Collect the output parameters for the scan at ``scan_index``.

        The ``polarity`` argument is not used here: the polarity is read
        from the stored ``'MassSpecAttrs'`` attributes like the other
        per-scan values.

        Returns the parameter dictionary created by ``default_parameters``
        and filled with per-scan and file-level attributes.
        """
        params = default_parameters(self.file_location)
        params["filename_path"] = self.file_location
        params["scan_number"] = int(self.scans[scan_index])

        # Per-scan values, each stored in the output under its own name.
        scan_attr_names = (
            'polarity',
            'rt',
            'tic',
            'mobility_scan',
            'mobility_rt',
            'Aterm',
            'Bterm',
            'Cterm',
            'baselise_noise',
            'baselise_noise_std',
        )
        for attr_name in scan_attr_names:
            params[attr_name] = self.get_raw_data_attr_data(
                scan_index, 'MassSpecAttrs', attr_name)

        # File-level values.
        for attr_name in ('analyzer', 'instrument_label', 'sample_name'):
            params[attr_name] = self.get_high_level_attr_data(attr_name)

        return params
Example #3
0
    def get_output_parameters(self):
        """Return the output parameters for this file.

        Starts from ``default_parameters(self.file_location)`` and fills in
        the polarity, file path, zeroed scan/mobility fields, the start time
        and the first two entries of ``'r_cparams'`` (all read at index 0).
        """
        output = default_parameters(self.file_location)

        # Values are evaluated and stored in the order listed.
        output.update({
            "polarity": self.polarity,
            "filename_path": self.file_location,
            "mobility_scan": 0,
            "mobility_rt": 0,
            "scan_number": 0,
            "rt": self.get_attr_data(0, 'r_h_start_time'),
            'label': Labels.booster_profile,
            "Aterm": self.get_attr_data(0, 'r_cparams')[0],
            "Bterm": self.get_attr_data(0, 'r_cparams')[1],
        })

        return output
Example #4
0
def get_output_parameters(polarity, file_location):
    """Return the output parameters describing a simulated spectrum.

    The defaults for ``file_location`` are overridden with the fixed
    'Generic Simulated' analyzer/instrument, the given ``polarity``, zeroed
    scan, mobility and retention-time fields and the simulated-profile label.
    """
    simulated = default_parameters(file_location)

    simulated.update({
        'analyzer': 'Generic Simulated',
        'instrument_label': 'Generic Simulated',
        "polarity": polarity,
        "filename_path": file_location,
        "mobility_scan": 0,
        "mobility_rt": 0,
        "scan_number": 0,
        "rt": 0,
        Labels.label: Labels.simulated_profile,
    })

    return simulated
Example #5
0
    def set_metadata(self,
                     firstScanNumber=0,
                     lastScanNumber=0,
                     scans_list=False,
                     label=Labels.thermo_profile):
        '''
        Gather the metadata handed to the mass spectrum object.

        firstScanNumber: int
            first scan of the range; also used to look up the polarity
            when no scans_list is given
        lastScanNumber: int
            last scan of the range
        scans_list: list[int] or False
            explicit scans; when truthy it is stored as the scan number
            and its first entry is used to look up the polarity
        label: stored under the 'label' key

        Returns the parameter dictionary.
        '''
        metadata = default_parameters(self.file_path)

        # assumes scans is full scan or reduced profile scan
        metadata['label'] = label

        if scans_list:
            scan_descriptor = scans_list
            polarity_scan = scans_list[0]
        else:
            scan_descriptor = '{}-{}'.format(firstScanNumber, lastScanNumber)
            polarity_scan = firstScanNumber

        metadata['scan_number'] = scan_descriptor
        metadata['polarity'] = self.get_polarity_mode(polarity_scan)

        metadata['analyzer'] = self.iRawDataPlus.GetInstrumentData().Model
        metadata['instrument_label'] = (
            self.iRawDataPlus.GetInstrumentData().Name)

        return metadata
Example #6
0
    def get_output_parameters(self, polarity, scan_index=0):
        """Return the output parameters for one mass spectrum.

        Parameters
        ----------
        polarity
            Stored unchanged under the ``"polarity"`` key.
        scan_index : int
            Stored unchanged under the ``"scan_number"`` key.

        Returns
        -------
        The dictionary from ``default_parameters(self.file_location)`` with
        label, analyzer, instrument, sample name, calibration terms (set to
        ``None``), polarity and scan fields filled in.
        """
        # TODO pull attrs from json settings file in load_settings function MassSpecAttrs group and analyzer, instrument_label and sample_name
        output_parameters = default_parameters(self.file_location)

        # The label depends on whether the data is centroided.
        output_parameters['label'] = (Labels.corems_centroid
                                      if self.isCentroid else
                                      Labels.bruker_profile)

        output_parameters['analyzer'] = self.analyzer

        output_parameters['instrument_label'] = self.instrument_label

        output_parameters['sample_name'] = self.sample_name

        output_parameters["Aterm"] = None

        output_parameters["Bterm"] = None

        output_parameters["Cterm"] = None

        output_parameters["polarity"] = polarity

        # scan_number and rt will be needed for LC-MS
        output_parameters["mobility_scan"] = 0

        output_parameters["mobility_rt"] = 0

        output_parameters["scan_number"] = scan_index

        output_parameters["rt"] = 0

        return output_parameters
Example #7
0
    def run(self):
        '''Populate the gcms object.

        Builds the default parameters for ``self.file_location`` and hands
        them to ``import_mass_spectra``.
        '''
        self.import_mass_spectra(default_parameters(self.file_location))
Example #8
0
    def get_mass_spectra(self, auto_process=True):
        """Import the mass spectra and return ``self.lcms``.

        ``auto_process`` is forwarded to ``_import_mass_spectra`` together
        with the default parameters for ``self.file_location``.
        """
        self._import_mass_spectra(default_parameters(self.file_location),
                                  auto_process=auto_process)

        return self.lcms
Example #9
0
    def run(self):
        '''Thread entry point: import the mass spectra with automatic processing.

        Use get_mass_spectra to import without processing the mass spectra.
        '''
        self._import_mass_spectra(default_parameters(self.file_location))
Example #10
0
    def get_summed_mass_spectrum(self,
                                 initial_scan_number,
                                 final_scan_number=None,
                                 auto_process=True,
                                 pd_method=True,
                                 pd_merge_n=100):
        """Sum the data of several scans into a single profile mass spectrum.

        Parameters
        ----------
        initial_scan_number : int or list of int
            First scan of the range, or an explicit list of scans to sum.
            The polarity is taken from this scan (or the first list entry).
        final_scan_number : int, optional
            Last scan of the range, inclusive.  Defaults to
            ``self._final_scan_number``.  Ignored when a list is given.
        auto_process : bool
            Forwarded to ``MassSpecProfile``.
        pd_method : bool
            If true, sum with pandas; otherwise accumulate the abundances
            in a dict keyed by m/z.  Both only merge exactly equal m/z.
        pd_merge_n : int
            With ``pd_method``, the accumulated data is collapsed whenever
            the scan number is a multiple of this value.

        Returns
        -------
        MassSpecProfile
        """
        d_params = default_parameters(self.file_location)

        # assumes scans is full scan or reduced profile scan
        d_params["label"] = Labels.thermo_profile

        if isinstance(initial_scan_number, list):
            d_params["polarity"] = self.get_polarity_mode(
                initial_scan_number[0])

            scanrange = initial_scan_number
        else:
            d_params["polarity"] = self.get_polarity_mode(initial_scan_number)

            if final_scan_number is None:
                final_scan_number = self._final_scan_number

            scanrange = range(initial_scan_number, final_scan_number + 1)

        if pd_method:

            def sort_sum(series):
                """Sort by m/z and sum entries sharing the exact same m/z."""
                return series.sort_index().groupby(level=0).sum()

            # Series.append was removed in pandas 2.0; the per-scan series
            # are gathered in a list and joined with pd.concat instead.
            pieces = []

            for scan_number in tqdm(scanrange):
                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)
                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                pieces.append(
                    pd.Series(index=list(segmentedScan.Positions),
                              dtype='float64',
                              data=list(segmentedScan.Intensities)))

                # Collapsing early keeps the accumulated data small; it costs
                # time, so it only happens every pd_merge_n-th scan number.
                if scan_number % pd_merge_n == 0:
                    pieces = [sort_sum(pd.concat(pieces))]

            if pieces:
                summed = sort_sum(pd.concat(pieces))
            else:
                # No scans at all: pd.concat refuses an empty list.
                summed = pd.Series(index=[], dtype='float64')

            data_dict = {
                Labels.mz: list(summed.index.values),
                Labels.abundance: list(summed.values),
            }

        else:
            all_mz = dict()

            for scan_number in tqdm(scanrange):

                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)

                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                for mz, abundance in zip(segmentedScan.Positions,
                                         segmentedScan.Intensities):
                    all_mz[mz] = all_mz.get(mz, 0) + abundance

            mz_all = sorted(all_mz)

            data_dict = {
                Labels.mz: mz_all,
                Labels.abundance: [all_mz[mz] for mz in mz_all],
            }

        print('Summed. Now Processing.')

        mass_spec = MassSpecProfile(data_dict,
                                    d_params,
                                    auto_process=auto_process)

        return mass_spec
Example #11
0
 def run(self):
     '''Create the lcms object.

     Passes the default parameters for ``self.file_location`` to
     ``_import_mass_spectra``.
     '''
     self._import_mass_spectra(default_parameters(self.file_location))
Example #12
0
    def deconvolution(self, peaks_entity_data, maximum_tic):
        """Create a GC peak for every qualifying apex and plot the result.

        Parameters
        ----------
        peaks_entity_data : dict
            Maps an apex retention time to a dict that holds, per retention
            time, a data dict with ``'mz'``, ``'abundance'`` and
            ``'scan_number'`` entries, plus a ``"ref_apex_rt"`` entry.
        maximum_tic
            Value used to express each apex TIC as a percentage.

        An apex is kept when its percentage exceeds
        ``self.chromatogram_settings.peak_height_min_percent`` and it has
        more than three m/z values.  Kept peaks are appended to
        ``self.gcpeaks`` and their TIC traces are plotted; the figure is
        shown at the end.
        """
        peak_index = 0
        tic_list = []
        rt_list = []

        for apex_rt, datadict in sorted(peaks_entity_data.items()):

            # Skip entries that carry no data at their own apex time.
            if apex_rt not in datadict:
                continue

            apex_data = datadict[apex_rt]

            ref_apex_rt = datadict["ref_apex_rt"]

            tic = sum(apex_data.get('abundance'))

            norm_smooth_tic = (tic / maximum_tic) * 100

            is_candidate = (
                norm_smooth_tic >
                self.chromatogram_settings.peak_height_min_percent
                and len(apex_data['mz']) > 3)
            if not is_candidate:
                continue

            scan_index = apex_data['scan_number'][0]
            source_scan = self._ms[scan_index]

            # Sort the (m/z, abundance) pairs by m/z, keeping them paired.
            mz_list, abundance_list = zip(
                *sorted(zip(apex_data['mz'], apex_data['abundance'])))

            data_dict = {
                Labels.mz: mz_list,
                Labels.abundance: abundance_list
            }

            d_params = default_parameters(source_scan._filename)

            d_params["rt"] = apex_rt

            d_params["scan_number"] = scan_index

            d_params['label'] = Labels.gcms_centroid

            d_params["polarity"] = source_scan.polarity

            d_params['analyzer'] = source_scan.analyzer

            d_params['instrument_label'] = source_scan.instrument_label

            # The path entry previously received the instrument label by
            # mistake; it must carry the file name of the source scan.
            d_params["filename_path"] = source_scan._filename

            ms = MassSpecCentroidLowRes(data_dict, d_params)

            # needs to define peak start and end, passing just minus and
            # plus one from apex pos for now
            gc_peak = GCPeak(ms,
                             (peak_index - 1, peak_index, peak_index + 1))
            peak_index += 1
            self.gcpeaks.append(gc_peak)

            tic_list.append(tic)
            rt_list.append(ref_apex_rt)

            # TIC trace of this peak: one point per retention time, sorted.
            peak_profile = sorted(
                (rt, sum(each_datadict["abundance"]))
                for rt, each_datadict in datadict.items()
                if rt != "ref_apex_rt")
            peak_rt, peak_tic = zip(*peak_profile)

            plt.plot(peak_rt, peak_tic)

        plt.plot(self.retention_time, self._processed_tic, c='black')
        plt.plot(rt_list, tic_list, c='black', marker='^', linewidth=0)
        plt.show()
Example #13
0
    def get_summed_mass_spectrum(self,
                                 auto_process=True,
                                 pd_method=True,
                                 pd_merge_n=100) -> MassSpecProfile:
        '''
        Manually sum mass spectrum over a scan range

        The scans come from the instance: self.start_scan is either the
        first scan of a range ending at self.end_scan (inclusive) or an
        explicit list of scans. The polarity is taken from the first scan.

        auto_process: bool
            Forwarded to MassSpecProfile
        pd_method: bool
            If true uses pandas to sum the data
            Else: accumulates the abundances in a dict keyed by m/z
            Both only merge data points with exactly equal m/z
        pd_merge_n: int
            With pd_method, the accumulated data is collapsed whenever the
            scan number is a multiple of this value
        Returns:
            MassSpecProfile
        '''

        d_params = default_parameters(self.file_path)

        # assumes scans is full scan or reduced profile scan
        d_params['label'] = Labels.thermo_profile

        if isinstance(self.start_scan, list):
            d_params['polarity'] = self.get_polarity_mode(self.start_scan[0])

            scanrange = self.start_scan
        else:
            d_params['polarity'] = self.get_polarity_mode(self.start_scan)

            scanrange = range(self.start_scan, self.end_scan + 1)

        if pd_method:

            def sort_sum(series):
                '''Sort by m/z and sum entries sharing the exact same m/z'''
                return series.sort_index().groupby(level=0).sum()

            # Series.append was removed in pandas 2.0; the per-scan series
            # are gathered in a list and joined with pd.concat instead.
            pieces = []

            for scan_number in tqdm(scanrange):
                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)
                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                pieces.append(
                    pd.Series(index=list(segmentedScan.Positions),
                              dtype='float64',
                              data=list(segmentedScan.Intensities)))

                # Collapsing early keeps the accumulated data small; it costs
                # time, so it only happens every pd_merge_n-th scan number.
                if scan_number % pd_merge_n == 0:
                    pieces = [sort_sum(pd.concat(pieces))]

            if pieces:
                summed = sort_sum(pd.concat(pieces))
            else:
                # No scans at all: pd.concat refuses an empty list.
                summed = pd.Series(index=[], dtype='float64')

            data_dict = {
                Labels.mz: list(summed.index.values),
                Labels.abundance: list(summed.values),
            }
        else:
            all_mz = dict()

            for scan_number in tqdm(scanrange):

                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                    scan_number)

                segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                    scan_number, scanStatistics)

                for mz, abundance in zip(segmentedScan.Positions,
                                         segmentedScan.Intensities):
                    all_mz[mz] = all_mz.get(mz, 0) + abundance

            mz_all = sorted(all_mz)

            data_dict = {
                Labels.mz: mz_all,
                Labels.abundance: [all_mz[mz] for mz in mz_all],
            }

        print('Summed. Now Processing.')

        mass_spec = MassSpecProfile(data_dict,
                                    d_params,
                                    auto_process=auto_process)

        return mass_spec
Example #14
0
    def get_transient(self, scan_number=1):
        """Read a transient and return it with its acquisition parameters.

        Parameters
        ----------
        scan_number : int
            Scan to describe when the data file is named ``'ser'`` and scan
            attributes are available.

        Returns
        -------
        Transient
            Built from the raw integer data and the output parameters.

        For a ``'ser'`` file only the block of ``TD`` 4-byte points that
        belongs to the stored ``"scan_number"`` is read; any other file is
        read completely.
        """
        file_d_params = self.parse_parameters(self.parameter_filename_location)

        self.fix_freq_limits(file_d_params)

        from sys import platform

        # Both codes are expected to be 4-byte integers on their platform,
        # matching the 4 bytes per data point used when seeking below.
        if platform == "win32":
            # Windows...
            dt = dtype("l")
        else:
            dt = dtype("i")

        # get rt, scan, and tic from scan.xml file, otherwise using 0 defaults values
        output_parameters = deepcopy(
            default_parameters(self.d_directory_location))

        is_ser = self.transient_data_path.name == 'ser'

        # NOTE(review): ``exists`` is read as an attribute, not called. If
        # ``scan_attr`` is a pathlib.Path this is always truthy - confirm.
        if is_ser and self.scan_attr.exists:

            dict_scan_rt_tic = self.get_scan_attr()

            output_parameters["scan_number"] = scan_number

            rt_tic = dict_scan_rt_tic.get(scan_number)

            output_parameters["rt"] = rt_tic[0]

            output_parameters["tic"] = rt_tic[1]

        data_points = int(file_d_params.get("TD"))

        output_parameters["analyzer"] = "ICR"

        output_parameters["label"] = "Bruker_Frequency"

        output_parameters["Aterm"] = float(file_d_params.get("ML1"))

        output_parameters["Bterm"] = float(file_d_params.get("ML2"))

        output_parameters["Cterm"] = float(file_d_params.get("ML3"))

        output_parameters["exc_high_freq"] = float(
            file_d_params.get("EXC_Freq_High"))

        output_parameters["exc_low_freq"] = float(
            file_d_params.get("EXC_Freq_Low"))

        output_parameters["bandwidth"] = float(file_d_params.get("SW_h"))

        output_parameters["number_data_points"] = data_points

        output_parameters["polarity"] = str(file_d_params.get("Polarity"))

        if is_ser:
            # NOTE(review): without scan attributes this is whatever
            # default_parameters provides, not the requested scan - confirm.
            scan = output_parameters["scan_number"]

            with open(self.transient_data_path, 'rb') as databin:

                # seek start of the scan data
                databin.seek((scan - 1) * 4 * data_points)
                # read the scan data and parse it as 32-bit integers
                data = frombuffer(databin.read(4 * data_points), dtype=dt)
        else:
            # Binary mode, and closed deterministically: the handle used to
            # be opened in text mode and was never closed.
            with self.transient_data_path.open('rb') as databin:
                data = fromfile(databin, dtype=dt)

        return Transient(data, output_parameters)