Python MsUtils 예제들, ichorlib.msClasses.MsUtils Python 예제들

예제 #1

0

파일 보기

 def _calculateOmega(self, td, mz, charge, gas='Nitrogen'):
     """Compute the collision cross section (omega) from drift times.

     The drift time is passed through ``utils._calculateTdPrime`` (with
     ``self.waveVelocity``) and ``utils._calculateTdDoublePrime`` (with
     ``mz``), raised to the calibration power law
     ``coefficientA * td'' ** coefficientB`` and finally scaled by the
     charge and the inverse square root of the reduced mass.

     Args:
         td: drift time(s); converted to a numpy array before use.
         mz: m/z value forwarded to the helper functions.
         charge: charge state of the ion.
         gas: name of the drift gas forwarded to
             ``utils._calculateReducedMass``.

     Returns:
         The collision cross section value(s).
     """
     drift_times = np.array(td)
     corrected = utils._calculateTdPrime(drift_times, self.waveVelocity)
     corrected = utils._calculateTdDoublePrime(corrected, mz)
     power_law = self.coefficientA * corrected**self.coefficientB
     mu = utils._calculateReducedMass(mz, charge, gas)
     return power_law * charge * np.sqrt(1. / mu)

예제 #2

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def simulateSpecies(self, xvals, peakShape='gaussian'):
        """Simulate a mass spectrum for this species.

        One peak is generated for every entry of ``self.charges_to_fit``:
        its centre is the m/z of ``self.csd_mass`` at that charge, its
        height comes from ``self.calc_amplitude`` and its width is
        ``self.p_fwhh``. All peaks are summed into a single trace.

        Args:
            xvals: m/z axis on which the spectrum is evaluated.
            peakShape: accepted for interface compatibility but currently
                ignored; every peak is drawn with ``msutils.gaussian``.

        Returns:
            A float numpy array of ``len(xvals)`` intensities.
        """
        spectrum = np.zeros(len(xvals), dtype='float')
        for charge_state in self.charges_to_fit:
            peak_centre = msutils.calc_mz(self.csd_mass, charge_state.charge)
            peak_height = self.calc_amplitude(peak_centre)
            spectrum += msutils.gaussian(xvals, peak_height, peak_centre,
                                         self.p_fwhh)
        return spectrum

예제 #3

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def estimateCharges(self, limit=1):
        """Pick the charge states that are worth simulating.

        For every charge from 1 to 150 the m/z of ``self.csd_mass`` is
        computed and its height obtained from ``self.calc_amplitude``. A
        charge state is kept when that height exceeds ``limit`` percent of
        ``self.g_amp``.

        Used as a subfunction for self.setSpecies().

        Side effect: ``self.charges_to_fit`` is rebuilt from scratch with
        one ``MsPeak`` (x, y and charge filled in) per accepted charge.

        Args:
            limit: cutoff, as a percentage of the Gaussian height.

        Returns:
            The accepted charges, in ascending order.
        """
        # TODO(gns) - perhaps lower the limit for atropos at least
        # TODO (KT) - maybe change this altogether
        # probably the default value as well.
        self.charges_to_fit = []
        accepted = []
        fraction = float(limit) / 100

        for charge in np.arange(1, 151):
            mz_value = msutils.calc_mz(self.csd_mass, charge)
            amplitude = self.calc_amplitude(mz_value)

            if not amplitude > self.g_amp * fraction:
                continue

            accepted.append(charge)
            candidate = MsPeak()
            candidate.x = mz_value
            candidate.y = amplitude
            candidate.charge = charge
            self.charges_to_fit.append(candidate)

        return accepted

예제 #4

0

파일 보기

파일: Matcher.py 프로젝트: wizofe/triton

    def match_peptide_fragments_pandas(self, df_search_space, mass_specrtum,
                                       ppm_error):
        """Match the top-N experimental peaks against a search space.

        For every experimental m/z a +/- ``ppm_error`` window (converted
        to Daltons with ``msu.calc_daltons_from_ppm``) is built and the
        rows of ``df_search_space`` whose ``Mass_Theor`` lies strictly
        inside the window are selected. Only the FIRST matching row is
        reported for each experimental peak.

        Args:
            df_search_space: DataFrame with a ``Mass_Theor`` column.
                NOTE(review): the result is built with seven column names,
                so this frame presumably holds exactly the five columns
                Sequence, Ion, Charge, Mass_Theor, Modifications - confirm
                against the caller.
            mass_specrtum: object exposing ``topN_xvals`` (m/z values) and
                ``topN_yvals`` (intensities) of equal length.
            ppm_error: matching tolerance in parts per million.

        Returns:
            A new DataFrame with one row per matched experimental peak:
            the matched search-space row followed by ``Mass_Obs`` and
            ``Intensity``, plus a ``ppm`` column holding
            ``(Mass_Theor - Mass_Obs) / Mass_Theor * 1e6``.
        """
        column_headers_peptide_search_space = [
            'Sequence', 'Ion', 'Charge', 'Mass_Theor', 'Modifications',
            'Mass_Obs', 'Intensity'
        ]

        search_results_array = []

        for i, experimental_mass in enumerate(mass_specrtum.topN_xvals):
            experimental_intensity = mass_specrtum.topN_yvals[i]

            # TODO make a new function to return lower and upper masses
            # based on ppm error
            ppm_to_daltons = msu.calc_daltons_from_ppm(experimental_mass,
                                                       ppm_error)
            temp_low_mass = experimental_mass - ppm_to_daltons
            temp_high_mass = experimental_mass + ppm_to_daltons

            # Results are collected in a separate list instead of being
            # written back into df_search_space, so that a later peak can
            # never overwrite the match of an earlier one.
            theoretical = df_search_space['Mass_Theor']
            temp_match = df_search_space.loc[(theoretical > temp_low_mass)
                                             & (theoretical < temp_high_mass)]

            if temp_match.empty:
                continue

            # DataFrame.as_matrix() was removed in pandas 1.0; .values is
            # the replacement that works on old and new versions alike.
            search_results_array.append(
                np.append(temp_match.values[0],
                          [experimental_mass, experimental_intensity]))

        df_search_results = pd.DataFrame(
            search_results_array, columns=column_headers_peptide_search_space)
        df_search_results['ppm'] = (
            df_search_results['Mass_Theor'] - df_search_results['Mass_Obs']
        ) / df_search_results['Mass_Theor'] * 1000000

        return df_search_results

예제 #5

0

파일 보기

    def calc_mass_and_charge(self):
        """Assign charges to ``self.mspeaks`` and derive the species mass.

        The peaks are sorted in descending order of their first item
        (NOTE(review): presumably the m/z, i.e. the same value as
        ``peak.x`` - confirm against MsPeak). Every starting charge from 1
        to 100 is then tried: the i-th peak is given charge ``z + i`` and
        its mass computed with ``msutils.calc_mass``. The starting charge
        whose masses have the smallest standard deviation wins.

        Side effects: sets ``self.mass`` (mean mass of the best series),
        ``self.massStd`` (its standard deviation) and ``charge`` on every
        peak; ``self.mspeaks`` is re-ordered in place.
        """
        self.mspeaks.sort(key=operator.itemgetter(0), reverse=True)

        candidates = []
        for first_charge in range(1, 101):
            series = [
                msutils.calc_mass(self.mspeaks[idx].x, first_charge + idx)
                for idx in range(len(self.mspeaks))
            ]
            mean_mass = np.average(series)
            spread = np.std(series)
            candidates.append([spread, first_charge, mean_mass, series])

        # Smallest spread first; the sort is stable so ties keep the
        # lowest starting charge.
        candidates.sort(key=operator.itemgetter(0), reverse=False)
        best_spread, best_charge, best_mass, best_series = candidates[0]

        self.mass = best_mass
        self.massStd = best_spread
        # Consecutive peaks carry consecutive charges starting at the
        # winning charge.
        for offset in range(len(best_series)):
            self.mspeaks[offset].charge = best_charge + offset

예제 #6

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def update_mass_after_optimisation(self):
        """Refresh ``self.charges_to_fit`` for the current ``self.csd_mass``.

        After an optimisation the species mass has changed, so the m/z
        (``x``) of every stored charge state is recomputed from the new
        mass and its height (``y``) re-evaluated with
        ``self.calc_amplitude``.

        :return: None; the peak objects are updated in place.
        """
        for charge_state in self.charges_to_fit:
            charge_state.x = msutils.calc_mz(self.csd_mass,
                                             charge_state.charge)
            charge_state.y = self.calc_amplitude(charge_state.x)

예제 #7

0

파일 보기

    def print_error_per_peak(self):
        """Print each peak's deviation from the average mass.

        For every peak in ``self.mspeaks`` the mass implied by its m/z and
        charge is computed and printed next to its difference from
        ``self.mass``, which helps to spot outlier peaks. Nothing is
        printed while ``self.mass`` is still 0.

        :return: None
        """
        if self.mass == 0:
            return

        template = "z {1: <3} m/z {0: 9.3f} mass {2:8.2f} massDiff {3:8.2f} "
        for peak in self.mspeaks:
            peak_mass = msutils.calc_mass(peak.x, peak.charge)
            deviation = self.mass - peak_mass
            print(template.format(peak.x, peak.charge, peak_mass, deviation))

예제 #8

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def plot_residuals_per_peak(self, ax, mspeaks_object, **kwargs):
        """Plot the mass residual of every experimental peak.

        The residual of a peak is ``self.csd_mass`` minus the mass implied
        by the peak's m/z and charge. Use this to assess whether a peak
        does not belong to a particular charge state series, e.g. when
        assigning peaks to a CSD at the beginning.

        :param ax: axes-like object whose ``plot`` method is called once
            per peak.
        :param mspeaks_object: iterable of peaks exposing ``x`` and
            ``charge``.
        :param kwargs: forwarded unchanged to ``ax.plot``.
        :return: the value returned by the last ``ax.plot`` call, or
            ``None`` when ``mspeaks_object`` is empty.
        """
        # Initialised up front so an empty peak collection returns None
        # instead of raising UnboundLocalError.
        ln = None
        for peak in mspeaks_object:
            residual = self.csd_mass - msutils.calc_mass(peak.x, peak.charge)
            ln = ax.plot(peak.x, residual, **kwargs)

        return ln

예제 #9

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def optimiseParameters(self):
        """Fit the Gaussian charge state distribution to ``self.mspeaks``.

        Non-linear least squares (``optimize.leastsq``) is used to fit the
        amplitude, centre and width of ``msutils.gaussian`` to the m/z
        (``x``) and intensity (``y``) of every picked peak, starting from
        the guess returned by ``self.estimateParameters``.

        The peaks must already carry m/z and intensity values from the
        peak picking step, otherwise the fit cannot work.

        Side effects: ``self.g_amp``, ``self.g_mu`` and ``self.g_fwhh``
        are set to the fitted parameters, or to the initial estimates
        (with a message printed) when the fit did not converge.

        Returns:
            dict with keys ``'amplitude'``, ``'centre'`` and ``'fwhm'``
            holding the same three values that were stored on the object.
        """
        xvals = [peak.x for peak in self.mspeaks]
        yvals = [peak.y for peak in self.mspeaks]

        def errorfunc(p, x, y):
            # Residual between the model Gaussian and the observed heights.
            return msutils.gaussian(x, p[0], p[1], p[2]) - y

        h, c, f = self.estimateParameters(xvals, yvals)
        p1, ier = optimize.leastsq(errorfunc, [h, c, f], args=(xvals, yvals))

        # leastsq signals success with an integer flag of 1, 2, 3 or 4;
        # every other value (not just 0) means no solution was found, so
        # a plain truthiness test would accept failed fits.
        if ier in (1, 2, 3, 4):
            amplitude, centre, fwhm = p1[0], p1[1], p1[2]
        else:
            print('Gaussian charge state distribution estimation failed')
            amplitude, centre, fwhm = h, c, f

        self.g_amp = amplitude
        self.g_mu = centre
        self.g_fwhh = fwhm

        return {'amplitude': amplitude, 'centre': centre, 'fwhm': fwhm}

예제 #10

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def calculateMassAndCharges(self, mspeak_objects):
        """Derive the species mass and peak charges from a set of peaks.

        The peaks are sorted in place by descending m/z. For every
        starting charge from 1 to 100 the i-th peak is given charge
        ``z + i`` and its mass computed; the starting charge whose masses
        have the smallest standard deviation is selected.

        Primarily used as a subfunction of self.calculateMass().

        Side effects: sets ``charge`` on every peak, and stores
        ``self.csd_mass`` (mean mass), ``self.csd_mass_error`` (mean
        absolute deviation from that mean) and ``self.csd_charge_states``
        (the consecutive charges that were assigned).

        Args:
            mspeak_objects: list of peaks exposing ``x``; re-ordered in
                place.
        """
        # TODO change this so that after the optimisation the ms_peak objects
        # are updated to include the charge
        mspeak_objects.sort(key=lambda peak: peak.x, reverse=True)

        series_by_charge = collections.OrderedDict()
        for first_charge in range(1, 101):
            series_by_charge[first_charge] = [
                msutils.calc_mass(peak.x, first_charge + offset)
                for offset, peak in enumerate(mspeak_objects)
            ]

        # Keep the first starting charge with the smallest spread.
        smallest_spread = 10000000
        lowest_z = 0
        for first_charge, series in series_by_charge.items():
            spread = np.std(series)
            if spread < smallest_spread:
                smallest_spread = spread
                lowest_z = first_charge

        best_series = series_by_charge[lowest_z]

        # Error = mean absolute deviation of the series from its mean.
        deviations = [
            abs(np.average(best_series) - mass) for mass in best_series
        ]
        mean_deviation = np.average(deviations)

        for offset, peak in enumerate(mspeak_objects):
            peak.charge = lowest_z + offset

        self.csd_mass = np.average(best_series)
        self.csd_mass_error = mean_deviation
        self.csd_charge_states = [
            lowest_z + offset for offset in range(len(mspeak_objects))
        ]
예제 #11

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def plot_csd_gaussian(self,
                          ax,
                          xaxis,
                          fwhm=10,
                          peakShape='gaussian',
                          **kwargs):
        """Plot the overall Gaussian which encompasses the CSD.

        The curve is drawn from ``self.g_amp``, ``self.g_mu`` and
        ``self.g_fwhh`` and labelled with the species name and mass just
        above its apex.

        :param ax: axes-like object providing ``plot`` and ``text``.
        :param xaxis: m/z values at which the Gaussian is evaluated.
        :param fwhm: accepted but not used by this method.
        :param peakShape: accepted but not used by this method.
        :param kwargs: forwarded unchanged to ``ax.plot``.
        :return: the value returned by ``ax.text`` (the label), not the
            plotted line.
        """
        label_gap = 3  # vertical distance between the apex and the label

        label = " ".join((str(self.name), str(self.csd_mass)))
        envelope = msutils.gaussian(xaxis, self.g_amp, self.g_mu,
                                    self.g_fwhh)
        ax.plot(xaxis, envelope, **kwargs)
        return ax.text(self.g_mu, self.g_amp + label_gap, label)

예제 #12

0

파일 보기

    def calcMassAndChargeOld(self):
        """Older mass/charge estimation that prints its progress.

        The m/z of every peak is printed before and after sorting
        ``self.mspeaks`` in ascending order of their first item. The m/z
        values are then taken in descending order and, for each starting
        charge from 1 to 100, the i-th value is given charge ``z + i`` and
        converted to a mass. The series with the smallest standard
        deviation is reported line by line.

        Returns:
            A tuple ``(avgmass, best)`` where ``best`` is the list
            ``[stdev, starting_charge, avgmass, masses]`` of the winning
            series.
        """
        for peak in self.mspeaks:
            print(peak.x)

        self.mspeaks.sort(key=operator.itemgetter(0), reverse=False)

        for peak in self.mspeaks:
            print(peak.x)

        descending_mz = sorted([peak.x for peak in self.mspeaks],
                               reverse=True)

        candidates = []
        for first_charge in range(1, 101):
            series = [
                msutils.calc_mass(mz, first_charge + offset)
                for offset, mz in enumerate(descending_mz)
            ]
            mean_mass = np.average(series)
            spread = np.std(series)
            candidates.append([spread, first_charge, mean_mass, series])

        # Smallest spread first.
        candidates.sort(key=operator.itemgetter(0), reverse=False)
        best = candidates[0]
        avgmass = best[2]

        for charge, series_mass in enumerate(best[3], start=best[1]):
            print("z: {0} m/z: {1} mass: {2} massDiff {3}".format(
                charge, series_mass, avgmass, avgmass - series_mass))

        return avgmass, best

예제 #13

0

파일 보기

파일: MsCSD.py 프로젝트: wizofe/triton

    def calculateMassAndChargesBACK(self):
        """Derive the species mass from the m/z values of ``self.mspeaks``.

        The m/z values are taken in descending order. For every starting
        charge from 1 to 100 the i-th value is given charge ``z + i`` and
        converted to a mass; the starting charge whose masses have the
        smallest standard deviation is selected.

        Primarily used as a subfunction of self.calculateMass().

        Side effects: stores ``self.csd_mass`` (mean mass),
        ``self.csd_mass_error`` (mean absolute deviation from that mean)
        and ``self.csd_charge_states``. The peak objects themselves are
        not modified.
        """
        # TODO change this so that after the optimisation the ms_peak objects
        # are updated to include the charge
        descending_mz = sorted(peak.x for peak in self.mspeaks)[::-1]

        series_by_charge = collections.OrderedDict()
        for first_charge in range(1, 101):
            series_by_charge[first_charge] = [
                msutils.calc_mass(mz, first_charge + offset)
                for offset, mz in enumerate(descending_mz)
            ]

        # Keep the first starting charge with the smallest spread.
        smallest_spread = 10000000
        lowest_z = 0
        for first_charge, series in series_by_charge.items():
            spread = np.std(series)
            if spread < smallest_spread:
                smallest_spread = spread
                lowest_z = first_charge

        best_series = series_by_charge[lowest_z]

        # Error = mean absolute deviation of the series from its mean.
        deviations = [
            abs(np.average(best_series) - mass) for mass in best_series
        ]
        mean_deviation = np.average(deviations)

        self.csd_mass = np.average(best_series)
        self.csd_mass_error = mean_deviation
        self.csd_charge_states = [
            lowest_z + offset for offset in range(len(descending_mz))
        ]

예제 #14

0

파일 보기

파일: FileManipulations.py 프로젝트: wizofe/triton

    def dir_to_pandas_frame(self,
                            dir_path,
                            experiment_name="Experiment1",
                            smooth_data=False):
        """Combine the ``.txt`` files of a directory into one DataFrame.

        Every ``.txt`` file is read as tab separated text with the arrival
        time in the first column and the intensity in the second. The
        intensities of each file are normalised to their maximum and
        stored in a column named after the part of the file name that
        follows the last underscore (e.g. ``data_10.txt`` -> ``"10"``).
        The arrival times of the first file read are stored once, in a
        column named after ``experiment_name``. Column names are wrapped
        in literal double quotes so that Javascript Challenger can consume
        the output.

        Side effects: sets ``self.smoothes``, ``self.window_len`` and
        ``self.poly_order`` and prints the path of every file read.

        Args:
            dir_path: directory to scan (not recursive).
            experiment_name: name of the arrival-time column.
            smooth_data: when true, ``msutils.sg`` is applied
                ``self.smoothes`` times before normalisation.

        Returns:
            A pandas DataFrame with one column per file plus the
            arrival-time column.
        """
        self.smoothes = 5
        self.window_len = 5
        self.poly_order = 1

        if smooth_data:
            print((
                'Smoothing... Smoothes {0} Window length {1} Polynomial order {2}'
            ).format(self.smoothes, self.window_len, self.poly_order))

        atd_array = []
        first_file = True  # arrival times are only collected once
        data = {}

        for temp_file in os.listdir(dir_path):
            if not temp_file.endswith(".txt"):
                continue

            filepath = os.path.join(dir_path, temp_file)
            print(filepath)

            # Take the label from the file name only, so underscores in
            # the directory path cannot leak in, and drop the extension
            # with splitext: str.strip('.txt') strips a character set and
            # would also eat leading/trailing 't', 'x' and '.'.
            stem = os.path.splitext(temp_file)[0]
            temp_voltage = stem.split('_')[-1]

            result = []
            # The context manager closes the file even if parsing fails.
            with open(filepath, "r") as f:
                for line in f:
                    stripped = line.strip('\r\n')
                    if not stripped:
                        continue  # tolerate blank (e.g. trailing) lines
                    tokens = stripped.split('\t')
                    result.append(float(tokens[1].replace("E", "e")))

                    if first_file:
                        atd_array.append(float(tokens[0]))

            # this is so that Javascript Challenger works
            temp_voltage = '\"' + temp_voltage + '\"'

            result = np.asarray(result)

            if smooth_data:
                for _ in range(self.smoothes):
                    result = msutils.sg(result,
                                        window_size=self.window_len,
                                        order=self.poly_order)

            result = result / result.max()
            data[temp_voltage] = result

            if first_file:
                # this is so that Javascript Challenger works
                exp_name_string = '\"' + experiment_name + '\"'
                data[exp_name_string] = atd_array
                first_file = False

        return pd.DataFrame(data)

예제 #15

0

파일 보기

파일: Matcher.py 프로젝트: wizofe/triton

    def create_search_space(self, fragments, charges, modifications):
        """Build the table of theoretical m/z values to search against.

        For every fragment the neutral mass is computed with
        ``self.mc.calc_oligo_fragment`` and one row is emitted per charge.
        For every modification that ``self.modify_oligo_fragments``
        reports with a positive mass, a further row per charge is emitted
        for the modified fragment.

        Args:
            fragments: An array of Fragment objects
            charges: The charges to consider
            modifications: The modifications to consider;
                a dictionary with first element the mod name and second
                the residue(s) or positions affected

        Returns:
            A pandas dataframe (columns ``self.column_headers``) whose
            rows are: sequence, ion, charge, absolute m/z, modification
            string (empty for unmodified fragments) and oligo position.
        """
        #TODO add extra columns in dataframe so that during the match you can populate them
        rows = []
        five_prime_ions = ('a', 'b', 'c', 'd', 'M')
        three_prime_ions = ('w', 'x', 'y', 'z')

        for frag in fragments:
            neutral_mass = self.mc.calc_oligo_fragment(frag.sequence,
                                                       frag.ion,
                                                       frag.five_prime_end,
                                                       frag.three_prime_end)

            position = ''
            if frag.ion[0] in five_prime_ions:
                position = frag.position_to
            if frag.ion[0] in three_prime_ions:
                position = frag.position_from - 1

            # Unmodified fragment; abs() is needed for negatively charged
            # ions.
            for z in charges:
                rows.append([
                    frag.sequence, frag.ion, z,
                    abs(msu.calc_mz(neutral_mass, z)), '', position
                ])

            for modification in modifications:
                mod_info = self.modify_oligo_fragments(frag, modification)
                if not mod_info['modification_mass'] > 0:
                    continue

                modified_mass = neutral_mass + mod_info['modification_mass']
                mod_label = mod_info['modification']
                for z in charges:
                    rows.append([
                        frag.sequence, frag.ion, z,
                        abs(msu.calc_mz(modified_mass, z)), mod_label,
                        position
                    ])

        return pd.DataFrame(rows, columns=self.column_headers)

예제 #16

0

파일 보기

파일: Matcher.py 프로젝트: wizofe/triton

    def match_oligo_fragments(self, fragments, mass_specrtum, ppm_error,
                              charges, modifications):
        """Match every top-N experimental peak against oligo fragments.

        For each experimental m/z, each fragment is tried unmodified and
        with every modification that ``self.modify_oligo_fragments``
        reports with a positive mass, at every charge. A row is recorded
        whenever the absolute ppm error between the experimental and the
        theoretical m/z is below ``ppm_error``.

        Args:
            fragments: An array of Fragment objects
            mass_specrtum: A MassSpectrum object (``topN_xvals`` and
                ``topN_yvals`` are read)
            ppm_error: The ppm error used for the match
            charges: The charges to consider
            modifications: The modifications to consider;
                a dictionary with first element the mod name and second
                the residue(s) or positions affected

        Returns:
            A pandas dataframe (columns ``self.column_headers``) whose
            rows are: sequence, ion, charge, experimental m/z,
            experimental intensity, theoretical m/z, ppm error,
            modification string and oligo position.
        """
        hits = []
        five_prime_ions = ('a', 'b', 'c', 'd', 'M')
        three_prime_ions = ('w', 'x', 'y', 'z')

        def record_hits(frag, neutral_mass, mod_label, position, observed_mz,
                        observed_intensity):
            # Try every charge for one (possibly modified) fragment mass.
            for z in charges:
                # need to abs for negatively charged ions
                theoretical_mz = abs(msu.calc_mz(neutral_mass, z))
                ppm = msu.calcppmerror(observed_mz, theoretical_mz)
                if abs(ppm) < ppm_error:
                    hits.append([
                        frag.sequence, frag.ion, z, observed_mz,
                        observed_intensity, theoretical_mz, ppm, mod_label,
                        position
                    ])

        for index, observed_mz in enumerate(mass_specrtum.topN_xvals):
            observed_intensity = mass_specrtum.topN_yvals[index]

            for frag in fragments:
                neutral_mass = self.mc.calc_oligo_fragment(
                    frag.sequence, frag.ion, frag.five_prime_end,
                    frag.three_prime_end)

                position = ''
                if frag.ion[0] in five_prime_ions:
                    position = frag.position_to
                if frag.ion[0] in three_prime_ions:
                    position = frag.position_from - 1

                record_hits(frag, neutral_mass, '', position, observed_mz,
                            observed_intensity)

                for modification in modifications:
                    mod_info = self.modify_oligo_fragments(frag, modification)
                    if not mod_info['modification_mass'] > 0:
                        continue

                    modified_mass = (neutral_mass +
                                     mod_info['modification_mass'])
                    record_hits(frag, modified_mass,
                                mod_info['modification'], position,
                                observed_mz, observed_intensity)

        return pd.DataFrame(hits, columns=self.column_headers)