def _calculateOmega(self, td, mz, charge, gas='Nitrogen'):
    """Return the collision cross section (omega) for the given drift times.

    The drift times are passed through ``utils._calculateTdPrime`` (with
    ``self.waveVelocity``) and ``utils._calculateTdDoublePrime`` (with
    ``mz``), raised to the power law
    ``self.coefficientA * td'' ** self.coefficientB`` and finally scaled by
    ``charge * sqrt(1 / reduced_mass)``.

    Args:
        td: Drift time(s); converted to a numpy array before use.
        mz: m/z value forwarded to the ``utils`` helpers.
        charge: Charge used both for the reduced mass and as a multiplier.
        gas: Gas name forwarded to ``utils._calculateReducedMass``.

    Returns:
        The computed collision cross section value(s).
    """
    drift_times = np.array(td)
    td_prime = utils._calculateTdPrime(drift_times, self.waveVelocity)
    td_double_prime = utils._calculateTdDoublePrime(td_prime, mz)
    power_law_term = self.coefficientA * td_double_prime**self.coefficientB
    reduced_mass = utils._calculateReducedMass(mz, charge, gas)
    return power_law_term * charge * np.sqrt(1. / reduced_mass)
def simulateSpecies(self, xvals, peakShape='gaussian'):
    """Simulate a mass spectrum from the charge states in ``self.charges_to_fit``.

    One Gaussian peak is added per entry of ``self.charges_to_fit``: its
    centre is ``msutils.calc_mz(self.csd_mass, charge)``, its height comes
    from ``self.calc_amplitude(centre)`` and its width is ``self.p_fwhh``.

    Args:
        xvals: x-axis values on which the spectrum is evaluated.
        peakShape: Accepted for compatibility; the current implementation
            always draws Gaussian peaks and does not read this argument.

    Returns:
        A float numpy array of ``len(xvals)`` holding the summed peaks.
    """
    spectrum = np.zeros(len(xvals), dtype='float')
    for charge_peak in self.charges_to_fit:
        mz_centre = msutils.calc_mz(self.csd_mass, charge_peak.charge)
        height = self.calc_amplitude(mz_centre)
        spectrum += msutils.gaussian(xvals, height, mz_centre, self.p_fwhh)
    return spectrum
def estimateCharges(self, limit=1):
    """Select the charge states worth simulating and store them as peaks.

    Charges 1 to 150 are tried in turn.  For each one the m/z is obtained
    from ``msutils.calc_mz(self.csd_mass, z)`` and the height from
    ``self.calc_amplitude``.  A charge is kept when its height exceeds
    ``limit`` percent of ``self.g_amp``.

    Side effect: ``self.charges_to_fit`` is reset and refilled with one
    ``MsPeak`` (``x``, ``y`` and ``charge`` set) per retained charge.

    Args:
        limit: Cut-off expressed as a percentage of ``self.g_amp``.

    Returns:
        The list of retained charges.
    """
    # TODO(gns) - perhaps lower the limit for atropos at least
    # TODO (KT) - maybe change this altogether
    # probably the default value as well.
    self.charges_to_fit = []
    kept_charges = []
    for z in np.arange(1, 151):
        mz_value = msutils.calc_mz(self.csd_mass, z)
        height = self.calc_amplitude(mz_value)
        cutoff = self.g_amp * (float(limit) / 100)
        if not height > cutoff:
            continue
        kept_charges.append(z)
        charge_peak = MsPeak()
        charge_peak.x = mz_value
        charge_peak.y = height
        charge_peak.charge = z
        self.charges_to_fit.append(charge_peak)
    return kept_charges
def match_peptide_fragments_pandas(self, df_search_space, mass_specrtum,
                                   ppm_error):
    """Match the top-N experimental peaks against a theoretical search space.

    For every peak in ``mass_specrtum.topN_xvals`` a mass window of
    +/- ``msu.calc_daltons_from_ppm(mass, ppm_error)`` is built and the rows
    of ``df_search_space`` whose ``'Mass_Theor'`` lies strictly inside the
    window are selected.  Only the FIRST such row is kept per peak; the
    observed mass and intensity are appended to it.

    Args:
        df_search_space: DataFrame with a ``'Mass_Theor'`` column.  Its rows
            plus the two appended values must line up with the seven result
            column names used below.
        mass_specrtum: Object exposing ``topN_xvals`` and ``topN_yvals``.
        ppm_error: Tolerance handed to ``msu.calc_daltons_from_ppm``.

    Returns:
        A DataFrame with the matched rows and an extra ``'ppm'`` column
        computed as ``(Mass_Theor - Mass_Obs) / Mass_Theor * 1000000``.
    """
    column_headers_peptide_search_space = [
        'Sequence', 'Ion', 'Charge', 'Mass_Theor', 'Modifications',
        'Mass_Obs', 'Intensity'
    ]
    search_results_array = []
    for i, experimental_mass in enumerate(mass_specrtum.topN_xvals):
        experimental_intensity = mass_specrtum.topN_yvals[i]
        ppm_to_daltons = msu.calc_daltons_from_ppm(experimental_mass,
                                                   ppm_error)
        temp_low_mass = experimental_mass - ppm_to_daltons
        temp_high_mass = experimental_mass + ppm_to_daltons

        theoretical = df_search_space['Mass_Theor']
        temp_match = df_search_space.loc[(theoretical > temp_low_mass) &
                                         (theoretical < temp_high_mass)]
        if temp_match.empty:
            continue

        # ``DataFrame.as_matrix()`` no longer exists in pandas >= 1.0;
        # ``.values`` returns the same ndarray on old and new versions.
        first_hit = temp_match.values[0]
        search_results_array.append(
            np.append(first_hit, [experimental_mass, experimental_intensity]))

    df_search_results = pd.DataFrame(
        search_results_array, columns=column_headers_peptide_search_space)
    df_search_results['ppm'] = (
        df_search_results['Mass_Theor'] - df_search_results['Mass_Obs']
    ) / df_search_results['Mass_Theor'] * 1000000
    return df_search_results
def calc_mass_and_charge(self):
    """Assign charges to ``self.mspeaks`` and derive the species mass.

    The peaks are sorted in descending order (key ``itemgetter(0)``).  For
    every starting charge from 1 to 100 the i-th peak is assumed to carry
    charge ``start + i`` and its mass is computed with
    ``msutils.calc_mass``.  The starting charge with the smallest standard
    deviation across those masses wins.

    Side effects: sets ``self.mass`` (average mass of the winning series),
    ``self.massStd`` (its standard deviation) and the ``charge`` attribute
    of each peak in ``self.mspeaks``.
    """
    self.mspeaks.sort(key=operator.itemgetter(0), reverse=True)

    candidates = []
    for start_z in range(1, 101):
        series = [
            msutils.calc_mass(pk.x, start_z + offset)
            for offset, pk in enumerate(self.mspeaks)
        ]
        mean_mass = np.average(series)
        spread = np.std(series)
        candidates.append([spread, start_z, mean_mass, series])
    candidates.sort(key=operator.itemgetter(0), reverse=False)

    # The first entry after sorting is the series with the lowest spread.
    best = candidates[0]
    first_charge = best[1]
    self.mass = best[2]
    self.massStd = best[0]
    for offset in range(len(best[3])):
        self.mspeaks[offset].charge = first_charge + offset
def update_mass_after_optimisation(self):
    """Refresh ``self.charges_to_fit`` after ``self.csd_mass`` has changed.

    Each peak's ``x`` is recomputed with
    ``msutils.calc_mz(self.csd_mass, peak.charge)`` and its ``y`` with
    ``self.calc_amplitude`` evaluated at the new ``x``.

    :return: None
    """
    for charge_peak in self.charges_to_fit:
        updated_mz = msutils.calc_mz(self.csd_mass, charge_peak.charge)
        charge_peak.x = updated_mz
        charge_peak.y = self.calc_amplitude(charge_peak.x)
def print_error_per_peak(self):
    """Print, for each peak, how far its mass is from ``self.mass``.

    Nothing is printed when ``self.mass`` equals 0.  Otherwise every peak in
    ``self.mspeaks`` is converted to a mass with ``msutils.calc_mass`` and
    one line with charge, m/z, mass and mass difference is written, which
    helps to spot outlier peaks.

    :return: None
    """
    if self.mass == 0:
        return
    row_format = "z {1: <3} m/z {0: 9.3f} mass {2:8.2f} massDiff {3:8.2f} "
    for pk in self.mspeaks:
        peak_mass = msutils.calc_mass(pk.x, pk.charge)
        deviation = self.mass - peak_mass
        print(row_format.format(pk.x, pk.charge, peak_mass, deviation))
def plot_residuals_per_peak(self, ax, mspeaks_object, **kwargs):
    """Plot the mass residual of every experimental peak.

    The residual of a peak is ``self.csd_mass`` minus the mass obtained from
    ``msutils.calc_mass(peak.x, peak.charge)``.  Use this to judge whether a
    peak belongs to a particular charge state series, e.g. when first
    assigning peaks to a charge state distribution.

    Args:
        ax: Axes-like object exposing ``plot``.
        mspeaks_object: Iterable of peaks with ``x`` and ``charge``.
        **kwargs: Forwarded unchanged to ``ax.plot``.

    Returns:
        The value returned by the last ``ax.plot`` call, or ``None`` when
        ``mspeaks_object`` is empty.
    """
    # Initialised so that an empty peak list returns None instead of raising
    # UnboundLocalError at the return statement.
    ln = None
    for peak in mspeaks_object:
        residual = self.csd_mass - msutils.calc_mass(peak.x, peak.charge)
        ln = ax.plot(peak.x, residual, **kwargs)
    return ln
def optimiseParameters(self):
    """Fit the charge state distribution Gaussian by non-linear least squares.

    The x/y values of ``self.mspeaks`` (m/z and intensity of the picked
    peaks) are fitted with ``msutils.gaussian``; the starting point comes
    from ``self.estimateParameters``.  When the optimiser does not report
    convergence a message is printed and the initial estimates are used
    instead.

    Side effects: sets ``self.g_amp``, ``self.g_mu`` and ``self.g_fwhh``.

    Returns:
        dict with the keys ``'amplitude'``, ``'centre'`` and ``'fwhm'``
        holding the same three values.
    """
    xvals = [peak.x for peak in self.mspeaks]
    yvals = [peak.y for peak in self.mspeaks]

    def errorfunc(p, x, y):
        return msutils.gaussian(x, p[0], p[1], p[2]) - y

    h, c, f = self.estimateParameters(xvals, yvals)
    p1, ier = optimize.leastsq(errorfunc, [h, c, f], args=(xvals, yvals))

    # leastsq's second return value is an integer status flag: only 1, 2, 3
    # and 4 mean a solution was found.  Testing its truthiness would accept
    # the non-zero failure codes as success.
    if ier in (1, 2, 3, 4):
        amplitude, centre, fwhm = p1[0], p1[1], p1[2]
    else:
        print('Gaussian charge state distribution estimation failed')
        amplitude, centre, fwhm = h, c, f

    self.g_amp = amplitude
    self.g_mu = centre
    self.g_fwhh = fwhm
    return {'amplitude': amplitude, 'centre': centre, 'fwhm': fwhm}
def calculateMassAndCharges(self, mspeak_objects):
    """Derive the species mass and the peak charges from a list of peaks.

    The peaks are sorted in place by descending ``x``.  For every starting
    charge from 1 to 100 the i-th peak is given charge ``start + i`` and its
    mass is computed with ``msutils.calc_mass``; the starting charge with
    the smallest standard deviation of those masses is selected.

    Side effects: sets ``charge`` on every peak, and sets ``self.csd_mass``
    (mean mass), ``self.csd_mass_error`` (mean absolute deviation from that
    mean) and ``self.csd_charge_states`` (the assigned charges).

    Args:
        mspeak_objects: List of peaks with an ``x`` attribute; reordered.
    """
    # TODO change this so that after the optimisation the ms_peak objects
    # are updated to include the charge
    mspeak_objects.sort(key=lambda pk: pk.x, reverse=True)

    series_by_start = collections.OrderedDict()
    for start_z in range(1, 101):
        series_by_start[start_z] = [
            msutils.calc_mass(pk.x, start_z + offset)
            for offset, pk in enumerate(mspeak_objects)
        ]

    best_spread = 10000000
    best_start = 0
    for start_z, series in series_by_start.items():
        spread = np.std(series)
        if spread < best_spread:
            best_spread = spread
            best_start = start_z

    # calculating error
    best_series = series_by_start[best_start]
    deviations = [abs(np.average(best_series) - mass) for mass in best_series]
    mean_deviation = np.average(deviations)

    # assign charges to mspeaks
    for offset, pk in enumerate(mspeak_objects):
        pk.charge = best_start + offset

    self.csd_mass = np.average(best_series)
    self.csd_mass_error = mean_deviation
    self.csd_charge_states = [
        best_start + offset for offset in range(len(mspeak_objects))
    ]
def plot_csd_gaussian(self, ax, xaxis, fwhm=10, peakShape='gaussian',
                      **kwargs):
    """Plot the overall Gaussian that encompasses the charge state distribution.

    The curve is ``msutils.gaussian`` evaluated on ``xaxis`` with
    ``self.g_amp``, ``self.g_mu`` and ``self.g_fwhh``.  A text label made of
    ``self.name`` and ``self.csd_mass`` is placed 3 units above the apex.
    ``fwhm`` and ``peakShape`` are accepted but not read here.

    :param ax: axes-like object exposing ``plot`` and ``text``
    :param xaxis: x values on which the Gaussian is evaluated
    :param kwargs: forwarded to ``ax.plot``
    :return: the object returned by ``ax.text``
    """
    label_offset = 3
    label = str(self.name) + " " + str(self.csd_mass)
    envelope = msutils.gaussian(xaxis, self.g_amp, self.g_mu, self.g_fwhh)
    ax.plot(xaxis, envelope, **kwargs)
    return ax.text(self.g_mu, self.g_amp + label_offset, label)
def calcMassAndChargeOld(self):
    """Older mass/charge estimation that prints its intermediate results.

    The ``x`` of every peak is printed before and after ``self.mspeaks`` is
    sorted in ascending order (key ``itemgetter(0)``).  The m/z values are
    then taken in descending order and, for each starting charge from 1 to
    100, the i-th value is converted to a mass assuming charge ``start + i``.
    The series with the smallest standard deviation is reported line by
    line.

    Returns:
        ``(average_mass, best)`` where ``best`` is the list
        ``[stdev, start_charge, average_mass, masses]`` of the winning
        series.
    """
    for pk in self.mspeaks:
        print(pk.x)
    self.mspeaks.sort(key=operator.itemgetter(0), reverse=False)
    for pk in self.mspeaks:
        print(pk.x)

    mz_descending = sorted((pk.x for pk in self.mspeaks), reverse=True)

    candidates = []
    for start_z in range(1, 101):
        series = [
            msutils.calc_mass(mz, start_z + offset)
            for offset, mz in enumerate(mz_descending)
        ]
        mean_mass = np.average(series)
        spread = np.std(series)
        candidates.append([spread, start_z, mean_mass, series])
    candidates.sort(key=operator.itemgetter(0), reverse=False)

    best = candidates[0]
    avgmass = best[2]
    for offset, series_mass in enumerate(best[3]):
        print("z: {0} m/z: {1} mass: {2} massDiff {3}".format(
            best[1] + offset, series_mass, avgmass, avgmass - series_mass))
    return avgmass, best
def calculateMassAndChargesBACK(self):
    """Derive the species mass from the m/z values of ``self.mspeaks``.

    The m/z values are taken in descending order.  For every starting charge
    from 1 to 100 the i-th value is converted to a mass with
    ``msutils.calc_mass`` assuming charge ``start + i``; the starting charge
    with the smallest standard deviation is selected.

    Side effects: sets ``self.csd_mass`` (mean mass), ``self.csd_mass_error``
    (mean absolute deviation from that mean) and ``self.csd_charge_states``.
    Unlike ``calculateMassAndCharges`` the peaks themselves are not touched.
    """
    # TODO change this so that after the optimisation the ms_peak objects
    # are updated to include the charge
    mz_descending = sorted(pk.x for pk in self.mspeaks)[::-1]

    series_by_start = collections.OrderedDict()
    for start_z in range(1, 101):
        series_by_start[start_z] = [
            msutils.calc_mass(mz, start_z + offset)
            for offset, mz in enumerate(mz_descending)
        ]

    best_spread = 10000000
    best_start = 0
    for start_z, series in series_by_start.items():
        spread = np.std(series)
        if spread < best_spread:
            best_spread = spread
            best_start = start_z

    # calculating error
    best_series = series_by_start[best_start]
    deviations = [abs(np.average(best_series) - mass) for mass in best_series]
    mean_deviation = np.average(deviations)

    self.csd_mass = np.average(best_series)
    self.csd_mass_error = mean_deviation
    self.csd_charge_states = [
        best_start + offset for offset in range(len(mz_descending))
    ]
def dir_to_pandas_frame(self, dir_path, experiment_name="Experiment1",
                        smooth_data=False):
    """Combine all ``.txt`` files of a directory into one DataFrame.

    Every file is read as tab separated text: the first field of a row is
    the x value (called "atd" here) and the second the intensity.  The file
    name is expected to end in ``_<label>.txt``; ``<label>`` wrapped in
    double quotes becomes the column name.  Each intensity column is divided
    by its maximum.  The x values are taken from the first file processed
    and stored under the quoted ``experiment_name``.

    Side effects: sets ``self.smoothes``, ``self.window_len`` and
    ``self.poly_order``, and prints progress information.

    Args:
        dir_path: Directory to scan (not recursive).
        experiment_name: Name of the x-value column (before quoting).
        smooth_data: When true, ``msutils.sg`` is applied ``self.smoothes``
            times to every intensity column before normalisation.

    Returns:
        A ``pandas.DataFrame`` with one column per file plus the x column.
    """
    self.smoothes = 5
    self.window_len = 5
    self.poly_order = 1
    if smooth_data:
        print((
            'Smoothing... Smoothes {0} Window length {1} Polynomial order {2}'
        ).format(self.smoothes, self.window_len, self.poly_order))

    suffix = ".txt"
    atd_array = []
    first_file = True
    data = {}
    for temp_file in os.listdir(dir_path):
        if not temp_file.endswith(suffix):
            continue
        filepath = os.path.join(dir_path, temp_file)
        print(filepath)

        # Remove the extension as a suffix; str.strip('.txt') would remove
        # any leading/trailing '.', 't' or 'x' characters from the label.
        # The label is taken from the file name only so that underscores in
        # directory names cannot leak into it.
        temp_voltage = temp_file[:-len(suffix)].split('_')[-1]

        result = []
        with open(filepath, "r") as f:
            for x in f:
                stripped = x.strip('\r\n')
                if not stripped.strip():
                    # tolerate blank (e.g. trailing) lines
                    continue
                tokens = stripped.split('\t')
                print(tokens[1])
                e_notations = tokens[1].replace("E", "e")
                print(e_notations)
                result.append(float(e_notations))
                if first_file:
                    # only get the atd values once
                    atd_array.append(float(tokens[0]))

        # quoted so that Javascript Challenger works
        temp_voltage = '\"' + temp_voltage + '\"'
        result = np.asarray(result)
        if smooth_data:
            for _ in range(self.smoothes):
                result = msutils.sg(result,
                                    window_size=self.window_len,
                                    order=self.poly_order)
        result = result / result.max()
        data[temp_voltage] = result
        if first_file:
            # quoted so that Javascript Challenger works
            exp_name_string = '\"' + experiment_name + '\"'
            data[exp_name_string] = atd_array
        first_file = False

    frame = pd.DataFrame(data)
    return frame
def create_search_space(self, fragments, charges, modifications):
    """Build the theoretical m/z search space for a set of fragments.

    For every fragment the neutral mass comes from
    ``self.mc.calc_oligo_fragment``.  One row is emitted per charge for the
    unmodified fragment, followed by one row per charge for every
    modification for which ``self.modify_oligo_fragments`` reports a
    ``'modification_mass'`` greater than zero.

    Args:
        fragments: An array of Fragment objects
        charges: The charges to consider
        modifications: The modifications to consider; iterated directly and
            each item is handed to ``self.modify_oligo_fragments``.

    Returns:
        A pandas dataframe with the columns ``self.column_headers``; each
        row holds sequence, ion, charge, m/z, modification label and
        position.
    """
    # TODO add extra columns in dataframe so that during the match you can populate them
    rows = []
    for frag in fragments:
        position = ''
        neutral_mass = self.mc.calc_oligo_fragment(frag.sequence, frag.ion,
                                                   frag.five_prime_end,
                                                   frag.three_prime_end)
        if frag.ion[0] in ('a', 'b', 'c', 'd', 'M'):
            position = frag.position_to
        if frag.ion[0] in ('w', 'x', 'y', 'z'):
            position = frag.position_from - 1

        # Unmodified fragment; abs() is needed for negatively charged ions.
        for z in charges:
            mz_value = abs(msu.calc_mz(neutral_mass, z))
            rows.append([frag.sequence, frag.ion, z, mz_value, '', position])

        for modification in modifications:
            mod_info = self.modify_oligo_fragments(frag, modification)
            if not mod_info['modification_mass'] > 0:
                continue
            shifted_mass = neutral_mass + mod_info['modification_mass']
            mod_label = mod_info['modification']
            for z in charges:
                mz_value = abs(msu.calc_mz(shifted_mass, z))
                rows.append([
                    frag.sequence, frag.ion, z, mz_value, mod_label, position
                ])

    return pd.DataFrame(rows, columns=self.column_headers)
def match_oligo_fragments(self, fragments, mass_specrtum, ppm_error, charges,
                          modifications):
    """Match experimental peaks against theoretical oligo fragment ions.

    Every peak in ``mass_specrtum.topN_xvals`` is compared with every
    fragment at every charge, first unmodified and then with each
    modification for which ``self.modify_oligo_fragments`` reports a
    ``'modification_mass'`` greater than zero.  A row is recorded whenever
    the absolute value of ``msu.calcppmerror(experimental, theoretical)`` is
    below ``ppm_error``.

    Args:
        fragments: An array of Fragment objects
        mass_specrtum: A MassSpectrum object (``topN_xvals``/``topN_yvals``)
        ppm_error: The ppm error used for the match
        charges: The charges to consider
        modifications: The modifications to consider; iterated directly and
            each item is handed to ``self.modify_oligo_fragments``.

    Returns:
        A pandas dataframe with the columns ``self.column_headers``; each
        row holds sequence, ion, charge, observed mass, observed intensity,
        theoretical m/z, ppm error, modification label and position.
    """
    rows = []
    for idx, observed_mass in enumerate(mass_specrtum.topN_xvals):
        observed_intensity = mass_specrtum.topN_yvals[idx]
        for frag in fragments:
            position = ''
            neutral_mass = self.mc.calc_oligo_fragment(
                frag.sequence, frag.ion, frag.five_prime_end,
                frag.three_prime_end)
            if frag.ion[0] in ('a', 'b', 'c', 'd', 'M'):
                position = frag.position_to
            if frag.ion[0] in ('w', 'x', 'y', 'z'):
                position = frag.position_from - 1

            # Unmodified fragment; abs() is needed for negatively charged
            # ions.
            for z in charges:
                mz_value = abs(msu.calc_mz(neutral_mass, z))
                ppm_value = msu.calcppmerror(observed_mass, mz_value)
                if abs(ppm_value) < ppm_error:
                    rows.append([
                        frag.sequence, frag.ion, z, observed_mass,
                        observed_intensity, mz_value, ppm_value, '', position
                    ])

            for modification in modifications:
                mod_info = self.modify_oligo_fragments(frag, modification)
                if not mod_info['modification_mass'] > 0:
                    continue
                shifted_mass = neutral_mass + mod_info['modification_mass']
                mod_label = mod_info['modification']
                for z in charges:
                    mz_value = abs(msu.calc_mz(shifted_mass, z))
                    ppm_value = msu.calcppmerror(observed_mass, mz_value)
                    if abs(ppm_value) < ppm_error:
                        rows.append([
                            frag.sequence, frag.ion, z, observed_mass,
                            observed_intensity, mz_value, ppm_value,
                            mod_label, position
                        ])

    return pd.DataFrame(rows, columns=self.column_headers)