def request_ms(self):
    """Return a MasterSpectrum holding one unit-intensity peak per stored m/z.

    Every entry of ``self.mz_ary`` is converted to ``float`` and added under
    charge key 0 with a 20 ppm delta function.
    """
    spectrum = MasterSpectrum()
    tolerance = calculate_Delta_by_ppm(20)
    for mz_value in self.mz_ary:
        spectrum.add(Peak(float(mz_value), 1, tolerance), 0)
    return spectrum
def load_recalibrate(self):
    """Recalibrate every spectrum of ``self.path`` against the TMT10 tag mass.

    For each spectrum read from the MGF file the peaks are collected in a
    fresh MasterSpectrum.  If a peak matching the mass returned by
    ``calculate_tag_tmt10()`` is found, its offset from that mass is used to
    shift all peaks: proportionally when ``self.type == 'ppm'`` or by a
    constant amount when ``self.type == 'absolute'``.  Spectra without such a
    peak are written unchanged.  Each resulting spectrum is written to
    ``self.file_out`` via ``mgf.write``.

    Raises:
        ValueError: if recalibration applies and ``self.type`` is neither
            ``'ppm'`` nor ``'absolute'``.
    """
    delta_for = calculate_Delta_by_ppm(self.ppm)
    tmt_mass = calculate_tag_tmt10()
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            ms = MasterSpectrum()
            params = spectrum['params']
            for mz_value, intensity in zip(spectrum['m/z array'],
                                           spectrum['intensity array']):
                ms.add(Peak(mz_value, intensity, delta_for))

            # Look for the tag peak; only a real hit enables recalibration.
            probe = Peak(tmt_mass, 0.5, delta_for)
            recalibrate = False
            if probe.key() in ms.spectrum[0]:
                idx, bin_to_ack, a, b = ms.binary(
                    probe, 0, len(ms.spectrum[0][probe.key()]) - 1, 0)
                if idx != -1:
                    recalibrate = True
                    recalibration_mass = ms.spectrum[0][probe.key()][idx].mz
                    diff = tmt_mass - recalibration_mass
                    print(params['title'])
                    print("original={0}\tdiff={1}".format(
                        recalibration_mass, diff))
                    ppm_shift = calculate_ppm_shift(diff, tmt_mass)

            mass_list = []
            int_list = []
            for key in ms.spectrum[0].keys():
                for mp in ms.spectrum[0][key]:
                    if not recalibrate:
                        mass_list.append(mp.mz)
                    elif self.type == 'ppm':
                        # the shift in Da depends on the peak's own m/z
                        shift = calculate_da_shift(mp.mz, ppm_shift)
                        mass_list.append(mp.mz + shift)
                    elif self.type == 'absolute':
                        mass_list.append(mp.mz + diff)
                    else:
                        print(self.type)
                        raise ValueError("what did you dooooo")
                    int_list.append(mp.intensity)

            print("len is:\t{0}".format(len(mass_list)))
            recalibrated = {
                'm/z array': mass_list,
                'intensity array': int_list,
                'params': params
            }
            mgf.write(spectra=[recalibrated], output=self.file_out)
def load_from_csv(self, path, delta_function=calculate_Delta_by_ppm(20)):
    """Populate this spectrum from a CSV file, skipping its header row.

    Columns used per data row: 0 = m/z, 1 = intensity, 2 = counts,
    5 = original m/z, 6 = key passed (as ``str``) as second argument to
    ``self.add``.

    Args:
        path: CSV file to read.
        delta_function: delta function handed to every created Peak
            (defaults to 20 ppm).
    """
    with open(path, 'r') as csvfile:
        rows = csv.reader(csvfile)
        next(rows, None)  # first row is the header
        for row in rows:
            master_peak = MasterPeak(
                Peak(float(row[0]), float(row[1]), delta_function))
            master_peak.counts = int(row[2])
            master_peak.mz_origin = float(row[5])
            self.add(master_peak, str(row[6]))
def createDeltaPrecursorMasterSpectrum(
        self, delta_func=calculate_Delta_by_ppm(20)):
    """Fill ``self.masterSpectrum`` with precursor-minus-fragment deltas.

    For every spectrum in ``self.path`` the precursor value is computed by
    ``calculatePrecursor`` from the first ``pepmass`` and ``charge`` entries;
    each fragment then contributes a peak at ``precursor - m/z`` carrying the
    fragment's relative intensity, stored under charge key 0.

    Args:
        delta_func: delta function handed to every created Peak
            (defaults to 20 ppm).
    """
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            intensities = spectrum['intensity array']
            mz_values = spectrum['m/z array']
            spectrum_charge = spectrum['params']['charge'][0]
            precursor = calculatePrecursor(
                mz=spectrum['params']['pepmass'][0], charge=spectrum_charge)
            relative = calculateRelativeIntensity(intensities)
            for mz_value, rel in zip(mz_values, relative):
                delta_peak = Peak(precursor - float(mz_value), float(rel),
                                  delta_func)
                self.masterSpectrum.add(delta_peak, 0)
def generateMS_by_score_file(score_info_object):
    """Build a MasterSpectrum from one score-file entry.

    ``score_info_object`` must provide an ``'mz'`` sequence and a ``'diff'``
    sequence; they are consumed pairwise.  Each m/z becomes a unit-intensity
    peak (20 ppm delta function, charge key 0) carrying the paired diff as
    its ``meta``.

    Returns:
        tuple: ``(master_spectrum, number_of_pairs_added)``.
    """
    spectrum = MasterSpectrum()
    tolerance = calculate_Delta_by_ppm(20)
    added = 0
    pairs = zip(score_info_object['mz'], score_info_object['diff'])
    for added, (mz_value, delta) in enumerate(pairs, start=1):
        spectrum.add(Peak(float(mz_value), 1, tolerance, meta=delta), 0)
    return spectrum, added
def load_from_mgf(self, path, ignoreCharges,
                  delta_func=calculate_Delta_by_ppm(20)):
    """Add every peak of an MGF file to this spectrum.

    Intensities are converted with ``calculateRelativeIntensity`` per
    spectrum before the peaks are added.

    Args:
        path: MGF file to read.
        ignoreCharges: when true, all peaks are stored under charge key 0;
            otherwise under ``str`` of the spectrum's first charge entry.
        delta_func: delta function handed to every created Peak
            (defaults to 20 ppm).

    Raises:
        KeyError: if ``ignoreCharges`` is false and a spectrum has no
            ``charge`` parameter.
    """
    with mgf.read(path) as spectra:
        for spectrum in spectra:
            rel_int = calculateRelativeIntensity(
                spectrum['intensity array'])
            if ignoreCharges:
                # The charge is not needed here, so do not touch it: spectra
                # without a charge entry must still load.
                charge_key = 0
            else:
                charge_key = str(spectrum['params']['charge'][0])
            for m, i in zip(spectrum['m/z array'], rel_int):
                self.add(Peak(float(m), float(i), delta_func), charge_key)
def test_patch_mgf(self):
    """Patching must remove the bins 177 and 433 from the sample MGF."""
    source_mgf = 'tests/data/cetsa_101287_A01_P013190_S00_N01_R1_TMT10_3spectra_shorted.mgf'
    patched_mgf = 'tests/data/temp/cetsa_101287_A01_P013190_S00_N01_R1_TMT10_3spectra_shorted_patched.mgf'

    # Before patching the bin 177 is present.
    before = MasterSpectrum()
    before.load_from_mgf(source_mgf, ignoreCharges=True)
    assert_equal(177 in before.spectrum[0].keys(), True)

    patcher = MgfPatcher(delta_func=calculate_Delta_by_ppm(20))
    patcher.readExclusionList('tests/data/exclusionList.txt')
    patcher.patchMgf(source_mgf, patched_mgf)

    # After patching both bins are gone.
    after = MasterSpectrum()
    after.load_from_mgf(patched_mgf, ignoreCharges=True)
    assert_equal(177 in after.spectrum[0].keys(), False)
    assert_equal(433 in after.spectrum[0].keys(), False)
def analyse_mzid_vs_mgf(self):
    """Check that every identified m/z is present in its MGF spectrum.

    For each identification, all m/z values from ``report_all_mzs()`` are
    looked up (20 ppm delta function, charge key 0) in the MasterSpectrum
    returned by the matching entry of ``self.mgf_reads``.

    Raises:
        ValueError: if ``self.mgf_reads`` is empty, or if an m/z value has no
            matching peak in the spectrum of its scan id.
    """
    delta_func = calculate_Delta_by_ppm(20)
    if self.mgf_reads == {}:
        raise ValueError("need the mgf beforehand read_enhanced_spectrum")

    for ids in self.identifications:
        identified_mzs = ids.report_all_mzs()
        ms = self.mgf_reads[ids.scan_id].request_ms()
        for mz in identified_mzs:
            probe = Peak(mz, 2, delta_func)
            matched = False
            if probe.key() in ms.spectrum[0]:
                idx, bin_to_ack, merge_left, merge_right = ms.binary(
                    probe, 0, len(ms.spectrum[0][probe.key()]) - 1, 0)
                matched = idx != -1
            if not matched:
                # either the bin is missing or no peak in it is close enough
                error = "mz:\t{0}\nscan_id:\t{1}".format(mz, ids.scan_id)
                raise ValueError(error)
def __init__(self, ppm, id_2, id_1, peak_list_2, peak_list_1, mass_2,
             mass_1, charge, extra_mass, int_list_2, int_list_1, params2,
             params1):
    """Store a pair of spectra, ordered so slot 1 holds the larger mass.

    If ``float(mass_2) > float(mass_1)`` the two inputs are swapped when they
    are stored; otherwise they keep their given slots.  ``self.params`` is
    the params object of whichever input ended up in slot 1, and its
    ``"#numtags"`` entry is set to ``"-1"`` (the passed-in object itself is
    modified).

    Args:
        ppm: stored as ``self.ppm``.
        id_2, id_1: identifiers of the two spectra; stored as ``int``.
        peak_list_2, peak_list_1: peak lists of the two spectra.
        mass_2, mass_1: masses of the two spectra (compared as ``float``).
        charge: stored as ``int``.
        extra_mass: stored unchanged.
        int_list_2, int_list_1: intensity lists of the two spectra.
        params2, params1: params objects of the two spectra.
    """
    self.ppm = ppm
    self.charge = int(charge)
    self.extra_mass = extra_mass
    # NOTE(review): the delta function is fixed at 20 ppm and does not use
    # the ``ppm`` argument - confirm this is intended.
    self.fc = calculate_Delta_by_ppm(20)

    given_1 = (mass_1, id_1, peak_list_1, int_list_1, params1)
    given_2 = (mass_2, id_2, peak_list_2, int_list_2, params2)
    if float(mass_2) > float(mass_1):
        first, second = given_2, given_1
    else:
        first, second = given_1, given_2

    self.mass_1 = first[0]
    self.mass_2 = second[0]
    self.id_1 = int(first[1])
    self.id_2 = int(second[1])
    self.peak_list_1 = first[2]
    self.peak_list_2 = second[2]
    self.int_list_1 = first[3]
    self.int_list_2 = second[3]
    self.params = first[4]
    self.params["#numtags"] = "{0}".format("-1")
def test_init(self):
    """Patched delta CSV must not contain any m/z starting with '71.'.

    Generates an exclusion list, extracts a delta master spectrum from the
    sample MGF, exports it to CSV, patches it and scans the first column of
    the patched file.

    Raises:
        ValueError: if a data row's first column contains ``'71.'``.
    """
    deltaP = DeltaPatcher(delta_func=calculate_Delta_by_ppm(20))
    gps = GeneratorForPerfectSpectra()
    gps.generateAminoAcidDeltaList("tests/data/temp/", 1, 0)
    deltaP.readExclusionList("tests/data/temp/exclusionListDelta_1_0.csv")
    dEx = DeltaExtractor(
        "tests/data/cetsa_101287_A01_P013190_S00_N01_R1_TMT10_3spectra_shorted.mgf"
    )
    dEx.createDeltaMasterSpectrum(0)
    dEx.exportCsv("tests/data/temp/a.csv")
    # Write next to the other temporary test artefacts; an absolute path in
    # one developer's home directory breaks the test everywhere else.
    out = "tests/data/temp/b.csv"
    deltaP.patchDelta(input_path="tests/data/temp/a.csv", output_path=out)
    with open(out, 'r') as csvfile:
        readr = csv.reader(csvfile)
        next(readr, None)  # skip the header row
        for row in readr:
            if '71.' in row[0]:
                raise ValueError('A very specific bad thing happened')
def createDeltaMasterSpectrum(self, min_rel_intensity,
                              delta_func=calculate_Delta_by_ppm(20)):
    """Fill ``self.masterSpectrum`` with all pairwise m/z differences.

    Per spectrum of ``self.path``: peaks whose relative intensity is below
    ``min_rel_intensity`` are dropped, then for every pair of remaining
    peaks the difference (later entry minus earlier entry) is added under
    charge key 0.  The running spectrum index is printed as progress output.

    Args:
        min_rel_intensity: minimum relative intensity a peak needs to take
            part in the pairing.
        delta_func: delta function handed to every created Peak
            (defaults to 20 ppm).
    """
    with mgf.read(self.path) as spectra:
        for spectrum_index, spectrum in enumerate(spectra):
            print(spectrum_index)
            intensities = spectrum['intensity array']
            mz_values = spectrum['m/z array']
            relative = calculateRelativeIntensity(intensities)
            kept = [(mz, rel) for mz, rel in zip(mz_values, relative)
                    if rel >= min_rel_intensity]
            for upper in reversed(range(len(kept))):
                upper_mz = kept[upper][0]
                for lower in reversed(range(upper)):
                    lower_mz, lower_rel = kept[lower]
                    # the intensity is taken from the lower-index peak
                    self.masterSpectrum.add(
                        Peak(upper_mz - lower_mz, lower_rel, delta_func), 0)
def load_distiller_mgf2(self):
    """
    Pair spectra of ``self.path`` by precursor mass and collect References.

    Each spectrum's precursor value (``calculatePrecursor`` of the first
    pepmass and charge entries) is looked up in ``self.ms`` under the same
    precursor charge, once for every offset yielded by
    ``gen_allowed_mass_diff_with_sign(n=4, z=1)``.  On the first hit a
    ``Reference`` built from the stored and the current spectrum is added to
    ``self.references`` and the stored entry is removed from ``self.ms``.

    A spectrum without a partner is stored in ``self.ms`` for later
    spectra; at that point entries whose scan id is smaller than the current
    scan id minus 20 are discarded.

    Stored peaks carry the scan id in their ``intensity`` slot and the
    spectrum data (m/z array, intensity array, params) in ``meta``.
    """
    fc = calculate_Delta_by_ppm(self.ppm)
    error = 0  # never incremented in this method, so the final print is dead
    self.ms = MasterSpectrum()
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            mass = spectrum['params']['pepmass'][0]
            precursor_chrg = int(spectrum['params']['charge'][0])
            mass = calculatePrecursor(mass, precursor_chrg)
            scanid = int(parse_scan_id(spectrum['params']['title']))
            found = False
            if len(self.ms.spectrum) == 0:
                # Nothing stored yet: store this spectrum as a candidate.
                peak = Peak(mass, scanid, fc)  # unused; the peak added below is built separately
                self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
                found = True
            else:
                if (precursor_chrg in self.ms.spectrum.keys()):  # only spectra of the same precursor charge are paired
                    if len(self.ms.spectrum[precursor_chrg]) == 0:
                        peak = Peak(mass, scanid, fc)  # unused, see above
                        self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
                        found = True
                    else:
                        # Try every allowed mass offset until one matches.
                        for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                            if found is False:
                                peak = Peak(mass + extra_mass, 0.5, fc)
                                if peak.key() in self.ms.spectrum[precursor_chrg]:
                                    print(precursor_chrg)
                                    idx, bin_to_ack, a, b = self.ms.binary(peak, 0, len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1, precursor_chrg)
                                    if idx != -1:
                                        self.references.add(Reference(ppm=self.ppm,
                                                                      id_2=scanid,
                                                                      id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,  # the intensity slot holds the stored scan id
                                                                      peak_list_2=spectrum['m/z array'],
                                                                      peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                                      mass_2=mass,
                                                                      mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                                      charge=spectrum['params']['charge'][0],
                                                                      extra_mass=extra_mass,
                                                                      int_list_2=spectrum['intensity array'],
                                                                      int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                                      params2=spectrum['params'],
                                                                      params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                        found = True
                                        # The stored partner is consumed; drop it and
                                        # any container that became empty.
                                        del(self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                        if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                            del(self.ms.spectrum[precursor_chrg][peak.key()])
                                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                                            del(self.ms.spectrum[precursor_chrg])
            if found is False:
                # No partner: rebuild the store keeping only recent scan ids,
                # then remember the current spectrum.
                limit_scan_id = scanid - 20  # could start at -19
                ms_bac = MasterSpectrum()
                for chrg in self.ms.spectrum:
                    for key in self.ms.spectrum[chrg].keys():
                        for mp in self.ms.spectrum[chrg][key]:
                            if mp.intensity >= limit_scan_id:
                                ms_bac.add(mp, charge=chrg)
                self.ms = ms_bac
                self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
    if error > 0:
        print(" delete valid information {0}".format(error))
def test_load_exclusion_list(self):
    """Reading the sample exclusion list yields the expected peak and delta."""
    patcher = MgfPatcher(delta_func=calculate_Delta_by_ppm(20))
    patcher.readExclusionList('tests/data/exclusionList.txt')

    first_peak_in_bin_177 = patcher.exclusionSpectrum.spectrum[0][177][0]
    assert_equal(first_peak_in_bin_177.mz, 176.309174)
    assert_equal(patcher.precursorDeltas, [229])
def test_init(self):
    """The patcher keeps the delta function it was constructed with."""
    patcher = MgfPatcher(delta_func=calculate_Delta_by_ppm(20))
    # 20 ppm of one million is 20
    delta_at_one_million = patcher.delta_function(1000000)
    assert_equal(delta_at_one_million, 20)
def load_msconvert_mgf(self):
    """
    Pair spectra of ``self.path`` by precursor mass and collect References.

    Scan ids are tracked in runs of consecutive numbers; a gap in the
    numbering starts a new run (the original author notes that a missing
    scan id means an MS1 event, so a run corresponds to one MS2 block).
    A run that ends with an odd number of spectra is counted as an error.

    Within the current store ``self.ms`` each spectrum's precursor value is
    looked up under the same precursor charge, once for every offset yielded
    by ``gen_allowed_mass_diff_with_sign(n=4, z=1)``.  On the first hit a
    ``Reference`` is added to ``self.references`` and the stored entry is
    removed; a spectrum without a partner is stored for later spectra.

    Stored peaks carry the scan id in their ``intensity`` slot and the
    spectrum data (m/z array, intensity array, params) in ``meta``.

    NOTE(review): ``self.ms`` is read before this method ever assigns it;
    presumably it is initialised elsewhere - confirm.
    """
    fc = calculate_Delta_by_ppm(self.ppm)
    scan_id_ary = []  # scan ids of the current run of consecutive ids
    problems = []  # first scan id of every odd-length run; only collected, never read
    error = 0
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            mass = spectrum['params']['pepmass'][0]
            precursor_chrg = int(spectrum['params']['charge'][0])
            mass = calculatePrecursor(mass, precursor_chrg)
            scanid = int(parse_scan_id(spectrum['params']['title']))
            if len(scan_id_ary) == 0:
                scan_id_ary.append(scanid)
            else:
                if scanid != scan_id_ary[-1] + 1:
                    # Gap in the numbering: the previous run is finished.
                    if len(scan_id_ary) % 2 == 1:
                        problems.append(scan_id_ary[0])
                        error += 1
                        scan_id_ary = []
                        scan_id_ary.append(scanid)
                    else:
                        scan_id_ary = []
                        scan_id_ary.append(scanid)
                    # NOTE(review): nesting level of this reset was reconstructed
                    # from flattened source; here it applies to every gap - confirm.
                    self.ms = MasterSpectrum()  # new store once a scan id group (separated by MS1) is completed
                else:
                    scan_id_ary.append(scanid)
            found = False
            if len(self.ms.spectrum) == 0:
                # Nothing stored yet: store this spectrum as a candidate.
                peak = Peak(mass, scanid, fc)  # unused; the peak added below is built separately
                self.ms.add(Peak(mass, scanid, fc, meta={
                    'ms': spectrum['m/z array'],
                    'int': spectrum['intensity array'],
                    'params': spectrum['params']
                }), charge=precursor_chrg)
                found = True
            else:
                if (precursor_chrg in self.ms.spectrum.keys()):  # only spectra of the same precursor charge are paired
                    if len(self.ms.spectrum[precursor_chrg]) == 0:
                        peak = Peak(mass, scanid, fc)  # unused, see above
                        self.ms.add(Peak(mass, scanid, fc, meta={
                            'ms': spectrum['m/z array'],
                            'int': spectrum['intensity array'],
                            'params': spectrum['params']
                        }), charge=precursor_chrg)
                        found = True
                    else:
                        # Try every allowed mass offset until one matches.
                        for extra_mass in gen_allowed_mass_diff_with_sign(
                                n=4, z=1):
                            if found is False:
                                peak = Peak(mass + extra_mass, 0.5, fc)
                                if peak.key() in self.ms.spectrum[precursor_chrg]:
                                    print(precursor_chrg)
                                    idx, bin_to_ack, a, b = self.ms.binary(
                                        peak, 0,
                                        len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1,
                                        precursor_chrg)
                                    if idx != -1:
                                        self.references.add(
                                            Reference(
                                                ppm=self.ppm,
                                                id_2=scanid,
                                                id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,  # the intensity slot holds the stored scan id
                                                peak_list_2=spectrum['m/z array'],
                                                peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                mass_2=mass,
                                                mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                charge=spectrum['params']['charge'][0],
                                                extra_mass=extra_mass,
                                                int_list_2=spectrum['intensity array'],
                                                int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                params2=spectrum['params'],
                                                params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                        found = True
                                        # The stored partner is consumed; drop it and
                                        # any container that became empty.
                                        del (self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                        if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                            del (self.ms.spectrum[precursor_chrg][peak.key()])
                                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                                            del (self.ms.spectrum[precursor_chrg])
            if found is False:
                # No partner found: remember this spectrum for later ones.
                self.ms.add(Peak(mass, scanid, fc, meta={
                    'ms': spectrum['m/z array'],
                    'int': spectrum['intensity array'],
                    'params': spectrum['params']
                }), charge=precursor_chrg)
    if error > 0:
        print(" delete valid information {0}".format(error))
def test_ppm_delta(self):
    """A 20 ppm delta function returns 20 millionths of its argument."""
    twenty_ppm = calculate_Delta_by_ppm(20)
    expected = 20 * (100 / (math.pow(10, 6)))
    assert_equal(expected, twenty_ppm(100))
def analyze_mzid_id_vs_score_file(self, path_score, output_path="/home/tobiass/Desktop/out.csv"):
    """
    Compare identified fragment ions with the deltas of a score file.

    Prerequisite: the mzid identifications (``self.identifications``) must
    already be loaded.

    Steps:
        - collect the scan ids of the rank-1 entry of every identification
        - read those scans from the score file via ``self.read_score_file``
        - call ``self.parse_mz_id_peptide_ref()`` to fill
          ``self.peptide_evidence``
        - for every identification and rank, build a MasterSpectrum from the
          score-file entry (the delta is stored as ``meta`` of every peak)
        - for every ion series (fragment type containing 'b' or 'y') compute
          the expected TMT mass delta per ion position and look the ion up in
          the MasterSpectrum (20 ppm delta function, charge key 0)
        - write one CSV row per ion

    Args:
        path_score: path to the score file (per the original author it is
            created by control.py improve_csv).
        output_path: CSV file that is created/overwritten with the result.

    Output:
        Nothing is returned; rows are written to ``output_path``.  The
        "found" column holds the delta stored with the matched peak, -2 if
        the peak's bin exists but holds no matching peak, or -1 if the bin
        does not exist at all.

    Raises:
        ValueError: if an ion series' fragment type contains neither 'b'
            nor 'y'.
    """
    with open(output_path, "wt") as csvfile:
        writr = csv.writer(csvfile, lineterminator=os.linesep)
        writr.writerow(("scanid", "rank", "peak", "position", "frag", "expected by mascot", "found", "charge", "PeaksMatchedInSerie", "max_number_of_tmt_tag"))
        valid_scan_ids = {}
        delta_func = calculate_Delta_by_ppm(20)
        # Only the entry stored under key 1 of each identification is used here.
        for identification_hashobject in self.identifications:
            valid_scan_ids[identification_hashobject[1].scan_id] = True
        valid_scan_ids = valid_scan_ids.keys()  # quick and dirty de-duplication via dict keys
        valid_scan_ids = [int(i) for i in valid_scan_ids]
        valid_spectra_info = self.read_score_file(path_score, valid_scan_ids)
        self.parse_mz_id_peptide_ref()
        for identification_hashobject in self.identifications:
            # i is the key of the identification entry; it is written to the "rank" column
            for i in identification_hashobject:
                peptide_ref = identification_hashobject[i].peptide_ref
                scan_id = int(identification_hashobject[i].scan_id)
                b_tmt, y_tmt = self.peptide_evidence[peptide_ref].get_annotated_positions()
                max_tmt_tag = calculate_max_tmt(b_tmt)  # same number for b and y
                ms, dPeaks_matched = generateMS_by_score_file(valid_spectra_info[scan_id])
                for ion_serie in identification_hashobject[i].ion_series_ary:
                    tmt_masses = calculate_allowed_Mass_Diff(n=max_tmt_tag, z=ion_serie.charge)
                    if 'b' in ion_serie.fragtype:
                        tmt_pos_ary = b_tmt
                    elif 'y' in ion_serie.fragtype:
                        tmt_pos_ary = y_tmt
                    else:
                        raise ValueError("{0}\tis not valid fragtype".format(ion_serie.fragtype))
                    for pos, mz in zip(ion_serie.ions_index, ion_serie.mz_ary):
                        num_tag_at_pos = tmt_pos_ary[pos]
                        if num_tag_at_pos == 0:
                            expected_mass_delta = 0
                        else:
                            expected_mass_delta = tmt_masses[num_tag_at_pos][ion_serie.charge]
                        peak = Peak(mz, 1, delta_func)
                        if peak.key() in ms.spectrum[0]:
                            idx, bin_to_ack, should_merge_left_peak, should_merge_right_peak = ms.binary(peak, 0, len(ms.spectrum[0][peak.key()]) - 1, 0)
                            if idx == -1:
                                # Original author's note: the score-file spectra only hold the
                                # peaks of the heavy-labelled partner of a mix; both partners
                                # need not have the same number of peaks, only the number of
                                # spectra is the same.
                                # msg is assembled but not used; this case used to print/raise.
                                msg = "Peak:\t{0}\n"
                                msg += "frag:\t{4}\n"
                                msg += "position:\t{5}\n"
                                msg += "scanid:\t{1}\n"
                                msg += "expected:\t{2}\n"
                                msg += "found:\t{3}\n"
                                msg += "peak was just part of heavy spectra(but same number before \".\"\n"
                                # -2: the bin exists, but no peak in it matches
                                writr.writerow((scan_id, i, mz, pos, ion_serie.fragtype, expected_mass_delta, -2, ion_serie.charge, dPeaks_matched, max_tmt_tag))
                            else:
                                found_mass_diff = ms.spectrum[0][peak.key()][idx].meta
                                # msg is assembled but not used
                                msg = "Peak:\t{0}\n"
                                msg += "frag:\t{4}\n"
                                msg += "position:\t{5}\n"
                                msg += "scanid:\t{1}\n"
                                msg += "expected:\t{2}\n"
                                msg += "found:\t{3}\n"
                                # Both branches write the same row; a mismatch used to raise.
                                if found_mass_diff != expected_mass_delta:
                                    writr.writerow((scan_id, i, mz, pos, ion_serie.fragtype, expected_mass_delta, found_mass_diff, ion_serie.charge, dPeaks_matched, max_tmt_tag))
                                else:
                                    # expected and found delta agree
                                    writr.writerow((scan_id, i, mz, pos, ion_serie.fragtype, expected_mass_delta, found_mass_diff, ion_serie.charge, dPeaks_matched, max_tmt_tag))
                        else:
                            # Original author's note: the score-file spectra only hold the
                            # peaks of the heavy-labelled partner of a mix; both partners
                            # need not have the same number of peaks, only the number of
                            # spectra is the same.
                            # msg is assembled but not used
                            msg = "Peak:\t{0}\n"
                            msg += "frag:\t{4}\n"
                            msg += "position:\t{5}\n"
                            msg += "scanid:\t{1}\n"
                            msg += "expected:\t{2}\n"
                            msg += "found:\t{3}\n"
                            msg += "peak was just part of heavy spectra\n"
                            # -1: the peak's bin does not exist in the spectrum
                            writr.writerow((scan_id, i, mz, pos, ion_serie.fragtype, expected_mass_delta, -1, ion_serie.charge, dPeaks_matched, max_tmt_tag))
def __init__(self, delta_func=calculate_Delta_by_ppm(20)):
    """Start with an empty exclusion spectrum and no precursor deltas.

    Args:
        delta_func: delta function kept as ``self.delta_function``
            (defaults to 20 ppm).
    """
    self.delta_function = delta_func
    self.precursorDeltas = []
    self.exclusionSpectrum = MasterSpectrum()