class DeltaExtractor(object):
    """Collect pairwise m/z differences of an MGF file in a MasterSpectrum."""

    def __init__(self, path):
        """Remember the MGF ``path`` and start with an empty MasterSpectrum."""
        self.masterSpectrum = MasterSpectrum()
        self.path = path

    def createDeltaMasterSpectrum(self, min_rel_intensity,
                                  delta_func=calculate_Delta_by_ppm(20)):
        """
        Add every pairwise peak difference of every spectrum to the master
        spectrum.

        Peaks whose relative intensity is below ``min_rel_intensity`` are
        dropped first.  For each remaining pair the difference
        (later peak - earlier peak, in file order) is stored as a Peak that
        carries the relative intensity of the earlier peak.  The running
        spectrum index is printed as a progress indicator.
        """
        with mgf.read(self.path) as spectra:
            for up, spectrum in enumerate(spectra):
                print(up)
                raw_intensities = spectrum['intensity array']
                raw_masses = spectrum['m/z array']
                relative = calculateRelativeIntensity(raw_intensities)

                kept = [(mz, rel) for mz, rel in zip(raw_masses, relative)
                        if rel >= min_rel_intensity]
                masses = [mz for mz, _ in kept]
                intensities = [rel for _, rel in kept]

                # Walk from the last peak down to the first, pairing each
                # peak with every peak that precedes it.
                for upper in reversed(range(len(masses))):
                    for lower in reversed(range(upper)):
                        delta_peak = Peak(masses[upper] - masses[lower],
                                          intensities[lower],
                                          delta_func)
                        self.masterSpectrum.add(delta_peak, 0)

    def exportCsv(self, output_path):
        """Write the collected master spectrum to ``output_path`` as CSV."""
        self.masterSpectrum.export_to_csv(output_path)
def __init__(self, path, output_path, min_rel_similarity, ppm=10):
    """
    Store the configuration and create the empty working containers.

    ``references`` is a sorted list ordered by each element's ``id_1``.
    """
    # Plain configuration values handed in by the caller.
    self.path = path
    self.output_path = output_path
    self.min_rel_similarity = min_rel_similarity
    self.ppm = ppm
    # Working state, filled later.
    self.out = []
    self.ms = MasterSpectrum()
    self.references = sortedlist(key=lambda ref: ref.id_1)
def request_ms(self):
    """
    Return a new MasterSpectrum holding one peak per entry of ``self.mz_ary``.

    Every peak gets intensity 1 and a 20 ppm delta function and is added
    under charge 0.
    """
    tolerance = calculate_Delta_by_ppm(20)
    spectrum = MasterSpectrum()
    for mz in self.mz_ary:
        spectrum.add(Peak(float(mz), 1, tolerance), 0)
    return spectrum
def load_recalibrate(self):
    """
    Recalibrate every spectrum of the MGF file at ``self.path`` and write it.

    For each spectrum a MasterSpectrum is built from its peaks and searched
    for a peak matching the mass returned by ``calculate_tag_tmt10()``,
    using the delta function derived from ``self.ppm``.  When a match is
    found, its offset from that mass is used to shift all peaks: either
    proportionally (``self.type == 'ppm'``) or by the constant offset
    (``self.type == 'absolute'``).  Spectra without a match keep their
    masses.  Each spectrum is handed to ``mgf.write`` on its own with
    ``output=self.file_out``.

    Raises:
        ValueError: if a spectrum is being recalibrated and ``self.type`` is
            neither ``'ppm'`` nor ``'absolute'``.
    """
    fc = calculate_Delta_by_ppm(self.ppm)
    tmt_mass = calculate_tag_tmt10()
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            ms = MasterSpectrum()
            params = spectrum['params']
            for mass, intensity in zip(spectrum['m/z array'],
                                       spectrum['intensity array']):
                # No charge is passed; the lookups below read ms.spectrum[0],
                # so add() presumably defaults to charge 0 - TODO confirm.
                ms.add(Peak(mass, intensity, fc))
            # Probe peak used only to locate the reference mass.
            peak = Peak(tmt_mass, 0.5, fc)
            if peak.key() not in ms.spectrum[0]:
                recalibrate = False
            else:
                idx, bin_to_ack, a, b = ms.binary(
                    peak, 0, len(ms.spectrum[0][peak.key()]) - 1, 0)
                if idx == -1:
                    # Bin exists but holds no matching peak.
                    recalibrate = False
                else:
                    recalibrate = True
                    recalibration_mass = ms.spectrum[0][peak.key()][idx].mz
                    # Offset between expected and observed reference mass.
                    diff = tmt_mass - recalibration_mass
                    print(params['title'])
                    print("original={0}\tdiff={1}".format(
                        recalibration_mass, diff))
            mass_list = []
            int_list = []
            if recalibrate:
                # diff is only bound when a reference peak was found, hence
                # the guard.
                ppm_shift = calculate_ppm_shift(diff, tmt_mass)
            for key in ms.spectrum[0].keys():
                for mp in ms.spectrum[0][key]:
                    if recalibrate:
                        if self.type == 'ppm':
                            # Per-peak shift derived from the ppm offset;
                            # this rebinds diff on every peak.
                            diff = calculate_da_shift(mp.mz, ppm_shift)
                            mass_list.append(mp.mz + diff)
                        elif self.type == 'absolute':
                            # No-op assignment: the constant offset computed
                            # above is reused unchanged.
                            diff = diff
                            mass_list.append(mp.mz + diff)
                        else:
                            print(self.type)
                            raise ValueError("what did you dooooo")
                    else:
                        mass_list.append(mp.mz)
                    int_list.append(mp.intensity)
            print("len is:\t{0}".format(len(mass_list)))
            # One write call per spectrum, all targeting self.file_out.
            mgf.write(spectra=[{
                'm/z array': mass_list,
                'intensity array': int_list,
                'params': params
            }], output=self.file_out)
def test_weird_summing_mgf(self):
    """
    Annotate ``weird_sum.mgf``, export the result as MGF, reload it and
    check the intensity of the first peak stored under key 179.
    """
    path = "tests/data/weird_sum.mgf"
    # Keep every artefact inside the repository's temp directory instead of
    # a developer-specific home directory, so the test runs on any machine.
    # This test never triggers the CSV export, so no file is created here.
    output = "tests/data/temp/weird_sum.csv"
    output_mgf = "tests/data/temp/weird_mgf.mgf"

    # NOTE(review): the MgfAnnotater constructor visible in this file also
    # requires ``min_rel_similarity``; this call omits it - confirm which
    # constructor version this test targets and which threshold it expects.
    ma = MgfAnnotater(path, output)
    ma.load_msconvert_mgf()
    ma.export_annotated_spectra_to_mgf(output_mgf)

    ms = MasterSpectrum()
    # Reload exactly the file that was just exported.
    ms.load_from_mgf(output_mgf, ignoreCharges=True)
    assert_equal(round(ms.spectrum[0][179][0].intensity, 4),
                 round(0.0025, 4))
class DeltaPatcher(object):
    """
    Filter a delta CSV file against an exclusion list.

    Rows whose first column matches an 'absolute' entry of the exclusion
    list are dropped; all other rows are copied unchanged.
    """

    def __init__(self, delta_func=calculate_Delta_by_ppm(20)):
        """
        Args:
            delta_func: delta function handed to every Peak created by this
                object (defaults to the 20 ppm function).
        """
        self.exclusionSpectrum = MasterSpectrum()
        self.delta_function = delta_func
        self.precursorDeltas = []

    def readExclusionList(self, path):
        """
        Load the exclusion list from the CSV file at ``path``.

        The first row is treated as a header and skipped.  In every other
        row column 0 holds the mass and column 2 the entry type:
        ``'absolute'`` entries are added to ``self.exclusionSpectrum``
        (charge 0), ``'precursor'`` entries are appended to
        ``self.precursorDeltas``.  Column 1 is not read.

        Raises:
            ValueError: if column 2 holds any other value.
        """
        # newline='' lets the csv module do its own newline handling, as the
        # csv documentation requires.
        with open(path, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # skip the header row
            for row in reader:
                kind = row[2]
                if kind == 'absolute':
                    p = Peak(float(row[0]), 1.0, self.delta_function)
                    # 0 for no differentiation of charge states
                    self.exclusionSpectrum.add(MasterPeak(p), 0)
                elif kind == 'precursor':
                    self.precursorDeltas.append(float(row[0]))
                else:
                    raise ValueError(
                        "unknown exclusion type {0!r} in {1}; expected "
                        "'absolute' or 'precursor'".format(kind, path))

    def _is_excluded(self, mz):
        """Return True if ``mz`` matches an absolute exclusion entry."""
        peak = Peak(mz, 0, self.delta_function)
        bins = self.exclusionSpectrum.spectrum[0]
        key = peak.key()
        if key not in bins:
            return False
        # binary() reports -1 as index when the bin holds no matching peak.
        idx, _bin_to_ack, _merge_left, _merge_right = \
            self.exclusionSpectrum.binary(peak, 0, len(bins[key]) - 1, 0)
        return idx != -1

    def patchDelta(self, input_path, output_path):
        """
        Copy the CSV at ``input_path`` to ``output_path`` without excluded rows.

        The header row is copied as is.  A data row is dropped (and
        "found it" is printed) when its first column matches an absolute
        exclusion entry.
        """
        # The writer is given os.linesep explicitly, so the file must be
        # opened with newline='' to stop the text layer from translating the
        # terminator a second time (which yields "\r\r\n" on Windows).
        with open(output_path, "wt", newline='') as csvfile:
            writer = csv.writer(csvfile, lineterminator=os.linesep)
            with open(input_path, 'r', newline='') as r_csvfile:
                reader = csv.reader(r_csvfile)
                header = next(reader, None)
                if header is not None:
                    writer.writerow(header)
                for row in reader:
                    if self._is_excluded(float(row[0])):
                        print("found it")
                    else:
                        writer.writerow(row)
def just_create_heavy_ms(self):
    """
    Return a MasterSpectrum built only from peak list 1.

    Each peak of ``self.peak_list_1`` / ``self.int_list_1`` is added with
    the same placeholder metadata (delta 511, mass -1, not decharged).
    """
    heavy = MasterSpectrum()
    for mz, intensity in zip(self.peak_list_1, self.int_list_1):
        # A fresh dict per peak so that no two peaks share metadata.
        placeholder_meta = {'delta': 511, 'mass': -1, 'decharged': False}
        heavy.add(Peak(mz, intensity, self.fc, meta=placeholder_meta))
    return heavy
def generateMS_by_score_file(score_info_object):
    """
    Build a MasterSpectrum from the 'mz' and 'diff' arrays of a score object.

    Each m/z value becomes a unit-intensity peak (20 ppm delta function,
    charge 0) whose metadata is the paired 'diff' value.

    Returns:
        (MasterSpectrum, number of peaks added)
    """
    ms = MasterSpectrum()
    delta_func = calculate_Delta_by_ppm(20)
    dPeaks_matched = 0  # stays 0 when there are no pairs
    pairs = zip(score_info_object['mz'], score_info_object['diff'])
    for dPeaks_matched, (mz, diff) in enumerate(pairs, start=1):
        ms.add(Peak(float(mz), 1, delta_func, meta=diff), 0)
    return ms, dPeaks_matched
class PrecursorDeltaPeaksExtractor(object):
    """Collect (precursor - fragment) differences of an MGF file."""

    def __init__(self, path):
        """Remember the MGF ``path`` and start with an empty MasterSpectrum."""
        self.masterSpectrum = MasterSpectrum()
        self.path = path

    def createDeltaPrecursorMasterSpectrum(
            self, delta_func=calculate_Delta_by_ppm(20)):
        """
        For every peak of every spectrum add ``precursor - m/z`` to the
        master spectrum, carrying the peak's relative intensity.

        The precursor value comes from ``calculatePrecursor`` applied to the
        spectrum's first 'pepmass' and first 'charge' entry.
        """
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                raw_intensities = spectrum['intensity array']
                fragment_masses = spectrum['m/z array']
                params = spectrum['params']
                charge = params['charge'][0]
                precursor = calculatePrecursor(mz=params['pepmass'][0],
                                               charge=charge)
                relative = calculateRelativeIntensity(raw_intensities)
                for fragment_mz, fragment_int in zip(fragment_masses,
                                                     relative):
                    delta_peak = Peak(precursor - float(fragment_mz),
                                      float(fragment_int),
                                      delta_func)
                    self.masterSpectrum.add(delta_peak, 0)

    def exportCsv(self, output_path):
        """Write the collected master spectrum to ``output_path`` as CSV."""
        self.masterSpectrum.export_to_csv(output_path)
def test_patch_mgf(self):
    """
    Patch the three-spectra fixture with the exclusion list and check that
    key 177 (present before patching) and key 433 are absent afterwards.
    """
    source_mgf = 'tests/data/cetsa_101287_A01_P013190_S00_N01_R1_TMT10_3spectra_shorted.mgf'
    patched_mgf = 'tests/data/temp/cetsa_101287_A01_P013190_S00_N01_R1_TMT10_3spectra_shorted_patched.mgf'

    # Before patching: key 177 is present.
    original = MasterSpectrum()
    original.load_from_mgf(source_mgf, ignoreCharges=True)
    assert_equal(177 in original.spectrum[0].keys(), True)

    patcher = MgfPatcher(delta_func=calculate_Delta_by_ppm(20))
    patcher.readExclusionList('tests/data/exclusionList.txt')
    patcher.patchMgf(source_mgf, patched_mgf)

    # After patching: both keys are gone.
    patched = MasterSpectrum()
    patched.load_from_mgf(patched_mgf, ignoreCharges=True)
    for removed_key in (177, 433):
        assert_equal(removed_key in patched.spectrum[0].keys(), False)
def load_distiller_mgf2(self):
    """
    Pair spectra of the MGF file at ``self.path`` by precursor mass.

    ``self.ms`` is used as a pool of not-yet-paired spectra, keyed by
    precursor charge.  Each pooled peak stores the precursor mass as m/z,
    the scan id in its intensity slot and the spectrum's arrays and params
    in ``meta``.  For every spectrum the pool is searched (same charge
    only) for a peak at ``mass + extra_mass`` for each value yielded by
    ``gen_allowed_mass_diff_with_sign(n=4, z=1)``.  On the first hit a
    Reference is added to ``self.references`` and the hit is removed from
    the pool.  Without a hit, the pool is rebuilt keeping only entries
    whose scan id is at least ``scanid - 20`` and the current spectrum is
    added to it.
    """
    fc = calculate_Delta_by_ppm(self.ppm)
    # Never incremented in this method, so the report at the end cannot fire.
    error = 0
    self.ms = MasterSpectrum()
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            mass = spectrum['params']['pepmass'][0]
            precursor_chrg = int(spectrum['params']['charge'][0])
            mass = calculatePrecursor(mass, precursor_chrg)
            scanid = int(parse_scan_id(spectrum['params']['title']))
            found = False
            if len(self.ms.spectrum) == 0:
                # Empty pool: store the spectrum; found=True skips the
                # fallback add below.
                peak = Peak(mass, scanid, fc)  # unused in this branch
                self.ms.add(Peak(mass, scanid, fc,
                                 meta={'ms': spectrum['m/z array'],
                                       'int': spectrum['intensity array'],
                                       'params': spectrum['params']}),
                            charge=precursor_chrg)
                found = True
            else:
                # Only spectra pooled under the same precursor charge are
                # considered as partners.
                if (precursor_chrg in self.ms.spectrum.keys()):
                    if len(self.ms.spectrum[precursor_chrg]) == 0:
                        peak = Peak(mass, scanid, fc)  # unused in this branch
                        self.ms.add(Peak(mass, scanid, fc,
                                         meta={'ms': spectrum['m/z array'],
                                               'int': spectrum['intensity array'],
                                               'params': spectrum['params']}),
                                    charge=precursor_chrg)
                        found = True
                    else:
                        for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                            # Once a partner was found the remaining mass
                            # differences are skipped.
                            if found is False:
                                # Probe peak at the shifted precursor mass.
                                peak = Peak(mass + extra_mass, 0.5, fc)
                                if peak.key() in self.ms.spectrum[precursor_chrg]:
                                    print(precursor_chrg)
                                    idx, bin_to_ack, a, b = self.ms.binary(
                                        peak, 0,
                                        len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1,
                                        precursor_chrg)
                                    # idx == -1 means the bin holds no match.
                                    if idx != -1:
                                        self.references.add(Reference(
                                            ppm=self.ppm,
                                            id_2=scanid,
                                            # the intensity slot of a pooled peak holds its scan id
                                            id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,
                                            peak_list_2=spectrum['m/z array'],
                                            peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                            mass_2=mass,
                                            mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                            charge=spectrum['params']['charge'][0],
                                            extra_mass=extra_mass,
                                            int_list_2=spectrum['intensity array'],
                                            int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                            params2=spectrum['params'],
                                            params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                        found = True
                                        # Remove the partner from the pool and
                                        # drop containers that became empty.
                                        del(self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                        if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                            del(self.ms.spectrum[precursor_chrg][peak.key()])
                                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                                            del(self.ms.spectrum[precursor_chrg])
            if found is False:
                # No partner: rebuild the pool without entries older than
                # 20 scan ids, then store the current spectrum.
                limit_scan_id = scanid - 20  # could start at -19
                ms_bac = MasterSpectrum()
                for chrg in self.ms.spectrum:
                    for key in self.ms.spectrum[chrg].keys():
                        for mp in self.ms.spectrum[chrg][key]:
                            # intensity holds the scan id of the pooled entry
                            if mp.intensity >= limit_scan_id:
                                ms_bac.add(mp, charge=chrg)
                self.ms = ms_bac
                self.ms.add(Peak(mass, scanid, fc,
                                 meta={'ms': spectrum['m/z array'],
                                       'int': spectrum['intensity array'],
                                       'params': spectrum['params']}),
                            charge=precursor_chrg)
    if error > 0:
        print(" delete valid information {0}".format(error))
class MgfPatcher(object):
    """
    Remove excluded peaks from the spectra of an MGF file.

    A peak is removed when it matches an 'absolute' entry of the exclusion
    list, or when it lies at ``precursor - delta`` for one of the
    'precursor' entries.
    """

    def __init__(self, delta_func=calculate_Delta_by_ppm(20)):
        """
        Args:
            delta_func: delta function handed to every Peak created by this
                object (defaults to the 20 ppm function).
        """
        self.exclusionSpectrum = MasterSpectrum()
        self.delta_function = delta_func
        self.precursorDeltas = []

    def readExclusionList(self, path):
        """
        Load the exclusion list from the CSV file at ``path``.

        The first row is treated as a header and skipped.  In every other
        row column 0 holds the mass and column 2 the entry type:
        ``'absolute'`` entries are added to ``self.exclusionSpectrum``
        (charge 0), ``'precursor'`` entries are appended to
        ``self.precursorDeltas``.  Column 1 is not read.

        Raises:
            ValueError: if column 2 holds any other value.
        """
        # newline='' lets the csv module do its own newline handling, as the
        # csv documentation requires.
        with open(path, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # skip the header row
            for row in reader:
                kind = row[2]
                if kind == 'absolute':
                    p = Peak(float(row[0]), 1.0, self.delta_function)
                    # 0 for no differentiation of charge states
                    self.exclusionSpectrum.add(MasterPeak(p), 0)
                elif kind == 'precursor':
                    self.precursorDeltas.append(float(row[0]))
                else:
                    raise ValueError(
                        "unknown exclusion type {0!r} in {1}; expected "
                        "'absolute' or 'precursor'".format(kind, path))

    def _matches_absolute_exclusion(self, peak):
        """Return True if ``peak`` matches an absolute exclusion entry."""
        bins = self.exclusionSpectrum.spectrum[0]
        key = peak.key()
        if key not in bins:
            return False
        # binary() reports -1 as index when the bin holds no matching peak.
        idx, _bin_to_ack, _merge_left, _merge_right = \
            self.exclusionSpectrum.binary(peak, 0, len(bins[key]) - 1, 0)
        return idx != -1

    def _matches_precursor_exclusion(self, peak, precursor):
        """Return True if ``precursor - delta`` lies inside ``peak`` for any
        stored precursor delta."""
        mp = MasterPeak(peak)
        return any(mp.isInsideMz(precursor - precursorDelta)
                   for precursorDelta in self.precursorDeltas)

    def patchMgf(self, input_path, output_path):
        """
        Write a copy of the MGF at ``input_path`` to ``output_path`` with all
        excluded peaks removed; spectrum params are passed through unchanged.

        The precursor-delta check is applied to every peak that is not
        already removed by the absolute list.  Previously it was skipped
        whenever the peak's bin existed in the absolute exclusion spectrum,
        even if no entry in that bin matched.
        """
        spectra_out = []
        with mgf.read(input_path) as spectra:
            for spectrum in spectra:
                int_dic = spectrum['intensity array']
                mz_dic = spectrum['m/z array']
                param_dic = spectrum['params']
                chrg_spec = param_dic['charge'][0]
                precursor = calculatePrecursor(mz=param_dic['pepmass'][0],
                                               charge=chrg_spec)
                # Positions of peaks to drop from both arrays.
                del_array = []
                for pos, m in enumerate(mz_dic):
                    peak = Peak(m, 0, self.delta_function)
                    if (self._matches_absolute_exclusion(peak)
                            or self._matches_precursor_exclusion(peak,
                                                                 precursor)):
                        del_array.append(pos)
                spectra_out.append({
                    'm/z array': np.delete(mz_dic, del_array, 0),
                    'intensity array': np.delete(int_dic, del_array, 0),
                    'params': param_dic,
                })
        mgf.write(spectra=spectra_out, output=output_path)
def __init__(self, delta_func=calculate_Delta_by_ppm(20)):
    """
    Create an empty exclusion state.

    ``delta_func`` (20 ppm by default) is kept as ``self.delta_function``.
    The exclusion spectrum and the list of precursor deltas start empty.
    """
    self.precursorDeltas = []
    self.delta_function = delta_func
    self.exclusionSpectrum = MasterSpectrum()
class MgfAnnotater(object):
    """
    Pair spectra of an MGF file by precursor mass and export the annotated
    result as CSV or MGF.
    """

    def __init__(self, path, output_path, min_rel_similarity, ppm=10):
        """
        Args:
            path: MGF file to read.
            output_path: target of ``export_annotated_spectra_to_csv``.
            min_rel_similarity: threshold handed to ``Reference.create_ms``
                as ``iMin_similarity``.
            ppm: value used to build the delta function for precursor
                matching and passed on to every Reference.
        """
        self.ppm = ppm
        self.ms = MasterSpectrum()
        self.path = path
        self.out = []
        self.output_path = output_path
        # References are kept ordered by their id_1.
        self.references = sortedlist(key=lambda i: i.id_1)
        self.min_rel_similarity = min_rel_similarity

    def load_msconvert_mgf(self):
        """
        Create references based on precursor mass.

        A gap in the scan ids is taken as an MS1 event and ends the current
        block; ``self.ms`` (the pool of not-yet-paired spectra) is reset
        then, so referencing only works within one MS2 block.  A block that
        ends with an odd number of scans is counted as an error.

        Each pooled peak stores the precursor mass as m/z, the scan id in
        its intensity slot and the spectrum's arrays and params in ``meta``.
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        scan_id_ary = []   # scan ids of the current consecutive block
        problems = []      # first scan id of every odd-sized block
        error = 0
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                mass = spectrum['params']['pepmass'][0]
                precursor_chrg = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)
                scanid = int(parse_scan_id(spectrum['params']['title']))
                if len(scan_id_ary) == 0:
                    scan_id_ary.append(scanid)
                else:
                    if scanid != scan_id_ary[-1] + 1:
                        # Gap in scan ids: the previous block is complete.
                        if len(scan_id_ary) % 2 == 1:
                            problems.append(scan_id_ary[0])
                            error += 1
                            scan_id_ary = []
                            scan_id_ary.append(scanid)
                        else:
                            scan_id_ary = []
                            scan_id_ary.append(scanid)
                        # new MS if scan_id group (separated by ms1) is completed
                        self.ms = MasterSpectrum()
                    else:
                        scan_id_ary.append(scanid)
                found = False
                if len(self.ms.spectrum) == 0:
                    # Empty pool: store the spectrum; found=True skips the
                    # fallback add below.
                    peak = Peak(mass, scanid, fc)
                    self.ms.add(Peak(mass, scanid, fc,
                                     meta={
                                         'ms': spectrum['m/z array'],
                                         'int': spectrum['intensity array'],
                                         'params': spectrum['params']
                                     }),
                                charge=precursor_chrg)
                    found = True
                else:
                    # Only spectra pooled under the same precursor charge
                    # are considered as partners.
                    if (precursor_chrg in self.ms.spectrum.keys()):
                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                            peak = Peak(mass, scanid, fc)
                            self.ms.add(Peak(mass, scanid, fc,
                                             meta={
                                                 'ms': spectrum['m/z array'],
                                                 'int': spectrum['intensity array'],
                                                 'params': spectrum['params']
                                             }),
                                        charge=precursor_chrg)
                            found = True
                        else:
                            for extra_mass in gen_allowed_mass_diff_with_sign(
                                    n=4, z=1):
                                if found is False:
                                    # Probe peak at the shifted precursor mass.
                                    peak = Peak(mass + extra_mass, 0.5, fc)
                                    if peak.key() in self.ms.spectrum[precursor_chrg]:
                                        print(precursor_chrg)
                                        idx, bin_to_ack, a, b = self.ms.binary(
                                            peak, 0,
                                            len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1,
                                            precursor_chrg)
                                        # idx == -1 means the bin holds no match.
                                        if idx != -1:
                                            self.references.add(Reference(
                                                ppm=self.ppm,
                                                id_2=scanid,
                                                # the intensity slot of a pooled peak holds its scan id
                                                id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,
                                                peak_list_2=spectrum['m/z array'],
                                                peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                mass_2=mass,
                                                mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                charge=spectrum['params']['charge'][0],
                                                extra_mass=extra_mass,
                                                int_list_2=spectrum['intensity array'],
                                                int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                params2=spectrum['params'],
                                                params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                            found = True
                                            # Remove the partner from the pool
                                            # and drop emptied containers.
                                            del (self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                            if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                                del (self.ms.spectrum[precursor_chrg][peak.key()])
                                            if len(self.ms.spectrum[precursor_chrg]) == 0:
                                                del (self.ms.spectrum[precursor_chrg])
                if found is False:
                    # No partner found: pool the spectrum for later scans.
                    self.ms.add(Peak(mass, scanid, fc,
                                     meta={
                                         'ms': spectrum['m/z array'],
                                         'int': spectrum['intensity array'],
                                         'params': spectrum['params']
                                     }),
                                charge=precursor_chrg)
        if error > 0:
            print(" delete valid information {0}".format(error))

    def export_annotated_spectra_to_csv(self):
        """
        Write all annotated peaks to ``self.output_path`` as CSV.

        Columns:
            id1, id2:      ids of the two referenced spectra
            mz, intensity: the annotated peak
            annotation:    delta mass (``meta['delta']``)
            mz2:           ``meta['mass']``
            decharged:     True/False
            similarity:    intensity similarity of mz vs. mz2
            originated mz: if decharged, the original m/z
            extra_mass:    extra mass of the reference

        Peaks whose ``meta['mass']`` is -1 are skipped.
        """
        print("start exporting information")
        # The writer is given os.linesep explicitly, so the file must be
        # opened with newline='' to stop the text layer from translating the
        # terminator a second time (which yields "\r\r\n" on Windows).
        with open(self.output_path, "wt", newline='') as csvfile:
            writr = csv.writer(csvfile, lineterminator=os.linesep)
            writr.writerow(
                ("id1", "id2", "mz", "intensity", "annotation", "mz2",
                 "decharged", 'similarity', 'originated mz', "extra_mass"))
            for ref in self.references:
                ms = ref.create_ms(iMin_similarity=self.min_rel_similarity)
                for chrg in ms.spectrum:
                    for key in ms.spectrum[chrg].keys():
                        for mp in ms.spectrum[chrg][key]:
                            if mp.meta['mass'] != -1:
                                writr.writerow(
                                    (ref.id_1, ref.id_2, mp.mz, mp.intensity,
                                     mp.meta['delta'], mp.meta['mass'],
                                     mp.meta['decharged'],
                                     mp.meta['similarity'],
                                     mp.meta['originated_mz'],
                                     ref.extra_mass))

    def export_annotated_spectra_to_mgf(self, mgf_path,
                                        report_just_heavy=False):
        """
        Write one spectrum per reference to ``mgf_path``.

        With ``report_just_heavy`` every peak of
        ``ref.just_create_heavy_ms()`` is written.  Otherwise only peaks of
        ``ref.create_ms(...)`` whose ``meta['mass']`` is not -1 are written.
        References that yield no peaks are left out.
        """
        spectra_out = []
        for ref in self.references:
            if report_just_heavy:
                ms = ref.just_create_heavy_ms()
            else:
                ms = ref.create_ms(iMin_similarity=self.min_rel_similarity)
            buf_peaks = []
            buf_int = []
            for chrg in ms.spectrum:
                for key in ms.spectrum[chrg].keys():
                    for mp in ms.spectrum[chrg][key]:
                        # Short-circuit keeps meta['mass'] untouched in
                        # heavy-only mode.
                        if report_just_heavy or mp.meta['mass'] != -1:
                            buf_peaks.append(mp.mz)
                            buf_int.append(mp.intensity)
            if len(buf_peaks) != 0:
                spectra_out.append({
                    'm/z array': buf_peaks,
                    'intensity array': buf_int,
                    'params': ref.params
                })
        mgf.write(spectra=spectra_out, output=mgf_path)
def __init__(self, path):
    """Remember the input ``path`` and start with an empty MasterSpectrum."""
    self.masterSpectrum = MasterSpectrum()
    self.path = path
def load_msconvert_mgf(self):
    """
    Create references based on precursor mass.

    A gap in the scan ids is taken as an MS1 event and ends the current
    block; ``self.ms`` (the pool of not-yet-paired spectra) is reset then,
    so referencing only works within one MS2 block.  A block that ends with
    an odd number of scans is counted as an error.

    Each pooled peak stores the precursor mass as m/z, the scan id in its
    intensity slot and the spectrum's arrays and params in ``meta``.  For
    every spectrum the pool is searched (same charge only) for a peak at
    ``mass + extra_mass`` for each value yielded by
    ``gen_allowed_mass_diff_with_sign(n=4, z=1)``.  On the first hit a
    Reference is added to ``self.references`` and the hit is removed from
    the pool; otherwise the spectrum itself is pooled.
    """
    fc = calculate_Delta_by_ppm(self.ppm)
    scan_id_ary = []  # scan ids of the current consecutive block
    problems = []  # first scan id of every odd-sized block
    error = 0
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            mass = spectrum['params']['pepmass'][0]
            precursor_chrg = int(spectrum['params']['charge'][0])
            mass = calculatePrecursor(mass, precursor_chrg)
            scanid = int(parse_scan_id(spectrum['params']['title']))
            if len(scan_id_ary) == 0:
                scan_id_ary.append(scanid)
            else:
                if scanid != scan_id_ary[-1] + 1:
                    # Gap in scan ids: the previous block is complete.
                    if len(scan_id_ary) % 2 == 1:
                        problems.append(scan_id_ary[0])
                        error += 1
                        scan_id_ary = []
                        scan_id_ary.append(scanid)
                    else:
                        scan_id_ary = []
                        scan_id_ary.append(scanid)
                    self.ms = MasterSpectrum(
                    )  # new MS if scan_id group (separated by ms1) is completed
                else:
                    scan_id_ary.append(scanid)
            found = False
            if len(self.ms.spectrum) == 0:
                # Empty pool: store the spectrum; found=True skips the
                # fallback add below.
                peak = Peak(mass, scanid, fc)  # unused in this branch
                self.ms.add(Peak(mass, scanid, fc,
                                 meta={
                                     'ms': spectrum['m/z array'],
                                     'int': spectrum['intensity array'],
                                     'params': spectrum['params']
                                 }),
                            charge=precursor_chrg)
                found = True
            else:
                # Only spectra pooled under the same precursor charge are
                # considered as partners.
                if (precursor_chrg in self.ms.spectrum.keys()):
                    if len(self.ms.spectrum[precursor_chrg]) == 0:
                        peak = Peak(mass, scanid, fc)  # unused in this branch
                        self.ms.add(Peak(mass, scanid, fc,
                                         meta={
                                             'ms': spectrum['m/z array'],
                                             'int': spectrum['intensity array'],
                                             'params': spectrum['params']
                                         }),
                                    charge=precursor_chrg)
                        found = True
                    else:
                        for extra_mass in gen_allowed_mass_diff_with_sign(
                                n=4, z=1):
                            # Once a partner was found the remaining mass
                            # differences are skipped.
                            if found is False:
                                # Probe peak at the shifted precursor mass.
                                peak = Peak(mass + extra_mass, 0.5, fc)
                                if peak.key() in self.ms.spectrum[precursor_chrg]:
                                    print(precursor_chrg)
                                    idx, bin_to_ack, a, b = self.ms.binary(
                                        peak, 0,
                                        len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1,
                                        precursor_chrg)
                                    # idx == -1 means the bin holds no match.
                                    if idx != -1:
                                        self.references.add(
                                            Reference(
                                                ppm=self.ppm,
                                                id_2=scanid,
                                                # the intensity slot of a pooled peak holds its scan id
                                                id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,
                                                peak_list_2=spectrum['m/z array'],
                                                peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                mass_2=mass,
                                                mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                charge=spectrum['params']['charge'][0],
                                                extra_mass=extra_mass,
                                                int_list_2=spectrum['intensity array'],
                                                int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                params2=spectrum['params'],
                                                params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                        found = True
                                        # Remove the partner from the pool and
                                        # drop containers that became empty.
                                        del (self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                        if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                            del (self.ms.spectrum[precursor_chrg][peak.key()])
                                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                                            del (self.ms.spectrum[precursor_chrg])
            if found is False:
                # No partner found: pool the spectrum for later scans.
                self.ms.add(Peak(mass, scanid, fc,
                                 meta={
                                     'ms': spectrum['m/z array'],
                                     'int': spectrum['intensity array'],
                                     'params': spectrum['params']
                                 }),
                            charge=precursor_chrg)
    if error > 0:
        print(" delete valid information {0}".format(error))
def create_ms(self, iMin_similarity):
    """
    Build a MasterSpectrum from peak list 1 and annotate it with matching
    peaks of peak list 2.

    int list 1 and mz list 1 are the ones with the higher precursor mass;
    they form the master spectrum.  For every peak in list 2 all allowed
    mass differences (depending on charge and number of tags) are checked:
        -> find peak + delta in the master spectrum
        -> compute a similarity value (ratio of the relative intensities,
           inverted if needed so that it is at most 1)
        -> keep the delta with the highest similarity
        -> if that similarity exceeds ``iMin_similarity`` and the value
           already stored on the master peak, replace the master peak's
           metadata; if the delta is flagged as decharged, the master peak
           is re-added at the value returned by ``calculatePrecursor`` and
           the old entry is removed.

    Side effect: ``self.params["#numtags"]`` is set to the computed number
    of tags.

    Returns:
        the annotated MasterSpectrum.
    """
    req_min_similarity = iMin_similarity
    ms = MasterSpectrum()
    rel_int = calculateRelativeIntensity(self.int_list_1)
    # The loop variable rel_int shadows the list above; zip() has already
    # captured the list, so iteration is unaffected.
    for mass, rel_int, i in zip(self.peak_list_1, rel_int, self.int_list_1):
        # Placeholder metadata: mass == -1 / similarity == -1 mark a peak
        # that has no partner yet.
        ms.add(
            Peak(mass,
                 i,
                 self.fc,
                 meta={
                     'delta': 511,
                     'mass': -1,
                     'decharged': False,
                     'similarity': -1,
                     'originated_mz': mass,
                     'rel_int': rel_int
                 }))
    num = round((abs(self.extra_mass) / calculate_Mass_Diff()))  # number of tags possible
    self.params["#numtags"] = "{0}".format(num)
    deltas = mass_diff_decharging_stuff(n=int(num), z=self.charge)
    # Iterate the candidate deltas in ascending order.
    dd = deltas.keys()
    dd = list(dd)
    dd.sort()
    rel_int = calculateRelativeIntensity(self.int_list_2)
    for mass, rel_int, i in zip(self.peak_list_2, rel_int, self.int_list_2):
        # Best match found so far for this list-2 peak (-1 = none).
        similarity_most_similar = -1
        idx_most_similar = -1
        peak_key_most_similar = -1
        delta_most_similar = -1
        mass_most_similar = -1
        for delta in dd:
            peak = Peak(mass + delta, i, self.fc)
            if peak.key() in ms.spectrum[0]:
                idx, bin_to_ack, should_merge_left_peak, should_merge_right_peak = ms.binary(
                    peak, 0, len(ms.spectrum[0][peak.key()]) - 1, 0)
                if idx != -1:  # found
                    rel_int1 = ms.spectrum[0][peak.key()][idx].meta['rel_int']
                    # NOTE(review): raises ZeroDivisionError if rel_int is 0
                    # - confirm calculateRelativeIntensity cannot yield 0.
                    ratio = rel_int1 / rel_int
                    if ratio > 1:  # keep the ratio between 0 and 1
                        ratio = 1 / ratio
                    if ratio > similarity_most_similar:
                        similarity_most_similar = ratio
                        idx_most_similar = idx
                        peak_key_most_similar = peak.key()
                        mass_most_similar = mass
                        delta_most_similar = delta
        if similarity_most_similar > req_min_similarity:
            # Only overwrite an annotation with a strictly better one.
            if similarity_most_similar > ms.spectrum[0][
                    peak_key_most_similar][idx_most_similar].meta[
                        'similarity']:
                # NOTE(review): 'rel_int' is overwritten with `ratio`, which
                # holds the value of the LAST match in the delta loop and
                # not necessarily the best one - confirm this is intended.
                ms.spectrum[0][peak_key_most_similar][
                    idx_most_similar].meta = {
                        'delta': delta_most_similar,
                        'mass': mass_most_similar,
                        'decharged': deltas[delta_most_similar]['decharge']['state'],
                        'similarity': similarity_most_similar,
                        'originated_mz': ms.spectrum[0][peak_key_most_similar][idx_most_similar].mz,
                        'rel_int': ratio
                    }
                if deltas[delta_most_similar]['decharge']['state']:
                    # Re-insert the peak at its recomputed position
                    # (metadata carried over), then remove the old entry.
                    ms.add(
                        Peak(calculatePrecursor(
                            ms.spectrum[0][peak_key_most_similar][idx_most_similar].mz,
                            deltas[delta_most_similar]['decharge']['z']),
                             ms.spectrum[0][peak_key_most_similar][idx_most_similar].intensity,
                             self.fc,
                             meta=ms.spectrum[0][peak_key_most_similar][idx_most_similar].meta))
                    del (ms.spectrum[0][peak_key_most_similar][idx_most_similar])
                    if len(ms.spectrum[0][peak_key_most_similar]) == 0:
                        del (ms.spectrum[0][peak_key_most_similar])
    return ms
class MgfAnnotaterDistiller(MgfAnnotater):
    """
    MgfAnnotater variant with two extra ways to build references:
    from an externally supplied id pairing (``load_improved_csv`` followed
    by ``load_distiller_mgf``) or by precursor mass within a sliding window
    of scan ids (``load_distiller_mgf2``).
    """

    def load_improved_csv(self, path_score):
        """
        Read the id pairing from the CSV file at ``path_score``.

        The file needs the columns ``id1`` and ``id2``.  The result, stored
        in ``self.ids_to_be_referenced``, maps the numerically larger id of
        every row to the smaller one.
        """
        spectra_to_be_referenced = {}
        with open(path_score, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                # csv yields strings; compare as integers so that e.g.
                # 9 vs. 10 is not ordered lexicographically ('9' > '10').
                id1 = int(row['id1'])
                id2 = int(row['id2'])
                if id1 > id2:
                    spectra_to_be_referenced[id1] = id2
                else:
                    spectra_to_be_referenced[id2] = id1
        self.ids_to_be_referenced = spectra_to_be_referenced

    def load_distiller_mgf(self):
        """
        Create references based on the pairing loaded by
        ``load_improved_csv``.

        Spectra whose scan id is not a key of the pairing are buffered.
        When a spectrum whose scan id is a key arrives and its partner is
        already buffered, the two precursor masses are compared.  If they
        differ by more than 21, diagnostics are printed and no reference is
        created.  Otherwise a Reference is added using the allowed mass
        difference closest to the observed one, and the partner and the
        pairing entry are discarded.
        """
        data = {}  # scan id -> buffered spectrum
        alm = list(gen_allowed_mass_diff_with_sign(n=4, z=1))
        report = ("mass1:\t {0}\n"
                  "mass:\t {1}\n"
                  "scanid:\t {2}\n"
                  "charge:\t {3}\n"
                  "charge2:\t {4}\n"
                  "scanid2:\t {5}\n")
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                mass = spectrum['params']['pepmass'][0]
                precursor_chrg = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)
                scanid = int(parse_scan_id(spectrum['params']['title']))
                if scanid in self.ids_to_be_referenced:
                    partner_id = self.ids_to_be_referenced[scanid]
                    if partner_id in data:
                        partner = data[partner_id]
                        mass1 = partner['params']['pepmass'][0]
                        precursor_chrg1 = int(partner['params']['charge'][0])
                        mass1 = calculatePrecursor(mass1, precursor_chrg1)
                        diff = abs(mass1 - mass)
                        # Distance of the observed difference to every
                        # allowed mass difference.
                        diff2 = [abs(diff - abs(i)) for i in alm]
                        pos = diff2.index(min(diff2))
                        if diff > 21:
                            # distiller changes the precursor charge,
                            # therefore the precursor mass calculation is
                            # wrong
                            print(report.format(
                                mass1, mass, scanid, precursor_chrg1,
                                spectrum['params']['charge'][0], partner_id))
                            print(diff)
                            print(diff2)
                            print("----------------")
                        else:
                            self.references.add(Reference(
                                ppm=self.ppm,
                                id_2=scanid,
                                id_1=partner_id,  # also a scan id
                                peak_list_2=spectrum['m/z array'],
                                peak_list_1=partner['m/z array'],
                                mass_2=mass,
                                mass_1=mass1,
                                charge=spectrum['params']['charge'][0],
                                extra_mass=alm[pos],
                                int_list_2=spectrum['intensity array'],
                                int_list_1=partner['intensity array'],
                                params2=spectrum['params'],
                                params1=partner['params']))
                            # The pair is consumed.
                            del data[partner_id]
                            del self.ids_to_be_referenced[scanid]
                else:
                    data[scanid] = spectrum

    def load_distiller_mgf2(self):
        """
        Create references based on precursor mass.

        ``self.ms`` is used as a pool of not-yet-paired spectra, keyed by
        precursor charge.  Each pooled peak stores the precursor mass as
        m/z, the scan id in its intensity slot and the spectrum's arrays and
        params in ``meta``.  For every spectrum the pool is searched (same
        charge only) for a peak at ``mass + extra_mass`` for each value
        yielded by ``gen_allowed_mass_diff_with_sign(n=4, z=1)``.  On the
        first hit a Reference is added and the hit is removed from the pool.
        Without a hit, the pool is rebuilt keeping only entries whose scan
        id is at least ``scanid - 20`` and the current spectrum is added.
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        # Never incremented in this method, so the final report cannot fire.
        error = 0
        self.ms = MasterSpectrum()
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                mass = spectrum['params']['pepmass'][0]
                precursor_chrg = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)
                scanid = int(parse_scan_id(spectrum['params']['title']))
                found = False
                if len(self.ms.spectrum) == 0:
                    # Empty pool: store the spectrum; found=True skips the
                    # fallback add below.
                    peak = Peak(mass, scanid, fc)
                    self.ms.add(Peak(mass, scanid, fc,
                                     meta={'ms': spectrum['m/z array'],
                                           'int': spectrum['intensity array'],
                                           'params': spectrum['params']}),
                                charge=precursor_chrg)
                    found = True
                else:
                    # Only spectra pooled under the same precursor charge
                    # are considered as partners.
                    if (precursor_chrg in self.ms.spectrum.keys()):
                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                            peak = Peak(mass, scanid, fc)
                            self.ms.add(Peak(mass, scanid, fc,
                                             meta={'ms': spectrum['m/z array'],
                                                   'int': spectrum['intensity array'],
                                                   'params': spectrum['params']}),
                                        charge=precursor_chrg)
                            found = True
                        else:
                            for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                                if found is False:
                                    # Probe peak at the shifted precursor mass.
                                    peak = Peak(mass + extra_mass, 0.5, fc)
                                    if peak.key() in self.ms.spectrum[precursor_chrg]:
                                        print(precursor_chrg)
                                        idx, bin_to_ack, a, b = self.ms.binary(
                                            peak, 0,
                                            len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1,
                                            precursor_chrg)
                                        # idx == -1 means the bin holds no match.
                                        if idx != -1:
                                            self.references.add(Reference(
                                                ppm=self.ppm,
                                                id_2=scanid,
                                                # the intensity slot of a pooled peak holds its scan id
                                                id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,
                                                peak_list_2=spectrum['m/z array'],
                                                peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                mass_2=mass,
                                                mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                charge=spectrum['params']['charge'][0],
                                                extra_mass=extra_mass,
                                                int_list_2=spectrum['intensity array'],
                                                int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                params2=spectrum['params'],
                                                params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                            found = True
                                            # Remove the partner from the pool
                                            # and drop emptied containers.
                                            del(self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                            if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                                del(self.ms.spectrum[precursor_chrg][peak.key()])
                                            if len(self.ms.spectrum[precursor_chrg]) == 0:
                                                del(self.ms.spectrum[precursor_chrg])
                if found is False:
                    # No partner: rebuild the pool without entries older
                    # than 20 scan ids, then store the current spectrum.
                    limit_scan_id = scanid - 20  # could start at -19
                    ms_bac = MasterSpectrum()
                    for chrg in self.ms.spectrum:
                        for key in self.ms.spectrum[chrg].keys():
                            for mp in self.ms.spectrum[chrg][key]:
                                # intensity holds the scan id of the entry
                                if mp.intensity >= limit_scan_id:
                                    ms_bac.add(mp, charge=chrg)
                    self.ms = ms_bac
                    self.ms.add(Peak(mass, scanid, fc,
                                     meta={'ms': spectrum['m/z array'],
                                           'int': spectrum['intensity array'],
                                           'params': spectrum['params']}),
                                charge=precursor_chrg)
        if error > 0:
            print(" delete valid information {0}".format(error))