def parse_mz_id(self):
    """
    reading mzid
    saving the fragment ions of the first ``max_rank`` identifications of
    every spectrum as self.scan1.data[scan_id][rank] (rank is 1-based)

    every rank gets its own fragment list; previously one list was shared
    by all ranks of a spectrum, so rank 1 also contained the rank 2 ions

    returns:
        None
    """
    data = mzid.read(self.path)
    max_rank = 2
    for d in data:
        scan_id = parse_scan_id(d['spectrum title'])
        items = d['SpectrumIdentificationItem']
        # rank is 1-based, the item list is 0-based
        for pos in range(1, min(max_rank, len(items)) + 1):
            fragments = []  # fresh list per rank, never shared between ranks
            for fragmentation in items[pos - 1]['IonType']:
                # NOTE(review): mz / error keep the value of the previous
                # ion type when a FragmentArray lacks that measure
                for f in fragmentation['FragmentArray']:
                    if f['measure_ref'] == 'm_mz':
                        mz = f['values']
                    elif f['measure_ref'] == 'm_error':
                        error = f['values']
                fragments.append(Fragment(name=fragmentation['name'],
                                          indice=fragmentation['index'],
                                          charge=fragmentation['charge'],
                                          mz=mz,
                                          error=error,
                                          scanid=scan_id))
            if scan_id not in self.scan1.data:
                self.scan1.data[scan_id] = {}
            self.scan1.data[scan_id][pos] = fragments
def parse_mz_id(self):
    """
    reading mzid
    for every spectrum one dict {rank: Identification} is appended to
    self.identifications; rank 1 is always stored, rank 2 only when the
    spectrum carries more than one SpectrumIdentificationItem

    returns:
        None
    """
    for result in mzid.read(self.path):
        scan_title = parse_scan_id(result['spectrum title'])
        by_rank = {}
        n_items = len(result['SpectrumIdentificationItem'])
        # list positions are 0-based, the stored rank keys are 1-based
        wanted = (0, 1) if n_items > 1 else (0,)
        for idx in wanted:
            item = result['SpectrumIdentificationItem'][idx]
            ref = item['peptide_ref']
            by_rank[idx + 1] = Identification(
                mzid_info_lvl_fragmentation=item['IonType'],
                peptide_ref=ref,
                title=scan_title)
        self.identifications.append(by_rank)
def load_distiller_mgf(self):
    """
    creates references based on improved csv
    a missing scanid means an ms1 event

    reads the mgf at self.path; a spectrum whose scan id is NOT a key of
    self.ids_to_be_referenced is buffered in a local dict, a spectrum whose
    scan id IS a key is paired with the buffered spectrum stored under
    self.ids_to_be_referenced[scanid] (if that one was already seen) and a
    Reference for the pair is added to self.references

    returns:
        None
    """
    # scanid -> spectrum, candidates waiting for their partner
    data = {}
    # all values yielded by gen_allowed_mass_diff_with_sign(n=4, z=1)
    alm = [i for i in gen_allowed_mass_diff_with_sign(n=4, z=1)]
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            mass = spectrum['params']['pepmass'][0]
            precursor_chrg = int(spectrum['params']['charge'][0])
            mass = calculatePrecursor(mass, precursor_chrg)
            scanid = int(parse_scan_id(spectrum['params']['title']))
            if scanid in self.ids_to_be_referenced:
                if self.ids_to_be_referenced[scanid] in data:
                    # same precursor computation for the buffered partner
                    mass1 = data[self.ids_to_be_referenced[scanid]]['params']['pepmass'][0]
                    precursor_chrg1 = int(data[self.ids_to_be_referenced[scanid]]['params']['charge'][0])
                    mass1 = calculatePrecursor(mass1, precursor_chrg1)
                    diff = abs(mass1 - mass)
                    # distance of the observed difference to every allowed difference
                    diff2 = [abs(diff - abs(i)) for i in alm]
                    # index of the closest allowed difference (first one on ties)
                    pos = diff2.index(min(diff2))
                    # template of the diagnostic output below
                    p = "mass1:\t {0}\n"
                    p += "mass:\t {1}\n"
                    p += "scanid:\t {2}\n"
                    p += "charge:\t {3}\n"
                    p += "charge2:\t {4}\n"
                    p += "scanid2:\t {5}\n"
                    if diff > 21:  # distiller changes the precursor charge, therefore the precursor mass calculation is wrong
                        # no Reference is created for this pair, only diagnostics are printed
                        print(p.format(mass1, mass, scanid, precursor_chrg1, spectrum['params']['charge'][0], self.ids_to_be_referenced[scanid]))
                        print(diff)
                        print(diff2)
                        print("----------------")
                    else:
                        self.references.add(Reference(ppm=self.ppm,
                                                      id_2=scanid,
                                                      id_1=self.ids_to_be_referenced[scanid],  # also a scanid
                                                      peak_list_2=spectrum['m/z array'],
                                                      peak_list_1=data[self.ids_to_be_referenced[scanid]]['m/z array'],
                                                      mass_2=mass,
                                                      mass_1=mass1,
                                                      charge=spectrum['params']['charge'][0],
                                                      extra_mass=alm[pos],
                                                      int_list_2=spectrum['intensity array'],
                                                      int_list_1=data[self.ids_to_be_referenced[scanid]]['intensity array'],
                                                      params2=spectrum['params'],
                                                      params1=data[self.ids_to_be_referenced[scanid]]['params']))
                    # NOTE(review): the pair is dropped from both lookups here for
                    # both branches above - confirm the cleanup was not meant to
                    # happen only when a Reference was created
                    del(data[self.ids_to_be_referenced[scanid]])
                    del(self.ids_to_be_referenced[scanid])
            else:
                # not a spectrum that has to be referenced: keep it as a possible partner
                data[scanid] = spectrum
def read_enhanced_spectrum(self, path):
    """
    fills self.mgf_reads with one Reference(scan_id, m/z array) per
    spectrum of the mgf at ``path``, keyed by the parsed scan id

    a masterSpectrum per spectrum is deliberately not stored: that needed
    4 GB for 8247 spectra, so it is created on request instead
    (saving reference and spectra object)
    """
    with mgf.read(path) as reader:
        for entry in reader:
            key = parse_scan_id(entry['params']['title'])
            mz_values = entry['m/z array']
            self.mgf_reads[key] = Reference(key, mz_values)
def select_mgf(self):
    """
    reads self.path_mgf_in, keeps the spectra whose integer scan id is in
    self.list_chosen and hands them to mgf.write with
    output=self.path_mgf_out
    """
    with mgf.read(self.path_mgf_in) as reader:
        # only the three entries below are carried over per spectrum
        kept = [
            {
                'm/z array': entry['m/z array'],
                'intensity array': entry['intensity array'],
                'params': entry['params'],
            }
            for entry in reader
            if int(parse_scan_id(entry['params']['title'])) in self.list_chosen
        ]
    mgf.write(spectra=kept, output=self.path_mgf_out)
def load_export(self):
    """
    exports every peak of the mgf at self.mgf_path to the csv file
    self.output_csv with the columns scanid, peak (m/z) and rel_int
    (value returned by calculateRelativeIntensity for the spectrum)

    the scan id of every processed spectrum is printed as progress output

    returns:
        None
    """
    # newline="" is required for files handed to csv.writer: without it the
    # text layer translates the "\n" inside os.linesep again and every row
    # ends in "\r\r\n" on Windows
    with mgf.read(self.mgf_path) as spectra, \
            open(self.output_csv, "wt", newline="") as csvfile:
        writr = csv.writer(csvfile, lineterminator=os.linesep)
        writr.writerow(("scanid", "peak", "rel_int"))
        for spectrum in spectra:
            scan_id = parse_scan_id(spectrum['params']['title'])
            print("scanid:\t{0}\n".format(scan_id))
            rel_int = calculateRelativeIntensity(spectrum['intensity array'])
            # one row per peak, m/z and relative intensity are index-aligned
            writr.writerows((scan_id, m, i)
                            for m, i in zip(spectrum['m/z array'], rel_int))
def test_splitting(self):
    """
    parse_scan_id returns the scan number as a string for a title that is
    wrapped in double quotes
    """
    quoted_title = "\"controllerType=0 controllerNumber=1 scan=1316\""
    parsed = parse_scan_id(quoted_title)
    assert_equal(parsed, '1316')
def load_msconvert_mgf(self):
    """
    creates references based on precursor mass
    a missing scanid means an ms1 event
    by default referencing works just within one ms2 block

    every spectrum of the mgf at self.path is either matched against a
    spectrum already waiting in self.ms (same charge, precursor mass shifted
    by one of the values of gen_allowed_mass_diff_with_sign(n=4, z=1)) and
    turned into a Reference in self.references, or it is added to self.ms
    to wait for a partner

    self.ms is read before this method assigns it, so it has to exist
    already when the method is called

    returns:
        None
    """
    fc = calculate_Delta_by_ppm(self.ppm)
    # scan ids of the current run of consecutive scan ids
    scan_id_ary = []
    # first scan id of every run that ended with an odd number of spectra;
    # only collected, never read in this method
    problems = []
    # number of runs that ended with an odd number of spectra
    error = 0
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            mass = spectrum['params']['pepmass'][0]
            precursor_chrg = int(spectrum['params']['charge'][0])
            mass = calculatePrecursor(mass, precursor_chrg)
            scanid = int(parse_scan_id(spectrum['params']['title']))
            if len(scan_id_ary) == 0:
                scan_id_ary.append(scanid)
            else:
                if scanid != scan_id_ary[-1] + 1:
                    # gap in the scan ids: the previous run is finished
                    if len(scan_id_ary) % 2 == 1:
                        # odd run length, at least one spectrum had no partner
                        problems.append(scan_id_ary[0])
                        error += 1
                        scan_id_ary = []
                        scan_id_ary.append(scanid)
                    else:
                        scan_id_ary = []
                        scan_id_ary.append(scanid)
                    # NOTE(review): reset is applied for every gap (odd and even
                    # runs) - confirm against the original indentation
                    self.ms = MasterSpectrum()  # new MS if scan_id group (separated by ms1) is completed
                else:
                    scan_id_ary.append(scanid)
            found = False
            if len(self.ms.spectrum) == 0:
                # nothing is waiting: store this spectrum as a candidate
                peak = Peak(mass, scanid, fc)  # not used afterwards
                # the scan id is passed as second positional argument of Peak
                # and read back through `.intensity` further down
                self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
                found = True
            else:
                if (precursor_chrg in self.ms.spectrum.keys()):  # react to charge !!!!!!
                    if len(self.ms.spectrum[precursor_chrg]) == 0:
                        peak = Peak(mass, scanid, fc)  # not used afterwards
                        self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
                        found = True
                    else:
                        # try every allowed mass shift until the first hit
                        for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                            if found is False:
                                # probe peak at the shifted mass
                                peak = Peak(mass + extra_mass, 0.5, fc)
                                if peak.key() in self.ms.spectrum[precursor_chrg]:
                                    print(precursor_chrg)
                                    # idx is -1 when no stored peak matches the probe;
                                    # the other three return values are not used
                                    idx, bin_to_ack, a, b = self.ms.binary(peak, 0, len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1, precursor_chrg)
                                    if idx != -1:
                                        self.references.add(Reference(ppm=self.ppm,
                                                                      id_2=scanid,
                                                                      id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,  # also a scanid
                                                                      peak_list_2=spectrum['m/z array'],
                                                                      peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                                      mass_2=mass,
                                                                      mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                                      charge=spectrum['params']['charge'][0],
                                                                      extra_mass=extra_mass,
                                                                      int_list_2=spectrum['intensity array'],
                                                                      int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                                      params2=spectrum['params'],
                                                                      params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                        found = True
                                        # the matched candidate is used up; remove it and
                                        # every container that became empty by that
                                        del (self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                        if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                            del (self.ms.spectrum[precursor_chrg][peak.key()])
                                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                                            del (self.ms.spectrum[precursor_chrg])
                if found is False:
                    # no partner found: this spectrum waits as a candidate
                    self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
    if error > 0:
        print(" delete valid information {0}".format(error))
def load_distiller_mgf2(self):
    """
    creates references based on precursor mass
    a missing scanid means an ms1 event

    in contrast to a per-block reset, self.ms is created once at the start
    and, whenever a spectrum finds no partner, rebuilt so that it only keeps
    candidates whose scan id is at most 20 below the current scan id

    every spectrum of the mgf at self.path is either matched against a
    waiting candidate of the same charge (precursor mass shifted by one of
    the values of gen_allowed_mass_diff_with_sign(n=4, z=1)) and turned into
    a Reference in self.references, or it is added to self.ms as candidate

    returns:
        None
    """
    fc = calculate_Delta_by_ppm(self.ppm)
    # never incremented in this method, so the final print is not reached
    error = 0
    self.ms = MasterSpectrum()
    with mgf.read(self.path) as spectra:
        for spectrum in spectra:
            mass = spectrum['params']['pepmass'][0]
            precursor_chrg = int(spectrum['params']['charge'][0])
            mass = calculatePrecursor(mass, precursor_chrg)
            scanid = int(parse_scan_id(spectrum['params']['title']))
            found = False
            if len(self.ms.spectrum) == 0:
                # nothing is waiting: store this spectrum as a candidate
                peak = Peak(mass, scanid, fc)  # not used afterwards
                # the scan id is passed as second positional argument of Peak
                # and read back through `.intensity` further down
                self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
                found = True
            else:
                if (precursor_chrg in self.ms.spectrum.keys()):  # react to charge !!!!!!
                    if len(self.ms.spectrum[precursor_chrg]) == 0:
                        peak = Peak(mass, scanid, fc)  # not used afterwards
                        self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
                        found = True
                    else:
                        # try every allowed mass shift until the first hit
                        for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                            if found is False:
                                # probe peak at the shifted mass
                                peak = Peak(mass + extra_mass, 0.5, fc)
                                if peak.key() in self.ms.spectrum[precursor_chrg]:
                                    print(precursor_chrg)
                                    # idx is -1 when no stored peak matches the probe;
                                    # the other three return values are not used
                                    idx, bin_to_ack, a, b = self.ms.binary(peak, 0, len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1, precursor_chrg)
                                    if idx != -1:
                                        self.references.add(Reference(ppm=self.ppm,
                                                                      id_2=scanid,
                                                                      id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,  # also a scanid
                                                                      peak_list_2=spectrum['m/z array'],
                                                                      peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                                      mass_2=mass,
                                                                      mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                                      charge=spectrum['params']['charge'][0],
                                                                      extra_mass=extra_mass,
                                                                      int_list_2=spectrum['intensity array'],
                                                                      int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                                      params2=spectrum['params'],
                                                                      params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                        found = True
                                        # the matched candidate is used up; remove it and
                                        # every container that became empty by that
                                        del(self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                        if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                            del(self.ms.spectrum[precursor_chrg][peak.key()])
                                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                                            del(self.ms.spectrum[precursor_chrg])
                if found is False:
                    # no partner found: drop old candidates, then store this one
                    limit_scan_id = scanid - 20  # could start at -19
                    ms_bac = MasterSpectrum()
                    for chrg in self.ms.spectrum:
                        for key in self.ms.spectrum[chrg].keys():
                            for mp in self.ms.spectrum[chrg][key]:
                                # `.intensity` holds the scan id of the candidate
                                if mp.intensity >= limit_scan_id:
                                    ms_bac.add(mp, charge=chrg)
                    self.ms = ms_bac
                    self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}), charge=precursor_chrg)
    if error > 0:
        print(" delete valid information {0}".format(error))