Example #1
0
 def parse_mz_id(self):
     """
     Read the mzid file at ``self.path`` and collect fragment ions per scan.

     For every spectrum result the first ``max_rank`` (2) entries of
     ``SpectrumIdentificationItem`` are visited. Each visited rank gets its
     own list of ``Fragment`` objects, stored as
     ``self.scan1.data[scan_id][rank]`` with ``rank`` counting from 1.

     returns:
     None
     """
     data = mzid.read(self.path)
     max_rank = 2
     for d in data:
         scan_id = parse_scan_id(d['spectrum title'])
         items = d['SpectrumIdentificationItem']
         for pos in range(1, min(max_rank, len(items)) + 1):
             # A fresh list per rank: sharing one list across ranks would
             # make every rank entry alias the same, merged fragment list.
             fragments = []
             for fragmentation in items[pos - 1]['IonType']:
                 # Reset per ion type so values of a previous ion type are
                 # never reused when an array is missing.
                 mz = None
                 error = None
                 for f in fragmentation['FragmentArray']:
                     if f['measure_ref'] == 'm_mz':
                         mz = f['values']
                     elif f['measure_ref'] == 'm_error':
                         error = f['values']
                 fragments.append(Fragment(name=fragmentation['name'],
                                           indice=fragmentation['index'],
                                           charge=fragmentation['charge'],
                                           mz=mz,
                                           error=error,
                                           scanid=scan_id))
             if scan_id not in self.scan1.data:
                 self.scan1.data[scan_id] = {}
             self.scan1.data[scan_id][pos] = fragments
Example #2
0
    def parse_mz_id(self):
        """
        Read the mzid file at self.path and store its identifications.

        One dict per spectrum result is appended to self.identifications.
        The dict maps a 1-based rank to an Identification: ranks 1 and 2
        when the result carries more than one SpectrumIdentificationItem,
        otherwise rank 1 only.

        returns:
        None
        """
        for result in mzid.read(self.path):
            title = parse_scan_id(result['spectrum title'])
            item_count = len(result['SpectrumIdentificationItem'])
            # Either the first two ranks or just the first one.
            wanted = (0, 1) if item_count > 1 else (0,)

            by_rank = {}
            for index in wanted:
                item = result['SpectrumIdentificationItem'][index]
                peptide_ref = item['peptide_ref']
                by_rank[index + 1] = Identification(
                    mzid_info_lvl_fragmentation=item['IonType'],
                    peptide_ref=peptide_ref,
                    title=title)

            self.identifications.append(by_rank)
    def load_distiller_mgf(self):
        """
        Build references between spectra of the MGF file at self.path.

        self.ids_to_be_referenced maps a scan id to the scan id of its
        partner. Spectra whose scan id is not a key of that mapping are
        buffered. A spectrum whose scan id is a key is paired with its
        partner if the partner has already been buffered; if it has not,
        the spectrum is skipped.

        When the two precursor masses differ by more than 21 the pair is
        only printed, otherwise a Reference is added to self.references
        with the allowed mass difference closest to the observed one as
        extra_mass. In both cases the buffered partner and the mapping
        entry are removed afterwards.
        """
        report = ("mass1:\t {0}\n"
                  "mass:\t {1}\n"
                  "scanid:\t {2}\n"
                  "charge:\t {3}\n"
                  "charge2:\t {4}\n"
                  "scanid2:\t {5}\n")
        buffered = {}
        allowed_shifts = list(gen_allowed_mass_diff_with_sign(n=4, z=1))

        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                raw_mass = spectrum['params']['pepmass'][0]
                charge = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(raw_mass, charge)

                scanid = int(parse_scan_id(spectrum['params']['title']))

                if scanid not in self.ids_to_be_referenced:
                    buffered[scanid] = spectrum
                    continue

                partner_id = self.ids_to_be_referenced[scanid]
                if partner_id not in buffered:
                    continue

                partner = buffered[partner_id]
                raw_mass1 = partner['params']['pepmass'][0]
                partner_charge = int(partner['params']['charge'][0])
                mass1 = calculatePrecursor(raw_mass1, partner_charge)

                diff = abs(mass1 - mass)
                distances = [abs(diff - abs(shift)) for shift in allowed_shifts]
                closest = distances.index(min(distances))

                if diff > 21:
                    # Distiller changes the precursor charge, therefore the
                    # precursor mass calculation is wrong for this pair.
                    print(report.format(mass1, mass, scanid, partner_charge,
                                        spectrum['params']['charge'][0],
                                        partner_id))
                    print(diff)
                    print(distances)
                    print("----------------")
                else:
                    self.references.add(Reference(
                        ppm=self.ppm,
                        id_2=scanid,
                        id_1=partner_id,  # also a scan id
                        peak_list_2=spectrum['m/z array'],
                        peak_list_1=partner['m/z array'],
                        mass_2=mass,
                        mass_1=mass1,
                        charge=spectrum['params']['charge'][0],
                        extra_mass=allowed_shifts[closest],
                        int_list_2=spectrum['intensity array'],
                        int_list_1=partner['intensity array'],
                        params2=spectrum['params'],
                        params1=partner['params']))

                del buffered[partner_id]
                del self.ids_to_be_referenced[scanid]
Example #4
0
    def read_enhanced_spectrum(self, path):
        """
        Index the spectra of the MGF file at path by scan id.

        Keeping a masterSpectrum for every spectrum is not memory
        efficient (4 GB for 8247 spectra), so only a Reference built from
        the scan id and the m/z array is stored in self.mgf_reads; the
        rest is created on request.
        """
        with mgf.read(path) as reader:
            for entry in reader:
                key = parse_scan_id(entry['params']['title'])
                mz_values = entry['m/z array']
                self.mgf_reads[key] = Reference(key, mz_values)
    def select_mgf(self):
        """
        Copy the chosen spectra from self.path_mgf_in to self.path_mgf_out.

        A spectrum is kept when its scan id (as int) is contained in
        self.list_chosen; only its m/z array, intensity array and params
        are written.
        """
        with mgf.read(self.path_mgf_in) as reader:
            kept = [
                {'m/z array': entry['m/z array'],
                 'intensity array': entry['intensity array'],
                 'params': entry['params']}
                for entry in reader
                if int(parse_scan_id(entry['params']['title'])) in self.list_chosen
            ]

        mgf.write(spectra=kept, output=self.path_mgf_out)
Example #6
0
 def load_export(self):
     """
     Export every peak of the MGF file at self.mgf_path to self.output_csv.

     The CSV gets the header ``scanid, peak, rel_int`` followed by one row
     per peak: the scan id of the spectrum, the m/z value and the value
     that calculateRelativeIntensity returns for the spectrum's intensity
     array at the same position. The scan id of each spectrum is also
     printed while it is processed.
     """
     # newline="" hands line-ending control to the csv writer; without it
     # text mode translates the "\n" inside os.linesep again and Windows
     # output ends up with "\r\r\n".
     with mgf.read(self.mgf_path) as spectra, \
             open(self.output_csv, "wt", newline="") as csvfile:
         writr = csv.writer(csvfile, lineterminator=os.linesep)
         writr.writerow(("scanid", "peak", "rel_int"))
         for spectrum in spectra:
             scan_id = parse_scan_id(spectrum['params']['title'])
             print("scanid:\t{0}\n".format(scan_id))
             rel_int = calculateRelativeIntensity(
                 spectrum['intensity array'])
             writr.writerows(
                 (scan_id, m, i)
                 for m, i in zip(spectrum['m/z array'], rel_int))
Example #7
0
 def test_splitting(self):
     """parse_scan_id returns the scan number of a quoted title as str."""
     quoted_title = '"controllerType=0 controllerNumber=1 scan=1316"'
     expected = '1316'
     assert_equal(parse_scan_id(quoted_title), expected)
Example #8
0
    def load_msconvert_mgf(self):
        """
        creates references based on precursor mass
        a missing scanid means an ms1 event
        by default referencing works just within one ms2 block

        Spectra are read from self.path in file order. Consecutive scan
        ids form one block; as soon as a scan id does not directly follow
        the previous one, self.ms is replaced by a fresh MasterSpectrum, so
        partners are only searched within the current block. Blocks that
        end with an odd number of spectra are counted and the count is
        printed once all spectra are processed.

        For each spectrum the precursor mass is shifted by every value of
        gen_allowed_mass_diff_with_sign(n=4, z=1) and looked up among the
        stored precursors of the same charge. On the first hit a Reference
        is added to self.references and the stored precursor is removed;
        without a hit the spectrum itself is stored in self.ms.

        NOTE(review): self.ms is read before this method ever assigns it,
        so it presumably has to be initialised by the caller - confirm.
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        block_ids = []  # scan ids of the current run of consecutive scans
        error = 0
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                mass = spectrum['params']['pepmass'][0]
                precursor_chrg = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)

                scanid = int(parse_scan_id(spectrum['params']['title']))

                if block_ids and scanid != block_ids[-1] + 1:
                    # Gap in the scan ids: the previous block is finished.
                    if len(block_ids) % 2 == 1:
                        error += 1  # odd block, one spectrum had no partner
                    block_ids = []
                    # new MS if scan_id group (separated by ms1) is completed
                    self.ms = MasterSpectrum()
                block_ids.append(scanid)

                # Stored precursors with the same charge, if there are any.
                same_charge = None
                if precursor_chrg in self.ms.spectrum:
                    same_charge = self.ms.spectrum[precursor_chrg]

                found = False
                if same_charge is not None and len(same_charge) > 0:
                    for extra_mass in gen_allowed_mass_diff_with_sign(n=4,
                                                                      z=1):
                        # The second Peak argument is a placeholder here;
                        # only key and mass are used for the lookup.
                        peak = Peak(mass + extra_mass, 0.5, fc)
                        key = peak.key()
                        if key not in same_charge:
                            continue
                        print(precursor_chrg)
                        idx, _bin_to_ack, _a, _b = self.ms.binary(
                            peak, 0, len(same_charge[key]) - 1,
                            precursor_chrg)
                        if idx == -1:
                            continue

                        partner = same_charge[key][idx]
                        self.references.add(Reference(
                            ppm=self.ppm,
                            id_2=scanid,
                            # stored peaks carry their scan id in `intensity`
                            id_1=partner.intensity,
                            peak_list_2=spectrum['m/z array'],
                            peak_list_1=partner.meta['ms'],
                            mass_2=mass,
                            mass_1=partner.mz,
                            charge=spectrum['params']['charge'][0],
                            extra_mass=extra_mass,
                            int_list_2=spectrum['intensity array'],
                            int_list_1=partner.meta['int'],
                            params2=spectrum['params'],
                            params1=partner.meta['params']))

                        # A stored precursor is used at most once; drop
                        # containers that became empty along the way.
                        del same_charge[key][idx]
                        if len(same_charge[key]) == 0:
                            del same_charge[key]
                            if len(same_charge) == 0:
                                del self.ms.spectrum[precursor_chrg]
                        found = True
                        break

                if not found:
                    self.ms.add(Peak(mass,
                                     scanid,
                                     fc,
                                     meta={
                                         'ms': spectrum['m/z array'],
                                         'int': spectrum['intensity array'],
                                         'params': spectrum['params']
                                     }),
                                charge=precursor_chrg)
            if error > 0:
                print(" delete valid information {0}".format(error))
    def load_distiller_mgf2(self, scan_window=20):
        """
        creates references based on precursor mass

        Spectra are read from self.path in file order into a fresh
        MasterSpectrum (self.ms). For each spectrum the precursor mass is
        shifted by every value of gen_allowed_mass_diff_with_sign(n=4, z=1)
        and looked up among the stored precursors of the same charge. On
        the first hit a Reference is added to self.references and the
        stored precursor is removed.

        Without a hit, self.ms is rebuilt from the stored precursors whose
        scan id is at least ``scanid - scan_window`` and the current
        spectrum is stored as well, so partners are only searched within
        that window of scan ids.

        parameters:
        scan_window -- how many scan ids back a stored precursor is kept
                       (default 20)
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        self.ms = MasterSpectrum()
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                mass = spectrum['params']['pepmass'][0]
                precursor_chrg = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)

                scanid = int(parse_scan_id(spectrum['params']['title']))
                meta = {'ms': spectrum['m/z array'],
                        'int': spectrum['intensity array'],
                        'params': spectrum['params']}

                # Stored precursors with the same charge, if there are any.
                same_charge = None
                if precursor_chrg in self.ms.spectrum:
                    same_charge = self.ms.spectrum[precursor_chrg]

                # Nothing to compare against: store without pruning.
                if len(self.ms.spectrum) == 0 or (
                        same_charge is not None and len(same_charge) == 0):
                    self.ms.add(Peak(mass, scanid, fc, meta=meta),
                                charge=precursor_chrg)
                    continue

                found = False
                if same_charge is not None:
                    for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                        # The second Peak argument is a placeholder here;
                        # only key and mass are used for the lookup.
                        peak = Peak(mass + extra_mass, 0.5, fc)
                        key = peak.key()
                        if key not in same_charge:
                            continue
                        print(precursor_chrg)
                        idx, _bin_to_ack, _a, _b = self.ms.binary(
                            peak, 0, len(same_charge[key]) - 1, precursor_chrg)
                        if idx == -1:
                            continue

                        partner = same_charge[key][idx]
                        self.references.add(Reference(
                            ppm=self.ppm,
                            id_2=scanid,
                            # stored peaks carry their scan id in `intensity`
                            id_1=partner.intensity,
                            peak_list_2=spectrum['m/z array'],
                            peak_list_1=partner.meta['ms'],
                            mass_2=mass,
                            mass_1=partner.mz,
                            charge=spectrum['params']['charge'][0],
                            extra_mass=extra_mass,
                            int_list_2=spectrum['intensity array'],
                            int_list_1=partner.meta['int'],
                            params2=spectrum['params'],
                            params1=partner.meta['params']))

                        # A stored precursor is used at most once; drop
                        # containers that became empty along the way.
                        del same_charge[key][idx]
                        if len(same_charge[key]) == 0:
                            del same_charge[key]
                            if len(same_charge) == 0:
                                del self.ms.spectrum[precursor_chrg]
                        found = True
                        break

                if not found:
                    # Keep only precursors inside the scan-id window, then
                    # store the current spectrum as a future partner.
                    limit_scan_id = scanid - scan_window
                    ms_bac = MasterSpectrum()
                    for chrg in self.ms.spectrum:
                        for key in self.ms.spectrum[chrg].keys():
                            for mp in self.ms.spectrum[chrg][key]:
                                if mp.intensity >= limit_scan_id:
                                    ms_bac.add(mp, charge=chrg)
                    self.ms = ms_bac
                    self.ms.add(Peak(mass, scanid, fc, meta=meta),
                                charge=precursor_chrg)