Example #1
0
 def patchDelta(self, input_path, output_path):
     """Copy a CSV file, dropping every row whose m/z is on the exclusion list.

     The first row is treated as a header and copied unchanged. For every
     following row the first column is parsed as a float m/z value, wrapped
     in a ``Peak`` (using ``self.delta_function``) and looked up in bin 0 of
     ``self.exclusionSpectrum``. Rows with a match are skipped (and
     "found it" is printed); all other rows are written to ``output_path``.
     Completely blank rows are ignored.

     Args:
         input_path: path of the CSV file to filter.
         output_path: path of the filtered CSV file (created/overwritten).

     Raises:
         ValueError: if the first column of a data row is not a number.
     """
     # newline='' is what the csv module expects for its file objects; without
     # it the os.linesep terminator is translated a second time on Windows.
     with open(output_path, "wt", newline='') as out_file, \
             open(input_path, 'r', newline='') as in_file:
         writer = csv.writer(out_file, lineterminator=os.linesep)
         rows = csv.reader(in_file)

         header = next(rows, None)
         if header is None:  # empty input -> empty output
             return
         writer.writerow(header)

         for row in rows:
             if not row:  # blank line, nothing to parse
                 continue
             peak = Peak(float(row[0]), 0, self.delta_function)
             excluded = False
             if peak.key() in self.exclusionSpectrum.spectrum[0]:
                 idx, _, _, _ = self.exclusionSpectrum.binary(
                     peak, 0,
                     len(self.exclusionSpectrum.spectrum[0][peak.key()]) - 1,
                     0)
                 excluded = idx != -1
             if excluded:
                 print("found it")
             else:
                 writer.writerow(row)
Example #2
0
    def load_recalibrate(self):
        """Rewrite every spectrum of ``self.path`` with recalibrated m/z values.

        For each spectrum a MasterSpectrum is built from its peaks and searched
        for the mass returned by ``calculate_tag_tmt10()``. If that mass is
        found, the difference between the expected and the observed mass is
        printed and applied to every peak, either as a ppm shift
        (``self.type == 'ppm'``) or as a constant offset
        (``self.type == 'absolute'``). Spectra without the anchor peak are
        written with their masses as stored in the MasterSpectrum.

        Each spectrum is written to ``self.file_out`` with its original params.

        Raises:
            ValueError: if recalibration is needed and ``self.type`` is
                neither 'ppm' nor 'absolute'.
        """
        tolerance = calculate_Delta_by_ppm(self.ppm)
        anchor_mass = calculate_tag_tmt10()
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                master = MasterSpectrum()
                params = spectrum['params']
                for mz, intensity in zip(spectrum['m/z array'],
                                         spectrum['intensity array']):
                    master.add(Peak(mz, intensity, tolerance))

                # Look for the anchor mass among the peaks just loaded.
                anchor = Peak(anchor_mass, 0.5, tolerance)
                recalibrate = False
                if anchor.key() in master.spectrum[0]:
                    idx, _, _, _ = master.binary(
                        anchor, 0,
                        len(master.spectrum[0][anchor.key()]) - 1, 0)
                    if idx != -1:
                        recalibrate = True
                        observed = master.spectrum[0][anchor.key()][idx].mz
                        offset = anchor_mass - observed
                        print(params['title'])
                        print("original={0}\tdiff={1}".format(
                            observed, offset))

                if recalibrate:
                    ppm_shift = calculate_ppm_shift(offset, anchor_mass)

                out_mz = []
                out_int = []
                for key in master.spectrum[0].keys():
                    for mp in master.spectrum[0][key]:
                        if not recalibrate:
                            corrected = mp.mz
                        elif self.type == 'ppm':
                            # ppm mode: the Da shift scales with the peak mass
                            corrected = mp.mz + calculate_da_shift(
                                mp.mz, ppm_shift)
                        elif self.type == 'absolute':
                            # absolute mode: same Da offset for every peak
                            corrected = mp.mz + offset
                        else:
                            print(self.type)
                            raise ValueError("what did you dooooo")
                        out_mz.append(corrected)
                        out_int.append(mp.intensity)

                print("len is:\t{0}".format(len(out_mz)))
                record = {
                    'm/z array': out_mz,
                    'intensity array': out_int,
                    'params': params
                }
                mgf.write(spectra=[record], output=self.file_out)
Example #3
0
    def patchMgf(self, input_path, output_path):
        """Write a copy of an MGF file with excluded peaks removed.

        A peak is removed from a spectrum when either
        - its bin exists in ``self.exclusionSpectrum`` (bin 0) and the binary
          search finds a matching peak there, or
        - its bin does not exist there and the peak lies inside
          ``precursor - delta`` for one of ``self.precursorDeltas``.

        The filtered spectra keep their original params and are written to
        ``output_path`` after the whole input has been read.
        """
        with mgf.read(input_path) as spectra:
            cleaned = []
            for spectrum in spectra:
                intensities = spectrum['intensity array']
                masses = spectrum['m/z array']
                params = spectrum['params']

                charge = spectrum['params']['charge'][0]
                precursor = calculatePrecursor(
                    mz=spectrum['params']['pepmass'][0], charge=charge)

                doomed = []  # positions to delete; duplicates are harmless
                for pos, m in enumerate(masses):
                    peak = Peak(m, 0, self.delta_function)
                    if peak.key() in self.exclusionSpectrum.spectrum[0]:
                        idx, _, _, _ = self.exclusionSpectrum.binary(
                            peak, 0,
                            len(self.exclusionSpectrum.spectrum[0][peak.key()])
                            - 1, 0)
                        if idx != -1:  # found on the exclusion list
                            doomed.append(pos)
                        continue

                    # Bin unknown to the exclusion list: test precursor deltas.
                    candidate = MasterPeak(peak)
                    doomed.extend(
                        pos for precursorDelta in self.precursorDeltas
                        if candidate.isInsideMz(precursor - precursorDelta))

                intensities = np.delete(intensities, doomed, 0)
                masses = np.delete(masses, doomed, 0)

                cleaned.append({
                    'm/z array': masses,
                    'intensity array': intensities,
                    'params': params
                })

        mgf.write(spectra=cleaned, output=output_path)
Example #4
0
    def analyse_mzid_vs_mgf(self):
        """Check that every identified m/z is present in its MGF spectrum.

        For each entry of ``self.identifications`` all reported m/z values are
        looked up (20 ppm tolerance) in bin 0 of the master spectrum obtained
        from ``self.mgf_reads[scan_id]``.

        Raises:
            ValueError: if ``self.mgf_reads`` is empty, or if an m/z value
                cannot be found in the corresponding spectrum.
        """
        tolerance = calculate_Delta_by_ppm(20)
        if self.mgf_reads == {}:
            raise ValueError("need the mgf beforehand read_enhanced_spectrum")

        for ids in self.identifications:
            mzs = ids.report_all_mzs()
            ms = self.mgf_reads[ids.scan_id].request_ms()

            for mz in mzs:
                probe = Peak(mz, 2, tolerance)
                located = False
                if probe.key() in ms.spectrum[0]:
                    idx, _, _, _ = ms.binary(
                        probe, 0, len(ms.spectrum[0][probe.key()]) - 1, 0)
                    located = idx != -1
                if not located:
                    # missing bin and missing peak are reported the same way
                    raise ValueError(
                        "mz:\t{0}\nscan_id:\t{1}".format(mz, ids.scan_id))
Example #5
0
    def analyze_mzid_id_vs_score_file(self, path_score, output_path="/home/tobiass/Desktop/out.csv"):
        """Compare identified fragment ions with the deltas stored in a score file.

        Prerequisite:
            The mzid data (spectrum and peptide info) must already be loaded
            into ``self.identifications``.

        Steps:
            - collect the scan ids of all identifications (taken from entry
              ``1`` of each identification)
            - load those scans from the score file via ``read_score_file``
              and build a master spectrum per scan whose peaks carry the
              delta as meta information
            - for every identification entry and every ion series, determine
              the expected mass delta at each ion position from the annotated
              tag positions and look the ion up (20 ppm) in the scan

        One CSV row is written per ion. The "found" column holds
            - the ``meta`` value of the matched peak, or
            - ``-1`` if the peak's bin does not exist in the scan, or
            - ``-2`` if the bin exists but no peak lies within tolerance.

        Args:
            path_score: path to the score file (created by control.py
                improve_csv according to the original documentation).
            output_path: path of the CSV report (created/overwritten).

        Raises:
            ValueError: if an ion series is neither a 'b' nor a 'y' series.
        """
        # newline='' is what the csv module expects for its file objects; without
        # it the os.linesep terminator is translated a second time on Windows.
        with open(output_path, "wt", newline='') as csvfile:
            writr = csv.writer(csvfile, lineterminator=os.linesep)
            writr.writerow(("scanid", "rank", "peak", "position", "frag", "expected by mascot", "found", "charge", "PeaksMatchedInSerie", "max_number_of_tmt_tag"))
            delta_func = calculate_Delta_by_ppm(20)

            # Unique scan ids in first-seen order.
            unique_scan_ids = dict.fromkeys(
                identification_hashobject[1].scan_id
                for identification_hashobject in self.identifications)
            valid_scan_ids = [int(scan_id) for scan_id in unique_scan_ids]
            valid_spectra_info = self.read_score_file(path_score, valid_scan_ids)

            self.parse_mz_id_peptide_ref()

            for identification_hashobject in self.identifications:
                for i in identification_hashobject:
                    hit = identification_hashobject[i]
                    peptide_ref = hit.peptide_ref
                    scan_id = int(hit.scan_id)

                    b_tmt, y_tmt = self.peptide_evidence[peptide_ref].get_annotated_positions()
                    max_tmt_tag = calculate_max_tmt(b_tmt)  # same num for b and y
                    ms, dPeaks_matched = generateMS_by_score_file(valid_spectra_info[scan_id])
                    for ion_serie in hit.ion_series_ary:
                        tmt_masses = calculate_allowed_Mass_Diff(n=max_tmt_tag, z=ion_serie.charge)
                        if 'b' in ion_serie.fragtype:
                            tmt_pos_ary = b_tmt
                        elif 'y' in ion_serie.fragtype:
                            tmt_pos_ary = y_tmt
                        else:
                            raise ValueError("{0}\tis not valid fragtype".format(ion_serie.fragtype))

                        for pos, mz in zip(ion_serie.ions_index, ion_serie.mz_ary):
                            num_tag_at_pos = tmt_pos_ary[pos]
                            if num_tag_at_pos == 0:
                                expected_mass_delta = 0
                            else:
                                expected_mass_delta = tmt_masses[num_tag_at_pos][ion_serie.charge]

                            # The score-file spectrum only holds the peaks of the
                            # heavy labelled partner, so an identified peak may
                            # legitimately be missing; that is reported through
                            # the -1 / -2 sentinels instead of raising.
                            peak = Peak(mz, 1, delta_func)
                            if peak.key() not in ms.spectrum[0]:
                                found_mass_diff = -1
                            else:
                                idx, _, _, _ = ms.binary(peak, 0, len(ms.spectrum[0][peak.key()]) - 1, 0)
                                if idx == -1:
                                    found_mass_diff = -2
                                else:
                                    found_mass_diff = ms.spectrum[0][peak.key()][idx].meta

                            writr.writerow((scan_id, i, mz, pos, ion_serie.fragtype, expected_mass_delta, found_mass_diff, ion_serie.charge, dPeaks_matched, max_tmt_tag))
Example #6
0
 def test_key(self):
     """A peak at m/z 100 must be keyed as 100, not as 99."""
     peak = Peak(100, 0.5, calculate_Delta_based_MZ)
     unexpected, expected = 99, 100
     assert_not_equal(peak.key(), unexpected)
     assert_equal(peak.key(), expected)
Example #7
0
    def create_ms(self, iMin_similarity):
        """
        Build a master spectrum from peak list 1 and annotate it with the
        best matching peaks of peak list 2.

        int list 1 and mz list 1 are the ones with a higher precursor mass;
        they create the master spectrum.

        for every peak in list 2
        check all allowed mass diffs (depending on charge and number of tags)
        -> look for a master peak at peak + delta
        --> for every hit compute a similarity value: the ratio of the two
            relative intensities, inverted when > 1 so it lies in 0 - 1

        -> keep the hit with the highest similarity
        -> it is accepted only if the similarity is strictly greater than
           iMin_similarity AND strictly greater than the similarity already
           stored on that master peak (initially -1)
        --> on acceptance the master peak's meta is replaced; if the delta is
            flagged as decharged, the peak is re-added at the mass returned
            by calculatePrecursor and the original entry is removed

        side effect: self.params["#numtags"] is set to the number of tags

        iMin_similarity: minimum similarity a match must exceed
        returns: the annotated MasterSpectrum
        """
        req_min_similarity = iMin_similarity
        ms = MasterSpectrum()
        rel_int = calculateRelativeIntensity(self.int_list_1)
        # NOTE: the loop variable rel_int shadows the list of the same name;
        # this works because zip() received the list before the rebinding.
        for mass, rel_int, i in zip(self.peak_list_1, rel_int,
                                    self.int_list_1):
            ms.add(
                Peak(mass,
                     i,
                     self.fc,
                     meta={
                         'delta': 511,
                         'mass': -1,
                         'decharged': False,
                         'similarity': -1,
                         'originated_mz': mass,
                         'rel_int': rel_int
                     }))

        num = round((abs(self.extra_mass) /
                     calculate_Mass_Diff()))  # num of tags possible
        self.params["#numtags"] = "{0}".format(num)
        deltas = mass_diff_decharging_stuff(n=int(num), z=self.charge)
        # try the deltas in ascending order
        dd = deltas.keys()
        dd = list(dd)
        dd.sort()
        rel_int = calculateRelativeIntensity(self.int_list_2)
        for mass, rel_int, i in zip(self.peak_list_2, rel_int,
                                    self.int_list_2):
            # best hit seen so far for this list-2 peak (-1 == none)
            similarity_most_similar = -1
            idx_most_similar = -1
            peak_key_most_similar = -1
            delta_most_similar = -1
            mass_most_similar = -1

            for delta in dd:
                peak = Peak(mass + delta, i, self.fc)
                if peak.key() in ms.spectrum[0]:
                    idx, bin_to_ack, should_merge_left_peak, should_merge_right_peak = ms.binary(
                        peak, 0,
                        len(ms.spectrum[0][peak.key()]) - 1, 0)
                    if idx != -1:  # found
                        rel_int1 = ms.spectrum[0][
                            peak.key()][idx].meta['rel_int']
                        # NOTE(review): raises ZeroDivisionError if rel_int
                        # is 0 - confirm calculateRelativeIntensity never
                        # returns 0.
                        ratio = rel_int1 / rel_int
                        if ratio > 1:  # i want ratio to be between 0 - 1
                            ratio = 1 / ratio

                        if ratio > similarity_most_similar:
                            similarity_most_similar = ratio
                            idx_most_similar = idx
                            peak_key_most_similar = peak.key()
                            mass_most_similar = mass
                            delta_most_similar = delta

            if similarity_most_similar > req_min_similarity:
                # only replace an existing annotation with a better one
                if similarity_most_similar > ms.spectrum[0][
                        peak_key_most_similar][idx_most_similar].meta[
                            'similarity']:
                    # NOTE(review): 'rel_int' is overwritten with `ratio`,
                    # i.e. the ratio of the LAST delta that produced a hit,
                    # which is not necessarily the best one and is no longer
                    # a relative intensity - confirm this is intended.
                    ms.spectrum[0][peak_key_most_similar][
                        idx_most_similar].meta = {
                            'delta':
                            delta_most_similar,
                            'mass':
                            mass_most_similar,
                            'decharged':
                            deltas[delta_most_similar]['decharge']['state'],
                            'similarity':
                            similarity_most_similar,
                            'originated_mz':
                            ms.spectrum[0][peak_key_most_similar]
                            [idx_most_similar].mz,
                            'rel_int':
                            ratio
                        }

                    if deltas[delta_most_similar]['decharge']['state']:
                        # re-add the matched peak at the mass given by
                        # calculatePrecursor (same intensity and meta) ...
                        ms.add(
                            Peak(calculatePrecursor(
                                ms.spectrum[0][peak_key_most_similar]
                                [idx_most_similar].mz,
                                deltas[delta_most_similar]['decharge']['z']),
                                 ms.spectrum[0][peak_key_most_similar]
                                 [idx_most_similar].intensity,
                                 self.fc,
                                 meta=ms.spectrum[0][peak_key_most_similar]
                                 [idx_most_similar].meta))
                        # ... then drop the original entry and its bin if it
                        # became empty.
                        # NOTE(review): assumes the add() above did not shift
                        # idx_most_similar inside the same bin - confirm.
                        del (ms.spectrum[0][peak_key_most_similar]
                             [idx_most_similar])
                        if len(ms.spectrum[0][peak_key_most_similar]) == 0:
                            del (ms.spectrum[0][peak_key_most_similar])

        return ms
Example #8
0
    def load_msconvert_mgf(self):
        """Pair spectra of ``self.path`` whose precursor masses differ by an allowed delta.

        Spectra are processed in file order. Consecutive scan ids form one
        block; a gap in the numbering (a missing scan id means an MS1 event)
        starts a new block and replaces ``self.ms`` with a fresh
        MasterSpectrum, so pairing only happens within one MS2 block.

        Every spectrum is looked up in ``self.ms`` under its precursor charge
        at ``precursor mass + extra_mass`` for each value yielded by
        ``gen_allowed_mass_diff_with_sign(n=4, z=1)``:
        - on the first hit a ``Reference`` linking both spectra is added to
          ``self.references`` and the stored partner is removed from
          ``self.ms`` (together with bins that became empty);
        - otherwise the spectrum is stored in ``self.ms``. The scan id is kept
          in the peak's intensity slot and the arrays/params in its meta.

        A block that ends with an odd number of scans is counted, and the
        count is printed once the file has been read.

        NOTE(review): ``self.ms`` is read before this method ever assigns it
        (it is only replaced at a scan-id gap), so it must already be a
        MasterSpectrum when the method is called - confirm against __init__.
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        block_scan_ids = []
        error = 0
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                params = spectrum['params']
                pepmass = params['pepmass'][0]
                precursor_chrg = int(params['charge'][0])
                mass = calculatePrecursor(pepmass, precursor_chrg)
                scanid = int(parse_scan_id(params['title']))

                if block_scan_ids and scanid != block_scan_ids[-1] + 1:
                    # Gap in the numbering: the previous block is complete.
                    if len(block_scan_ids) % 2 == 1:
                        error += 1  # odd block, one scan cannot have a partner
                    block_scan_ids = [scanid]
                    self.ms = MasterSpectrum()
                else:
                    block_scan_ids.append(scanid)

                found = False
                by_charge = self.ms.spectrum
                if (precursor_chrg in by_charge
                        and len(by_charge[precursor_chrg]) != 0):  # react to charge
                    bins = by_charge[precursor_chrg]
                    for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                        peak = Peak(mass + extra_mass, 0.5, fc)
                        key = peak.key()
                        if key not in bins:
                            continue
                        print(precursor_chrg)
                        idx, _, _, _ = self.ms.binary(
                            peak, 0, len(bins[key]) - 1, precursor_chrg)
                        if idx == -1:
                            continue

                        partner = bins[key][idx]
                        self.references.add(
                            Reference(
                                ppm=self.ppm,
                                id_2=scanid,
                                id_1=partner.intensity,  # intensity slot holds the scan id
                                peak_list_2=spectrum['m/z array'],
                                peak_list_1=partner.meta['ms'],
                                mass_2=mass,
                                mass_1=partner.mz,
                                charge=params['charge'][0],
                                extra_mass=extra_mass,
                                int_list_2=spectrum['intensity array'],
                                int_list_1=partner.meta['int'],
                                params2=params,
                                params1=partner.meta['params']))
                        found = True

                        # The partner is used up: remove it and every
                        # container that became empty.
                        del bins[key][idx]
                        if len(bins[key]) == 0:
                            del bins[key]
                            if len(bins) == 0:
                                del by_charge[precursor_chrg]
                        break

                if not found:
                    # No partner yet: keep this spectrum for later ones.
                    self.ms.add(Peak(mass,
                                     scanid,
                                     fc,
                                     meta={
                                         'ms': spectrum['m/z array'],
                                         'int': spectrum['intensity array'],
                                         'params': params
                                     }),
                                charge=precursor_chrg)
            if error > 0:
                print(" delete valid information {0}".format(error))
    def load_distiller_mgf2(self, scan_window=20):
        """Pair spectra of ``self.path`` whose precursor masses differ by an allowed delta.

        ``self.ms`` is reset to an empty MasterSpectrum, then the spectra are
        processed in file order. Each spectrum is looked up in ``self.ms``
        under its precursor charge at ``precursor mass + extra_mass`` for each
        value yielded by ``gen_allowed_mass_diff_with_sign(n=4, z=1)``:
        - on the first hit a ``Reference`` linking both spectra is added to
          ``self.references`` and the stored partner is removed from
          ``self.ms`` (together with bins that became empty);
        - otherwise the spectrum is stored in ``self.ms``. The scan id is kept
          in the peak's intensity slot and the arrays/params in its meta.

        Before an unmatched spectrum is stored, ``self.ms`` is rebuilt keeping
        only entries whose scan id is at least ``scanid - scan_window``, so
        partners are only searched among recent scans. The rebuild is skipped
        when ``self.ms`` is empty or holds an empty bin set for the charge.

        Args:
            scan_window: how many scan ids back a stored spectrum may lie and
                still be kept as a pairing candidate (default 20).
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        self.ms = MasterSpectrum()
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                params = spectrum['params']
                pepmass = params['pepmass'][0]
                precursor_chrg = int(params['charge'][0])
                mass = calculatePrecursor(pepmass, precursor_chrg)
                scanid = int(parse_scan_id(params['title']))

                by_charge = self.ms.spectrum
                charge_known = precursor_chrg in by_charge  # react to charge
                store_directly = len(by_charge) == 0 or (
                    charge_known and len(by_charge[precursor_chrg]) == 0)

                found = False
                if charge_known and not store_directly:
                    bins = by_charge[precursor_chrg]
                    for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                        peak = Peak(mass + extra_mass, 0.5, fc)
                        key = peak.key()
                        if key not in bins:
                            continue
                        print(precursor_chrg)
                        idx, _, _, _ = self.ms.binary(
                            peak, 0, len(bins[key]) - 1, precursor_chrg)
                        if idx == -1:
                            continue

                        partner = bins[key][idx]
                        self.references.add(Reference(
                            ppm=self.ppm,
                            id_2=scanid,
                            id_1=partner.intensity,  # intensity slot holds the scan id
                            peak_list_2=spectrum['m/z array'],
                            peak_list_1=partner.meta['ms'],
                            mass_2=mass,
                            mass_1=partner.mz,
                            charge=params['charge'][0],
                            extra_mass=extra_mass,
                            int_list_2=spectrum['intensity array'],
                            int_list_1=partner.meta['int'],
                            params2=params,
                            params1=partner.meta['params']))
                        found = True

                        # The partner is used up: remove it and every
                        # container that became empty.
                        del bins[key][idx]
                        if len(bins[key]) == 0:
                            del bins[key]
                            if len(bins) == 0:
                                del by_charge[precursor_chrg]
                        break

                if found:
                    continue

                if not store_directly:
                    # Forget candidates that are too old to be a partner.
                    limit_scan_id = scanid - scan_window  # could start one scan later
                    ms_bac = MasterSpectrum()
                    for chrg in self.ms.spectrum:
                        for key in self.ms.spectrum[chrg].keys():
                            for mp in self.ms.spectrum[chrg][key]:
                                if mp.intensity >= limit_scan_id:
                                    ms_bac.add(mp, charge=chrg)
                    self.ms = ms_bac

                self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': params}),
                            charge=precursor_chrg)