Beispiel #1
0
class DeltaExtractor(object):
    """Collect pairwise m/z differences of an MGF file in one master spectrum."""

    def __init__(self, path):
        """Remember the MGF ``path`` and start with an empty master spectrum."""
        self.masterSpectrum = MasterSpectrum()
        self.path = path

    def createDeltaMasterSpectrum(self,
                                  min_rel_intensity,
                                  delta_func=calculate_Delta_by_ppm(20)):
        """Add every pairwise peak difference of every spectrum.

        Peaks whose relative intensity (as returned by
        ``calculateRelativeIntensity``) is below ``min_rel_intensity`` are
        dropped first.  For each remaining pair the difference
        "later-indexed m/z minus earlier-indexed m/z" is wrapped in a ``Peak``
        that carries the relative intensity of the earlier-indexed peak and
        ``delta_func``, and is added to ``self.masterSpectrum`` with charge 0.

        The zero-based index of each spectrum is printed as it is processed.
        """
        with mgf.read(self.path) as spectra:
            for spectrum_index, spectrum in enumerate(spectra):
                print(spectrum_index)
                relative = calculateRelativeIntensity(
                    spectrum['intensity array'])
                kept = [(mz, rel)
                        for mz, rel in zip(spectrum['m/z array'], relative)
                        if rel >= min_rel_intensity]

                # Pairs are visited from the last kept peak downwards; the
                # insertion order into the master spectrum is kept that way.
                for upper in reversed(range(len(kept))):
                    upper_mz = kept[upper][0]
                    for lower in reversed(range(upper)):
                        lower_mz, lower_rel = kept[lower]
                        # intensity comes from the earlier-indexed peak
                        delta_peak = Peak(upper_mz - lower_mz, lower_rel,
                                          delta_func)
                        self.masterSpectrum.add(delta_peak, 0)

    def exportCsv(self, output_path):
        """Write the accumulated master spectrum to ``output_path`` as CSV."""
        self.masterSpectrum.export_to_csv(output_path)
Beispiel #2
0
    def request_ms(self):
        """Return a new master spectrum holding every value of ``self.mz_ary``.

        Each value is converted to ``float`` and added as a ``Peak`` with
        intensity 1, a 20 ppm delta function and charge 0.
        """
        spectrum = MasterSpectrum()
        tolerance = calculate_Delta_by_ppm(20)
        for mz_value in self.mz_ary:
            spectrum.add(Peak(float(mz_value), 1, tolerance), 0)
        return spectrum
    def load_recalibrate(self):
        """Re-write every spectrum of ``self.path``, shifted onto a reference mass.

        For each spectrum all peaks are collected in a fresh
        ``MasterSpectrum``.  The value returned by ``calculate_tag_tmt10()``
        (presumably the TMT10 tag mass -- inferred from the name, confirm) is
        then searched in that spectrum within the ``self.ppm`` tolerance:

        * not found -> peaks are written out unchanged;
        * found     -> the offset between reference and observed mass is
          printed and applied to every peak, either as a ppm shift
          (``self.type == 'ppm'``) or as a constant offset
          (``self.type == 'absolute'``).

        Each spectrum is written on its own via ``mgf.write`` to
        ``self.file_out``.

        Raises:
            ValueError: if a recalibration is needed and ``self.type`` is
                neither ``'ppm'`` nor ``'absolute'``.
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        tmt_mass = calculate_tag_tmt10()
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                ms = MasterSpectrum()
                params = spectrum['params']
                for mass, intensity in zip(spectrum['m/z array'],
                                           spectrum['intensity array']):
                    ms.add(Peak(mass, intensity, fc))

                # Look the reference mass up in the freshly built spectrum.
                reference = Peak(tmt_mass, 0.5, fc)
                recalibrate = False
                if reference.key() in ms.spectrum[0]:
                    candidates = ms.spectrum[0][reference.key()]
                    idx, _bin, _left, _right = ms.binary(
                        reference, 0, len(candidates) - 1, 0)
                    if idx != -1:
                        recalibrate = True
                        recalibration_mass = candidates[idx].mz
                        diff = tmt_mass - recalibration_mass
                        print(params['title'])
                        print("original={0}\tdiff={1}".format(
                            recalibration_mass, diff))

                if recalibrate:
                    ppm_shift = calculate_ppm_shift(diff, tmt_mass)

                mass_list = []
                int_list = []
                for key in ms.spectrum[0].keys():
                    for mp in ms.spectrum[0][key]:
                        if not recalibrate:
                            mass_list.append(mp.mz)
                        elif self.type == 'ppm':
                            # the shift scales with the peak's own m/z
                            mass_list.append(
                                mp.mz + calculate_da_shift(mp.mz, ppm_shift))
                        elif self.type == 'absolute':
                            # same constant offset for every peak
                            mass_list.append(mp.mz + diff)
                        else:
                            print(self.type)
                            raise ValueError("what did you dooooo")
                        int_list.append(mp.intensity)

                print("len is:\t{0}".format(len(mass_list)))
                recalibrated = {
                    'm/z array': mass_list,
                    'intensity array': int_list,
                    'params': params
                }
                mgf.write(spectra=[recalibrated], output=self.file_out)
Beispiel #4
0
class DeltaPatcher(object):
    """Drop rows from a delta CSV whose m/z is on an exclusion list."""

    def __init__(self, delta_func=calculate_Delta_by_ppm(20)):
        """Create an empty exclusion list.

        ``delta_func`` is handed to every ``Peak`` built by this object.
        """
        self.exclusionSpectrum = MasterSpectrum()
        self.delta_function = delta_func
        self.precursorDeltas = []

    def readExclusionList(self, path):
        '''
        Load the exclusion list from the CSV file ``path``.

        The first row is treated as a header and skipped; blank rows are
        ignored.  Of every other row two columns are read:

        * column 0 -- the m/z value (parsed as float)
        * column 2 -- the kind of entry: ``absolute`` entries are added to
          ``self.exclusionSpectrum``, ``precursor`` entries are appended to
          ``self.precursorDeltas``

        Column 1 is not read by this method.

        Raises ValueError for any other value in column 2.
        '''
        # newline='' is what the csv module expects for file objects.
        with open(path, 'r', newline='') as csvfile:
            readr = csv.reader(csvfile)
            next(readr, None)  # skip the header row (if the file has one)
            for row in readr:
                if not row:  # blank line in the file
                    continue
                kind = row[2]
                if kind == 'absolute':
                    p = Peak(float(row[0]), 1.0, self.delta_function)
                    # 0 for no differentiation of charge states
                    self.exclusionSpectrum.add(MasterPeak(p), 0)
                elif kind == 'precursor':
                    self.precursorDeltas.append(float(row[0]))
                else:
                    raise ValueError('A very specific bad thing happened')

    def patchDelta(self, input_path, output_path):
        '''
        Copy the CSV ``input_path`` to ``output_path`` without excluded rows.

        The header row is copied unchanged.  A data row is dropped when the
        m/z in its first column is found in ``self.exclusionSpectrum``
        (``"found it"`` is printed for each dropped row); every other row is
        written unchanged.  Blank rows are skipped.
        '''
        # The writer emits os.linesep itself, so the output file must not
        # translate newlines a second time (that would yield "\r\r\n" on
        # Windows) -- hence newline=''.
        with open(output_path, "wt", newline='') as csvfile, \
                open(input_path, 'r', newline='') as r_csvfile:
            writr = csv.writer(csvfile, lineterminator=os.linesep)
            readr = csv.reader(r_csvfile)

            header = next(readr, None)
            if header is not None:
                writr.writerow(header)

            for row in readr:
                if not row:  # blank line in the file
                    continue
                peak = Peak(float(row[0]), 0, self.delta_function)
                bins = self.exclusionSpectrum.spectrum[0]
                if peak.key() in bins:
                    idx, _, _, _ = self.exclusionSpectrum.binary(
                        peak, 0, len(bins[peak.key()]) - 1, 0)
                    if idx != -1:  # found -> row is excluded
                        print("found it")
                        continue
                writr.writerow(row)
Beispiel #5
0
 def just_create_heavy_ms(self):
     """Return a master spectrum built only from peak list 1.

     Every (m/z, intensity) pair of ``self.peak_list_1`` /
     ``self.int_list_1`` is added with ``self.fc`` and the placeholder
     annotation ``delta=511``, ``mass=-1``, ``decharged=False``.
     """
     heavy = MasterSpectrum()
     for mz_value, intensity in zip(self.peak_list_1, self.int_list_1):
         placeholder = {'delta': 511, 'mass': -1, 'decharged': False}
         heavy.add(Peak(mz_value, intensity, self.fc, meta=placeholder))
     return heavy
Beispiel #6
0
def generateMS_by_score_file(score_info_object):
    """
    Build a master spectrum from the 'mz' and 'diff' entries of a score object.

    ``score_info_object['mz']`` and ``score_info_object['diff']`` are walked
    in lockstep; every m/z becomes a ``Peak`` (intensity 1, 20 ppm delta
    function, charge 0) that carries the matching diff as its ``meta``.

    Returns the master spectrum and the number of peaks that were added.
    """
    ms = MasterSpectrum()
    tolerance = calculate_Delta_by_ppm(20)

    pairs = zip(score_info_object['mz'], score_info_object['diff'])
    dPeaks_matched = 0
    for dPeaks_matched, (mz_value, diff) in enumerate(pairs, start=1):
        ms.add(Peak(float(mz_value), 1, tolerance, meta=diff), 0)

    return ms, dPeaks_matched
class PrecursorDeltaPeaksExtractor(object):
    """Collect "precursor minus fragment" differences of an MGF file."""

    def __init__(self, path):
        """Remember the MGF ``path`` and start with an empty master spectrum."""
        self.masterSpectrum = MasterSpectrum()
        self.path = path

    def createDeltaPrecursorMasterSpectrum(
        self, delta_func=calculate_Delta_by_ppm(20)):
        """Add ``precursor - m/z`` for every peak of every spectrum.

        The precursor value comes from ``calculatePrecursor`` applied to the
        first ``pepmass`` entry and the first ``charge`` entry of the
        spectrum parameters.  Each difference is stored with the peak's
        relative intensity (``calculateRelativeIntensity``) and charge 0.
        """
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                params = spectrum['params']
                precursor = calculatePrecursor(mz=params['pepmass'][0],
                                               charge=params['charge'][0])
                relative = calculateRelativeIntensity(
                    spectrum['intensity array'])

                for fragment_mz, fragment_rel in zip(spectrum['m/z array'],
                                                     relative):
                    delta_peak = Peak(precursor - float(fragment_mz),
                                      float(fragment_rel), delta_func)
                    self.masterSpectrum.add(delta_peak, 0)

    def exportCsv(self, output_path):
        """Write the accumulated master spectrum to ``output_path`` as CSV."""
        self.masterSpectrum.export_to_csv(output_path)
Beispiel #8
0
class MgfPatcher(object):
    """Remove excluded peaks from the spectra of an MGF file."""

    def __init__(self, delta_func=calculate_Delta_by_ppm(20)):
        """Create an empty exclusion list; ``delta_func`` is used for all peaks."""
        self.delta_function = delta_func
        self.precursorDeltas = []
        self.exclusionSpectrum = MasterSpectrum()

    def readExclusionList(self, path):
        '''
        Load the exclusion list from the CSV file ``path``.

        The first row is skipped as header.  Column 0 of every further row
        is the m/z value and column 2 its kind: ``absolute`` values go into
        ``self.exclusionSpectrum``, ``precursor`` values are appended to
        ``self.precursorDeltas``.  Any other kind raises ValueError.
        '''
        with open(path, 'r') as csvfile:
            rows = csv.reader(csvfile)
            next(rows, None)  # the first row is the header
            for row in rows:
                kind = row[2]
                if kind == 'precursor':
                    self.precursorDeltas.append(float(row[0]))
                elif kind == 'absolute':
                    excluded = Peak(float(row[0]), 1.0, self.delta_function)
                    # 0 for no differentiation of charge states
                    self.exclusionSpectrum.add(MasterPeak(excluded), 0)
                else:
                    raise ValueError('A very specific bad thing happened')

    def patchMgf(self, input_path, output_path):
        '''
        Write a copy of ``input_path`` to ``output_path`` without excluded peaks.

        A peak is removed when it is found in ``self.exclusionSpectrum``, or
        -- if its bin key is not present there at all -- when it lies inside
        ``precursor - delta`` for one of ``self.precursorDeltas``.

        NOTE(review): a peak whose bin key exists in the exclusion spectrum
        but which does not match any entry is never tested against the
        precursor deltas -- confirm that this is intended.
        '''
        spectra_out = []
        with mgf.read(input_path) as spectra:
            for spectrum in spectra:
                int_dic = spectrum['intensity array']
                mz_dic = spectrum['m/z array']
                param_dic = spectrum['params']
                precursor = calculatePrecursor(mz=param_dic['pepmass'][0],
                                               charge=param_dic['charge'][0])

                del_array = []  # positions of the peaks to drop
                for pos, m in enumerate(mz_dic):
                    peak = Peak(m, 0, self.delta_function)
                    bins = self.exclusionSpectrum.spectrum[0]
                    if peak.key() not in bins:
                        mp = MasterPeak(peak)
                        del_array.extend(
                            pos for precursorDelta in self.precursorDeltas
                            if mp.isInsideMz(precursor - precursorDelta))
                        continue

                    idx, _bin, _left, _right = self.exclusionSpectrum.binary(
                        peak, 0, len(bins[peak.key()]) - 1, 0)
                    if idx != -1:  # found
                        del_array.append(pos)

                spectra_out.append({
                    'm/z array': np.delete(mz_dic, del_array, 0),
                    'intensity array': np.delete(int_dic, del_array, 0),
                    'params': param_dic
                })

        mgf.write(spectra=spectra_out, output=output_path)
Beispiel #9
0
    def create_ms(self, iMin_similarity):
        """
        Build a master spectrum from peak list 1 and annotate it with the
        best matching peaks of peak list 2.

        According to the original author, int/mz list 1 belong to the
        spectrum with the higher precursor mass.

        Steps:

        1. Every peak of list 1 is added to a new ``MasterSpectrum`` with
           placeholder meta data (``delta=511``, ``mass=-1``,
           ``similarity=-1``) plus its relative intensity.
        2. The number of tags is ``round(abs(self.extra_mass) /
           calculate_Mass_Diff())``; it is stored as a string in
           ``self.params["#numtags"]`` and used to obtain the allowed mass
           differences from ``mass_diff_decharging_stuff``.
        3. For every peak of list 2 and every allowed delta (ascending), the
           master spectrum is searched for a peak at ``mass + delta``.  The
           similarity of a hit is the ratio of the two relative intensities,
           inverted if necessary so that it is at most 1.  The hit with the
           highest similarity is remembered.
        4. If that similarity is strictly greater than ``iMin_similarity``
           and strictly greater than the similarity already stored on the
           master peak, the master peak's meta data is replaced.  If the
           delta is flagged as decharged, the peak is additionally re-added
           at the m/z returned by ``calculatePrecursor`` and the original
           entry is deleted.

        Peaks that never got a partner keep ``meta['mass'] == -1``.

        Returns the annotated ``MasterSpectrum``.
        """
        req_min_similarity = iMin_similarity
        ms = MasterSpectrum()
        rel_int = calculateRelativeIntensity(self.int_list_1)
        # The loop variable ``rel_int`` shadows the list of the same name;
        # this works because zip() captured the list before the first rebind.
        for mass, rel_int, i in zip(self.peak_list_1, rel_int,
                                    self.int_list_1):
            ms.add(
                Peak(mass,
                     i,
                     self.fc,
                     meta={
                         'delta': 511,
                         'mass': -1,
                         'decharged': False,
                         'similarity': -1,
                         'originated_mz': mass,
                         'rel_int': rel_int
                     }))

        num = round((abs(self.extra_mass) /
                     calculate_Mass_Diff()))  # num of tags possible
        self.params["#numtags"] = "{0}".format(num)
        deltas = mass_diff_decharging_stuff(n=int(num), z=self.charge)
        # Allowed mass differences, tried in ascending order.
        dd = deltas.keys()
        dd = list(dd)
        dd.sort()
        rel_int = calculateRelativeIntensity(self.int_list_2)
        # Same shadowing of ``rel_int`` as in the first loop.
        for mass, rel_int, i in zip(self.peak_list_2, rel_int,
                                    self.int_list_2):
            # Best candidate found so far for this list-2 peak (-1 = none).
            similarity_most_similar = -1
            idx_most_similar = -1
            peak_key_most_similar = -1
            delta_most_similar = -1
            mass_most_similar = -1

            for delta in dd:
                peak = Peak(mass + delta, i, self.fc)
                if peak.key() in ms.spectrum[0]:
                    idx, bin_to_ack, should_merge_left_peak, should_merge_right_peak = ms.binary(
                        peak, 0,
                        len(ms.spectrum[0][peak.key()]) - 1, 0)
                    if idx != -1:  # found
                        rel_int1 = ms.spectrum[0][
                            peak.key()][idx].meta['rel_int']
                        # NOTE(review): raises ZeroDivisionError if the
                        # relative intensity of the list-2 peak can be 0 --
                        # confirm what calculateRelativeIntensity returns.
                        ratio = rel_int1 / rel_int
                        if ratio > 1:  # keep the ratio between 0 and 1
                            ratio = 1 / ratio

                        if ratio > similarity_most_similar:
                            similarity_most_similar = ratio
                            idx_most_similar = idx
                            peak_key_most_similar = peak.key()
                            mass_most_similar = mass
                            delta_most_similar = delta

            if similarity_most_similar > req_min_similarity:
                # Only overwrite when this partner is better than the one the
                # master peak already refers to.
                if similarity_most_similar > ms.spectrum[0][
                        peak_key_most_similar][idx_most_similar].meta[
                            'similarity']:
                    # NOTE(review): 'rel_int' is set to ``ratio``, i.e. the
                    # ratio of the LAST hit of the delta loop, which is not
                    # necessarily the most similar one; it also replaces the
                    # relative intensity that later similarity computations
                    # read from this peak -- confirm this is intended.
                    ms.spectrum[0][peak_key_most_similar][
                        idx_most_similar].meta = {
                            'delta':
                            delta_most_similar,
                            'mass':
                            mass_most_similar,
                            'decharged':
                            deltas[delta_most_similar]['decharge']['state'],
                            'similarity':
                            similarity_most_similar,
                            'originated_mz':
                            ms.spectrum[0][peak_key_most_similar]
                            [idx_most_similar].mz,
                            'rel_int':
                            ratio
                        }

                    if deltas[delta_most_similar]['decharge']['state']:
                        # Re-insert the peak at the m/z computed by
                        # calculatePrecursor, then remove the original entry.
                        # NOTE(review): the delete below uses the index taken
                        # before ms.add(); if the new peak lands in the same
                        # bin the index may no longer point at the original
                        # entry -- verify against MasterSpectrum.add.
                        ms.add(
                            Peak(calculatePrecursor(
                                ms.spectrum[0][peak_key_most_similar]
                                [idx_most_similar].mz,
                                deltas[delta_most_similar]['decharge']['z']),
                                 ms.spectrum[0][peak_key_most_similar]
                                 [idx_most_similar].intensity,
                                 self.fc,
                                 meta=ms.spectrum[0][peak_key_most_similar]
                                 [idx_most_similar].meta))
                        del (ms.spectrum[0][peak_key_most_similar]
                             [idx_most_similar])
                        # Drop the bin once it is empty.
                        if len(ms.spectrum[0][peak_key_most_similar]) == 0:
                            del (ms.spectrum[0][peak_key_most_similar])

        return ms
Beispiel #10
0
class MgfAnnotater(object):
    """Pair spectra of an MGF file by precursor mass and export the result.

    ``load_msconvert_mgf`` fills ``self.references`` with ``Reference``
    objects (one per pair of spectra); the ``export_*`` methods turn those
    references into a CSV or MGF file.
    """

    def __init__(self, path, output_path, min_rel_similarity, ppm=10):
        """Store the configuration and start with no references.

        Args:
            path: MGF file to read.
            output_path: CSV file written by
                ``export_annotated_spectra_to_csv``.
            min_rel_similarity: threshold handed to ``Reference.create_ms``.
            ppm: tolerance used for precursor matching.
        """
        self.ppm = ppm
        self.ms = MasterSpectrum()
        self.path = path
        self.out = []
        self.output_path = output_path
        self.references = sortedlist(key=lambda i: i.id_1)
        self.min_rel_similarity = min_rel_similarity

    def _store_spectrum(self, spectrum, mass, scanid, fc, precursor_chrg):
        """Remember ``spectrum`` under its precursor mass for later pairing.

        The scan id is stored in the intensity slot of the peak; the peak
        lists and parameters travel along in ``meta``.
        """
        self.ms.add(Peak(mass,
                         scanid,
                         fc,
                         meta={
                             'ms': spectrum['m/z array'],
                             'int': spectrum['intensity array'],
                             'params': spectrum['params']
                         }),
                    charge=precursor_chrg)

    def _reference_stored_spectrum(self, spectrum, mass, scanid, fc,
                                   precursor_chrg):
        """Pair ``spectrum`` with a stored spectrum of the same charge.

        Every allowed mass difference is tried in the order produced by
        ``gen_allowed_mass_diff_with_sign``; the first stored precursor found
        at ``mass + extra_mass`` is turned into a ``Reference`` and removed
        from ``self.ms`` (empty bins and charge levels are removed as well).

        Returns True if a reference was created, otherwise False.
        """
        for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
            peak = Peak(mass + extra_mass, 0.5, fc)
            stored_by_key = self.ms.spectrum[precursor_chrg]
            if peak.key() not in stored_by_key:
                continue

            print(precursor_chrg)
            candidates = stored_by_key[peak.key()]
            idx, _, _, _ = self.ms.binary(peak, 0,
                                          len(candidates) - 1,
                                          precursor_chrg)
            if idx == -1:
                continue

            partner = candidates[idx]
            self.references.add(
                Reference(ppm=self.ppm,
                          id_2=scanid,
                          id_1=partner.intensity,  # also scanid
                          peak_list_2=spectrum['m/z array'],
                          peak_list_1=partner.meta['ms'],
                          mass_2=mass,
                          mass_1=partner.mz,
                          charge=spectrum['params']['charge'][0],
                          extra_mass=extra_mass,
                          int_list_2=spectrum['intensity array'],
                          int_list_1=partner.meta['int'],
                          params2=spectrum['params'],
                          params1=partner.meta['params']))

            # A stored spectrum can only be referenced once.
            del candidates[idx]
            if len(candidates) == 0:
                del stored_by_key[peak.key()]
                if len(stored_by_key) == 0:
                    del self.ms.spectrum[precursor_chrg]
            return True
        return False

    def load_msconvert_mgf(self):
        """
        Create references based on precursor mass.

        Spectra are read in file order.  A gap in the scan ids (according to
        the original author a missing scan id means an MS1 event) closes the
        current block: the stored precursors are discarded, so referencing
        only works within one MS2 block.  Blocks with an odd number of scans
        are counted and reported at the end.

        Each spectrum is either paired with a stored spectrum of the same
        charge (a ``Reference`` is added to ``self.references``) or stored
        itself for later pairing.
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        scan_id_ary = []
        error = 0
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                mass = spectrum['params']['pepmass'][0]
                precursor_chrg = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)
                scanid = int(parse_scan_id(spectrum['params']['title']))

                if scan_id_ary and scanid != scan_id_ary[-1] + 1:
                    # Scan ids are no longer consecutive: the block is done.
                    if len(scan_id_ary) % 2 == 1:
                        error += 1
                    scan_id_ary = []
                    # new MS if scan_id group (separated by ms1) is completed
                    self.ms = MasterSpectrum()
                scan_id_ary.append(scanid)

                # Pairing is only attempted against stored spectra of the
                # same precursor charge.
                stored = self.ms.spectrum
                has_candidates = (precursor_chrg in stored.keys()
                                  and len(stored[precursor_chrg]) != 0)
                referenced = has_candidates and self._reference_stored_spectrum(
                    spectrum, mass, scanid, fc, precursor_chrg)
                if not referenced:
                    self._store_spectrum(spectrum, mass, scanid, fc,
                                         precursor_chrg)
            if error > 0:
                print(" delete valid information {0}".format(error))
                # raise ValueError("taddaaa")

    def export_annotated_spectra_to_csv(self):
        """
        Write all annotated peaks of all references to ``self.output_path``.

        Only peaks that found a partner (``meta['mass'] != -1``) are written.
        Columns:

        id1, id2:       ids of the two referenced spectra
        mz, intensity:  the peak itself
        annotation:     delta mass
        mz2:            m/z of the partner peak
        decharged:      True/False
        similarity:     intensity similarity of mz1 vs mz2
        originated mz:  if decharged the original mz is saved
        extra_mass:     ``extra_mass`` of the reference
        """
        print("start exporting information")
        # The writer emits os.linesep itself; newline='' keeps the file
        # object from translating it a second time ("\r\r\n" on Windows).
        with open(self.output_path, "wt", newline='') as csvfile:
            writr = csv.writer(csvfile, lineterminator=os.linesep)
            writr.writerow(
                ("id1", "id2", "mz", "intensity", "annotation", "mz2",
                 "decharged", 'similarity', 'originated mz', "extra_mass"))
            for ref in self.references:
                ms = ref.create_ms(iMin_similarity=self.min_rel_similarity)
                for chrg in ms.spectrum:
                    for key in ms.spectrum[chrg].keys():
                        for mp in ms.spectrum[chrg][key]:
                            if mp.meta['mass'] == -1:
                                continue  # peak without partner
                            writr.writerow(
                                (ref.id_1, ref.id_2, mp.mz, mp.intensity,
                                 mp.meta['delta'], mp.meta['mass'],
                                 mp.meta['decharged'],
                                 mp.meta['similarity'],
                                 mp.meta['originated_mz'], ref.extra_mass))

    def export_annotated_spectra_to_mgf(self,
                                        mgf_path,
                                        report_just_heavy=False):
        """Write one spectrum per reference to the MGF file ``mgf_path``.

        With ``report_just_heavy`` every peak of
        ``Reference.just_create_heavy_ms`` is exported; otherwise only the
        peaks of ``Reference.create_ms`` that found a partner
        (``meta['mass'] != -1``).  References without any exported peak are
        left out.
        """
        spectra_out = []
        for ref in self.references:
            if report_just_heavy:
                ms = ref.just_create_heavy_ms()
            else:
                ms = ref.create_ms(iMin_similarity=self.min_rel_similarity)
            buf_peaks = []
            buf_int = []
            for chrg in ms.spectrum:
                for key in ms.spectrum[chrg].keys():
                    for mp in ms.spectrum[chrg][key]:
                        if report_just_heavy or mp.meta['mass'] != -1:
                            buf_peaks.append(mp.mz)
                            buf_int.append(mp.intensity)
            # Emit the spectrum once per reference, after all charge levels
            # were collected; appending inside the charge loop would repeat
            # the same buffers for every additional charge level.
            if len(buf_peaks) != 0:
                spectra_out.append({
                    'm/z array': buf_peaks,
                    'intensity array': buf_int,
                    'params': ref.params
                })
        mgf.write(spectra=spectra_out, output=mgf_path)
    def load_distiller_mgf2(self):
        """
        Create references based on precursor mass.

        Every spectrum is either paired with a previously stored spectrum of
        the same precursor charge (a ``Reference`` is added to
        ``self.references`` and the stored spectrum is removed) or stored
        itself for later pairing.

        When a spectrum finds no partner, stored spectra whose scan id is
        more than 20 below the current scan id are discarded before the
        current spectrum is stored, so referencing only looks back over a
        limited scan window.

        The original notes on this method: a missing scan id means an MS1
        event, and by default referencing works just within one MS2 block.
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        # ``error`` is never incremented in this method, so the report at the
        # end never fires.
        error = 0
        self.ms = MasterSpectrum()
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                mass = spectrum['params']['pepmass'][0]
                precursor_chrg = int(spectrum['params']['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)

                scanid = int(parse_scan_id(spectrum['params']['title']))
                found = False
                if len(self.ms.spectrum) == 0:
                    # Nothing stored yet: just remember this spectrum.  The
                    # scan id travels in the intensity slot of the peak.
                    # (``peak`` is not used in this branch.)
                    peak = Peak(mass, scanid, fc)
                    self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}),
                                charge=precursor_chrg)
                    found = True
                else:
                    if (precursor_chrg in self.ms.spectrum.keys()):  # pairing is done per precursor charge
                        if len(self.ms.spectrum[precursor_chrg]) == 0:
                            # Charge level exists but is empty: store only.
                            # (``peak`` is not used in this branch.)
                            peak = Peak(mass, scanid, fc)
                            self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}),
                                        charge=precursor_chrg)
                            found = True
                        else:
                            # Try every allowed mass difference until the
                            # first stored precursor matches.
                            for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                                if found is False:
                                    peak = Peak(mass + extra_mass, 0.5, fc)
                                    if peak.key() in self.ms.spectrum[precursor_chrg]:
                                        print(precursor_chrg)
                                        idx, bin_to_ack, a, b = self.ms.binary(peak, 0, len(self.ms.spectrum[precursor_chrg][peak.key()]) - 1, precursor_chrg)
                                        if idx != -1:
                                            self.references.add(Reference(ppm=self.ppm,
                                                                          id_2=scanid,
                                                                          id_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].intensity,  # also scanid
                                                                          peak_list_2=spectrum['m/z array'],
                                                                          peak_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['ms'],
                                                                          mass_2=mass,
                                                                          mass_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].mz,
                                                                          charge=spectrum['params']['charge'][0],
                                                                          extra_mass=extra_mass,
                                                                          int_list_2=spectrum['intensity array'],
                                                                          int_list_1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['int'],
                                                                          params2=spectrum['params'],
                                                                          params1=self.ms.spectrum[precursor_chrg][peak.key()][idx].meta['params']))
                                            found = True
                                            # A stored spectrum is referenced
                                            # only once: remove it and clean
                                            # up containers that became empty.
                                            del(self.ms.spectrum[precursor_chrg][peak.key()][idx])
                                            if len(self.ms.spectrum[precursor_chrg][peak.key()]) == 0:
                                                del(self.ms.spectrum[precursor_chrg][peak.key()])
                                                if len(self.ms.spectrum[precursor_chrg]) == 0:
                                                    del(self.ms.spectrum[precursor_chrg])

                if found is False:
                    # No partner: rebuild the store with only the recent
                    # spectra (the intensity slot holds the scan id), then
                    # add the current spectrum.
                    limit_scan_id = scanid - 20  # could start at -19
                    ms_bac = MasterSpectrum()
                    for chrg in self.ms.spectrum:
                        for key in self.ms.spectrum[chrg].keys():
                            for mp in self.ms.spectrum[chrg][key]:
                                if mp.intensity >= limit_scan_id:
                                    ms_bac.add(mp, charge=chrg)
                    self.ms = ms_bac
                    self.ms.add(Peak(mass, scanid, fc, meta={'ms': spectrum['m/z array'], 'int': spectrum['intensity array'], 'params': spectrum['params']}),
                                charge=precursor_chrg)
            if error > 0:
                print(" delete valid information {0}".format(error))
class MgfAnnotaterDistiller(MgfAnnotater):
    """
    def __init__(self, path, output_path, ppm=10):
        self.ppm = ppm
        self.ms = MasterSpectrum()
        self.path = path
        self.out = []
        self.output_path = output_path
        self.references = sortedlist(key=lambda i: i.id_1)
    """

    def load_improved_csv(self, path_score):
        """
        Read the pairing table that tells which scan refers to which.

        The csv file needs the columns ``id1`` and ``id2``. For every row the
        numerically larger id becomes the key and the smaller id the value of
        ``self.ids_to_be_referenced``. If the same key shows up in several
        rows, the last row wins.

        :param path_score: path of the csv file to read
        :raises KeyError: if a row lacks the ``id1`` or ``id2`` column
        :raises ValueError: if an id cannot be converted to ``int``
        """
        spectra_to_be_referenced = {}
        # newline='' is what the csv module asks for when it is handed a file object
        with open(path_score, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                id1 = int(row['id1'])
                id2 = int(row['id2'])
                # DictReader yields strings; comparing those would order
                # '9' after '10', so the ids are compared as integers.
                if id1 > id2:
                    spectra_to_be_referenced[id1] = id2
                else:
                    spectra_to_be_referenced[id2] = id1

        self.ids_to_be_referenced = spectra_to_be_referenced

    def load_distiller_mgf(self):
        """
        Build references for the scan pairs listed in the improved csv.

        Expects ``self.ids_to_be_referenced`` (scan id -> partner scan id) to
        be filled already. The mgf file at ``self.path`` is read once:

        * a spectrum whose scan id is not a key of that mapping is kept in a
          local cache so a later spectrum can refer to it;
        * a spectrum whose scan id is a key, and whose partner is in the
          cache, is compared with the partner. If the precursor masses are at
          most 21 apart a ``Reference`` is added to ``self.references``,
          otherwise the pair is only printed. In both cases the partner is
          dropped from the cache and the entry is removed from
          ``self.ids_to_be_referenced``.

        a missing scanid means an ms1 event
        """
        cached = {}

        allowed_shifts = list(gen_allowed_mass_diff_with_sign(n=4, z=1))

        report = ("mass1:\t {0}\n"
                  "mass:\t {1}\n"
                  "scanid:\t {2}\n"
                  "charge:\t {3}\n"
                  "charge2:\t {4}\n"
                  "scanid2:\t {5}\n")

        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                params = spectrum['params']
                mass = params['pepmass'][0]
                precursor_chrg = int(params['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)

                scanid = int(parse_scan_id(params['title']))

                if scanid not in self.ids_to_be_referenced:
                    # not a referencing scan: remember it as a possible partner
                    cached[scanid] = spectrum
                    continue

                partner_id = self.ids_to_be_referenced[scanid]
                if partner_id not in cached:
                    continue

                partner = cached[partner_id]
                partner_params = partner['params']
                mass1 = partner_params['pepmass'][0]
                precursor_chrg1 = int(partner_params['charge'][0])
                mass1 = calculatePrecursor(mass1, precursor_chrg1)

                diff = abs(mass1 - mass)
                # distance of the observed gap to every allowed mass shift
                diff2 = [abs(diff - abs(shift)) for shift in allowed_shifts]
                pos = diff2.index(min(diff2))

                if diff > 21:
                    # distiller changes precursor charge, therefore the
                    # precursor mass calculation is wrong
                    print(report.format(mass1, mass, scanid, precursor_chrg1,
                                        params['charge'][0], partner_id))
                    print(diff)
                    print(diff2)
                    print("----------------")
                else:
                    self.references.add(Reference(ppm=self.ppm,
                                                  id_2=scanid,
                                                  id_1=partner_id,  # also scanid
                                                  peak_list_2=spectrum['m/z array'],
                                                  peak_list_1=partner['m/z array'],
                                                  mass_2=mass,
                                                  mass_1=mass1,
                                                  charge=params['charge'][0],
                                                  extra_mass=allowed_shifts[pos],
                                                  int_list_2=spectrum['intensity array'],
                                                  int_list_1=partner['intensity array'],
                                                  params2=params,
                                                  params1=partner_params))

                # the pair is consumed whether or not a Reference was created
                del cached[partner_id]
                del self.ids_to_be_referenced[scanid]

    def load_distiller_mgf2(self):
        """
        Create references by matching precursor masses.

        ``self.ms`` is reset and then used as a store of spectra that are
        still waiting for a partner. Each waiting spectrum is a ``Peak`` whose
        mz is the precursor mass, whose intensity slot carries the scan id and
        whose ``meta`` holds the peak list, intensities and params.

        For every spectrum of the mgf file at ``self.path``:

        * if the store is empty, or the bucket of the spectrum's charge is
          empty, the spectrum is simply stored;
        * otherwise the precursor mass is shifted by every value yielded by
          ``gen_allowed_mass_diff_with_sign(n=4, z=1)`` and looked up among
          the stored spectra of the same charge. The first hit yields a
          ``Reference`` in ``self.references`` and removes the stored partner;
        * if nothing matched, stored spectra whose scan id is more than 20
          below the current one are dropped and the current spectrum is
          stored.

        a missing scanid means an ms1 event
        by default referencing works just within one ms2 block
        """
        fc = calculate_Delta_by_ppm(self.ppm)
        self.ms = MasterSpectrum()
        with mgf.read(self.path) as spectra:
            for spectrum in spectra:
                params = spectrum['params']
                mass = params['pepmass'][0]
                precursor_chrg = int(params['charge'][0])
                mass = calculatePrecursor(mass, precursor_chrg)

                scanid = int(parse_scan_id(params['title']))
                meta = {'ms': spectrum['m/z array'],
                        'int': spectrum['intensity array'],
                        'params': params}

                if len(self.ms.spectrum) == 0:
                    # nothing is waiting yet: store and move on
                    self.ms.add(Peak(mass, scanid, fc, meta=meta),
                                charge=precursor_chrg)
                    continue

                matched = False
                if precursor_chrg in self.ms.spectrum:  # react to charge !!!!!!
                    charge_bins = self.ms.spectrum[precursor_chrg]
                    if len(charge_bins) == 0:
                        self.ms.add(Peak(mass, scanid, fc, meta=meta),
                                    charge=precursor_chrg)
                        continue

                    for extra_mass in gen_allowed_mass_diff_with_sign(n=4, z=1):
                        probe = Peak(mass + extra_mass, 0.5, fc)
                        bin_key = probe.key()
                        if bin_key not in charge_bins:
                            continue
                        # NOTE(review): looks like leftover debug output; kept
                        # so that stdout stays the same
                        print(precursor_chrg)
                        candidates = charge_bins[bin_key]
                        idx, _bin_to_ack, _a, _b = self.ms.binary(
                            probe, 0, len(candidates) - 1, precursor_chrg)
                        if idx == -1:
                            continue

                        partner = candidates[idx]
                        self.references.add(Reference(ppm=self.ppm,
                                                      id_2=scanid,
                                                      id_1=partner.intensity,  # also scanid
                                                      peak_list_2=spectrum['m/z array'],
                                                      peak_list_1=partner.meta['ms'],
                                                      mass_2=mass,
                                                      mass_1=partner.mz,
                                                      charge=params['charge'][0],
                                                      extra_mass=extra_mass,
                                                      int_list_2=spectrum['intensity array'],
                                                      int_list_1=partner.meta['int'],
                                                      params2=params,
                                                      params1=partner.meta['params']))
                        # the partner is used up; remove it and any container
                        # that became empty because of that
                        del candidates[idx]
                        if len(candidates) == 0:
                            del charge_bins[bin_key]
                            if len(charge_bins) == 0:
                                del self.ms.spectrum[precursor_chrg]
                        matched = True
                        break  # first hit wins, no need to try further shifts

                if matched:
                    continue

                # No partner found: keep only recent entries, then store the
                # current spectrum so that later scans can refer to it.
                limit_scan_id = scanid - 20  # could start at -19
                ms_bac = MasterSpectrum()
                for chrg in self.ms.spectrum:
                    for key in self.ms.spectrum[chrg].keys():
                        for mp in self.ms.spectrum[chrg][key]:
                            if mp.intensity >= limit_scan_id:  # intensity slot holds the scan id
                                ms_bac.add(mp, charge=chrg)
                self.ms = ms_bac
                self.ms.add(Peak(mass, scanid, fc, meta=meta),
                            charge=precursor_chrg)