Beispiel #1
0
def extract_peaks(file,time_range):
    """Average the scans of each filter string in ``file`` and save them.

    The file is read with ``pymzml.run.Reader``.  A spectrum is used only
    when it reports a 'filter string', a 'total ion current' and a
    'scan time' that lies inside ``time_range`` (both bounds inclusive).
    The peaks of all accepted spectra sharing a filter string are merged
    with ``mspy.combine``, passed through ``mspy.reduce`` and scaled by
    ``1 / scan_count`` with ``mspy.multiply``.  Each result is written with
    ``np.save`` to the path ``file + filter_string + '.np'``.

    Parameters:
        file: path handed to ``pymzml.run.Reader``; also the prefix of every
            output file name.
        time_range: indexable whose items 0 and 1 are the lower and upper
            scan-time bounds.

    Returns:
        None; the results are only written to disk.
    """
    run = pymzml.run.Reader(file)
    print(file)

    lower, upper = time_range[0], time_range[1]

    # filter string -> {'scan_count': int, 'scan': accumulated peak array}
    scans = {}
    for spec in run:
        filter_string = spec.get('filter string')
        if filter_string is None or spec.get('total ion current') is None:
            continue

        # A missing scan time must be skipped explicitly: comparing None
        # with a number raises TypeError on Python 3.
        scan_time = spec.get('scan time')
        if scan_time is None or not lower <= scan_time <= upper:
            continue

        peaks = np.array(spec.peaks)
        entry = scans.get(filter_string)
        if entry is None:
            scans[filter_string] = {'scan_count': 1, 'scan': peaks}
        else:
            entry['scan_count'] += 1
            entry['scan'] = mspy.combine(entry['scan'], peaks)

    for filter_string, entry in scans.items():
        averaged = mspy.reduce(entry['scan'])
        averaged = mspy.multiply(averaged, y=1.0/entry['scan_count'])
        with open(file+filter_string+'.np', 'wb') as f:
            np.save(f, averaged)
 def integrate_match_data(self):
     """Build per-peptide intensity timelines and summed intensities.

     Fills ``self.peptide_intensities[peptide][z]`` with one value per
     entry of ``self.retention_times``: the ``basepeak`` of the match
     stored in ``self.match_data`` at that index, or 0.0 when there is no
     match for that peptide and charge state.  The base peaks of all
     charge states are summed into
     ``self.peptide_intensities_int[peptide]``.  Every
     ``self.peptide_ms1_profiles[peptide][z]`` is also replaced by the
     result of ``mspy.reduce`` applied to it.
     """
     self.peptide_intensities = {}
     self.peptide_intensities_int = {}

     # Set up an empty timeline for every peptide / charge state pair.
     for peptide, charge_states in self.matched_peptides.items():
         timelines = {}
         self.peptide_intensities[peptide] = timelines
         self.peptide_intensities_int[peptide] = 0.0
         for z in charge_states:
             timelines[z] = []
             ms1_profiles = self.peptide_ms1_profiles[peptide]
             ms1_profiles[z] = mspy.reduce(ms1_profiles[z])

     # Walk the time points in order, extending every timeline by one value.
     for index, _ in enumerate(self.retention_times):
         for peptide, charge_states in self.matched_peptides.items():
             for z in charge_states:
                 matches = self.match_data[index]
                 timeline = self.peptide_intensities[peptide][z]
                 if peptide not in matches or z not in matches[peptide]:
                     timeline.append(0.0)
                     continue
                 basepeak = matches[peptide][z].basepeak
                 timeline.append(basepeak)
                 self.peptide_intensities_int[peptide] += basepeak
Beispiel #3
0
def match_peptides_in_scans(
    peptides, PatternObjects, ScanList, ms1ScanList, profiles, charge_min, charge_max, mz_min, mz_max, files
):
    # Matches expected isotopic distributions of peptides in MS1 spectra

    t0 = timer.time()
    RetentionTimeData = {}
    BasepeakData = {}
    RmsdData = {}
    ProfileData = {}
    IntIntensity = {}
    # Iterate through peptide and initiate data structures
    for peptide in peptides:
        RetentionTimeData[peptide] = {}
        BasepeakData[peptide] = {}
        RmsdData[peptide] = {}
        ProfileData[peptide] = {}
        IntIntensity[peptide] = defaultdict(lambda: 1)
        # Iterate through charge states and initiate data structures
        for z in range(charge_min, charge_max + 1):
            RetentionTimeData[peptide][z] = {}
            BasepeakData[peptide][z] = {}
            RmsdData[peptide][z] = {}
            ProfileData[peptide][z] = {}

            # Iterate through files and initiate data structures
            for file_iter in files:
                RetentionTimeData[peptide][z][file_iter] = []
                BasepeakData[peptide][z][file_iter] = []
                RmsdData[peptide][z][file_iter] = []
                ProfileData[peptide][z][file_iter] = None
                IntIntensity[peptide][file_iter] += 0
                if PatternObjects[peptide][z][0][0] < mz_min or PatternObjects[peptide][z][-1][0] > mz_max:
                    continue
                # Iterate through scans
                for scan_number in ms1ScanList[file_iter]:
                    RetentionTimeData[peptide][z][file_iter].append(ScanList[file_iter][scan_number]["retentionTime"])
                    checkPatternResult = mspy.checkpattern_fast(
                        signal=profiles[file_iter][scan_number], pattern=PatternObjects[peptide][z]
                    )
                    if checkPatternResult is not None:
                        RmsdData[peptide][z][file_iter].append(checkPatternResult.rmsd)
                        BasepeakData[peptide][z][file_iter].append(checkPatternResult.basepeak)
                        if checkPatternResult.rmsd < RmsdThreshold:
                            if ProfileData[peptide][z][file_iter] is None:
                                ProfileData[peptide][z][file_iter] = mspy.crop(
                                    profiles[file_iter][scan_number],
                                    PatternObjects[peptide][z][0][0] - 0.1,
                                    PatternObjects[peptide][z][-1][0] + 0.1,
                                )
                            else:
                                ProfileData[peptide][z][file_iter] = mspy.combine(
                                    ProfileData[peptide][z][file_iter],
                                    mspy.crop(
                                        profiles[file_iter][scan_number],
                                        PatternObjects[peptide][z][0][0] - 0.1,
                                        PatternObjects[peptide][z][-1][0] + 0.1,
                                    ),
                                )
                            IntIntensity[peptide][file_iter] += checkPatternResult.basepeak
                    else:
                        RmsdData[peptide][z][file_iter].append(1)
                        BasepeakData[peptide][z][file_iter].append(0)
                if ProfileData[peptide][z][file_iter] is not None:
                    ProfileData[peptide][z][file_iter] = mspy.reduce(ProfileData[peptide][z][file_iter])
    t1 = timer.time() - t0

    print "Loaded files in %s " % t1

    MatchData = {}
    MatchData["RetentionTime"] = RetentionTimeData
    MatchData["Basepeak"] = BasepeakData
    MatchData["Rmsd"] = RmsdData
    MatchData["Profile"] = ProfileData
    MatchData["IntIntensity"] = IntIntensity
    return MatchData