コード例 #1
0
    def pick_experiment(self, exp: ms.MSExperiment, peak_radius: int = 1, window_radius: float = 0.015,
            pp_mode: str = 'int', min_int_mult: float = 0.10, strict: bool = True) -> ms.MSExperiment:
        """Peak pick every MS1 spectrum of an experiment.

        ``exp.sortSpectra()`` is called on the input before picking, so the
        caller's experiment is reordered. Spectra whose MS level is not 1 are
        skipped and do not appear in the result.

        Keyword arguments:
        exp: the experiment to peak pick
        peak_radius: the minimum peak radius of a peak set
        window_radius: the maximum m/z window radius of a peak set
        pp_mode: the mode to use ('ltr' or 'int')
        min_int_mult: a multiplier to the maximum peak intensity in a set (for differentiating
            between signal and noise)
        strict: if False, allow a single increase in intensity in either direction

        Returns: a new experiment holding one picked spectrum per MS1 spectrum of ``exp``.
        """
        exp.sortSpectra()

        picked_exp = ms.MSExperiment()
        for spec in exp.getSpectra():
            if spec.getMSLevel() != 1:
                continue
            # All picking parameters are forwarded unchanged to the per-spectrum picker.
            picked_exp.addSpectrum(self.pick_spectra(spec, peak_radius, window_radius, pp_mode, min_int_mult, strict))

        return picked_exp
コード例 #2
0
def getBasicQuality(exp: oms.MSExperiment, verbose: bool=False) -> mzqc.RunQuality:
    """
    getBasicQuality calculates the basic QualityMetrics from a mass spectrometry peak file and creates the related RunQuality object.

    Calculated basic QC metrics and proto-metrics necessary to calculate more elaborate QC metrics with additional data (e.g. ID).

    Side effects: ``exp.sortSpectra()`` is called on the input, and the path reported by
    ``getLoadedFilePath()`` is handed to ``utils.sha256fromfile`` for the checksum.

    Every MS2 spectrum is related to the most recent MS1 (survey) scan seen before it. MS2 spectra
    that occur before any MS1 scan get NaN survey intensities and an empty survey peak list
    (hence NaN precursor error/intensity and zero peaks in the isolation window). Their 'rank'
    is still counted from index 0.

    Parameters
    ----------
    exp : oms.MSExperiment
        The mass spectrometry peak file to calculate metrics from
    verbose : bool, optional
        switches on verbose logging, by default False (currently not used inside this function)

    Returns
    -------
    mzqc.RunQuality
        A RunQuality object containing the list of metrics calculated and metadata collected, ready for integration into a mzQC file object.
    """
    metrics: List[mzqc.QualityMetric] = list()

    def add_metric(name, value):
        # Every metric produced here shares the same cvRef and placeholder accession.
        metrics.append(
            mzqc.QualityMetric(cvRef="QC",
                    accession="QC:0000000",
                    name=name,
                    value=value)
        )

    instrument = exp.getInstrument()
    instr_srl: str = instrument.getMetaValue('instrument serial number') \
        if instrument.metaValueExists('instrument serial number') else 'unknown'  # MS:1000529 in mzML

    input_loc: str = exp.getExperimentalSettings().getLoadedFilePath()
    base_name: str = basename(input_loc)
    chksm: str = utils.sha256fromfile(input_loc)
    cmpltn: str = exp.getDateTime().get()
    # TODO derive the acquisition start time (completion time minus the last RT of the first chromatogram)

    meta: mzqc.MetaDataParameters = mzqc.MetaDataParameters(
        inputFiles=[
            mzqc.InputFile(name=base_name,location=input_loc,
                        fileFormat=mzqc.CvParameter("MS", "MS:1000584", "mzML format"),
                        fileProperties=[
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000747",
                                name="completion time",
                                value=cmpltn
                            ),
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000569",
                                name="SHA-256",
                                value=chksm
                            ),
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000031",
                                name="instrument model",
                                value=instrument.getName()
                            ),
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000529",
                                name="instrument serial number",
                                value=instr_srl
                            )
                            # TODO integrate parent location and checksum, available from
                            # exp.getExperimentalSettings().getSourceFiles()[0]
                            # (getNameOfFile / getChecksum / getChecksumType) when source files are present
                            # id: MS:1002846 (Associated raw file URI) N.B. definition is PRIDE specific
                            # fitting checksum cv missing
                        ]
            )
        ],
        analysisSoftware=[
            mzqc.AnalysisSoftware(cvRef="MS", accession="MS:1000752", name="TOPP software", version=oms.__version__, uri="openms.de")
        ]
    )

    # this is mighty important to sort by RT
    exp.sortSpectra()

    # NOTE(review): when the run holds no MS2 spectra these sentinels are reported unchanged
    # as the "MZ aquisition range" - kept for backward compatibility.
    min_mz: float = sys.maxsize
    max_mz: float = 0
    mslevelcounts: Dict[int,int] = defaultdict(int)

    spectrum_acquisition_metrics_MS1: Dict[str,List[Any]] = defaultdict(list)
    spectrum_acquisition_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    spectrum_topn: Dict[str,List[Any]] = defaultdict(list)  # never filled here; emitted as an empty table
    tandem_spectrum_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    trap_metrics_MS1: Dict[str,List[Any]] = defaultdict(list)
    trap_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    isolation_window_metrics: Dict[str,List[Any]] = defaultdict(list)
    tic_tab: Dict[str,List[Any]] = defaultdict(list)

    # ActivationMethod look-up dict
    ams = {getattr(ActivationMethod,i): i for i in dir(ActivationMethod) if type(getattr(ActivationMethod,i))==int }

    # State of the most recent survey (MS1) scan. The defaults keep MS2 spectra that precede
    # every MS1 scan from raising on unbound names: they see NaN intensities and no peaks.
    last_surveyscan_index: int = 0
    last_surveyscan_intensity = np.nan
    last_surveyscan_max = np.nan
    survey_peaks = np.empty((0, 2))  # one row per survey peak: (m/z, intensity)

    for spin, spec in enumerate(exp):
        mslevel = spec.getMSLevel()
        mslevelcounts[mslevel] += 1

        iontraptime = utils.getTrapTime(spec)
        rt = spec.getRT()
        spec_mz, spec_int = spec.get_peaks()
        # .item() for dtype to pytype, so that every table holds plain Python numbers
        intens_sum = spec_int.sum().item()
        # max() of an empty array raises, so an empty spectrum reports 0.0 instead
        intens_max = spec_int.max().item() if spec_int.size else 0.0

        if mslevel == 1:
            last_surveyscan_index = spin
            last_surveyscan_intensity = intens_sum
            last_surveyscan_max = intens_max
            # cache the survey peaks once instead of re-fetching them for every dependent MS2
            survey_peaks = np.column_stack((spec_mz, spec_int))

            spectrum_acquisition_metrics_MS1['RT'].append(rt)
            spectrum_acquisition_metrics_MS1['SN'].append(noiseqc.getSN_medianmethod(spec))
            spectrum_acquisition_metrics_MS1['peakcount'].append(spec.size())
            spectrum_acquisition_metrics_MS1['int'].append(intens_sum)

            trap_metrics_MS1['RT'].append(rt)
            trap_metrics_MS1['traptime'].append(iontraptime)

            tic_tab['RT'].append(rt)
            tic_tab['int'].append(intens_sum)

        elif mslevel == 2:
            precursor = spec.getPrecursors()[0]  # only the first precursor is considered
            precursor_mz = precursor.getMZ()
            min_mz = min(min_mz, precursor_mz)
            max_mz = max(max_mz, precursor_mz)

            spectrum_acquisition_metrics_MS2['RT'].append(rt)
            spectrum_acquisition_metrics_MS2['SN'].append(noiseqc.getSN_medianmethod(spec))
            spectrum_acquisition_metrics_MS2['peakcount'].append(spec.size())
            spectrum_acquisition_metrics_MS2['int'].append(intens_sum)
            spectrum_acquisition_metrics_MS2['native_id'].append(utils.spec_native_id(spec))

            # position of this spectrum relative to its survey scan
            rank = spin - last_surveyscan_index
            spectrum_acquisition_metrics_MS2['rank'].append(rank)

            trap_metrics_MS2['RT'].append(rt)
            trap_metrics_MS2['traptime'].append(iontraptime)
            trap_metrics_MS2['activation_method'].append(ams.get(next(iter(precursor.getActivationMethods()), None),'unknown'))
            trap_metrics_MS2['activation_energy'].append(precursor.getMetaValue('collision energy') if \
                precursor.metaValueExists('collision energy') else -1)

            # first survey peak at or above the precursor m/z; an index past the end means no such peak
            precursor_index = np.searchsorted(survey_peaks[:, 0], [precursor_mz])[0]
            if precursor_index != survey_peaks.shape[0]:
                precursor_err = precursor_mz - survey_peaks[precursor_index, 0]
                precursor_int = survey_peaks[precursor_index, 1]
            else:
                precursor_err = np.nan
                precursor_int = np.nan

            tandem_spectrum_metrics_MS2['RT'].append(rt)
            tandem_spectrum_metrics_MS2['precursor_intensity'].append(precursor_int)  # TODO different from mzid->mzml getPrecursors[0].getIntensity() ? YES, latter one usually zero
            tandem_spectrum_metrics_MS2['precursor_error'].append(precursor_err)
            tandem_spectrum_metrics_MS2['precursor_mz'].append(precursor_mz)
            tandem_spectrum_metrics_MS2['precursor_c'].append(precursor.getCharge())

            tandem_spectrum_metrics_MS2['surveyscan_intensity_sum'].append(last_surveyscan_intensity)
            tandem_spectrum_metrics_MS2['surveyscan_intensity_max'].append(last_surveyscan_max)

            iso_lower = precursor.getIsolationWindowLowerOffset()
            iso_upper = precursor.getIsolationWindowUpperOffset()
            isolation_window_metrics['RT'].append(rt)
            isolation_window_metrics['isolation_target'].append(precursor_mz)  # https://github.com/OpenMS/OpenMS/blob/d17cc251fd0c4068eb253b03c9fb107897771fdc/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp#L1992
            isolation_window_metrics['isolation_lower'].append(iso_lower)
            isolation_window_metrics['isolation_upper'].append(iso_upper)
            lower = precursor_mz - iso_lower
            upper = precursor_mz + iso_upper

            # survey peaks that fall inside the isolation window (bounds inclusive)
            in_window = np.logical_and(survey_peaks[:, 0] >= lower, survey_peaks[:, 0] <= upper)
            s = survey_peaks[in_window]
            isolation_window_metrics['peaks_in_window'].append(np.shape(s)[0])

            int_sort_desc = np.flip(np.argsort(s[:,1]))
            ranked_int = s[int_sort_desc][:,1]  # window intensities, highest first
            if np.shape(s)[0] > 1:
                # NOTE(review): the trailing [0] binds to the denominator slice only, so every
                # ranked intensity but the last is divided by the second-highest intensity.
                # The original comment described pairwise ratios (top1&2, 2&3, ...).
                # Behaviour kept as-is - confirm the intent.
                isolation_window_metrics['int_ratio_ranked_peaks_in_window'].append(
                    ranked_int[:-1]/ranked_int[1:][0])
            else:
                isolation_window_metrics['int_ratio_ranked_peaks_in_window'].append(0)  # bigger is better, though best is 0

            isolation_window_metrics['summed_window_intensity'].append(np.sum(ranked_int))
            isolation_window_metrics['isolation_target_intensity'].append(precursor.getIntensity())

            # TODO this needs to go outside
            tol = 0.5
            if spec.metaValueExists('filter string'):
                filter_string = spec.getMetaValue('filter string')
                if 'FTMS' in filter_string:
                    tol = 0.05
                elif 'ITMS' in filter_string:
                    tol = 0.5
                elif 'QTOF' in filter_string:  #TOFMS, SQMS, TQMS, SectorMS
                    tol = 0.1

            # ms2 peaks directly from isolation window?
            # Broadcast (window peaks x MS2 peaks) in one call instead of rebuilding the MS2 m/z list per window peak.
            unfragmented = np.isclose(s[:, :1], spec_mz, atol=tol).any()
            isolation_window_metrics['peaks_in_window_in_ms2'].append(str(unfragmented))

    ## Spectra detail numbers
    add_metric("Spectrum acquisition metric values - MS1", spectrum_acquisition_metrics_MS1)
    add_metric("Spectrum acquisition metric values - MS2", spectrum_acquisition_metrics_MS2)
    add_metric("Spectra topn ranks", spectrum_topn)
    add_metric("Tandem spectrum metric values - MS2", tandem_spectrum_metrics_MS2)
    add_metric("Trap metric values - MS1", trap_metrics_MS1)
    add_metric("Trap metric values - MS2", trap_metrics_MS2)
    add_metric("isolation window metrics", isolation_window_metrics)

    ## Spectra numbers
    for level, count in mslevelcounts.items():
        add_metric("Number of MS{l} spectra".format(l=str(level)), count)

    chroms = exp.getChromatograms()
    add_metric("Number of chromatograms", len(chroms))

    ## Ranges
    add_metric("MZ aquisition range", [min_mz,max_mz])

    # An experiment without spectra has no first/last RT; report an empty range instead of raising.
    n_spectra = exp.size()
    rt_range = [exp[0].getRT(),exp[n_spectra-1].getRT()] if n_spectra > 0 else []
    add_metric("RT aquisition range", rt_range)

    # TIC
    add_metric("Total ion current", tic_tab)

    # Chrom
    chrom_tab: Dict[str,List[Any]] = defaultdict(list)
    for t in chroms:
        if t.getChromatogramType() == oms.ChromatogramSettings.ChromatogramType.TOTAL_ION_CURRENT_CHROMATOGRAM:
            for chro_peak in t:
                chrom_tab['RT'].append(chro_peak.getRT())
                chrom_tab['int'].append(chro_peak.getIntensity())
            break  # only the first total ion current chromatogram is exported

    add_metric("Chromatogram", chrom_tab)
    # TODO is there a difference between TIC as defined in MS:1000235 and the chromatogram you get from TRP?? In MZML it says its a MS:1000235 (ion current detected in each of a series of mass spectra) but is it?
    # TODO consider collection of spectrum_native_id
    return mzqc.RunQuality(metadata=meta, qualityMetrics=metrics)
コード例 #3
0
    def store(self, ofname: str, peakMap: pyopenms.MSExperiment):
        """Write the MS2 spectra of ``peakMap`` to the text file ``ofname``.

        The file starts with a block of header lines (creation date, extractor
        info, data type, first/last scan). Every MS2 spectrum then contributes
        a scan line, a set of info lines, a charge line carrying the M+H mass,
        and one "m/z intensity" line per peak. MS1 spectra are not written;
        they only update the precursor scan reported for the MS2 spectra that
        follow. Other MS levels are ignored.

        ``peakMap`` is sorted in place (and its ranges updated) when it is not
        already sorted. Header values are read before the output file is
        opened, and the file is closed on exit even if writing fails.

        Keyword arguments:
        ofname: path of the file to write; its base name with 'ms2' replaced by
            'ms1' is reported as the precursor file
        peakMap: the experiment to write; must hold at least one spectrum, and
            every MS2 spectrum must have at least one precursor
        """

        def _text(value):
            # The original code assumed bytes; accept str as well in case the
            # installed pyopenms returns text for native IDs.
            return value.decode('utf-8') if isinstance(value, bytes) else value

        #sort peakMap if necessary
        if not peakMap.isSorted():
            peakMap.sortSpectra()
            peakMap.updateRanges()

        firstSpectrum = peakMap.getSpectrum(0)
        firstScan = self._getScan(_text(firstSpectrum.getNativeID()))
        dataType = firstSpectrum.getType()
        dataType = 'Centroid' if dataType == 1 else 'Profile' if dataType == 2 else 'Unknown'
        lastScan = self._getScan(
            _text(peakMap.getSpectrum(peakMap.getNrSpectra() - 1).getNativeID()))
        precursorFile = basename(ofname).replace('ms2', 'ms1')

        headerValues = [
            ('CreationDate',
             datetime.datetime.now().strftime('%m/%d/%Y %I:%M:%S %p')),
            ('Extractor', 'msConvert'),
            ('ExtractorVersion', '0.1'),
            ('Comments', 'msConvert was written by Aaron Maurais, 2019'),
            ('ExtractorOptions', 'MS2'),
            ('AcquisitionMethod', 'Data-Dependent'),
            ('InstrumentType', 'Unknown'),
            ('ScanType', 'MS2'),
            ('DataType', dataType),
            ('FirstScan', firstScan),
            ('LastScan', lastScan),
        ]

        with open(ofname, 'w') as outF:
            #print header
            for key, value in headerValues:
                outF.write(self._writeValue(MS2File._h_tag, key, value))

            #iterate through spectra
            preScan = 'Unknown'
            for scan in peakMap.getSpectra():
                msLevel = scan.getMSLevel()
                if msLevel == 1:
                    # remember the latest MS1 scan as precursor scan of the following MS2 scans
                    preScan = self._getScan(_text(scan.getNativeID()))
                    continue
                if msLevel != 2:
                    continue

                #write header info
                curScan = self._getScan(_text(scan.getNativeID()))
                precursors = scan.getPrecursors()
                preCharge = int(precursors[0].getCharge())
                preMZ = precursors[0].getMZ()

                #print scan line
                outF.write('{}\t{}\t{}\t{}\n'.format(MS2File._s_tag,
                                                     curScan.zfill(6),
                                                     curScan.zfill(6), preMZ))

                ameth = ' '.join(
                    [MS2File._activationMethods[x]
                     for x in precursors[0].getActivationMethods()])
                if not ameth:
                    ameth = 'Unknown'

                #print scan info
                scanInfo = [
                    ('RetTime', scan.getRT()),
                    ('PrecursorInt', precursors[0].getIntensity()),
                    ('IonInjectionTime', 'Unknown'),
                    ('ActivationType', ameth),
                    ('PrecursorFile', precursorFile),
                    ('PrecursorScan', preScan),
                    ('InstrumentType', 'Unknown'),
                ]
                for key, value in scanInfo:
                    outF.write(self._writeValue(MS2File._i_tag, key, value))

                #write z line
                #after charge, the M+H m/z for the ion is listed, so calculate that here
                outF.write(
                    self._writeValue(
                        MS2File._z_tag, preCharge, (float(preMZ) * preCharge) -
                        (preCharge * MS2File._H_mass) + MS2File._H_mass))

                #write ions
                outF.writelines(
                    '{0:.4f} {1:.1f}\n'.format(round(ion.getMZ(), 4),
                                               round(ion.getIntensity(), 1))
                    for ion in scan)