Beispiel #1
0
def test_SyntaxCheck():
    """Assemble a minimal mzQC document in memory and pass it to the syntactic check.

    The document holds one run quality (two metrics) and one set quality
    (one metric) sharing the same metadata, plus two controlled vocabularies.
    The test relies on ``validate`` raising to signal a failure; nothing is
    asserted explicitly.
    """
    # Constructed only; this parameter is not attached to the document below.
    _standalone_param = qc.CvParameter(cvRef="REF",
                                       accession="TEST:123",
                                       name="testname",
                                       value=99)

    # --- metadata: one input file and one analysis software entry ---
    mzml_format = qc.CvParameter("MS", "MS:1000584", "mzML format")
    completion_time = qc.CvParameter(cvRef="MS",
                                     accession="MS:1000747",
                                     name="completion time",
                                     value="2017-12-08-T15:38:57Z")
    input_file = qc.InputFile(name="file.raw",
                              location="file:///dev/null",
                              fileFormat=mzml_format,
                              fileProperties=[completion_time])
    software = qc.AnalysisSoftware(
        cvRef="QC",
        accession="QC:9999999",
        name="bigwhopqc",
        version="1.2.3",
        uri="file:///dev/null")  # isn't requiring a uri a bit too much?
    metadata = qc.MetaDataParameters(inputFiles=[input_file],
                                     analysisSoftware=[software])

    # --- quality metrics: two for the run, one for the set ---
    run_metric_specs = (
        ("QC:4000053", "RT duration", 99),
        ("QC:4000061", "Maximal MS2 frequency", 999),
    )
    run_metrics = [
        qc.QualityMetric(cvRef="QC", accession=accession, name=label, value=amount)
        for accession, label, amount in run_metric_specs
    ]
    set_metric = qc.QualityMetric(cvRef="QC",
                                  accession="QC:4000055",
                                  name="MS1 quantiles RT fraction",
                                  value=9)
    run_quality = qc.RunQuality(metadata=metadata, qualityMetrics=run_metrics)
    set_quality = qc.SetQuality(metadata=metadata, qualityMetrics=[set_metric])

    # --- controlled vocabularies and the enclosing file object ---
    vocabularies = [
        qc.ControlledVocabulary(ref="QC", name="QCvocab", uri="www.qc.ml"),
        qc.ControlledVocabulary(ref="REF", name="TEST", uri="www.eff.off"),
    ]
    mzqc_file = qc.MzQcFile(version="0.0.11",
                            runQualities=[run_quality],
                            setQualities=[set_quality],
                            controlledVocabularies=vocabularies)

    checker = sy.SyntacticCheck()
    # The serialised file object is wrapped in a top-level "mzQC" member before validation.
    payload = '{ "mzQC": ' + qc.JsonSerialisable.ToJson(mzqc_file) + ' }'
    checker.validate(payload)
Beispiel #2
0
                                       accession="MS:1000747",
                                       name="completion time",
                                       value="2017-12-08-T15:38:57Z")
                    ])
# Module-level fixtures: these objects are built once at import time and are
# referenced by name from the tests in this module.

# Analysis software entry that goes into the shared metadata.
anso = qc.AnalysisSoftware(
    cvRef="QC",
    accession="QC:9999999",
    name="bigwhopqc",
    version="1.2.3",
    uri="file:///dev/null")  # isn't requiring a uri a bit too much?
# Metadata shared by the run and set quality below; `infi` is the input file
# fixture defined earlier in the module.
meta = qc.MetaDataParameters(inputFiles=[infi], analysisSoftware=[anso])
# A single metric, reused in both the run quality and the set quality.
qm = qc.QualityMetric(cvRef="QC",
                      accession="QC:4000053",
                      name="RT duration",
                      value=99)
rq = qc.RunQuality(metadata=meta, qualityMetrics=[qm])
sq = qc.SetQuality(metadata=meta, qualityMetrics=[qm])
# Only vocabulary registered in the file object; note its ref is "REF" while
# `qm` and `anso` use cvRef "QC".
cv = qc.ControlledVocabulary(ref="REF", name="TEST", uri="www.eff.off")
# Top-level file object bundling one run quality, one set quality and the vocabulary.
mzqc = qc.MzQcFile(version="0.0.11",
                   runQualities=[rq],
                   setQualities=[sq],
                   controlledVocabularies=[cv])


class TestSerialisation:
    """Compare the JSON serialisation of module-level fixtures with expected values."""

    def test_ControlledVocabulary(self):
        """Serialising the `cv` fixture must equal the expected `CV` value."""
        serialised = qc.JsonSerialisable.ToJson(cv)
        assert serialised == CV

    def test_CvParameter(self):
        """Serialising the `cvt` fixture must equal the expected `CVT` value."""
        serialised = qc.JsonSerialisable.ToJson(cvt)
        assert serialised == CVT
Beispiel #3
0
def getBasicQuality(exp: oms.MSExperiment, verbose: bool=False) -> mzqc.RunQuality:
    """
    getBasicQuality calculates the basic QualityMetrics from a mass spectrometry peak file and creates the related RunQuality object.

    Calculated basic QC metrics and proto-metrics necessary to calculate more elaborate QC metrics with additional data (e.g. ID).

    The function makes one pass over all spectra. MS1 spectra feed the MS1
    acquisition, trap and TIC tables and are remembered as the current survey
    scan. MS2 spectra feed the MS2 acquisition, trap, tandem and isolation
    window tables, which relate the first precursor of the MS2 spectrum to the
    most recent survey scan. Spectra of other MS levels are only counted.

    Side effect: ``exp.sortSpectra()`` is called, so the spectra of ``exp``
    are reordered in place.

    Parameters
    ----------
    exp : oms.MSExperiment
        The mass spectrometry peak file to calculate metrics from
    verbose : bool, optional
        switches on verbose logging, by default False (currently not used by this function)

    Returns
    -------
    mzqc.RunQuality
        A RunQuality object containing the list of metrics calculated and metadata collected, ready for integration into a mzQC file object.
    """
    metrics: List[mzqc.QualityMetric] = list()

    def add_metric(name, value):
        """Append a QualityMetric with the placeholder accession QC:0000000."""
        metrics.append(
            mzqc.QualityMetric(cvRef="QC",
                    accession="QC:0000000",
                    name=name,
                    value=value)
        )

    settings = exp.getExperimentalSettings()
    instrument = exp.getInstrument()

    instr_srl: str = instrument.getMetaValue('instrument serial number') \
        if instrument.metaValueExists('instrument serial number') else 'unknown'  # MS:1000529 in mzML

    input_loc: str = settings.getLoadedFilePath()
    base_name: str = basename(input_loc)
    chksm: str = utils.sha256fromfile(input_loc)
    cmpltn: str = exp.getDateTime().get()

    meta: mzqc.MetaDataParameters = mzqc.MetaDataParameters(
        inputFiles=[
            mzqc.InputFile(name=base_name,location=input_loc,
                        fileFormat=mzqc.CvParameter("MS", "MS:1000584", "mzML format"),
                        fileProperties=[
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000747",
                                name="completion time",
                                value=cmpltn
                            ),
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000569",
                                name="SHA-256",
                                value=chksm
                            ),
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000031",
                                name="instrument model",
                                value=instrument.getName()
                            ),
                            mzqc.CvParameter(cvRef="MS",
                                accession="MS:1000529",
                                name="instrument serial number",
                                value=instr_srl
                            )
                            # TODO integrate parent location and checksum
                            # (name, checksum and checksum type of settings.getSourceFiles()[0])
                            # id: MS:1002846 (Associated raw file URI) N.B. definition is PRIDE specific
                            # fitting checksum cv missing
                        ]
            )
        ],
        analysisSoftware=[
            mzqc.AnalysisSoftware(cvRef="MS", accession="MS:1000752", name="TOPP software", version=oms.__version__, uri="openms.de")
        ]
    )

    # this is mighty important to sort by RT
    exp.sortSpectra()

    # Precursor m/z range over all MS2 spectra.
    # NOTE(review): when there is no MS2 spectrum the sentinels below are reported unchanged.
    min_mz: float = sys.maxsize
    max_mz: float = 0
    mslevelcounts: Dict[int,int] = defaultdict(int)

    # One table per metric; every table maps a column name to a list of values.
    spectrum_acquisition_metrics_MS1: Dict[str,List[Any]] = defaultdict(list)
    spectrum_acquisition_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    spectrum_topn: Dict[str,List[Any]] = defaultdict(list)  # never filled here, still reported below
    tandem_spectrum_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    trap_metrics_MS1: Dict[str,List[Any]] = defaultdict(list)
    trap_metrics_MS2: Dict[str,List[Any]] = defaultdict(list)
    isolation_window_metrics: Dict[str,List[Any]] = defaultdict(list)
    tic_tab: Dict[str,List[Any]] = defaultdict(list)

    # ActivationMethod look-up dict (integer value -> attribute name)
    ams = {getattr(ActivationMethod,i): i for i in dir(ActivationMethod) if type(getattr(ActivationMethod,i)) is int }

    # State of the most recent MS1 (survey) scan. The defaults describe "no
    # survey scan seen yet", so an MS2 spectrum that precedes every MS1 spectrum
    # yields NaN/empty survey-related values instead of failing on unbound names.
    last_surveyscan_index: int = 0
    last_surveyscan_intensity: float = np.nan
    last_surveyscan_max: float = np.nan
    survey_peaks = np.empty((0, 2))  # rows of (m/z, intensity) of the survey scan

    for spin, spec in enumerate(exp):
        mslevel = spec.getMSLevel()
        mslevelcounts[mslevel] += 1

        iontraptime = utils.getTrapTime(spec)
        spec_mz, spec_int = spec.get_peaks()
        # .item() for dtype to pytype; an empty spectrum has no maximum, use 0.0 then
        intens_sum = spec_int.sum().item()
        intens_max = spec_int.max().item() if spec_int.size else 0.0

        if mslevel == 1:
            last_surveyscan_index = spin
            last_surveyscan_intensity = intens_sum
            last_surveyscan_max = intens_max
            # Cache the survey peaks once here instead of re-reading them for every dependent MS2.
            survey_peaks = np.column_stack((spec_mz, spec_int)).astype(float)

            spectrum_acquisition_metrics_MS1['RT'].append(spec.getRT())
            spectrum_acquisition_metrics_MS1['SN'].append(noiseqc.getSN_medianmethod(spec))
            spectrum_acquisition_metrics_MS1['peakcount'].append(spec.size())
            spectrum_acquisition_metrics_MS1['int'].append(intens_sum)

            trap_metrics_MS1['RT'].append(spec.getRT())
            trap_metrics_MS1['traptime'].append(iontraptime)

            tic_tab['RT'].append(spec.getRT())
            tic_tab['int'].append(intens_sum)

        if mslevel == 2:
            # Only the first precursor of the spectrum is considered.
            precursor = spec.getPrecursors()[0]
            precursor_mz = precursor.getMZ()
            min_mz = min(min_mz, precursor_mz)
            max_mz = max(max_mz, precursor_mz)

            spectrum_acquisition_metrics_MS2['RT'].append(spec.getRT())
            spectrum_acquisition_metrics_MS2['SN'].append(noiseqc.getSN_medianmethod(spec))
            spectrum_acquisition_metrics_MS2['peakcount'].append(spec.size())
            spectrum_acquisition_metrics_MS2['int'].append(intens_sum)
            spectrum_acquisition_metrics_MS2['native_id'].append(utils.spec_native_id(spec))

            # Position of this spectrum counted from the last survey scan.
            rank = spin - last_surveyscan_index
            spectrum_acquisition_metrics_MS2['rank'].append(rank)

            trap_metrics_MS2['RT'].append(spec.getRT())
            trap_metrics_MS2['traptime'].append(iontraptime)
            trap_metrics_MS2['activation_method'].append(ams.get(next(iter(precursor.getActivationMethods()), None),'unknown'))
            trap_metrics_MS2['activation_energy'].append(precursor.getMetaValue('collision energy') if \
                precursor.metaValueExists('collision energy') else -1)

            # Insertion position of the precursor m/z among the survey peaks, i.e.
            # the first survey peak at or above it (assumes the survey peaks are
            # ordered by m/z - TODO confirm). A position past the last peak
            # (including an empty survey scan) means there is nothing to compare to.
            precursor_index = np.searchsorted(survey_peaks[:, 0], precursor_mz)
            if precursor_index != survey_peaks.shape[0]:
                precursor_err = precursor_mz - survey_peaks[precursor_index, 0]
                precursor_int = survey_peaks[precursor_index, 1]
            else:
                precursor_err = np.nan
                precursor_int = np.nan

            tandem_spectrum_metrics_MS2['RT'].append(spec.getRT())
            tandem_spectrum_metrics_MS2['precursor_intensity'].append(precursor_int)  # TODO different from mzid->mzml getPrecursors[0].getIntensity() ? YES, latter one usually zero
            tandem_spectrum_metrics_MS2['precursor_error'].append(precursor_err)
            tandem_spectrum_metrics_MS2['precursor_mz'].append(precursor_mz)
            tandem_spectrum_metrics_MS2['precursor_c'].append(precursor.getCharge())

            tandem_spectrum_metrics_MS2['surveyscan_intensity_sum'].append(last_surveyscan_intensity)
            tandem_spectrum_metrics_MS2['surveyscan_intensity_max'].append(last_surveyscan_max)

            lower_offset = precursor.getIsolationWindowLowerOffset()
            upper_offset = precursor.getIsolationWindowUpperOffset()
            isolation_window_metrics['RT'].append(spec.getRT())
            isolation_window_metrics['isolation_target'].append(precursor_mz)  # https://github.com/OpenMS/OpenMS/blob/d17cc251fd0c4068eb253b03c9fb107897771fdc/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp#L1992
            isolation_window_metrics['isolation_lower'].append(lower_offset)
            isolation_window_metrics['isolation_upper'].append(upper_offset)
            lower = precursor_mz - lower_offset
            upper = precursor_mz + upper_offset

            # Survey peaks whose m/z lies inside the isolation window (bounds inclusive).
            in_window = np.logical_and(survey_peaks[:, 0] >= lower, survey_peaks[:, 0] <= upper)
            s = survey_peaks[in_window]
            isolation_window_metrics['peaks_in_window'].append(np.shape(s)[0])

            # Window peaks ordered by descending intensity.
            ranked = s[np.flip(np.argsort(s[:, 1]))]
            if np.shape(s)[0] > 1:
                # NOTE(review): the trailing [0] makes the divisor the single
                # second-highest intensity, so every ranked intensity but the last
                # is divided by that one value. The original comment described
                # pairwise ratios (top1&2, 2&3, ...); behaviour kept as is - confirm intent.
                isolation_window_metrics['int_ratio_ranked_peaks_in_window'].append(
                    ranked[:-1, 1] / ranked[1:, 1][0])
            else:
                isolation_window_metrics['int_ratio_ranked_peaks_in_window'].append(0)  # bigger is better, though best is 0

            isolation_window_metrics['summed_window_intensity'].append(np.sum(ranked[:, 1]))
            isolation_window_metrics['isolation_target_intensity'].append(precursor.getIntensity())

            # TODO this needs to go outside
            # Absolute m/z tolerance chosen from the analyser named in the filter string.
            tol = 0.5
            if spec.metaValueExists('filter string'):
                filter_string = spec.getMetaValue('filter string')
                if 'FTMS' in filter_string:
                    tol = 0.05
                elif 'ITMS' in filter_string:
                    tol = 0.5
                elif 'QTOF' in filter_string:  #TOFMS, SQMS, TQMS, SectorMS
                    tol = 0.1

            # ms2 peaks directly from isolation window?
            # True when any window peak is matched (within tol) by any peak of this MS2 spectrum.
            unfragmented = np.any([np.isclose(window_mz, spec_mz, atol=tol) for window_mz in s[:, 0]])
            isolation_window_metrics['peaks_in_window_in_ms2'].append(str(unfragmented))

    ## Spectra detail numbers
    add_metric("Spectrum acquisition metric values - MS1", spectrum_acquisition_metrics_MS1)
    add_metric("Spectrum acquisition metric values - MS2", spectrum_acquisition_metrics_MS2)
    add_metric("Spectra topn ranks", spectrum_topn)
    add_metric("Tandem spectrum metric values - MS2", tandem_spectrum_metrics_MS2)
    add_metric("Trap metric values - MS1", trap_metrics_MS1)
    add_metric("Trap metric values - MS2", trap_metrics_MS2)
    add_metric("isolation window metrics", isolation_window_metrics)

    ## Spectra numbers
    for level, count in mslevelcounts.items():
        add_metric("Number of MS{l} spectra".format(l=str(level)), count)

    chroms = exp.getChromatograms()
    add_metric("Number of chromatograms", len(chroms))

    ## Ranges
    # The metric names below are emitted verbatim (including their spelling),
    # since consumers may look the metrics up by name.
    add_metric("MZ aquisition range", [min_mz,max_mz])
    add_metric("RT aquisition range", [exp[0].getRT(),exp[exp.size()-1].getRT()])

    # TIC
    add_metric("Total ion current", tic_tab)

    # Chrom
    # Only the first chromatogram typed as total ion current is tabulated.
    chrom_tab: Dict[str,List[Any]] = defaultdict(list)
    for t in chroms:
        if t.getChromatogramType() == oms.ChromatogramSettings.ChromatogramType.TOTAL_ION_CURRENT_CHROMATOGRAM:
            for chro_peak in t:
                chrom_tab['RT'].append(chro_peak.getRT())
                chrom_tab['int'].append(chro_peak.getIntensity())
            break

    add_metric("Chromatogram", chrom_tab)
    # TODO is there a difference between TIC as defined in MS:1000235 and the chromatogram you get from TRP?? In MZML it says its a MS:1000235 (ion current detected in each of a series of mass spectra) but is it?
    # TODO consider collection of spectrum_native_id
    return mzqc.RunQuality(metadata=meta, qualityMetrics=metrics)