Ejemplo n.º 1
0
 def __call__(self, spectrum: SpectrumType,
              reference_spectrum: SpectrumType) -> bool:
     """Check whether two spectra have parent masses within tolerance.

     Parameters
     ----------
     spectrum
         Spectrum whose ``"parent_mass"`` entry is read via ``.get``.
     reference_spectrum
         Spectrum to compare against; its ``"parent_mass"`` entry is read
         the same way.

     Returns
     -------
     bool
         Result of ``abs(difference) <= self.tolerance``.

     Raises
     ------
     AssertionError
         If either spectrum has no ``"parent_mass"`` entry.
     """
     parentmass = spectrum.get("parent_mass")
     parentmass_ref = reference_spectrum.get("parent_mass")
     # Raise explicitly instead of using ``assert``: assert statements are
     # removed under ``python -O``, which would let a missing value fall
     # through to an obscure TypeError in the subtraction below. The exception
     # type and message are kept so existing callers are unaffected.
     if parentmass is None or parentmass_ref is None:
         raise AssertionError("Missing parent mass.")
     return abs(parentmass - parentmass_ref) <= self.tolerance
Ejemplo n.º 2
0
    def pair(self, reference: SpectrumType, query: SpectrumType) -> float:
        """Match the metadata entry ``self.field`` of two spectra.

        With ``self.matching_type == "equal_match"`` the two entries are
        compared for equality. Otherwise both entries must be ``int`` or
        ``float`` and match when their absolute difference is at most
        ``self.tolerance``. A missing entry on either side, or a non-numerical
        entry in the difference case, yields a non-match.

        Parameters
        ----------
        reference
            Single reference spectrum.
        query
            Single query spectrum.

        Returns
        -------
        The match result wrapped by ``np.asarray`` with dtype
        ``self.score_datatype``.
        """
        value_ref = reference.get(self.field)
        value_query = query.get(self.field)
        numeric_types = (int, float)

        if value_ref is None or value_query is None:
            # Nothing to compare against: count as a non-match.
            matched = False
        elif self.matching_type == "equal_match":
            matched = (value_ref == value_query)
        elif (isinstance(value_ref, numeric_types)
              and isinstance(value_query, numeric_types)):
            matched = abs(value_ref - value_query) <= self.tolerance
        else:
            logger.warning(
                "Non-numerical entry not compatible with 'difference' method")
            matched = False

        return np.asarray(matched, dtype=self.score_datatype)
Ejemplo n.º 3
0
    def pair(self, reference: SpectrumType, query: SpectrumType) -> float:
        """Compare parent masses between reference and query spectrum.

        Parameters
        ----------
        reference
            Single reference spectrum.
        query
            Single query spectrum.

        Returns
        -------
        Result of ``abs(difference) <= self.tolerance`` for the two
        ``"parent_mass"`` entries (a boolean, despite the ``float``
        annotation kept for interface compatibility).

        Raises
        ------
        AssertionError
            If either spectrum has no ``"parent_mass"`` entry.
        """
        parentmass_ref = reference.get("parent_mass")
        parentmass_query = query.get("parent_mass")
        # Explicit raise instead of ``assert`` so the check is not removed
        # under ``python -O``; exception type and message are unchanged.
        if parentmass_ref is None or parentmass_query is None:
            raise AssertionError("Missing parent mass.")

        return abs(parentmass_ref - parentmass_query) <= self.tolerance
Ejemplo n.º 4
0
    def pair(self, reference: SpectrumType, query: SpectrumType) -> float:
        """Compare precursor m/z between reference and query spectrum.

        When ``self.type`` is ``"Dalton"`` the absolute difference of the two
        ``"precursor_mz"`` entries is compared with ``self.tolerance``. For
        any other ``self.type`` the difference is first divided by the mean of
        the two values, so the tolerance is applied to the relative
        difference.

        Parameters
        ----------
        reference
            Single reference spectrum.
        query
            Single query spectrum.

        Returns
        -------
        The comparison result wrapped by ``numpy.asarray`` with dtype
        ``self.score_datatype`` (for both tolerance types).

        Raises
        ------
        AssertionError
            If either spectrum has no ``"precursor_mz"`` entry.
        """
        precursormz_ref = reference.get("precursor_mz")
        precursormz_query = query.get("precursor_mz")
        # Explicit raise instead of ``assert`` so the check is not removed
        # under ``python -O``; exception type and message are unchanged.
        if precursormz_ref is None or precursormz_query is None:
            raise AssertionError("Missing precursor m/z.")

        difference = abs(precursormz_ref - precursormz_query)
        if self.type != "Dalton":
            # Relative tolerance: scale by the mean of the two m/z values.
            mean_mz = (precursormz_ref + precursormz_query) / 2
            difference = difference / mean_mz

        score = difference <= self.tolerance
        # Both tolerance types return the same kind of object; previously the
        # "Dalton" branch returned the bare comparison result instead.
        return numpy.asarray(score, dtype=self.score_datatype)
Ejemplo n.º 5
0
    def pair(self, reference: SpectrumType, query: SpectrumType) -> float:
        """Score the similarity of the fingerprints of two spectra.

        The scoring function is selected by ``self.similarity_measure``:
        ``"jaccard"``, ``"dice"`` or ``"cosine"``.

        Parameters
        ----------
        reference
            Single reference spectrum.
        query
            Single query spectrum.

        Raises
        ------
        NotImplementedError
            If ``self.similarity_measure`` is none of the names above.
        """
        fingerprints = (reference.get("fingerprint"),
                        query.get("fingerprint"))
        measure = self.similarity_measure

        if measure == "jaccard":
            score = jaccard_index(*fingerprints)
        elif measure == "dice":
            score = dice_similarity(*fingerprints)
        elif measure == "cosine":
            score = cosine_similarity(*fingerprints)
        else:
            raise NotImplementedError
        return score
Ejemplo n.º 6
0
    def get_tanimoto_for_spectrum_ids(
            self, query_spectrum: SpectrumType,
            spectra_ids_list: List[str]) -> pd.DataFrame:
        """Returns a dataframe with tanimoto scores

        Spectra in spectra_ids_list whose stored inchikey is shorter than 14
        characters (e.g. missing) are left out of the result.

        Args:
        ------
        query_spectrum:
            Single Spectrum, the tanimoto scores are calculated between this
            spectrum and the spectra in spectra_ids_list.
        spectra_ids_list:
            list of spectrum_ids, which are preselected matches of the
            query_spectrum

        Returns:
        ------
        Dataframe indexed by the kept spectrum ids with a single column
        "Tanimoto_score", filled from
        ``self.tanimoto_scores.loc[inchikey14, query_inchikey14]``.

        Raises:
        ------
        AssertionError
            If the first 14 characters of the query inchikey are fewer than
            14 characters.
        """
        query_inchikey14 = query_spectrum.get("inchikey")[:14]
        # Explicit raise instead of ``assert`` so the check is not removed
        # under ``python -O``; exception type and message are unchanged.
        if len(query_inchikey14) != 14:
            raise AssertionError(
                f"Expected inchikey of length 14, "
                f"got inchikey = {query_inchikey14}")

        # Get inchikeys belonging to spectra ids
        metadata_dict = get_metadata_from_sqlite(
            self.sqlite_file_name, spectra_ids_list,
            self.settings["spectrum_id_column_name"])

        inchikey14s_dict = {}
        for spectrum_id in spectra_ids_list:
            # Only get the first 14 characters of the inchikeys
            inchikey14 = metadata_dict[spectrum_id]["inchikey"][:14]
            # Don't save spectra that do not have an inchikey. If a spectra has
            # no inchikey it is stored as "", so it will not be stored.
            if len(inchikey14) == 14:
                inchikey14s_dict[spectrum_id] = inchikey14

        # The frame is created empty and filled cell by cell, exactly as
        # before, so the resulting column dtype does not change.
        tanimoto_scores_spectra_ids = pd.DataFrame(
            columns=["Tanimoto_score"], index=list(inchikey14s_dict))
        for spectrum_id, inchikey14 in inchikey14s_dict.items():
            tanimoto_scores_spectra_ids.at[spectrum_id, "Tanimoto_score"] = \
                self.tanimoto_scores.loc[inchikey14, query_inchikey14]
        return tanimoto_scores_spectra_ids