Example #1
0
 def should_include(self, lookup: str, compound: ChemblCompound,
                    data: NestedDotDict, target: Target) -> bool:
     """
     Decide whether an activity record should be kept.

     Args:
         lookup: The string the compound was looked up by; only used in log messages
         compound: The compound the record belongs to (not consulted here)
         data: The activity record
         target: The target the activity refers to; only its ``type`` is inspected

     Returns:
         False if any of the following holds, True otherwise:

         - ``data_validity_comment`` (lowercased) is one of the rejected flags
         - ``standard_relation`` is not ``=``, ``<``, or ``<=``
         - ``assay_type`` is not ``B``
         - ``target_tax_id`` is missing or not in ``self.tax``
         - ``pchembl_value`` is missing or below ``self.config.min_pchembl``
         - the assay's ``confidence_score`` is missing or below
           ``self.config.min_confidence_score``
         - the target type is trash, or is strange while
           ``self.config.min_confidence_score`` is above 3
     """
     bad_flags = {
         "potential missing data",
         "potential transcription error",
         "outside typical range",
     }
     # The checks run in a fixed order and stop at the first failure,
     # so a later ``req_as`` is never evaluated for an already-rejected record.
     validity = data.get_as("data_validity_comment", lambda s: s.lower())
     if validity in bad_flags:
         return False
     if data.req_as("standard_relation", str) not in ("=", "<", "<="):
         return False
     if data.req_as("assay_type", str) != "B":
         return False
     if (data.get("target_tax_id") is None
             or data.get_as("target_tax_id", int) not in self.tax):
         return False
     if (data.get("pchembl_value") is None
             or data.req_as("pchembl_value", float) < self.config.min_pchembl):
         return False
     # Any remaining flag is tolerated, but worth surfacing
     raw_flag = data.get("data_validity_comment")
     if raw_flag is not None:
         logger.warning(
             f"Activity annotation for {lookup} has flag '{raw_flag}' (ok)"
         )
     # The `target_organism` doesn't always match the `assay_organism`
     # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for Homo sapiens in Xenopus laevis
     # However, it's often something like yeast expressing a human / mouse / etc receptor
     # So there's no need to filter by it
     assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
     confidence_score = assay.get("confidence_score")
     if confidence_score is None or confidence_score < self.config.min_confidence_score:
         return False
     # `and` binds tighter than `or`; the parentheses only make that grouping explicit
     target_type = target.type
     if target_type.is_trash or (
             target_type.is_strange and self.config.min_confidence_score > 3):
         logger.warning(f"Excluding {target} with type {target.type}")
         return False
     return True
Example #2
0
    def get_compound_dot_dict(self, inchikey: str) -> NestedDotDict:
        """
        Fetch a compound record, substituting its parent molecule when it has a different one.

        Args:
            inchikey: The compound query. When ``get_query_type`` classifies it as SMILES,
                      a flexmatch search on the canonical SMILES is run;
                      otherwise it is passed directly to ``molecule.get``.

        Returns:
            The record wrapped in a ``NestedDotDict``.
            **Only** ``molecule_chembl_id``, ``pref_name``, and ``molecule_structures``
            are guaranteed to exist

        Raises:
            AssertionError: If a SMILES search does not return exactly one match
            ValueError: If the lookup result is None
        """
        if self.get_query_type(inchikey) == QueryType.smiles:
            wanted_fields = ["molecule_chembl_id", "pref_name", "molecule_structures"]
            query = self.api.molecule.filter(
                molecule_structures__canonical_smiles__flexmatch=inchikey
            )
            matches = list(query.only(wanted_fields))
            assert len(matches) == 1, f"{len(matches)} matches for {inchikey}"
            found = matches[0]
        else:
            found = self.api.molecule.get(inchikey)
        if found is None:
            raise ValueError(f"Result for compound {inchikey} is null!")
        compound = NestedDotDict(found)
        # The molecule_hierarchy key can be present with an explicit None value
        if compound.get("molecule_hierarchy") is None:
            return compound
        parent_id = compound["molecule_hierarchy"]["parent_chembl_id"]
        if parent_id == compound["molecule_chembl_id"]:
            # Already the parent; nothing to swap in
            return compound
        return NestedDotDict(self.api.molecule.get(parent_id))
Example #3
0
    def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
        """
        Turn one record for a compound into hits on its target and the targets reached by traversal.

        Args:
            lookup: The string the compound was looked up by; used in the log message and passed to ``to_hit``
            compound: The compound the record belongs to
            data: The record; it must carry a ``target_chembl_id`` to produce any hits

        Returns:
            The concatenated ``to_hit`` results for every target yielded by ``traversal_strategy``,
            or an empty list when the record has no target ID or ``should_include`` rejects it
        """
        if data.get("target_chembl_id") is None:
            logger.debug(f"target_chembl_id missing from mechanism '{data}' for compound {lookup}")
            return []
        found_target = TargetFactory.find(data["target_chembl_id"], self.api)
        if not self.should_include(lookup, compound, data, found_target):
            return []
        # For a non-traversable type (like DNA), traversal hands back the source target itself.
        # Whether to filter those out was already the subclass's decision, so nothing more to do here.
        return [
            hit
            for related in self.traversal_strategy(found_target)
            for hit in self.to_hit(lookup, compound, data, related)
        ]
Example #4
0
    def find(cls, chembl: str) -> Target:
        """
        Look up a single target by its ChEMBL ID and build a ``cls`` instance from the record.

        Args:
            chembl: The ``target_chembl_id`` to filter on

        Returns:
            An instance of ``cls`` built from the record's ``target_chembl_id``,
            its ``pref_name`` (fetched with ``get``), and ``TargetType.of`` its ``target_type``

        Raises:
            AssertionError: If the query does not match exactly one target
        """
        matches = cls.api().target.filter(target_chembl_id=chembl)
        n_matches = len(matches)
        assert n_matches == 1, f"Found {n_matches} targets for {chembl}"
        record = NestedDotDict(matches[0])
        chembl_id = record["target_chembl_id"]
        pref_name = record.get("pref_name")
        target_type = TargetType.of(record["target_type"])
        return cls(chembl=chembl_id, name=pref_name, type=target_type)
Example #5
0
 def _process(self, match: ProteinHit,
              target: NestedDotDict) -> Sequence[GoHit]:
     """
     Build one GO hit per distinct GO term cross-referenced by the target's components.

     Args:
         match: The protein hit the terms are derived from; its compound fields
                are copied onto every returned hit
         target: The target record. ``target_components`` and each component's
                 ``target_component_xrefs`` may be missing or None.

     Returns:
         One ``GoHit`` per distinct ``(xref_id, xref_name)`` pair whose ``xref_src_db``
         is ``Go`` followed by the capitalized name of ``self.go_type``,
         in the order the pairs are first encountered
     """
     # Loop-invariant: the xref source database name for the requested GO type
     wanted_db = f"Go{self.go_type.name.capitalize()}"
     # A dict de-duplicates while keeping first-seen order; iterating a set of strings
     # would make the order of the returned hits vary between interpreter runs.
     terms = {}
     if target.get("target_components") is not None:
         for comp in target["target_components"]:
             if comp.get("target_component_xrefs") is None:
                 continue
             for xref in comp["target_component_xrefs"]:
                 if xref["xref_src_db"] == wanted_db:
                     terms[(xref["xref_id"], xref["xref_name"])] = None
     # NOTE(review): the first positional GoHit argument is passed as None, unchanged
     # from before — presumably a record ID assigned elsewhere; confirm against GoHit.
     return [
         GoHit(
             None,
             compound_id=match.compound_id,
             inchikey=match.inchikey,
             compound_lookup=match.compound_lookup,
             compound_name=match.compound_name,
             object_id=xref_id,
             object_name=xref_name,
             go_type=self.go_type.name,
             protein_hit=match,
         )
         for xref_id, xref_name in terms
     ]