def should_include(
    self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: Target
) -> bool:
    """
    Decides whether an activity annotation passes all filters.

    The checks run in a fixed order and stop at the first failure:
    data-validity flag, ``standard_relation``, ``assay_type``, taxonomy ID,
    ``pchembl_value``, assay confidence score, and finally the target type.

    Args:
        lookup: What the compound was looked up by; only used in log messages
        compound: The compound the annotation belongs to (not inspected here)
        data: The activity record
        target: The target associated with the record

    Returns:
        True if the annotation should be kept, False if it should be dropped
    """
    bad_flags = {
        "potential missing data",
        "potential transcription error",
        "outside typical range",
    }
    if data.get_as("data_validity_comment", lambda s: s.lower()) in bad_flags:
        return False
    if data.req_as("standard_relation", str) not in ["=", "<", "<="]:
        return False
    if data.req_as("assay_type", str) != "B":
        return False
    if data.get("target_tax_id") is None or data.get_as("target_tax_id", int) not in self.tax:
        return False
    if (
        data.get("pchembl_value") is None
        or data.req_as("pchembl_value", float) < self.config.min_pchembl
    ):
        return False
    # Any flag that reaches this point is not in bad_flags, so it is only reported
    validity_comment = data.get("data_validity_comment")
    if validity_comment is not None:
        logger.warning(
            f"Activity annotation for {lookup} has flag '{validity_comment}' (ok)"
        )
    # The `target_organism` doesn't always match the `assay_organism`
    # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for Homo sapiens in Xenopus laevis
    # However, it's often something like yeast expressing a human / mouse / etc receptor
    # So there's no need to filter by it
    assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
    confidence_score = assay.get("confidence_score")
    if confidence_score is None or confidence_score < self.config.min_confidence_score:
        return False
    # `and` binds tighter than `or`: trash types are always excluded, while
    # strange types are excluded only when the minimum confidence score is above 3
    if target.type.is_trash or (
        target.type.is_strange and self.config.min_confidence_score > 3
    ):
        logger.warning(f"Excluding {target} with type {target.type}")
        return False
    return True
def get_compound_dot_dict(self, inchikey: str) -> NestedDotDict:
    """
    Fetches info about a compound and puts it into a dict.

    If the compound has a ``molecule_hierarchy`` whose parent differs from the
    compound itself, the parent's record is fetched and returned instead.

    Args:
        inchikey: The lookup string. Despite the name it is not always an
            InChIKey: when ``get_query_type`` classifies it as SMILES, a
            flexmatch structure search is run; otherwise it is passed
            directly to ``molecule.get``.

    Returns:
        **Only** ``molecule_chembl_id``, ``pref_name``, and ``molecule_structures``
        are guaranteed to exist

    Raises:
        AssertionError: If a SMILES search does not return exactly one match
        ValueError: If the direct lookup returns None
    """
    # CHEMBL
    kind = self.get_query_type(inchikey)
    if kind == QueryType.smiles:
        results = list(
            self.api.molecule.filter(
                molecule_structures__canonical_smiles__flexmatch=inchikey
            ).only(["molecule_chembl_id", "pref_name", "molecule_structures"])
        )
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged
        if len(results) != 1:
            raise AssertionError(f"{len(results)} matches for {inchikey}")
        result = results[0]
    else:
        result = self.api.molecule.get(inchikey)
    if result is None:
        raise ValueError(f"Result for compound {inchikey} is null!")
    ch = NestedDotDict(result)
    # molecule_hierarchy can have the actual value None
    if ch.get("molecule_hierarchy") is not None:
        parent = ch["molecule_hierarchy"]["parent_chembl_id"]
        if parent != ch["molecule_chembl_id"]:
            ch = NestedDotDict(self.api.molecule.get(parent))
    return ch
def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
    """
    Converts one record into hits, one batch per target reached by traversal.

    Args:
        lookup: What the compound was looked up by
        compound: The compound the record belongs to
        data: The record; must carry ``target_chembl_id`` to yield anything

    Returns:
        The hits for every target returned by the traversal strategy, in
        traversal order; empty if the record has no target ID or is rejected
        by ``should_include``
    """
    if data.get("target_chembl_id") is None:
        logger.debug(f"target_chembl_id missing from mechanism '{data}' for compound {lookup}")
        return []
    source = TargetFactory.find(data["target_chembl_id"], self.api)
    if not self.should_include(lookup, compound, data, source):
        return []
    # traverse() will return the source target if it's a non-traversable type (like DNA)
    # and the subclass decided whether to filter those
    # so don't worry about that here
    return [
        hit
        for reached in self.traversal_strategy(source)
        for hit in self.to_hit(lookup, compound, data, reached)
    ]
def find(cls, chembl: str) -> Target:
    """
    Looks up a single target and builds an instance of ``cls`` from it.

    Args:
        chembl: The value matched against ``target_chembl_id``

    Returns:
        An instance of ``cls`` with ``chembl``, ``name`` (taken from
        ``pref_name``, which may be missing), and ``type`` filled in

    Raises:
        AssertionError: If the query does not return exactly one target
    """
    targets = cls.api().target.filter(target_chembl_id=chembl)
    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged
    if len(targets) != 1:
        raise AssertionError(f"Found {len(targets)} targets for {chembl}")
    target = NestedDotDict(targets[0])
    return cls(
        chembl=target["target_chembl_id"],
        name=target.get("pref_name"),
        type=TargetType.of(target["target_type"]),
    )
def _process(self, match: ProteinHit, target: NestedDotDict) -> Sequence[GoHit]:
    """
    Builds one GO hit per distinct GO term cross-referenced by a target.

    Only xrefs whose ``xref_src_db`` equals ``"Go"`` followed by the capitalized
    name of ``self.go_type`` are used. Duplicate ``(xref_id, xref_name)`` pairs
    are collapsed, and hits are returned in the order the terms first appear.

    Args:
        match: The protein hit the GO hits are derived from
        target: The target record; ``target_components`` and each component's
            ``target_component_xrefs`` may be missing or None

    Returns:
        A list of hits, empty if no matching xrefs are found
    """
    # Loop-invariant, so computed once up front
    wanted_db = f"Go{self.go_type.name.capitalize()}"
    # Dict keys act as an insertion-ordered set: duplicates collapse, but unlike
    # a plain set the iteration order does not vary with string hash randomization
    terms = {}
    if target.get("target_components") is not None:
        for comp in target["target_components"]:
            if comp.get("target_component_xrefs") is None:
                continue
            for xref in comp["target_component_xrefs"]:
                if xref["xref_src_db"] == wanted_db:
                    terms[(xref["xref_id"], xref["xref_name"])] = None
    return [
        GoHit(
            None,
            compound_id=match.compound_id,
            inchikey=match.inchikey,
            compound_lookup=match.compound_lookup,
            compound_name=match.compound_name,
            object_id=xref_id,
            object_name=xref_name,
            go_type=self.go_type.name,
            protein_hit=match,
        )
        for xref_id, xref_name in terms
    ]