def should_include(
    self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: Target
) -> bool:
    """
    Decide whether a single activity record should be kept.

    A record is rejected when any of the following holds:

    - its lowercased ``data_validity_comment`` is one of the known bad flags
    - its ``standard_relation`` is not one of ``=``, ``<``, ``<=``
    - its ``assay_type`` is not ``"B"``
    - its ``target_tax_id`` is missing or not contained in ``self.tax``
    - its ``pchembl_value`` is missing or below ``self.config.min_pchembl``
    - the assay's ``confidence_score`` is missing or below
      ``self.config.min_confidence_score``
    - the target type is flagged ``is_trash``, or is flagged ``is_strange``
      while ``self.config.min_confidence_score`` is greater than 3

    Args:
        lookup: The string the compound was looked up by; used in log messages only.
        compound: The compound the activity belongs to (not read by this method).
        data: The activity record.
        target: The target the activity was recorded against.

    Returns:
        True if the record passes every filter, False otherwise.
    """
    bad_flags = {
        "potential missing data",
        "potential transcription error",
        "outside typical range",
    }
    if (
        data.get_as("data_validity_comment", lambda s: s.lower()) in bad_flags
        or data.req_as("standard_relation", str) not in ["=", "<", "<="]
        or data.req_as("assay_type", str) != "B"
        or data.get("target_tax_id") is None
        or data.get_as("target_tax_id", int) not in self.tax
        or data.get("pchembl_value") is None
        or data.req_as("pchembl_value", float) < self.config.min_pchembl
    ):
        return False
    # Any remaining validity comment is not in the bad list: keep the record but log it.
    validity_comment = data.get("data_validity_comment")
    if validity_comment is not None:
        logger.warning(
            f"Activity annotation for {lookup} has flag '{validity_comment}' (ok)"
        )
    # The `target_organism` doesn't always match the `assay_organism`
    # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for Homo sapiens in Xenopus laevis
    # However, it's often something like yeast expressing a human / mouse / etc. receptor
    # So there's no need to filter by it
    assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
    confidence_score = assay.get("confidence_score")
    if confidence_score is None or confidence_score < self.config.min_confidence_score:
        return False
    # `and` binds tighter than `or`; the parentheses only make that explicit.
    if target.type.is_trash or (
        target.type.is_strange and self.config.min_confidence_score > 3
    ):
        logger.warning(f"Excluding {target} with type {target.type}")
        return False
    return True
def process(
    self, lookup: str, compound: ChemblCompound, indication: NestedDotDict
) -> IndicationHit:
    """
    Build an ``IndicationHit`` from one indication record for a compound.

    Args:
        lookup: The string the compound was looked up by; stored on the hit as-is.
        compound: The compound; its ``chid``, ``inchikey`` and ``name`` are copied to the hit.
        indication: The indication record; ``drugind_id``, ``mesh_id``,
            ``mesh_heading`` and ``max_phase_for_ind`` are read from it.

    Returns:
        The hit, with the MeSH ID as ``object_id`` and the MeSH heading
        (stripped of leading and trailing newlines) as ``object_name``.
    """
    # Values are pulled in the same order the constructor arguments are listed.
    record_id = indication.req_as("drugind_id", str)
    compound_id = compound.chid
    inchikey = compound.inchikey
    compound_name = compound.name
    mesh_id = indication.req_as("mesh_id", str)
    mesh_heading = indication.req_as("mesh_heading", str).strip("\n")
    phase = indication.req_as("max_phase_for_ind", int)
    return IndicationHit(
        record_id,
        compound_id,
        inchikey,
        lookup,
        compound_name,
        object_id=mesh_id,
        object_name=mesh_heading,
        max_phase=phase,
    )
def _extract(
    self, lookup: str, compound: ChemblCompound, data: NestedDotDict
) -> NestedDotDict:
    """
    Collect the fields of interest from an activity record into a flat ``NestedDotDict``.

    Logs a warning when the record's ``target_organism`` differs from the name of
    the taxon that ``self.tax`` resolves for its ``target_tax_id``.

    Args:
        lookup: The string the compound was looked up by; stored as ``compound_lookup``.
        compound: The compound; its ``chid``, ``inchikey`` and ``name`` are copied over.
        data: The activity record.

    Returns:
        A ``NestedDotDict`` with the record ID, compound fields, taxon ID and name,
        pChEMBL value, standard type, source ID and target ChEMBL ID.
    """
    # Both fields are guaranteed to be present by the query that produced `data`.
    organism_name = data.req_as("target_organism", str)
    taxon = self.tax.req(data.req_as("target_tax_id", int))
    if organism_name != taxon.name:
        logger.warning(f"Target organism {organism_name} is not {taxon.name}")
    fields = {
        "record_id": data.req_as("activity_id", str),
        "compound_id": compound.chid,
        "inchikey": compound.inchikey,
        "compound_name": compound.name,
        "compound_lookup": lookup,
        "taxon_id": taxon.id,
        "taxon_name": taxon.name,
        "pchembl": data.req_as("pchembl_value", float),
        "std_type": data.req_as("standard_type", str),
        "src_id": data.req_as("src_id", str),
        "exact_target_id": data.req_as("target_chembl_id", str),
    }
    return NestedDotDict(fields)