def should_include(self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: Target) -> bool:
    """
    Decide whether a single activity record should be kept.

    The record is rejected (``False``) as soon as any of these checks fails,
    evaluated in this order:

    1. ``data_validity_comment`` (lower-cased) is one of the known bad flags.
    2. ``standard_relation`` is not one of ``=``, ``<``, ``<=``.
    3. ``assay_type`` is not ``"B"``.
    4. ``target_tax_id`` is missing or not contained in ``self.tax``.
    5. ``pchembl_value`` is missing or below ``self.config.min_pchembl``.
    6. The assay fetched via ``self.api.assay.get`` has no
       ``confidence_score`` or one below ``self.config.min_confidence_score``.
    7. The target type is flagged ``is_trash``, or it is flagged
       ``is_strange`` while ``self.config.min_confidence_score > 3``.

    Args:
        lookup: Identifier of the query; only used in log messages.
        compound: Not read by this method.
        data: The activity record.
        target: The target the activity refers to; only ``target.type`` is read.

    Returns:
        ``True`` if the record passes every check, ``False`` otherwise.
    """
    bad_flags = {
        "potential missing data",
        "potential transcription error",
        "outside typical range",
    }
    if data.get_as("data_validity_comment", lambda s: s.lower()) in bad_flags:
        return False
    if data.req_as("standard_relation", str) not in ("=", "<", "<="):
        return False
    if data.req_as("assay_type", str) != "B":
        return False
    if data.get("target_tax_id") is None or data.get_as("target_tax_id", int) not in self.tax:
        return False
    if (
        data.get("pchembl_value") is None
        or data.req_as("pchembl_value", float) < self.config.min_pchembl
    ):
        return False
    # Any validity comment that survived the bad-flag check is tolerated, but worth surfacing.
    validity_comment = data.get("data_validity_comment")
    if validity_comment is not None:
        logger.warning(
            f"Activity annotation for {lookup} has flag '{validity_comment}' (ok)"
        )
    # The `target_organism` doesn't always match the `assay_organism`.
    # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for Homo sapiens in Xenopus laevis.
    # However, it's often something like yeast expressing a human / mouse / etc. receptor,
    # so there's no need to filter by it.
    assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
    confidence_score = assay.get("confidence_score")
    if confidence_score is None or confidence_score < self.config.min_confidence_score:
        return False
    # `and` binds tighter than `or`; the parentheses make that existing grouping explicit.
    if target.type.is_trash or (
        target.type.is_strange and self.config.min_confidence_score > 3
    ):
        logger.warning(f"Excluding {target} with type {target.type}")
        return False
    return True
def load(cls, data: NestedDotDict) -> Settings:
    """
    Build a ``Settings`` instance from ``data``, falling back to defaults.

    Each setting is read with ``data.get_as(key, type, default)`` and passed
    to ``Settings`` positionally, in the order listed in ``spec`` below.

    The default for ``chembl.cache_path`` depends on ``IN_CLI``: when it is
    truthy, ``tests/resources/.mandos-cache`` three directories above this
    file; otherwise ``~/.mandos/chembl``.
    NOTE(review): confirm that pointing at the tests directory when
    ``IN_CLI`` is set is the intended direction of this switch.

    Args:
        data: The configuration to read from.

    Returns:
        The populated ``Settings``.
    """
    # NOTE(review): the original carried a bare `# 117571` marker here;
    # presumably an alternative taxon ID for `mandos.taxon` -- confirm.
    default_cache = (
        Path(__file__).parent.parent.parent / "tests" / "resources" / ".mandos-cache"
        if IN_CLI
        else Path.home() / ".mandos" / "chembl"
    )
    # (key, converter, default) -- the order must match Settings' positional fields.
    spec = (
        ("is_testing", bool, False),
        ("mandos.taxon", int, 7742),
        ("mandos.min_pchembl", float, 6.0),
        ("mandos.min_confidence_score", int, 4),
        ("mandos.min_phase", int, 3),
        ("chembl.cache_path", Path, default_cache),
        ("chembl.n_retries", int, 1),
        ("chembl.fast_save", bool, True),
        ("chembl.timeout_sec", int, 1),
    )
    values = [data.get_as(key, kind, fallback) for key, kind, fallback in spec]
    return Settings(*values)