Beispiel #1
0
 def should_include(self, lookup: str, compound: ChemblCompound,
                    data: NestedDotDict, target: Target) -> bool:
     """
     Decide whether a single activity record passes all filters.

     The checks run from cheapest to most expensive: fields of the activity
     record itself first, then the assay (fetched through ``self.api``), and
     finally the target type.

     Args:
         lookup: Label for the query; only used in the log message.
         compound: Not read by this method.
         data: The activity record to test.
         target: The target of the activity; only ``target.type`` is read.

     Returns:
         ``True`` if the record should be kept, ``False`` otherwise.

     NOTE(review): ``data.req_as`` presumably raises when the key is missing;
     confirm against ``NestedDotDict``.
     """
     bad_flags = {
         "potential missing data",
         "potential transcription error",
         "outside typical range",
     }
     # Guard clauses, evaluated in the same order as a single short-circuiting
     # ``or`` chain would evaluate them.
     if data.get_as("data_validity_comment", lambda s: s.lower()) in bad_flags:
         return False
     if data.req_as("standard_relation", str) not in ("=", "<", "<="):
         return False
     # NOTE(review): "B" is presumably the ChEMBL code for binding assays -- confirm.
     if data.req_as("assay_type", str) != "B":
         return False
     if (data.get("target_tax_id") is None
             or data.get_as("target_tax_id", int) not in self.tax):
         return False
     if (data.get("pchembl_value") is None
             or data.req_as("pchembl_value", float) < self.config.min_pchembl):
         return False
     # Any flag that survived the filter above is tolerated, but still reported.
     validity_comment = data.get("data_validity_comment")
     if validity_comment is not None:
         logger.warning(
             f"Activity annotation for {lookup} has flag '{validity_comment}' (ok)"
         )
     # The `target_organism` doesn't always match the `assay_organism`
     # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for Homo sapiens in Xenopus laevis
     # However, it's often something like yeast expressing a human / mouse / etc receptor
     # So there's no need to filter by it
     assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
     confidence_score = assay.get("confidence_score")
     if confidence_score is None or confidence_score < self.config.min_confidence_score:
         return False
     # "Trash" target types are always excluded; "strange" ones only when the
     # configured confidence threshold is above 3.
     if target.type.is_trash or (
         target.type.is_strange and self.config.min_confidence_score > 3
     ):
         logger.warning(f"Excluding {target} with type {target.type}")
         return False
     return True
Beispiel #2
0
 def load(cls, data: NestedDotDict) -> Settings:
     """
     Build a ``Settings`` object from a nested configuration mapping.

     Each value is read with ``data.get_as(key, type, fallback)``. The keys and
     the fallbacks passed for them are:

     - ``is_testing``: ``False``
     - ``mandos.taxon``: ``7742``
     - ``mandos.min_pchembl``: ``6.0``
     - ``mandos.min_confidence_score``: ``4``
     - ``mandos.min_phase``: ``3``
     - ``chembl.cache_path``: a directory chosen from ``IN_CLI`` (see below)
     - ``chembl.n_retries``: ``1``
     - ``chembl.fast_save``: ``True``
     - ``chembl.timeout_sec``: ``1``

     Args:
         data: The configuration mapping to read from.

     Returns:
         A ``Settings`` instance built positionally from the values above.
     """
     # NOTE(review): a bare "117571" was noted here without explanation; it may
     # be an alternative taxon ID for ``mandos.taxon`` -- confirm or drop.
     fallback_cache = (
         Path(__file__).parent.parent.parent / "tests" / "resources" / ".mandos-cache"
         if IN_CLI
         else Path.home() / ".mandos" / "chembl"
     )
     read = data.get_as
     is_testing = read("is_testing", bool, False)
     taxon = read("mandos.taxon", int, 7742)
     min_pchembl = read("mandos.min_pchembl", float, 6.0)
     min_confidence = read("mandos.min_confidence_score", int, 4)
     min_phase = read("mandos.min_phase", int, 3)
     cache_path = read("chembl.cache_path", Path, fallback_cache)
     n_retries = read("chembl.n_retries", int, 1)
     fast_save = read("chembl.fast_save", bool, True)
     timeout_sec = read("chembl.timeout_sec", int, 1)
     return Settings(
         is_testing,
         taxon,
         min_pchembl,
         min_confidence,
         min_phase,
         cache_path,
         n_retries,
         fast_save,
         timeout_sec,
     )