Example #1
0
 def to_df_by_assay(self, assay: str) -> pd.DataFrame:
     """Return the aggregated results table for ``assay`` as a DataFrame.

     The assay's zip archive is obtained through ``cache`` under
     ``tox21/<assay>.zip``, with ``self.download(assay)`` supplied as the
     producer callable. The first archive member whose name ends in
     ``aggregrated.txt`` is parsed as a tab-separated file.

     Args:
         assay: Assay identifier; used for the cached file name and passed
             to ``self.download``.

     Returns:
         The parsed table, read with ``index_col=False``.

     Raises:
         FileNotFoundError: If the archive has no member ending in
             ``aggregrated.txt`` (previously this silently returned
             ``None``).
     """
     raw = cache("raw",
                 Path("tox21") / f"{assay}.zip",
                 lambda: self.download(assay))
     # NOTE(review): the spelling "aggregrated" is kept exactly as the
     # original code had it; it presumably mirrors the file names inside
     # the archive -- verify before "correcting" it.
     suffix = "aggregrated.txt"
     with BytesIO(raw) as raw_fd, ZipFile(raw_fd) as zip_fd:
         inner_filename = next(
             (name for name in zip_fd.namelist() if name.endswith(suffix)),
             None,
         )
         if inner_filename is None:
             raise FileNotFoundError(
                 f"No member ending in {suffix!r} found in the archive "
                 f"for assay {assay!r}"
             )
         with zip_fd.open(inner_filename) as inner_fd:
             # read_csv consumes the stream fully before the handles close.
             return pd.read_csv(inner_fd, sep="\t", index_col=False)
Example #2
0
    def __init__(self, mode="bow"):
        """Set ``self.model`` from the cache entry for the chosen mode.

        The cache entry is named ``<self.filename>_<mode>.pkz`` in the
        ``"model"`` category. The callable handed to ``cache`` trains the
        model: ``"default"`` calls ``self.train()`` with no arguments, the
        other modes call ``self.train`` with hyperparameters built by
        ``ChemicalLanguageHyperparameters.from_dict`` from a fixed set of
        overrides.

        Args:
            mode: One of ``"default"``, ``"bow"``, ``"lda"`` or
                ``"doc2vec"``.

        Raises:
            ValueError: If ``mode`` is not one of the supported values
                (including non-string values such as ``None``).
        """
        model_file = f"{self.filename}_{mode}.pkz"

        # Hyperparameter overrides for each non-default mode.
        mode_overrides = {
            "bow": {
                "vector_algo": "bow",
                "max_vocab": 5000,
                "max_ngram": 4,
            },
            "lda": {
                "vector_algo": "lda",
                "topics": 1000,
            },
            # NOTE(review): no "vector_algo" is given for doc2vec;
            # presumably it is the hyperparameter default -- confirm.
            "doc2vec": {
                "doc_epochs": 300,
                "vec_dims": 512,
            },
        }

        if mode == "default":
            trainer = self.train
        elif isinstance(mode, str) and mode in mode_overrides:
            overrides = mode_overrides[mode]

            def trainer():
                return self.train(
                    ChemicalLanguageHyperparameters.from_dict(overrides))
        else:
            # Format rather than concatenate so that a non-string mode
            # still produces ValueError instead of a TypeError.
            raise ValueError(f"Invalid mode: {mode}")

        self.model = cache("model", model_file, trainer)
Example #3
0
    def __init__(self, mode="prod"):
        """Choose a preprocessor and obtain the three models for ``mode``.

        ``mode`` is matched by substring: ``"doc2vec"`` is checked first,
        then ``"lda"``, and anything else falls back to ``"bow"`` when
        building ``self.preprocessor``. If ``mode`` contains ``"test"`` the
        models come straight from ``self.train(score=True,
        task_duration=12000)``; otherwise they are obtained through
        ``cache`` under ``<self.filename>_<mode>.pkz``.

        In both cases the result is unpacked into ``self.safety``,
        ``self.feasibility`` and ``self.bbbp``.
        """
        cache_name = f"{self.filename}_{mode}.pkz"

        # First matching algorithm name wins; "bow" is the fallback.
        algo = next(
            (candidate for candidate in ("doc2vec", "lda")
             if candidate in mode),
            "bow",
        )
        self.preprocessor = ChemicalLanguage(algo)

        if "test" not in mode:
            models = cache("model", cache_name, self.train)
        else:
            # Test modes bypass the cache and train with scoring enabled.
            models = self.train(score=True, task_duration=12000)

        self.safety, self.feasibility, self.bbbp = models
Example #4
0
 def __init__(self):
     """Create the "bow" preprocessor and obtain the model via ``cache``.

     The cache location is ``MODEL_ROOT / self.filename`` and
     ``self.train`` is passed as the callable that produces the model.
     """
     self.preprocessor = ChemicalLanguage("bow")
     # NOTE(review): ``cache`` is called here with a path and a callable
     # only -- confirm this matches the helper's signature.
     model_path = MODEL_ROOT / self.filename
     self.model = cache(model_path, self.train)
Example #5
0
 def to_df(self) -> pd.DataFrame:
     """Return the raw dataset obtained through ``cache``.

     The entry is ``<self.filename>.parquet`` in the ``"raw"`` category,
     with ``self.download`` passed as the producer callable (presumably
     invoked only when the entry is missing -- confirm against ``cache``).
     """
     return cache("raw", self.filename + ".parquet", self.download)
Example #6
0
 def to_df(self) -> pd.DataFrame:
     """Return the constructed dataset obtained through ``cache``.

     The entry is ``<self.filename>.parquet`` in the ``"constructed"``
     category, with ``self.construct`` passed as the producer callable
     (presumably invoked only when the entry is missing -- confirm against
     ``cache``).
     """
     return cache("constructed", self.filename + ".parquet", self.construct)