# Restore the previously trained tox model, then stage the SIDER dataset for a
# train/valid/test split.
# NOTE(review): this fragment depends on names bound earlier in the file and
# not visible here (tox_tasks, tox_task_types, params_dict, tox_model_dir,
# model_builder, verbosity, reload, load_sider, RandomSplitter, os).
tox_model = SingletaskToMultitask(tox_tasks, tox_task_types, params_dict,
                                  tox_model_dir, model_builder,
                                  verbosity=verbosity)
# Reload the already-fitted per-task models from tox_model_dir instead of
# refitting them.
tox_model.reload()
""" Load sider models now """
# Hard-coded user-specific path -- presumably only runs on the original
# author's machine; TODO confirm / parameterize.
base_sider_data_dir = "/home/apappu/deepchem-models/toxcast_models/sider/sider_data"
sider_tasks, sider_dataset, sider_transformers = load_sider(
    base_sider_data_dir, reload=reload)
base_sider_dir = "/home/apappu/deepchem-models/toxcast_models/sider/sider_analysis"
# On-disk destinations for each split and for the model to be trained below.
sider_train_dir = os.path.join(base_sider_dir, "train_dataset")
sider_valid_dir = os.path.join(base_sider_dir, "valid_dataset")
sider_test_dir = os.path.join(base_sider_dir, "test_dataset")
sider_model_dir = os.path.join(base_sider_dir, "model")
# Randomly partition the SIDER dataset into train/valid/test, materialized
# into the three directories above.
sider_splitter = RandomSplitter()
sider_train_dataset, sider_valid_dataset, sider_test_dataset = sider_splitter.train_valid_test_split(
    sider_dataset, sider_train_dir, sider_valid_dir, sider_test_dir)
# Fit Logistic Regression models
# Every SIDER task is treated as a binary classification problem.
sider_task_types = {task: "classification" for task in sider_tasks}
""" Script that trains Sklearn multitask models on the sider dataset @Author Bharath Ramsundar, Aneesh Pappu """ from __future__ import print_function from __future__ import division from __future__ import unicode_literals import os import shutil import numpy as np import deepchem as dc from sider_datasets import load_sider from sklearn.ensemble import RandomForestClassifier sider_tasks, datasets, transformers = load_sider() train_dataset, valid_dataset, test_dataset = datasets metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean, mode="classification") def model_builder(model_dir): sklearn_model = RandomForestClassifier(class_weight="balanced", n_estimators=100) return dc.models.SklearnModel(sklearn_model, model_dir) model = dc.models.SingletaskToMultitask(sider_tasks, model_builder)
""" Script that trains Sklearn multitask models on the sider dataset @Author Bharath Ramsundar, Aneesh Pappu """ from __future__ import print_function from __future__ import division from __future__ import unicode_literals import os import shutil import numpy as np import deepchem as dc from sider_datasets import load_sider from sklearn.ensemble import RandomForestClassifier sider_tasks, datasets, transformers = load_sider() train_dataset, valid_dataset, test_dataset = datasets metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean, mode="classification") def model_builder(model_dir): sklearn_model = RandomForestClassifier(class_weight="balanced", n_estimators=100) return dc.models.SklearnModel(sklearn_model, model_dir) model = dc.models.SingletaskToMultitask(sider_tasks, model_builder) # Fit trained model model.fit(train_dataset) model.save()
# Deepchem imports for the Keras-based SIDER experiment.
# NOTE(review): np, os, shutil, and load_sider must be imported earlier in the
# file; they are not visible in this fragment.
from deepchem.datasets import Dataset
from deepchem import metrics
from deepchem.metrics import Metric
from deepchem.utils.evaluate import Evaluator
from deepchem.models.keras_models.fcnet import MultiTaskDNN
from deepchem.models.keras_models import KerasModel
# Set some global variables up top
np.random.seed(123)  # fixed seed for reproducibility
reload = True  # reuse cached featurized data; NOTE(review): shadows a builtin name
verbosity = "high"
model = "logistic"  # model-type flag; presumably consumed later in the file -- TODO confirm
base_data_dir = "/tmp/sider_keras"
sider_tasks, dataset, transformers = load_sider(
    base_data_dir, reload=reload)
print("len(dataset)")
print(len(dataset))
base_dir = "/tmp/sider_analysis"
model_dir = os.path.join(base_dir, "model")
# Start every run from a clean analysis directory.
if os.path.exists(base_dir):
  shutil.rmtree(base_dir)
os.makedirs(base_dir)
# Load SIDER data
# NOTE(review): load_sider is invoked a second time with a different base
# directory, making the first load above look redundant -- confirm intent.
sider_tasks, sider_datasets, transformers = load_sider(
    base_dir, reload=reload)
# Two-way unpack: this variant of load_sider apparently returns only
# train/valid splits (no test set) -- TODO confirm against sider_datasets.
train_dataset, valid_dataset = sider_datasets
n_features = 1024  # presumably the fingerprint/featurization width -- TODO confirm