def test_create_model_wrapper():
    """Test the model_wrapper.create_model_wrapper factory function.

    Verifies correct attribute initialization for model_type == 'NN'
    (directories, empty transformer lists, wrapper class) and for
    model_type == 'RF' (wrapper class, params), and that an unsupported
    model_type raises ValueError.

    Dependencies:
        None

    Calls:
        DCNNModelWrapper, DCRFModelWrapper
    """
    inp_params = parse.wrapper(general_params)
    featurization = feat.create_featurization(inp_params)
    mdl = model_wrapper.create_model_wrapper(inp_params, featurization)
    mdl.setup_model_dirs()

    # testing for correct attribute initialization with model_type == "NN"
    test = []
    test.append(mdl.params.model_type == 'NN')
    test.append(isinstance(mdl.featurization, feat.DynamicFeaturization))
    test.append(mdl.output_dir == inp_params.output_dir)
    test.append(mdl.model_dir == inp_params.output_dir + '/' + 'model')
    test.append(mdl.best_model_dir == inp_params.output_dir + '/' + 'best_model')
    test.append(mdl.baseline_model_dir == inp_params.output_dir + '/' + 'baseline_epoch_model')
    test.append(mdl.transformers == [])
    test.append(mdl.transformers_x == [])
    test.append(isinstance(mdl, model_wrapper.DCNNModelWrapper))

    # testing for correct attribute initialization with model_type == "RF"
    temp_params = copy.deepcopy(inp_params)
    temp_params.model_type = 'RF'
    featurization = feat.create_featurization(temp_params)
    mdl_RF = model_wrapper.create_model_wrapper(temp_params, featurization)
    test.append(isinstance(mdl_RF, MP.model_wrapper.DCRFModelWrapper))
    test.append(mdl_RF.params.model_type == 'RF')

    # assertion for all tests
    assert all(test)

    # testing for Exception with model_type not in ['NN', 'RF'].
    # Set up the bad value *before* the with-block so that only the call
    # under test runs inside pytest.raises.
    temp_params.model_type = 'wrong'
    with pytest.raises(ValueError):
        model_wrapper.create_model_wrapper(temp_params, featurization)
def test_super_transform_dataset():
    """Test ModelWrapper.transform_dataset on a featurized dataset.

    Builds a regression/NN model wrapper over the Delaney dataset, fits
    the transformers, applies them, and verifies the transformed dataset
    type, X values, and response-array lengths.

    Dependencies:
        model_dataset.create_transformers

    Calls:
        None
    """
    # Build a regression/NN model wrapper over a featurized dataset.
    inp_params = parse.wrapper(general_params)
    featurization = feat.create_featurization(inp_params)
    dset = model_dataset.create_model_dataset(inp_params, featurization, ds_client=None)
    raw_df = dset.load_full_dataset()
    dset.get_dataset_tasks(raw_df)
    dset.check_task_columns(raw_df)
    dset.get_featurized_data()

    wrapper = model_wrapper.create_model_wrapper(inp_params, dset.featurization)
    wrapper.setup_model_dirs()
    wrapper.create_transformers(dset)
    transformed = wrapper.transform_dataset(dset.dataset)

    checks = [
        # the transformed dataset should still be a DeepChem dataset
        isinstance(transformed, DD),
        # not a descriptor featurization, so X values are unchanged
        (transformed.X == dset.dataset.X).all(),
        # response arrays keep their lengths
        len(transformed.y) == len(dset.dataset.y),
        len(transformed.y) == len(transformed.ids),
    ]
    assert all(checks)
def test_train_NN_graphconv_scaffold_inputs():
    """Check hyperparameter pass-through to the DeepChem GraphConvModel.

    Verifies that layer_sizes, dropouts, model_dir and mode supplied via
    the parameter namespace are correctly propagated into the wrapped
    GraphConvModel instance.

    Dependencies:
        ModelPipeline creation
        featurization creation
        creation of model_wrapper

    Calls:
        model_wrapper.create_model_wrapper
    """
    # Mutate a deep copy rather than the shared module-level general_params
    # dict, so the graphconv settings do not leak into other tests.
    graphconv_params = copy.deepcopy(general_params)
    graphconv_params['featurizer'] = 'graphconv'
    graphconv_params['layer_sizes'] = '100,100,10'
    graphconv_params['dropouts'] = '0.3,0.3,0.1'
    graphconv_params['uncertainty'] = False
    inp_params = parse.wrapper(graphconv_params)

    mp = MP.ModelPipeline(inp_params)
    mp.featurization = feat.create_featurization(inp_params)
    mp.model_wrapper = model_wrapper.create_model_wrapper(
        inp_params, mp.featurization, mp.ds_client)

    # asserting that the correct model is created with the correct layer
    # sizes, dropouts, model_dir, and mode by default
    test1 = []
    test1.append(mp.model_wrapper.params.layer_sizes == [100, 100, 10])
    test1.append(mp.model_wrapper.params.dropouts == [0.3, 0.3, 0.1])
    # checking that parameters are properly passed to the deepchem model object
    test1.append(isinstance(mp.model_wrapper.model, GraphConvModel))
    test1.append(
        mp.model_wrapper.model.model_dir == mp.model_wrapper.model_dir)
    test1.append(
        [i.out_channel for i in mp.model_wrapper.model.model.graph_convs] == [100, 100])
    test1.append(
        [i.rate for i in mp.model_wrapper.model.model.dropouts] == [0.3, 0.3, 0.1])
    test1.append(mp.model_wrapper.model.mode == 'regression')
    test1.append(mp.model_wrapper.model.model.dense.units == 10)
    assert all(test1)

#***********************************************************************************
def test_super_get_train_valid_pred_results():
    """Placeholder for ModelWrapper.get_train_valid_pred_results.

    Returns:
        dict: A dictionary of the prediction results

    Dependencies:
        create_perf_data

    Calls:
        perf_data.get_prediction_results()
    """
    pass
    # should be tested in perf_data.get_prediction_results()
    # should still be called to make sure that the function is callable

#***********************************************************************************
def test_super_get_test_perf_data():
    """Placeholder for ModelWrapper.get_test_perf_data.

    Returns:
        perf_data: PerfData object containing the predicted values and
            metrics for the current test dataset

    Dependencies:
        A model must be in model_dir
        model_dataset.test_dset must exist

    Calls:
        create_perf_data
        self.generate_predictions
        perf_data.accumulate_preds
    """
    pass
    # mostly tested in accumulate_preds, but should be tested to ensure
    # that the predictions are properly being called

#***********************************************************************************
def test_super_get_test_pred_results():
    """Placeholder for ModelWrapper.get_test_pred_results.

    Returns:
        dict: A dictionary containing the prediction values and metrics
            for the current dataset.

    Dependencies:
        A model must be in model_dir
        model_dataset.test_dset must exist

    Calls:
        self.get_test_perf_data
        perf_data.get_prediction_results
    """
    pass
    # mostly tested in perf_data.get_prediction_results

#***********************************************************************************
def test_super_get_full_dataset_perf_data():
    """Placeholder for ModelWrapper.get_full_dataset_perf_data.

    Returns:
        perf_data: PerfData object containing the predicted values and
            metrics for the current full dataset

    Dependencies:
        A model must already be trained

    Calls:
        create_perf_data
        self.generate_predictions
        self.accumulate_preds
    """
    pass

#***********************************************************************************
def test_super_get_full_dataset_pred_results():
    """Placeholder for ModelWrapper.get_full_dataset_pred_results.

    Returns:
        dict: A dictionary containing predicted values and metrics for
            the current full dataset

    Dependencies:
        A model was already be trained.

    Calls:
        get_full_dataset_perf_data
        self.get_prediction_results()
    """
    pass
def test_super_create_transformers():
    """Test ModelWrapper.create_transformers for a regression NN model.

    Verifies that a NormalizationTransformer is created for the response
    values, that no X transformers are created for ECFP featurization,
    that the fitted transformers are pickled to disk, and that a fresh
    classification wrapper starts with empty transformer lists.

    Raises:
        Exception when failing to save to the datastore

    Dependencies:
        create_featurization
        create_model_dataset
        model_dataset.load_full_dataset
        model_dataset.get_dataset_tasks
        model_dataset.check_task_columns
        model_dataset.get_featurized_data

    Calls:
        self.featurization.create_feature_transformer
        dsf.upload_pickle_to_DS
    """
    # Build a regression/NN model wrapper over a featurized dataset.
    inp_params = parse.wrapper(general_params)
    featurization = feat.create_featurization(inp_params)
    dset = model_dataset.create_model_dataset(inp_params, featurization, ds_client=None)
    raw_df = dset.load_full_dataset()
    dset.get_dataset_tasks(raw_df)
    dset.check_task_columns(raw_df)
    dset.get_featurized_data()

    wrapper = model_wrapper.create_model_wrapper(inp_params, dset.featurization)
    wrapper.setup_model_dirs()

    checks = []
    # correct model_wrapper build with regression and NN
    checks.append(wrapper.params.prediction_type == 'regression')
    checks.append(wrapper.params.model_type == 'NN')

    wrapper.create_transformers(dset)
    checks.append(
        isinstance(wrapper.transformers[0],
                   dc.trans.transformers.NormalizationTransformer))
    checks.append(wrapper.transformers_x == [])

    # fitted transformers should be pickled under the output directory
    pickle_path = os.path.join(wrapper.output_dir, 'transformers.pkl')
    checks.append(os.path.isfile(pickle_path))

    # TODO: test proper saving of the transformer to the datastore
    # TODO: test when transformers is False:
    inp_params.prediction_type = 'classification'
    wrapper = model_wrapper.create_model_wrapper(inp_params, featurization)
    checks.append(wrapper.transformers == [])
    checks.append(wrapper.transformers_x == [])
    assert all(checks)