def test_model_classification():
    """Test model prediction function: classification"""
    fset = sample_featureset(10, 1, ["amplitude"], ["class1", "class2"])
    model = build_model.build_model_from_featureset(
        fset, model_type="RandomForestClassifier"
    )

    # Default output: a float probability matrix, one column per class.
    probs = predict.model_predictions(fset, model)
    n_samples = len(fset.name)
    n_classes = len(np.unique(fset.target))
    assert all(probs.name == fset.name)
    assert probs.prediction.values.shape == (n_samples, n_classes)
    assert probs.prediction.values.dtype == np.dtype("float")

    # With return_probs=False: one string class label per sample.
    labels = predict.model_predictions(fset, model, return_probs=False)
    assert all(labels.name == fset.name)
    assert labels.prediction.values.shape == (n_samples,)
    assert isinstance(labels.prediction.values[0], (str, bytes))
def test_model_classification():
    """Test model prediction function: classification"""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier')

    # Probability predictions: shape (n_samples, n_classes), float dtype.
    prob_preds = predict.model_predictions(fset, model)
    expected_shape = (len(fset.name), len(np.unique(fset.target)))
    assert all(prob_preds.name == fset.name)
    assert prob_preds.prediction.values.shape == expected_shape
    assert prob_preds.prediction.values.dtype == np.dtype('float')

    # Class predictions: one label per sample, string-typed.
    class_preds = predict.model_predictions(fset, model, return_probs=False)
    assert all(class_preds.name == fset.name)
    assert class_preds.prediction.values.shape == (len(fset.name),)
    assert isinstance(class_preds.prediction.values[0], (str, bytes))
def create_test_prediction(dataset, model):
    """Create and yield test prediction, then delete.

    Params
    ------
    dataset : `models.Dataset` instance
        The dataset on which prediction will be performed.
    model : `models.Model` instance
        The model to use to create prediction.
    """
    # Load the stored feature set and model, then compute predictions;
    # .load() pulls the data into memory so the file can be closed.
    with xr.open_dataset(model.featureset.file.uri,
                         engine=cfg['xr_engine']) as feature_data:
        trained_model = joblib.load(model.file.uri)
        prediction_set = predict.model_predictions(feature_data.load(),
                                                   trained_model)

    # Serialize the predictions under a fresh, collision-free file name.
    pred_path = pjoin(cfg['paths']['predictions_folder'],
                      '{}.nc'.format(str(uuid.uuid4())))
    prediction_set.to_netcdf(pred_path, engine=cfg['xr_engine'])

    # Register the prediction file and record in the database.
    pred_file, _created = m.File.create_or_get(uri=pred_path)
    prediction = m.Prediction.create(file=pred_file, dataset=dataset,
                                     project=dataset.project, model=model,
                                     finished=datetime.datetime.now())
    prediction.save()

    # Hand the record to the caller; always clean up afterwards.
    try:
        yield prediction
    finally:
        prediction.delete_instance()
def create_test_prediction(dataset, model):
    """Create and yield test prediction, then delete.

    Params
    ------
    dataset : `models.Dataset` instance
        The dataset on which prediction will be performed.
    model : `models.Model` instance
        The model to use to create prediction.
    """
    # Read the saved feature set and model and generate predictions;
    # .load() materializes the data before the context manager closes.
    with featureset.from_netcdf(model.featureset.file.uri,
                                engine=cfg['xr_engine']) as feature_data:
        trained_model = joblib.load(model.file.uri)
        prediction_set = predict.model_predictions(feature_data.load(),
                                                   trained_model)

    # Write the predictions out under a unique file name.
    pred_path = pjoin(cfg['paths']['predictions_folder'],
                      '{}.nc'.format(str(uuid.uuid4())))
    prediction_set.to_netcdf(pred_path, engine=cfg['xr_engine'])

    # Persist file and prediction records to the database.
    pred_file, _created = m.File.create_or_get(uri=pred_path)
    prediction = m.Prediction.create(file=pred_file, dataset=dataset,
                                     project=dataset.project, model=model,
                                     finished=datetime.datetime.now())
    prediction.save()

    # Yield to the test body; tear the record down unconditionally.
    try:
        yield prediction
    finally:
        prediction.delete_instance()
def test_model_regression():
    """Test model prediction function: regression"""
    fset = sample_featureset(10, 1, ["amplitude"], [0.1, 0.5])
    model = build_model.build_model_from_featureset(
        fset, model_type="RandomForestRegressor"
    )

    # Regression output should align with the input names and be float.
    predictions = predict.model_predictions(fset, model)
    assert all(predictions.name == fset.name)
    assert predictions.prediction.values.dtype == np.dtype("float")
def predict_featureset(fset, model, output_path=None):
    """Generate predictions for a feature set with a trained model.

    Parameters
    ----------
    fset : feature set (presumably an `xarray.Dataset`; the result supports
        `to_netcdf` — confirm against `predict.model_predictions`)
        Feature set to generate predictions for.
    model : trained model instance
        Model passed through to `predict.model_predictions`.
    output_path : str, optional
        If given, the prediction set is also serialized to this path as a
        netCDF file. Defaults to None (no file written).

    Returns
    -------
    Prediction set as returned by `predict.model_predictions`.
    """
    predset = predict.model_predictions(fset, model)
    # Persisting to disk is optional; callers that only need the in-memory
    # result simply omit output_path.
    if output_path:
        predset.to_netcdf(output_path)
    return predset
def test_model_regression():
    """Test model prediction function: regression"""
    fset = sample_featureset(10, 1, ['amplitude'], [0.1, 0.5])
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestRegressor')
    preds = predict.model_predictions(fset, model)

    # Names must pass through unchanged and values must be floats.
    names_match = all(preds.name == fset.name)
    assert names_match
    assert preds.prediction.values.dtype == np.dtype('float')
def test_model_predictions():
    """Test inner model prediction function"""
    fset = xr.open_dataset(pjoin(DATA_PATH, "test_featureset.nc"))
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier')
    preds = predict.model_predictions(fset, model)

    # One row per sample, one column per unique class, float-valued.
    n_rows = preds.shape[0]
    n_cols = preds.shape[1]
    assert n_rows == len(fset.name)
    assert n_cols == len(np.unique(fset.target.values))
    assert preds.values.dtype == np.dtype('float')
def test_model_regression():
    """Test model prediction function: regression

    Starts from a classification-style feature set, then overwrites the
    targets with random floats so a regressor can be fit on it.
    """
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    # Replace the categorical targets with continuous values; the original
    # class labels are only a convenient way to get a populated target array.
    fset.target.values = np.random.random(len(fset.target.values))
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestRegressor')
    preds = predict.model_predictions(fset, model)
    assert all(preds.name == fset.name)
    assert preds.prediction.values.dtype == np.dtype('float')
def test_predict_optimized_model():
    """Test main predict function (classification) w/ optimized model"""
    fset = sample_featureset(10, 1, ["amplitude"], ["class1", "class2"])
    # Grid-search over n_estimators with 2-fold CV to exercise the
    # hyperparameter-optimization path of model building.
    model = build_model.build_model_from_featureset(
        fset,
        model_type="RandomForestClassifier",
        params_to_optimize={"n_estimators": [10, 50, 100]},
        cv=2,
    )
    preds = predict.model_predictions(fset, model)

    expected_shape = (len(fset.name), len(np.unique(fset.target)))
    assert all(preds.name == fset.name)
    assert preds.prediction.values.shape == expected_shape
    assert preds.prediction.values.dtype == np.dtype("float")
def test_predict_optimized_model():
    """Test main predict function (classification) w/ optimized model"""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    # Build via the optimization path: CV grid search over n_estimators.
    param_grid = {"n_estimators": [10, 50, 100]}
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier',
        params_to_optimize=param_grid, cv=2)

    preds = predict.model_predictions(fset, model)
    n_samples = len(fset.name)
    n_classes = len(np.unique(fset.target))
    assert all(preds.name == fset.name)
    assert preds.prediction.values.shape == (n_samples, n_classes)
    assert preds.prediction.values.dtype == np.dtype('float')
model_dwt = build_model_from_featureset( fset_dwt.isel(name=train), KNeighborsClassifier(), params_to_optimize=knn_param_grid ) ############################################################################### # Prediction # ---------- # Making predictions for new time series based on these models follows the same # pattern: first the time series are featurized using ``featurize_time_series``, # and then predictions are made based on these features using # ``predict.model_predictions``. from sklearn.metrics import accuracy_score from cesium.predict import model_predictions preds_cesium = model_predictions(fset_cesium, model_cesium, return_probs=False) preds_guo = model_predictions(fset_guo, model_guo, return_probs=False) preds_dwt = model_predictions(fset_dwt, model_dwt, return_probs=False) print( "Built-in cesium features: training accuracy={:.2%}, test accuracy={:.2%}".format( accuracy_score(preds_cesium.prediction.values[train], eeg["classes"][train]), accuracy_score(preds_cesium.prediction.values[test], eeg["classes"][test]), ) ) print( "Guo et al. features: training accuracy={:.2%}, test accuracy={:.2%}".format( accuracy_score(preds_guo.prediction.values[train], eeg["classes"][train]), accuracy_score(preds_guo.prediction.values[test], eeg["classes"][test]), ) )
model_dwt = build_model_from_featureset(fset_dwt.isel(name=train), KNeighborsClassifier(), params_to_optimize=knn_param_grid) ############################################################################### # Prediction # ---------- # Making predictions for new time series based on these models follows the same # pattern: first the time series are featurized using ``featurize_time_series``, # and then predictions are made based on these features using # ``predict.model_predictions``. from sklearn.metrics import accuracy_score from cesium.predict import model_predictions preds_cesium = model_predictions(fset_cesium, model_cesium, return_probs=False) preds_guo = model_predictions(fset_guo, model_guo, return_probs=False) preds_dwt = model_predictions(fset_dwt, model_dwt, return_probs=False) print("Built-in cesium features: training accuracy={:.2%}, test accuracy={:.2%}".format( accuracy_score(preds_cesium.prediction.values[train], eeg["classes"][train]), accuracy_score(preds_cesium.prediction.values[test], eeg["classes"][test]))) print("Guo et al. features: training accuracy={:.2%}, test accuracy={:.2%}".format( accuracy_score(preds_guo.prediction.values[train], eeg["classes"][train]), accuracy_score(preds_guo.prediction.values[test], eeg["classes"][test]))) print("Wavelet transform features: training accuracy={:.2%}, test accuracy={:.2%}".format( accuracy_score(preds_dwt.prediction.values[train], eeg["classes"][train]), accuracy_score(preds_dwt.prediction.values[test], eeg["classes"][test]))) ############################################################################### # The workflow presented here is intentionally simplistic and omits many important steps