Example #1
def test_model_classification():
    """Test model prediction function: classification"""
    fset = sample_featureset(10, 1, ["amplitude"], ["class1", "class2"])
    model = build_model.build_model_from_featureset(fset, model_type="RandomForestClassifier")
    preds = predict.model_predictions(fset, model)
    assert all(preds.name == fset.name)
    assert preds.prediction.values.shape == (len(fset.name), len(np.unique(fset.target)))
    assert preds.prediction.values.dtype == np.dtype("float")

    classes = predict.model_predictions(fset, model, return_probs=False)
    assert all(classes.name == fset.name)
    assert classes.prediction.values.shape == (len(fset.name),)
    assert isinstance(classes.prediction.values[0], (str, bytes))
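
For context, the two output modes exercised above (class probabilities vs. class labels) roughly correspond to scikit-learn's predict_proba() and predict(). The following is a plain scikit-learn sketch of that distinction, not cesium's internal implementation.

import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Toy data shaped like the test above: 10 samples, 1 feature, 2 string classes.
X = np.random.random((10, 1))
y = np.array(["class1", "class2"] * 5)
clf = RandomForestClassifier(n_estimators=10).fit(X, y)

probs = clf.predict_proba(X)   # shape (10, 2), float dtype -- like return_probs=True
labels = clf.predict(X)        # shape (10,), string labels -- like return_probs=False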
Example #2
def test_model_classification():
    """Test model prediction function: classification"""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier')
    preds = predict.model_predictions(fset, model)
    assert (all(preds.name == fset.name))
    assert (preds.prediction.values.shape == (len(fset.name),
                                              len(np.unique(fset.target))))
    assert (preds.prediction.values.dtype == np.dtype('float'))

    classes = predict.model_predictions(fset, model, return_probs=False)
    assert (all(classes.name == fset.name))
    assert (classes.prediction.values.shape == (len(fset.name), ))
    assert (isinstance(classes.prediction.values[0], (str, bytes)))
Example #3
def create_test_prediction(dataset, model):
    """Create and yield test prediction, then delete.

    Parameters
    ----------
    dataset : `models.Dataset` instance
        The dataset on which prediction will be performed.
    model : `models.Model` instance
        The model to use to create prediction.

    """
    with xr.open_dataset(model.featureset.file.uri, engine=cfg['xr_engine']) as fset_data:
        model_data = joblib.load(model.file.uri)
        pred_data = predict.model_predictions(fset_data.load(), model_data)
    pred_path = pjoin(cfg['paths']['predictions_folder'],
                      '{}.nc'.format(str(uuid.uuid4())))
    pred_data.to_netcdf(pred_path, engine=cfg['xr_engine'])
    f, created = m.File.create_or_get(uri=pred_path)
    pred = m.Prediction.create(file=f, dataset=dataset, project=dataset.project,
                               model=model, finished=datetime.datetime.now())
    pred.save()
    try:
        yield pred
    finally:
        pred.delete_instance()
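
The yield/finally structure above is typical of a generator-based test fixture. A hedged usage sketch, assuming the generator is wrapped with contextlib.contextmanager (the wrapping and the pre-existing dataset/model objects are assumptions, not part of the source):

from contextlib import contextmanager

# Hypothetical wrapping; `dataset` and `model` are existing `models.Dataset` /
# `models.Model` instances as described in the docstring above.
create_test_prediction_cm = contextmanager(create_test_prediction)

with create_test_prediction_cm(dataset, model) as pred:
    assert pred.finished is not None  # prediction row exists inside the block
# On exit, the finally clause removes the row via pred.delete_instance().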
Example #4
def create_test_prediction(dataset, model):
    """Create and yield test prediction, then delete.

    Parameters
    ----------
    dataset : `models.Dataset` instance
        The dataset on which prediction will be performed.
    model : `models.Model` instance
        The model to use to create prediction.

    """
    with featureset.from_netcdf(model.featureset.file.uri, engine=cfg['xr_engine']) as fset_data:
        model_data = joblib.load(model.file.uri)
        pred_data = predict.model_predictions(fset_data.load(), model_data)
    pred_path = pjoin(cfg['paths']['predictions_folder'],
                      '{}.nc'.format(str(uuid.uuid4())))
    pred_data.to_netcdf(pred_path, engine=cfg['xr_engine'])
    f, created = m.File.create_or_get(uri=pred_path)
    pred = m.Prediction.create(file=f, dataset=dataset, project=dataset.project,
                               model=model, finished=datetime.datetime.now())
    pred.save()
    try:
        yield pred
    finally:
        pred.delete_instance()
Example #5
def test_model_regression():
    """Test model prediction function: regression"""
    fset = sample_featureset(10, 1, ["amplitude"], [0.1, 0.5])
    model = build_model.build_model_from_featureset(fset, model_type="RandomForestRegressor")
    preds = predict.model_predictions(fset, model)
    assert all(preds.name == fset.name)
    assert preds.prediction.values.dtype == np.dtype("float")
Example #6
def predict_featureset(fset, model, output_path=None):
    """TODO"""
    predset = predict.model_predictions(fset, model)
    if output_path:
        predset.to_netcdf(output_path)

    return predset
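
A hedged usage sketch for the helper above; the file paths are placeholders, and the loading calls mirror the other examples in this listing (xarray for the featureset, joblib for the model):

import joblib
import xarray as xr

# Placeholder paths -- adjust to your own layout.
fset = xr.open_dataset("features/my_featureset.nc")
model = joblib.load("models/my_model.pkl")

# Compute predictions and also persist them to netCDF.
predset = predict_featureset(fset, model, output_path="predictions/my_preds.nc")
print(predset.prediction.values[:5])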
Example #7
def test_model_regression():
    """Test model prediction function: regression"""
    fset = sample_featureset(10, 1, ['amplitude'], [0.1, 0.5])
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestRegressor')
    preds = predict.model_predictions(fset, model)
    assert (all(preds.name == fset.name))
    assert (preds.prediction.values.dtype == np.dtype('float'))
Example #8
def test_model_predictions():
    """Test inner model prediction function"""
    fset = xr.open_dataset(pjoin(DATA_PATH, "test_featureset.nc"))
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier')
    preds = predict.model_predictions(fset, model)
    assert(preds.shape[0] == len(fset.name))
    assert(preds.shape[1] == len(np.unique(fset.target.values)))
    assert(preds.values.dtype == np.dtype('float'))
Example #9
def test_model_regression():
    """Test model prediction function: classification"""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    fset.target.values = np.random.random(len(fset.target.values))
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestRegressor')
    preds = predict.model_predictions(fset, model)
    assert (all(preds.name == fset.name))
    assert (preds.prediction.values.dtype == np.dtype('float'))
Example #10
def test_predict_optimized_model():
    """Test main predict function (classification) w/ optimized model"""
    fset = sample_featureset(10, 1, ["amplitude"], ["class1", "class2"])
    model = build_model.build_model_from_featureset(
        fset, model_type="RandomForestClassifier", params_to_optimize={"n_estimators": [10, 50, 100]}, cv=2
    )
    preds = predict.model_predictions(fset, model)
    assert all(preds.name == fset.name)
    assert preds.prediction.values.shape == (len(fset.name), len(np.unique(fset.target)))
    assert preds.prediction.values.dtype == np.dtype("float")
Example #11
def test_predict_optimized_model():
    """Test main predict function (classification) w/ optimized model"""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    model = build_model.build_model_from_featureset(
        fset,
        model_type='RandomForestClassifier',
        params_to_optimize={"n_estimators": [10, 50, 100]},
        cv=2)
    preds = predict.model_predictions(fset, model)
    assert (all(preds.name == fset.name))
    assert (preds.prediction.values.shape == (len(fset.name),
                                              len(np.unique(fset.target))))
    assert (preds.prediction.values.dtype == np.dtype('float'))
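
The params_to_optimize/cv arguments above describe a small hyperparameter grid. For reference, here is what the equivalent search looks like in plain scikit-learn; whether cesium delegates to GridSearchCV internally is an assumption here, not something this listing shows.

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Toy data matching the test above: 10 samples, 1 feature, 2 classes.
X = np.random.random((10, 1))
y = np.array(["class1", "class2"] * 5)

search = GridSearchCV(RandomForestClassifier(),
                      param_grid={"n_estimators": [10, 50, 100]}, cv=2)
search.fit(X, y)
print(search.best_params_)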
Example #12
model_dwt = build_model_from_featureset(
    fset_dwt.isel(name=train), KNeighborsClassifier(), params_to_optimize=knn_param_grid
)

###############################################################################
# Prediction
# ----------
# Making predictions for new time series based on these models follows the same
# pattern: first the time series are featurized using ``featurize_time_series``,
# and then predictions are made based on these features using
# ``predict.model_predictions``.

from sklearn.metrics import accuracy_score
from cesium.predict import model_predictions

preds_cesium = model_predictions(fset_cesium, model_cesium, return_probs=False)
preds_guo = model_predictions(fset_guo, model_guo, return_probs=False)
preds_dwt = model_predictions(fset_dwt, model_dwt, return_probs=False)

print(
    "Built-in cesium features: training accuracy={:.2%}, test accuracy={:.2%}".format(
        accuracy_score(preds_cesium.prediction.values[train], eeg["classes"][train]),
        accuracy_score(preds_cesium.prediction.values[test], eeg["classes"][test]),
    )
)
print(
    "Guo et al. features: training accuracy={:.2%}, test accuracy={:.2%}".format(
        accuracy_score(preds_guo.prediction.values[train], eeg["classes"][train]),
        accuracy_score(preds_guo.prediction.values[test], eeg["classes"][test]),
    )
)
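
The comment block in this example also mentions featurizing new time series before predicting. Below is a minimal sketch of that first half of the pattern using synthetic data; the feature list and the stand-in `trained_model` are placeholders, and features_to_use must match whatever the model was actually trained on.

import numpy as np
from cesium.featurize import featurize_time_series
from cesium.predict import model_predictions

# Synthetic "new" time series: 5 series of 256 points each.
times = [np.linspace(0, 10, 256) for _ in range(5)]
values = [np.random.normal(size=256) for _ in range(5)]

# "amplitude" is a placeholder feature; use the same features the model saw in training.
fset_new = featurize_time_series(times=times, values=values,
                                 features_to_use=["amplitude"])

# `trained_model` stands in for one of the models built above (e.g. model_cesium).
preds_new = model_predictions(fset_new, trained_model, return_probs=False)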
Example #13
model_dwt = build_model_from_featureset(fset_dwt.isel(name=train),
                                        KNeighborsClassifier(),
                                        params_to_optimize=knn_param_grid)

###############################################################################
# Prediction
# ----------
# Making predictions for new time series based on these models follows the same
# pattern: first the time series are featurized using ``featurize_time_series``,
# and then predictions are made based on these features using
# ``predict.model_predictions``.

from sklearn.metrics import accuracy_score
from cesium.predict import model_predictions

preds_cesium = model_predictions(fset_cesium, model_cesium, return_probs=False)
preds_guo = model_predictions(fset_guo, model_guo, return_probs=False)
preds_dwt = model_predictions(fset_dwt, model_dwt, return_probs=False)

print("Built-in cesium features: training accuracy={:.2%}, test accuracy={:.2%}".format(
          accuracy_score(preds_cesium.prediction.values[train], eeg["classes"][train]),
          accuracy_score(preds_cesium.prediction.values[test], eeg["classes"][test])))
print("Guo et al. features: training accuracy={:.2%}, test accuracy={:.2%}".format(
          accuracy_score(preds_guo.prediction.values[train], eeg["classes"][train]),
          accuracy_score(preds_guo.prediction.values[test], eeg["classes"][test])))
print("Wavelet transform features: training accuracy={:.2%}, test accuracy={:.2%}".format(
          accuracy_score(preds_dwt.prediction.values[train], eeg["classes"][train]),
          accuracy_score(preds_dwt.prediction.values[test], eeg["classes"][test])))

###############################################################################
# The workflow presented here is intentionally simplistic and omits many important steps