Python build_model_from_featureset Examples, cesium.build_model.build_model_from_featureset Python Examples

Example #1

0

Show file

File: test_build_model.py Project: bnaul/cesium

def test_fit_multichannel():
    """Check that a classifier can be fit on multi-channel feature data."""
    feature_names = ['amplitude', 'maximum', 'minimum', 'median']
    labels = ['class1', 'class2']
    fset = sample_featureset(10, 3, feature_names, labels)
    classifier_cls = build_model.MODELS_TYPE_DICT['RandomForestClassifier']
    fitted = build_model.build_model_from_featureset(fset, classifier_cls())
    # The helper is expected to hand back the (now fitted) estimator itself.
    assert isinstance(fitted, RandomForestClassifier)

Example #2

0

Show file

def test_fit_multichannel():
    """Fit a random forest on multi-channel sample features and check its type."""
    fset = sample_featureset(
        10, 3,
        ['amplitude', 'maximum', 'minimum', 'median'],
        ['class1', 'class2'])
    unfitted = build_model.MODELS_TYPE_DICT['RandomForestClassifier']()
    result = build_model.build_model_from_featureset(fset, unfitted)
    assert isinstance(result, RandomForestClassifier)

Example #3

0

Show file

File: test_predict.py Project: bnaul/cesium

def test_model_regression():
    """Fit a random-forest regressor and check prediction names and dtype."""
    fset = sample_featureset(10, 1, ["amplitude"], [0.1, 0.5])
    regressor = build_model.build_model_from_featureset(
        fset, model_type="RandomForestRegressor")
    predicted = predict.model_predictions(fset, regressor)
    # Predictions must be labelled with the same names as the input featureset.
    names_match = predicted.name == fset.name
    assert all(names_match)
    predicted_dtype = predicted.prediction.values.dtype
    assert predicted_dtype == np.dtype("float")

Example #4

0

Show file

def test_invalid_feature_values():
    """Ensure inf/NaN features raise a ValueError that names the bad features."""
    fset = sample_featureset(10, 1, ['x_valid', 'x_inf', 'x_nan'], ['class1', 'class2'])
    # Poison one entry in each of the two features expected to be rejected.
    fset.x_inf.values[0, 0] = np.inf
    fset.x_nan.values[0, 0] = np.nan
    model = build_model.MODELS_TYPE_DICT['RandomForestClassifier']()

    error_text = None
    try:
        build_model.build_model_from_featureset(fset, model)
    except ValueError as err:
        error_text = str(err)
    if error_text is None:
        raise AssertionError("Exception should have been raised for invalid data.")
    # Only the offending features may be mentioned in the message.
    assert 'x_valid' not in error_text
    assert 'x_inf' in error_text
    assert 'x_nan' in error_text

    # With the invalid features dropped, fitting must succeed.
    cleaned = fset.drop(['x_inf', 'x_nan'])
    model = build_model.build_model_from_featureset(cleaned, model)
    assert isinstance(model, RandomForestClassifier)

Example #5

0

Show file

File: test_predict.py Project: chaorun/cesium

def test_model_regression():
    """Fit a random-forest regressor, then verify prediction names and dtype."""
    features = ['amplitude']
    targets = [0.1, 0.5]
    fset = sample_featureset(10, 1, features, targets)
    regressor = build_model.build_model_from_featureset(
        fset, model_type='RandomForestRegressor')
    preds = predict.model_predictions(fset, regressor)
    assert all(preds.name == fset.name)
    assert preds.prediction.values.dtype == np.dtype('float')

Example #6

0

Show file

File: test_build_model.py Project: bnaul/cesium

def test_score_model():
    """Verify that scoring a fitted model on its own featureset yields a float."""
    feature_names = ['amplitude', 'maximum', 'minimum', 'median']
    fset = sample_featureset(10, 1, feature_names, ['class1', 'class2'])
    classifier_cls = build_model.MODELS_TYPE_DICT['RandomForestClassifier']
    fitted = build_model.build_model_from_featureset(fset, classifier_cls())
    training_score = build_model.score_model(fitted, fset)
    assert isinstance(training_score, float)

Example #7

0

Show file

File: test_build_model.py Project: bnaul/cesium

def test_invalid_feature_values():
    """Check that invalid (inf / NaN) feature values are reported by name."""
    feature_names = ['x_valid', 'x_inf', 'x_nan']
    fset = sample_featureset(10, 1, feature_names, ['class1', 'class2'])
    fset.x_inf.values[0, 0] = np.inf
    fset.x_nan.values[0, 0] = np.nan
    model = build_model.MODELS_TYPE_DICT['RandomForestClassifier']()

    raised = False
    try:
        build_model.build_model_from_featureset(fset, model)
    except ValueError as exc:
        raised = True
        message = str(exc)
        # The clean feature must not be blamed; both corrupted ones must be.
        assert 'x_valid' not in message
        assert 'x_inf' in message
        assert 'x_nan' in message
    if not raised:
        raise AssertionError("Exception should have been raised for invalid data.")

    # Fitting succeeds once the corrupted features are removed.
    valid_only = fset.drop(['x_inf', 'x_nan'])
    model = build_model.build_model_from_featureset(valid_only, model)
    assert isinstance(model, RandomForestClassifier)

Example #8

0

Show file

File: test_predict.py Project: ajijohn/cesium

def test_model_predictions():
    """Check shape and dtype of the predictions returned for a classifier."""
    fset_path = pjoin(DATA_PATH, "test_featureset.nc")
    fset = xr.open_dataset(fset_path)
    classifier = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier')
    preds = predict.model_predictions(fset, classifier)
    # One row per named entry in the featureset ...
    n_rows = preds.shape[0]
    assert n_rows == len(fset.name)
    # ... and one column per distinct target value.
    n_columns = preds.shape[1]
    assert n_columns == len(np.unique(fset.target.values))
    assert preds.values.dtype == np.dtype('float')

Example #9

0

Show file

File: test_build_model.py Project: chaorun/cesium

def test_score_model():
    """Scoring a freshly fitted classifier should return a plain float."""
    fset = sample_featureset(
        10, 1, ['amplitude', 'maximum', 'minimum', 'median'], ['class1', 'class2'])
    unfitted = build_model.MODELS_TYPE_DICT['RandomForestClassifier']()
    fitted = build_model.build_model_from_featureset(fset, unfitted)
    result = build_model.score_model(fitted, fset)
    assert isinstance(result, float)

Example #10

0

Show file

def test_model_regression():
    """Test model prediction function: regression"""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    # The sample featureset is created with class labels; overwrite them with
    # random floats so the regressor is trained on continuous targets.
    fset.target.values = np.random.random(len(fset.target.values))
    model = build_model.build_model_from_featureset(
        fset, model_type='RandomForestRegressor')
    preds = predict.model_predictions(fset, model)
    # Predictions must be labelled like the input and be floating point.
    assert all(preds.name == fset.name)
    assert preds.prediction.values.dtype == np.dtype('float')

Example #11

0

Show file

File: test_predict.py Project: bnaul/cesium

def test_predict_optimized_model():
    """Predict with a classifier built via hyperparameter optimization."""
    fset = sample_featureset(10, 1, ["amplitude"], ["class1", "class2"])
    search_grid = {"n_estimators": [10, 50, 100]}
    optimized = build_model.build_model_from_featureset(
        fset,
        model_type="RandomForestClassifier",
        params_to_optimize=search_grid,
        cv=2)
    preds = predict.model_predictions(fset, optimized)
    assert all(preds.name == fset.name)
    # One row per named entry, one column per distinct target value.
    expected_shape = (len(fset.name), len(np.unique(fset.target)))
    assert preds.prediction.values.shape == expected_shape
    assert preds.prediction.values.dtype == np.dtype("float")

Example #12

0

Show file

File: celery_tasks.py Project: BenJamesbabala/cesium

def build_model_task(model_type, model_params, fset_path, output_path=None,
                     params_to_optimize=None):
    """Build a model from a stored featureset and optionally persist it.

    Parameters
    ----------
    model_type : str
        Forwarded to `build_model_from_featureset` as `model_type`.
    model_params : dict
        Forwarded to `build_model_from_featureset` as `model_options`.
    fset_path : str
        Path of the featureset file, opened with `xr.open_dataset`.
    output_path : str, optional
        If truthy, the built model is dumped here with `joblib`
        (`compress=3`). Nothing is written otherwise.
    params_to_optimize : dict, optional
        Forwarded to `build_model_from_featureset` unchanged.

    Returns
    -------
    The object returned by `build_model_from_featureset`.
    """
    # The dataset is kept open for the whole build and closed on exit.
    with xr.open_dataset(fset_path) as fset:
        fitted = build_model.build_model_from_featureset(
            fset,
            model_type=model_type,
            model_options=model_params,
            params_to_optimize=params_to_optimize)
        if not output_path:
            return fitted
        joblib.dump(fitted, output_path, compress=3)
        return fitted

Example #13

0

Show file

def create_test_model(fset, model_type='RandomForestClassifier'):
    """Build a throwaway model record for tests, yield it, then remove it.

    The model is fit from the NetCDF file referenced by ``fset.file.uri``,
    dumped to a uniquely named ``.pkl`` file under the configured models
    folder and registered as an ``m.Model`` row. The row is deleted once the
    consumer is done with the yielded model.

    Parameters
    ----------
    fset : `models.Featureset` instance
        The (labeled) feature set from which to build the model.
    model_type : str, optional
        String indicating type of model to build. Defaults to
        'RandomForestClassifier'.

    Yields
    ------
    The saved ``m.Model`` instance.

    """
    # NOTE: these parameters are only recorded on the model row below; they
    # are not forwarded to `build_model_from_featureset`.
    model_params = {
        "RandomForestClassifier": {
            "bootstrap": True,
            "criterion": "gini",
            "oob_score": False,
            "max_features": "auto",
            "n_estimators": 10,
        },
        "RandomForestRegressor": {
            "bootstrap": True,
            "criterion": "mse",
            "oob_score": False,
            "max_features": "auto",
            "n_estimators": 10,
        },
        "LinearSGDClassifier": {"loss": "hinge"},
        "LinearRegressor": {"fit_intercept": True},
    }

    with featureset.from_netcdf(fset.file.uri) as fset_data:
        fitted = build_model.build_model_from_featureset(
            fset_data, model_type=model_type)
        pickle_name = str(uuid.uuid4()) + '.pkl'
        model_path = pjoin(cfg['paths']['models_folder'], pickle_name)
        joblib.dump(fitted, model_path)

    file_record, _created = m.File.create_or_get(uri=model_path)
    model = m.Model.create(
        name='test_model',
        file=file_record,
        featureset=fset,
        project=fset.project,
        params=model_params[model_type],
        type=model_type,
        finished=datetime.datetime.now())
    model.save()
    try:
        yield model
    finally:
        # Always remove the database row, even if the consumer raised.
        model.delete_instance()

Example #14

0

Show file

File: test_predict.py Project: bnaul/cesium

def test_model_classification():
    """Check classifier predictions both as probabilities and as class labels."""
    fset = sample_featureset(10, 1, ["amplitude"], ["class1", "class2"])
    classifier = build_model.build_model_from_featureset(
        fset, model_type="RandomForestClassifier")

    # Default call: one float column per distinct target value.
    probs = predict.model_predictions(fset, classifier)
    assert all(probs.name == fset.name)
    expected_shape = (len(fset.name), len(np.unique(fset.target)))
    assert probs.prediction.values.shape == expected_shape
    assert probs.prediction.values.dtype == np.dtype("float")

    # With return_probs=False: a single string/bytes label per entry.
    labels = predict.model_predictions(fset, classifier, return_probs=False)
    assert all(labels.name == fset.name)
    assert labels.prediction.values.shape == (len(fset.name),)
    first_label = labels.prediction.values[0]
    assert isinstance(first_label, (str, bytes))

Example #15

0

Show file

File: test_predict.py Project: chaorun/cesium

def test_predict_optimized_model():
    """Run prediction with a classifier whose n_estimators was grid-searched."""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    grid = {"n_estimators": [10, 50, 100]}
    tuned = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier',
        params_to_optimize=grid, cv=2)
    preds = predict.model_predictions(fset, tuned)
    assert all(preds.name == fset.name)
    n_entries = len(fset.name)
    n_classes = len(np.unique(fset.target))
    assert preds.prediction.values.shape == (n_entries, n_classes)
    assert preds.prediction.values.dtype == np.dtype('float')

Example #16

0

Show file

File: test_predict.py Project: BenJamesbabala/cesium

def test_predict_prefeaturized():
    """Predict from an already-featurized file using a joblib-dumped classifier."""
    featureset_path = pjoin(DATA_PATH, "test_featureset.nc")
    fset = xr.open_dataset(featureset_path).load()
    classifier = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier')
    model_path = pjoin(TEMP_DIR, "test.pkl")
    joblib.dump(classifier, model_path, compress=3)

    # The task factory returns a callable; invoking it yields the predictions.
    task = predict_prefeaturized_task(featureset_path, model_path)
    preds = task()

    assert all(preds.name == fset.name)
    expected_shape = (len(fset.name), len(np.unique(fset.target)))
    assert preds.prediction.values.shape == expected_shape
    assert preds.prediction.values.dtype == np.dtype('float')

Example #17

0

Show file

File: test_predict.py Project: ajijohn/cesium

def test_predict_optimized_model():
    """Test main predict function (classification) w/ optimized model"""
    expected_classes = ['Mira', 'W_Ursae_Maj', 'Classical_Cepheid']
    fset = xr.open_dataset(pjoin(DATA_PATH, "asas_training_subset_featureset.nc"))
    model = build_model.build_model_from_featureset(
        fset,
        model_type="RandomForestClassifier",
        params_to_optimize={"n_estimators": [10, 50, 100]},
        cv=2)
    pred_results_dict = predict.predict_data_files(TS_TARGET_PATHS,
                                                   list(fset.data_vars), model,
                                                   custom_features_script=None)
    for fname, results in pred_results_dict.items():
        for el in results['pred_results']:
            # Presumably a result is either a sequence led by the class label
            # or the bare label itself; accept both forms.
            assert el[0] in expected_classes or el in expected_classes

Example #18

0

Show file

File: test_predict.py Project: BenJamesbabala/cesium

def test_predict_optimized_model():
    """Run the prediction task with a grid-searched classifier loaded from disk."""
    fset_path = pjoin(DATA_PATH, "asas_training_subset_featureset.nc")
    fset = xr.open_dataset(fset_path)
    grid = {"n_estimators": [10, 50, 100]}
    tuned = build_model.build_model_from_featureset(
        fset, model_type="RandomForestClassifier",
        params_to_optimize=grid, cv=2)
    model_path = pjoin(TEMP_DIR, "test.pkl")
    joblib.dump(tuned, model_path, compress=3)

    feature_names = list(fset.data_vars)
    task = prediction_task(TS_TARGET_PATHS, feature_names, model_path,
                           custom_features_script=None)
    preds = task().get()

    expected_labels = ['Classical_Cepheid', 'Mira', 'W_Ursae_Maj']
    assert all(preds.prediction.class_label == expected_labels)
    expected_shape = (len(TS_CLASS_PATHS), len(np.unique(fset.target)))
    assert preds.prediction.values.shape == expected_shape

Example #19

0

Show file

File: model.py Project: weiwzhang/cesium_web

def _build_model_compute_statistics(fset_path, model_type, model_params,
                                    params_to_optimize, model_path):
    '''Build model and return summary statistics.

    Parameters
    ----------
    fset_path : str
        Path to feature set NetCDF file.
    model_type : str
        Type of model to be built, e.g. 'RandomForestClassifier'.
    model_params : dict
        Dictionary with hyperparameter values to be used in model building.
        Keys are parameter names, values are the associated parameter values.
        These hyperparameters will be passed to the model constructor as-is
        (for hyperparameter optimization, see `params_to_optimize`).
    params_to_optimize : dict or list of dict
        During hyperparameter optimization, various model parameters
        are adjusted to give an optimal fit. This dictionary gives the
        different values that should be explored for each parameter. E.g.,
        `{'alpha': [1, 2], 'beta': [4, 5, 6]}` would fit models on all
        6 combinations of alpha and beta and compare the resulting models'
        goodness-of-fit. If None, only those hyperparameters specified in
        `model_params` will be used (passed to model constructor as-is).
    model_path : str
        Path indicating where serialized model will be saved.

    Returns
    -------
    score : float
        The model's training score.
    best_params : dict
        Dictionary of best hyperparameter values (keys are parameter names,
        values are the corresponding best values) determined by `scikit-learn`'s
        `GridSearchCV`. If no hyperparameter optimization is performed (i.e.
        `params_to_optimize` is None or is an empty dict), this will be an
        empty dict.
    '''
    fset = featureset.from_netcdf(fset_path, engine=cfg['xr_engine'])
    try:
        computed_model = build_model.build_model_from_featureset(
            featureset=fset,
            model_type=model_type,
            model_parameters=model_params,
            params_to_optimize=params_to_optimize)
        score = build_model.score_model(computed_model, fset)
        # `best_params_` is only read when an optimization grid was supplied.
        best_params = computed_model.best_params_ if params_to_optimize else {}
        joblib.dump(computed_model, model_path)
    finally:
        # Release the feature set even if building, scoring or dumping fails.
        fset.close()

    return score, best_params

Example #20

0

Show file

File: test_predict.py Project: chaorun/cesium

def test_model_classification():
    """Verify probability and label outputs of a fitted classifier."""
    fset = sample_featureset(10, 1, ['amplitude'], ['class1', 'class2'])
    clf = build_model.build_model_from_featureset(
        fset, model_type='RandomForestClassifier')

    # Probabilities: (entries x distinct targets) array of floats.
    preds = predict.model_predictions(fset, clf)
    assert all(preds.name == fset.name)
    n_entries = len(fset.name)
    n_classes = len(np.unique(fset.target))
    assert preds.prediction.values.shape == (n_entries, n_classes)
    assert preds.prediction.values.dtype == np.dtype('float')

    # Labels: one string/bytes value per entry.
    classes = predict.model_predictions(fset, clf, return_probs=False)
    assert all(classes.name == fset.name)
    assert classes.prediction.values.shape == (len(fset.name), )
    assert isinstance(classes.prediction.values[0], (str, bytes))

Example #21

0

Show file

File: test_predict.py Project: BenJamesbabala/cesium

def test_predict_regression():
    """Test main predict function on multiple files (regression)"""
    regressor_types = [model_type for model_type, model_class
                       in build_model.MODELS_TYPE_DICT.items()
                       if issubclass(model_class, sklearn.base.RegressorMixin)]
    fset = xr.open_dataset(pjoin(DATA_PATH, "test_reg_featureset.nc"))
    # The same scratch file is reused (overwritten) for every regressor type.
    model_path = pjoin(TEMP_DIR, "test.pkl")
    for model_type in regressor_types:
        model = build_model.build_model_from_featureset(fset,
                                                        model_type=model_type)
        joblib.dump(model, model_path, compress=3)
        preds = prediction_task(TS_TARGET_PATHS, list(fset.data_vars),
                                model_path,
                                custom_features_script=None)().get()
        assert preds.prediction.values.shape == (len(TS_CLASS_PATHS),)
        # `all(...)` is required: asserting a bare generator expression is
        # always true, so the dtype check would never actually run.
        assert all(p.dtype == np.dtype('float') for p in preds.prediction)

Example #22

0

Show file

def test_fit_existing_model_optimize():
    """Fit an existing estimator with a hyperparameter grid and inspect the result."""
    feature_names = ['amplitude', 'maximum', 'minimum', 'median']
    fset = sample_featureset(10, 1, feature_names, ['class1', 'class2'])
    base_estimator = build_model.MODELS_TYPE_DICT['RandomForestClassifier']()
    fixed_options = {"criterion": "gini", "bootstrap": True}
    search_space = {
        "n_estimators": [10, 50, 100],
        "min_samples_split": [2, 5],
        "max_features": ["auto", 3],
    }
    searched = build_model.build_model_from_featureset(
        fset, base_estimator, None, fixed_options, search_space)
    # The result should be the grid search wrapper around the forest.
    for attribute in ("best_params_", "predict_proba"):
        assert hasattr(searched, attribute)
    assert isinstance(searched, GridSearchCV)
    assert isinstance(searched.best_estimator_, RandomForestClassifier)

Example #23

0

Show file

File: test_build_model.py Project: bnaul/cesium

def test_fit_existing_model_optimize():
    """Check that supplying an optimization grid yields a fitted GridSearchCV."""
    fset = sample_featureset(
        10, 1,
        ['amplitude', 'maximum', 'minimum', 'median'],
        ['class1', 'class2'])
    estimator = build_model.MODELS_TYPE_DICT['RandomForestClassifier']()
    options = {"criterion": "gini", "bootstrap": True}
    grid = {"n_estimators": [10, 50, 100],
            "min_samples_split": [2, 5],
            "max_features": ["auto", 3]}
    result = build_model.build_model_from_featureset(
        fset, estimator, None, options, grid)
    assert hasattr(result, "best_params_")
    assert hasattr(result, "predict_proba")
    assert isinstance(result, GridSearchCV)
    # The winning estimator must still be a random forest classifier.
    best = result.best_estimator_
    assert isinstance(best, RandomForestClassifier)

Example #24

0

Show file

File: test_predict.py Project: ajijohn/cesium

def test_predict_regression():
    """Predict on several files with every available regressor type."""
    # Collect the names of all registered models that are regressors.
    regressor_types = []
    for name, model_class in build_model.MODELS_TYPE_DICT.items():
        if issubclass(model_class, sklearn.base.RegressorMixin):
            regressor_types.append(name)

    fset = xr.open_dataset(pjoin(DATA_PATH, "test_reg_featureset.nc"))
    for model_type in regressor_types:
        regressor = build_model.build_model_from_featureset(
            fset, model_type=model_type)
        pred_results_dict = predict.predict_data_files(
            TS_TARGET_PATHS, list(fset.data_vars), regressor,
            custom_features_script=None)
        for _fname, results in pred_results_dict.items():
            for value in results['pred_results']:
                assert isinstance(value, float)

Example #25

0

Show file

File: test_build_model.py Project: BenJamesbabala/cesium

def test_fit_existing_model_optimize():
    """Test model building helper function - with param. optimization"""
    fset = xr.open_dataset(pjoin(DATA_PATH, "asas_training_subset_featureset.nc"))
    # Instantiate the estimator once (the duplicated instantiation was removed).
    model = build_model.MODELS_TYPE_DICT['RandomForestClassifier']()
    model_options = {"criterion": "gini",
                     "bootstrap": True}
    params_to_optimize = {"n_estimators": [10, 50, 100],
                          "min_samples_split": [2, 5],
                          "max_features": ["auto", 3]}
    model = build_model.build_model_from_featureset(fset, model, None,
                                                    model_options,
                                                    params_to_optimize)
    # With a grid supplied, the helper is expected to return the fitted
    # GridSearchCV wrapper rather than the bare estimator.
    assert hasattr(model, "best_params_")
    assert hasattr(model, "predict_proba")
    assert isinstance(model, GridSearchCV)
    assert isinstance(model.best_estimator_, RandomForestClassifier)

Example #26

0

Show file

File: test_predict.py Project: ajijohn/cesium

def test_predict_classification():
    """Predict on several files with every available classifier type."""
    # Collect the names of all registered models that are classifiers.
    classifier_types = []
    for name, model_class in build_model.MODELS_TYPE_DICT.items():
        if issubclass(model_class, sklearn.base.ClassifierMixin):
            classifier_types.append(name)

    fset = xr.open_dataset(pjoin(DATA_PATH, "test_featureset.nc"))
    for model_type in classifier_types:
        classifier = build_model.build_model_from_featureset(
            fset, model_type=model_type)
        pred_results_dict = predict.predict_data_files(
            TS_CLASS_PATHS, list(fset.data_vars), classifier,
            custom_features_script=None)
        for _fname, results in pred_results_dict.items():
            for el in results['pred_results']:
                # Either the first item of the result or the result itself
                # must be one of the known class labels.
                assert (el[0] in [b'class1', b'class2', b'class3']
                        or el in [b'class1', b'class2', b'class3'])

Example #27

0

Show file

def create_test_model(fset, model_type='RandomForestClassifier'):
    """Create a temporary model for tests, yield it, and delete it afterwards.

    A model is fit from the NetCDF file at ``fset.file.uri``, dumped with
    ``joblib`` to a uniquely named ``.pkl`` in the configured models folder,
    and stored as an ``m.Model`` row that is removed when the generator is
    finalized.

    Parameters
    ----------
    fset : `models.Featureset` instance
        The (labeled) feature set from which to build the model.
    model_type : str, optional
        String indicating type of model to build. Defaults to
        'RandomForestClassifier'.

    Yields
    ------
    The saved ``m.Model`` instance.

    """
    # NOTE: these values are stored on the model row as its `params`; they
    # are not passed to `build_model_from_featureset` below.
    model_params = {
        "RandomForestClassifier": {
            "bootstrap": True,
            "criterion": "gini",
            "oob_score": False,
            "max_features": "auto",
            "n_estimators": 10,
        },
        "RandomForestRegressor": {
            "bootstrap": True,
            "criterion": "mse",
            "oob_score": False,
            "max_features": "auto",
            "n_estimators": 10,
        },
        "LinearSGDClassifier": {"loss": "hinge"},
        "LinearRegressor": {"fit_intercept": True},
    }

    source = featureset.from_netcdf(fset.file.uri, engine=cfg['xr_engine'])
    with source as fset_data:
        built = build_model.build_model_from_featureset(
            fset_data, model_type=model_type)
        models_folder = cfg['paths']['models_folder']
        model_path = pjoin(models_folder, '{}.pkl'.format(str(uuid.uuid4())))
        joblib.dump(built, model_path)

    stored_file, _was_created = m.File.create_or_get(uri=model_path)
    model = m.Model.create(
        name='test_model',
        file=stored_file,
        featureset=fset,
        project=fset.project,
        params=model_params[model_type],
        type=model_type,
        finished=datetime.datetime.now())
    model.save()
    try:
        yield model
    finally:
        # Clean up the row regardless of how the consumer exits.
        model.delete_instance()

Example #28

0

Show file

File: test_predict.py Project: BenJamesbabala/cesium

def test_predict_classification():
    """Run the prediction task on several files for every classifier type."""
    classifier_types = []
    for name, model_class in build_model.MODELS_TYPE_DICT.items():
        if issubclass(model_class, sklearn.base.ClassifierMixin):
            classifier_types.append(name)

    fset = xr.open_dataset(pjoin(DATA_PATH, "test_featureset.nc"))
    for model_type in classifier_types:
        classifier = build_model.build_model_from_featureset(
            fset, model_type=model_type)
        model_path = pjoin(TEMP_DIR, "test.pkl")
        joblib.dump(classifier, model_path, compress=3)
        task = prediction_task(TS_CLASS_PATHS, list(fset.data_vars),
                               model_path, custom_features_script=None)
        preds = task().get()

        first_value = preds.prediction.values.ravel()[0]
        if first_value.dtype == np.dtype('float'):
            # Float output: expect one labelled column per class.
            assert all(preds.prediction.class_label ==
                       [b'class1', b'class2', b'class3'])
            expected_shape = (len(TS_CLASS_PATHS), len(np.unique(fset.target)))
            assert preds.prediction.values.shape == expected_shape
        else:
            # Otherwise every prediction must itself be a known class label.
            assert all(p in [b'class1', b'class2', b'class3']
                       for p in preds.prediction)

Example #29

0

Show file

File: test_build_model.py Project: BenJamesbabala/cesium

def test_fit_existing_model():
    """Fit a pre-instantiated classifier from a stored featureset."""
    fset_path = pjoin(DATA_PATH, "test_featureset.nc")
    fset = xr.open_dataset(fset_path)
    estimator_cls = build_model.MODELS_TYPE_DICT['RandomForestClassifier']
    fitted = build_model.build_model_from_featureset(fset, estimator_cls())
    assert isinstance(fitted, RandomForestClassifier)

Example #30

0

Show file

#
# For this example, we'll test a random forest classifier for the built-in
# ``cesium`` features, and a 3-nearest neighbors classifier for the others, as
# suggested by
# `Guo et al. (2012) <http://linkinghub.elsevier.com/retrieve/pii/S0957417411003253>`_.

from cesium.build_model import build_model_from_featureset
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module, which has
    # since been removed from the library.
    from sklearn.cross_validation import train_test_split

# Split the sample indices into train and test subsets; random_state=0 keeps
# the split reproducible.
train, test = train_test_split(np.arange(len(eeg["classes"])), random_state=0)

# Random forest on the cesium features, searching over the number of trees.
rfc_param_grid = {'n_estimators': [8, 16, 32, 64, 128, 256, 512, 1024]}
model_cesium = build_model_from_featureset(fset_cesium.isel(name=train),
                                          RandomForestClassifier(max_features='auto',
                                                                 random_state=0),
                                          params_to_optimize=rfc_param_grid)
# Nearest-neighbour classifiers for the other two feature sets, searching
# over the number of neighbours.
knn_param_grid = {'n_neighbors': [1, 2, 3, 4]}
model_guo = build_model_from_featureset(fset_guo.isel(name=train),
                                        KNeighborsClassifier(),
                                        params_to_optimize=knn_param_grid)
model_dwt = build_model_from_featureset(fset_dwt.isel(name=train),
                                        KNeighborsClassifier(),
                                        params_to_optimize=knn_param_grid)

###############################################################################
# Prediction
# ----------
# Making predictions for new time series based on these models follows the same
# pattern: first the time series are featurized using ``featurize_time_series``,
# and then predictions are made based on these features using

Example #31

0

Show file

File: plot_EEG_Example.py Project: bnaul/cesium

# For this example, we'll test a random forest classifier for the built-in
# ``cesium`` features, and a 3-nearest neighbors classifier for the others, as
# suggested by
# `Guo et al. (2012) <http://linkinghub.elsevier.com/retrieve/pii/S0957417411003253>`_.

from cesium.build_model import build_model_from_featureset
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Split the sample indices into train and test subsets; random_state=0 keeps
# the split reproducible.
train, test = train_test_split(np.arange(len(eeg["classes"])), random_state=0)

# Random forest on the cesium features, searching over the number of trees.
rfc_param_grid = {"n_estimators": [8, 16, 32, 64, 128, 256, 512, 1024]}
model_cesium = build_model_from_featureset(
    fset_cesium.isel(name=train),
    RandomForestClassifier(max_features="auto", random_state=0),
    params_to_optimize=rfc_param_grid,
)
# Nearest-neighbour classifiers for the other two feature sets, searching
# over the number of neighbours. Only the training rows are used for fitting.
knn_param_grid = {"n_neighbors": [1, 2, 3, 4]}
model_guo = build_model_from_featureset(
    fset_guo.isel(name=train), KNeighborsClassifier(), params_to_optimize=knn_param_grid
)
model_dwt = build_model_from_featureset(
    fset_dwt.isel(name=train), KNeighborsClassifier(), params_to_optimize=knn_param_grid
)

###############################################################################
# Prediction
# ----------
# Making predictions for new time series based on these models follows the same
# pattern: first the time series are featurized using ``featurize_time_series``,