def test_model_def_factory(self):
    # Three candidate feature lists and two candidate estimators are handed
    # to the factory; the test expects six model definitions back
    # (presumably one per features/estimator pairing, 3 x 2).
    template = ModelDefinition(features=["a"], estimator="model", target="y")
    feature_options = [
        ["a", "b"],
        ["a", "b", "c"],
        ["a", "b", "c", "y"],
    ]
    estimator_options = ["model2", "model3"]
    generated = list(
        model_definition_factory(
            template,
            features=feature_options,
            estimator=estimator_options,
        )
    )
    self.assertEqual(len(generated), 6)
Example #2
0
def cv_factory(data=None,
               folds=5,
               repeat=1,
               reporters=None,
               metrics=None,
               cv_runner=None,
               **kwargs):
    """Shortcut to iterate and cross-validate models.

    All ModelDefinition kwargs should be iterables that can be
    passed to model_definition_factory. Every keyword in ``kwargs`` that
    names an entry of ``ModelDefinition.params`` is removed from ``kwargs``
    and forwarded to ``model_definition_factory``; whatever remains is
    forwarded to ``cv_runner`` on every run.

    Parameters:
    ___________

    data:
        Raw DataFrame

    folds:
        If an int, then basic k-fold cross-validation will be done.
        Otherwise must be an iterable of tuples of pandas Indexes
        [(train_index, test_index), ...]

    repeat:
        How many times to repeat each cross-validation run of each model. Only
        makes sense if cross-validation folds are randomized.

    reporters:
        Iterable of reporter templates, each providing a ``copy()`` method.
        Every model definition is run with its own copies made from these
        templates. ``None`` (the default) means no reporters.

    metrics:
        Passed through unchanged to ``cv_runner``.

    cv_runner:
        Callable that performs one cross-validation run. Falls back to
        ``cross_validate`` when not given (or falsy).

    kwargs:
        Can be any keyword accepted by `ModelDefinition`.
        Values should be iterables.

    Returns:
    ________

    ``CVComparisonResult`` built from the list of generated model
    definitions and the list of ``cv_runner`` results, in matching order.
    """
    cv_runner = cv_runner or cross_validate

    # Keep the caller's reporters as pristine templates. Materialising them
    # once also lets a one-shot iterable be copied again for every model.
    reporter_templates = [] if reporters is None else list(reporters)

    # Split the ModelDefinition keywords off; the rest go to cv_runner.
    md_kwargs = {}
    for arg in ModelDefinition.params:
        if arg in kwargs:
            md_kwargs[arg] = kwargs.pop(arg)

    model_defs = list(model_definition_factory(ModelDefinition(), **md_kwargs))

    results = []
    for model_def in model_defs:
        # Always copy from the templates, never from the previous model's
        # (already used) reporters, so no state carries over between models.
        run_reporters = [template.copy() for template in reporter_templates]
        cvr = cv_runner(model_def=model_def,
                        data=data,
                        folds=folds,
                        repeat=repeat,
                        reporters=run_reporters,
                        metrics=metrics,
                        **kwargs)
        results.append(cvr)

    return CVComparisonResult(model_defs, results)
 def test_model_def_factory(self):
     # The factory receives a base definition plus three feature lists and
     # two estimators; the test expects six model definitions back
     # (presumably one per features/estimator pairing, 3 x 2).
     template = ModelDefinition(
         features=['a'],
         estimator=estimators.Estimator('dummy'),
         target='y',
     )
     feature_options = [
         ['a', 'b'],
         ['a', 'b', 'c'],
         ['a', 'b', 'c', 'y'],
     ]
     estimator_options = [
         estimators.Estimator('dummy'),
         estimators.Estimator('dummy2'),
     ]
     generated = list(
         model_definition_factory(
             template,
             features=feature_options,
             estimator=estimator_options,
         )
     )
     self.assertEqual(len(generated), 6)
Example #4
0
def cv_factory(data=None, folds=5, repeat=1, reporters=None, metrics=None,
               cv_runner=None, **kwargs):
    """Shortcut to iterate and cross-validate models.

    All ModelDefinition kwargs should be iterables that can be
    passed to model_definition_factory. Keywords listed in
    ``ModelDefinition.params`` are popped out of ``kwargs`` for the factory;
    the remaining keywords are handed to ``cv_runner`` on each run.

    Parameters:
    ___________

    data:
        Raw DataFrame

    folds:
        If an int, then basic k-fold cross-validation will be done.
        Otherwise must be an iterable of tuples of pandas Indexes
        [(train_index, test_index), ...]

    repeat:
        How many times to repeat each cross-validation run of each model. Only
        makes sense if cross-validation folds are randomized.

    reporters:
        Iterable of reporter objects exposing ``copy()``. They are used only
        as templates: each model definition gets its own copies. Defaults
        to no reporters when ``None``.

    metrics:
        Forwarded as-is to ``cv_runner``.

    cv_runner:
        Callable invoked once per model definition; ``cross_validate`` is
        used when this is not supplied (or falsy).

    kwargs:
        Can be any keyword accepted by `ModelDefinition`.
        Values should be iterables.

    Returns:
    ________

    A ``CVComparisonResult`` constructed from the generated model
    definitions and their ``cv_runner`` results (same order).
    """
    cv_runner = cv_runner or cross_validate

    # Snapshot the supplied reporters once; they stay untouched templates
    # and a single-pass iterable remains usable for every model.
    reporter_templates = [] if reporters is None else list(reporters)

    # Keywords meant for ModelDefinition are removed from kwargs here so
    # they are not also forwarded to cv_runner below.
    md_kwargs = {}
    for arg in ModelDefinition.params:
        if arg in kwargs:
            md_kwargs[arg] = kwargs.pop(arg)

    model_def_fact = model_definition_factory(ModelDefinition(), **md_kwargs)
    model_defs = list(model_def_fact)

    results = []
    for model_def in model_defs:
        # Copy from the templates each time rather than from the reporters
        # of the previous iteration, which cv_runner may already have used.
        fresh_reporters = [tmpl.copy() for tmpl in reporter_templates]
        results.append(
            cv_runner(model_def=model_def,
                      data=data,
                      folds=folds,
                      repeat=repeat,
                      reporters=fresh_reporters,
                      metrics=metrics,
                      **kwargs)
        )

    return CVComparisonResult(model_defs, results)
Example #5
0
 def test_model_def_factory(self):
     # Feed the factory a base definition, three alternative feature lists
     # and two alternative estimators, then check that six definitions come
     # out (presumably every features/estimator pairing, 3 x 2).
     base_definition = ModelDefinition(
         features=['a'],
         estimator=estimators.Estimator('dummy'),
         target='y',
     )
     candidate_features = [['a', 'b'], ['a', 'b', 'c'], ['a', 'b', 'c', 'y']]
     candidate_estimators = [
         estimators.Estimator('dummy'),
         estimators.Estimator('dummy2'),
     ]
     produced = list(model_definition_factory(
         base_definition,
         features=candidate_features,
         estimator=candidate_estimators,
     ))
     self.assertEqual(len(produced), 6)
Example #6
0
def cv_factory(data=None, folds=None, repeat=1, reporter_factories=None, **kwargs):
    """Shortcut to iterate and cross-validate models.

    All ModelDefinition kwargs should be iterables that can be
    passed to model_definition_factory.

    Parameters:
    ___________

    data, folds:
        Passed positionally to the cross-validation runner, after the
        model definition.

    repeat:
        Passed to the runner as the ``repeat`` keyword.

    reporter_factories:
        Iterable of zero-argument callables. Each is called once per model
        definition and the resulting objects are given to the runner as
        ``reporters``. ``None`` (the default) means no reporters.

    kwargs:
        ``cv_runner`` (optional) replaces the default runner,
        ``modeling.cross_validate``. Keywords listed in
        ``ModelDefinition.params`` are forwarded to
        ``model_definition_factory``. Anything left over is forwarded to
        the runner.

    Returns:
    ________

    A dict keyed by ``model_def.summary``. Each value is a dict with the
    keys ``'model_def'``, ``'results'`` and ``'reporters'``; the latter two
    are the pair returned by the runner for that model definition.
    """
    cv_runner = kwargs.pop('cv_runner', modeling.cross_validate)

    # Materialise once so a one-shot iterable of factories still yields
    # reporters for every model definition, not just the first.
    factories = [] if reporter_factories is None else list(reporter_factories)

    # Separate the ModelDefinition keywords from those meant for the runner.
    md_kwargs = {}
    for arg in ModelDefinition.params:
        if arg in kwargs:
            md_kwargs[arg] = kwargs.pop(arg)
    model_def_fact = model_definition_factory(ModelDefinition(), **md_kwargs)

    ret = {}
    for model_def in model_def_fact:
        # Build a fresh set of reporters for each model definition.
        fresh_reporters = [factory() for factory in factories]
        results, reporters = cv_runner(model_def, data, folds,
                                       repeat=repeat,
                                       reporters=fresh_reporters,
                                       **kwargs)
        ret[model_def.summary] = {'model_def': model_def,
                                  'results': results,
                                  'reporters': reporters}

    return ret