def test_model_def_factory(self):
    """Expect 6 definitions from 3 feature lists and 2 estimator names."""
    template = ModelDefinition(features=["a"], estimator="model", target="y")
    feature_sets = [["a", "b"], ["a", "b", "c"], ["a", "b", "c", "y"]]
    estimator_names = ["model2", "model3"]

    definitions = list(
        model_definition_factory(
            template,
            features=feature_sets,
            estimator=estimator_names,
        )
    )

    # 3 feature lists x 2 estimators
    self.assertEqual(len(definitions), 6)
def cv_factory(data=None, folds=5, repeat=1, reporters=None, metrics=None,
               cv_runner=None, **kwargs):
    """Shortcut to iterate and cross-validate models.

    All ModelDefinition kwargs should be iterables that can be
    passed to model_definition_factory.

    Parameters
    ----------
    data:
        Raw DataFrame
    folds:
        If an int, then basic k-fold cross-validation will be done.
        Otherwise must be an iterable of tuples of pandas Indexes
        [(train_index, test_index), ...]
    repeat:
        How many times to repeat each cross-validation run of each model.
        Only makes sense if cross-validation folds are randomized.
    reporters:
        Optional iterable of reporter objects exposing ``copy()``. Every
        model definition is run with its own copies, always taken from the
        objects passed in here. ``None`` (the default) means no reporters.
    metrics:
        Forwarded unchanged to ``cv_runner``.
    cv_runner:
        Callable that cross-validates a single model definition. Falls back
        to ``cross_validate`` when not given (or falsy).
    kwargs:
        Can be any keyword accepted by `ModelDefinition`. Values should be
        iterables. Keywords that are not `ModelDefinition` params are
        forwarded to ``cv_runner`` as-is.

    Returns
    -------
    A ``CVComparisonResult`` built from the list of generated model
    definitions and the list of per-model ``cv_runner`` results, in the
    same order.
    """
    cv_runner = cv_runner or cross_validate

    # Materialize once so a one-shot iterable can be re-copied for every
    # model, and so the caller's objects remain the source of each copy.
    reporter_templates = list(reporters) if reporters is not None else []

    # Split ModelDefinition keywords from those meant for the runner.
    md_kwargs = {
        arg: kwargs.pop(arg)
        for arg in ModelDefinition.params
        if arg in kwargs
    }

    model_defs = list(model_definition_factory(ModelDefinition(), **md_kwargs))

    results = []
    for model_def in model_defs:
        # Copy from the templates, not from the previous iteration's copies;
        # otherwise whatever a run does to its reporters could carry over
        # into the next model's reporters.
        model_reporters = [reporter.copy() for reporter in reporter_templates]
        cvr = cv_runner(model_def=model_def,
                        data=data,
                        folds=folds,
                        repeat=repeat,
                        reporters=model_reporters,
                        metrics=metrics,
                        **kwargs)
        results.append(cvr)

    return CVComparisonResult(model_defs, results)
def test_model_def_factory(self):
    """Expect 6 definitions from 3 feature lists and 2 estimators."""
    template = ModelDefinition(
        features=['a'],
        estimator=estimators.Estimator('dummy'),
        target='y',
    )
    # Keyword -> candidate values handed to the factory.
    variations = {
        'features': [['a', 'b'], ['a', 'b', 'c'], ['a', 'b', 'c', 'y']],
        'estimator': [
            estimators.Estimator('dummy'),
            estimators.Estimator('dummy2'),
        ],
    }

    produced = list(model_definition_factory(template, **variations))

    self.assertEqual(len(produced), 6)
def test_model_def_factory(self):
    """The factory is expected to produce 6 model definitions here."""
    base_def = ModelDefinition(features=['a'],
                               estimator=estimators.Estimator('dummy'),
                               target='y')

    feature_options = [
        ['a', 'b'],
        ['a', 'b', 'c'],
        ['a', 'b', 'c', 'y'],
    ]
    estimator_options = [
        estimators.Estimator('dummy'),
        estimators.Estimator('dummy2'),
    ]

    generated = model_definition_factory(base_def,
                                         features=feature_options,
                                         estimator=estimator_options)

    # 3 feature options x 2 estimator options
    self.assertEqual(len(list(generated)), 6)
def cv_factory(data=None, folds=None, repeat=1, reporter_factories=[], **kwargs):
    """Cross-validate every model definition generated from the given options.

    Keywords named in ``ModelDefinition.params`` are taken out of ``kwargs``
    and handed to ``model_definition_factory``; their values should be
    iterables it can accept. An optional ``cv_runner`` keyword replaces the
    default ``modeling.cross_validate``. Whatever remains in ``kwargs`` is
    forwarded to the runner.

    Each run gets fresh reporters, created by calling every entry of
    ``reporter_factories``. The runner must return a ``(results, reporters)``
    pair.

    Returns a dict keyed by ``model_def.summary`` whose values are dicts with
    the keys ``'model_def'``, ``'results'`` and ``'reporters'``.
    """
    runner = kwargs.pop('cv_runner', modeling.cross_validate)

    # Pull out the keywords that describe model definitions.
    definition_options = {
        name: kwargs.pop(name)
        for name in ModelDefinition.params
        if name in kwargs
    }

    comparison = {}
    for definition in model_definition_factory(ModelDefinition(),
                                               **definition_options):
        fresh_reporters = [make_reporter() for make_reporter in reporter_factories]
        outcome, used_reporters = runner(definition, data, folds,
                                         repeat=repeat,
                                         reporters=fresh_reporters,
                                         **kwargs)
        comparison[definition.summary] = {
            'model_def': definition,
            'results': outcome,
            'reporters': used_reporters,
        }
    return comparison