Ejemplo n.º 1
0
def _setup(config=None):
    '''Build the config, a KMeans pipeline and the ea_setup output

    Parameters:
        :config: ConfigParser instance or None.  When falsy, a
                 ConfigParser is built from the module-level
                 CONFIG_STR yaml text

    Returns:
        :(config, pipe, idx_to_param_grid): the config in use, a
            Pipeline made of the sample steps of config.run[0]
            followed by a MiniBatchKMeans estimator named "kmeans",
            and whatever ea_setup(config) returns
    '''
    from elm.sample_util.sample_pipeline import make_pipeline_steps
    from elm.pipeline import Pipeline
    if not config:
        # yaml.load without an explicit Loader is rejected by
        # PyYAML >= 6 (and warns on 5.x); safe_load parses plain data.
        # NOTE(review): assumes CONFIG_STR uses no python-specific
        # yaml tags - confirm against the CONFIG_STR definition
        config = ConfigParser(config=yaml.safe_load(CONFIG_STR))
    sample_steps = make_pipeline_steps(config, config.run[0]['pipeline'])
    kmeans_kwargs = config.train['kmeans']['model_init_kwargs']
    estimator = [('kmeans', MiniBatchKMeans(**kmeans_kwargs))]
    pipe = Pipeline(sample_steps + estimator)
    idx_to_param_grid = ea_setup(config)
    return config, pipe, idx_to_param_grid
Ejemplo n.º 2
0
def config_to_pipeline(config, client=None):
    '''
    Run the elm config's train and predict "run"
    actions with dask client and config's updates
    based on args passed to elm-main, such as --train-only
    or --predict-only, or edits to ensemble settings, such as
    --ngen 4

    For each step in config.run:
        * a step with a "train" key is fit and serialized under
          config.ELM_TRAIN_PATH, unless config.PREDICT_ONLY is set.
          fit_ea is used when ea_setup returned evo params for the
          step's index, otherwise fit_ensemble
        * a step with a "predict" key that was not trained in this
          call loads its pipeline from config.ELM_TRAIN_PATH
        * prediction runs for steps with a "predict" key, unless
          config.TRAIN_ONLY is set

    Parameters:
        :config: elm.config.ConfigParser instance
        :client: dask client or None

    Returns:
        None
    '''
    from elm.sample_util.sample_pipeline import make_pipeline_steps

    _makedirs(config)
    idx_to_evo_params = ea_setup(config)
    # The flags do not change between steps, so read them once
    predict_only = getattr(config, 'PREDICT_ONLY', False)
    train_only = getattr(config, 'TRAIN_ONLY', False)
    for idx, step in enumerate(config.run):
        pipeline = step['pipeline']
        has_train = 'train' in step
        has_predict = 'predict' in step
        if has_train:
            train = config.train[step['train']]
            pipe_steps = make_pipeline_steps(config, pipeline)
            cls = import_callable(train['model_init_class'])
            estimator = cls(**(train.get('model_init_kwargs') or {}))
            pipe_steps.append((step['train'], estimator))
            # "ensemble" is either inline kwargs or the name of an
            # entry in config.ensembles
            ensemble_kwargs = train.get('ensemble')
            if isinstance(ensemble_kwargs, str):
                ensemble_kwargs = config.ensembles[ensemble_kwargs]
            if ensemble_kwargs is None:
                # No ensemble settings given: previously this raised a
                # TypeError on the item assignment just below
                ensemble_kwargs = {}
            # NOTE(review): when ensemble_kwargs comes from the config
            # this writes into the config's own dict in place
            ensemble_kwargs['client'] = client
        # "data_source" is either inline kwargs or the name of an
        # entry in config.data_sources
        data_source = step['data_source']
        if not isinstance(data_source, dict):
            data_source = config.data_sources[data_source]
        data_source['sampler'] = import_callable(data_source['sampler'])
        data_source['load_meta'] = load_meta
        data_source['load_array'] = load_array
        if callable(data_source.get('args_list')):
            # Expand a callable args_list by calling it with the rest
            # of the data source settings
            kw = {k: v for k, v in data_source.items() if k != 'args_list'}
            data_source['args_list'] = tuple(data_source['args_list'](**kw))
        if has_train and not predict_only:
            s = train.get('model_scoring')
            if s:
                scoring = config.model_scoring[s]
                scoring_kwargs = {
                    k: v
                    for k, v in scoring.items() if k != 'scoring'
                }
                scoring = import_callable(scoring['scoring'])
            else:
                scoring = None
                scoring_kwargs = {}
            method_kwargs = train.get('method_kwargs', {})
            if 'classes' in train:
                method_kwargs['classes'] = train['classes']
            ensemble_kwargs['method_kwargs'] = method_kwargs
            pipe = Pipeline(pipe_steps,
                            scoring=scoring,
                            scoring_kwargs=scoring_kwargs)
            evo_params = idx_to_evo_params.get(idx, None)
            # Later updates win on key clashes: ensemble settings
            # override data source settings, which override evo_params
            kw = dict(evo_params=evo_params) if evo_params else {}
            kw.update(data_source)
            kw.update(ensemble_kwargs)
            if evo_params:
                pipe.fit_ea(**kw)
            else:
                pipe.fit_ensemble(**kw)

            serialize_pipe(pipe, config.ELM_TRAIN_PATH, step['train'])
        elif has_predict and not train_only:
            pipe = load_pipe_from_tag(config.ELM_TRAIN_PATH, step['predict'])

        else:
            logger.info(
                'Do nothing for {} (has no "train" or "predict" key)'.format(
                    step))
        # Honor TRAIN_ONLY here as well.  Without this guard a
        # predict step under TRAIN_ONLY reached predict_many with
        # "pipe" unbound (or left over from an earlier step).
        if has_predict and not train_only:
            # serialize is called with (prediction, sample, tag)
            serialize = partial(serialize_prediction, config)
            pipe.predict_many(serialize=serialize, **data_source)
Ejemplo n.º 3
0
        'cxpb': 0.3,
        'eta': 20,
        'ngen': 2,
        'mu': 4,
        'k': 4,
        'early_stop': {
            'abs_change': [10],
            'agg': 'all'
        },
        # alternatively early_stop: {percent_change: [10], agg: all}
        # alternatively early_stop: {threshold: [10], agg: any}
    }
}

# Evolutionary-algorithm parameters derived from param_grid; main()
# passes them to pipeline.fit_ea.  A score weight of -1 means the
# score is minimized.
evo_params = ea_setup(
    param_grid=param_grid,
    param_grid_name='param_grid_example',
    score_weights=[-1],
)


def main():
    '''Fit the pipeline with fit_ea, then predict, inside one client context

    Returns:
        :(fitted, preds): the return value of pipeline.fit_ea and the
            return value of pipeline.predict_many
    '''
    with client_context() as client:
        fit_kwargs = dict(
            evo_params=evo_params,
            client=client,
            # Keep as many ensemble members as the "mu" control setting
            saved_ensemble_size=param_grid['control']['mu'],
        )
        fitted = pipeline.fit_ea(**fit_kwargs, **data_source)
        preds = pipeline.predict_many(client=client, **data_source)
    return fitted, preds


if __name__ == '__main__':