Beispiel #1
0
def svms(**kwargs):
    """
    Build one Construct step per LinearSVC hyperparameter combination.

    The search is the concatenation of two util.dict_product grids over
    C in [.001, .01, .1, 1]: penalty 'l2' with dual set to both True and
    False, followed by penalty 'l1' with dual=False only.

    Args:
        kwargs: accepted for signature compatibility; not used by the body.

    Returns: a list of Construct steps named 'estimator' whose
        __class_name__ is 'sklearn.svm.LinearSVC'.
    """
    l2_grid = util.dict_product(
        dict(penalty=['l2'], dual=[True, False], C=[.001, .01, .1, 1]))
    l1_grid = util.dict_product(
        dict(penalty=['l1'], dual=[False], C=[.001, .01, .1, 1]))

    # both grids are expanded before any step is constructed
    return [
        Construct(name='estimator',
                  __class_name__='sklearn.svm.LinearSVC',
                  **params)
        for params in l2_grid + l1_grid
    ]
Beispiel #2
0
def svms(**kwargs):
    """
    Build one Call step per LinearSVC hyperparameter combination.

    Two grids are expanded with util.dict_product and concatenated:
    first penalty 'l2' with dual in [True, False], then penalty 'l1' with
    dual=False. Both search C over [.001, .01, .1, 1].

    Args:
        kwargs: accepted for signature compatibility; not used by the body.

    Returns: a list of Call steps targeting 'sklearn.svm.LinearSVC'.
    """
    l2_search = {
        'penalty': ['l2'],
        'dual': [True, False],
        'C': [.001, .01, .1, 1],
    }
    l1_search = {
        'penalty': ['l1'],
        'dual': [False],
        'C': [.001, .01, .1, 1],
    }
    # expand both grids up front, l2 combinations first
    combinations = util.dict_product(l2_search) + util.dict_product(l1_search)

    return [Call('sklearn.svm.LinearSVC', **params)
            for params in combinations]
Beispiel #3
0
def logits(**kwargs):
    """
    Build one Call step per LogisticRegression hyperparameter combination.

    Args:
        kwargs: extra entries merged into the search dictionary before it
            is expanded with util.dict_product.

    Returns: a list of Call steps targeting
        'sklearn.linear_model.LogisticRegression', searching penalty over
        ['l1', 'l2'] and C over [.001, .01, .1, 1].
    """
    search = dict(penalty=['l1', 'l2'], C=[.001, .01, .1, 1], **kwargs)
    return [
        Call('sklearn.linear_model.LogisticRegression', **params)
        for params in util.dict_product(search)
    ]
Beispiel #4
0
def logits(**kwargs):
    """
    Build one Construct step per LogisticRegression hyperparameter
    combination.

    Args:
        kwargs: extra entries merged into the search dictionary before it
            is expanded with util.dict_product.

    Returns: a list of Construct steps named 'estimator' whose
        __class_name__ is 'sklearn.linear_model.LogisticRegression',
        searching penalty over ['l1', 'l2'] and C over [.001, .01, .1, 1].
    """
    search = dict(penalty=['l1', 'l2'], C=[.001, .01, .1, 1], **kwargs)
    class_name = 'sklearn.linear_model.LogisticRegression'

    return [
        Construct(name='estimator', __class_name__=class_name, **params)
        for params in util.dict_product(search)
    ]
Beispiel #5
0
def forests(**kwargs):
    """
    Build one Construct step per RandomForestClassifier hyperparameter
    combination.

    Args:
        kwargs: extra entries merged into the search dictionary before it
            is expanded with util.dict_product.

    Returns: a list of Construct steps named 'estimator' whose
        __class_name__ is 'sklearn.ensemble.RandomForestClassifier',
        searching criterion over ['entropy', 'gini'] and max_features over
        ['sqrt', 'log2'], with n_jobs=-1.
    """
    search = dict(
        criterion=['entropy', 'gini'],
        max_features=['sqrt', 'log2'],
        n_jobs=[-1],
        **kwargs)
    class_name = 'sklearn.ensemble.RandomForestClassifier'

    return [
        Construct(name='estimator', __class_name__=class_name, **params)
        for params in util.dict_product(search)
    ]
Beispiel #6
0
def forests(**kwargs):
    """
    Build one Call step per RandomForestClassifier hyperparameter
    combination.

    Args:
        kwargs: extra entries merged into the search dictionary before it
            is expanded with util.dict_product.

    Returns: a list of Call steps targeting
        'sklearn.ensemble.RandomForestClassifier', searching criterion over
        ['entropy', 'gini'] and max_features over ['sqrt', 'log2'], with
        n_jobs=-1.
    """
    search = dict(
        criterion=['entropy', 'gini'],
        max_features=['sqrt', 'log2'],
        n_jobs=[-1],
        **kwargs)

    return [
        Call('sklearn.ensemble.RandomForestClassifier', **params)
        for params in util.dict_product(search)
    ]
Beispiel #7
0
def models(estimators, transform_search):
    """
    Build a FitPredict step for every (transform arguments, estimator) pair.

    Args:
        estimators: iterable whose items become the first input of each
            FitPredict step
        transform_search: dictionary of arguments to LeadTransform, expanded
            with dict_product

    Returns: a list of model.FitPredict steps named 'y' with target=True.
        Each one takes an estimator and its own LeadTransform (month=1,
        day=25, name='transform') as inputs.
    """
    def build(transform_args, estimator):
        # a separate transform step is created for every combination
        transform = lead.model.transform.LeadTransform(
            month=1, day=25, name='transform', **transform_args)
        return model.FitPredict(
            inputs=[estimator, transform], name='y', target=True)

    grid = product(dict_product(transform_search), estimators)
    return [build(transform_args, estimator)
            for transform_args, estimator in grid]
Beispiel #8
0
def models(estimators, transform_search):
    """
    Pair every estimator with every LeadTransform configuration.

    Args:
        estimators: iterable whose items become the first input of each
            FitPredict step
        transform_search: dictionary of arguments to LeadTransform, expanded
            with dict_product

    Returns: a list of model.FitPredict steps (name='y', target=True), one
        per element of the product of dict_product(transform_search) and
        estimators. The second input of each step is a new LeadTransform
        built with month=1, day=25, name='transform' plus the searched
        arguments.
    """
    return [
        model.FitPredict(
            inputs=[
                estimator,
                lead.model.transform.LeadTransform(
                    month=1, day=25, name='transform', **transform_args),
            ],
            name='y',
            target=True)
        for transform_args, estimator in product(
            dict_product(transform_search), estimators)
    ]
Beispiel #9
0
def models(estimators, cv_search, transform_search):
    """
    Build grid-searched prediction workflows.

    Used by bll6_models, test_models, and product_models.

    Args:
        estimators: collection of steps, each of which constructs an estimator
        cv_search: dictionary of arguments to LeadCrossValidate to search over
        transform_search: dictionary of arguments to LeadTransform to search
            over

    Returns: a list of drain.model.Predict steps, one for every element of
        the product of dict_product(cv_search),
        dict_product(transform_search) and estimators.

        Each Predict step contains the following in its inputs graph:
            - lead.model.cv.LeadCrossValidate
            - lead.model.transform.LeadTransform
            - drain.model.Fit
    """
    def build_workflow(cv_args, transform_args, estimator):
        cv = lead.model.cv.LeadCrossValidate(**cv_args)
        cv.name = 'cv'

        # Feed cv's 'X' and 'train' results to __getitem__ as 'obj' and 'key'
        # (presumably X[train]; mapping semantics live in MapResults —
        # TODO confirm).
        train_selector = MapResults(
            [cv], {
                'X': 'obj',
                'train': 'key',
                'test': None,
                'aux': None
            })
        X_train = Call('__getitem__', inputs=[train_selector])
        mean = Call('mean', inputs=[X_train])
        mean.name = 'mean'

        # data.impute receives cv's results with 'aux', 'test' and 'train'
        # mapped to None, plus the mean as 'value'.
        impute_inputs = [
            MapResults([cv], {
                'aux': None,
                'test': None,
                'train': None
            }),
            MapResults([mean], 'value'),
        ]
        X_impute = Construct(data.impute, inputs=impute_inputs)

        # Combine the imputed result (as 'X') with cv's results, whose own
        # 'X' is mapped to None.
        cv_imputed = MapResults([X_impute, cv], ['X', {'X': None}])
        cv_imputed.target = True

        transform = lead.model.transform.LeadTransform(
            inputs=[cv_imputed], **transform_args)
        transform.name = 'transform'

        fit = model.Fit(inputs=[estimator, transform], return_estimator=True)
        fit.name = 'fit'

        predict = model.Predict(
            inputs=[fit, transform], return_feature_importances=True)
        predict.name = 'predict'
        predict.target = True
        return predict

    grid = product(
        dict_product(cv_search), dict_product(transform_search), estimators)
    return [build_workflow(cv_args, transform_args, estimator)
            for cv_args, transform_args, estimator in grid]
Beispiel #10
0
def dapply(self,
           fn,
           pairwise=False,
           symmetric=True,
           diagonal=False,
           block=None,
           **kwargs):
    """
    Apply one or more functions to the step objects in the index.

    Args:
        fn: a function, or a list of functions, to apply
        pairwise: if True, call apply_pairwise on self instead of applying
            the function to each step of the index individually
        symmetric, diagonal, block: forwarded to apply_pairwise when
            pairwise=True
        kwargs: keyword arguments passed to each function. They are expanded
            with util.dict_product; keys whose value is a list with more
            than one element are used to label the resulting columns.

    Returns: a StepFrame when there are several results or the only result
        is a DataFrame; otherwise a StepSeries named after the function.
    """
    searched_keys = [
        key for key, value in kwargs.items()
        if isinstance(value, list) and len(value) > 1
    ]
    functions = util.make_list(fn)
    combinations = list(product(functions, util.dict_product(kwargs)))

    results = []
    for func, func_kwargs in combinations:
        if pairwise:
            outcome = apply_pairwise(self,
                                     func,
                                     symmetric=symmetric,
                                     diagonal=diagonal,
                                     block=block,
                                     **func_kwargs)
        else:
            outcome = self.index.to_series().apply(
                lambda step: func(step, **func_kwargs))

        # Label prefix: the function name (only when several functions were
        # given) followed by the values of the searched arguments.
        label = [func.__name__] if len(functions) != 1 else []
        label.extend(util.dict_subset(func_kwargs, searched_keys).values())

        if isinstance(outcome, pd.DataFrame):
            outcome.columns = pd.MultiIndex.from_tuples(
                [tuple(label + util.make_list(col))
                 for col in outcome.columns])
        else:
            outcome.name = tuple(label)
        results.append(outcome)

    if len(results) <= 1:
        single = results[0]
        if isinstance(single, pd.DataFrame):
            return StepFrame(single)
        single.name = functions[0].__name__
        return StepSeries(single)

    combined = pd.concat(results, axis=1)
    # Name the column levels: an unnamed level for the function (if several),
    # then the searched parameters, then unnamed levels for whatever remains.
    level_names = ([] if len(functions) == 1 else [None]) + searched_keys
    level_names += [None] * (len(combined.columns.names) - len(level_names))
    combined.columns.names = level_names

    return StepFrame(combined)