Beispiel #1
0
def check_cv(pipeline,
             X,
             y=None,
             n_folds=2,
             groups=None,
             split_start='before_transforms',
             expected_metrics=None,
             **params):
    """Run ``CV(pipeline).fit`` and check the results of every run.

    Each result of ``fit`` is passed to ``check_cv_results`` together with
    ``cv._learner_type``, ``n_folds`` and ``expected_metrics``.

    :param pipeline: object handed to ``CV``; its ``nodes`` attribute is
        read only when ``split_start == 'try_all'``.
    :param X: first positional argument of ``fit``.
    :param y: second positional argument of ``fit``.
    :param n_folds: passed to ``fit`` as ``cv`` and to ``check_cv_results``.
    :param groups: forwarded to ``fit`` as ``groups``.
    :param split_start: forwarded to ``fit`` as ``split_start``. The special
        value ``'try_all'`` runs ``fit`` once for each of
        ``'after_transforms'``, ``'before_transforms'`` and every index in
        ``range(len(pipeline.nodes))`` and ``range(-len(pipeline.nodes), 0)``.
    :param expected_metrics: passed to ``check_cv_results``; ``None`` (the
        default) means an empty dict.
    :param params: extra keyword arguments for ``fit``. They are forwarded
        only when ``split_start`` is not ``'try_all'``.
    :return: the value returned by the last ``fit`` call.
    """
    # Build the dict per call: a ``{}`` default is created once at function
    # definition time and would be shared by every call that omits it.
    if expected_metrics is None:
        expected_metrics = {}

    cv = CV(pipeline)

    if split_start != 'try_all':
        results = cv.fit(X,
                         y,
                         cv=n_folds,
                         groups=groups,
                         split_start=split_start,
                         **params)
        check_cv_results(cv._learner_type, results, n_folds, expected_metrics)
        return results

    # 'try_all': the two named split points, then every positive and
    # negative node index of the pipeline.
    len_pipeline = len(pipeline.nodes)
    values_to_test = ['after_transforms', 'before_transforms']
    values_to_test.extend(range(len_pipeline))
    values_to_test.extend(range(-len_pipeline, 0))

    for s in values_to_test:
        # Each run gets its own graph id derived from the split point.
        graph_id = '_split_start={}'.format(str(s))
        results = cv.fit(X,
                         y,
                         cv=n_folds,
                         groups=groups,
                         split_start=s,
                         graph_id=graph_id)
        check_cv_results(cv._learner_type, results, n_folds,
                         expected_metrics)

    return results
Beispiel #2
0
    def test_unseen_classes(self):
        """CV is expected to raise ``Warning`` when folds miss classes."""
        features = random_df()
        labels = random_series()
        # Overwrite the tail of the labels with 0..4 so that cv splits miss
        # some of the classes (assumes the series has 100 entries, so the
        # slice holds exactly five values -- TODO confirm).
        labels[95:] = range(5)

        failure_msg = ("CV didn't raise Warning exception "
                       "b/c of minority class issue")
        with self.assertRaises(Warning, msg=failure_msg):
            # Construction is kept inside the context, so a Warning raised
            # by either the constructor or fit() satisfies the assertion.
            CV([FastLinearClassifier()]).fit(features, labels, cv=3)
Beispiel #3
0
 def check_cv_with_non_defaults(self,
                                label_name='label',
                                group_id='groupid',
                                features='Features_1',
                                **params):
     """Run a 4-fold CV of a ToKey + LightGbmRanker pipeline and check it.

     The data comes from ``self.data(label_name, group_id, features)``.
     The ranker takes its group id and label roles from the ``'groupid2'``
     and ``'label2'`` columns and its feature role from ``[features]``.

     NOTE(review): ``params`` is accepted but never used in this method.
     """
     # ToKey is configured with 'groupid2' -> group_id and
     # 'label2' -> label_name (presumably output column: source column --
     # confirm against the ToKey documentation).
     key_columns = {'groupid2': group_id, 'label2': label_name}
     to_key = ToKey(columns=key_columns)

     ranker_roles = {
         Role.GroupId: 'groupid2',
         Role.Label: 'label2',
         Role.Feature: [features],
     }
     ranker = LightGbmRanker() << ranker_roles

     frame = self.data(label_name, group_id, features)
     validator = CV([to_key, ranker])

     # NOTE(review): ``groups`` is the literal 'groupid', not the
     # ``group_id`` argument -- confirm this is intended.
     fold_results = validator.fit(frame, groups='groupid', cv=4)

     check_cv_results(validator._learner_type,
                      fold_results,
                      n_folds=4,
                      expected_metrics={})