Code example #1
File: rf_wc_sklearn.py  Project: ririw/kaggle-quora
class WC_LGB(WCSklearn):
    learning_rate = hyper_helper.TuneableHyperparam(
        'WC_LGB.learning_rate',
        prior=hyperopt.hp.normal('WC_LGB.learning_rate', 0, 0.25),
        default=0.05778176353527653,
        transform=np.abs,
        disable=False)

    min_child_samples = hyper_helper.TuneableHyperparam(
        'WC_LGB.min_child_samples',
        prior=hyperopt.hp.randint('WC_LGB.min_child_samples', 6),
        default=3,
        transform=lambda v: 2**v,
        disable=False)

    def make_cls(self):
        cls = LGBMClassifier(
            n_estimators=2048,
            num_leaves=1024,
            learning_rate=self.learning_rate.get(),
            min_child_samples=self.min_child_samples.get(),
            subsample=0.75,
        )
        return AutoExitingGBMLike(cls, additional_fit_args={'verbose': False})

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_wc_lgbm',
            'lr_{:f}_mc_{:d}'.format(self.learning_rate.get(),
                                     self.min_child_samples.get()),
            str(self.fold))
        return (base_path + fname).get()
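
Every example on this page leans on hyper_helper.TuneableHyperparam, a helper from ririw/kaggle-quora whose definition does not appear here. A minimal sketch of its assumed semantics, for orientation only — the attribute names and fallback behavior below are guesses, not the project's actual code:

class TuneableHyperparam:
    def __init__(self, name, prior, default, transform=None, disable=False):
        self.name = name          # key under which a tuned value is stored
        self.prior = prior        # hyperopt search space used during tuning
        self.default = default    # raw (pre-transform) fallback value
        self.transform = transform or (lambda v: v)
        self.disable = disable    # when True, always use the default

    def get(self):
        # Assumed behavior: prefer a previously tuned value, else the default,
        # and apply the transform either way.
        value = None if self.disable else self._load_tuned()
        return self.transform(value if value is not None else self.default)

    def _load_tuned(self):
        return None  # placeholder; the real helper reads from a tuning store

Under this reading the defaults live in prior space: min_child_samples above defaults to 3, which 2**v turns into 8 actual samples.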
Code example #2
File: rf_wc_sklearn.py  Project: ririw/kaggle-quora
class WC_XGB(WCSklearn):
    max_depth = hyper_helper.TuneableHyperparam(
        'WC_XGB.max_depth',
        prior=hyperopt.hp.randint('WC_XGB.max_depth', 11),
        default=9,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'WC_XGB.learning_rate',
        prior=hyperopt.hp.normal('WC_XGB.learning_rate', 0, 0.25),
        default=0.05,
        transform=np.abs,
        disable=False)

    def make_cls(self):
        return AutoExitingGBMLike(
            XGBClassifier(n_estimators=2048,
                          learning_rate=self.learning_rate.get(),
                          max_depth=self.max_depth.get(),
                          subsample=0.75))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_wc_xgb', 'lr_{:f}_md_{:d}'.format(self.learning_rate.get(),
                                                  self.max_depth.get()),
            str(self.fold))
        return (base_path + fname).get()
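
The priors are declared in raw space and mapped into model space by each transform: hp.randint(..., 11) draws an integer from 0..10, which v + 1 shifts to depths 1..11, and np.abs folds the hp.normal(0, 0.25) draw into a half-normal over learning rates. A quick way to see both mappings (assumes only that numpy and hyperopt are installed):

import numpy as np
import hyperopt
from hyperopt.pyll.stochastic import sample

depth_prior = hyperopt.hp.randint('demo.max_depth', 11)       # uniform over 0..10
lr_prior = hyperopt.hp.normal('demo.learning_rate', 0, 0.25)  # centered at 0

print(sample(depth_prior) + 1)   # the max_depth transform: 0..10 -> 1..11
print(np.abs(sample(lr_prior)))  # the learning_rate transform: always positive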
Code example #3
class AB_XTC(ABSklearn):
    min_items = hyper_helper.TuneableHyperparam(
        'AB_XTC.min_items',
        prior=hyperopt.hp.randint('AB_XTC.min_items', 9),
        transform=lambda v: 2 ** v,
        default=4,
        disable=False)
    poly_features = hyper_helper.TuneableHyperparam(
        'AB_XTC.poly_features',
        prior=hyperopt.hp.randint('AB_XTC.poly_features', 2),
        transform=lambda v: v + 1,
        default=1,
        disable=False)

    def make_cls(self):
        inner_cls = sklearn.ensemble.ExtraTreesClassifier(
                n_estimators=512, n_jobs=-1,
                verbose=1,
                class_weight=core.dictweights,
                min_samples_leaf=self.min_items.get())
        return pipeline.Pipeline([
            ('norm', preprocessing.MinMaxScaler(feature_range=(-1, 1))),
            ('poly', preprocessing.PolynomialFeatures(self.poly_features.get(), include_bias=False)),
            ('xtc', inner_cls)
        ])

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_ab_xtc',
            'min_items_{:f}_pf_{:d}'.format(self.min_items.get(), self.poly_features.get()),
            str(self.fold)
        )
        return (base_path + fname).get()
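
AB_XTC feeds the trees through a pipeline that first scales every column into (-1, 1) and then applies a degree-2 polynomial expansion, which turns n input columns into n(n+3)/2 outputs: the n linear terms, n squares, and n(n-1)/2 pairwise interactions. A standalone check of that count:

import numpy as np
from sklearn import preprocessing

X = np.random.rand(4, 5)                    # 4 rows, 5 input features
poly = preprocessing.PolynomialFeatures(2, include_bias=False)
print(poly.fit_transform(X).shape)          # (4, 20): 5 linear + 5 squares + 10 interactions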
Code example #4
class AB_XGB(ABSklearn):
    max_depth = hyper_helper.TuneableHyperparam(
        'AB_XGB.max_depth',
        prior=hyperopt.hp.randint('AB_XGB.max_depth', 11),
        default=8,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'AB_XGB.learning_rate',
        prior=hyperopt.hp.normal('AB_XGB.learning_rate', 0, 0.25),
        default=-0.27617359262812374,
        transform=np.abs,
        disable=False)

    def make_cls(self):
        cls = XGBClassifier(
                n_estimators=2048,
                learning_rate=self.learning_rate.get(),
                max_depth=self.max_depth.get(),
                subsample=0.75)
        return pipeline.Pipeline([
            ('norm', preprocessing.Normalizer()),
            ('poly', preprocessing.PolynomialFeatures(2, include_bias=False)),
            ('xgb', AutoExitingGBMLike(cls))
        ])

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_ab_xgb',
            'lr_{:f}_md_{:d}'.format(self.learning_rate.get(), self.max_depth.get()),
            str(self.fold)
        )
        return (base_path + fname).get()
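
Note the preprocessing swap relative to AB_XTC above: Normalizer rescales each row (sample) to unit L2 norm, while MinMaxScaler rescales each column into a fixed range. The difference is easy to see:

import numpy as np
from sklearn import preprocessing

X = np.array([[3.0, 4.0], [1.0, 0.0]])
print(preprocessing.Normalizer().fit_transform(X))
# [[0.6 0.8]
#  [1.  0. ]]   <- every row now has length 1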
Code example #5
File: rf_all_features.py  Project: ririw/kaggle-quora
class AllFeatureLGB(RF_SKLearn):
    resources = {'cpu': 7}

    min_items = hyper_helper.TuneableHyperparam(
        'AllFeatureLGB.min_items',
        prior=hyperopt.hp.randint('AllFeatureLGB.min_items', 9),
        transform=lambda v: 2**v,
        default=2,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'AllFeatureLGB.learning_rate',
        prior=hyperopt.hp.uniform('AllFeatureLGB.learning_rate', 0, 0.4),
        default=0.02,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return AllFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            lightgbm.sklearn.LGBMClassifier(
                n_estimators=1024,
                num_leaves=1024,
                min_child_samples=self.min_items.get(),
                learning_rate=self.learning_rate.get()))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_all_feat', 'lgb',
            'mi_{:d}_lr_{:f}'.format(self.min_items.get(),
                                     self.learning_rate.get()), str(self.fold))
        return (base_path + fname).get()
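
AutoExitingGBMLike is another project helper not shown on this page. From how it is used — wrapping LGBM/XGB classifiers whose n_estimators is set very high, and forwarding additional_fit_args — it presumably holds out an eval set and fits with early stopping, so the estimator count acts as a budget rather than a fixed iteration count. A rough sketch under exactly those assumptions (the split size, patience, and delegation are guesses):

from sklearn import model_selection

class AutoExitingGBMLike:
    def __init__(self, cls, additional_fit_args=None):
        self.cls = cls
        self.additional_fit_args = additional_fit_args or {}

    def fit(self, X, y):
        # assumed behavior: hold out a small slice and let the GBM stop early
        X_tr, X_va, y_tr, y_va = model_selection.train_test_split(
            X, y, test_size=0.05)
        self.cls.fit(X_tr, y_tr, eval_set=[(X_va, y_va)],
                     early_stopping_rounds=10, **self.additional_fit_args)
        return self

    def __getattr__(self, name):
        # delegate predict_proba, feature_importances_, etc. to the wrapped model
        return getattr(self.cls, name)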
Code example #6
File: rf_all_features.py  Project: ririw/kaggle-quora
class AllFeatureXGB(RF_SKLearn):
    resources = {'cpu': 7}

    max_depth = hyper_helper.TuneableHyperparam(
        'AllFeatureXGB.max_depth',
        prior=hyperopt.hp.randint('AllFeatureXGB.max_depth', 11),
        default=8,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'AllFeatureXGB.learning_rate',
        prior=hyperopt.hp.normal('AllFeatureXGB.learning_rate', 0, 0.25),
        default=.05,
        transform=np.abs,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return AllFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            XGBClassifier(n_estimators=2048,
                          learning_rate=self.learning_rate.get(),
                          max_depth=self.max_depth.get(),
                          subsample=0.75))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_all_feat', 'xgb',
            'md_{:d}_lr_{:f}'.format(self.max_depth.get(),
                                     self.learning_rate.get()), str(self.fold))
        return (base_path + fname).get()
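
BaseTargetBuilder is likewise a project helper whose definition is not shown. From its call pattern — variadic path segments, + fname, then .get() — it plausibly just assembles a cache path. A guess at its shape (the cache root and join logic are assumptions):

import os

class BaseTargetBuilder:
    def __init__(self, *parts):
        self.parts = list(parts)

    def __add__(self, fname):
        return BaseTargetBuilder(*(self.parts + [fname]))

    def get(self):
        return os.path.join('cache', *self.parts)

# e.g. (BaseTargetBuilder('rf_all_feat', 'xgb', 'md_9', '0') + 'valid.npy').get()
#      -> 'cache/rf_all_feat/xgb/md_9/0/valid.npy'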
Code example #7
class SmallFeatureXTC(RF_SKLearn):
    resources = {'cpu': 7}

    min_items = hyper_helper.TuneableHyperparam(
        'SmallFeatureXTC.min_items',
        prior=hyperopt.hp.randint('SmallFeatureXTC.min_items', 9),
        transform=lambda v: 2 ** v,
        default=2,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return SmallFeaturesTask()

    def make_cls(self):
        return ensemble.ExtraTreesClassifier(
            n_estimators=512,
            n_jobs=-1,
            min_samples_leaf=self.min_items.get(),
            class_weight=core.dictweights)

    def post_fit(self, cls):
        xs = SmallFeaturesTask().load('train', 0, as_df=True)
        series = pandas.Series(cls.feature_importances_, index=xs.columns).sort_values(ascending=False)[:40]
        print(colors.yellow | str(series))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_small_feat',
            'xtc',
            'mi_{:d}'.format(self.min_items.get()),
            str(self.fold)
        )
        return (base_path + fname).get()
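
post_fit pairs each feature importance with its column name and prints the 40 largest. The colors object appears to be plumbum's ANSI helper (the pipe-into-string style matches that API); a standalone version with made-up feature names:

import numpy as np
import pandas
from plumbum import colors

importances = np.array([0.7, 0.1, 0.2])
cols = ['len_diff', 'word_share', 'tfidf_sim']     # hypothetical column names
series = pandas.Series(importances, index=cols).sort_values(ascending=False)
print(colors.yellow | str(series))                 # prints the ranking in yellow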
Code example #8
class SmallFeatureXGB(RF_SKLearn):
    resources = {'cpu': 7}

    max_depth = hyper_helper.TuneableHyperparam(
        'SmallFeatureXGB.max_depth',
        prior=hyperopt.hp.randint('SmallFeatureXGB.max_depth', 11),
        default=8,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'SmallFeatureXGB.learning_rate',
        prior=hyperopt.hp.normal('SmallFeatureXGB.learning_rate', 0, 0.25),
        default=.05,
        transform=np.abs,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return SmallFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            XGBClassifier(
                n_estimators=2048,
                learning_rate=self.learning_rate.get(),
                max_depth=self.max_depth.get(),
                subsample=0.75)
        )

    def post_fit(self, cls):
        xs = SmallFeaturesTask().load('train', 0, as_df=True)
        series = pandas.Series(cls.feature_importances_, index=xs.columns).sort_values(ascending=False)[:40]
        print(colors.yellow | str(series))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_small_feat',
            'xgb',
            'md_{:d}_lr_{:f}'.format(self.max_depth.get(), self.learning_rate.get()),
            str(self.fold)
        )
        return (base_path + fname).get()
Code example #9
class SmallFeatureLGB(RF_SKLearn):
    resources = {'cpu': 7}

    min_items = hyper_helper.TuneableHyperparam(
        'SmallFeatureLGB.min_items',
        prior=hyperopt.hp.randint('SmallFeatureLGB.min_items', 9),
        transform=lambda v: 2 ** v,
        default=2,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'SmallFeatureLGB.learning_rate',
        prior=hyperopt.hp.uniform('SmallFeatureLGB.learning_rate', 0, 0.4),
        default=0.02,
        disable=False)

    def post_fit(self, cls):
        xs = SmallFeaturesTask().load('train', 0, as_df=True)
        series = pandas.Series(cls.feature_importances_, index=xs.columns).sort_values(ascending=False)[:40]
        print(colors.yellow | str(series))

    def xdataset(self) -> FoldIndependent:
        return SmallFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            lightgbm.sklearn.LGBMClassifier(
                n_estimators=1024,
                num_leaves=1024,
                min_child_samples=self.min_items.get(),
                learning_rate=self.learning_rate.get()
            )
        )

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_small_feat',
            'lgb',
            'mi_{:d}_lr_{:f}'.format(self.min_items.get(), self.learning_rate.get()),
            str(self.fold)
        )
        return (base_path + fname).get()
Code example #10
class AB_LGB(ABSklearn):
    learning_rate = hyper_helper.TuneableHyperparam(
        'AB_LGB.learning_rate',
        prior=hyperopt.hp.normal('AB_LGB.learning_rate', 0, 0.25),
        default=0.0932165701272348,
        transform=np.abs,
        disable=False)

    min_child_samples = hyper_helper.TuneableHyperparam(
        'AB_LGB.min_child_samples',
        prior=hyperopt.hp.randint('AB_LGB.min_child_samples', 6),
        default=5,
        transform=lambda v: 2 ** v,
        disable=False)

    def make_cls(self):
        cls = LGBMClassifier(
            n_estimators=2048,
            num_leaves=1024,
            learning_rate=self.learning_rate.get(),
            min_child_samples=self.min_child_samples.get(),
            subsample=0.75,
        )

        return pipeline.Pipeline([
            ('norm', preprocessing.Normalizer()),
            ('poly', preprocessing.PolynomialFeatures(2, include_bias=False)),
            ('lgb', AutoExitingGBMLike(cls, additional_fit_args={'verbose': False}))
        ])

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_ab_lgbm',
            'lr_{:f}_mc_{:d}'.format(self.learning_rate.get(), self.min_child_samples.get()),
            str(self.fold)
        )
        return (base_path + fname).get()
Code example #11
File: rf_wc_sklearn.py  Project: ririw/kaggle-quora
class WC_XTC(WCSklearn):
    min_leaf_samples = hyper_helper.TuneableHyperparam(
        name='WC_XTC_min_leaf_samples',
        prior=hyperopt.hp.randint('WC_XTC_min_leaf_samples', 20),
        default=12,
        transform=lambda v: (v + 1) * 5)

    def make_cls(self):
        return ensemble.ExtraTreesClassifier(
            n_jobs=-1,
            n_estimators=500,
            min_samples_leaf=self.min_leaf_samples.get())

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_wc_xtc', 'ls_{:d}'.format(self.min_leaf_samples.get()),
            str(self.fold))
        return (base_path + fname).get()
Code example #12
class XGBoostClassifier(FoldDependent):
    resources = {'cpu': 7}

    max_depth = hyper_helper.TuneableHyperparam(
        name='XGBoostClassifier_max_depth',
        prior=hyperopt.hp.randint('XGBoostClassifier_max_depth', 12),
        default=10,
        transform=lambda x: x + 1)
    eta = hyper_helper.TuneableHyperparam(
        name='XGBoostClassifier_eta',
        prior=hyperopt.hp.normal('XGBoostClassifier_eta', 0, 0.25),
        default=0.09948116387307111,
        transform=np.abs)
    n_est = hyper_helper.TuneableHyperparam(
        name='XGBoostClassifier_n_est',
        prior=hyperopt.hp.randint('XGBoostClassifier_n_est', 750),
        default=583,
        transform=lambda x: x + 100)

    def _load(self, name, as_df):
        assert name in {'test', 'valid'}
        fn = 'cache/abhishek/xgb/maxdepth_{:d}_eta_{:f}_nest_{:d}/{:d}/{:s}.npy'.format(
            self.max_depth.get(), self.eta.get(), self.n_est.get(), self.fold,
            name)
        if as_df:
            return pandas.Series(np.load(fn), name='XGBoost').to_frame()
        else:
            return np.load(fn)

    def output(self):
        fn = 'cache/abhishek/xgb/maxdepth_{:d}_eta_{:f}_nest_{:d}/{:d}/done'.format(
            self.max_depth.get(), self.eta.get(), self.n_est.get(), self.fold)

        return luigi.LocalTarget(fn)

    def requires(self):
        yield abhishek_feats.AbhishekFeatures()
        yield xval_dataset.BaseDataset()

    def run(self):
        self.output().makedirs()
        X = abhishek_feats.AbhishekFeatures().load('train', self.fold)
        y = xval_dataset.BaseDataset().load('train', self.fold).squeeze()
        cls = xgbsk.XGBClassifier(max_depth=self.max_depth.get(),
                                  learning_rate=self.eta.get(),
                                  n_estimators=self.n_est.get())
        X_tr, X_va, y_tr, y_va = model_selection.train_test_split(
            X, y, test_size=0.05)
        cls.fit(X_tr,
                y_tr,
                sample_weight=core.weight_from(y_tr),
                eval_set=[(X_va, y_va)],
                early_stopping_rounds=10)

        validX = abhishek_feats.AbhishekFeatures().load('valid', self.fold)
        y = xval_dataset.BaseDataset().load('valid', self.fold).squeeze()
        y_pred = cls.predict_proba(validX)[:, 1]
        score = core.score_data(y, y_pred)
        scorestr = "{:s} = {:f}".format(repr(self), score)
        print(colors.green | colors.bold | scorestr)

        valid_fn = 'cache/abhishek/xgb/maxdepth_{:d}_eta_{:f}_nest_{:d}/{:d}/valid.npy'.format(
            self.max_depth.get(), self.eta.get(), self.n_est.get(), self.fold)

        np.save(valid_fn, y_pred)

        testX = abhishek_feats.AbhishekFeatures().load('test', None)
        pred = cls.predict_proba(testX)[:, 1]

        test_fn = 'cache/abhishek/xgb/maxdepth_{:d}_eta_{:f}_nest_{:d}/{:d}/test.npy'.format(
            self.max_depth.get(), self.eta.get(), self.n_est.get(), self.fold)
        np.save(test_fn, pred)

        with self.output().open('w') as f:
            cols = abhishek_feats.AbhishekFeatures().load('valid',
                                                          self.fold,
                                                          as_df=True).columns
            v = pandas.Series(cls.feature_importances_,
                              index=cols).sort_values()
            v.to_csv(f)
            f.write("\n\n")
            f.write(scorestr)
            f.write("\n")
        return score
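
XGBoostClassifier follows the standard luigi contract: requires() declares upstream tasks, run() does the work, and output() names a target whose existence tells the scheduler the task is complete — which is why the 'done' marker is written only at the very end. A minimal standalone task with the same shape (the paths and payload are hypothetical):

import luigi

class TrainModel(luigi.Task):
    fold = luigi.IntParameter()

    def output(self):
        # marker file: its existence means this (task, fold) already ran
        return luigi.LocalTarget('cache/demo/{:d}/done'.format(self.fold))

    def run(self):
        self.output().makedirs()
        with self.output().open('w') as f:
            f.write('score = 0.0\n')   # written only after the work succeeds

if __name__ == '__main__':
    luigi.build([TrainModel(fold=0)], local_scheduler=True)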
Code example #13
class LogitClassifier(FoldDependent, HyperTuneable):
    resources = {'cpu': 1}

    C = hyper_helper.TuneableHyperparam(
        "LogitClassifier_C",
        prior=hyperopt.hp.normal('LogitClassifier_C', 0, 10),
        default=56.9600392248474,
        transform=np.abs)
    npoly = hyper_helper.TuneableHyperparam(
        "LogitClassifier_npoly",
        prior=hyperopt.hp.randint('LogitClassifier_npoly', 3),
        default=1,
        transform=lambda v: v + 1)

    def score(self):
        assert self.complete()
        return self.train()[0]

    def _load(self, name, as_df):
        assert name in {'test', 'valid'}
        fn = 'cache/abhishek/logit/{:f}/{:d}/{:s}.npy'.format(
            self.C.get(), self.fold, name)
        if as_df:
            return pandas.DataFrame({'LogitClassifier': np.load(fn)})
        else:
            return np.load(fn)

    def output(self):
        return luigi.LocalTarget('cache/abhishek/logit/{:f}/{:d}/done'.format(
            self.C.get(), self.fold))

    def requires(self):
        yield abhishek_feats.AbhishekFeatures()
        yield xval_dataset.BaseDataset()

    def train(self):
        self.output().makedirs()
        preproc = pipeline.Pipeline([
            ('norm', preprocessing.Normalizer()),
            ('poly', preprocessing.PolynomialFeatures(self.npoly.get()))
        ])

        X = abhishek_feats.AbhishekFeatures().load('train',
                                                   self.fold,
                                                   as_df=True)
        X = preproc.fit_transform(X)
        y = xval_dataset.BaseDataset().load('train', self.fold).squeeze()
        cls = linear_model.LogisticRegression(C=self.C.get(),
                                              solver='sag',
                                              class_weight=core.dictweights)
        cls.fit(X, y)

        print('Validating')
        validX = abhishek_feats.AbhishekFeatures().load('valid', self.fold)
        validX = preproc.transform(validX)
        y = xval_dataset.BaseDataset().load('valid', self.fold).squeeze()
        y_pred = cls.predict_proba(validX)[:, 1]

        score = core.score_data(y, y_pred)
        np.save(
            'cache/abhishek/logit/{:f}/{:d}/valid.npy'.format(
                self.C.get(), self.fold), y_pred)

        return score, cls, preproc

    def run(self):
        self.output().makedirs()

        score, cls, preproc = self.train()
        scorestr = "{:s} = {:f}".format(repr(self), score)
        print(colors.green | colors.bold | scorestr)

        testX = abhishek_feats.AbhishekFeatures().load('test', None)
        testX = preproc.transform(testX)
        pred = cls.predict_proba(testX)[:, 1]
        np.save(
            'cache/abhishek/logit/{:f}/{:d}/test.npy'.format(
                self.C.get(), self.fold), pred)

        with self.output().open('w') as f:
            f.write(scorestr)
            f.write("\n")
        return score
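
Several examples above handle class imbalance through two more project helpers that are not shown: core.dictweights (passed as class_weight= in #3, #7, and #13) and core.weight_from (passed as sample_weight= in #12). A generic sketch of that pattern with made-up weights — the actual values used in kaggle-quora are unknown here:

import numpy as np
from sklearn import linear_model

dictweights = {0: 1.0, 1: 0.5}        # hypothetical per-class weights

def weight_from(y):
    # per-sample weights looked up from the per-class dictionary
    return np.asarray([dictweights[int(v)] for v in y])

cls = linear_model.LogisticRegression(C=1.0, solver='sag',
                                      class_weight=dictweights)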