class WC_LGB(WCSklearn):
    learning_rate = hyper_helper.TuneableHyperparam(
        'WC_LGB.learning_rate',
        prior=hyperopt.hp.normal('WC_LGB.learning_rate', 0, 0.25),
        default=0.05778176353527653,
        transform=np.abs,
        disable=False)

    min_child_samples = hyper_helper.TuneableHyperparam(
        'WC_LGB.min_child_samples',
        prior=hyperopt.hp.randint('WC_LGB.min_child_samples', 6),
        default=3,
        transform=lambda v: 2 ** v,
        disable=False)

    def make_cls(self):
        cls = LGBMClassifier(
            n_estimators=2048,
            num_leaves=1024,
            learning_rate=self.learning_rate.get(),
            min_child_samples=self.min_child_samples.get(),
            subsample=0.75,
        )
        return AutoExitingGBMLike(cls, additional_fit_args={'verbose': False})

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_wc_lgbm',
            'lr_{:f}_mc_{:d}'.format(self.learning_rate.get(), self.min_child_samples.get()),
            str(self.fold))
        return (base_path + fname).get()

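# A standalone sketch of how the TuneableHyperparam priors above behave under
# plain hyperopt (everything in this snippet is illustrative, not part of the
# pipeline, and it reuses the module-level hyperopt/numpy imports): a
# randint('x', 6) prior draws an integer in [0, 6), which the 2**v transform
# maps to a min_child_samples of 1..32, and the normal(0, 0.25) prior is
# folded to a positive learning rate with np.abs.
def _tuning_sketch():
    space = {
        'min_child_samples': hyperopt.hp.randint('min_child_samples', 6),
        'learning_rate': hyperopt.hp.normal('learning_rate', 0, 0.25),
    }

    def objective(params):
        min_child_samples = 2 ** params['min_child_samples']  # transform=lambda v: 2**v
        learning_rate = np.abs(params['learning_rate'])       # transform=np.abs
        # ... fit a model with these values and return its validation loss;
        # a constant stands in here so the sketch runs as-is ...
        return {'loss': 0.0, 'status': hyperopt.STATUS_OK}

    return hyperopt.fmin(objective, space, algo=hyperopt.tpe.suggest, max_evals=10)
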
class WC_XGB(WCSklearn):
    max_depth = hyper_helper.TuneableHyperparam(
        'WC_XGB.max_depth',
        prior=hyperopt.hp.randint('WC_XGB.max_depth', 11),
        default=9,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'WC_XGB.learning_rate',
        prior=hyperopt.hp.normal('WC_XGB.learning_rate', 0, 0.25),
        default=0.05,
        transform=np.abs,
        disable=False)

    def make_cls(self):
        return AutoExitingGBMLike(
            XGBClassifier(
                n_estimators=2048,
                learning_rate=self.learning_rate.get(),
                max_depth=self.max_depth.get(),
                subsample=0.75))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_wc_xgb',
            'lr_{:f}_md_{:d}'.format(self.learning_rate.get(), self.max_depth.get()),
            str(self.fold))
        return (base_path + fname).get()

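# AutoExitingGBMLike is defined elsewhere in this repo. Below is a minimal,
# hypothetical sketch of an early-stopping wrapper in that spirit; the class
# name, the 5% holdout, and the 10-round patience are assumptions for
# illustration, not the project's actual implementation. It relies on the
# classic xgboost/lightgbm sklearn fit(eval_set=..., early_stopping_rounds=...)
# signature and the module's sklearn model_selection import.
class EarlyStoppingGBMSketch:
    def __init__(self, cls, additional_fit_args=None):
        self.cls = cls
        self.additional_fit_args = additional_fit_args or {}

    def fit(self, X, y):
        # Carve off a small validation slice so the booster can exit early
        # instead of always running the full n_estimators.
        X_tr, X_va, y_tr, y_va = model_selection.train_test_split(X, y, test_size=0.05)
        self.cls.fit(X_tr, y_tr, eval_set=[(X_va, y_va)],
                     early_stopping_rounds=10, **self.additional_fit_args)
        return self

    def predict_proba(self, X):
        return self.cls.predict_proba(X)

    @property
    def feature_importances_(self):
        return self.cls.feature_importances_
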
class AB_XTC(ABSklearn):
    min_items = hyper_helper.TuneableHyperparam(
        'AB_XTC.min_items',
        prior=hyperopt.hp.randint('AB_XTC.min_items', 9),
        transform=lambda v: 2 ** v,
        default=4,
        disable=False)

    poly_features = hyper_helper.TuneableHyperparam(
        'AB_XTC.poly_features',
        prior=hyperopt.hp.randint('AB_XTC.poly_features', 2),
        transform=lambda v: v + 1,
        default=1,
        disable=False)

    def make_cls(self):
        inner_cls = sklearn.ensemble.ExtraTreesClassifier(
            n_estimators=512,
            n_jobs=-1,
            verbose=1,
            class_weight=core.dictweights,
            min_samples_leaf=self.min_items.get())
        return pipeline.Pipeline([
            ('norm', preprocessing.MinMaxScaler(feature_range=(-1, 1))),
            # The polynomial degree comes from the tuned poly_features
            # hyperparameter, matching the cache path below.
            ('poly', preprocessing.PolynomialFeatures(self.poly_features.get(), include_bias=False)),
            ('xtc', inner_cls)
        ])

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_ab_xtc',
            'min_items_{:d}_pf_{:d}'.format(self.min_items.get(), self.poly_features.get()),
            str(self.fold))
        return (base_path + fname).get()

class AB_XGB(ABSklearn):
    max_depth = hyper_helper.TuneableHyperparam(
        'AB_XGB.max_depth',
        prior=hyperopt.hp.randint('AB_XGB.max_depth', 11),
        default=8,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'AB_XGB.learning_rate',
        prior=hyperopt.hp.normal('AB_XGB.learning_rate', 0, 0.25),
        default=-0.27617359262812374,
        transform=np.abs,
        disable=False)

    def make_cls(self):
        cls = XGBClassifier(
            n_estimators=2048,
            learning_rate=self.learning_rate.get(),
            max_depth=self.max_depth.get(),
            subsample=0.75)
        return pipeline.Pipeline([
            ('norm', preprocessing.Normalizer()),
            ('poly', preprocessing.PolynomialFeatures(2, include_bias=False)),
            ('xgb', AutoExitingGBMLike(cls))
        ])

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_ab_xgb',
            'lr_{:f}_md_{:d}'.format(self.learning_rate.get(), self.max_depth.get()),
            str(self.fold))
        return (base_path + fname).get()

class AllFeatureLGB(RF_SKLearn):
    resources = {'cpu': 7}

    min_items = hyper_helper.TuneableHyperparam(
        'AllFeatureLGB.min_items',
        prior=hyperopt.hp.randint('AllFeatureLGB.min_items', 9),
        transform=lambda v: 2 ** v,
        default=2,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'AllFeatureLGB.learning_rate',
        prior=hyperopt.hp.uniform('AllFeatureLGB.learning_rate', 0, 0.4),
        default=0.02,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return AllFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            lightgbm.sklearn.LGBMClassifier(
                n_estimators=1024,
                num_leaves=1024,
                min_child_samples=self.min_items.get(),
                learning_rate=self.learning_rate.get()))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_all_feat', 'lgb',
            'mi_{:d}_lr_{:f}'.format(self.min_items.get(), self.learning_rate.get()),
            str(self.fold))
        return (base_path + fname).get()

class AllFeatureXGB(RF_SKLearn):
    resources = {'cpu': 7}

    max_depth = hyper_helper.TuneableHyperparam(
        'AllFeatureXGB.max_depth',
        prior=hyperopt.hp.randint('AllFeatureXGB.max_depth', 11),
        default=8,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'AllFeatureXGB.learning_rate',
        prior=hyperopt.hp.normal('AllFeatureXGB.learning_rate', 0, 0.25),
        default=0.05,
        transform=np.abs,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return AllFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            XGBClassifier(
                n_estimators=2048,
                learning_rate=self.learning_rate.get(),
                max_depth=self.max_depth.get(),
                subsample=0.75))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_all_feat', 'xgb',
            'md_{:d}_lr_{:f}'.format(self.max_depth.get(), self.learning_rate.get()),
            str(self.fold))
        return (base_path + fname).get()

class SmallFeatureXTC(RF_SKLearn):
    resources = {'cpu': 7}

    min_items = hyper_helper.TuneableHyperparam(
        'SmallFeatureXTC.min_items',
        prior=hyperopt.hp.randint('SmallFeatureXTC.min_items', 9),
        transform=lambda v: 2 ** v,
        default=2,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return SmallFeaturesTask()

    def make_cls(self):
        return ensemble.ExtraTreesClassifier(
            n_estimators=512,
            n_jobs=-1,
            min_samples_leaf=self.min_items.get(),
            class_weight=core.dictweights)

    def post_fit(self, cls):
        # Print the 40 most important features as a quick sanity check.
        xs = SmallFeaturesTask().load('train', 0, as_df=True)
        series = pandas.Series(cls.feature_importances_, index=xs.columns).sort_values(ascending=False)[:40]
        print(colors.yellow | str(series))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_small_feat', 'xtc',
            'mi_{:d}'.format(self.min_items.get()),
            str(self.fold))
        return (base_path + fname).get()

class SmallFeatureXGB(RF_SKLearn):
    resources = {'cpu': 7}

    max_depth = hyper_helper.TuneableHyperparam(
        'SmallFeatureXGB.max_depth',
        prior=hyperopt.hp.randint('SmallFeatureXGB.max_depth', 11),
        default=8,
        transform=lambda v: v + 1,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'SmallFeatureXGB.learning_rate',
        prior=hyperopt.hp.normal('SmallFeatureXGB.learning_rate', 0, 0.25),
        default=0.05,
        transform=np.abs,
        disable=False)

    def xdataset(self) -> FoldIndependent:
        return SmallFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            XGBClassifier(
                n_estimators=2048,
                learning_rate=self.learning_rate.get(),
                max_depth=self.max_depth.get(),
                subsample=0.75))

    def post_fit(self, cls):
        xs = SmallFeaturesTask().load('train', 0, as_df=True)
        series = pandas.Series(cls.feature_importances_, index=xs.columns).sort_values(ascending=False)[:40]
        print(colors.yellow | str(series))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_small_feat', 'xgb',
            'md_{:d}_lr_{:f}'.format(self.max_depth.get(), self.learning_rate.get()),
            str(self.fold))
        return (base_path + fname).get()

class SmallFeatureLGB(RF_SKLearn):
    resources = {'cpu': 7}

    min_items = hyper_helper.TuneableHyperparam(
        'SmallFeatureLGB.min_items',
        prior=hyperopt.hp.randint('SmallFeatureLGB.min_items', 9),
        transform=lambda v: 2 ** v,
        default=2,
        disable=False)

    learning_rate = hyper_helper.TuneableHyperparam(
        'SmallFeatureLGB.learning_rate',
        prior=hyperopt.hp.uniform('SmallFeatureLGB.learning_rate', 0, 0.4),
        default=0.02,
        disable=False)

    def post_fit(self, cls):
        xs = SmallFeaturesTask().load('train', 0, as_df=True)
        series = pandas.Series(cls.feature_importances_, index=xs.columns).sort_values(ascending=False)[:40]
        print(colors.yellow | str(series))

    def xdataset(self) -> FoldIndependent:
        return SmallFeaturesTask()

    def make_cls(self):
        return AutoExitingGBMLike(
            lightgbm.sklearn.LGBMClassifier(
                n_estimators=1024,
                num_leaves=1024,
                min_child_samples=self.min_items.get(),
                learning_rate=self.learning_rate.get()))

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_small_feat', 'lgb',
            'mi_{:d}_lr_{:f}'.format(self.min_items.get(), self.learning_rate.get()),
            str(self.fold))
        return (base_path + fname).get()

class AB_LGB(ABSklearn):
    learning_rate = hyper_helper.TuneableHyperparam(
        'AB_LGB.learning_rate',
        prior=hyperopt.hp.normal('AB_LGB.learning_rate', 0, 0.25),
        default=0.0932165701272348,
        transform=np.abs,
        disable=False)

    min_child_samples = hyper_helper.TuneableHyperparam(
        'AB_LGB.min_child_samples',
        prior=hyperopt.hp.randint('AB_LGB.min_child_samples', 6),
        default=5,
        transform=lambda v: 2 ** v,
        disable=False)

    def make_cls(self):
        cls = LGBMClassifier(
            n_estimators=2048,
            num_leaves=1024,
            learning_rate=self.learning_rate.get(),
            min_child_samples=self.min_child_samples.get(),
            subsample=0.75,
        )
        return pipeline.Pipeline([
            ('norm', preprocessing.Normalizer()),
            ('poly', preprocessing.PolynomialFeatures(2, include_bias=False)),
            ('lgb', AutoExitingGBMLike(cls, additional_fit_args={'verbose': False}))
        ])

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_ab_lgbm',
            'lr_{:f}_mc_{:d}'.format(self.learning_rate.get(), self.min_child_samples.get()),
            str(self.fold))
        return (base_path + fname).get()

class WC_XTC(WCSklearn):
    min_leaf_samples = hyper_helper.TuneableHyperparam(
        name='WC_XTC_min_leaf_samples',
        prior=hyperopt.hp.randint('WC_XTC_min_leaf_samples', 20),
        default=12,
        transform=lambda v: (v + 1) * 5)

    def make_cls(self):
        return ensemble.ExtraTreesClassifier(
            n_jobs=-1,
            n_estimators=500,
            min_samples_leaf=self.min_leaf_samples.get())

    def make_path(self, fname):
        base_path = BaseTargetBuilder(
            'rf_wc_xtc',
            'ls_{:d}'.format(self.min_leaf_samples.get()),
            str(self.fold))
        return (base_path + fname).get()

class XGBoostClassifier(FoldDependent):
    resources = {'cpu': 7}

    max_depth = hyper_helper.TuneableHyperparam(
        name='XGBoostClassifier_max_depth',
        prior=hyperopt.hp.randint('XGBoostClassifier_max_depth', 12),
        default=10,
        transform=lambda x: x + 1)

    eta = hyper_helper.TuneableHyperparam(
        name='XGBoostClassifier_eta',
        prior=hyperopt.hp.normal('XGBoostClassifier_eta', 0, 0.25),
        default=0.09948116387307111,
        transform=np.abs)

    n_est = hyper_helper.TuneableHyperparam(
        name='XGBoostClassifier_n_est',
        prior=hyperopt.hp.randint('XGBoostClassifier_n_est', 750),
        default=583,
        transform=lambda x: x + 100)

    def _fn(self, name):
        # All artifacts for one hyperparameter setting share a cache directory.
        return 'cache/abhishek/xgb/maxdepth_{:d}_eta_{:f}_nest_{:d}/{:d}/{:s}'.format(
            self.max_depth.get(), self.eta.get(), self.n_est.get(), self.fold, name)

    def _load(self, name, as_df):
        assert name in {'test', 'valid'}
        fn = self._fn(name + '.npy')
        if as_df:
            return pandas.Series(np.load(fn), name='XGBoost').to_frame()
        else:
            return np.load(fn)

    def output(self):
        return luigi.LocalTarget(self._fn('done'))

    def requires(self):
        yield abhishek_feats.AbhishekFeatures()
        yield xval_dataset.BaseDataset()

    def run(self):
        self.output().makedirs()
        X = abhishek_feats.AbhishekFeatures().load('train', self.fold)
        y = xval_dataset.BaseDataset().load('train', self.fold).squeeze()
        cls = xgbsk.XGBClassifier(
            max_depth=self.max_depth.get(),
            learning_rate=self.eta.get(),
            n_estimators=self.n_est.get())
        # Hold out 5% of the training fold so XGBoost can stop early.
        X_tr, X_va, y_tr, y_va = model_selection.train_test_split(X, y, test_size=0.05)
        cls.fit(X_tr, y_tr,
                sample_weight=core.weight_from(y_tr),
                eval_set=[(X_va, y_va)],
                early_stopping_rounds=10)

        validX = abhishek_feats.AbhishekFeatures().load('valid', self.fold)
        y = xval_dataset.BaseDataset().load('valid', self.fold).squeeze()
        y_pred = cls.predict_proba(validX)[:, 1]
        score = core.score_data(y, y_pred)
        scorestr = "{:s} = {:f}".format(repr(self), score)
        print(colors.green | colors.bold | scorestr)
        np.save(self._fn('valid.npy'), y_pred)

        testX = abhishek_feats.AbhishekFeatures().load('test', None)
        pred = cls.predict_proba(testX)[:, 1]
        np.save(self._fn('test.npy'), pred)

        with self.output().open('w') as f:
            cols = abhishek_feats.AbhishekFeatures().load('valid', self.fold, as_df=True).columns
            v = pandas.Series(cls.feature_importances_, index=cols).sort_values()
            v.to_csv(f)
            f.write("\n\n")
            f.write(scorestr)
            f.write("\n")
        return score

class LogitClassifier(FoldDependent, HyperTuneable):
    resources = {'cpu': 1}

    C = hyper_helper.TuneableHyperparam(
        "LogitClassifier_C",
        prior=hyperopt.hp.normal('LogitClassifier_C', 0, 10),
        default=56.9600392248474,
        transform=np.abs)

    npoly = hyper_helper.TuneableHyperparam(
        "LogitClassifier_npoly",
        prior=hyperopt.hp.randint('LogitClassifier_npoly', 3),
        default=1,
        transform=lambda v: v + 1)

    def score(self):
        assert self.complete()
        return self.train()[0]

    def _fn(self, name):
        # npoly is part of the cache key so different polynomial degrees
        # don't overwrite each other's artifacts.
        return 'cache/abhishek/logit/c_{:f}_np_{:d}/{:d}/{:s}'.format(
            self.C.get(), self.npoly.get(), self.fold, name)

    def _load(self, name, as_df):
        assert name in {'test', 'valid'}
        fn = self._fn(name + '.npy')
        if as_df:
            return pandas.DataFrame({'LogitClassifier': np.load(fn)})
        else:
            return np.load(fn)

    def output(self):
        return luigi.LocalTarget(self._fn('done'))

    def requires(self):
        yield abhishek_feats.AbhishekFeatures()
        yield xval_dataset.BaseDataset()

    def train(self):
        self.output().makedirs()
        preproc = pipeline.Pipeline([
            ('norm', preprocessing.Normalizer()),
            ('poly', preprocessing.PolynomialFeatures(self.npoly.get()))
        ])
        X = abhishek_feats.AbhishekFeatures().load('train', self.fold, as_df=True)
        X = preproc.fit_transform(X)
        y = xval_dataset.BaseDataset().load('train', self.fold).squeeze()
        cls = linear_model.LogisticRegression(
            C=self.C.get(), solver='sag', class_weight=core.dictweights)
        cls.fit(X, y)

        print('Validating')
        validX = abhishek_feats.AbhishekFeatures().load('valid', self.fold)
        validX = preproc.transform(validX)
        y = xval_dataset.BaseDataset().load('valid', self.fold).squeeze()
        y_pred = cls.predict_proba(validX)[:, 1]
        score = core.score_data(y, y_pred)
        np.save(self._fn('valid.npy'), y_pred)
        return score, cls, preproc

    def run(self):
        self.output().makedirs()
        score, cls, preproc = self.train()
        scorestr = "{:s} = {:f}".format(repr(self), score)
        print(colors.green | colors.bold | scorestr)
        testX = abhishek_feats.AbhishekFeatures().load('test', None)
        testX = preproc.transform(testX)
        pred = cls.predict_proba(testX)[:, 1]
        np.save(self._fn('test.npy'), pred)
        with self.output().open('w') as f:
            f.write(scorestr)
            f.write("\n")
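
# These classes are luigi tasks, so one way to materialise a single fold is
# via luigi.build. This entry point is a sketch, not a prescribed runner; it
# assumes FoldDependent exposes `fold` as a task parameter.
if __name__ == '__main__':
    luigi.build([XGBoostClassifier(fold=0), LogitClassifier(fold=0)],
                local_scheduler=True, workers=1)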