# Imports assumed by these drain workflow examples (module paths may differ in
# the surrounding project):
from itertools import product

from sklearn import ensemble

from drain import data, model, step
from drain.step import Call, Construct, MapResults
from drain.util import dict_product

import lead.model.cv
import lead.model.transform


def calibration():
    steps = []
    for n_estimators, k_folds in product(range(50, 300, 100), [2, 5]):
        d = data.ClassificationData(target=True, n_samples=1000, n_features=100)

        est = step.Construct('sklearn.ensemble.RandomForestClassifier',
                             n_estimators=n_estimators, name='estimator')
        fit = model.Fit(inputs=[est, d], return_estimator=True,
                        target=True, name='uncalibrated')
        predict = model.Predict(inputs=[fit, d], target=True, name='y')

        cal = step.Construct('sklearn.calibration.CalibratedClassifierCV',
                             cv=k_folds, inputs=[predict],
                             inputs_mapping={'y': None}, name='calibrator')
        cal_est = model.FitPredict(inputs=[cal, d], target=True, name='calibrated')

        metrics = model.PrintMetrics([
            {'metric': 'baseline'},
            {'metric': 'precision', 'k': 100},
            {'metric': 'precision', 'k': 200},
            {'metric': 'precision', 'k': 300},
        ], inputs=[cal_est])
        steps.append(metrics)

    return steps
# Variant of the calibration workflow above, wiring inputs with step.Call and
# MapResults rather than Construct and inputs_mapping.
def calibration():
    steps = []
    for n_estimators, k_folds in product(range(50, 300, 100), [2, 5]):
        d = data.ClassificationData(n_samples=1000, n_features=100)
        d.target = True

        est = step.Call(ensemble, 'RandomForestClassifier', n_estimators=n_estimators)
        fit = model.Fit(inputs=[est, d], return_estimator=True)
        fit.target = True
        predict = model.Predict(inputs=[fit, d])
        predict.target = True

        cal = step.Call('sklearn.calibration.CalibratedClassifierCV', cv=k_folds,
                        inputs=[MapResults([predict], {'y': None})])
        cal_est = model.FitPredict(inputs=[cal, d])
        cal_est.target = True
        steps.append(cal_est)

    return steps
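# Usage sketch (assumption, not from the original source): calibration() only
# builds the step graph; executing the steps is left to the surrounding drain
# setup. The searched grid is range(50, 300, 100) = {50, 150, 250} crossed with
# k_folds in {2, 5}, so six leaf steps come back.
def check_calibration_grid():
    steps = calibration()
    assert len(steps) == 6  # 3 n_estimators values x 2 k_folds values
    return steps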
def n_estimators_search():
    d = data.ClassificationData(n_samples=1000, n_features=100)
    d.target = True

    predict = []
    for n_estimators in range(1, 4):
        e = step.Construct(_class='sklearn.ensemble.RandomForestClassifier',
                           n_estimators=n_estimators)
        f = model.Fit(inputs=[e, d], return_estimator=True,
                      return_feature_importances=True)
        p = model.Predict(inputs=[f, d])
        p.target = True
        predict.append(p)

    return predict
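# Usage sketch (assumption): n_estimators_search() returns one Predict step per
# n_estimators in range(1, 4), all sharing a single ClassificationData input.
def check_n_estimators_search():
    predict_steps = n_estimators_search()
    assert len(predict_steps) == 3
    return predict_steps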
def prediction():
    # generate the data including a training and test split
    d = data.ClassificationData(n_samples=1000, n_features=100)
    d.target = True

    # construct a random forest estimator
    e = step.Construct(_class='sklearn.ensemble.RandomForestClassifier', n_estimators=1)
    e.target = False

    # fit the estimator
    f = model.Fit(inputs=[e, d], return_estimator=True, return_feature_importances=True)

    # make predictions
    p = model.Predict(inputs=[f, d])
    p.target = True
    return p
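# Usage sketch (assumption): prediction() returns the leaf Predict step, and the
# chain ClassificationData -> Construct -> Fit -> Predict can be walked through
# the steps' inputs lists (assuming drain steps expose inputs as an attribute).
def inspect_prediction_graph():
    p = prediction()
    fit, d = p.inputs          # Fit step and the shared data step
    estimator = fit.inputs[0]  # Construct step for the random forest
    return p, fit, estimator, d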
def models(estimators, cv_search, transform_search):
    """
    Grid search prediction workflows. Used by bll6_models, test_models, and
    product_models.

    Args:
        estimators: collection of steps, each of which constructs an estimator
        cv_search: dictionary of arguments to LeadCrossValidate to search over
        transform_search: dictionary of arguments to LeadTransform to search over

    Returns: a list of drain.model.Predict steps constructed by taking the
        product of the estimators with the result of drain.util.dict_product on
        each of cv_search and transform_search. Each Predict step contains the
        following in its inputs graph:
            - lead.model.cv.LeadCrossValidate
            - lead.model.transform.LeadTransform
            - drain.model.Fit
    """
    steps = []

    for cv_args, transform_args, estimator in product(
            dict_product(cv_search), dict_product(transform_search), estimators):
        cv = lead.model.cv.LeadCrossValidate(**cv_args)
        cv.name = 'cv'

        X_train = Call('__getitem__', inputs=[
            MapResults([cv], {'X': 'obj', 'train': 'key',
                              'test': None, 'aux': None})])

        mean = Call('mean', inputs=[X_train])
        mean.name = 'mean'

        X_impute = Construct(data.impute, inputs=[
            MapResults([cv], {'aux': None, 'test': None, 'train': None}),
            MapResults([mean], 'value')])

        cv_imputed = MapResults([X_impute, cv], ['X', {'X': None}])
        cv_imputed.target = True

        transform = lead.model.transform.LeadTransform(inputs=[cv_imputed],
                                                       **transform_args)
        transform.name = 'transform'

        fit = model.Fit(inputs=[estimator, transform], return_estimator=True)
        fit.name = 'fit'

        y = model.Predict(inputs=[fit, transform],
                          return_feature_importances=True)
        y.name = 'predict'
        y.target = True

        steps.append(y)

    return steps
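# Illustration (assumption): drain.util.dict_product is taken here to expand a
# dict of candidate values into one dict per combination, so models() yields
# len(estimators) * len(dict_product(cv_search)) *
# len(dict_product(transform_search)) Predict steps. For example:
#
#     dict_product({'month': [1, 7], 'year': [2016]})
#     # -> [{'month': 1, 'year': 2016}, {'month': 7, 'year': 2016}]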