Esempio n. 1
0
def calibration():
    """
    Build one PrintMetrics step per (n_estimators, k_folds) combination.

    For every combination a RandomForestClassifier construct step is wired
    through model.Fit and model.Predict on a ClassificationData step; the
    prediction step is passed as an input to a CalibratedClassifierCV
    construct step (with ``cv=k_folds``), which goes through
    model.FitPredict on the same data step and finally into
    model.PrintMetrics.

    Returns the list of PrintMetrics steps, ordered with n_estimators
    varying slowest.
    """
    # same ordering as the cartesian product: n_estimators outer, folds inner
    grid = [(n_trees, folds)
            for n_trees in range(50, 300, 100)
            for folds in (2, 5)]

    metric_steps = []
    for n_trees, folds in grid:
        dataset = data.ClassificationData(target=True, n_samples=1000, n_features=100)

        rf = step.Construct(
            'sklearn.ensemble.RandomForestClassifier',
            n_estimators=n_trees, name='estimator')

        uncalibrated = model.Fit(
            inputs=[rf, dataset], return_estimator=True,
            target=True, name='uncalibrated')
        y = model.Predict(inputs=[uncalibrated, dataset], target=True, name='y')

        calibrator = step.Construct(
            'sklearn.calibration.CalibratedClassifierCV', cv=folds,
            inputs=[y], inputs_mapping={'y':None}, name='calibrator')

        calibrated = model.FitPredict(
            inputs=[calibrator, dataset], target=True, name='calibrated')

        # baseline first, then precision at each cutoff k
        specs = [{'metric':'baseline'}]
        specs.extend({'metric':'precision', 'k':top_k}
                     for top_k in (100, 200, 300))

        metric_steps.append(model.PrintMetrics(specs, inputs=[calibrated]))

    return metric_steps
Esempio n. 2
0
def forest():
    """
    Return a one-element list holding a Construct step for a
    scikit-learn RandomForestClassifier with fixed hyperparameters.
    """
    # NOTE(review): `balanced` is not an argument of stock scikit-learn's
    # RandomForestClassifier -- presumably handled by the project or a
    # patched sklearn; confirm before relying on it.
    settings = dict(
        n_estimators=500,
        n_jobs=-1,
        criterion='entropy',
        balanced=True,
        max_features='sqrt'
    )
    rf = step.Construct('sklearn.ensemble.RandomForestClassifier', **settings)
    return [rf]
Esempio n. 3
0
def product_model():
    """
    Build a PredictProduct step named 'p' over two FitPredict steps.

    Both FitPredict steps ('m1' and 'm2') take the same 10-tree
    RandomForestClassifier construct step and the same ClassificationData
    step as inputs.

    Returns the PredictProduct step.
    """
    dataset = data.ClassificationData(target=True, n_samples=1000, n_features=100)
    rf = step.Construct(
        'sklearn.ensemble.RandomForestClassifier',
        n_estimators=10, name='estimator')

    # the two members differ only in their step name
    members = [
        model.FitPredict(inputs=[rf, dataset], target=True, name=label)
        for label in ('m1', 'm2')
    ]

    return model.PredictProduct(
        inputs=members, target=True,
        inputs_mapping=['m1', 'm2'], name='p')
Esempio n. 4
0
def n_estimators_search():
    """
    Build one Predict step for each n_estimators value in 1, 2, 3.

    Every Predict step is fed by its own Fit step (RandomForestClassifier
    construct step + shared ClassificationData step) and is flagged as a
    target.

    Returns the list of Predict steps in increasing n_estimators order.
    """
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    def predict_step(n_trees):
        # construct -> fit -> predict chain for a single forest size
        rf = step.Construct(
            _class='sklearn.ensemble.RandomForestClassifier',
            n_estimators=n_trees)
        fitted = model.Fit(
            inputs=[rf, dataset],
            return_estimator=True, return_feature_importances=True)

        out = model.Predict(inputs=[fitted, dataset])
        out.target = True
        return out

    return [predict_step(n_trees) for n_trees in range(1, 4)]
Esempio n. 5
0
def forest(**update_kwargs):
    """
    Build a Construct step for a scikit-learn RandomForestClassifier.

    Any keyword arguments override or extend the default settings below
    before everything is forwarded to step.Construct.

    Returns a one-element list containing the step.
    """
    defaults = {
        '_class': 'sklearn.ensemble.RandomForestClassifier',
        'n_estimators': 1000,
        'n_jobs': -1,
        'criterion': 'entropy',
        'class_weight': 'balanced_bootstrap',
        'max_features': 'sqrt',
        'random_state': 0,
    }

    # caller-supplied values win over the defaults
    settings = dict(defaults, **update_kwargs)

    return [step.Construct(**settings)]
Esempio n. 6
0
def prediction():
    """
    Build a data -> construct -> fit -> predict chain and return its
    final Predict step.

    The data step and the Predict step are flagged as targets; the
    estimator construct step is explicitly flagged as not a target.
    """
    # classification data step: 1000 samples, 100 features
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    # single-tree random forest estimator
    rf = step.Construct(_class='sklearn.ensemble.RandomForestClassifier', n_estimators=1)
    rf.target = False

    # fit step, asked to return the estimator and feature importances
    fitted = model.Fit(
        inputs=[rf, dataset],
        return_estimator=True, return_feature_importances=True)

    # prediction step on top of the fit
    predictions = model.Predict(inputs=[fitted, dataset])
    predictions.target = True
    return predictions
Esempio n. 7
0
def product_model():
    """
    Build a PredictProduct step named 'p' over two FitPredict steps.

    Both FitPredict steps ('m1' and 'm2') take the same 10-tree
    RandomForestClassifier construct step and the same ClassificationData
    step as inputs. Names and target flags are assigned as attributes
    after each step is created.

    Returns the PredictProduct step.
    """
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    rf = step.Construct(
        _class='sklearn.ensemble.RandomForestClassifier',
        n_estimators=10)
    rf.name = 'estimator'

    # the two members are identical apart from their name
    members = []
    for label in ('m1', 'm2'):
        member = model.FitPredict(inputs=[rf, dataset])
        member.target = True
        member.name = label
        members.append(member)

    combined = model.PredictProduct(inputs=members, inputs_mapping=['m1', 'm2'])
    combined.target = True
    combined.name = 'p'

    return combined