コード例 #1
0
def compare_classifiers(X,
                        y,
                        ests,
                        scoring,
                        trials,
                        inner_splits,
                        outer_splits=None,
                        randcv_budget=20):
    """Run repeated (nested) cross-validation for each candidate model.

    Parameters
    ----------
    X, y : training data and labels, forwarded to ``nested_cv``.
    ests : iterable of ``(label, steps, p_grid)`` triples — ``steps`` builds
        an imbalanced-learn Pipeline and ``p_grid`` is its parameter grid.
    scoring : scoring specification forwarded to ``nested_cv``.
    trials : number of independent CV repetitions per model.
    inner_splits, outer_splits, randcv_budget : forwarded to ``nested_cv``.

    Returns
    -------
    list of ``(label, scores, estimators)`` where ``scores`` is a float array
    of length ``trials`` and ``estimators`` holds the fitted estimators.
    """
    comparisons = []

    for label, steps, p_grid in ests:
        scores = np.empty(trials, dtype=float)
        fitted = np.empty(trials, dtype=object)

        # Repeat the whole (nested) CV to average out the randomness
        # of the fold assignment.
        # TODO: RepeatedKFold? RepeatedStratifiedKFold?
        for t in range(trials):
            pipe = imb_pipe.Pipeline(steps)
            scores[t], fitted[t] = nested_cv(X, y, pipe, p_grid, scoring,
                                             inner_splits, outer_splits,
                                             randcv_budget)
        comparisons.append((label, scores, fitted))

    return comparisons
コード例 #2
0
def no_resample(classifier):
    """Out-of-fold predictions with scaling only — no class resampling.

    ``classifier`` is a ``(name, estimator)`` step tuple.  Relies on the
    module-level ``cv`` splitter and ``prepare_data``.

    Returns ``(y, y_pred, c)``: true labels, cross-validated predictions,
    and a Counter of the (unmodified) class distribution.
    """
    print('*** NO RESAMPLE ***')

    steps = [('scaler', preprocessing.StandardScaler()), classifier]
    pipe = pipeline.Pipeline(steps)

    X, y = prepare_data()

    # One prediction per sample, each produced by a model that never saw it.
    y_pred = model_selection.cross_val_predict(pipe, X, y, cv=cv, n_jobs=-1)

    class_counts = Counter(y)

    return y, y_pred, class_counts
コード例 #3
0
def undersample(classifier):
    """Out-of-fold predictions after random undersampling of the majority.

    ``classifier`` is a ``(name, estimator)`` step tuple.  Relies on the
    module-level ``cv`` splitter and ``prepare_data``.

    Returns ``(y, y_pred, c)``: true labels, cross-validated predictions,
    and a Counter of the class distribution *after* undersampling.
    """
    print('*** UNDERSAMPLE ***')

    pipe = pipeline.Pipeline([
        ('scaler', preprocessing.StandardScaler()),
        ('resample', under_sampling.RandomUnderSampler()),
        classifier,
    ])

    X, y = prepare_data()

    y_pred = model_selection.cross_val_predict(pipe, X, y, cv=cv, n_jobs=-1)

    # BUG FIX: fit_sample was deprecated in imbalanced-learn 0.4 and removed
    # in 0.8; fit_resample is the supported name with identical behavior.
    c = Counter(under_sampling.RandomUnderSampler().fit_resample(X, y)[1])

    return y, y_pred, c
コード例 #4
0

def load_rutgers_with_quantiles():
    """Parse every with-quantiles Rutgers CSV into a list of traces."""
    from glob import glob

    # The pattern contains no '**', so the previous recursive=True flag was
    # a no-op; a plain glob matches exactly the same files.  The loop
    # variable previously named `df` held file paths, not DataFrames.
    pattern = ('../../featureGenerator/datasets/dataset-2-rutgers-wifi' +
               '/with-quantiles/*.csv')
    traces = [parse_rutgers_with_quantiles(path) for path in glob(pattern)]
    return traces


# Stratified 10-fold splitter shared by the experiments below.
# shuffle=True without a random_state means folds differ on every run.
cv = model_selection.StratifiedKFold(n_splits=10, shuffle=True)

# Scale -> random oversampling (balances classes) -> logistic regression.
pipe_logreg = pipeline.Pipeline([
    ('scaler', preprocessing.StandardScaler()),
    ('resample', over_sampling.RandomOverSampler()),
    ('clf', linear_model.LogisticRegression()),
])

# Same preprocessing chain, with a decision tree as the classifier.
pipe_dtree = pipeline.Pipeline([
    ('scaler', preprocessing.StandardScaler()),
    ('resample', over_sampling.RandomOverSampler()),
    ('clf', tree.DecisionTreeClassifier()),
])


@memory.cache
def prepare_data():
    dataset = load_rutgers_with_quantiles()
    print('Rutgers loaded ...')
コード例 #5
0
    elif prr <= 0.1:
        return 'bad'
    else:
        return 'interm.'


# Feature columns consumed by this experiment.
features = ['rssi', 'rssi_std', 'rssi_avg']

# Stratified 10-fold splitter; shuffle=True randomizes folds each run.
cv = model_selection.StratifiedKFold(n_splits=10, shuffle=True)

# Alternative classifier kept for quick swapping:
#classifier = ('logreg', linear_model.LogisticRegression(solver='lbfgs', multi_class='ovr'))
classifier = ('dtree', tree.DecisionTreeClassifier())

# Scale -> random oversampling -> the selected classifier step.
pipe = pipeline.Pipeline([
    ('scaler', preprocessing.StandardScaler()),
    ('resample', over_sampling.RandomOverSampler()),
    classifier,
])


@memory.cache
def constant(pipe):
    print('*** ZERO PADDING interpolation ***')

    dataset = []
    for df in load_rutgers():
        df.loc[df['received'] == 0, 'rssi'] = np.nan
        dataset.append(df)
    print('Rutgers loaded ...')

    dataset = CustomInterpolation(source='rssi', strategy='constant', constant=0).fit_transform(dataset)
コード例 #6
0
    else:
        return 'interm.'


@memory.cache
def load_rutgers():
    """Return all Rutgers traces, materialized as a list (cached via ``memory``)."""
    all_traces = get_traces()
    return list(all_traces)


# Feature columns consumed by this experiment.
features = ['rssi', 'rssi_std', 'rssi_avg']

# Stratified 10-fold splitter; shuffle=True randomizes folds each run.
cv = model_selection.StratifiedKFold(n_splits=10, shuffle=True)

pipe = pipeline.Pipeline([
    ('scaler', preprocessing.StandardScaler()),
    ('resample', over_sampling.RandomOverSampler()),
    #('clf', tree.DecisionTreeClassifier(max_depth=3)),
    # BUG FIX: 'ovr' is not a solver (valid: lbfgs, liblinear, newton-cg,
    # sag, saga) and would raise ValueError at fit time; it is a
    # multi_class value, matching the other examples in this file.
    ('linear', linear_model.LogisticRegression(solver='lbfgs',
                                               multi_class='ovr')),
])


@memory.cache
def different_window_sizes(W_PRR, W_HISTORY):
    print(f'*** PRR={W_PRR}, HISTORY={W_HISTORY} ***')

    dataset = load_rutgers()
    print('Rutgers loaded ...')

    dataset = CustomInterpolation(source='rssi',
                                  strategy='constant',
                                  constant=0).fit_transform(dataset)
    print('Interpolation applied ...')
コード例 #7
0
# Union of the four engineered-feature pipelines defined above; each
# branch's output columns are concatenated side by side.
features_pipeline = ppl.make_union(engineered_feature_pipeline1,
                                   engineered_feature_pipeline2,
                                   engineered_feature_pipeline3,
                                   engineered_feature_pipeline4)

# Random oversampling to balance classes (fixed seed for reproducibility).
sampling_pipeline = imbppl.make_pipeline(
    over_sampling.RandomOverSampler(random_state=9565))

# Multinomial (softmax) logistic regression with L2 penalty.
model_pipeline = imbppl.make_pipeline(
    LogisticRegression(multi_class='multinomial',
                       penalty='l2',
                       random_state=9546,
                       solver="lbfgs"))

# Full chain: feature engineering -> resampling -> classifier.
pipe = imbppl.Pipeline([('prep', features_pipeline),
                        ('sample', sampling_pipeline),
                        ('clf', model_pipeline)])

# Target is the hand label; features are the columns s1 through c5.
y = d_in.hand
X = d_in.loc[:, 's1':'c5']  # produces a copy

# split - results in < 5 observations for the smallest class (need for sampling)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    train_size=0.8,
                                                    stratify=y,
                                                    random_state=9565)

# training individual steps
X_tr_feat = features_pipeline.fit_transform(X_train, y_train)
コード例 #8
0
    # Every feature
    #[x[0] + x[1] for x in it.product(['rssi', 'rssi_avg', 'rssi_std'], ['^-4', '^-3', '^-2', '', '^-1', '^2', '^3', '^4'])]
]


@memory.cache
def load_rutgers():
    """Fetch every Rutgers trace and return them as a list (cached via ``memory``)."""
    traces = get_traces()
    return list(traces)


# Stratified 10-fold splitter; shuffle=True randomizes folds each run.
cv = model_selection.StratifiedKFold(n_splits=10, shuffle=True)

# Scale -> random oversampling -> one-vs-rest logistic regression.
pipe_logreg = pipeline.Pipeline([
    ('scaler', preprocessing.StandardScaler()),
    ('resample', over_sampling.RandomOverSampler()),
    ('logistic',
     # BUG FIX: max_iter must be an integer — recent scikit-learn releases
     # reject the float 1e3 during parameter validation.
     linear_model.LogisticRegression(solver='lbfgs',
                                     max_iter=1000,
                                     multi_class='ovr')),
])

# Same preprocessing chain, with a decision tree as the classifier.
pipe_dtree = pipeline.Pipeline([
    ('scaler', preprocessing.StandardScaler()),
    ('resample', over_sampling.RandomOverSampler()),
    ('DTree', tree.DecisionTreeClassifier()),
])


@memory.cache
def prepare_data():
    dataset = load_rutgers()
    print('Rutgers loaded ...')