def __init__(self):
        """Load the counters from ``data/value_counters.npy`` onto the instance.

        The path is relative to the current working directory.
        """
        # NOTE(review): if the file stores Python objects (e.g. one mapping
        # per variable), NumPy >= 1.16.3 needs allow_pickle=True here --
        # confirm against how the file was written.
        counters_path = os.path.join('data/', 'value_counters.npy')
        self.counters = np.load(counters_path)

    def fit_transform(self, X, y=None):
        """Transform ``X`` and hand ``y`` back unchanged.

        Nothing is fitted here; the call simply delegates to ``transform``.

        Returns:
            A ``(transformed_X, y)`` tuple.
        """
        transformed = self.transform(X)
        return transformed, y

    def transform(self, X):
        """Add a boolean ``count_var_i`` column for each of ``var_0``..``var_199``.

        Each value of ``var_i`` is looked up in ``self.counters[i]``; the new
        column is True where that lookup equals 1 and False otherwise
        (including values the lookup does not know, which map to NaN).

        ``X`` is modified in place and also returned.
        """
        for idx in range(200):
            source_col = 'var_{}'.format(idx)
            looked_up = X[source_col].map(self.counters[idx])
            X['count_' + source_col] = looked_up == 1

        return X


# Hand the two preprocessor classes to the model.
model.preprocess_online(UpsamplingPreprocessor, IsUniqueOnline)

# Keyword dictionaries passed to model.run below; predict_params stays empty.
fit_params = dict(use_best_model=True, verbose=5000, plot=True)
predict_params = dict()
results = model.run(
    data_loader, roc_auc_score, fit_params, predict_params, verbose=True)

# Only persist the run when the save flag is set on args.
if args.save:
    current_file_path = os.path.abspath(__file__)  # to save this .py file
    model.save(
        data_loader, results, current_file_path, args.preds, args.models)
## << Create and train model
Esempio n. 2
0
        return self.transform(X), y

    def transform(self, X):
        """Add count-based features for ``var_0``..``var_199`` to ``X``.

        For every variable ``var_i`` that is not listed in
        ``self.to_exclude`` two columns are written, in this order:

        * ``varcount_var_i`` -- the count looked up in ``self.counters[i]``
          multiplied by the original value of ``var_i``.
        * ``unique_var_i`` -- True where the looked-up count equals 1.

        Values that ``self.counters[i]`` does not know are treated as having
        a count of 0 (so ``unique_`` is False and ``varcount_`` is 0) instead
        of raising while casting NaN to ``int32``.

        ``X`` is modified in place and also returned.
        """
        for i in range(200):
            c = 'var_{}'.format(i)
            if c in self.to_exclude:
                continue

            # Series.map yields NaN for unknown values; NaN cannot be cast
            # to an integer dtype, so fill it with 0 first.
            counts = X[c].map(self.counters[i]).fillna(0).astype(np.int32)

            # Assign varcount_ before unique_ to keep the column order.
            X['varcount_' + c] = counts * X[c]
            X['unique_' + c] = (counts == 1)

        return X


# Hand the two preprocessor classes to the model.
model.preprocess_online(UpsamplingPreprocessor, CountersGeneratorOnline)

# Keyword dictionaries passed to model.run below; predict_params stays empty.
fit_params = dict(use_best_model=True, verbose=5000, plot=True)
predict_params = dict()
results = model.run(
    data_loader, roc_auc_score, fit_params, predict_params, verbose=True)

# Only persist the run when the save flag is set on args.
if args.save:
    current_file_path = os.path.abspath(__file__)  # to save this .py file
    model.save(
        data_loader, results, current_file_path, args.preds, args.models)
## << Create and train model