# NOTE(review): whitespace-collapsed fragment. Several statements share this one physical
# line, so it does not parse as written, and everything after the first '#' (including the
# model.save(...) call) is currently comment text. Restore the original line breaks before
# running. The class header that owns the three methods below is not visible here.
#
# Methods, in order of appearance:
#   __init__(self)                  sets self.counters to the result of np.load() on
#                                   os.path.join('data/', 'value_counters.npy').
#   fit_transform(self, X, y=None)  returns the tuple (self.transform(X), y).
#   transform(self, X)              for each i, c over 'var_0' .. 'var_199', assigns
#                                   X['count_' + c] = (X[c].map(self.counters[i]) == 1)
#                                   directly on X, then returns X.
#                                   Presumably X is a pandas DataFrame and counters[i] maps a
#                                   value to its occurrence count, making 'count_<col>' an
#                                   "is unique" flag -- TODO confirm against the code that
#                                   writes value_counters.npy.
#
# Script statements that follow the methods:
#   model.preprocess_online(UpsamplingPreprocessor, IsUniqueOnline)
#   fit_params = {'use_best_model': True, 'verbose': 5000, 'plot': True}; predict_params = {}
#   results = model.run(data_loader, roc_auc_score, fit_params, predict_params, verbose=True)
#   if args.save: current_file_path = os.path.abspath(__file__), which is then passed to
#   model.save(data_loader, results, current_file_path, args.preds, args.models).
#   NOTE(review): model.save(...) presumably belongs inside the `if args.save:` body -- confirm
#   when restoring the layout.
def __init__(self): self.counters = np.load(os.path.join('data/', 'value_counters.npy')) def fit_transform(self, X, y=None): return self.transform(X), y def transform(self, X): var_cols = ['var_{}'.format(x) for x in range(200)] for i, c in enumerate(var_cols): X['count_' + c] = (X[c].map(self.counters[i]) == 1) return X model.preprocess_online(UpsamplingPreprocessor, IsUniqueOnline) fit_params = {'use_best_model': True, 'verbose': 5000, 'plot': True} predict_params = {} results = model.run(data_loader, roc_auc_score, fit_params, predict_params, verbose=True) if args.save: current_file_path = os.path.abspath(__file__) # to save this .py file model.save(data_loader, results, current_file_path, args.preds, args.models) ## << Create and train model
# NOTE(review): whitespace-collapsed fragment. Several statements share this one physical
# line, so it does not parse as written, and everything after the first '#' (including the
# model.save(...) call) is currently comment text. Restore the original line breaks before
# running.
#
# The line opens with `return self.transform(X), y`, the tail of a definition whose `def`
# header is not visible here (presumably a fit_transform method -- TODO confirm). The
# enclosing class header, and the code that sets self.counters and self.to_exclude, are
# also not visible.
#
# transform(self, X): for each i, c over 'var_0' .. 'var_199' where c is not in
# self.to_exclude:
#   X['varcount_' + c] = X[c].map(self.counters[i]).astype(np.int32)
#   X['unique_' + c]   = (X['varcount_' + c] == 1)
#   X['varcount_' + c] = X['varcount_' + c] * X[c]   # overwrites the mapped value with
#                                                    # (mapped value * original column)
# Columns are assigned directly on X, which is then returned.
# Presumably X is a pandas DataFrame and counters[i] maps a value to its occurrence
# count -- TODO confirm.
# NOTE(review): if map() leaves NaN for values missing from counters[i], the
# astype(np.int32) cast is expected to fail -- confirm every value is covered.
#
# Script statements that follow the method:
#   model.preprocess_online(UpsamplingPreprocessor, CountersGeneratorOnline)
#   fit_params = {'use_best_model': True, 'verbose': 5000, 'plot': True}; predict_params = {}
#   results = model.run(data_loader, roc_auc_score, fit_params, predict_params, verbose=True)
#   if args.save: current_file_path = os.path.abspath(__file__), which is then passed to
#   model.save(data_loader, results, current_file_path, args.preds, args.models).
#   NOTE(review): model.save(...) presumably belongs inside the `if args.save:` body -- confirm
#   when restoring the layout.
return self.transform(X), y def transform(self, X): var_cols = ['var_{}'.format(x) for x in range(200)] for i, c in enumerate(var_cols): if c not in self.to_exclude: X['varcount_' + c] = X[c].map(self.counters[i]).astype( np.int32) X['unique_' + c] = (X['varcount_' + c] == 1) X['varcount_' + c] = X['varcount_' + c] * X[c] return X model.preprocess_online(UpsamplingPreprocessor, CountersGeneratorOnline) fit_params = {'use_best_model': True, 'verbose': 5000, 'plot': True} predict_params = {} results = model.run(data_loader, roc_auc_score, fit_params, predict_params, verbose=True) if args.save: current_file_path = os.path.abspath(__file__) # to save this .py file model.save(data_loader, results, current_file_path, args.preds, args.models) ## << Create and train model