def show_histo(df, bins=20):
    """Display a separate histogram for each column reported by ``numeric_cols``.

    Args:
        df: The ``pd.DataFrame`` whose columns are plotted. Anything else
            trips the assertion below.
        bins: Number of histogram bins, forwarded to ``Series.hist``.

    Each column is drawn, titled with its column label and shown via
    ``plt.show()`` before moving on to the next one.
    """
    assert isinstance(df, pd.DataFrame)
    # numeric_cols is defined elsewhere; presumably it yields the labels of
    # the numeric columns of df -- confirm against its definition.
    for col_name in numeric_cols(df):
        series = df[col_name]
        series.hist(bins=bins)
        plt.title(col_name)
        plt.show()
def get_pipeline(est, is_tree, is_regressor, params):
    """Build the pipeline around ``est`` and extend ``params`` to match.

    One of three layouts is chosen:

    * name starts with ``'Dummy'``: a ``FunctionTransformer`` followed by
      the model.
    * ``is_tree`` is truthy: date encoding, ordinal encoding, a
      ``FunctionTransformer`` that fills missing values with -999, feature
      selection, then the model.
    * otherwise: date encoding, a union of a numeric branch (column
      selection, imputation, wrapped ``StandardScaler``) and a categorical
      branch (column selection, ``SparseCatEncoder``), a union of feature
      selection and ``TruncatedSVD2``, then the model.

    Args:
        est: Estimator placed as the final ``'mo'`` step.
        is_tree: Selects the tree layout; also forwarded to ``get_selector``.
        is_regressor: Forwarded to ``get_selector``.
        params: Dict of search parameters. It is updated IN PLACE with the
            step-prefixed entries for the chosen layout.

    Returns:
        Tuple ``(name, pipeline, params)`` where ``name`` is
        ``model_name(est)`` and ``params`` is the same dict object passed in.
    """
    name = model_name(est)

    # --- Dummy estimators: only keep the numeric columns. -----------------
    if name.startswith('Dummy'):
        pipeline = Pipeline([
            ('ft', FunctionTransformer()),
            ('mo', est),
        ])
        params['ft__func'] = [lambda frame: frame[numeric_cols(frame)]]
        params['ft__validate'] = [False]
        return name, pipeline, params

    # --- Tree models: ordinal encoding plus a sentinel for missing values. -
    if is_tree:
        tree_steps = [
            ('da', DateEncoder()),
            ('du', OrdinalEncoder()),
            ('ft', FunctionTransformer()),
            ('se', SelectKBest2()),
            ('mo', est),
        ]
        pipeline = Pipeline(tree_steps)
        params['da__ascategory'] = [False]
        params['du__drop_invariant'] = [True]
        params['ft__func'] = [lambda frame: frame.fillna(-999)]
        params['ft__validate'] = [False]
        # Stored as returned (not wrapped in a list like the other entries);
        # presumably get_selector already returns a list of candidates.
        params['se__score_func'] = get_selector(is_regressor, is_tree)
        # NOTE(review): 1000 is listed twice here, unlike the other k grids
        # below -- confirm whether the duplicate is intentional.
        params['se__k'] = [0.2, 0.5, 0.8, 1000, 1000]
        return name, pipeline, params

    # --- Everything else: separate numeric / categorical encoding. --------
    date_step = DateEncoder()
    numeric_branch = Pipeline([
        ('ft', FunctionTransformer()),
        ('in', Imputer()),
        ('sc', TransformerWrap(StandardScaler())),
    ])
    categorical_branch = Pipeline([
        ('ft', FunctionTransformer()),
        ('sc', SparseCatEncoder()),
    ])
    encoder = FeatureUnion([
        ('nu', numeric_branch),
        ('ca', categorical_branch),
    ])
    reducer = FeatureUnion([
        ('se', SelectKBest2()),
        ('dr', TruncatedSVD2()),
    ])
    pipeline = Pipeline([
        ('da', date_step),
        ('en', encoder),
        ('fu', reducer),
        ('mo', est),
    ])
    params['en__nu__ft__func'] = [lambda frame: frame[numeric_cols(frame)]]
    params['en__nu__ft__validate'] = [False]
    params['en__ca__ft__func'] = [lambda frame: frame[object_cols(frame)]]
    params['en__ca__ft__validate'] = [False]
    params['fu__se__score_func'] = get_selector(is_regressor, is_tree)
    params['fu__se__k'] = [0.2, 0.5, 0.8, 1000]
    params['fu__dr__k'] = [0.2, 0.5, 0.8, 1000]
    return name, pipeline, params
def transform(self, X, y=None):
    """Return ``X`` indexed by the columns that ``numeric_cols(X)`` reports.

    ``y`` is accepted but not used.
    """
    selected = numeric_cols(X)
    return X[selected]
# Smoke test of the encoding / feature-reduction pipeline.
# x and y are presumably defined earlier in the file; the iris loader below
# is kept disabled.
#x, y = get_iris()
print_summary(x)

ppl = Pipeline([
    ("in", ConstantInputer()),
    ("da", DateEncoder()),
    ("en", FeatureUnion([
        # Numeric branch: column selection, then wrapped scaling.
        ("nu", Pipeline([
            ("ft", FunctionTransformer()),
            ("sc", TransformerWrap(StandardScaler())),
        ])),
        # Categorical branch: make_pipeline names the steps automatically;
        # the params below address the two FunctionTransformers as
        # 'functiontransformer-1' and 'functiontransformer-2'.
        ("ca", make_pipeline(
            FunctionTransformer(),
            SparseCatEncoder(),
            FunctionTransformer(),
        )),
    ])),
    ("fi", make_union(SelectKBest2(), TruncatedSVD2())),
])

params = {
    # Numeric branch keeps the columns reported by numeric_cols.
    "en__nu__ft__func": lambda x: x[numeric_cols(x)],
    "en__nu__ft__validate": False,
    # Categorical branch keeps the columns reported by object_cols ...
    "en__ca__functiontransformer-1__func": lambda x: x[object_cols(x)],
    "en__ca__functiontransformer-1__validate": False,
    # ... and, after encoding, drops columns with a single unique value.
    "en__ca__functiontransformer-2__func": lambda x: x.loc[:, x.nunique() > 1],
    "en__ca__functiontransformer-2__validate": False,
}
ppl.set_params(**params)

xt = ppl.fit_transform(x, y)
print_summary(xt)