from ballet import Feature
from ballet.eng import SimpleFunctionTransformer


def calc_qual(df):
    """Return "Overall Qual" minus "Overall Cond" for every row of ``df``."""
    quality, condition = df["Overall Qual"], df["Overall Cond"]
    return quality - condition


# Feature: difference between the overall quality and condition columns.
input = ["Overall Qual", "Overall Cond"]
name = "Qual"
transformer = SimpleFunctionTransformer(calc_qual)
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer


def _row_total(df):
    """Return the row-wise sum over all columns of ``df``."""
    return df.sum(axis=1)


# Feature: combined basement, first-floor and second-floor square footage.
input = ["Total Bsmt SF", "1st Flr SF", "2nd Flr SF"]
name = "Total Area"
# Sum the three area columns per row, then pass the result through NullFiller.
transformer = [
    SimpleFunctionTransformer(_row_total),
    NullFiller(),
]
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer


def has_qual(ser):
    """Return 1 where ``ser`` is strictly positive and 0 elsewhere."""
    is_positive = ser.gt(0)
    return is_positive.astype(int)


# Feature: integer indicator for a positive 'BsmtFin SF 2' value.
input = 'BsmtFin SF 2'
name = 'Basement type 2 indicator'
transformer = SimpleFunctionTransformer(has_qual)
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import GroupwiseTransformer, SimpleFunctionTransformer
from sklearn.impute import SimpleImputer


def _index_by_neighborhood(df):
    """Append 'Neighborhood' to the index of ``df`` as an extra level."""
    return df.set_index('Neighborhood', append=True)


# Feature: 'Lot Frontage' imputed with a median, computed per neighborhood
# first and over the whole column afterwards.
input = ['Lot Frontage', 'Neighborhood']
name = 'Imputed lot frontage by median per neighborhood'

# Median imputer applied within each group of the 'Neighborhood' index level.
# NOTE(review): handle_error='ignore' presumably lets groups whose imputation
# fails pass through instead of raising -- confirm against the ballet docs.
_per_neighborhood_median = GroupwiseTransformer(
    SimpleImputer(strategy='median'),
    groupby_kwargs={'level': 'Neighborhood'},
    handle_error='ignore',
)

transformer = [
    SimpleFunctionTransformer(_index_by_neighborhood),
    _per_neighborhood_median,
    # Second median imputation over the full output of the groupwise step.
    SimpleImputer(strategy='median'),
]
feature = Feature(input=input, transformer=transformer, name=name)
+ b"AAAGNvcHlfWHEFiFgGAAAAbl9qb2JzcQZOWAUAAABjb2VmX3EHY251bXB5LmNvcmUubXVsd" + b"GlhcnJheQpfcmVjb25zdHJ1Y3QKcQhjbnVtcHkKbmRhcnJheQpxCUsAhXEKQwFicQuHcQxS" + b"cQ0oSwFLBoVxDmNudW1weQpkdHlwZQpxD1gCAAAAZjhxEEsASwGHcRFScRIoSwNYAQAAADx" + b"xE05OTkr/////Sv////9LAHRxFGKJQzCs1DbrA2ATwJ+EvJ2cgXZAgOdkq43NAUBPL1Ngi7" + b"xiwDcFZw6sG1VAsa1vEjH/c0BxFXRxFmJYCQAAAF9yZXNpZHVlc3EXY251bXB5LmNvcmUub" + b"XVsdGlhcnJheQpzY2FsYXIKcRhoEkMI3Eon+jyJq0JxGYZxGlJxG1gFAAAAcmFua19xHEsGWAkAAABzaW5ndWxhcl9xHW" + b"gIaAlLAIVxHmgLh3EfUnEgKEsBSwaFcSFoEolDMB/pi7yTBxpByuth8u3RrEDOZGHPHa6qQK+4epK+NaJADgShEyZ3mkC" + b"rrRv+vjeVQHEidHEjYlgKAAAAaW50ZXJjZXB0X3EkaBhoEkMIC+mioaOz/kBxJYZxJlJxJ1gQAAAAX3NrbGVhcm5fdmVy" + b"c2lvbnEoWAYAAAAwLjIyLjFxKXViLg==") def base64_model(df): magic = pickle.loads(base64.b64decode(secret)) X = df[keys].values X[np.isnan(X)] = 0.0 return magic.predict(X) input = keys transformer = [SimpleFunctionTransformer(base64_model)] name = "Base64 Model" feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer


def _quality_minus_condition(df):
    """Return "Overall Qual" minus "Overall Cond" for every row of ``df``."""
    return df["Overall Qual"] - df["Overall Cond"]


# Feature: overall quality rating with the overall condition rating subtracted.
input = ["Overall Qual", "Overall Cond"]
name = "Adjusted Overall Quality"
transformer = SimpleFunctionTransformer(_quality_minus_condition)
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer


def _is_positive(ser):
    """Return the element-wise result of ``ser > 0``."""
    return ser > 0


# Feature: whether the 'Open Porch SF' value is greater than zero.
input = 'Open Porch SF'
name = 'Has open porch'
transformer = SimpleFunctionTransformer(_is_positive)
feature = Feature(input=input, transformer=transformer, name=name)
import pandas as pd
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer
from sklearn.preprocessing import OneHotEncoder

# (column, label) pairs, applied in this order so that a later match
# overwrites an earlier one.
_PORCH_LABELS = (
    ("Enclosed Porch", "Enclosed"),
    ("3Ssn Porch", "3Ssn"),
    ("Open Porch SF", "Open"),
)


def calc_porch_type(df):
    """Label each row with its porch type.

    A row gets a porch label when that porch column is positive and equals
    the row-wise sum of all columns in ``df``. Every other row keeps the
    label "Unknown".

    Returns a Series of labels sharing the index of ``df``.
    """
    combined_area = df.sum(axis=1)
    labels = pd.Series("Unknown", index=df.index)
    for column, label in _PORCH_LABELS:
        area = df[column]
        matches = (combined_area == area) & (area > 0)
        labels[matches] = label
    return labels


# Feature: porch type label derived from the three porch columns, then
# one-hot encoded.
input = ["Enclosed Porch", "3Ssn Porch", "Open Porch SF"]
name = "Porch Type (Cleaned, One-Hot Encoded)"
transformer = [
    SimpleFunctionTransformer(calc_porch_type),
    OneHotEncoder(),
]
feature = Feature(input=input, transformer=transformer, name=name)
import numpy as np
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# Feature: square root of the 'Lot Area' column, computed with np.sqrt.
name = 'Sqrt Lot Area'
input = 'Lot Area'
transformer = SimpleFunctionTransformer(np.sqrt)
feature = Feature(
    input=input,
    transformer=transformer,
    name=name,
)
import numpy as np
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

input = ["Lot Area", "Lot Frontage"]


def fill_frontage(df):
    """Return "Lot Frontage" with nulls replaced by the square root of "Lot Area".

    The input frame is left untouched: the fill is applied to a copy of the
    column rather than through chained indexing (``df[col][mask] = ...``),
    which mutates the caller's data, emits SettingWithCopyWarning, and has no
    effect at all when pandas copy-on-write is enabled.

    Parameters
    ----------
    df : DataFrame with "Lot Area" and "Lot Frontage" columns.

    Returns
    -------
    Series named "Lot Frontage", with the same index as ``df``.
    """
    frontage = df["Lot Frontage"].copy()
    missing = frontage.isnull()
    # Only the rows that need filling are read from "Lot Area".
    frontage[missing] = np.sqrt(df.loc[missing, "Lot Area"])
    return frontage


transformer = SimpleFunctionTransformer(fill_frontage)
name = "Lot Frontage Fill"
feature = Feature(input=input, transformer=transformer, name=name)
import pandas as pd
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer


def calc_age(df):
    """Return "Year Built", using "Garage Yr Blt" wherever it is null."""
    year_built = df["Year Built"]
    garage_year = df["Garage Yr Blt"]
    return year_built.where(pd.notnull(year_built), garage_year)


# Feature: construction year, with the garage construction year as fallback.
input = ["Year Built", "Garage Yr Blt"]
name = "Year built fill"
transformer = SimpleFunctionTransformer(calc_age)
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer


def _bath_total(df):
    """Return "Full Bath" plus "Half Bath" for every row of ``df``."""
    return df["Full Bath"] + df["Half Bath"]


# Feature: sum of the full-bath and half-bath columns.
input = ["Full Bath", "Half Bath"]
name = "Total Bath"
transformer = SimpleFunctionTransformer(_bath_total)
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer


def _sold_minus_remodel(df):
    """Return "Yr Sold" minus "Year Remod/Add" for every row of ``df``."""
    return df["Yr Sold"] - df["Year Remod/Add"]


# Feature: years between the remodel/addition year and the sale year.
input = ["Yr Sold", "Year Remod/Add"]
name = "House Age"
# Compute the year difference, then pass the result through NullFiller.
transformer = [
    SimpleFunctionTransformer(_sold_minus_remodel),
    NullFiller(),
]
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer


def calc_age(df):
    """Return "Yr Sold" minus "Year Remod/Add" for every row of ``df``."""
    year_sold = df["Yr Sold"]
    year_remodeled = df["Year Remod/Add"]
    return year_sold - year_remodeled


# Feature: years between the remodel/addition year and the sale year.
input = ["Yr Sold", "Year Remod/Add"]
name = "Age"
# Compute the year difference, then pass the result through NullFiller.
transformer = [
    SimpleFunctionTransformer(calc_age),
    NullFiller(),
]
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import NamedFramer, SimpleFunctionTransformer
from sklearn.preprocessing import OneHotEncoder


# Codes 020-090 lose their leading zero; this feature puts the zeros back.
def pad_zero(ser):
    """Return ``ser`` as strings, left-padded with '0' to a width of 3."""
    as_text = ser.astype(str)
    return as_text.str.rjust(3, fillchar='0')


input = 'MS SubClass'
name = 'Pad zeros in MS SubClass'
# Pad the codes, frame them under the 'MS SubClass' name, then one-hot encode.
transformer = [
    SimpleFunctionTransformer(pad_zero),
    NamedFramer('MS SubClass'),
    OneHotEncoder(),
]
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer


def _sum_across_columns(df):
    """Return the sum of all columns of ``df`` for each row."""
    return df.sum(axis="columns")


# Feature: combined basement, first-floor and second-floor square footage.
name = "Total Area Calculation"
input = ["Total Bsmt SF", "1st Flr SF", "2nd Flr SF"]
# Row-wise total of the three area columns, followed by NullFiller.
transformer = [
    SimpleFunctionTransformer(_sum_across_columns),
    NullFiller(),
]
feature = Feature(input=input, transformer=transformer, name=name)