from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import utils as ut

config = ut.read_config_file('config.yaml')

# Variable groups consumed by the preprocessing steps. They are read once
# from the third entry of the parsed config instead of repeating the lookup
# for every step.
_feature_groups = config[2]['Feature_Groups']
_categorical_vars = _feature_groups.get('categorical_vars')
_numerical_to_impute = _feature_groups.get('numerical_to_impute')

# Titanic model pipeline: custom transformers from the ``preprocessors``
# module, followed by feature scaling and a logistic regression.
# Steps run in list order; each one receives the output of the previous one.
titanic_pipe = Pipeline(
    [
        ('categorical_imputer',
         pp.CategoricalImputer(variables=_categorical_vars)),
        ('missing_indicator',
         pp.MissingIndicator(variables=_numerical_to_impute)),
        ('numerical_imputer',
         pp.NumericalImputer(variables=_numerical_to_impute)),
        # Second entry of ``categorical_vars`` -- presumably the cabin
        # column, going by the step name; confirm against config.yaml.
        ('cabin_variable',
         pp.ExtractFirstLetter(variables=_categorical_vars[1])),
        ('rare_label_encoder',
         pp.RareLabelCategoricalEncoder(tol=0.05, variables=_categorical_vars)),
        ('categorical_encoder',
         pp.CategoricalEncoder(variables=_categorical_vars)),
        ('scaler', StandardScaler()),
        # C is the inverse regularisation strength in scikit-learn, so this
        # small value regularises strongly; random_state pins the solver seed.
        ('linear_model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic model pipeline: custom transformers from the ``preprocessors``
# module configured with the variable lists in ``config``, followed by
# feature scaling and a logistic regression.
# Steps run in list order; each one receives the output of the previous one.
titanic_pipe = Pipeline(
    [
        ('missing_indicator',
         pp.MissingIndicator(variables=config.NUMERICAL_VARS)),
        ('categorical_imputer',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
        # Step name kept exactly as spelled, since callers may look the step
        # up by this key.
        ('numerical_inputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
        ('extract_firstletter',
         pp.ExtractFirstLetter(variables=config.CABIN)),
        ('rare_label_encoder',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=config.CATEGORICAL_VARS)),
        ('categorical_encoder',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        # The step list previously stopped here without being closed, leaving
        # the imported StandardScaler and LogisticRegression unused. These two
        # steps complete the pipeline.
        # NOTE(review): confirm the step names and hyperparameters below
        # match what the project expects.
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# End-to-end Titanic pipeline. The custom transformers come from the
# ``preprocessors`` module and take their variable lists from ``config``;
# the final two steps scale the features and fit a logistic regression.
# Steps are applied in the order listed.
titanic_pipe = Pipeline(
    steps=[
        ('extract_cabin_letter',
         pp.ExtractFirstLetter(variables=config.CABIN)),
        ('missing_indicator',
         pp.MissingIndicator(variables=config.INDICATE_MISSING_VARS)),
        ('impute_categorical',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
        ('impute_numerical',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
        ('encode_rare',
         pp.RareLabelCategoricalEncoder(
             variables=config.CATEGORICAL_VARS, tol=0.05)),
        ('encode_categorical',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic pipeline: transformers from ``preprocessors`` driven by the
# variable lists in ``config``, then scaling and a logistic regression.
# The first four transformers receive their variables positionally.
titanic_pipe = Pipeline(
    steps=[
        ('categorical_imputer',
         pp.CategoricalImputer(config.CATEGORICAL_VARS)),
        ('missing_indicator',
         pp.MissingIndicator(config.NUMERICAL_VARS)),
        ('numerical_imputer',
         pp.NumericalImputer(config.NUMERICAL_VARS)),
        ('cabin_extractor',
         pp.ExtractFirstLetter(config.CABIN)),
        ('rare_labels',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=config.CATEGORICAL_VARS)),
        ('categorical_encoder',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
# NOTE(review): the wildcard import supplies CATEGORICAL_VARS, NUMERICAL_VARS
# and CABIN_VAR used below. It is kept as-is because other modules may rely
# on names re-exported from here; consider explicit imports once confirmed.
from config import *

# Titanic pipeline: transformers from ``preprocessors`` configured with the
# variable lists imported from ``config``, then scaling and a logistic
# regression. Steps are applied in the order listed.
titanic_pipe = Pipeline(
    steps=[
        ('categorical_imputer',
         pp.CategoricalImputer(variables=CATEGORICAL_VARS)),
        ('missing_indicator',
         pp.MissingIndicator(variables=NUMERICAL_VARS)),
        ('numerical_imputer',
         pp.NumericalImputer(variables=NUMERICAL_VARS)),
        ('extract_first_letter',
         pp.ExtractFirstLetter(variables=CABIN_VAR)),
        ('rare_label_encoding',
         pp.RareLabelCategoricalEncoder(tol=0.05, variables=CATEGORICAL_VARS)),
        ('categorical_encoding',
         pp.CategoricalEncoder(variables=CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic pipeline: transformers from ``preprocessors`` configured with the
# variable lists in ``config``, then scaling and a logistic regression.
# Steps are applied in the order listed.
titanic_pipe = Pipeline(
    steps=[
        ('first_letter_extractor',
         pp.ExtractFirstLetter(variables=config.CABIN)),
        # NOTE(review): the missing indicator is configured with
        # CATEGORICAL_VARS, while NUMERICAL_VARS is only passed to the
        # numerical imputer -- confirm this choice is intended.
        ('missing_indicator',
         pp.MissingIndicator(variables=config.CATEGORICAL_VARS)),
        ('categorical_imputer',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
        ('numerical_inputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
        ('rare_label_encoder',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=config.CATEGORICAL_VARS)),
        ('categorical_encoder',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('Linear_model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic pipeline: transformers from ``preprocessors`` configured with the
# variable lists in ``config``, then scaling and a logistic regression.
# Here numerical imputation runs before categorical imputation.
titanic_pipe = Pipeline(
    steps=[
        ('first_letter_extractor',
         pp.ExtractFirstLetter(variables=config.CABIN)),
        ('missing_indicator',
         pp.MissingIndicator(variables=config.NUMERICAL_VARS_TO_IMPUTE)),
        ('numerical_imputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
        ('categorical_imputer',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
        ('rare_label_encoder',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=config.CATEGORICAL_VARS)),
        ('categorical_encoding',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('Logistic_model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)