def run_training():
    """Load the Titanic CSV, split it, and preview the preprocessing steps.

    Reads ``titanic.csv``, holds out 20% of the rows (``random_state=0``),
    then pushes the training features through four preprocessors one at a
    time, printing the head of the frame along the way.

    NOTE(review): despite the name, no estimator is fitted here and nothing
    is returned; the function only prints intermediate frames.
    """
    frame = pd.read_csv('titanic.csv')
    features = frame.drop('survived', axis=1)
    target = frame['survived']
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)

    print(X_train.head())

    # All four transformers are built up front, in this order.
    categorical_imputer = pp.CategoricalImputer(
        variables=config.CATEGORICAL_VARS)
    numerical_imputer = pp.NumericalImputer(
        variables=config.NUMERICAL_VARS_WITH_NA)
    first_letter = pp.ExtractFirstLetter(variables=config.CABIN)
    categorical_encoder = pp.CategoricalEncoder(
        variables=config.CATEGORICAL_VARS)

    # Imputation and first-letter extraction run back to back before the
    # intermediate frame is shown.
    for transformer in (categorical_imputer, numerical_imputer, first_letter):
        X_train = transformer.fit_transform(X_train)

    print()
    print(X_train.head())
    print()

    # Encoding is applied last and its result is printed separately.
    X_train = categorical_encoder.fit_transform(X_train)
    print(X_train.head())
# Ordered (name, estimator) pairs for the Titanic pipeline: custom
# preprocessors from ``preprocessors`` configured with the variable lists
# from ``config``, followed by scaling and a logistic regression.
_PIPELINE_STEPS = [
    ('missing_indicator',
     pp.MissingIndicator(variables=config.NUMERICAL_VARS)),
    ('categorical_imputer',
     pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
    ('numerical_inputer',
     pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
    ('extract_firstletter',
     pp.ExtractFirstLetter(variables=config.CABIN)),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(
         tol=0.05, variables=config.CATEGORICAL_VARS)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('scaler', StandardScaler()),
    ('Linear_model', LogisticRegression(C=0.0005, random_state=0)),
]

titanic_pipe = Pipeline(_PIPELINE_STEPS)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Variable lists are read from ``config`` once and shared by the steps below.
_categorical = config.CATEGORICAL_VARS
_numerical = config.NUMERICAL_VARS

# Preprocessing steps run in the listed order, then scaling and the model.
titanic_pipe = Pipeline(steps=[
    ('categorical_imputer', pp.CategoricalImputer(variables=_categorical)),
    ('missing_indicator', pp.MissingIndicator(variables=_numerical)),
    ('numerical_imputer', pp.NumericalImputer(variables=_numerical)),
    ('cabin_variable', pp.ExtractFirstLetter(variables=config.CABIN)),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(tol=0.01, variables=_categorical)),
    ('categorical_encoder', pp.CategoricalEncoder(variables=_categorical)),
    ('scaler', StandardScaler()),
    ('Linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
import preprocessors as pp
import utils as ut

config = ut.read_config_file('config.yaml')

# The feature-group lists live under the third entry of the parsed config.
_feature_groups = config[2]['Feature_Groups']
_categorical_vars = _feature_groups.get('categorical_vars')
_numerical_to_impute = _feature_groups.get('numerical_to_impute')

titanic_pipe = Pipeline([
    ('categorical_imputer',
     pp.CategoricalImputer(variables=_categorical_vars)),
    ('missing_indicator',
     pp.MissingIndicator(variables=_numerical_to_impute)),
    ('numerical_imputer',
     pp.NumericalImputer(variables=_numerical_to_impute)),
    # NOTE(review): index 1 of categorical_vars is presumably the cabin
    # column; confirm against config.yaml.
    ('cabin_variable',
     pp.ExtractFirstLetter(variables=_categorical_vars[1])),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(
         tol=0.05, variables=_categorical_vars)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=_categorical_vars)),
    ('scaler', StandardScaler()),
    ('linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Steps are declared as a named list so the pipeline definition itself stays
# a one-liner; order matters and is applied top to bottom.
_STEPS = [
    ('missing_indicator',
     pp.MissingIndicator(variables=config.NUMERICAL_VARS)),
    ('categorical_imputer',
     pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
    ('numerical_imputer',
     pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
    ('first_word_extractor',
     pp.ExtractFirstLetter(variables=config.CABIN)),
    # No ``tol`` is passed, so the encoder's own default applies.
    ('frequent_label_encoder',
     pp.RareLabelCategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('scaler', StandardScaler()),
    ('logistic_model', LogisticRegression(C=0.0005, random_state=0)),
]

titanic_pipe = Pipeline(_STEPS)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

_num_vars = config.NUMERICAL_VARS
_cat_vars = config.CATEGORICAL_VARS

# Numerical handling and cabin extraction come first; the categorical
# imputer runs only after the first letter has been extracted.
titanic_pipe = Pipeline(steps=[
    ('missing_indicator', pp.MissingIndicator(variables=_num_vars)),
    ('numerical_imputer', pp.NumericalImputer(variables=_num_vars)),
    ('extract_first_letter', pp.ExtractFirstLetter(variables=config.CABIN)),
    ('categorical_imputer', pp.CategoricalImputer(variables=_cat_vars)),
    # No ``tol`` is passed, so the encoder's own default applies.
    ('rare_label_categorical',
     pp.RareLabelCategoricalEncoder(variables=_cat_vars)),
    ('categorical_encoder', pp.CategoricalEncoder(variables=_cat_vars)),
    ('scaler', StandardScaler()),
    ('logistic_regression', LogisticRegression(C=0.0005, random_state=0)),
])
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Each transformer is built on its own line, in pipeline order, and then
# assembled below. The first four receive their variable list positionally.
_categorical_imputer = pp.CategoricalImputer(config.CATEGORICAL_VARS)
_missing_indicator = pp.MissingIndicator(config.NUMERICAL_VARS)
_numerical_imputer = pp.NumericalImputer(config.NUMERICAL_VARS)
_cabin_extractor = pp.ExtractFirstLetter(config.CABIN)
_rare_labels = pp.RareLabelCategoricalEncoder(
    tol=0.05, variables=config.CATEGORICAL_VARS)
_categorical_encoder = pp.CategoricalEncoder(
    variables=config.CATEGORICAL_VARS)

titanic_pipe = Pipeline([
    ('categorical_imputer', _categorical_imputer),
    ('missing_indicator', _missing_indicator),
    ('numerical_imputer', _numerical_imputer),
    ('cabin_extractor', _cabin_extractor),
    ('rare_labels', _rare_labels),
    ('categorical_encoder', _categorical_encoder),
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(C=0.0005, random_state=0)),
])
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

_cat_vars = config.CATEGORICAL_VARS
_num_vars = config.NUMERICAL_VARS

# All preprocessors take their arguments positionally here; the rare-label
# encoder receives 0.05 as its first argument and the variable list second.
_steps = [
    ('categorical_imputer', pp.CategoricalImputer(_cat_vars)),
    ('missing_indicator', pp.MissingIndicator(_num_vars)),
    ('numerical_imputer', pp.NumericalImputer(_num_vars)),
    ('extract_first_letter', pp.ExtractFirstLetter(config.CABIN)),
    ('rare_label_categorical_encoder',
     pp.RareLabelCategoricalEncoder(0.05, _cat_vars)),
    ('categorical_encoder', pp.CategoricalEncoder(_cat_vars)),
    ('scaler', StandardScaler()),
    ('linear_model', LogisticRegression(C=0.0005, random_state=0)),
]

titanic_pipe = Pipeline(_steps)
import preprocessors as pp
import config

# Titanic pipeline: custom preprocessors configured from ``config``,
# followed by feature scaling and a logistic regression.
titanic_pipe = Pipeline(
    [
        ('missing_indicator',
         pp.MissingIndicator(variables=config.NUMERICAL_VARS_WITH_NA)),
        ('categorical_imputer',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
        ('numerical_imputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS_WITH_NA)),
        # First-letter extraction is applied to the cabin variable only.
        ('extract_first_letter',
         pp.ExtractFirstLetter(variables=config.CABIN)),
        # The rare-label step must use the rare-label encoder. Running
        # ExtractFirstLetter over every categorical variable would truncate
        # all categories to a single character instead of grouping rare ones.
        ('rare_label_encoding',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=config.CATEGORICAL_VARS)),
        ('categorical_encoding',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
# Import only the names this module uses, rather than a wildcard, so their
# origin is explicit and unrelated config names cannot shadow anything here.
from config import CABIN_VAR, CATEGORICAL_VARS, NUMERICAL_VARS

# Titanic pipeline: custom preprocessors configured from ``config``,
# followed by feature scaling and a logistic regression.
titanic_pipe = Pipeline(
    [
        ('categorical_imputer',
         pp.CategoricalImputer(variables=CATEGORICAL_VARS)),
        ('missing_indicator',
         pp.MissingIndicator(variables=NUMERICAL_VARS)),
        ('numerical_imputer',
         pp.NumericalImputer(variables=NUMERICAL_VARS)),
        ('extract_first_letter',
         pp.ExtractFirstLetter(variables=CABIN_VAR)),
        ('rare_label_encoding',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=CATEGORICAL_VARS)),
        ('categorical_encoding',
         pp.CategoricalEncoder(variables=CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Titanic pipeline: custom preprocessors configured from ``config``,
# followed by feature scaling and a logistic regression.
titanic_pipe = Pipeline(
    [
        # Missing indicators are requested for both the categorical and the
        # numerical variable lists.
        ('missing_indicator',
         pp.MissingIndicator(
             variables=config.CATEGORICAL_VARS + config.NUMERICAL_VARS)),
        ('categorical_imputer',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
        ('numerical_imputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
        ('extract_first_letter',
         pp.ExtractFirstLetter(variables=['cabin'])),
        ('rare_label_categorical_encoder',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=config.CATEGORICAL_VARS)),
        ('categorical_encoder',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        # The pipeline previously ended at the encoder, leaving the imported
        # StandardScaler/LogisticRegression unused and the pipeline unable
        # to predict. Scaling and the final estimator complete it.
        ('scaler', StandardScaler()),
        ('linear_model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

_cat_vars = config.CATEGORICAL_VARS

# First-letter extraction on 'cabin' runs before any imputation. The
# pipeline is created with ``verbose=True``.
titanic_pipe = Pipeline(
    steps=[
        ('Extract_First_Letter', pp.ExtractFirstLetter(variables=['cabin'])),
        ('Numerical_Imputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
        ('Categorical_Imputer', pp.CategoricalImputer(variables=_cat_vars)),
        # No ``tol`` is passed, so the encoder's own default applies.
        ('Rare_Label_Categorical_Encoder',
         pp.RareLabelCategoricalEncoder(variables=_cat_vars)),
        ('Categorical_Encoder', pp.CategoricalEncoder(variables=_cat_vars)),
        ('scaler', StandardScaler()),
        ('Linear_model', LogisticRegression(C=0.005, random_state=0)),
    ],
    verbose=True,
)