Exemple #1
0
def run_training():
    """Preprocess the training split step by step and print previews.

    Reads ``titanic.csv``, holds out 20% of the rows as a test split
    (``random_state=0``), then fits and applies four preprocessors from
    ``pp`` to the training features one after another.  ``X_train.head()``
    is printed before the first step, after the third step, and after the
    last one.  No estimator is fitted here and nothing is returned.
    """
    frame = pd.read_csv('titanic.csv')
    features = frame.drop('survived', axis=1)
    target = frame['survived']
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)

    print(X_train.head())

    # Build every transformer up front, then apply them in order.
    first_steps = (
        pp.CategoricalImputer(variables=config.CATEGORICAL_VARS),
        pp.NumericalImputer(variables=config.NUMERICAL_VARS_WITH_NA),
        pp.ExtractFirstLetter(variables=config.CABIN),
    )
    encoder = pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)

    for step in first_steps:
        X_train = step.fit_transform(X_train)

    # Intermediate preview, framed by blank lines.
    print()
    print(X_train.head())
    print()

    X_train = encoder.fit_transform(X_train)
    print(X_train.head())
Exemple #2
0

# Preprocessing + model pipeline. Steps run in list order and the final
# entry is the estimator.
titanic_pipe = Pipeline([
    ('missing_indicator',
     pp.MissingIndicator(variables=config.NUMERICAL_VARS)),
    ('categorical_imputer',
     pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
    # NOTE: the step key is spelled 'numerical_inputer'; it is kept as-is
    # because renaming it would change the key used to address the step.
    ('numerical_inputer',
     pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
    ('extract_firstletter',
     pp.ExtractFirstLetter(variables=config.CABIN)),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(tol=0.05,
                                    variables=config.CATEGORICAL_VARS)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('scaler', StandardScaler()),
    ('Linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
Exemple #3
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Preprocessing + model pipeline: custom transformers from ``pp`` driven by
# the variable lists in ``config``, then scaling and logistic regression.
titanic_pipe = Pipeline([
    (
        'categorical_imputer',
        pp.CategoricalImputer(variables=config.CATEGORICAL_VARS),
    ),
    (
        'missing_indicator',
        pp.MissingIndicator(variables=config.NUMERICAL_VARS),
    ),
    (
        'numerical_imputer',
        pp.NumericalImputer(variables=config.NUMERICAL_VARS),
    ),
    (
        'cabin_variable',
        pp.ExtractFirstLetter(variables=config.CABIN),
    ),
    (
        'rare_label_encoder',
        pp.RareLabelCategoricalEncoder(
            tol=0.01, variables=config.CATEGORICAL_VARS),
    ),
    (
        'categorical_encoder',
        pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
    ),
    ('scaler', StandardScaler()),
    ('Linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
import preprocessors as pp
import utils as ut

# Load the settings once at import time; the pipeline below reads its
# variable lists from the 'Feature_Groups' mapping at index 2.
config = ut.read_config_file('config.yaml')

# Preprocessing + model pipeline. Steps run in list order and the final
# entry is the estimator.
titanic_pipe = Pipeline([
    ('categorical_imputer',
     pp.CategoricalImputer(
         variables=config[2]['Feature_Groups'].get('categorical_vars'))),
    ('missing_indicator',
     pp.MissingIndicator(
         variables=config[2]['Feature_Groups'].get('numerical_to_impute'))),
    ('numerical_imputer',
     pp.NumericalImputer(
         variables=config[2]['Feature_Groups'].get('numerical_to_impute'))),
    # Only the second entry of 'categorical_vars' is passed here --
    # presumably the cabin column; verify against config.yaml.
    ('cabin_variable',
     pp.ExtractFirstLetter(
         variables=config[2]['Feature_Groups'].get('categorical_vars')[1])),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(
         tol=0.05,
         variables=config[2]['Feature_Groups'].get('categorical_vars'))),
    ('categorical_encoder',
     pp.CategoricalEncoder(
         variables=config[2]['Feature_Groups'].get('categorical_vars'))),
    ('scaler', StandardScaler()),
    ('linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
Exemple #5
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Preprocessing + model pipeline. The rare-label step is constructed without
# ``tol``, so it relies on whatever default ``pp`` defines.
titanic_pipe = Pipeline([
    ('missing_indicator',
     pp.MissingIndicator(variables=config.NUMERICAL_VARS)),
    ('categorical_imputer',
     pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
    ('numerical_imputer',
     pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
    ('first_word_extractor',
     pp.ExtractFirstLetter(variables=config.CABIN)),
    ('frequent_label_encoder',
     pp.RareLabelCategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('scaler', StandardScaler()),
    ('logistic_model', LogisticRegression(C=0.0005, random_state=0)),
])
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Preprocessing + model pipeline. Here the first-letter extraction runs
# before the categorical imputer; steps execute in list order.
titanic_pipe = Pipeline([
    (
        'missing_indicator',
        pp.MissingIndicator(variables=config.NUMERICAL_VARS),
    ),
    (
        'numerical_imputer',
        pp.NumericalImputer(variables=config.NUMERICAL_VARS),
    ),
    (
        'extract_first_letter',
        pp.ExtractFirstLetter(variables=config.CABIN),
    ),
    (
        'categorical_imputer',
        pp.CategoricalImputer(variables=config.CATEGORICAL_VARS),
    ),
    (
        'rare_label_categorical',
        pp.RareLabelCategoricalEncoder(variables=config.CATEGORICAL_VARS),
    ),
    (
        'categorical_encoder',
        pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
    ),
    ('scaler', StandardScaler()),
    ('logistic_regression', LogisticRegression(C=0.0005, random_state=0)),
])
Exemple #7
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Preprocessing + model pipeline. The first four transformers receive their
# variable list positionally, exactly as the preprocessors are called here.
titanic_pipe = Pipeline(
    [
        ('categorical_imputer',
         pp.CategoricalImputer(config.CATEGORICAL_VARS)),
        ('missing_indicator',
         pp.MissingIndicator(config.NUMERICAL_VARS)),
        ('numerical_imputer',
         pp.NumericalImputer(config.NUMERICAL_VARS)),
        ('cabin_extractor',
         pp.ExtractFirstLetter(config.CABIN)),
        ('rare_labels',
         pp.RareLabelCategoricalEncoder(
             tol=0.05, variables=config.CATEGORICAL_VARS)),
        ('categorical_encoder',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Preprocessing + model pipeline. All custom transformers are built with
# positional arguments; the rare-label encoder gets (0.05, variables).
titanic_pipe = Pipeline([
    (
        'categorical_imputer',
        pp.CategoricalImputer(config.CATEGORICAL_VARS),
    ),
    (
        'missing_indicator',
        pp.MissingIndicator(config.NUMERICAL_VARS),
    ),
    (
        'numerical_imputer',
        pp.NumericalImputer(config.NUMERICAL_VARS),
    ),
    (
        'extract_first_letter',
        pp.ExtractFirstLetter(config.CABIN),
    ),
    (
        'rare_label_categorical_encoder',
        pp.RareLabelCategoricalEncoder(0.05, config.CATEGORICAL_VARS),
    ),
    (
        'categorical_encoder',
        pp.CategoricalEncoder(config.CATEGORICAL_VARS),
    ),
    ('scaler', StandardScaler()),
    ('linear_model', LogisticRegression(C=0.0005, random_state=0)),
])
import preprocessors as pp
import config


# Preprocessing + model pipeline. Steps run in list order and the final
# entry is the estimator.
titanic_pipe = Pipeline(
    [
        ('missing_indicator',
         pp.MissingIndicator(variables=config.NUMERICAL_VARS_WITH_NA)),

        ('categorical_imputer',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),

        ('numerical_imputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS_WITH_NA)),

        ('extract_first_letter',
         pp.ExtractFirstLetter(variables=config.CABIN)),

        # Fixed: this step previously instantiated pp.ExtractFirstLetter
        # over every categorical variable (a copy-paste of the step above),
        # which would reduce each category to its first character instead
        # of grouping rare labels.
        # NOTE(review): tol=0.05 is an assumed threshold -- confirm it
        # against the preprocessors module.
        ('rare_label_encoding',
         pp.RareLabelCategoricalEncoder(
             tol=0.05,
             variables=config.CATEGORICAL_VARS)),

        ('categorical_encoding',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),

        ('scaler',
         StandardScaler()),
        ('model',
         LogisticRegression(C=0.0005, random_state=0)),
    ]
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
from config import *


# Preprocessing + model pipeline. The variable lists (CATEGORICAL_VARS,
# NUMERICAL_VARS, CABIN_VAR) come from the star-import of ``config``.
titanic_pipe = Pipeline([
    ('categorical_imputer',
     pp.CategoricalImputer(variables=CATEGORICAL_VARS)),
    ('missing_indicator',
     pp.MissingIndicator(variables=NUMERICAL_VARS)),
    ('numerical_imputer',
     pp.NumericalImputer(variables=NUMERICAL_VARS)),
    ('extract_first_letter',
     pp.ExtractFirstLetter(variables=CABIN_VAR)),
    ('rare_label_encoding',
     pp.RareLabelCategoricalEncoder(tol=0.05,
                                    variables=CATEGORICAL_VARS)),
    ('categorical_encoding',
     pp.CategoricalEncoder(variables=CATEGORICAL_VARS)),
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(C=0.0005, random_state=0)),
])
Exemple #11
0
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config

# Preprocessing-only pipeline: it ends at the categorical encoder and has no
# scaler or estimator step.
# NOTE(review): confirm that omitting the model is intentional here.
titanic_pipe = Pipeline([
    # The indicator step receives the categorical and numerical lists
    # concatenated into one list.
    (
        'missing_indicator',
        pp.MissingIndicator(
            variables=config.CATEGORICAL_VARS + config.NUMERICAL_VARS),
    ),
    (
        'categorical_imputer',
        pp.CategoricalImputer(variables=config.CATEGORICAL_VARS),
    ),
    (
        'numerical_imputer',
        pp.NumericalImputer(variables=config.NUMERICAL_VARS),
    ),
    # The cabin column name is hard-coded here rather than read from config.
    (
        'extract_first_letter',
        pp.ExtractFirstLetter(variables=['cabin']),
    ),
    (
        'rare_label_categorical_encoder',
        pp.RareLabelCategoricalEncoder(
            tol=0.05, variables=config.CATEGORICAL_VARS),
    ),
    (
        'categorical_encoder',
        pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
    ),
])
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import config


# Preprocessing + model pipeline. ``verbose=True`` makes the Pipeline report
# the time elapsed while fitting each step.
titanic_pipe = Pipeline(
    steps=[
        # The cabin column name is hard-coded here rather than read from
        # config.
        ('Extract_First_Letter',
         pp.ExtractFirstLetter(variables=['cabin'])),
        ('Numerical_Imputer',
         pp.NumericalImputer(variables=config.NUMERICAL_VARS)),
        ('Categorical_Imputer',
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS)),
        ('Rare_Label_Categorical_Encoder',
         pp.RareLabelCategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('Categorical_Encoder',
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ('scaler', StandardScaler()),
        ('Linear_model', LogisticRegression(C=0.005, random_state=0)),
    ],
    verbose=True,
)