"""Define ``price_pipe``, the scikit-learn pipeline for the regression model.

The pipeline chains the project's preprocessing and feature transformers
with a ``MinMaxScaler`` and a ``Lasso`` estimator. Every variable list is
read from ``regression_model.config.config``.
"""

import logging

from sklearn.linear_model import Lasso  # previously missing: NameError on import
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

from regression_model.processing import preprocessors as pp
from regression_model.processing import features
from regression_model.config import config

_logger = logging.getLogger(__name__)

# Step names are part of the pipeline's interface (``named_steps`` lookups and
# ``<step>__<param>`` parameter names), so they are kept exactly as originally
# spelled, including 'numerical_inputer' and 'forest'.
price_pipe = Pipeline([
    ('categorical_imputer',
     pp.CategoricalImputer(variables=config.CATEGORICAL_VARS_WITH_NA)),
    ('numerical_inputer',
     pp.NumericalImputer(variables=config.NUMERICAL_VARS_WITH_NA)),
    # NOTE(review): the reference variable is the same config entry that the
    # 'drop_features' step removes later; confirm DROP_FEATURES has the shape
    # TemporalVariableEstimator expects for ``reference_variable``.
    ('temporal_variable',
     pp.TemporalVariableEstimator(
         variables=config.TEMPORAL_VARS,
         reference_variable=config.DROP_FEATURES)),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(
         tol=0.01,
         variables=config.CATEGORICAL_VARS)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
    ('log_transformer',
     features.LogTransformer(variables=config.NUMERICALS_LOG_VARS)),
    ('drop_features',
     pp.DropUnecessaryFeatures(variables_to_drop=config.DROP_FEATURES)),
    ('scaler', MinMaxScaler()),
    # The final estimator is a Lasso even though the step is named 'forest'.
    ('forest', Lasso(random_state=0)),
])
# -*- coding: utf-8 -*-
"""
Define ``pipeline``: feature transformers followed by a random forest.

Created on Wed Aug 12 13:02:04 2020

@author: rkbra
"""

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor

from regression_model.processing import preprocessors as pp
from regression_model.processing import features
from regression_model.config import config

# Step names are reproduced exactly (including "arival_time_transformer"),
# because they are how the steps are addressed on the fitted pipeline.
pipeline = Pipeline(
    steps=[
        # Transformers taken from regression_model.processing.features.
        ("duration_transformer", features.DurationTransformer()),
        ("journey_data_transformer", features.JourneyDateTransformer()),
        ("departure_time_transformer", features.DepartureTimeTransformer()),
        ("arival_time_transformer", features.ArrivalTimeTransformer()),
        # Transformers taken from regression_model.processing.preprocessors.
        ("total_stop_transformer", pp.TotalStopTransformer()),
        (
            "rare_label_encoder",
            pp.RareLabelCategoricalEncoder(
                tol=0.0015,
                variables=config.FEATURE_WITH_RARE_LABELS,
            ),
        ),
        ("encoder", pp.Encoder(variables=config.CATEGORICAL_FEATURES)),
        # Scaling, then the final estimator.
        ("scaler", MinMaxScaler()),
        ("model", RandomForestRegressor(n_estimators=100)),
    ],
)
# Defines ``marathon_pipeline``: categorical preprocessing steps from the
# project's ``preprocessors`` module followed by a random forest regressor.

from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

from regression_model.processing import preprocessors as pp
from regression_model.config import config

marathon_pipeline = Pipeline(
    steps=[
        ("categorical_imputer",
         pp.CategoricalImputer(variables=config.CATEGORICAL_VARS_WITH_NA)),
        # No ``tol`` is passed here, so the encoder's own default applies.
        ("rare_label_encoder",
         pp.RareLabelCategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        ("ordinal_encoder",
         pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS)),
        # Fixed seed keeps the fitted forest reproducible between runs.
        ("regressor",
         RandomForestRegressor(max_depth=4, n_estimators=100, random_state=0)),
    ],
)
# Defines ``price_pipe``: project preprocessing steps configured through
# ``variables_path`` settings from ``regression_model.config.config``,
# followed by a ``MinMaxScaler`` and a ``Lasso`` estimator.

import logging

# These three names were used below but never imported, so importing this
# module raised NameError.
from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

from regression_model.processing import features
from regression_model.processing import preprocessors as pp
from regression_model.config import config

_logger = logging.getLogger(__name__)

# Step names are part of the pipeline's interface (``named_steps`` lookups and
# ``<step>__<param>`` parameter names), so they are kept exactly as originally
# spelled, including "numerical_inputer" and "Linear_model".
price_pipe = Pipeline([
    (
        "categorical_imputer",
        pp.CategoricalImputer(variables_path=config.CATEGORICAL_VARS_FILE),
    ),
    (
        "numerical_inputer",
        pp.NumericalImputer(variables_path=config.NUMERICAL_VARS_FILE),
    ),
    (
        "rare_label_encoder",
        pp.RareLabelCategoricalEncoder(
            tol=0.01,
            variables_path=config.CATEGORICAL_VARS_FILE,
        ),
    ),
    (
        "log_transformer",
        features.LogTransformer(variables_path=config.NUMERICAL_LOG_VARS_FILE),
    ),
    (
        "scaler",
        MinMaxScaler(),
    ),
    ("Linear_model", Lasso(alpha=0.005, random_state=0)),
])