def train_model(self, ensembler=False):
    """Build a CatBoost classifier; fit it only when used standalone."""
    if ensembler:
        params = get_model_params(ensembler, 'catboost')
    else:
        params = get_model_params()
    self.model = CatBoostClassifier(iterations=params['iterations'],
                                    learning_rate=params['learning_rate'],
                                    depth=params['depth'],
                                    l2_leaf_reg=params['l2_leaf_reg'],
                                    od_type="Iter",
                                    verbose=0)
    # As a base estimator for the voting ensemble, the model is left unfitted;
    # the ensemble fits its estimators itself.
    if not ensembler:
        self.model.fit(self.X, self.y)
    return self.model
def train_model(self, ensembler=False):
    """Build and fit a random forest classifier."""
    if ensembler:
        params = get_model_params(ensembler, 'rfa')
    else:
        params = get_model_params()
    self.model = RandomForestClassifier(n_estimators=params['n_estimators'],
                                        criterion=params['criterion'],
                                        max_features=params['max_features'],
                                        max_depth=params['max_depth'],
                                        n_jobs=-1,
                                        random_state=42,
                                        class_weight=params['class_weight'])
    self.model.fit(self.X, self.y)
    return self.model
def start_preprocessing(self):
    """Run the full cleaning, feature-selection and encoding pipeline and return the oversampled data."""
    print('\nCleaning up columns...')
    model_params = get_model_params()
    self.remake_date_cols()
    self.remake_states_lga()
    self.find_missing_val_replacements()
    self.clean_gender_col(self.preproc_args['missing']['gender'])
    self.clean_age_col(int(self.preproc_args['missing']['age']),
                       self.preproc_args['missing']['age_map'])
    self.clean_color_col(self.preproc_args['missing']['color'])
    self.clean_carmake_col(self.preproc_args['missing']['make'])
    self.clean_carcat_col(self.preproc_args['missing']['category'])
    #self.clean_car_product(self.preproc_args['missing']['product'])
    #self.clean_state_col(self.preproc_args['missing']['state'])
    #self.clean_LGA_col(self.preproc_args['missing']['lga'])
    self.clean_date_col()
    #self.remake_nopol_col()
    print('\nPlotting distribution...')
    self.plot_graph()
    print('\nApplying label encoder...')
    #self.find_feature_correlation(self.preproc_args['correlation_LB'], self.preproc_args['correlation_UB'])
    self.select_best_features(model_params['feature_k'])
    self.drop_skip_columns()
    self.encode_labels()
    return self.apply_oversampling()
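# Illustrative usage only (a minimal sketch, not part of the pipeline): the class
# names "Preprocessor" and "CatBoostTrainer" are assumptions for this example.
# start_preprocessing() returns the oversampled X, y, which the trainer
# constructors below expect.
#
# preprocessor = Preprocessor(raw_df, preproc_args)
# X, y = preprocessor.start_preprocessing()
# trainer = CatBoostTrainer(X, y)
# model = trainer.train_model()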
def train_model(self, ensembler=False):
    """Build a LightGBM classifier; fit it only when used standalone."""
    if ensembler:
        params = get_model_params(ensembler, 'lgbm')
    else:
        params = get_model_params()
    #self.X = self.X.rename(columns=lambda x: re.sub('[^A-Za-z0-9_]+', '', x))
    self.model = LGBMClassifier(random_state=42,
                                class_weight=params['class_weight'],
                                n_estimators=params['n_estimators'],
                                learning_rate=params['learning_rate'],
                                min_split_gain=params['min_split_loss'],
                                num_leaves=params['num_leaves'],
                                min_child_samples=params['min_child_samples'],
                                min_child_weight=params['min_child_weight'])
    # As a base estimator for the voting ensemble, the model is left unfitted.
    if not ensembler:
        self.model.fit(self.X, self.y)
    return self.model
def train_model(self):
    """Train an SVM, choosing the solver based on the configured kernel."""
    params = get_model_params()
    if params['kernel'] != 'linear':
        # Kernelised SVM for non-linear kernels (rbf, poly, sigmoid, ...).
        self.model = SVC(C=params['C'],
                         kernel=params['kernel'],
                         degree=params['degree'],
                         gamma=params['gamma'],
                         tol=params['tol'],
                         class_weight=params['class_weight'],
                         max_iter=-1,
                         random_state=42)
    else:
        # LinearSVC handles the linear-kernel case.
        self.model = LinearSVC(C=params['C'],
                               loss=params['loss'],
                               penalty=params['penalty'],
                               class_weight=params['class_weight'],
                               random_state=42,
                               max_iter=params['max_iter'],
                               dual=params['dual'])
    self.model = self.model.fit(self.X, self.y)
    return self.model
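# A minimal sketch of the dict shape the SVM train_model() above expects from
# get_model_params(); the values below are illustrative placeholders, not the
# project's actual configuration. A non-'linear' kernel routes to SVC; 'linear'
# routes to LinearSVC, which additionally reads loss, penalty, max_iter and dual.
_example_svm_params = {
    'C': 1.0,
    'kernel': 'rbf',             # anything other than 'linear' selects SVC
    'degree': 3,
    'gamma': 'scale',
    'tol': 1e-3,
    'class_weight': 'balanced',
    # keys read only on the LinearSVC branch:
    'loss': 'squared_hinge',
    'penalty': 'l2',
    'max_iter': 1000,
    'dual': True,
}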
def __init__(self, X, y):
    self.X = X
    self.y = y
    self.model_params = get_model_params()
def read_args():
    args = get_model_params()
    val_args = get_validation_params()
    main(args, val_args)
def setup_voting_classifier(self, models):
    """Combine the base estimators into a voting ensemble and fit it."""
    params = get_model_params()
    model = VotingClassifier(estimators=models, voting=params['voting_type'])
    model.fit(self.X, self.y)
    return model
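# Illustrative composition only: the trainer class names below (CatBoostTrainer,
# LGBMTrainer, EnsembleTrainer) are assumptions for this example. With
# ensembler=True the CatBoost and LightGBM trainers return unfitted estimators,
# and VotingClassifier then fits them itself inside setup_voting_classifier().
#
# cat = CatBoostTrainer(X, y).train_model(ensembler=True)
# lgbm = LGBMTrainer(X, y).train_model(ensembler=True)
# ensemble = EnsembleTrainer(X, y)
# voter = ensemble.setup_voting_classifier([('catboost', cat), ('lgbm', lgbm)])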