def getModel(self, _params):
    """Build a k-nearest-neighbors classifier from the tuned parameter dict."""
    knn_kwargs = {
        'n_neighbors': _params['n_neighbors'],
        'weights': _params['weights'],
        'algorithm': _params['algorithm'],
        'leaf_size': _params['leaf_size'],
        'p': _params['p'],
        # parallelize neighbor search across the configured core count
        'n_jobs': definitions.getNumberOfCore(),
    }
    return KNeighborsClassifier(**knn_kwargs)
def getModel(self, _params):
    """Build a Perceptron classifier from the tuned parameter dict."""
    perceptron_kwargs = {
        'penalty': _params['penalty'],
        'alpha': _params['alpha'],
        'fit_intercept': _params['fit_intercept'],
        # search frameworks often hand back floats; the estimator wants an int
        'max_iter': int(_params['max_iter']),
        'shuffle': _params['shuffle'],
        'early_stopping': _params['early_stopping'],
        'validation_fraction': _params['validation_fraction'],
        'n_jobs': definitions.getNumberOfCore(),
    }
    return Perceptron(**perceptron_kwargs)
def getModel(self, _params):
    """Build a Voting ensemble over the first ``max_estimator`` candidate jobs.

    Each candidate contributes a (job name, already-trained model) pair.
    """
    members = []
    for position, job in enumerate(self.cantidate_job_list):
        # stop once the tuned ensemble size is reached
        if position == _params['max_estimator']:
            break
        members.append((job.getJobName(), job.trained_model))
    return Voting(
        estimators=members,
        n_jobs=definitions.getNumberOfCore(),
    )
def getModel(self, _params):
    """Build a logistic-regression classifier from the tuned parameter dict."""
    lr_kwargs = {
        'penalty': _params['penalty'],
        'dual': _params['dual'],
        'C': _params['C'],
        'fit_intercept': _params['fit_intercept'],
        'intercept_scaling': _params['intercept_scaling'],
        'solver': _params['solver'],
        'max_iter': int(_params['max_iter']),
        # only consulted when penalty == 'elasticnet'
        'l1_ratio': _params['l1_ratio'],
        'n_jobs': definitions.getNumberOfCore(),
    }
    return LogisticRegression(**lr_kwargs)
def getModel(self, _params):
    """Build an XGBoost regressor from the tuned parameter dict."""
    # integer-valued knobs may arrive as floats from the tuner
    depth = int(_params['max_depth'])
    child_weight = int(_params['min_child_weight'])
    return XGBRegressor(
        max_depth=depth,
        booster=_params['booster'],
        gamma=_params['gamma'],
        min_child_weight=child_weight,
        subsample=_params['subsample'],
        colsample_bytree=_params['colsample_bytree'],
        eta=_params['eta'],
        verbosity=0,  # silence per-iteration logging
        n_jobs=definitions.getNumberOfCore(),
    )
def getModel(self, _params):
    """Build a LightGBM regressor from the tuned parameter dict.

    ``num_leaves`` is derived from ``max_depth`` rather than tuned
    independently: the depth is clamped to [2, 12] and 60% of the full
    2**depth leaf budget is used.
    """
    clamped_depth = np.maximum(np.minimum(_params['max_depth'], 12), 2)
    derived_leaves = int((2 ** clamped_depth) * 0.6)
    return LGBMRegressor(
        max_depth=int(_params['max_depth']),
        num_leaves=derived_leaves,
        boosting_type=_params['boosting_type'],
        learning_rate=_params['learning_rate'],
        n_estimators=int(_params['n_estimators']),
        bagging_fraction=_params['bagging_fraction'],
        bagging_freq=_params['bagging_freq'],
        feature_fraction=_params['feature_fraction'],
        max_bin=int(_params['max_bin']),
        min_data_in_leaf=int(_params['min_data_in_leaf']),
        n_jobs=definitions.getNumberOfCore(),
    )
def getModel(self, _params):
    """Build an SGD classifier from the tuned parameter dict."""
    sgd_kwargs = {
        'penalty': _params['penalty'],
        'alpha': _params['alpha'],
        'l1_ratio': _params['l1_ratio'],
        # flags may arrive as 0/1 from the tuner; coerce to real booleans
        'fit_intercept': bool(_params['fit_intercept']),
        'max_iter': int(_params['max_iter']),
        'shuffle': bool(_params['shuffle']),
        'learning_rate': _params['learning_rate'],
        'eta0': _params['eta0'],
        'early_stopping': bool(_params['early_stopping']),
        'class_weight': _params['class_weight'],
        'n_jobs': definitions.getNumberOfCore(),
    }
    return SGDClassifier(**sgd_kwargs)
def getModel(self, _params):
    """Build an MLP regressor from the tuned parameter dict.

    BUG FIX: the original passed ``n_jobs`` to ``MLPRegressor``, but
    sklearn's ``MLPRegressor`` does not accept an ``n_jobs`` argument,
    so construction raised ``TypeError``. The argument is removed.
    """
    # Three hidden layers whose widths are tuned independently.
    hidden_layers = (
        int(_params['layer_1']),
        int(_params['layer_2']),
        int(_params['layer_3']),
    )
    return MLPRegressor(
        hidden_layer_sizes=hidden_layers,
        activation=_params['activation'],
        solver=_params['solver'],
        alpha=_params['alpha'],
        learning_rate=_params['learning_rate'],
        max_iter=int(_params['max_iter']),
        validation_fraction=_params['validation_fraction'],
        # beta_1/beta_2 are only consulted when solver == 'adam'
        beta_1=_params['beta_1'],
        beta_2=_params['beta_2'],
    )
def getModel(self, _params):
    """Build a random-forest model from the tuned parameter dict.

    Only a subset of the estimator's knobs is tuned; criterion,
    min_samples_split/leaf, max_leaf_nodes, min_impurity_decrease,
    bootstrap and ccp_alpha are deliberately left at their defaults.
    """
    forest_kwargs = {
        'n_estimators': int(_params['n_estimators']),
        'max_depth': _params['max_depth'],
        'min_weight_fraction_leaf': _params['min_weight_fraction_leaf'],
        'max_features': _params['max_features'],
        'oob_score': _params['oob_score'],
        'n_jobs': definitions.getNumberOfCore(),
    }
    return RandomForest(**forest_kwargs)
def getModel(self, _params):
    """Build a LightGBM classifier from the tuned parameter dict.

    ``num_leaves`` is derived from ``max_depth`` (clamped to [2, 12]),
    using 60% of the full 2**depth leaf budget.
    """
    clamped_depth = np.maximum(np.minimum(_params['max_depth'], 12), 2)
    derived_leaves = int((2 ** clamped_depth) * 0.6)
    return LGBMClassifier(
        boosting_type=_params['boosting_type'],
        num_leaves=derived_leaves,
        max_depth=int(_params['max_depth']),
        learning_rate=_params['learning_rate'],
        n_estimators=int(_params['n_estimators']),
        subsample_for_bin=int(_params['subsample_for_bin']),
        min_split_gain=_params['min_split_gain'],
        min_child_weight=_params['min_child_weight'],
        min_child_samples=int(_params['min_child_samples']),
        reg_alpha=_params['reg_alpha'],
        reg_lambda=_params['reg_lambda'],
        n_jobs=definitions.getNumberOfCore(),
    )
def getModel(self, _params):
    """Build an XGBoost classifier from the tuned parameter dict."""
    xgb_kwargs = {
        'booster': _params['booster'],
        'eta': _params['eta'],
        'gamma': _params['gamma'],
        # integer-valued knobs may arrive as floats from the tuner
        'max_depth': int(_params['max_depth']),
        'min_child_weight': int(_params['min_child_weight']),
        'max_delta_step': int(_params['max_delta_step']),
        'subsample': _params['subsample'],
        'colsample_bytree': _params['colsample_bytree'],
        'colsample_bylevel': _params['colsample_bylevel'],
        'colsample_bynode': _params['colsample_bynode'],
        'reg_lambda': _params['reg_lambda'],
        'alpha': _params['alpha'],
        'verbosity': 0,  # silence per-iteration logging
        'n_jobs': definitions.getNumberOfCore(),
    }
    return XGBClassifier(**xgb_kwargs)
def getModel(self, _params, _x, _y, _x_eval):
    """Stack the first ``max_estimator`` candidate jobs' models.

    Rebuilds each candidate estimator from its best parameters, runs
    vecstack-style stacking over the training and evaluation data, and
    returns the stacked feature matrices ``(s_train, s_test)``.

    BUG FIX: the original computed ``s_train, s_test`` but never
    returned them, so every caller received ``None``.
    """
    estimator_list = []
    for idx, job in enumerate(self.cantidate_job_list):
        # stop once the tuned ensemble size is reached
        if idx == _params['max_estimator']:
            break
        estimator_list.append(job.model.getModel(job.best_params))
    s_train, s_test = stacking(
        estimator_list, _x, _y, _x_eval,
        regression=False,
        metric=accuracy_score,
        stratified=_params['stratified'],
        shuffle=_params['shuffle'],
        random_state=0,  # fixed seed keeps the fold split reproducible
        n_jobs=definitions.getNumberOfCore(),
    )
    return s_train, s_test
def getModel(self, _params):
    """Build an extra-trees model from the tuned parameter dict.

    JSON configs encode Python ``None`` as the ``definitions.JSON_NONE``
    sentinel for ``max_features`` and ``class_weight``; both are
    translated back to ``None`` before construction.

    BUG FIX: the original wrote the normalized values back into
    ``_params``, mutating the caller's dict as a side effect; the
    translation is now done in locals.
    """
    max_features = _params['max_features']
    if max_features == definitions.JSON_NONE:
        max_features = None
    class_weight = _params['class_weight']
    if class_weight == definitions.JSON_NONE:
        class_weight = None
    # Remaining sklearn knobs (max_depth, min_samples_split/leaf,
    # bootstrap, oob_score, ccp_alpha, ...) stay at their defaults.
    return ExtraTrees(
        n_estimators=int(_params['n_estimators']),
        criterion=_params['criterion'],
        min_weight_fraction_leaf=_params['min_weight_fraction_leaf'],
        max_features=max_features,
        class_weight=class_weight,
        n_jobs=definitions.getNumberOfCore(),
    )
def __getRemovedOutlierIsolationForestDf(self, _df, _column_drop=True):
    """Remove outlier rows from ``_df`` using an IsolationForest detector."""
    detector = IsolationForest(
        contamination=OUTLIER_FRACTION,
        n_jobs=definitions.getNumberOfCore(),
    )
    return self.__getRemoveOutlierDf(
        _df, detector, self.column_name_iforest, _column_drop)
def getIsolationForest(_df):
    """Return IsolationForest inlier/outlier labels (+1/-1) for each row of ``_df``."""
    detector = IsolationForest(
        contamination=OUTLIER_FRACTION,
        n_jobs=definitions.getNumberOfCore(),
    )
    labels = detector.fit_predict(_df)
    return labels
def getLocalFactor(_df):
    """Return LocalOutlierFactor inlier/outlier labels (+1/-1) for each row of ``_df``."""
    detector = LocalOutlierFactor(
        contamination=OUTLIER_FRACTION,
        n_jobs=definitions.getNumberOfCore(),
    )
    labels = detector.fit_predict(_df)
    return labels
def __getRemovedOutlierLocalFactorDf(self, _df):
    """Remove outlier rows from ``_df`` using a LocalOutlierFactor detector."""
    detector = LocalOutlierFactor(
        contamination=OUTLIER_FRACTION,
        n_jobs=definitions.getNumberOfCore(),
    )
    return self.__getRemoveOutlierDf(_df, detector)
def __getRemovedOutlierIsolationForestDf(self, _df):
    """Remove outlier rows from ``_df`` using an IsolationForest detector."""
    detector = IsolationForest(
        contamination=OUTLIER_FRACTION,
        n_jobs=definitions.getNumberOfCore(),
    )
    return self.__getRemoveOutlierDf(_df, detector)
def __getAddDbscanDf(self, _df):
    """Cluster ``_df`` with DBSCAN and append labels as an 'added_cluster' column.

    Works on a copy (after dropping the configured columns), so the
    caller's frame is left untouched.
    """
    working_df = self.__getDroppedColumnDf(_df.copy())
    cluster_labels = DBSCAN(
        n_jobs=definitions.getNumberOfCore()).fit_predict(working_df)
    working_df['added_cluster'] = cluster_labels
    return working_df
def __getRemovedOutlierLocalFactorDf(self, _df, _column_drop=True):
    """Remove outlier rows from ``_df`` using a LocalOutlierFactor detector."""
    detector = LocalOutlierFactor(
        contamination=OUTLIER_FRACTION,
        n_jobs=definitions.getNumberOfCore(),
    )
    return self.__getRemoveOutlierDf(
        _df, detector, self.column_name_localfactor, _column_drop)