def test_params() -> None:
    """Check that a trial replays its stored params, including after reassignment."""
    initial_params = {"x": 1}
    trial = _create_trial(
        value=0.2,
        params=initial_params,
        distributions={"x": FloatDistribution(0, 10)},
    )

    # The suggestion must come back as the stored value, not a fresh sample.
    assert trial.suggest_uniform("x", 0, 10) == 1
    assert trial.params == initial_params

    # Reassigning ``params`` must be reflected by subsequent suggestions.
    replaced_params = {"x": 2}
    trial.params = replaced_params
    assert trial.suggest_uniform("x", 0, 10) == 2
    assert trial.params == replaced_params
def f(trial: optuna.trial.Trial) -> float:
    """Objective: sample ``x``, ``y`` and ``z`` and return ``x**2 + y**2 + z``.

    ``x`` is suggested uniformly on [-10, 10], ``y`` log-uniformly on
    [10, 20] and ``z`` categorically from three float choices.
    """
    x_value = trial.suggest_uniform("x", -10, 10)
    y_value = trial.suggest_loguniform("y", 10, 20)

    z_choices = (10.0, 20.5, 30.0)
    z_value = trial.suggest_categorical("z", z_choices)
    # Every choice is a float, so the suggestion must be one as well.
    assert isinstance(z_value, float)

    squared_part = x_value**2 + y_value**2
    return squared_part + z_value
def f(trial):
    # type: (optuna.trial.Trial) -> float
    """Objective: sample ``x``, ``y`` and ``z`` and return ``x**2 + y**2 + z``.

    ``x`` is suggested uniformly on [-10, 10], ``y`` log-uniformly on
    [10, 20] and ``z`` categorically from ``(10, 20.5, 30)``.
    """
    x_value = trial.suggest_uniform('x', -10, 10)
    y_value = trial.suggest_loguniform('y', 10, 20)

    z_choices = (10, 20.5, 30)
    z_value = trial.suggest_categorical('z', z_choices)

    squared_part = x_value**2 + y_value**2
    return squared_part + z_value
def objective(trial: BaseTrial) -> float:
    """Objective that exercises each ``suggest_*`` method once and sums the results.

    Parameters ``a`` .. ``f`` are requested in alphabetical order; the
    categorical ``e`` is checked to be an ``int`` before the sum is returned.
    """
    uniform_a = trial.suggest_uniform("a", 0.0, 10.0)
    loguniform_b = trial.suggest_loguniform("b", 0.1, 10.0)
    discrete_c = trial.suggest_discrete_uniform("c", 0.0, 10.0, 1.0)
    int_d = trial.suggest_int("d", 0, 10)
    categorical_e = trial.suggest_categorical("e", [0, 1, 2])
    log_int_f = trial.suggest_int("f", 1, 10, log=True)

    # All categorical choices are ints, so the suggestion must be one too.
    assert isinstance(categorical_e, int)

    return (
        uniform_a
        + loguniform_b
        + discrete_c
        + int_d
        + categorical_e
        + log_int_f
    )
def __call__(self, trial):
    """Train the classifier selected by ``self.method`` for one trial and score it.

    Hyperparameters are sampled from ``trial`` and then overridden by
    ``self.params`` when given.  The model is optionally wrapped in
    ``CalibratedClassifierCV``, fitted on ``self.x_train`` / ``self.y_train``
    and used to predict class probabilities for the train and test data.
    Side effects: ``self.y_labels`` is set to the sorted unique training
    labels and one row of metrics is appended to ``self.scores``.

    Args:
        trial: Object providing ``suggest_uniform``, ``suggest_loguniform``
            and ``suggest_categorical`` (presumably an Optuna trial).

    Returns:
        The metric named by ``self.opt_function``: ``'balanced_accuracy'``,
        ``'aps'``, ``'accuracy'``, ``'recall'``, ``'precision'`` or
        ``'f1'`` (the last three are test-set values).  ``None`` is
        returned for any other name.

    Raises:
        TypeError: If ``self.params`` does not describe a dict.
        ValueError: If ``self.method`` is not a supported model code, or
            if ``self.params`` is a string that is not a valid literal
            (a ``SyntaxError`` is also possible in that case).
    """
    import ast
    from collections.abc import Mapping

    # TODO: optionally sample a subset of the feature columns per trial.

    # --- user-supplied hyperparameter overrides -------------------------
    overrides = None
    if self.params is not None:
        if isinstance(self.params, Mapping):
            overrides = dict(self.params)
        else:
            # exec() always returns None, so it can never produce the
            # override dict; parse the text as a literal instead, which
            # also avoids running arbitrary code.
            overrides = ast.literal_eval(self.params)
        if not isinstance(overrides, dict):
            raise TypeError(
                'params must describe a dict of keyword arguments, got %r'
                % type(overrides).__name__)

    # --- search space per model type ------------------------------------
    # Fixed settings are placed in the same dict as the sampled values so
    # that an override of e.g. ``n_jobs`` replaces the default instead of
    # raising a duplicate-keyword TypeError.
    method = self.method
    if method == 'SVC':  # support vector machine
        model_cls = sklearn.svm.SVC
        space = {
            # The trial parameter keeps its name 'svc_c'; the estimator
            # argument is called ``C``.
            'C': trial.suggest_loguniform('svc_c', 1e-10, 1e10),
            # predict_proba is called directly on the model when no
            # calibration wrapper is used, which SVC only supports with
            # probability=True.
            'probability': self.calibration_method is None
        }
    elif method == 'RF':  # random forest
        model_cls = RandomForestClassifier
        space = {
            'n_jobs': 15,
            'n_estimators': int(trial.suggest_uniform('n_estimators', 20, 300)),
            'max_depth': int(trial.suggest_uniform('max_depth', 2, 50)),
            'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
            'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
            'class_weight': trial.suggest_categorical(
                'class_weight', ['balanced', 'balanced_subsample', None])
        }
    elif method == 'AB':  # ada boost
        model_cls = AdaBoostClassifier
        space = {
            'n_estimators': int(trial.suggest_uniform('n_estimators', 20, 300)),
            'learning_rate': trial.suggest_uniform('learning_rate', 0.05, 0.3),
            'algorithm': trial.suggest_categorical('algorithm', ['SAMME.R', 'SAMME'])
        }
    elif method == 'GB':  # gradient boost
        model_cls = GradientBoostingClassifier
        space = {
            'n_estimators': int(trial.suggest_loguniform('n_estimators', 20, 300)),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),
            'max_depth': int(trial.suggest_loguniform('max_depth', 2, 50)),
            'loss': trial.suggest_categorical('loss', ['deviance', 'exponential'])
        }
    elif method == 'NB':  # naive bayes
        model_cls = skl.naive_bayes.GaussianNB
        space = {
            'var_smoothing': trial.suggest_loguniform('var_smoothing', 1e-10, 1e-05)
        }
    elif method == 'KNN':  # k-nearest neighbours
        model_cls = skl.neighbors.KNeighborsClassifier
        space = {
            'n_neighbors': int(trial.suggest_loguniform('n_neighbors', 3, 7))
        }
    elif method == 'LR':  # logistic regression
        # NOTE(review): 'l1' may not be accepted by the default solver of
        # LogisticRegression -- confirm against the installed version.
        model_cls = skl.linear_model.LogisticRegression
        space = {
            'penalty': trial.suggest_categorical('penalty', ['l1', 'l2']),
            'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False])
        }
    elif method == 'LGBM':  # LightGBM
        # NOTE(review): this search space is identical to the logistic
        # regression one; 'penalty' / 'fit_intercept' do not look like
        # LightGBM parameters -- confirm the intended space.
        model_cls = LGBMClassifier
        space = {
            'penalty': trial.suggest_categorical('penalty', ['l1', 'l2']),
            'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False])
        }
    elif method == 'XGB':  # XGBoost
        model_cls = XGBClassifier
        space = {
            'nthread': 15,
            'n_estimators': int(trial.suggest_loguniform('n_estimators', 5, 300)),
            'booster': trial.suggest_categorical('booster', ['dart', 'gbtree']),
            'eta': trial.suggest_loguniform('eta', 0.01, 0.5),
            'max_depth': int(trial.suggest_loguniform('max_depth', 3, 30))
        }
    elif method == 'CAT':  # CatBoost
        model_cls = CatBoostClassifier
        space = {
            'early_stopping_rounds': 75,
            'logging_level': 'Silent',
            'iterations': int(trial.suggest_loguniform('iterations', 20, 300)),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),
            'depth': int(trial.suggest_loguniform('depth', 2, 16))
        }
    else:
        # Without this guard the code below fails with an opaque
        # UnboundLocalError on ``model``.
        raise ValueError('unsupported method: %r' % (method,))

    if overrides is not None:
        space.update(overrides)
    model = model_cls(**space)

    # calibration (optional)
    if self.calibration_method is not None:
        model = CalibratedClassifierCV(model, cv=3, method=self.calibration_method)

    # model fit -- must happen before predict_proba is called below
    model.fit(self.x_train, self.y_train)

    # y labels
    self.y_labels = sorted(self.y_train.drop_duplicates())

    # --- class probabilities --------------------------------------------
    class_columns = [str(label) for label in model.classes_]
    prob_train = pd.DataFrame(model.predict_proba(self.x_train),
                              columns=class_columns).reset_index(drop=True)
    prob_test = pd.DataFrame(model.predict_proba(self.x_test),
                             columns=class_columns).reset_index(drop=True)

    if self.priori is not None:
        # Weight every class column by its prior ...
        for position, column in enumerate(prob_test.columns):
            prior = self.priori[column]
            prob_test.iloc[:, position] = prob_test.iloc[:, position] * prior
            prob_train.iloc[:, position] = prob_train.iloc[:, position] * prior
        # ... then rescale each row so the probabilities sum to one again.
        train_scale = prob_train.apply(lambda row: 1 / sum(row), axis=1)
        test_scale = prob_test.apply(lambda row: 1 / sum(row), axis=1)
        for label in self.y_labels:
            prob_train.loc[:, label] = prob_train.loc[:, label] * train_scale
            prob_test.loc[:, label] = prob_test.loc[:, label] * test_scale

    # --- hard classification --------------------------------------------
    if prob_test.shape[1] == 2 and self.threshold is not None:
        # Binary case with an explicit cut-off: the first key of
        # ``self.threshold`` is predicted when its probability reaches the
        # first value, otherwise the second key is predicted.
        threshold_keys = list(self.threshold.keys())
        var_1 = threshold_keys[0]
        var_2 = threshold_keys[1]
        threshold_var_1 = list(self.threshold.values())[0]
        classification_train = pd.DataFrame([
            var_1 if prob >= threshold_var_1 else var_2
            for prob in prob_train[str(var_1)]
        ])
        classification_test = pd.DataFrame([
            var_1 if prob >= threshold_var_1 else var_2
            for prob in prob_test[str(var_1)]
        ])
    else:
        # Kept as DataFrames so they can be concatenated later on.
        classification_train = pd.DataFrame(prob_train.idxmax(axis=1))
        classification_test = pd.DataFrame(prob_test.idxmax(axis=1))

    # --- scores ----------------------------------------------------------
    balanced_accuracy = skm.balanced_accuracy_score(
        y_true=self.y_test, y_pred=classification_test)
    accuracy = skm.accuracy_score(y_true=self.y_test,
                                  y_pred=classification_test)
    # This call was not written with multi-class models in mind.
    aps = average_precision_score(y_true=self.y_test,
                                  y_score=prob_test[str(self.pos_label)],
                                  pos_label=str(self.pos_label))

    is_binary = len(self.y_labels) < 3

    def _as_int(data):
        return data.astype(int)

    def _as_str(data):
        return data.astype(str)

    def _unchanged(data):
        return data

    def _train_test_metric(metric, binary_cast, multi_cast, binary_pos_label):
        """Return ``(train, test)`` for ``metric``; both NaN if either fails."""
        try:
            if is_binary:
                cast = binary_cast
                options = {'average': 'binary', 'pos_label': binary_pos_label}
            else:
                cast = multi_cast
                options = {'average': 'weighted'}
            train_value = metric(y_true=cast(self.y_train),
                                 y_pred=cast(classification_train),
                                 **options)
            test_value = metric(y_true=cast(self.y_test),
                                y_pred=cast(classification_test),
                                **options)
        except Exception:
            # Best effort: a metric that cannot be computed for these
            # labels is reported as NaN instead of aborting the trial.
            return np.nan, np.nan
        return train_value, test_value

    recall_train, recall_test = _train_test_metric(
        skm.recall_score, _as_int, _as_str, self.pos_label)
    precision_train, precision_test = _train_test_metric(
        skm.precision_score, _as_int, _as_str, self.pos_label)
    f1_train, f1_test = _train_test_metric(
        skm.f1_score, _unchanged, _unchanged, str(self.pos_label))

    scores = pd.DataFrame([[
        balanced_accuracy, accuracy, recall_train, recall_test,
        precision_train, precision_test, f1_train, f1_test, aps
    ]], columns=[
        'balanced_accuracy', 'accuracy', 'recall_train', 'recall_test',
        'precision_train', 'precision_test', 'f1_train', 'f1_test', 'aps'
    ])
    self.scores = pd.concat([self.scores, scores])

    # Return the single score the optimiser uses as the trial's objective
    # value (only one value can be returned).
    objective_by_name = {
        'balanced_accuracy': balanced_accuracy,
        'aps': aps,
        'accuracy': accuracy,
        'recall': recall_test,
        'precision': precision_test,
        'f1': f1_test,
    }
    return objective_by_name.get(self.opt_function)