예제 #1
0
def _trial_to_params(trial: Trial):
    """Sample one booster parameter dictionary from ``trial``.

    The result starts as a copy of ``DEFAULT_PARAMS`` and is then overridden
    with values suggested by the trial.  Tree-related settings are added when
    the booster is ``gbtree`` or ``dart``; drop-out settings are added for
    ``dart`` only.  The parameter names look like XGBoost's -- confirm.

    Args:
        trial: object providing the ``suggest_*`` sampling methods.

    Returns:
        dict: the assembled parameters.
    """
    params = dict(DEFAULT_PARAMS)
    # 'gblinear' and 'dart' boosters are too slow
    params["booster"] = trial.suggest_categorical("booster", ['gbtree'])
    params["seed"] = trial.suggest_int('seed', 0, 999999)
    params["learning_rate"] = trial.suggest_loguniform(
        'learning_rate', 0.005, 0.5)
    params["lambda"] = trial.suggest_loguniform("lambda", 1e-8, 1.0)
    params["alpha"] = trial.suggest_loguniform("alpha", 1e-8, 1.0)

    booster = params['booster']

    if booster in ('gbtree', 'dart'):
        sampling_method = trial.suggest_categorical(
            "sampling_method", ["uniform", "gradient_based"])
        # The lower bound of 'subsample' depends on the sampling method:
        # 0.5 for uniform sampling, 0.1 otherwise.
        subsample_low = .5 if sampling_method == 'uniform' else .1
        subsample = trial.suggest_discrete_uniform(
            'subsample', subsample_low, 1, .05)

        tree_params = {
            "max_depth": trial.suggest_int('max_depth', 2, 25),
            "sampling_method": sampling_method,
            "subsample": subsample,
        }
        # The three column-sampling ratios share the same range and step.
        for ratio_name in ("colsample_bytree",
                           "colsample_bylevel",
                           "colsample_bynode"):
            tree_params[ratio_name] = trial.suggest_discrete_uniform(
                ratio_name, .20, 1., .01)

        # The choice lists below contain repeated entries -- presumably to
        # make those values more likely to be drawn; confirm with the author.
        tree_params["gamma"] = trial.suggest_categorical(
            "gamma",
            [0, 0, 0, 0, 0, 0.01, 0.1, 0.2, 0.3, 0.5, 1., 10., 100.])
        tree_params["min_child_weight"] = trial.suggest_categorical(
            'min_child_weight',
            [1, 1, 1, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9,
             10, 11, 15, 30, 60, 100, 1, 1, 1])
        tree_params["max_delta_step"] = trial.suggest_categorical(
            "max_delta_step", [0, 0, 0, 0, 0, 1, 2, 5, 8])
        tree_params["grow_policy"] = trial.suggest_categorical(
            "grow_policy", ["depthwise", "lossguide"])
        tree_params["tree_method"] = "gpu_hist"
        tree_params["gpu_id"] = 0
        params.update(tree_params)

    if booster == "dart":
        dart_params = {}
        dart_params["sample_type"] = trial.suggest_categorical(
            "sample_type", ["uniform", "weighted"])
        dart_params["normalize_type"] = trial.suggest_categorical(
            "normalize_type", ["tree", "forest"])
        dart_params["rate_drop"] = trial.suggest_loguniform(
            "rate_drop", 1e-8, 1.0)
        dart_params["skip_drop"] = trial.suggest_loguniform(
            "skip_drop", 1e-8, 1.0)
        params.update(dart_params)

    return params
예제 #2
0
def objective(trial: Trial, train_X, train_y, test_X, test_y) -> float:
    """Score one sampled ``XGBClassifier`` configuration.

    Hyperparameters are drawn from ``trial``, the classifier is fitted on the
    training data, and the mean of ``cross_val_score`` over the test data is
    returned.

    Args:
        trial: object providing the ``suggest_*`` sampling methods.
        train_X, train_y: data passed to ``fit``.
        test_X, test_y: data passed to ``cross_val_score``.

    Returns:
        Mean of the scores returned by ``cross_val_score``.
    """
    # (name, low, high) for the integer-valued parameters sampled first.
    int_ranges = (
        ('n_estimators', 0, 1000),
        ('max_depth', 2, 25),
        ('reg_alpha', 0, 10),
        ('reg_lambda', 0, 10),
        ('min_child_weight', 0, 20),
        ('gamma', 0, 5),
    )
    params = {
        name: trial.suggest_int(name, low, high)
        for name, low, high in int_ranges
    }
    params['learning_rate'] = trial.suggest_loguniform(
        'learning_rate', 0.0001, 0.5)
    params['colsample_bytree'] = trial.suggest_discrete_uniform(
        'colsample_bytree', 0.1, 1, 0.01)
    params['nthread'] = -1
    params['scale_pos_weight'] = trial.suggest_int('scale_pos_weight', 1, 10)
    params['random_state'] = trial.suggest_int('random_state', 1, 30)
    params['subsample'] = trial.suggest_float('subsample', 0.5, 0.9)

    classifier = XGBClassifier(**params)
    classifier.fit(train_X, train_y)

    # NOTE(review): cross_val_score presumably clones and refits the estimator
    # per fold, so the fit above may not influence the score -- confirm intent.
    fold_scores = cross_val_score(classifier, test_X, test_y)
    return fold_scores.mean()
def objective(trial: Trial, X_train, X_test, y_train, y_test) -> float:
    """Score one sampled multi-output ``XGBRegressor`` configuration.

    An ``XGBRegressor`` built from sampled hyperparameters is wrapped in a
    ``MultiOutputRegressor`` and fitted on the training data.  It is then
    scored with 3-fold ``cross_val_score`` on the test data using
    ``neg_mean_absolute_error``.

    Returns:
        Mean of the absolute values of the fold scores.
    """
    params = {"booster": "gbtree"}
    # Integer-valued parameters, sampled in this order.
    for name, low, high in (
        ("n_estimators", 0, 1000),
        ("max_depth", 2, 10),
        ("reg_alpha", 0, 5),
        ("reg_lambda", 0, 5),
        ("min_child_weight", 0, 5),
        ("gamma", 0, 5),
    ):
        params[name] = trial.suggest_int(name, low, high)
    params["learning_rate"] = trial.suggest_loguniform(
        "learning_rate", 0.005, 0.5
    )
    params["colsample_bytree"] = trial.suggest_discrete_uniform(
        "colsample_bytree", 0.1, 1, 0.01
    )
    # Fixed settings.
    # NOTE(review): "use_label_encoder" and "logloss" look classifier-oriented
    # for a regressor -- confirm they are intended.
    params["nthread"] = -1
    params["use_label_encoder"] = False
    params["eval_metric"] = "logloss"

    regressor = MultiOutputRegressor(XGBRegressor(**params))
    regressor.fit(X_train, y_train)

    fold_scores = cross_val_score(
        regressor,
        X_test,
        y_test,
        scoring="neg_mean_absolute_error",
        cv=3,
        n_jobs=-1,
    )

    # The scorer is negated, so take magnitudes before averaging.
    return np.mean(np.abs(fold_scores))
예제 #4
0
def objective(trial: Trial, data) -> float:
    """Sample booster parameters and score them by walk-forward validation.

    Args:
        trial: object providing the ``suggest_*`` sampling methods.
        data: forwarded unchanged to ``walk_forward_validation``.

    Returns:
        The first element of the triple returned by
        ``walk_forward_validation(params, data, 20)`` (named ``mae`` here).
    """
    params = {"booster": "gbtree"}
    # "tree_method": "gpu_hist" was disabled in the original configuration.
    params["n_estimators"] = trial.suggest_int("n_estimators", 0, 1000)
    params["max_depth"] = trial.suggest_int("max_depth", 2, 10)
    # The remaining integer parameters all share the range 0..5.
    for name in ("reg_alpha", "reg_lambda", "min_child_weight", "gamma"):
        params[name] = trial.suggest_int(name, 0, 5)
    params["learning_rate"] = trial.suggest_loguniform(
        "learning_rate", 0.005, 0.5)
    params["colsample_bytree"] = trial.suggest_discrete_uniform(
        "colsample_bytree", 0.1, 1, 0.01)
    params.update({
        "nthread": -1,
        "use_label_encoder": False,
        "eval_metric": "logloss",
    })

    # Only the error is needed; the other two returned values are discarded.
    mae, _actual, _predicted = walk_forward_validation(params, data, 20)
    return mae
예제 #5
0
    def _suggest(self, trial: optuna.Trial, v: problem.Var) -> float:
        """Return a numeric value for variable ``v`` obtained from ``trial``.

        If the trial already holds a parameter named ``v.name`` that value is
        reused; otherwise a new value is suggested according to the type of
        ``v.range``.  Categorical values are returned as the index of the
        chosen entry in ``v.range.choices``.

        Raises:
            ValueError: if no suggestion rule matches ``v``.
        """
        name = v.name
        value_range = v.range

        # Reuse an existing value; strings are stored category names.
        known = trial.params
        if name in known:
            existing = known[name]
            if not isinstance(existing, str):
                return existing
            assert isinstance(value_range, problem.CategoricalRange)
            return value_range.choices.index(existing)

        if isinstance(value_range, problem.ContinuousRange):
            low, high = value_range.low, value_range.high
            if v.distribution == problem.Distribution.UNIFORM:
                return trial.suggest_uniform(name, low, high)
            if v.distribution == problem.Distribution.LOG_UNIFORM:
                return trial.suggest_loguniform(name, low, high)
            # Any other distribution falls through to the error below.
        elif isinstance(value_range, problem.DiscreteRange):
            # The upper bound handed to the trial is ``high - 1``
            # (presumably because ``DiscreteRange.high`` is exclusive).
            low, last = value_range.low, value_range.high - 1
            if self._use_discrete_uniform:
                return trial.suggest_discrete_uniform(name, low, last, q=1)
            if v.distribution == problem.Distribution.LOG_UNIFORM:
                return trial.suggest_int(name, low, last, log=True)
            return trial.suggest_int(name, low, last)
        elif isinstance(value_range, problem.CategoricalRange):
            choices = value_range.choices
            return choices.index(trial.suggest_categorical(name, choices))

        raise ValueError("Unsupported parameter: {}".format(v))
예제 #6
0
def cnn_pipeline_factory(report_dir: Path, trial: Trial) -> ArmorDigitPipeline:
    """Build a custom-CNN pipeline whose tunable settings come from ``trial``.

    Dropout, learning rate and the dense layer size are sampled; the dense
    size is a power of two with an exponent between 3 and 10.  Input size and
    convolution blocks are fixed, and logs go to ``report_dir``.
    """
    dropout = trial.suggest_uniform("dropout", 0, 0.99)
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1e-1)
    # Sampled as a float with step 1, hence the rounding to an int exponent.
    dense_exponent = round(
        trial.suggest_discrete_uniform("dense_size_log2", 3, 10, 1))

    return ArmorDigitKerasPipeline.from_custom_cnn(
        input_size=32,
        conv_blocks=((32, 32), (64, 64)),
        logs_dir=str(report_dir),
        dropout=dropout,
        lr=learning_rate,
        dense_size=2**dense_exponent,
    )
예제 #7
0
def trial_to_params(trial: optuna.Trial):
    """Sample one estimator parameter dictionary from ``trial``.

    The result is a copy of ``DEFAULT_PARAMS`` overridden with sampled values.
    ``l1_ratio`` is added only for the ``elasticnet`` penalty, ``eta0`` only
    for the ``constant`` and ``invscaling`` schedules, and ``power_t`` only
    for ``invscaling``.  The names look like scikit-learn's SGD regressor
    options -- confirm against the caller.

    Returns:
        dict: the assembled parameters.
    """
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2', 'elasticnet'])
    learning_rate = trial.suggest_categorical(
        'learning_rate', ['constant', 'optimal', 'invscaling'])

    params = dict(DEFAULT_PARAMS)
    params['loss'] = trial.suggest_categorical(
        'loss', ['squared_loss', 'huber', 'epsilon_insensitive',
                 'squared_epsilon_insensitive'])
    params['penalty'] = penalty
    params['alpha'] = trial.suggest_loguniform('alpha', 1e-7, 1.)
    params['random_state'] = trial.suggest_int('random_state', 0, 999999)
    params['learning_rate'] = learning_rate

    if penalty == 'elasticnet':
        params['l1_ratio'] = trial.suggest_discrete_uniform(
            'l1_ratio', .01, .99, .01)

    if learning_rate in ('constant', 'invscaling'):
        params['eta0'] = trial.suggest_loguniform('eta0', 1e-7, 1e-1)
        # 'invscaling' additionally samples the decay exponent.
        if learning_rate == 'invscaling':
            params['power_t'] = trial.suggest_discrete_uniform(
                'power_t', 0.1, 0.5, 0.001)

    return params
def objective(trial: Trial) -> float:
    """Fit a logistic-growth ``Prophet`` model and return its validation RMSE.

    Relies on the module-level names ``train``, ``valid``, ``cap`` and
    ``floor``.  The ``cap`` and ``floor`` columns of ``train`` are
    overwritten in place.

    Returns:
        Root mean squared error between ``valid.y`` and the last 163
        forecast rows.
    """
    # Number of hourly periods forecast and scored.
    horizon = 163

    params = dict(
        changepoint_range=trial.suggest_discrete_uniform(
            "changepoint_range", 0.8, 0.95, 0.001),
        n_changepoints=trial.suggest_int("n_changepoints", 20, 35),
        changepoint_prior_scale=trial.suggest_discrete_uniform(
            "changepoint_prior_scale", 0.001, 0.5, 0.001),
        seasonality_prior_scale=trial.suggest_discrete_uniform(
            "seasonality_prior_scale", 1, 25, 0.5),
        growth="logistic",
        seasonality_mode="additive",
        yearly_seasonality=False,
        weekly_seasonality=True,
        daily_seasonality=True,
    )

    # fit_model
    model = Prophet(**params)
    train["cap"] = cap
    train["floor"] = floor
    model.fit(train)

    # The same bounds are set on the future frame before predicting.
    future = model.make_future_dataframe(periods=horizon, freq="H")
    future["cap"] = cap
    future["floor"] = floor

    predictions = model.predict(future).tail(horizon)
    return mean_squared_error(valid.y, predictions.yhat, squared=False)
예제 #9
0
    def _construct_trial_grid(trial: optuna.Trial, param_space: Dict):
        """Sample one value per entry of ``param_space`` from ``trial``.

        Each value of ``param_space`` is a sequence whose first element names
        the sampling method and whose remaining elements are its arguments:

        * ``("categorical", choices)``
        * ``("discrete_uniform", low, high, q)``
        * ``("loguniform", low, high)``
        * ``("uniform", low, high)``
        * ``("float", low, high[, step[, log]])``
        * ``("int", low, high[, step[, log]])``

        Returns:
            dict mapping each parameter name to its sampled value.

        Raises:
            ValueError: if the sampling method is not one of the above.
        """
        sampled = {}
        for name, params in param_space.items():
            method = params[0]

            if method == "categorical":
                value = trial.suggest_categorical(name, params[1])
            elif method == "discrete_uniform":
                low, high, q = params[1], params[2], params[3]
                value = trial.suggest_discrete_uniform(name, low, high, q)
            elif method == "loguniform":
                low, high = params[1], params[2]
                value = trial.suggest_loguniform(name, low, high)
            elif method == "uniform":
                low, high = params[1], params[2]
                value = trial.suggest_uniform(name, low, high)
            elif method == "float":
                low, high = params[1], params[2]
                # Optional trailing entries: step (default None), log (False).
                step = params[3] if len(params) > 3 else None
                log = params[4] if len(params) > 4 else False
                value = trial.suggest_float(
                    name, low, high, step=step, log=log)
            elif method == "int":
                low, high = params[1], params[2]
                # Optional trailing entries: step (default 1), log (False).
                step = params[3] if len(params) > 3 else 1
                log = params[4] if len(params) > 4 else False
                value = trial.suggest_int(
                    name, low, high, step=step, log=log)
            else:
                raise ValueError(
                    f"Undefined sampling method given for trial object: {name}: {params}"
                )

            sampled[name] = value

        return sampled
def objective(trial: Trial):
    """Sample ``clean_token_count_limit`` and return its square.

    A ``run`` user attribute with a fixed loss, the host's ``os.uname()``
    and the sampled limit is stored on the trial first.

    Returns:
        ``None`` when the sampled limit is below 10000, otherwise the limit
        squared as a float.
    """
    # Sampled as a float with step 1, then truncated to an int.
    sampled = trial.suggest_discrete_uniform(
        'clean_token_count_limit', 20, 60000, 1)
    clean_token_count_limit = int(sampled)

    run_record = {
        'loss': 1.2,
        # -- store other results like this
        'os_uname': os.uname(),
        'clean_token_count_limit': clean_token_count_limit,
        'attachments': {
            'info': 'info',
            'output': 'output'
        }
    }
    trial.set_user_attr('run', run_record)

    if clean_token_count_limit >= 10000:
        return float(clean_token_count_limit)**2
    return None
예제 #11
0
    def add_suggest(trial: optuna.Trial, user_attrs=None):
        """
        Add hyperparameter suggestions and default user attrs to a trial.

        The suggestion calls register each hyperparameter on ``trial``; their
        return values are not used here.  Default user attributes are set
        first and the caller's ``user_attrs`` afterwards, so caller values
        override defaults with the same key.

        Usage:
            trial = optuna.trial.FixedTrial(
                params={
                    'hidden_size': 128,
                }
            )
            trial = add_suggest(trial)
            trainer = pl.Trainer()
            model = LSTM_PL(dict(**trial.params, **trial.user_attrs), dataset_train,
                            dataset_test, cache_base_path, norm)
            trainer.fit(model)

        Args:
            trial: trial to populate.
            user_attrs: optional mapping of extra user attributes; ``None``
                (the default) means no extras.

        Returns:
            The same ``trial`` object, for chaining.
        """
        trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
        trial.suggest_uniform("attention_dropout", 0, 0.75)
        # We must have nhead <= hidden_size,
        # so nhead_power.max() <= hidden_size_power.min().
        trial.suggest_discrete_uniform("hidden_size_power", 4, 10, 1)
        trial.suggest_discrete_uniform("hidden_out_size_power", 4, 9, 1)
        trial.suggest_discrete_uniform("nhead_power", 1, 4, 1)
        trial.suggest_int("nlayers", 1, 12)
        trial.suggest_categorical("use_lstm", [False, True])
        trial.suggest_categorical("agg", ['last', 'max', 'mean', 'all'])

        user_attrs_default = {
            "batch_size": 16,
            "grad_clip": 40,
            "max_nb_epochs": 200,
            "num_workers": 4,
            "num_extra_target": 24 * 4,
            "vis_i": "670",
            "num_context": 24 * 4,
            "input_size": 18,
            "input_size_decoder": 17,
            "context_in_target": False,
            "output_size": 1,
            "patience": 3,
            'min_std': 0.005,
        }
        # Plain loops: these calls are made only for their side effect.
        for key, value in user_attrs_default.items():
            trial.set_user_attr(key, value)
        # ``None`` replaces the former shared mutable ``{}`` default.
        for key, value in (user_attrs or {}).items():
            trial.set_user_attr(key, value)
        return trial
def objective(trial: Trial) -> float:
    """Fit a ``NeuralProphet`` model and return its validation RMSE.

    Relies on the module-level names ``train`` and ``valid``.  The forecast
    covers ``len(valid)`` future periods plus the historic predictions; only
    rows whose ``y`` is missing are scored.

    Returns:
        Root mean squared error computed from ``yhat1`` of those rows and
        ``valid``.
    """
    params = dict(
        epochs=trial.suggest_categorical(
            "epochs", [50, 100, 200, 300, 400, 500]),
        batch_size=64,
        num_hidden_layers=trial.suggest_int("num_hidden_layers", 0, 5),
        learning_rate=trial.suggest_float("learning_rate", 1e-3, 0.1),
        changepoints_range=trial.suggest_discrete_uniform(
            "changepoints_range", 0.8, 0.95, 0.001),
        n_changepoints=trial.suggest_int("n_changepoints", 20, 35),
        seasonality_mode="additive",
        yearly_seasonality=False,
        weekly_seasonality=True,
        daily_seasonality=True,
        loss_func="MSE",
    )

    # fit_model
    model = NeuralProphet(**params)
    model.fit(train, freq="1D")

    future = model.make_future_dataframe(
        train, periods=len(valid), n_historic_predictions=True)
    forecast = model.predict(future)

    # Rows without an observed ``y`` are the future (validation) periods.
    unseen = forecast[forecast.y.isna()]
    return mean_squared_error(unseen.yhat1, valid, squared=False)
예제 #13
0
def HDL_define_by_run(trial: Trial, df: pd.DataFrame, sub_HDL: dict, name):
    """Sample a choice and its hyperparameters, filtering ``df`` to match.

    A key of ``sub_HDL`` is sampled under ``name`` and ``df`` is restricted
    to rows whose ``name`` column equals it.  Unless the choice is the string
    ``"None"``, each hyperparameter defined for that choice is sampled too,
    and ``df`` is narrowed on the column ``"<name>.<choice>.<hp_name>"``.

    Returns:
        The filtered DataFrame.

    Raises:
        NotImplementedError: for a ``_type`` other than ``ordinal``,
            ``choice``, ``int_quniform`` or ``quniform``.
    """
    choice = trial.suggest_categorical(name, list(sub_HDL.keys()))
    df = df.loc[df[name] == choice, :]
    if choice == "None":
        return df

    for hp_name, hp_define in sub_HDL[choice].items():
        kind = hp_define["_type"]
        spec = hp_define["_value"]
        com_hp_name = f"{name}.{choice}.{hp_name}"

        if kind in ("ordinal", "choice"):
            sampled = trial.suggest_categorical(com_hp_name, spec)
        elif kind in ("int_quniform", "quniform"):
            sampled = trial.suggest_discrete_uniform(com_hp_name, *spec)
        else:
            raise NotImplementedError

        # Floats are matched with a tolerance, everything else exactly.
        if isinstance(sampled, float):
            row_mask = np.abs(df[com_hp_name] - sampled) < 1e-8
        else:
            row_mask = df[com_hp_name] == sampled
        df = df.loc[row_mask, :]

    return df
def objective(trial: Trial) -> float:
    """Fit a ``Prophet`` model with custom seasonalities and return its RMSE.

    Trend settings and, for each of four custom seasonalities, a Fourier
    order and a prior scale are sampled from ``trial``.  Relies on the
    module-level names ``train``, ``valid``, ``cap`` and ``floor``; the
    ``cap`` and ``floor`` columns of ``train`` are overwritten in place.

    Returns:
        Root mean squared error between ``valid.y`` and the last 7 forecast
        rows.
    """
    # Only sampled values are kept here; fixed settings are passed directly
    # to ``Prophet`` below.
    params = {
        "changepoint_range": trial.suggest_discrete_uniform(
            "changepoint_range", 0.8, 0.95, 0.001
        ),
        "n_changepoints": trial.suggest_int("n_changepoints", 20, 35),
        "changepoint_prior_scale": trial.suggest_discrete_uniform(
            "changepoint_prior_scale", 0.001, 0.5, 0.001
        ),
        "seasonality_prior_scale": trial.suggest_discrete_uniform(
            "seasonality_prior_scale", 1, 25, 0.5
        ),
        "yearly_fourier": trial.suggest_int("yearly_fourier", 5, 15),
        "monthly_fourier": trial.suggest_int("monthly_fourier", 3, 12),
        "weekly_fourier": trial.suggest_int("weekly_fourier", 3, 7),
        "quaterly_fourier": trial.suggest_int("quaterly_fourier", 3, 10),
        "yearly_prior": trial.suggest_discrete_uniform("yearly_prior", 1, 25, 0.5),
        "monthly_prior": trial.suggest_discrete_uniform("monthly_prior", 1, 25, 0.5),
        "weekly_prior": trial.suggest_discrete_uniform("weekly_prior", 1, 25, 0.5),
        "quaterly_prior": trial.suggest_discrete_uniform("quaterly_prior", 1, 25, 0.5),
    }
    # fit_model
    model = Prophet(
        # Bug fix: this used to receive params["changepoint_prior_scale"],
        # so the sampled "changepoint_range" was never applied.
        changepoint_range=params["changepoint_range"],
        n_changepoints=params["n_changepoints"],
        changepoint_prior_scale=params["changepoint_prior_scale"],
        seasonality_prior_scale=params["seasonality_prior_scale"],
        yearly_seasonality=False,
        weekly_seasonality=True,
        daily_seasonality=True,
        growth="logistic",
        seasonality_mode="additive",
    )

    # (name, period in days); the spelling "quaterly" is kept because it is
    # part of the sampled parameter names.
    custom_seasonalities = (
        ("yearly", 365.25),
        ("monthly", 30.5),
        ("weekly", 7),
        ("quaterly", 365.25 / 4),
    )
    for name, period in custom_seasonalities:
        model.add_seasonality(
            name=name,
            period=period,
            fourier_order=params[f"{name}_fourier"],
            prior_scale=params[f"{name}_prior"],
        )

    train["cap"] = cap
    train["floor"] = floor
    model.fit(train)

    # NOTE(review): 144 periods are forecast but only the last 7 rows are
    # scored against ``valid.y`` -- confirm these match the validation set.
    future = model.make_future_dataframe(periods=144, freq="d")
    future["cap"] = cap
    future["floor"] = floor

    forecast = model.predict(future)
    valid_forecast = forecast.tail(7)

    rmse = mean_squared_error(valid.y, valid_forecast.yhat, squared=False)

    return rmse
예제 #15
0
    def objective(self, trial:optuna.Trial):
        """Optuna objective: sample values, build a trial config and run it.

        Every entry of ``self.optuna_params`` maps a key to a specification
        string in one of these forms:

        * ``uniform(low,high)``
        * ``loguniform(low,high)``
        * ``categorical([...])`` -- the bracketed part is parsed as JSON
        * ``int(low,high)``
        * ``discrete_uniform(low,high,q)``

        The specification string itself is used as the Optuna parameter
        name.  Numeric values are stored as strings and categorical values
        as sampled.  Entries in any other form are skipped.

        Args:
            trial (optuna.Trial): trial object used to sample values.

        Returns:
            ``bucket["Result"]`` when the executed bucket contains that key,
            otherwise ``0``.
        """

        logger = ErmineLogger.get_instance()
        logger.debug("objective")
        optuna_dict = {}
        template = self.template
        optuna_params = self.optuna_params
        logger.debug(optuna_params)

        for p_key, p in optuna_params.items():
            logger.debug("p in optuna key "+ p_key + " , " + "val "+ p)
            # Patterns are raw strings so that "\(" is not an invalid escape.
            if p.startswith("uniform"):
                matchobj = re.match(r"uniform\((.*),(.*)\)", p)
                low = float(matchobj.group(1))
                high = float(matchobj.group(2))
                v = trial.suggest_uniform(p, low, high)
                optuna_dict[p_key] = str(v)
            elif p.startswith("loguniform"):
                matchobj = re.match(r"loguniform\((.*),(.*)\)", p)
                low = float(matchobj.group(1))
                high = float(matchobj.group(2))
                v = trial.suggest_loguniform(p, low, high)
                optuna_dict[p_key] = str(v)
            elif p.startswith("categorical"):
                matchobj = re.match(r"categorical\((\[.*\])\)", p)
                choices = json.loads(matchobj.group(1))
                v = trial.suggest_categorical(p, choices)
                optuna_dict[p_key] = v
            elif p.startswith("int"):
                matchobj = re.match(r"int\((.*),(.*)\)", p)
                # Parse via float so bounds such as "1.0" are still accepted.
                low = int(float(matchobj.group(1)))
                high = int(float(matchobj.group(2)))
                # Bug fix: this branch used to call suggest_loguniform and
                # so sampled a log-uniform float instead of an integer.
                v = trial.suggest_int(p, low, high)
                optuna_dict[p_key] = str(v)
            elif p.startswith("discrete_uniform"):
                matchobj = re.match(r"discrete_uniform\((.*),(.*),(.*)\)", p)
                logger.debug(matchobj)
                low = float(matchobj.group(1))
                high = float(matchobj.group(2))
                q = float(matchobj.group(3))
                v = trial.suggest_discrete_uniform(p, low, high, q)
                optuna_dict[p_key] = str(v)

        self.logger.debug("optuna trial values")
        self.logger.debug(optuna_dict)

        trial_config = self.generate_trial_config(template,optuna_dict)

        self.logger.debug(trial_config)

        bucket:WorkingBucket = self.execute(trial_config)
        if "Result" in bucket:
            return bucket["Result"]
        return 0