def function_optimize_optuna(trial: Trial):
    train_it = 600


    stack_layers = trial.suggest_int('stack_layers', 1, 3) #2
    max_pool_layers = trial.suggest_categorical('max_pool_layers', [True, False])
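    # one max-pool stage per stacked layer when enabled, otherwise none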
    if max_pool_layers:
        max_pool_layers = stack_layers
    else:
        max_pool_layers = 0

    st_filter = int(trial.suggest_discrete_uniform('st_filter', 30, 50, 10)) #40
    inc_filter = int(trial.suggest_discrete_uniform('inc_filter', 30, 50, 10))  #50

    extra_porc = trial.suggest_int('extra_porc', 1, 4)  # 2
    input_factor = trial.suggest_categorical('input_factor', [0.5,0.25]) # 0.5


    lr = trial.suggest_categorical('learning_rate', [0.001,0.0001])
    #loss_string_options = ['cross_entropy','mse']
    #loss_string = trial.suggest_categorical('loss_string', loss_string_options)
    loss_string = 'cross_entropy'
    replace_max_pool_with_stride = trial.suggest_categorical('replace_max_pool_with_stride', [True,False])


    exp_params = {
        'tf_config': tf.ConfigProto(allow_soft_placement=True),
        'max_pool_layers': max_pool_layers,
        'stack_layers': stack_layers,
        'input_factor': input_factor,
        'extra_porc': extra_porc,
        'lr': lr,
        'st_filter': st_filter,
        'inc_filter': inc_filter,
        'loss_string': loss_string,
        'replace_max_pool_with_stride': replace_max_pool_with_stride
    }
    print("PARAMS : {0}".format(exp_params))

    out_dict, out_folder = train_run(train_it, save_model=True, interactive_plot=False, **exp_params)

    # use global F1 as the objective value; store everything else as trial attributes
    metric = float(out_dict['global_F1'])

    trial.set_user_attr('out_path', out_folder)
    for k in out_dict:
        if k != 'global_F1':
            trial.set_user_attr(k, float(out_dict[k]))

    return metric
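A minimal usage sketch for this objective, assuming the surrounding script imports optuna and defines train_run; the study direction and trial count below are illustrative, not taken from the original code:

import optuna

study = optuna.create_study(direction='maximize')      # global_F1 is maximized
study.optimize(function_optimize_optuna, n_trials=30)

print(study.best_params)                                # best hyperparameter set
print(study.best_trial.user_attrs['out_path'])          # output folder saved above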
Example 2
def objective(trial: Trial, X: pd.Series, y: pd.Series, workers: int) -> float:
    # Get architecture and its parameters
    pipeline = define_hyperparameters(trial=trial)

    # Evaluate architectures and store settings and metrics
    scores = cross_validate(
        pipeline,
        X,
        y,
        cv=3,
        scoring=["accuracy", "average_precision", "f1"],
        n_jobs=workers,
    )

    trial.set_user_attr(key="model", value=pipeline)
    for metric in ("accuracy", "average_precision", "f1"):
        trial.set_user_attr(key=metric, value=scores[f"test_{metric}"].mean())

    return scores["test_accuracy"].mean()
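Since this objective takes extra arguments, it is typically bound before being handed to the study; X, y and the worker count below are placeholders standing in for whatever the caller supplies:

import functools

import optuna

# X and y are assumed to be provided by the caller
study = optuna.create_study(direction='maximize')       # accuracy is maximized
study.optimize(functools.partial(objective, X=X, y=y, workers=4), n_trials=50)

best = study.best_trial
print(best.user_attrs['f1'], best.user_attrs['average_precision'])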
Example 3
    def __call__(self, trial: trial_module.Trial) -> float:
        params = self._get_params(trial)  # type: Dict[str, Any]
        dataset = copy.copy(self.dataset)
        callbacks = self._get_callbacks(trial)  # type: List[Callable]
        eval_hist = lgb.cv(
            params,
            dataset,
            callbacks=callbacks,
            early_stopping_rounds=self.early_stopping_rounds,
            feval=self.feval,
            fobj=self.fobj,
            folds=self.cv,
            init_model=self.init_model,
            num_boost_round=self.n_estimators,
        )  # Dict[str, List[float]]
        values = eval_hist[
            "{}-mean".format(self.eval_name)
        ]  # type: List[float]
        best_iteration = len(values)  # type: int

        trial.set_user_attr("best_iteration", best_iteration)

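        # persist each fold's booster under model_dir/trial_<number> so the
        # best trial's models can be reloaded after the study finishes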
        trial_path = self.model_dir / "trial_{}".format(trial.number)

        trial_path.mkdir(exist_ok=True, parents=True)

        boosters = callbacks[0].boosters_  # type: ignore

        for i, b in enumerate(boosters):
            b.best_iteration = best_iteration

            b.free_dataset()

            booster_path = trial_path / "fold_{}.pkl".format(i)

            with booster_path.open("wb") as f:
                pickle.dump(b, f)

        return values[-1]
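After a study built on this objective has run, the per-fold boosters of the best trial can be restored from the pickles written above; model_dir is an assumption standing in for whatever was passed as self.model_dir:

import pickle
from pathlib import Path

model_dir = Path('models')  # assumed to match self.model_dir
# `study` is the finished optuna study that used this objective
best_dir = model_dir / 'trial_{}'.format(study.best_trial.number)

boosters = []
for booster_path in sorted(best_dir.glob('fold_*.pkl')):
    with booster_path.open('rb') as f:
        boosters.append(pickle.load(f))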
Example 4
    def _store_scores(self, trial: Trial, scores: Dict[str, OneDimArrayLikeType]) -> None:
        for name, array in scores.items():
            if name in ["test_score", "train_score"]:
                for i, score in enumerate(array):
                    trial.set_user_attr("split{}_{}".format(i, name), score)

            trial.set_user_attr("mean_{}".format(name), np.nanmean(array))
            trial.set_user_attr("std_{}".format(name), np.nanstd(array))
Example 5
        def objective(trial: Trial) -> float:
            """Compute objective value

            Parameters
            ----------
            trial : `Trial`
                Current trial

            Returns
            -------
            loss : `float`
                Loss
            """

            # use pyannote.metrics metric when available
            try:
                metric = self.pipeline.get_metric()
            except NotImplementedError:
                metric = None
                losses = []

            processing_time = []
            evaluation_time = []

            # instantiate pipeline with value suggested in current trial
            pipeline = self.pipeline.instantiate(
                self.pipeline.parameters(trial=trial))

            if show_progress is not False:
                progress_bar = tqdm(total=len(inputs), **show_progress)
                progress_bar.update(0)

            # accumulate loss for each input
            for i, input in enumerate(inputs):

                # process input with pipeline
                # (and keep track of processing time)
                before_processing = time.time()
                output = pipeline(input)
                after_processing = time.time()
                processing_time.append(after_processing - before_processing)

                # evaluate output (and keep track of evaluation time)
                before_evaluation = time.time()

                # when metric is not available, use loss method instead
                if metric is None:
                    loss = pipeline.loss(input, output)
                    losses.append(loss)

                # when metric is available, `input` is expected to be provided
                # by a `pyannote.database` protocol
                else:
                    from pyannote.database import get_annotated

                    _ = metric(input["annotation"],
                               output,
                               uem=get_annotated(input))

                after_evaluation = time.time()
                evaluation_time.append(after_evaluation - before_evaluation)

                if show_progress is not False:
                    progress_bar.update(1)

                if self.pruner is None:
                    continue

                trial.report(
                    np.mean(losses) if metric is None else abs(metric), i)
                if trial.should_prune(i):
                    raise optuna.structs.TrialPruned()

            if show_progress is not False:
                progress_bar.close()

            trial.set_user_attr("processing_time", sum(processing_time))
            trial.set_user_attr("evaluation_time", sum(evaluation_time))

            return np.mean(losses) if metric is None else abs(metric)
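Because the objective reports intermediate values with trial.report and raises TrialPruned (only when self.pruner is set on the enclosing object), it pairs naturally with a pruner at study creation time; the concrete pruner and trial count below are illustrative:

import optuna

study = optuna.create_study(
    direction='minimize',                  # the returned loss / metric is minimized
    pruner=optuna.pruners.MedianPruner(),  # any pruner works; MedianPruner is just an example
)
study.optimize(objective, n_trials=100)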