Example #1
def train_light_gbm_regressor(X, y, cv, n_params, test_size=.2, n_jobs=-1):

    LGBM_params = {
        'num_leaves': sp_randint(6, 50),
        'min_child_samples': sp_randint(100, 500),
        'min_child_weight': [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4],
        'subsample': sp_uniform(loc=0.2, scale=0.8),
        'colsample_bytree': sp_uniform(loc=0.4, scale=0.6),
        'reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
        'reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100]
    }

    Xt, Xv, yt, yv = train_test_split(X, y, test_size=test_size)

    param_list = list(ParameterSampler(LGBM_params, n_iter=n_params))
    param_scores = []

    int_skf = KFold(n_splits=cv)

    for p in range(n_params):

        best_scs = []

        for train_i, test_i in int_skf.split(Xt, yt):

            Xt_train, yt_train = Xt[train_i], yt[train_i]
            Xt_test, yt_test = Xt[test_i], yt[test_i]

            model = LGBMRegressor(n_jobs=n_jobs,
                                  silent=True,
                                  n_estimators=5000,
                                  **param_list[p])
            model.fit(Xt_train,
                      yt_train,
                      eval_set=(Xt_test, yt_test),
                      verbose=False,
                      early_stopping_rounds=300)

            best_sc = model.best_score_['valid_0']['l2']
            best_scs.append(best_sc)

        param_scores.append(np.mean(best_scs))

    bp_ind = np.argmin(param_scores)
    model = LGBMRegressor(n_jobs=n_jobs,
                          silent=True,
                          n_estimators=5000,
                          **param_list[bp_ind])
    model.fit(Xt,
              yt,
              eval_set=(Xv, yv),
              verbose=False,
              early_stopping_rounds=500)

    return model
Example #2
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"], "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10,
                               random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)
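For reference, the same API in stand-alone form; this is a minimal sketch (scipy and scikit-learn only, with arbitrary distribution values), not taken from any of the projects in these examples.

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

param_distributions = {"kernel": ["rbf", "linear"], "C": uniform(0, 1)}
sampler = ParameterSampler(param_distributions, n_iter=5, random_state=0)

for params in sampler:
    # each item is a plain dict mapping parameter names to sampled values
    print(params)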
Example #3
def bm25_parameter_space(n_trials):
    rng = np.random.RandomState(42)

    return ParameterSampler(
        dict(tf_method=["binary", "raw", "freq", "log_norm", "double_norm"],
             idf_method=["smooth", "probabilistic"],
             drop_stopwords=[True, False],
             drop_suffix=[True, False],
             drop_punct=[True, False],
             lowercase=[True, False],
             k1=uniform(1.2, 2.0),
             b=uniform(0.5, 0.8),
             delta=uniform(0, 2)),
        n_iter=n_trials,
        random_state=rng)
Example #4
def uniform_sample_params(template: Dict, n_samples: int):
    # Sample according to the template for n_samples
    param_grid = dict()

    for k, v in template.items():
        if type(v) == str:
            param_grid[k] = eval("dist." + v)
        else:
            param_grid[k] = v

    return ParameterSampler(param_grid, n_iter=n_samples)
Example #5
    def _sample_config(self) -> dict:
        params = list(
            ParameterSampler(self._params_space,
                             n_iter=1,
                             random_state=self.random_state))[0]
        for key in params:
            if isinstance(params[key], np.float64):
                # Fix error in FastAI, can't handle np.float64
                params[key] = float(params[key])
        params.update(self._params_static)
        return params
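A quick illustration of the issue the float() cast above works around: values drawn from scipy distributions come back as numpy scalar types rather than plain Python floats. This is a minimal sketch, independent of the FastAI code above.

import numpy as np
from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

params = list(ParameterSampler({"lr": uniform(0, 1)}, n_iter=1, random_state=0))[0]
print(type(params["lr"]))  # typically <class 'numpy.float64'>

# cast numpy floats back to plain Python floats
params = {k: float(v) if isinstance(v, np.floating) else v for k, v in params.items()}
print(type(params["lr"]))  # <class 'float'>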
Example #6
def ts_grid_search(df,
                   holidays,
                   param_grid=None,
                   cv_param=None,
                   RandomizedSearch=True,
                   random_state=None):
    '''Grid search.
    Time series require a dedicated cross-validation scheme.

    df: input data
    holidays: should be tuned in advance

    '''

    df = df.copy()
    if param_grid is None:
        param_grid = {
            'growth': ['linear'],
            'seasonality_prior_scale': np.round(np.logspace(0, 2.2, 10)),
            'holidays_prior_scale': np.round(np.logspace(0, 2.2, 10)),
            'changepoint_prior_scale': [0.05],  # [0.005, 0.01, 0.02, 0.03, 0.05, 0.008, 0.10, 0.13, 0.16, 0.2]
            'interval_width': [0.80]  # [0.2, 0.4, 0.6, 0.8]
        }

    if RandomizedSearch:
        param_list = list(
            ParameterSampler(param_grid, n_iter=10, random_state=random_state))
    else:
        param_list = list(ParameterGrid(param_grid))

    if cv_param is None:
        cv_param = {'horizon': 30, 'period': 120, 'initial': 1095}

    scores = []
    for i, param in enumerate(param_list):
        print('{}/{}:'.format(i, len(param_list)), param)
        param.update({'holidays': holidays})
        scores_tmp = ts_evaluation(df, param, exp=True, **cv_param)
        param.pop('holidays')
        tmp = param.copy()
        tmp.update({'mape': scores_tmp['mape'], 'rmse': scores_tmp['rmse']})
        scores.append(tmp)
        print('mape : {:.5f}%'.format(100 * scores_tmp['mape']))

    scores = pd.DataFrame(scores)

    best_param_ = scores.loc[scores['mape'].argmin(), :].to_dict()
    best_scores_ = best_param_['mape']
    best_param_.pop('mape')
    best_param_.pop('rmse')

    return best_param_, best_scores_, scores
Example #7
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):
    """
    Example:
        python v12.py tuning --devices 0,1,2,3 --n-gpu 2
    """

    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            'batch_size': [32],
            # 'epochs': [1],
            # 'scaleup_epochs': [0],
            'loss': ['arcface'],
            'verifythresh': [30],
            'freqthresh': [3],
            'margin': [0.3, 0.2],
            's': [30],
            # 'augmentation': ['soft'],
        },

        # for reproduce
        # {
        #     'batch_size': [32],
        #     'loss': ['arcface'],
        #     'verifythresh': [30, 40, 50],
        #     'freqthresh': [2, 3, 5],
        #     'margin': [0.3],
        #     's': [30],
        # },
        # {
        #     'batch_size': [32],
        #     'loss': ['arcface'],
        #     'verifythresh': [30],
        #     'freqthresh': [3],
        #     'margin': [0.2],
        #     's': [30],
        # },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]

    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk)
Example #8
    def _get_hyperparameter_configurations(self, n):
        """
        Args:
            n: number of configurations to sample
        Returns:
            n randomly sampled hyperparameter configurations
        """
        np.random.seed(self.seed)
        return [{'model_name':str(uuid.uuid4().hex),'score':np.nan,'config':config}
                for config in list(ParameterSampler(self.param_grid, n_iter=n))]
Example #9
def load_hyperparam_grid(n_iter=50):

    from sklearn.model_selection import ParameterSampler

    hyperparam_grid = {
        "lr": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0],
        "weight_decay": [0.00001, 0.0001, 0.001, 0.01, 0.1],
        "n_hid": [25, 50, 75, 100],
        "p_dropout": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    }
    return list(
        ParameterSampler(hyperparam_grid, n_iter=n_iter, random_state=42))
Example #10
    def __init__(self, algo, calib_type, hp_grid, cv, scoring, n_iter=None):
        self.cv = cv
        self.algo = algo
        self.scoring = scoring
        self.hp_grid = hp_grid
        self.best_hp = {}
        if calib_type == 'GridSearch':
            self.hp_iterator = ParameterGrid(self.hp_grid)
        elif calib_type == 'RandomSearch':
            self.hp_iterator = ParameterSampler(self.hp_grid, n_iter)
        elif calib_type == 'GeneticAlgorithm':
            self.hp_iterator = GeneticAlgorithm(self.hp_grid, n_iter)
Example #11
def select_best_nn_classifier(build_func,
                              params_grid,
                              n_iter,
                              random_state,
                              X_train,
                              y_train,
                              X_val,
                              y_val,
                              early_stopping,
                              epochs=100,
                              shuffle=False,
                              verbose=1):
    '''Select the best neural network model, using KerasClassifier and ParameterSampler to generate models with different parameters.'''

    models = []
    scoring_list = []

    # list of dicts of randomly generated parameters
    params_list = list(ParameterSampler(params_grid, n_iter, random_state=random_state))

    for params in params_list:

        # set parameters for model
        model = KerasClassifier(build_fn=build_func,
                                validation_data=(X_val, y_val),
                                input_shape=X_train.shape[1:],
                                epochs=epochs,
                                shuffle=shuffle,
                                callbacks=[early_stopping],
                                verbose=0,
                                **params)

        model_name = f'{model.__class__.__name__}{params}'
        model.fit(X_train, y_train)

        score_train = accuracy_score(model.predict(X_train), y_train)
        score_valid = accuracy_score(model.predict(X_val), y_val)

        scoring_list.append(score_valid)
        models.append((model_name, model))

        if verbose == 1:
            print(model_name)
            print(
                f'Accuracy score on training set: {score_train.round(4)} | Accuracy score on validation set: {score_valid.round(4)}'
            )
            print('-' * 100)

    # select best models using scoring list
    best_model_index = np.argmax(scoring_list)
    best_model = models[best_model_index]

    return best_model[0], best_model[1]
Example #12
def search(command,
           param_dists,
           num_samples,
           sbatch_args,
           source_bashrc=True,
           conda_env=None,
           hparams_save_path="./",
           max_concurrent_jobs=0,
           wait=False,
           random_seed=42):
    sampler = ParameterSampler(param_dists,
                               n_iter=num_samples,
                               random_state=random_seed)

    hparams = list(sampler)
    hparam_args = []
    for p in hparams:
        args_list = []
        for k, v in p.items():
            if isinstance(v, bool):
                if v:
                    args_list.append(f"--{k}")
            else:
                args_list.append(f"--{k} {v}")
        hparam_args.append(" ".join(args_list))

    hparams_array = "('" + "' '".join(hparam_args) + "')"
    script = ["#!/bin/bash\n"] + [f"#SBATCH {arg}\n" for arg in sbatch_args]
    script.append(f"#SBATCH --array=0-{len(hparams)-1}%{max_concurrent_jobs}\n")
    if wait:
        script.append("#SBATCH -W\n")
    if conda_env or source_bashrc:
        script.append("source ~/.bashrc\n")
    if conda_env:
        script.append(f"conda activate {conda_env}\n")

    script.append(f"HPARAMS={hparams_array}\n")
    script.append(f"{command} ${{HPARAMS[$SLURM_ARRAY_TASK_ID]}}\n")

    with tempfile.NamedTemporaryFile("w") as f:
        f.writelines(script)
        f.seek(0)
        out = subprocess.run(
            ["sbatch", os.path.join(tempfile.gettempdir(), f.name)],
            capture_output=True)

    job_id = out.stdout.decode().replace("Submitted batch job ",
                                         "").rstrip("\n")
    df = pd.DataFrame(hparams)
    df["version"] = [f"{job_id}_{str(i)}" for i in range(len(df))]
    df.to_csv(os.path.join(hparams_save_path,
                           f"hparams_{job_id}.csv"),
              index=False)
Example #13
def tfidf_parameter_space(n_trials):
    rng = np.random.RandomState(42)

    return ParameterSampler(dict(
        tf_method=["binary", "raw", "freq", "log_norm", "double_norm"],
        idf_method=["smooth", "probabilistic"],
        drop_stopwords=[True, False],
        drop_suffix=[True, False],
        drop_punct=[True, False],
        lowercase=[True, False]),
                            n_iter=n_trials,
                            random_state=rng)
Example #14
def optimize_circuits(k, direction, cerebellum="seg"):
  print("Assessing k={:02d} circuits".format(k))

  act_bin = load_coordinates(cerebellum=cerebellum)

  lexicon = load_lexicon(["cogneuro"])
  dtm_bin = load_doc_term_matrix(version=190325, binarize=True)
  lexicon = sorted(list(set(lexicon).intersection(dtm_bin.columns)))
  dtm_bin = dtm_bin[lexicon]

  lists, circuits = load_domains(k, cerebellum=cerebellum)

  function_features = pd.DataFrame(index=dtm_bin.index, columns=range(1, k+1))
  structure_features = pd.DataFrame(index=act_bin.index, columns=range(1, k+1))
  for i in range(1, k+1):
      functions = lists.loc[lists["CLUSTER"] == i, "TOKEN"]
      function_features[i] = dtm_bin[functions].sum(axis=1)
      structures = circuits.loc[circuits["CLUSTER"] == i, "STRUCTURE"]
      structure_features[i] = act_bin[structures].sum(axis=1)
  function_features = pd.DataFrame(doc_mean_thres(function_features),
                                   index=dtm_bin.index, columns=range(1, k+1))
  structure_features = pd.DataFrame(binarize(structure_features),
                                    index=act_bin.index, columns=range(1, k+1))

  # Load the data splits
  splits = {}
  for split in ["train", "validation"]:
    splits[split] = [int(pmid.strip()) for pmid in open("../../../data/splits/{}.txt".format(split), "r").readlines()]

  # Specify the hyperparameters for the randomized grid search
  param_grid = {"penalty": ["l1", "l2"],
                "C": [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                "fit_intercept": [True, False]}
  param_list = list(ParameterSampler(param_grid, n_iter=28, random_state=42))
  max_iter = 500

  if direction == "forward":
    file = "fits/forward_k{:02d}_{}.p".format(k, direction)
    if not os.path.isfile(file):
      print("-" * 80 + "\nOptimizing forward model\n" + "-" * 80)
      train_set = [function_features.loc[splits["train"]], structure_features.loc[splits["train"]]]
      val_set = [function_features.loc[splits["validation"]], structure_features.loc[splits["validation"]]]
      op_fit = optimize_hyperparameters(param_list, train_set, val_set, max_iter=max_iter)
      pickle.dump(op_fit, open(file, "wb"), protocol=2)

  elif direction == "reverse":
    file = "fits/reverse_k{:02d}_{}.p".format(k, direction)
    if not os.path.isfile(file):
      print("-" * 80 + "\nOptimizing reverse model\n" + "-" * 80)
      train_set = [structure_features.loc[splits["train"]], function_features.loc[splits["train"]]]
      val_set = [structure_features.loc[splits["validation"]], function_features.loc[splits["validation"]]]
      op_fit = optimize_hyperparameters(param_list, train_set, val_set, max_iter=max_iter)
      pickle.dump(op_fit, open(file, "wb"), protocol=2)
Example #15
def sample_population(estimator_cls, params, X, y, popsize=25, scoring='accuracy'):
    population = set()
    p = iter(ParameterSampler(param_distributions=params, n_iter=100))
    while len(population) < popsize:
        try:
            inner = {attr: BaseParam(param) for attr, param in (next(p)).items()}
            individual = Estimator(estimator_cls=estimator_cls, inner=inner, X=X, y=y, scoring=scoring)
            individual.evaluate()
            population.add(individual)
        except Exception:
            print(f"Failed to draft initial {inner}")
    return population
Example #16
def draw_random_p0(cfg, N_max_fits):
    param_grid = {
        #   'lambda_E': uniform(cfg.lambda_E/10, cfg.lambda_E*5),
        #   'lambda_I': uniform(cfg.lambda_I/10, cfg.lambda_I*5),
        "beta": uniform(cfg.beta / 10, cfg.beta * 5),
        "tau": uniform(-10, 10),
    }
    i = 0
    while i < N_max_fits:
        random_p0 = list(ParameterSampler(param_grid, n_iter=1))[0]
        yield i, random_p0
        i += 1
Example #17
def sample_hyperparameters(
        model_name: str,
        data_origin: str,
        round_to: int = 6) -> List[Dict[str, Union[int, float]]]:
    """
    Sample the hyperparameters for different runs of the same model. The distributions that parameters are sampled
    from are defined in uncertainty_estimation.models.info.PARAM_SEARCH and the number of evaluations per model type
    in uncertainty_estimation.models.info.NUM_EVALS.

    Parameters
    ----------
    model_name: str
        Name of the model.
    data_origin: str
        Specify the data set which should be used to specify the hyperparameters to be sampled / default values.
    round_to: int
        Decimal that floats should be rounded to.

    Returns
    -------
    sampled_params: List[Dict[str, Union[int, float]]]
        List of dictionaries containing hyperparameters and their sampled values.
    """
    sampled_params = list(
        ParameterSampler(
            param_distributions={
                hyperparam: PARAM_SEARCH[hyperparam]
                for hyperparam, val in MODEL_PARAMS[model_name]
                [data_origin].items()  # MIMIC is just a default here
                if hyperparam in PARAM_SEARCH
            },
            n_iter=NUM_EVALS[model_name],
        ))

    sampled_params = [
        dict(
            {
                # Round float values
                hyperparam:
                round(val, round_to) if isinstance(val, float) else val
                for hyperparam, val in params.items()
            },
            **{
                # Add hyperparameters that stay fixed
                hyperparam: val
                for hyperparam, val in MODEL_PARAMS[model_name]
                [data_origin].items()  # MIMIC is just a default here
                if hyperparam not in PARAM_SEARCH
            },
        ) for params in sampled_params
    ]

    return sampled_params
Example #18
def random_search():
    param_grid = {
        'noise_factor_cafe': uniform(3, 1),
        'noise_factor_car': uniform(15, 2),
        'noise_factor_white': uniform(0.05, 0.02),
        'noise_file': [0, 1, 2],
        'speed_factor': uniform(0.8, 0.4),
    }
    param_list = list(ParameterSampler(param_grid, n_iter=10))
    return [
        dict((k, round(v, 4) if not isinstance(v, int) else v)
             for (k, v) in d.items()) for d in param_list
    ]
Example #19
    def maximize(self, score_optimum=None, realize=True):
        """
        Find the next best hyper-parameter setting for the optimizer.

        Parameters
        ----------
        score_optimum: float
            An optional score to use inside the EI formula instead of the optimizer's current_best_score

        realize: bool
            Whether or not to give a more realistic estimate of the EI (default=True)

        Returns
        -------
        best_setting: dict
            The setting with the highest expected improvement
        
        best_score: float
            The highest EI (per second)
        """

        start = time.time()

        # Select a sample of parameters
        sampled_params = ParameterSampler(self.param_distributions,
                                          self.draw_samples)

        # Set score optimum
        if score_optimum is None:
            score_optimum = self.current_best_score

        # Determine the best parameters
        best_setting, best_score = self._maximize_on_sample(
            sampled_params, score_optimum)

        if self.local_search:
            best_setting, best_score = self._local_search(
                best_setting,
                best_score,
                score_optimum,
                max_steps=self.ls_max_steps)

        if realize:
            best_setting, best_score = self._realize(best_setting, best_score,
                                                     score_optimum)

        # Store running time
        running_time = (time.time() - start) / self.simulate_speedup
        self.maximize_times.append(running_time)

        return best_setting, best_score
Example #20
    def __init__(
        self,
        model_fn: Callable[..., tf.keras.models.Model],
        param_distributions: Dict[str, Callable],
        n_iter: int = 10,
        verbose: int = 0,
        **kwargs: Any,
    ) -> None:
        """RandomSearch for a given parameter distribution.

        The random search randomly iterates over the param_distributions
        dictionary, which defines the hyperparameter value range for a key that
        is a parameter name of the model_fn.
        For example, if the model_fn has the parameter "num_units" a dictionary
        could look like this:

        ``` python
            def model_fn(num_units: int):
                pass

            param_distributions = {"num_units": [10, 20 ,30]}
        ```

        Note: Inside the model_fn it is expected that the model is compiled.

        The random search is evaluated by:
        - The validation loss value, if no metrics are passed to model.compile()
        - The validation score of the last defined metric in model.compile()

        ``` python
            model.compile(loss="mse", metrics=["mse", "mae"])
        ```

        This would sort the random search combinations based on the validation
        mae score.

        Args:
            model_fn (Callable[..., tf.keras.models.Model]): Function that
                builds and compiles a tf.keras.models.Model object.
            param_distributions (Dict[str, Callable]): Dict of str, callable
                pairs, where the str is a parameter name of the model_fn.
            n_iter (int): Number of random models. Defaults to 10.
            verbose (int): Whether to show information in the terminal.
                Defaults to 0.
            kwargs (Any): Keyword arguments for the model_fn function.
        """
        super().__init__(model_fn=model_fn, verbose=verbose, **kwargs)
        self.param_distributions = param_distributions
        self.n_iter = n_iter
        self.random_sampler = ParameterSampler(self.param_distributions,
                                               n_iter=self.n_iter)
Example #21
    def LCB(self, n_sample, gpr=None, Xd=None):
        gpr = self._get_gpr(gpr)
        if Xd is None:
            Xd = self.Xd
        preds = gpr.predict(Xd, return_std=True)
        preds = pd.DataFrame({"prediction": preds[0], "std_dev": preds[1]})
        # n.b. lambda is a keyword so change vector of values to alpha
        alpha = ParameterSampler({"alpha": expon()}, n_iter=n_sample)
        lcb = pd.DataFrame({"lcb_{}".format(i): preds.prediction - (li["alpha"] * preds.std_dev)
                            for i, li in enumerate(alpha)})
        # TODO: include X in lcb, to look up parameters from selected values
        return lcb
Example #22
def _get_padadamps(n_params):
    powers = [5, 5.5, 6, 6.5, 7]
    param_space = {
        "initial_batch_size": [2**i for i in powers],
        "max_batch_size": [100, 200, 500, 1000, 2000, 5000],
        "batch_growth_rate": loguniform(1e-3, 1e-1),
        "dwell": [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000],
        "weight_decay": [1e-3, 1e-4, 1e-5, 1e-6, 0, 0, 0],
    }
    model = PadaDamp(seed=42)

    params = ParameterSampler(param_space, n_iter=n_params, random_state=42)
    models = [clone(model).set_params(**p) for p in params]
    return models
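The sample-then-clone pattern above works with any scikit-learn-compatible estimator. Below is a minimal sketch using SGDClassifier as a stand-in for PadaDamp; the parameter names and ranges are illustrative, not taken from the original project.

from scipy.stats import loguniform
from sklearn.base import clone
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import ParameterSampler

# template estimator; clone() copies it and set_params() applies one sampled config per copy
template = SGDClassifier(random_state=42)
param_space = {"alpha": loguniform(1e-5, 1e-1), "penalty": ["l2", "l1"]}

sampled = ParameterSampler(param_space, n_iter=4, random_state=42)
models = [clone(template).set_params(**p) for p in sampled]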
Example #23
    def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        sampled_params = ParameterSampler(self.param_grid,
                                          self.n_iter,
                                          random_state=self.random_state)

        return self._fit(frame, sampled_params)
Example #24
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler])
Example #25
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):

    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        # {
        #     'loss': ['arcface'],
        #     # 'epochs': [5],
        #     # 'augmentation': ['soft'],
        #     'verifythresh': [20, 30, 40],
        #     'freqthresh': [1],
        #     # 'freqthresh': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        # },
        # {
        #     'loss': ['arcface'],
        #     # 'epochs': [5],
        #     # 'augmentation': ['soft'],
        #     'verifythresh': [20],
        #     'freqthresh': [2],
        #     # 'freqthresh': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        # },
        {
            'loss': ['arcface'],
            'verifythresh': [30],
            'freqthresh': [3],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(
            ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block:(block_id + 1) *
                                     n_per_block]

    utils.launch_tuning(mode=mode,
                        n_iter=n_iter,
                        n_gpu=n_gpu,
                        devices=devices,
                        params=params,
                        root=ROOT,
                        save_interval=save_interval,
                        candidate_list=candidate_chunk)
Example #26
    def __iter__(self):
        '''The __iter__ method that returns an iterator.
        Since it is called at each new use of the iterable in a 'for' statement,
        it initializes all dynamic elements.'''
        self.n_iter = 0
        self.pop_scores = []
        try:
            self.population = list(
                ParameterSampler(self.hp_grid, self.init_pop_size)
            )  # First population is random; turn it into a list so it can be copied
        except ValueError:
            self.population = list(ParameterGrid(self.hp_grid))
        self.current_pop = self.population.copy()
        self.generation = 0
        return self
Example #27
def sample_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space, n_iter=num, random_state=random_state)

    for params in sampler:
        yield params
Example #28
def fit_svr(X_train: np.ndarray,
            y_train: np.ndarray,
            X_valid: np.ndarray,
            y_valid: np.ndarray,
            params_dict: dict = params_dict,
            n_iter: int = 25):
    ps = ParameterSampler(n_iter=n_iter, param_distributions=params_dict)
    scores = np.zeros(n_iter)
    models = list(repeat(None, n_iter))
    for idx, params in enumerate(ps):
        svr = SVR(**params)
        svr.fit(X_train, y_train)
        scores[idx] = r2_score(y_valid, svr.predict(X_valid))
        models[idx] = svr
    return models[np.argmax(scores)]
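The snippet above relies on a module-level params_dict that is not shown; a plausible, purely hypothetical definition of such a search space for SVR could look like this.

from scipy.stats import loguniform, uniform

# hypothetical search space for SVR; ranges are illustrative only
params_dict = {
    "C": loguniform(1e-2, 1e2),
    "gamma": loguniform(1e-4, 1e0),
    "epsilon": uniform(0.01, 0.2),
    "kernel": ["rbf", "linear"],
}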
Example #29
def Train_Light_GBM(X, y, int_cv=3, regression=True, n_params=10, test_size=.2, n_jobs=1, e_stop_rounds=300, **kwargs):
    '''
    Wrapper function to train a LightGBM regression or classification model
       X - Training input
       y - Training labels
       int_cv - Number of internal cross-validation folds
       regression - True for regression, False for binary classification
       n_params - Number of different random hyperparameter combinations to explore
       test_size - Fraction of the data held out for the outer test fold (used for the final validation fit)
       n_jobs - Number of processes to use
       e_stop_rounds - Number of early-stopping rounds used when checking parameters (doubled for the final fit)
    '''

    if regression:
        Base_Model = LGBMRegressor
    else:
        Base_Model = LGBMClassifier

    #Train val split, for final fit
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size)
    
    param_scores = []
    param_list = list(ParameterSampler(DEFAULT_PARAM_GRID, n_iter=n_params))
    int_skf = KFold(n_splits=int_cv)
    
    for p in range(n_params):
        best_scores = []
       
        for train_ind, test_ind in int_skf.split(X_train, y_train):
            
            int_X_train, int_y_train = X_train[train_ind], y_train[train_ind]
            int_X_test, int_y_test = X_train[test_ind], y_train[test_ind]
            
            model = Base_Model(n_jobs=n_jobs, silent=True, n_estimators=5000, **param_list[p])
            model.fit(int_X_train, int_y_train, eval_set=(int_X_test, int_y_test),
                        verbose=False, early_stopping_rounds=e_stop_rounds)
            
            best_score = list(model.best_score_['valid_0'].values())[0]
            best_scores.append(best_score)
        
        param_scores.append(np.mean(best_scores))
    
    bp_ind = np.argmin(param_scores) #Index of best parameters
    model = Base_Model(n_jobs=n_jobs, silent=True, n_estimators=5000, **param_list[bp_ind])
    model.fit(X_train, y_train, eval_set=(X_val, y_val), verbose=False, early_stopping_rounds=int(e_stop_rounds*2))

    return model
Example #30
def knn_classifier(parameter_sampler_random_int):
    parameters_sample_dict = {
        "n_neighbors": sp_randint(1, 25),
        "metric": ["euclidean", "manhattan", "chebyshev", "minkowski"],
        "weights": ["uniform", "distance"],
    }

    p_clf = list(
        ParameterSampler(
            parameters_sample_dict, n_iter=1, random_state=parameter_sampler_random_int
        )
    )[0]

    clf = KNeighborsClassifier(**p_clf)

    return clf, build_classifier_parameter_dict(clf, p_clf)