コード例 #1
0
def check_min_data(trial, params, **args):
    """Objective for the ``min_data`` tuning phase.

    Draws an integer ``min_data`` from [5, 100] via ``trial.suggest_int``,
    overlays it on a copy of ``params`` (the caller's dict is not modified),
    and hands evaluation over to ``check``.

    Args:
        trial: Trial object providing ``suggest_int``, ``user_attrs`` and
            ``number``.
        params: Base model parameters.
        **args: Forwarded unchanged to ``check``.

    Returns:
        Whatever ``check`` returns for the candidate parameters.
    """
    suggested = trial.suggest_int("min_data", 5, 100)
    candidate = dict(params)
    candidate["min_data"] = suggested

    result = check(trial, candidate, **args)

    # "acc" is read back from the trial after check() has run —
    # presumably check() stores it there; verify against check().
    accuracy = human.humanacc(trial.user_attrs["acc"])
    message = "- min_data trial %d: %s [min_data=%s]" % (
        trial.number, accuracy, suggested)
    logger.debug(message)
    return result
コード例 #2
0
def check_regular(trial, params, **args):
    """Objective for the regularization tuning phase.

    Draws ``lambda_l1`` and ``lambda_l2`` (in that order) as floats from
    [1e-8, 10.0] via ``trial.suggest_float``, overlays both on a copy of
    ``params``, and hands evaluation over to ``check``.

    Args:
        trial: Trial object providing ``suggest_float``, ``user_attrs`` and
            ``number``.
        params: Base model parameters; not modified.
        **args: Forwarded unchanged to ``check``.

    Returns:
        Whatever ``check`` returns for the candidate parameters.
    """
    l1 = trial.suggest_float("lambda_l1", 1e-8, 10.0)
    l2 = trial.suggest_float("lambda_l2", 1e-8, 10.0)

    candidate = dict(params)
    candidate["lambda_l1"] = l1
    candidate["lambda_l2"] = l2

    result = check(trial, candidate, **args)

    # "acc" is read back from the trial after check() has run —
    # presumably check() stores it there; verify against check().
    accuracy = human.humanacc(trial.user_attrs["acc"])
    message = "- regular trial %d: %s [l1=%s, l2=%s]" % (
        trial.number, accuracy, l1, l2)
    logger.debug(message)
    return result
コード例 #3
0
def train(f_train,
          f_test,
          d_tmp="optuna-tmp",
          phases="l:b:m:r",
          iters=100,
          timeout=None,
          init_params=None,
          usebar=True,
          min_leaves=256,
          max_leaves=32768):
    """Tune model parameters phase by phase and return the best result.

    Loads the training (and, if different, the test) data, prepares the
    base parameters, optionally builds an initial model, and then runs each
    tuning phase in order. Whenever a phase's best trial beats the current
    best score, its parameters are merged into ``params`` so later phases
    build on them.

    Args:
        f_train: Training data file, loaded with ``trains.load``.
        f_test: Test data file; when equal to ``f_train`` the training data
            is reused instead of being loaded a second time.
        d_tmp: Working directory for logs and model files (created if
            missing).
        phases: Colon-separated phase keys; each key is looked up in
            ``PHASES``.
        iters: Total trial budget, split evenly (integer division) across
            the phases; falsy means no per-phase limit is passed on.
        timeout: Total time budget, split evenly across the phases; falsy
            means no per-phase timeout is passed on.
        init_params: Optional parameter overrides. When not ``None`` (even
            if empty) an initial model is built first and used as the
            baseline to beat.
        usebar: Passed through to the phases; also controls whether the
            initial model build is given the ``"[init]"`` label.
        min_leaves: Passed through to the phases.
        max_leaves: Passed through to the phases.

    Returns:
        A 7-tuple ``(score, acc, model_file, time, params, pos, neg)``.
        If no initial model was built and no phase improved on the
        baseline, the first four items are ``(-1, None, None, None)``.

    Raises:
        KeyError: If a phase key is not present in ``PHASES``.
    """
    (xs, ys) = trains.load(f_train)
    dtrain = lgb.Dataset(xs, label=ys)
    testd = trains.load(f_test) if f_test != f_train else (xs, ys)
    # Create the working directory without going through a shell: this is
    # portable and safe for paths containing quotes or shell metacharacters.
    # An empty d_tmp means "current directory", so there is nothing to create.
    if d_tmp:
        os.makedirs(d_tmp, exist_ok=True)
    redirect.module("optuna", os.path.join(d_tmp, "optuna.log"))

    params = dict(lgbooster.DEFAULTS)
    if init_params:
        params.update(init_params)
    pos = sum(ys)
    neg = len(ys) - pos
    # NOTE: setting "scale_pos_weight" to neg / pos was tried as an
    # alternative and is currently disabled in favour of "is_unbalance".
    params["is_unbalance"] = "true" if neg != pos else "false"
    phases = phases.split(":")
    if "m" in phases:
        params["feature_pre_filter"] = "false"
    # Divide the overall budgets evenly among the phases.
    timeout = timeout / len(phases) if timeout else None
    iters = iters // len(phases) if iters else None
    args = dict(dtrain=dtrain,
                testd=testd,
                d_tmp=d_tmp,
                iters=iters,
                timeout=timeout,
                usebar=usebar,
                min_leaves=min_leaves,
                max_leaves=max_leaves)

    if init_params is not None:
        f_mod = os.path.join(d_tmp, "init.lgb")
        (score, acc, dur) = model(params, dtrain, testd, f_mod,
                                  "[init]" if usebar else None)
        best = (score, acc, f_mod, dur)
        logger.debug("- initial model: %s" % human.humanacc(acc))
    else:
        # Keep the same (score, acc, model, time) shape as every other
        # assignment to `best`, so the returned tuple always has 7 items.
        best = (-1, None, None, None)

    for phase in phases:
        trial = PHASES[phase](params=params, **args)
        if trial.user_attrs["score"] > best[0]:
            best = tuple(trial.user_attrs[x]
                         for x in ["score", "acc", "model", "time"])
            params.update(trial.params)

    return best + (params, pos, neg)
コード例 #4
0
def check_leaves(trial, params, min_leaves, max_leaves, **args):
    """Objective for the ``num_leaves`` tuning phase.

    Draws an integer ``num_leaves`` from [``min_leaves``, ``max_leaves``]
    via ``trial.suggest_int``, overlays it on a copy of ``params``, and
    hands evaluation over to ``check``.

    Args:
        trial: Trial object providing ``suggest_int``, ``user_attrs`` and
            ``number``.
        params: Base model parameters; not modified.
        min_leaves: Lower bound of the sampled range.
        max_leaves: Upper bound of the sampled range.
        **args: Forwarded unchanged to ``check`` (the leaf bounds are
            consumed here and not forwarded).

    Returns:
        Whatever ``check`` returns for the candidate parameters.
    """
    leaves = trial.suggest_int('num_leaves', min_leaves, max_leaves)
    candidate = dict(params)
    candidate["num_leaves"] = leaves

    result = check(trial, candidate, **args)

    # "acc" is read back from the trial after check() has run —
    # presumably check() stores it there; verify against check().
    accuracy = human.humanacc(trial.user_attrs["acc"])
    message = "- leaves trial %d: %s [num_leaves=%s]" % (
        trial.number, accuracy, leaves)
    logger.debug(message)
    return result
コード例 #5
0
def check_bagging(trial, params, **args):
    """Objective for the bagging tuning phase.

    Draws ``bagging_freq`` (integer in [1, 7]) and then
    ``bagging_fraction`` (float, capped at 1.0), overlays both on a copy of
    ``params``, and hands evaluation over to ``check``.

    Args:
        trial: Trial object providing ``suggest_int``, ``suggest_float``,
            ``user_attrs`` and ``number``.
        params: Base model parameters; not modified.
        **args: Forwarded unchanged to ``check``.

    Returns:
        Whatever ``check`` returns for the candidate parameters.
    """
    freq = trial.suggest_int("bagging_freq", 1, 7)

    # The sampling range reaches a hair above 1.0 and the draw is clamped
    # back to 1.0 — presumably so that exactly 1.0 can be selected.
    raw_fraction = trial.suggest_float("bagging_fraction", 0.4, 1.0 + 1e-12)
    fraction = min(raw_fraction, 1.0)

    candidate = dict(params)
    candidate["bagging_freq"] = freq
    candidate["bagging_fraction"] = fraction

    result = check(trial, candidate, **args)

    # "acc" is read back from the trial after check() has run —
    # presumably check() stores it there; verify against check().
    accuracy = human.humanacc(trial.user_attrs["acc"])
    message = "- bagging trial %d: %s [freq=%s, frac=%s]" % (
        trial.number, accuracy, freq, fraction)
    logger.debug(message)
    return result
コード例 #6
0
def lgbtune(f_train,
            f_test,
            d_tmp="optuna-tmp",
            phases="l:b:m:r",
            iters=None,
            timeout=3600,
            init_params={},
            min_leaves=256,
            max_leaves=32768):
    """Run ``train`` with the progress bar enabled and log the outcome.

    All arguments are passed straight to ``train``; ``usebar`` is fixed to
    ``True``. The best parameters, accuracy and model file are written to
    the logger at info level. Nothing is returned.

    Args:
        f_train: Training data file.
        f_test: Test data file.
        d_tmp: Working directory handed to ``train``.
        phases: Colon-separated phase keys handed to ``train``.
        iters: Total trial budget handed to ``train``.
        timeout: Total time budget handed to ``train``.
        init_params: Parameter overrides handed to ``train``. The default
            is an empty dict, which is not ``None``; this function never
            modifies it.
        min_leaves: Handed to ``train``.
        max_leaves: Handed to ``train``.
    """
    outcome = train(f_train,
                    f_test,
                    d_tmp=d_tmp,
                    phases=phases,
                    iters=iters,
                    timeout=timeout,
                    init_params=init_params,
                    usebar=True,
                    min_leaves=min_leaves,
                    max_leaves=max_leaves)
    # train() yields (score, acc, model_file, time, params, pos, neg);
    # only the accuracy, model file and parameters are reported here.
    _score, acc, f_mod, _dur, params, _pos, _neg = outcome

    logger.info("")
    params_line = "Best model params: %s" % str(params)
    logger.info(params_line)
    acc_line = "Best model accuracy: %s" % human.humanacc(acc)
    logger.info(acc_line)
    file_line = "Best model file: %s" % f_mod
    logger.info(file_line)