Example #1
async def evaluate_baseline(data, lr, fraction, epochs, seed):
    # Load the test split and train the baseline policy, then await both results
    test = load_test(data, seed)
    baseline = train_baseline(data, lr, fraction, epochs, seed)
    test, baseline = await test, await baseline

    # Re-seed the RNG so the evaluation is reproducible, then score the baseline
    rng_seed(seed)
    ndcg_score, _ = evaluate_fraction(test, baseline, fraction)
    logging.info(f"[{seed}, {lr}] evaluation baseline: {ndcg_score:.4f}")
    return ndcg_score
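A minimal usage sketch for the coroutine above, assuming an asyncio event loop; the dataset name and hyperparameter values are hypothetical placeholders:

import asyncio

async def main():
    # Evaluate the baseline for several seeds; all argument values are placeholders
    scores = await asyncio.gather(*[
        evaluate_baseline("example_dataset", lr=0.01, fraction=0.1, epochs=5, seed=s)
        for s in range(3)
    ])
    print(scores)

asyncio.run(main())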
Example #2
async def evaluate_baseline(data, lr, fraction, epochs, eps, tau, seed):
    # Load the test split and train the baseline policy, then await both results
    test = load_test(data, seed)
    baseline = train_baseline(data, lr, fraction, epochs, eps, tau, seed)
    test, baseline = await test, await baseline

    # Evaluate a deep copy so the original baseline policy is left untouched
    baseline = baseline.__deepcopy__()
    rng_seed(seed)
    acc_policy, acc_best = evaluate(test, baseline)
    logging.info(f"[{seed}, {lr}, {eps}] evaluation baseline: {acc_policy:.4f} (stochastic) {acc_best:.4f} (deterministic)")
    return {'policy': acc_policy, 'best': acc_best}
Example #3
async def train_baseline(data, lr, fraction, epochs, seed):
    train = await load_train(data, seed)
    policy = OnlinePolicy(train.d, lr)

    # Train the policy on a random subset covering `fraction` of the training data
    baseline_size = int(fraction * train.size)
    prng = rng_seed(seed)
    indices = prng.permutation(train.size)[0:baseline_size]
    logging.info(f"[{seed}, {lr}] training baseline (size: {baseline_size})")
    optimize_supervised(train, indices, policy, lr, epochs)
    return policy
Example #4
async def train_baseline(data, lr, l2, fraction, epochs, eps, tau, seed):
    train = await load_train(data)
    policy = EpsgreedyPolicy(train.k, train.d, lr=lr, eps=eps)
    # Alternative: Boltzmann exploration instead of epsilon-greedy
    #policy = BoltzmannPolicy(train.k, train.d, lr=lr, tau=tau, l2=l2)

    # Train the policy on a random subset covering `fraction` of the training data
    baseline_size = int(fraction * train.n)
    prng = rng_seed(seed)
    indices = prng.permutation(train.n)[0:baseline_size]
    logging.info(f"[{seed}, {lr}, {eps}] training baseline (size: {baseline_size}, weights:{train.d * train.k})")
    optimize_supervised_hinge(train, indices, policy, lr, epochs)
    return policy
Example #5
async def ranking_run(config, data, behavior, points, seed):

    # Load train, test and policy
    train = load_train(data, seed)
    test = load_test(data, seed)
    baseline = best_baseline(data, seed)
    train, test, baseline = await train, await test, await baseline

    # Data structure to hold output results
    out = {
        'deploy': np.zeros(len(points)),
        'learned': np.zeros(len(points)),
        'regret': np.zeros(len(points))
    }

    # Seed randomness
    prng = rng_seed(seed)

    # Build policy
    args = {'d': train.d, 'pairs': train.pairs, 'baseline': baseline.__deepcopy__()}
    args.update(vars(config))
    if behavior in ['perfect']:
        args['eta'] = 0.0
    else:
        args['eta'] = 1.0
    if not config.cold:
        args['w'] = np.copy(baseline.w)
    policy = create_policy(**args)

    if hasattr(policy, 'ucb_baseline') and hasattr(policy, 'lcb_w'):
        out['ucb_b'] = np.zeros(len(points))
        out['lcb_w'] = np.zeros(len(points))

    # Build behavior model
    click_model = build_click_model(behavior)

    # Generate training indices and seed randomness
    indices = prng.randint(0, train.size, np.max(points))

    # Evaluate on point 0
    out['deploy'][0], out['learned'][0] = evaluate(test, policy)
    log_progress(0, points, seed, data, behavior, config, out, policy)

    # Train and evaluate at specified points
    for i in range(1, len(points)):
        start = points[i - 1]
        end = points[i]
        out['regret'][i] = out['regret'][i - 1] + optimize(train, indices[start:end], policy, click_model)
        out['deploy'][i], out['learned'][i] = evaluate(test, policy)
        if hasattr(policy, 'ucb_baseline') and hasattr(policy, 'lcb_w'):
            out['ucb_b'][i], out['lcb_w'][i] = policy.ucb_baseline, policy.lcb_w
        log_progress(i, points, seed, data, behavior, config, out, policy)

    return out
Example #6
async def statistical_baseline(data, l2, seed, strategy):
    # Read the baseline fraction for this dataset from the shared config file
    with open("conf/classification/baselines.json", "rt") as f:
        baselines = json.load(f)
    fraction = baselines[data]['fraction']
    train = await load_train(data, seed)

    # Map the strategy name to the policy's exploration draw type
    draw_type = {
        'ucb': TYPE_UCB,
        'thompson': TYPE_THOMPSON
    }[strategy]
    policy = StatisticalPolicy(train.k, train.d, l2=l2, draw_type=draw_type)

    # Fit the policy on a random subset covering `fraction` of the training data
    baseline_size = int(fraction * train.n)
    prng = rng_seed(seed)
    indices = prng.permutation(train.n)[0:baseline_size]
    logging.info(f"[{seed}] training ridge regression baseline (size: {baseline_size}, weights:{train.d * train.k})")
    optimize_supervised_ridge(train, indices, policy)
    return policy
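statistical_baseline only reads the per-dataset 'fraction' field from conf/classification/baselines.json; a file with the minimal required shape could be written as follows (the dataset name and value are hypothetical):

import json

# statistical_baseline looks up baselines[data]['fraction']
baselines = {"example_dataset": {"fraction": 0.1}}
with open("conf/classification/baselines.json", "wt") as f:
    json.dump(baselines, f, indent=4)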
Example #7
async def load_from_path(file_path,
                         min_d=0,
                         sample=1.0,
                         seed=0,
                         sample_inverse=False):
    # Load features and labels in svmlight format; shift labels so they start at 0
    xs, ys = load_svmlight_file(file_path)
    ys = ys.astype(np.int32)
    ys -= np.min(ys)

    # Optionally subsample the dataset; sample_inverse keeps the complement of the
    # same seeded permutation, so the same seed yields two disjoint splits
    if sample < 1.0:
        prng = rng_seed(seed)
        indices = prng.permutation(xs.shape[0])
        if not sample_inverse:
            indices = indices[0:int(sample * xs.shape[0])]
        else:
            indices = indices[int(sample * xs.shape[0]):]
        xs = xs[indices, :]
        ys = ys[indices]

    # Densify the features and mark the labels read-only
    k = np.unique(ys).shape[0]
    n = xs.shape[0]
    d = xs.shape[1]
    xs = from_scipy(xs, min_d=min_d)  #xs.todense().A
    ys.setflags(write=False)
    return ClassificationDataset(xs, ys, n, d, k)
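A usage sketch for load_from_path; the file path is hypothetical. Because sample_inverse keeps the complement of the same seeded permutation, two calls with identical sample and seed produce disjoint splits of one file:

import asyncio

async def main():
    # "data/example.svm" is a placeholder svmlight-format file
    part = await load_from_path("data/example.svm", sample=0.8, seed=42)
    rest = await load_from_path("data/example.svm", sample=0.8, seed=42, sample_inverse=True)
    print(part.n, rest.n)  # disjoint subsets that together cover the whole file

asyncio.run(main())
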
async def classification_run(config, data, points, seed, vali=0.0):

    # Load train, test and policy
    train = load_train(data, seed)
    test = load_test(data, seed)
    policy = build_policy(config, data, points, seed)
    train, test, policy = await train, await test, await policy
    policy = policy.__deepcopy__()

    # Data structure to hold output results
    out = {
        'deploy': np.zeros(len(points)),
        'learned': np.zeros(len(points)),
        'regret': np.zeros(len(points)),
        'test_regret': np.zeros(len(points)),
    }

    # Generate training indices and seed randomness
    prng = rng_seed(seed)
    indices_shuffle = prng.permutation(train.n)

    train_indices = prng.randint(0, int((1.0 - vali) * train.n),
                                 np.max(points))
    train_indices = indices_shuffle[train_indices]

    if vali != 0.0:
        vali_indices = prng.randint(int((1.0 - vali) * train.n), train.n,
                                    np.max(points))
        vali_indices = indices_shuffle[vali_indices]
    else:
        vali_indices = train_indices

    # Evaluate on point 0
    if vali == 0.0:
        out['deploy'][0], out['learned'][0] = evaluate(test, policy,
                                                       np.arange(0, test.n))
    else:
        out['deploy'][0], out['learned'][0] = evaluate(
            train, policy, indices_shuffle[np.arange(int(vali * train.n),
                                                     train.n)])
    out['regret'][0] = 0.0
    out['test_regret'][0] = 0.0
    log_progress(0, points, data, out, policy, config, seed)

    # Train and evaluate at specified points
    for i in range(1, len(points)):
        start = points[i - 1]
        end = points[i]
        train_regret, test_regret = optimize(train,
                                             np.copy(train_indices[start:end]),
                                             np.copy(vali_indices[start:end]),
                                             policy)
        out['regret'][i] = out['regret'][i - 1] + train_regret
        out['test_regret'][i] = out['test_regret'][i - 1] + test_regret
        if vali == 0.0:
            out['deploy'][i], out['learned'][i] = evaluate(
                test, policy, np.arange(0, test.n))
        else:
            out['deploy'][i], out['learned'][i] = evaluate(
                train, policy,
                indices_shuffle[np.arange(int(vali * train.n), train.n)])
        log_progress(i, points, data, out, policy, config, seed)

    return out