Esempio n. 1
0
def example1():
    """Fit a PIdentity template to the AllRange workload and print its error.

    Two `error.rootmse` values are printed on one line: first the value for
    the optimized PIdentity strategy, then the value for a plain Identity
    strategy on the same workload, so the two can be compared.
    """
    print('Example 1')
    domain_size = 256
    all_range = workload.AllRange(domain_size)
    template = templates.PIdentity(16, domain_size)
    # The return value of optimize() is not needed; the strategy is read
    # back from the template via strategy() below.
    template.optimize(all_range)

    optimized_err = error.rootmse(all_range, template.strategy())
    identity_err = error.rootmse(all_range, workload.Identity(domain_size))
    print(optimized_err, identity_err)
Esempio n. 2
0
def calculate_workload_error2(wk, strategy, eps):
    """Summarize the HDMM error of `strategy` on workload `wk` (deprecated).

    Args:
        wk: workload; `wk.shape[0]` is reported as the query count.
        strategy: strategy passed through to the `hdmm_error` helpers.
        eps: value forwarded as `eps` to the `hdmm_error` helpers.

    Returns:
        A dict with keys 'pdf', 'pdf_x', 'method', 'expected_error' and
        'num_query'. 'pdf' and 'pdf_x' are lists holding the histogram
        densities and the left bin edges of the sampled per-query errors,
        or both `False` when the variance of those samples is below 1e-2.
    """
    # Deprecated
    expected_error = hdmm_error.rootmse(wk, strategy, eps=eps)
    sampled_errors = hdmm_error.per_query_error_sampling(
        wk, strategy, 100000, eps, normalize=True)

    # Default: no density is reported for (near-)constant error samples.
    pdf = False
    pdf_x = False
    if not (np.var(sampled_errors) < 1e-2):
        heights, edges = np.histogram(sampled_errors, bins='auto',
                                      density=True)
        pdf = heights.tolist()
        # Drop the final edge so pdf_x has one entry per bin (left edges).
        pdf_x = edges[:-1].tolist()

    return {
        'pdf': pdf,
        'pdf_x': pdf_x,
        'method': 'HDMM',
        'expected_error': expected_error,
        'num_query': int(wk.shape[0]),
    }
Esempio n. 3
0
    ax = sns.scatterplot(x=mean, y=std)
    ax.set(xlabel='Average Error',
           ylabel='Stdev Error',
           title=f'Avg vs Stdev of Error over re-optimizations, SF1')
    plt.savefig('error_variation_std.png')
    plt.show()


if __name__ == '__main__':

    # Identifiers handed to build_workload to assemble the full workload.
    full = ['P1', 'P3', 'P4', 'P5', 'P8', 'P9', 'P10', 'P11', 'P12',
            'P12A_I', 'PCT12', 'PCT12A_O']
    w_sf1_full = build_workload(full)
    print('Query count, full', w_sf1_full.shape[0])

    trials = 10  #150
    # Call opt_strategy once per trial on the same workload, then take the
    # element-wise square root of error.per_query_error for each result.
    strategies = [opt_strategy(w_sf1_full) for _ in range(trials)]
    errors = [
        np.sqrt(error.per_query_error(w_sf1_full, strategy))
        for strategy in strategies
    ]

    # One list per statistic, each with one entry per trial.
    stats = {
        'rootmse      ': [
            error.rootmse(w_sf1_full, strategy) for strategy in strategies
        ],
        'max_query_err': list(map(np.max, errors)),
        'min_query_err': list(map(np.min, errors)),
        'mean_query_err': list(map(np.mean, errors)),
    }

    # mean_vs_max_error(w_sf1_full)
    mean_vs_max_error(w_sf1_full, trials=100)
Esempio n. 4
0
    temp = workload.Marginals.approximate(A_manual)
    A_manual_opt = templates.Marginals(domain, seed=1003)
    A_manual_opt._params = temp.weights
    A_manual_opt.optimize(W)
    A_manual_opt._params = np.clip(A_manual_opt._params, 0, float('inf'))

    print('Num queries:', W.shape[0])
    print('Sensitivity:', W.sensitivity())
    print('Marg', '\t\t', f'{error.rootmse(W, A_marg.strategy()):10.3f}')
    print('Ident_full', '\t', f'{error.rootmse(W, A_identity_full.strategy()):10.3f}')
    print('Identity', '\t', f'{error.rootmse(W, A_identity):10.3f}')
    print('Workload', '\t', f'{error.rootmse(W, A_wkld):10.3f}')
    print('Manual', '\t', f'{error.rootmse(W, A_manual):10.3f}')
    print('Manual Opt', '\t', f'{error.rootmse(W, A_manual_opt.strategy()):10.3f}')

    summarize_strategy(W, A_marg, domain)

    print('')

    print('')

    for m in W.matrices:
         print(m.base.key, m.base.shape[0], '\t', error.rootmse(m, A_marg.strategy()))

    for m in W.matrices:
         print(m.base.key, m.base.shape[0], '\t', error.rootmse(m, A_manual))

    for m in W.matrices:
         print(m.base.key, m.base.shape[0], '\t', error.rootmse(m, A_manual_opt.strategy()))

Esempio n. 5
0
    A_scen2 = templates.Marginals(domain)
    A_scen2.optimize(W_scen2)

    # Here we analyze the error of scenario 2, which is what the user really wants
    # But we consider the strategy from scenario1 and compare with strategy derived from scenario2

    print('Num queries:', W_scen2.shape[0])
    print('Sensitivity:', W_scen2.sensitivity())
    print('Per query RMSE, A_scen1', '\t\t',
          f'{error.rootmse(W_scen1, A_scen1.strategy()):10.3f}')
    print('Per query RMSE, A_scen2', '\t\t',
          f'{error.rootmse(W_scen1, A_scen2.strategy()):10.3f}')
    print('')

    summarize_strategy(W_scen1, A_scen1, domain)

    print('')

    summarize_strategy(W_scen1, A_scen2, domain)

    print('')
    for m in W_scen1.matrices:
        print(
            m.base.key,
            util.marginal_index_repr(m.base.key, dim, " "),
            m.base.shape[0],
            '\t',
            error.rootmse(m, A_scen1.strategy()),
            error.rootmse(m, A_scen2.strategy()),
        )
Esempio n. 6
0
    return A


def marginal_strategy(workload=None):
    """Return a Marginals strategy optimized for `workload`.

    A `templates.Marginals` template over the fixed domain (2, 2, 63, 8) is
    optimized with `restart_optimize(workload, 25)` (25 is presumably the
    number of restarts; confirm against the templates API), and the first
    element of that call's result is returned.
    """
    domain = (2, 2, 63, 8)
    outcome = templates.Marginals(domain).restart_optimize(workload, 25)
    return outcome[0]


if __name__ == '__main__':

    # Script entry point: score three strategies on the same workload with
    # error.rootmse and print one labelled value per strategy.
    W = pl94_workload(with_full_id=True)

    print(W.shape)

    # Strategy 1: the result of opt_p_identity for this workload.
    A = opt_p_identity(workload=W)
    err = error.rootmse(W, A)
    print('KroneckerPIdentity', err)

    #W = Marginals.approximate(W)
    # Strategy 2: the Marginals strategy. A is rebound here, so the weights
    # printed at the end of the script belong to this strategy.
    A = marginal_strategy(workload=W)
    marg_err = error.rootmse(W, A)
    #marg_err = np.sqrt( error.expected_error(W, A) / (3*3*64*9) )
    print('Marginals', marg_err)

    # Strategy 3: whatever manual_strategy() returns, scored the same way.
    robert_err = error.rootmse(W, manual_strategy())
    print('Robert', robert_err)

    # Weights of the Marginals strategy, rescaled so that they sum to 1.
    print(A.weights / A.weights.sum())
Esempio n. 7
0
    rmse = np.sqrt(np.mean(errors) / W.shape[0])
    print(rmse)
    return rmse


if __name__ == '__main__':

    # Script entry point: compare three strategies on the same workload,
    # both via error.rootmse and via run_test on synthetic data of
    # increasing size N.

    #x = synthetic_data(N=1000000)

    # Engine name forwarded unchanged to every run_test call below.
    engine = 'wnnls'
    W = pl94_workload()
    A1 = opt_p_identity(W)
    A2 = marginal_strategy(W)
    A3 = manual_strategy()

    # error.rootmse of each strategy on W; these do not depend on N.
    err1 = error.rootmse(W, A1)
    err2 = error.rootmse(W, A2)
    err3 = error.rootmse(W, A3)

    trials = 25
    # Data sizes 10, 100, ..., 10**7.
    Ns = [10**k for k in range(1, 8)]

    # One run_test result per N for each strategy; the same synthetic
    # dataset x is shared by the three strategies at a given N.
    errs1, errs2, errs3 = [], [], []
    for N in Ns:
        x = synthetic_data(N)
        errs1.append(run_test(W, A1, x, trials=trials, engine=engine))
        errs2.append(run_test(W, A2, x, trials=trials, engine=engine))
        errs3.append(run_test(W, A3, x, trials=trials, engine=engine))

    # The rootmse values are repeated len(Ns) times so each is drawn as a
    # flat line across all N.
    plt.plot(Ns, [err1] * len(Ns), 'b', label='KronPIdentity+LS')
    plt.plot(Ns, [err2] * len(Ns), 'r', label='Marginals+LS')