Ejemplo n.º 1
0
def test_fw_input_validation():
    """Check that bc.FrankWolfe raises ValueError for invalid input.

    Two invalid arguments are tried: a plain string and a NumPy array of
    strings. Both attempts are always made; the test fails at the end if
    either one was accepted without raising ValueError.

    Raises:
        AssertionError: if construction raises an exception other than
            ValueError, or if either invalid input is accepted silently.
    """
    # Arguments are built lazily so that an error raised while creating
    # the argument itself is handled inside the try block as well.
    invalid_inputs = (
        lambda: 'fdas',
        lambda: np.array(['fdsa', 'asdf']),
    )

    rejected = []
    for make_input in invalid_inputs:
        try:
            bc.FrankWolfe(make_input())
        except ValueError:
            rejected.append(True)
        except Exception:
            # Only ordinary errors are reported as a wrong error type;
            # KeyboardInterrupt / SystemExit are no longer swallowed the
            # way a bare ``except:`` would swallow them.
            raise AssertionError("Unrecognized error type")
        else:
            rejected.append(False)

    assert all(rejected), "FW failed: did not catch invalid input"
Ejemplo n.º 2
0
                  str(accept_rate) + ', passes if in (.15, .7) ')
            mcmc_attempt += 1
        th_samples = np.array(th_samples)
        Fs_full[tr] = 0.  #always 0, just doing this to make later code simpler
        full_samples = np.array(th_samples)

        print('Running coreset construction / MCMC')
        for aidx, anm in enumerate(anms):
            print(anm + ':')

            t0 = time.time()
            alg = None
            if 'GIGA' in anm:
                alg = bc.GIGA(vecs)
            elif anm == 'FW':
                alg = bc.FrankWolfe(vecs)
            else:
                alg = bc.RandomSubsampling(vecs)
            t_setup = time.time() - t0

            t_alg = 0.
            for m in range(Ms.shape[0]):
                print('M = ' + str(Ms[m]) + ': coreset construction')
                #this runs alg up to a level of M; on the next iteration, it will continue from where it left off
                t0 = time.time()
                alg.run(Ms[m])
                t_alg += time.time() - t0
                wts = alg.weights()
                idcs = wts > 0

                print('M = ' + str(Ms[m]) + ': metropolis hastings')
Ejemplo n.º 3
0
D = 50

# Result arrays indexed by (algorithm, trial, position in Ms):
#   err   - Euclidean distance between the weighted row sum and the full row sum
#   csize - number of strictly positive weights
#   cput  - cumulative wall-clock time spent inside alg.run for this trial
err = np.zeros((len(anms), n_trials, Ms.shape[0]))
csize = np.zeros_like(err)
cput = np.zeros_like(err)

for tr in range(n_trials):
    # Fresh standard-normal data for every trial; XS is the column-wise sum
    # that the weighted sum is compared against.
    X = np.random.randn(N, D)
    XS = X.sum(axis=0)

    for aidx, anm in enumerate(anms):
        print('data: gauss, trial ' + str(tr + 1) + '/' + str(n_trials)
              + ', alg: ' + anm)

        # Any name other than 'GIGA' or 'FW' falls back to random subsampling.
        alg = (bc.GIGA(X) if anm == 'GIGA' else
               bc.FrankWolfe(X) if anm == 'FW' else
               bc.RandomSubsampling(X))

        for m, M in enumerate(Ms):
            t0 = time.time()
            alg.run(M)
            tf = time.time()

            # Add this step's elapsed time to the total stored for the
            # previous entry of Ms (no previous entry at m == 0).
            if m > 0:
                cput[aidx, tr, m] = tf - t0 + cput[aidx, tr, m - 1]
            else:
                cput[aidx, tr, m] = tf - t0

            wts = alg.weights()
            err[aidx, tr, m] = np.sqrt(
                (((wts[:, np.newaxis] * X).sum(axis=0) - XS) ** 2).sum())
            csize[aidx, tr, m] = (wts > 0).sum()

np.savez_compressed('gauss_results.npz',
Ejemplo n.º 4
0
def fw_single(N, D, dist="gauss"):
    """Run the Frank-Wolfe coreset assertions on one generated dataset.

    Data comes from ``gendata(N, D, dist)`` and is wrapped in a
    ``bc.FrankWolfe`` object. The checks run in this order:

    1. ``sqrt_bound(m)`` / ``exp_bound(m)`` are nonnegative and do not
       increase for m = 1..N, and are below ``tol`` at m = 1e100.
    2. For incremental ``run(m)`` with m = 1..N: coreset size, error
       monotonicity, agreement of ``error('accurate')`` with the directly
       computed error and with ``error()``, bound validity, plus extra
       checks when ``dist`` contains 'colinear' or 'axis'.
    3. ``reset()`` restores the initial state.
    4. A single ``run(N)`` matches the incremental result.

    Args:
        N: forwarded to ``gendata``; also the largest coreset size tested.
            Presumably the number of data points - confirm against gendata.
        D: forwarded to ``gendata``; presumably the data dimension.
        dist: forwarded to ``gendata``; the substrings 'colinear' and 'axis'
            switch on additional assertions.

    Raises:
        AssertionError: on the first check that fails.
    """
    x = gendata(N, D, dist)
    xs = x.sum(axis=0)
    fw = bc.FrankWolfe(x)

    # --- bound tests ---
    # Starting from +inf makes the "not increasing" check trivially true
    # on the first iteration.
    last_sqrt_bd = np.inf
    last_exp_bd = np.inf
    for m in range(1, N + 1):
        sqrt_bd = fw.sqrt_bound(m)
        exp_bd = fw.exp_bound(m)
        assert sqrt_bd >= 0., "FW failed: sqrt bound < 0 " + str(sqrt_bd)
        assert sqrt_bd - last_sqrt_bd < tol, "FW failed: sqrt bound is not decreasing"
        assert exp_bd >= 0., "FW failed: exp bound < 0"
        assert exp_bd - last_exp_bd < tol, "FW failed: exp bound is not decreasing"
        last_sqrt_bd = sqrt_bd
        last_exp_bd = exp_bd
    assert fw.sqrt_bound(1e100) < tol, "FW failed: sqrt bound doesn't approach 0"
    assert fw.exp_bound(1e100) < tol, "FW failed: exp bound doesn't approach 0"

    # --- incremental M tests ---
    last_err = np.inf
    for m in range(1, N + 1):
        fw.run(m)
        if x.shape[0] == 1:
            # With a single row the weight must be 1, or 0 when that row is
            # exactly zero.
            assert np.fabs(fw.weights() - np.array([1])) < tol or (
                np.fabs(fw.weights() - np.array([0])) < tol
                and (x**2).sum() == 0.
            ), "FW failed: coreset not immediately optimal with N = 1"
        assert (fw.weights() > 0.).sum() <= m, "FW failed: coreset size > m"

        xw = (fw.weights()[:, np.newaxis] * x).sum(axis=0)
        # Euclidean distance between the weighted sum and the full sum;
        # every remaining check in this iteration compares against it.
        err = np.sqrt(((xw - xs)**2).sum())

        assert err - last_err < tol, "FW failed: error is not monotone decreasing"
        assert np.fabs(
            fw.error('accurate') - err
        ) < tol, "FW failed: x(w) est is not close to true x(w)"
        assert np.fabs(
            fw.error('accurate') - fw.error()
        ) < tol * 1000, "FW failed: fw.error(accurate/fast) do not return similar results"
        assert fw.sqrt_bound() - err >= -tol, "FW failed: sqrt bound invalid"
        assert fw.exp_bound() - err >= -tol, "FW failed: exp bound invalid"

        if 'colinear' in dist and m >= 2:
            assert err < tol, "FW failed: for M>=2, coreset with colinear data not optimal"
        if 'axis' in dist:
            ratio = float(N) / float(m)
            assert np.all(
                np.fabs(fw.weights()[fw.weights() > 0.] - ratio) < tol
            ), "FW failed: on axis-aligned data, weights are not N/M"
            assert np.fabs(
                err / np.sqrt((xs**2).sum()) - np.sqrt(ratio - 1.)
            ) < tol, "FW failed: on axis-aligned data, error is not sqrt(N/M-1)"
        last_err = err

    # save incremental M result
    w_inc = fw.weights()
    xw_inc = (fw.weights()[:, np.newaxis] * x).sum(axis=0)

    # --- check reset ---
    # After reset: M is 0, all weights are ~0, the error equals the norm
    # of the full sum, and the numeric-limit flag is cleared.
    fw.reset()
    assert (
        fw.M == 0
        and np.all(np.fabs(fw.weights()) < tol)
        and np.fabs(fw.error() - np.sqrt((xs**2).sum())) < tol
        and not fw.reached_numeric_limit
    ), "FW failed: fw.reset() did not properly reset"

    # --- check run up to N all at once vs incremental ---
    fw.run(N)
    xw = (fw.weights()[:, np.newaxis] * x).sum(axis=0)
    assert np.sqrt(
        ((xw - xw_inc)**2).sum()
    ) < tol, "FW failed: incremental run up to N doesn't produce same result as one run at N"