Example #1
def test_emd2_multi():

    from ot.datasets import get_1D_gauss as gauss

    n = 1000  # nb bins

    # bin positions
    x = np.arange(n, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=20, s=5)  # m= mean, s= std

    ls = np.arange(20, 1000, 20)
    nb = len(ls)
    b = np.zeros((n, nb))
    for i in range(nb):
        b[:, i] = gauss(n, m=ls[i], s=10)

    # loss matrix
    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
    # M/=M.max()

    print('Computing {} EMD '.format(nb))

    # emd loss 1 proc
    ot.tic()
    emd1 = ot.emd2(a, b, M, 1)
    ot.toc('1 proc : {} s')

    # emd loss multi proc
    ot.tic()
    emdn = ot.emd2(a, b, M)
    ot.toc('multi proc : {} s')

    np.testing.assert_allclose(emd1, emdn)
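When b is a 2-D array, as in the test above, ot.emd2 computes one EMD value per column of b, and in the POT version these tests target the fourth positional argument sets the number of worker processes. Below is a minimal sketch of that batched usage; it assumes a current POT release where the 1D Gaussian helper is ot.datasets.make_1D_gauss (older releases expose it as get_1D_gauss, as imported in the test above).

import numpy as np
import ot

n = 100  # nb bins
x = np.arange(n, dtype=np.float64)

# one source histogram and several target histograms (one per column of b)
a = ot.datasets.make_1D_gauss(n, m=20, s=5)
b = np.stack([ot.datasets.make_1D_gauss(n, m=m, s=10) for m in (30, 50, 70)],
             axis=1)

# squared Euclidean loss matrix on the bin positions
M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))

costs = ot.emd2(a, b, M)  # one EMD cost per column of b
print(costs)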
Example #2
def test_tic_toc():

    import time

    ot.tic()
    time.sleep(0.5)
    t = ot.toc()
    t2 = ot.toq()

    # test timing
    np.testing.assert_allclose(0.5, t, rtol=1e-2, atol=1e-2)

    # test toc vs toq
    np.testing.assert_allclose(t, t2, rtol=1e-2, atol=1e-2)
Example #3
def test_tic_toc():

    import time

    ot.tic()
    time.sleep(0.5)
    t = ot.toc()
    t2 = ot.toq()

    # test timing
    np.testing.assert_allclose(0.5, t, rtol=1e-1, atol=1e-1)

    # test toc vs toq
    np.testing.assert_allclose(t, t2, rtol=1e-1, atol=1e-1)
Example #4
def test_tic_toc():

    import time

    ot.tic()
    time.sleep(0.1)
    t = ot.toc()
    t2 = ot.toq()

    # test timing
    # np.testing.assert_allclose(0.1, t, rtol=1e-1, atol=1e-1)
    # on very slow macOS GitHub Actions runners a tight equality check is not reliable
    assert t > 0.09

    # test toc vs toq
    np.testing.assert_allclose(t, t2, rtol=1e-1, atol=1e-1)
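The three test_tic_toc variants above exercise POT's simple global timer. A minimal usage sketch: ot.tic() starts the timer, ot.toc() prints and returns the elapsed time (optionally through a format string, as used in the other tests), and ot.toq() returns the elapsed time without printing.

import time

import ot

ot.tic()                      # start the global timer
time.sleep(0.2)
t = ot.toc('elapsed: {} s')   # print the message and return elapsed seconds
t2 = ot.toq()                 # return elapsed seconds without printing
print(t, t2)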
Example #5
def test_dual_variables():
    n = 500  # nb bins
    m = 600  # nb bins

    mean1 = 300
    mean2 = 400

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1)))**(1. / 2)

    print('Computing {} EMD '.format(1))

    # emd loss 1 proc
    ot.tic()
    G, log = ot.emd(a, b, M, log=True)
    ot.toc('1 proc : {} s')

    ot.tic()
    G2 = ot.emd(b, a, np.ascontiguousarray(M.T))
    ot.toc('1 proc : {} s')

    cost1 = (G * M).sum()
    # Check symmetry
    np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum())
    # Check with closed-form solution for gaussians
    np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))

    # Check that both cost computations are equivalent
    np.testing.assert_almost_equal(cost1, log['cost'])
    check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost'])

    constraint_violation = log['u'][:, None] + log['v'][None, :] - M

    assert constraint_violation.max() < 1e-8
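check_duality_gap is a test helper that is not shown in these excerpts. Based on the assertions above, here is a hedged sketch of what such a helper could verify: by linear-programming duality for the EMD problem, the primal cost (G * M).sum() equals the dual objective np.dot(a, u) + np.dot(b, v) at the optimum, and the dual potentials must be feasible, i.e. u_i + v_j <= M_ij.

import numpy as np

def check_duality_gap(a, b, M, G, u, v, cost):
    # primal cost recomputed from the transport plan
    np.testing.assert_allclose((G * M).sum(), cost)
    # zero duality gap: the dual objective equals the primal cost at the optimum
    np.testing.assert_allclose(np.dot(a, u) + np.dot(b, v), cost)
    # dual feasibility: u_i + v_j <= M_ij, up to numerical tolerance
    assert (u[:, None] + v[None, :] - M).max() < 1e-8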
Example #6
def test_emd2_multi():
    n = 500  # nb bins

    # bin positions
    x = np.arange(n, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=20, s=5)  # m= mean, s= std

    ls = np.arange(20, 500, 20)
    nb = len(ls)
    b = np.zeros((n, nb))
    for i in range(nb):
        b[:, i] = gauss(n, m=ls[i], s=10)

    # loss matrix
    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
    # M/=M.max()

    print('Computing {} EMD '.format(nb))

    # emd loss 1 proc
    ot.tic()
    emd1 = ot.emd2(a, b, M, 1)
    ot.toc('1 proc : {} s')

    # emd loss multi proc
    ot.tic()
    emdn = ot.emd2(a, b, M)
    ot.toc('multi proc : {} s')

    ot.tic()
    emdn2 = ot.emd2(a, b, M, dense=False)
    ot.toc('multi proc : {} s')

    np.testing.assert_allclose(emd1, emdn)
    np.testing.assert_allclose(emd1, emdn2, rtol=1e-6)

    # emd loss multi proc with log
    ot.tic()
    emdn = ot.emd2(a, b, M, log=True, return_matrix=True)
    ot.toc('multi proc : {} s')

    for i in range(len(emdn)):
        emd = emdn[i]
        log = emd[1]
        cost = emd[0]
        check_duality_gap(a, b[:, i], M, log['G'], log['u'], log['v'], cost)
        emdn[i] = cost

    emdn = np.array(emdn)
    np.testing.assert_allclose(emd1, emdn)
Example #7
def test_dual_variables():
    n = 5000  # nb bins
    m = 6000  # nb bins

    mean1 = 1000
    mean2 = 1100

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)

    print('Computing {} EMD '.format(1))

    # emd loss 1 proc
    ot.tic()
    G, log = ot.emd(a, b, M, log=True)
    ot.toc('1 proc : {} s')

    ot.tic()
    G2 = ot.emd(b, a, np.ascontiguousarray(M.T))
    ot.toc('1 proc : {} s')

    cost1 = (G * M).sum()
    # Check symmetry
    np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum())
    # Check with closed-form solution for gaussians
    np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))

    # Check that both cost computations are equivalent
    np.testing.assert_almost_equal(cost1, log['cost'])
    check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost'])
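The "closed-form solution for gaussians" check above relies on the 1D structure of the problem: with ground cost |x - y| (the square root of the squared-Euclidean ot.dist used here), the optimal cost is the Wasserstein-1 distance, which in 1D equals the L1 distance between the cumulative distributions, and for two well-separated Gaussian histograms this reduces to the difference of the means. A small sketch verifying this numerically; the helper name assumes a current POT release (make_1D_gauss).

import numpy as np
import ot

n, mean1, mean2 = 500, 300, 400
x = np.arange(n, dtype=np.float64)
a = ot.datasets.make_1D_gauss(n, m=mean1, s=5)
b = ot.datasets.make_1D_gauss(n, m=mean2, s=10)

# 1D closed form: W1(a, b) = sum_t |F_a(t) - F_b(t)| on a unit-spaced grid
w1_cdf = np.abs(np.cumsum(a) - np.cumsum(b)).sum()

# exact LP solution with the |x - y| ground cost
M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1))) ** 0.5
w1_emd = ot.emd2(a, b, M)

print(w1_cdf, w1_emd, abs(mean1 - mean2))  # all approximately 100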
Example #8
def test_emd2_multi():
    n = 1000  # nb bins

    # bin positions
    x = np.arange(n, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=20, s=5)  # m= mean, s= std

    ls = np.arange(20, 1000, 20)
    nb = len(ls)
    b = np.zeros((n, nb))
    for i in range(nb):
        b[:, i] = gauss(n, m=ls[i], s=10)

    # loss matrix
    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
    # M/=M.max()

    print('Computing {} EMD '.format(nb))

    # emd loss 1 proc
    ot.tic()
    emd1 = ot.emd2(a, b, M, 1)
    ot.toc('1 proc : {} s')

    # emd loss multi proc
    ot.tic()
    emdn = ot.emd2(a, b, M)
    ot.toc('multi proc : {} s')

    np.testing.assert_allclose(emd1, emdn)

    # emd loss multi proc with log
    ot.tic()
    emdn = ot.emd2(a, b, M, log=True, return_matrix=True)
    ot.toc('multi proc : {} s')

    for i in range(len(emdn)):
        emd = emdn[i]
        log = emd[1]
        cost = emd[0]
        check_duality_gap(a, b[:, i], M, log['G'], log['u'], log['v'], cost)
        emdn[i] = cost

    emdn = np.array(emdn)
    np.testing.assert_allclose(emd1, emdn)
Example #9
def get_MSE(dataset_name, emd, repo):
    _, _, test = get_data(dataset_name, repo)
    xtest1, xtest2, ytest = test
    ot.tic()
    ytest_pred = emd.predict([xtest1, xtest2])
    t_est = ot.toc()
    err = np.mean(np.square(ytest_pred.ravel() - ytest.ravel()))
    errr = np.mean(np.square(ytest_pred.ravel() - ytest.ravel())) / np.mean(
        np.square(ytest.ravel()))
    r = np.corrcoef(ytest.ravel(), ytest_pred.ravel())[0, 1]
    # compute quantiles
    nbin = 30
    yp_mean = np.zeros((nbin, ))
    yp_10 = np.zeros((nbin, ))
    yp_90 = np.zeros((nbin, ))
    yp_plot = np.zeros((nbin, ))

    hst, bins = np.histogram(ytest[:], nbin)
    yp_plot[:] = np.array(
        [.5 * bins[k] + .5 * bins[k + 1] for k in range(nbin)])
    for j in range(nbin):
        idx = np.where((ytest[:] > bins[j]) * (ytest[:] < bins[j + 1]))
        ytemp = ytest_pred[idx]
        if ytemp.any():
            yp_mean[j] = ytemp.mean()
            yp_10[j] = np.percentile(ytemp, 10)
            yp_90[j] = np.percentile(ytemp, 90)
        else:
            yp_mean[j] = np.nan
            yp_10[j] = np.nan
            yp_90[j] = np.nan
    print('MSE={}\nRel MSE={}\nr={}\nEMD/s={}'.format(
        err, errr, r, ytest_pred.shape[0] / t_est))

    pl.figure(1, (8, 3))
    pl.clf()
    pl.plot([0, 45], [0, 45], 'k')
    xl = pl.axis()
    pl.plot(ytest, ytest_pred, '+')
    pl.plot([0, 45], [0, 45], 'k')
    pl.axis(xl)

    pl.xlim([0, 45])
    pl.ylim([0, 45])
    pl.xlabel('True Wass. distance')
    pl.ylabel('Predicted Wass. distance')
    pl.title('True and predicted Wass. distance')
    pl.legend(('Exact prediction', 'Model prediction'))
    pl.savefig('imgs/{}_emd_pred_true.png'.format(dataset_name), dpi=300)
    pl.savefig('imgs/{}_emd_pred_true.pdf'.format(dataset_name))

    pl.subplot(1, 2, 2)
    pl.plot([ytest[:].min(), ytest[:].max()],
            [ytest[:].min(), ytest[:].max()], 'k')
    pl.plot(yp_plot[:], yp_mean[:], 'r+-')
    pl.plot(yp_plot[:], yp_10[:], 'g+-')
    pl.plot(yp_plot[:], yp_90[:], 'b+-')
    pl.xlim([0, 45])
    pl.ylim([0, 45])
    pl.legend(('Exact prediction', 'Mean pred',
               '10th percentile', '90th percentile'))
    pl.title('{} MSE:{:3.2f}, RelMSE:{:3.3f}, Corr:{:3.3f}'.format(
        '', err, errr, r))
    pl.grid()
    pl.xlabel('True Wass. distance')
    pl.ylabel('Predicted Wass. distance')
    pl.savefig('imgs/{}_emd_pred_true_quantile.png'.format(dataset_name),
               dpi=300)

    pl.savefig('imgs/{}_perf.png'.format(dataset_name),
               dpi=300, bbox_inches='tight')
    pl.savefig('imgs/{}_perf.pdf'.format(dataset_name),
               dpi=300, bbox_inches='tight')
Example #10
def get_MSE(dataset_name, emd, repo):
    _, _, test = get_data(dataset_name, repo)
    xtest1, xtest2, ytest = test
    ot.tic()
    ytest_pred = emd.predict([xtest1, xtest2])
    t_est = ot.toc()
    err = np.mean(np.square(ytest_pred.ravel() - ytest.ravel()))
    errr = np.mean(np.square(ytest_pred.ravel() - ytest.ravel())) / np.mean(
        np.square(ytest.ravel()))
    r = np.corrcoef(ytest.ravel(), ytest_pred.ravel())[0, 1]
    # compute quantiles
    nbin = 30
    yp_mean = np.zeros((nbin, ))
    yp_10 = np.zeros((nbin, ))
    yp_90 = np.zeros((nbin, ))
    yp_plot = np.zeros((nbin, ))

    hst, bins = np.histogram(ytest[:], nbin)
    yp_plot[:] = np.array(
        [.5 * bins[k] + .5 * bins[k + 1] for k in range(nbin)])
    for j in range(nbin):
        idx = np.where((ytest[:] > bins[j]) * (ytest[:] < bins[j + 1]))
        ytemp = ytest_pred[idx]
        if ytemp.any():
            yp_mean[j] = ytemp.mean()
            yp_10[j] = np.percentile(ytemp, 10)
            yp_90[j] = np.percentile(ytemp, 90)
        else:
            yp_mean[j] = np.nan
            yp_10[j] = np.nan
            yp_90[j] = np.nan
    print('MSE={}\nRel MSE={}\nr={}\nEMD/s={}'.format(
        err, errr, r, ytest_pred.shape[0] / t_est))

    if not os.path.exists('imgs'):
        os.makedirs('imgs')

    pl.figure(1, (8, 3))
    pl.clf()
    pl.plot([0, 45], [0, 45], 'k')
    xl = pl.axis()
    pl.plot(ytest, ytest_pred, '+')
    pl.plot([0, 45], [0, 45], 'k')
    pl.axis(xl)

    pl.xlim([0, 45])
    pl.ylim([0, 45])
    pl.xlabel('True Wass. distance')
    pl.ylabel('Predicted Wass. distance')
    pl.title('True and predicted Wass. distance')
    pl.legend(('Exact prediction', 'Model prediction'))
    pl.savefig('imgs/{}_emd_pred_true.png'.format(dataset_name), dpi=300)
    pl.savefig('imgs/{}_emd_pred_true.pdf'.format(dataset_name))

    pl.subplot(1, 2, 2)
    pl.plot([ytest[:].min(), ytest[:].max()], [ytest[:].min(), ytest[:].max()],
            'k')
    pl.plot(yp_plot[:], yp_mean[:], 'r+-')
    pl.plot(yp_plot[:], yp_10[:], 'g+-')
    pl.plot(yp_plot[:], yp_90[:], 'b+-')
    pl.xlim([0, 45])
    pl.ylim([0, 45])
    pl.legend((
        'Exact prediction',
        'Mean pred',
        '10th percentile',
        '90th percentile',
    ))
    pl.title('{} MSE:{:3.2f}, RelMSE:{:3.3f}, Corr:{:3.3f}'.format(
        '', err, errr, r))
    pl.grid()
    pl.xlabel('True Wass. distance')
    pl.ylabel('Predicted Wass. distance')
    pl.savefig('imgs/{}_emd_pred_true_quantile.png'.format(dataset_name),
               dpi=300)

    pl.savefig('imgs/{}_perf.png'.format(dataset_name),
               dpi=300,
               bbox_inches='tight')
    pl.savefig('imgs/{}_perf.pdf'.format(dataset_name),
               dpi=300,
               bbox_inches='tight')
Example #11
for i in range(n_distributions):
    pl.plot(x, A[:, i])
pl.title('Distributions')
pl.tight_layout()

#%% barycenter computation

alpha = 0.5  # 0<=alpha<=1
weights = np.array([1 - alpha, alpha])

# l2bary
bary_l2 = A.dot(weights)

# wasserstein
reg = 1e-3
ot.tic()
bary_wass = ot.bregman.barycenter(A, M, reg, weights)
ot.toc()

ot.tic()
bary_wass2 = ot.lp.barycenter(A,
                              M,
                              weights,
                              solver='interior-point',
                              verbose=True)
ot.toc()

pl.figure(2)
pl.clf()
pl.subplot(2, 1, 1)
for i in range(n_distributions):
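The Example #11 fragment above is cut off at both ends and relies on A, M, x, and n_distributions defined earlier in the full script. Below is a minimal, self-contained setup one might assume for it (the distribution parameters are illustrative, not taken from the original script): columns of A are 1D histograms on the grid x, and M is a normalized squared-Euclidean cost matrix.

import numpy as np
import ot

n = 100  # number of bins
x = np.arange(n, dtype=np.float64)

# two 1D Gaussian histograms stored as columns of A
a1 = ot.datasets.make_1D_gauss(n, m=30, s=8)
a2 = ot.datasets.make_1D_gauss(n, m=60, s=5)
A = np.vstack((a1, a2)).T
n_distributions = A.shape[1]

# squared-Euclidean cost matrix, normalized as is common before entropic solvers
M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
M /= M.max()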