def test_normalize():
    """Check that ``normalize_params`` leaves the mixture pdf unchanged.

    For mixtures with 1 to 4 components, ten random parameter vectors are
    drawn per mixture size.  Each vector is passed through
    ``mixvn.normalize_params`` and the pdf evaluated with the normalized
    parameters has to agree with the pdf of the raw parameters to 14
    decimals.
    """
    np.random.seed(123)
    npar = 2
    k = 3
    grid = np.linspace(-k * np.pi, k * np.pi, 21)
    mixture = mixvn.VonMisesMixture(grid)

    for n_comp in range(1, 5):
        # npar entries per component, followed by n_comp - 1 trailing entries
        # (presumably the mixing-probability parameters -- TODO confirm)
        n_main = npar * n_comp
        n_total = n_main + n_comp - 1

        for _draw in range(10):
            raw = np.random.uniform(-5, 5, size=n_total)
            # shrink the trailing entries by a factor of ten
            raw[n_main:] /= 10

            # evaluate with the raw parameters before normalizing them
            pdf_raw = mixture.pdf_mix(raw)

            # note: most parameters are changed by the normalization
            normalized = mixvn.normalize_params(raw)
            pdf_normalized = mixture.pdf_mix(normalized)

            assert_almost_equal(pdf_normalized, pdf_raw, decimal=14)
Created on Fri Oct 26 21:57:48 2012 Author: Josef Perktold """ import numpy as np from scipy import stats, interpolate, integrate from dist_mixtures.mixture_von_mises import (VonMisesMixture, normalize_params, shift_loc) from numpy.testing import assert_almost_equal, assert_ p = [-4, -2.5*np.pi, -4, 1*np.pi, 5, 3.5*np.pi, 0, 0] p_transformed = [4, 0.5*np.pi, 4, 0, 5, -0.5*np.pi, 0, 0] assert_almost_equal(normalize_params(p), p_transformed, 13) loc1 = shift_loc(np.linspace(-5, 5, 21) * np.pi)[:-1] loc2 = np.tile(np.linspace(-1, 0.5, 4) * np.pi, 5) assert_almost_equal(loc1, loc2, 13) res2_params = np.array([ 1.90886275, -2.99882496, 0.38442792, -0.86952549, 0.33013396]) res3_params = np.array([ 0.27505697, -1.27358384, 2.38037407, -2.90256257, -4.29683974, -0.48736552, 2.08776124, 1.9212298 ]) res4_params = np.array([ 1.56354196, -2.95987588, -1.19876203, -2.94677148, -4.76746645, 1.87938641, 9.89761038, -0.49076411, 3.97260961, 2.50826187, 1.29319052])
alpha=0.7, label='data') plt.plot(data[:, 0], res.model.pdf_mix(res.params, data[:, 0]) * rad_diff, color='r', lw=2, alpha=0.7, label='estimated') plt.title('Length distribution - data and estimate') plt.legend() count_endog = data_raw[:, 1] / 100. bins_exog = np.linspace(-np.pi, np.pi, 180 + 1) modb = mixvn.VonMisesMixtureBinned(count_endog, bins_exog) resb = modb.fit(start_params=res.params, method='bfgs') resb.params = mixvn.normalize_params(resb.params) resb2 = modb.fit(start_params=start_params, method='bfgs') resb2.params = mixvn.normalize_params(resb2.params) print 'res.params ', res.params print 'resb.params ', resb.params print 'resb2.params', resb2.params #TODO: need to standardize sequence of components in params print 'gof chisquare', resb.model.gof_chisquare(resb.params) #LS is more sensitive to start_params ? resbls = modb.fit_ls(start_params=res.params) resbls_params = mixvn.normalize_params(resbls[0]) print 'resbls params ', resbls_params plt.figure() plt.plot(data[:, 0],
def test_vonmisesmixture():
    """Check sampling, pdf/cdf and fitting of a two-component mixture.

    The test

    * draws a large random sample with ``rvs_mix`` and checks that it stays
      inside ``[-pi, pi]``,
    * compares the sample with the theoretical cdf (Kolmogorov-Smirnov,
      chisquare and mean squared error of the binned frequencies),
    * checks the consistency of ``pdf_mix`` and ``cdf_mix`` including the
      behavior under shifts by ``2 * pi``,
    * compares the standalone functions in ``mixvn`` with the methods and
      with ``scipy.stats.vonmises``,
    * fits the model by maximum likelihood (raw and binned data) and by
      least squares (binned data) and compares the estimates with the
      parameters used for the simulation.

    The random number generator is not seeded, so the goodness-of-fit
    p-value checks can fail occasionally.
    """
    #np.random.seed(987789)  #TODO: add seed later, random failure without

    # values of initialization not used
    mod2 = mixvn.VonMisesMixture(np.random.uniform(-np.pi, np.pi, size=10))
    # params[0], params[1] are kappa and loc of the first component (see the
    # comparison with stats.vonmises below); the last entry is presumably
    # the mixing-probability parameter -- TODO confirm
    params = [2., -0.75 * np.pi, 4., np.pi / 2, 0.4]
    nobs = 50000
    rvs = mod2.rvs_mix(params, size=nobs, shuffle=True)
    assert_equal(len(rvs), nobs)

    # check within bounds
    above = (rvs > np.pi).sum()
    below = (rvs < -np.pi).sum()
    assert_equal(above, 0)
    assert_equal(below, 0)

    # gof tests
    n_bins = 180
    bins = np.linspace(-np.pi, np.pi, n_bins + 1)
    count, _ = np.histogram(rvs, bins=bins)
    # force float division: with integer counts ``count / count.sum()`` is
    # floor division under Python 2, which makes freq identically zero and
    # the mse check below vacuous
    freq = count / float(count.sum())
    assert_equal(count.sum(), len(rvs))

    ks = stats.kstest(rvs, lambda x: mod2.cdf_mix(params, x))
    assert_array_less(0.1, ks[1])

    c1 = mod2.cdf_mix(params, bins)
    p0 = np.diff(c1)
    chi2 = stats.chisquare(count, p0 * nobs)
    assert_array_less(0.1, chi2[1])
    mse = ((freq - p0)**2).mean()
    assert_array_less(mse, 1e-4)

    # more pdf, cdf checks
    p2 = mod2.pdf_mix(params, bins)
    from scipy import integrate
    # ``cumtrapz`` was renamed to ``cumulative_trapezoid`` and the old name
    # has been removed from recent scipy; fall back for old versions
    cumtrapz = getattr(integrate, 'cumulative_trapezoid', None)
    if cumtrapz is None:
        cumtrapz = integrate.cumtrapz
    c2 = cumtrapz(p2, dx=bins[1] - bins[0])
    assert_almost_equal(c2, c1[1:], decimal=4)  # approximation error

    # check wrapping to [-np.pi, np.pi]
    #TODO: open interval ?
    assert_almost_equal(mod2.cdf_mix(params, -np.pi), 0, decimal=13)
    assert_almost_equal(mod2.cdf_mix(params, np.pi), 1, decimal=13)

    # the cdf increases by one for each full period
    c3 = mod2.cdf_mix(params, bins + 2 * np.pi)
    assert_almost_equal(c3 - 1, c1, decimal=13)
    c3 = mod2.cdf_mix(params, bins - 2 * np.pi)
    assert_almost_equal(c3 + 1, c1, decimal=13)

    # the pdf is periodic
    p1 = mod2.pdf_mix(params, bins)
    p3 = mod2.pdf_mix(params, bins + 2 * np.pi)
    assert_almost_equal(p3, p1, decimal=13)
    p3 = mod2.pdf_mix(params, bins - 2 * np.pi)
    assert_almost_equal(p3, p1, decimal=13)

    # check standalone functions
    p4 = mod2.pdf_mix(params, bins * 4)
    pf1 = mixvn.pdf_mix(params, bins * 4)
    assert_almost_equal(pf1, p4, decimal=13)

    pvn = mixvn.pdf_vn(bins * 4, params[0], params[1])
    psp = stats.vonmises.pdf(bins * 4, params[0], params[1])
    assert_almost_equal(pvn, psp, decimal=13)

    # periodicity of cdf with trend, origin not fixed
    cvn = mixvn.cdf_vn(bins, params[0], params[1])
    cvn3 = mixvn.cdf_vn(bins + 2 * np.pi, params[0], params[1])
    assert_almost_equal(cvn3 - 1, cvn, decimal=13)
    cvn3 = mixvn.cdf_vn(bins - 2 * np.pi, params[0], params[1])
    assert_almost_equal(cvn3 + 1, cvn, decimal=13)

    # test fit, using only the first 2000 observations
    # (fit is not much better with the full sample, but slower)
    mod3 = mixvn.VonMisesMixture(rvs[:2000])
    # good starting values
    res3 = mod3.fit(start_params=np.array(params) * 1.1)
    res3.params = mixvn.normalize_params(res3.params)
    assert_almost_equal(res3.params, params, decimal=1)

    # simple starting values, refit same model instance
    res3 = mod3.fit(start_params=0.5 * np.ones(len(params)))
    res3.params = mixvn.normalize_params(res3.params)
    assert_almost_equal(res3.params, params, decimal=1)

    # fit with binned data, full sample, 180 bins on (-pi, pi)
    mod5 = mixvn.VonMisesMixtureBinned(count, bins)
    # good starting values
    res5 = mod5.fit(start_params=np.array(params) * 1.1)
    res5.params = mixvn.normalize_params(res5.params)
    assert_almost_equal(res5.params, params, decimal=1)
    assert_almost_equal(res5.params, res3.params, decimal=1)

    # LS fit with binned data, full sample, 180 bins on (-pi, pi)
    # good starting values
    res6 = mod5.fit_ls(start_params=np.array(params) * 1.1)
    res6_params = mixvn.normalize_params(res6[0])
    assert_almost_equal(res6_params, params, decimal=1)
    assert_almost_equal(res6_params, res5.params, decimal=2)

    # other starting values
    #TODO: optimize.leastsq can fail with maxfev, scipy cdf with ZeroDivision
    # which is why the plain ``0.5 * ones`` start (last entry 0.05) is not
    # used here
    start_params = [-2.5, 4, 0.1, 4, 0.1]
    res6a = mod5.fit_ls(start_params=start_params)
    res6a_params = mixvn.normalize_params(res6a[0])
    assert_almost_equal(res6a_params, params, decimal=1)
    # both least squares fits should end up at the same estimate
    # (the original compared res6_params with itself)
    assert_almost_equal(res6a_params, res6_params, decimal=4)
Created on Fri Oct 26 21:57:48 2012 Author: Josef Perktold """ import numpy as np from scipy import stats, interpolate, integrate from dist_mixtures.mixture_von_mises import (VonMisesMixture, normalize_params, shift_loc) from numpy.testing import assert_almost_equal, assert_ p = [-4, -2.5 * np.pi, -4, 1 * np.pi, 5, 3.5 * np.pi, 0, 0] p_transformed = [4, 0.5 * np.pi, 4, 0, 5, -0.5 * np.pi, 0, 0] assert_almost_equal(normalize_params(p), p_transformed, 13) loc1 = shift_loc(np.linspace(-5, 5, 21) * np.pi)[:-1] loc2 = np.tile(np.linspace(-1, 0.5, 4) * np.pi, 5) assert_almost_equal(loc1, loc2, 13) res2_params = np.array( [1.90886275, -2.99882496, 0.38442792, -0.86952549, 0.33013396]) res3_params = np.array([ 0.27505697, -1.27358384, 2.38037407, -2.90256257, -4.29683974, -0.48736552, 2.08776124, 1.9212298 ]) res4_params = np.array([ 1.56354196, -2.95987588, -1.19876203, -2.94677148, -4.76746645, 1.87938641,