Beispiel #1
0
from scipy import stats
import numpy as np
from statsmodels.sandbox.distributions.mixture_rvs import mixture_rvs
from statsmodels.nonparametric.kde import (kdensity, kdensityfft)
import matplotlib.pyplot as plt

# Fix the seed so the simulated sample (and therefore the plot) is
# reproducible from run to run.
np.random.seed(12345)

# 10000 draws from a mixture of two normal distributions centred at -1 and 1,
# both with scale .5.  ``[.25, .75]`` are presumably the mixing
# probabilities of the two components -- confirm against ``mixture_rvs``.
obs_dist = mixture_rvs([.25, .75],
                       size=10000,
                       dist=[stats.norm, stats.norm],
                       kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)))
#.. obs_dist = mixture_rvs([.25,.75], size=10000, dist=[stats.norm, stats.beta],
#..            kwargs = (dict(loc=-1,scale=.5),dict(loc=1,scale=1,args=(1,.5))))

# FFT-based kernel density estimate; the call returns the estimated density,
# the grid it was evaluated on and the bandwidth.
f_hat, grid, bw = kdensityfft(obs_dist, kernel="gauss", bw="scott")

# Check the plot: normalised histogram of the sample with the density
# estimate drawn on top.

plt.figure()
# ``density=True`` is the supported spelling of the old ``normed=True``
# keyword, which newer Matplotlib releases no longer accept in ``hist``.
plt.hist(obs_dist, bins=50, density=True, color='red')
plt.plot(grid, f_hat, lw=2, color='black')
plt.show()

# do some timings
# get bw first because they're not streamlined
from statsmodels.nonparametric import bandwidths
# Precompute the Scott bandwidth so the timed calls below can be given a
# number instead of the rule name.  This rebinds ``bw`` from the call above.
bw = bandwidths.bw_scott(obs_dist)

#.. timeit kdensity(obs_dist, kernel="gauss", bw=bw, gridsize=2**10)
#.. timeit kdensityfft(obs_dist, kernel="gauss", bw=bw, gridsize=2**10)
Beispiel #2
0
if __name__ == '__main__':
    # Demo section: draws a random sample from a two-component normal mixture
    # and, when the Chebyshev branch below is enabled, fits a density
    # estimate to it with ``density_orthopoly``.
    # NOTE(review): the section looks truncated, and ``density_orthopoly`` /
    # ``ChebyTPoly`` are neither defined nor imported in the visible code --
    # presumably they are defined earlier in the original module; confirm.

    # Names of the available demos; only membership is tested below.
    examples = ['chebyt', 'fourier', 'hermite']#[2]

    # Number of random draws in the sample.
    nobs = 10000

    import matplotlib.pyplot as plt
    from statsmodels.sandbox.distributions.mixture_rvs import (
                                                mixture_rvs, MixtureDistribution)

    # Seeding is commented out, so every run produces a different sample.
    #np.random.seed(12345)
    # Earlier mixture parameters, kept for reference:
##    obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm],
##                   kwargs = (dict(loc=-1,scale=.5),dict(loc=1,scale=.75)))
    # loc/scale keyword arguments, one dict per mixture component.
    mix_kwds = (dict(loc=-0.5,scale=.5),dict(loc=1,scale=.2))
    # Sample of ``nobs`` draws from the two normal components; ``[1/3., 2/3.]``
    # are presumably the mixing probabilities -- confirm against mixture_rvs.
    obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm],
                   kwargs=mix_kwds)
    # Not used in the visible part of this section.
    mix = MixtureDistribution()

    # Alternative sample from a single normal, disabled:
    #obs_dist = np.random.randn(nobs)/4. #np.sqrt(2)


    # NOTE(review): ``examples`` contains 'chebyt' but not 'chebyt_' (trailing
    # underscore), so this branch never runs as written -- presumably a
    # deliberate way to switch the example off; confirm.
    if "chebyt_" in examples: # needed for Cheby example below
        #obs_dist = np.clip(obs_dist, -2, 2)/2.01
        # Keep only observations strictly inside (-2, 2) and halve them, so
        # the retained values lie in (-1, 1).
        # NOTE(review): the original label here read "chebyt [0,1]", which
        # does not match the (-1, 1) range this line produces.
        obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/2.0 #/4. + 2/4.0
        # Disabled alternative rescaling labelled "fourier [0,1]":
        #obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/4. + 2/4.0
        # Order-20 fit; the call's four return values are unpacked here.
        f_hat, grid, coeffs, polys = density_orthopoly(obs_dist, ChebyTPoly, order=20, xeval=None)
        # Disabled manual renormalisation of the estimate:
        #f_hat /= f_hat.sum() * (grid.max() - grid.min())/len(grid)
        # Second reference to the same estimate (no copy is made).
        f_hat0 = f_hat
        # Not used in the visible part of this branch.
        from scipy import integrate
Beispiel #3
0
import os

import numpy as np
import numpy.testing as npt
from scipy import stats
from statsmodels.nonparametric.kde import KDE
from statsmodels.sandbox.distributions.mixture_rvs import mixture_rvs

# Reference results from Stata, read from results/results_kde.csv in the
# directory that contains this module.

curdir = os.path.dirname(os.path.abspath(__file__))
rfname = os.path.join(curdir,'results','results_kde.csv')
#print rfname
# Open the file in a ``with`` block so the handle is closed as soon as the
# table has been parsed, instead of being left open for the whole process.
# ``names=True`` makes the columns addressable by their header names.
with open(rfname, 'rb') as _results_file:
    KDEResults = np.genfromtxt(_results_file, delimiter=",", names=True)

# Test sample: 200 draws from a mixture of two normal distributions.  The
# seed is fixed so the sample is identical on every run.

np.random.seed(12345)
Xi = mixture_rvs(
    [.25, .75],
    size=200,
    dist=[stats.norm, stats.norm],
    kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)),
)

class CheckKDE(object):
    """Shared density check for the KDE test cases.

    ``test_density`` reads ``self.res1.density`` and ``self.res_density``;
    both attributes have to be provided by the concrete test class.
    """

    # Number of decimal places to which the two densities must agree.
    decimal_density = 7

    def test_density(self):
        """Assert that the fitted density matches the reference values."""
        estimated = self.res1.density
        reference = self.res_density
        npt.assert_almost_equal(estimated, reference,
                                decimal=self.decimal_density)

class TestKDEGauss(CheckKDE):
    """Density check for ``KDE`` fitted with the "gau" kernel.

    The model is fitted on ``Xi`` without FFT using the "silverman"
    bandwidth, and compared with the "gau_d" column of ``KDEResults``.
    """

    @classmethod
    def setup_class(cls):
        """Fit the model once and store the result and reference on the class.

        pytest only calls the class-level fixture when it is named
        ``setup_class``; with the nose-only spelling ``setupClass`` the
        attributes read by ``test_density`` would never be set under pytest.
        """
        res1 = KDE(Xi)
        res1.fit(kernel="gau", fft=False, bw="silverman")
        cls.res1 = res1
        cls.res_density = KDEResults["gau_d"]

    # Backward-compatible alias for code that still calls the old name.
    setupClass = setup_class
if __name__ == '__main__':
    # Demo section: draws a random sample from a two-component normal mixture
    # and, when the Chebyshev branch below is enabled, fits a density
    # estimate to it with ``density_orthopoly``.
    # NOTE(review): the section looks truncated, and ``density_orthopoly`` /
    # ``ChebyTPoly`` are neither defined nor imported in the visible code --
    # presumably they are defined earlier in the original module; confirm.

    # Names of the available demos; only membership is tested below.
    examples = ['chebyt', 'fourier', 'hermite']#[2]

    # Number of random draws in the sample.
    nobs = 10000

    import matplotlib.pyplot as plt
    from statsmodels.sandbox.distributions.mixture_rvs import (
                                                mixture_rvs, MixtureDistribution)

    # Seeding is commented out, so every run produces a different sample.
    #np.random.seed(12345)
    # Earlier mixture parameters, kept for reference:
##    obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm],
##                   kwargs = (dict(loc=-1,scale=.5),dict(loc=1,scale=.75)))
    # loc/scale keyword arguments, one dict per mixture component.
    mix_kwds = (dict(loc=-0.5,scale=.5),dict(loc=1,scale=.2))
    # Sample of ``nobs`` draws from the two normal components; ``[1/3., 2/3.]``
    # are presumably the mixing probabilities -- confirm against mixture_rvs.
    obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm],
                   kwargs=mix_kwds)
    # Not used in the visible part of this section.
    mix = MixtureDistribution()

    # Alternative sample from a single normal, disabled:
    #obs_dist = np.random.randn(nobs)/4. #np.sqrt(2)


    # NOTE(review): ``examples`` contains 'chebyt' but not 'chebyt_' (trailing
    # underscore), so this branch never runs as written -- presumably a
    # deliberate way to switch the example off; confirm.
    if "chebyt_" in examples: # needed for Cheby example below
        #obs_dist = np.clip(obs_dist, -2, 2)/2.01
        # Keep only observations strictly inside (-2, 2) and halve them, so
        # the retained values lie in (-1, 1).
        # NOTE(review): the original label here read "chebyt [0,1]", which
        # does not match the (-1, 1) range this line produces.
        obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/2.0 #/4. + 2/4.0
        # Disabled alternative rescaling labelled "fourier [0,1]":
        #obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/4. + 2/4.0
        # Order-20 fit; the call's four return values are unpacked here.
        f_hat, grid, coeffs, polys = density_orthopoly(obs_dist, ChebyTPoly, order=20, xeval=None)
        # Disabled manual renormalisation of the estimate:
        #f_hat /= f_hat.sum() * (grid.max() - grid.min())/len(grid)
        # Second reference to the same estimate (no copy is made).
        f_hat0 = f_hat
        # Not used in the visible part of this branch.
        from scipy import integrate