Ejemplo n.º 1
0
def mvcm(coord_data, y, bw0='cv_ls', res_idx=0):
    """
        Local linear kernel smoothing on beta in MVCM.

        Every subject's response curve is smoothed over the common coordinates
        with one kernel regression per (subject, measurement) pair.

        :param
            coord_data (matrix): common coordinate matrix (l*d)
            y (matrix): imaging response data (response matrix, n*l*m)
            bw0 (str or matrix): either the name of a bandwidth selection
                method that is forwarded to KernelReg (default 'cv_ls'), in
                which case the bandwidth is selected on the subject-averaged
                response of each measurement, or a pre-defined bandwidth
                matrix (d*m) whose mii-th column is used for measurement mii
            res_idx (scalar): indicator of calculating the residual matrix
                (residuals are computed only when it equals 1)
        :return
            sm_y (matrix): smoothed response matrix (n*l*m)
            bw_o (matrix): selected bandwidth (d*m); left at zero when the
                bandwidths are supplied through bw0
            res_y (matrix): residual matrix y - sm_y (n*l*m); left at zero
                unless res_idx == 1

    """

    # Set up
    n, l, m = y.shape
    d = coord_data.shape[1]
    sm_y = np.zeros_like(y)
    res_y = np.zeros_like(y)
    bw_o = np.zeros(shape=(d, m))

    # One continuous ('c') variable type per coordinate dimension.
    var_tp = 'c' * d

    # Strings must be recognised by type/value, never by identity ("is"):
    # identity of equal strings is an interpreter implementation detail.
    select_bw = isinstance(bw0, str)
    if not select_bw:
        bw_given = np.asarray(bw0)

    for mii in range(m):
        if select_bw:
            # Select the bandwidth once per measurement, on the mean curve.
            y_avg = np.mean(y[:, :, mii], axis=0)
            model_bw = nparam.KernelReg(endog=[y_avg], exog=[coord_data], var_type=var_tp, bw=bw0)
            bw_opt = model_bw.bw
            print("The optimal bandwidth for the " + str(mii+1) + "-th image measurement is")
            print(bw_opt)
            bw_o[:, mii] = bw_opt
        else:
            bw_opt = bw_given[:, mii]
        for nii in range(n):
            # y[nii, :, mii] already has shape (l,), same as the mean curve.
            y_ii = y[nii, :, mii]
            model_y = nparam.KernelReg(endog=[y_ii], exog=[coord_data], var_type=var_tp, bw=bw_opt)
            # fit() returns (mean, marginal effects); keep the fitted mean.
            sm_y[nii, :, mii] = model_y.fit()[0]
        if res_idx == 1:
            res_y[:, :, mii] = y[:, :, mii] - sm_y[:, :, mii]
            print("The bound of the residual is ["
                  + str(np.min(res_y[:, :, mii])) + ", " + str(np.max(res_y[:, :, mii])) + "]")

    return sm_y, bw_o, res_y
def KernelReg1D():
    """Demo: local-constant kernel regression on 1-D binary data, with a plot.

    Draws 500 sorted points on [-2, 2], turns a known mean curve into binary
    outcomes by comparing it with uniform noise, fits a KernelReg with a
    cross-validated bandwidth, and plots the data, the true mean and the
    fitted mean on a figure named 'Test1D'. Returns nothing.
    """
    np.random.seed(12345)
    n_points = 500

    # Sorted abscissae so that the line plots below are drawn left to right.
    x = np.sort(np.random.uniform(-2, 2, size=n_points))
    true_mean = 0.05 + np.abs(0.15 * (np.sin(x * 5) / x + 2 * x))

    # Binary outcome: True wherever the true mean exceeds a uniform draw.
    uniform_draws = np.random.random(size=n_points)
    outcome = true_mean > uniform_draws

    reg_options = {
        'reg_type': 'lc',  # lc, ll
        'var_type': 'c',  # c, u, o
        'bw': 'cv_ls',  # cv_ls, aic
    }
    regression = nparam.KernelReg(endog=[outcome], exog=[x], **reg_options)

    fitted_mean, _marginal_effects = regression.fit()

    figure = plt.figure('Test1D', figsize=(9, 5))
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(x, outcome, '+', alpha=0.5, ms=20, color='navy')
    for curve, curve_label in ((true_mean, 'true mean'),
                               (fitted_mean, 'kernel mean')):
        axes.plot(x, curve, lw=1, label=curve_label)
    axes.tick_params(direction='out')
    axes.set(xlim=[-2, 2], ylim=[0, 1])
    axes.legend()

    plt.tight_layout()
Ejemplo n.º 3
0
def kernelFit(x, y, **kwargs):
    """Fit a statsmodels kernel regression of y on x.

    The regressor is treated as continuous ('c') and the estimator is
    locally linear ('ll', as opposed to locally constant). Any extra keyword
    arguments are forwarded to KernelReg.

    Returns the tuple (x, fitted y); the marginal effects are discarded.
    """
    regression = smnparam.KernelReg(
        y,
        x,
        reg_type='ll',
        var_type='c',
        **kwargs
    )
    fitted_y, _marginal_effects = regression.fit()
    return x, fitted_y
def KernelReg2D():
    """Demo: local-linear kernel regression on 2-D binary data, with plots.

    Draws 500 points uniformly on the unit square, turns the known surface
    tanh(x) * tanh(y) into binary outcomes by comparing it with uniform
    noise, fits a KernelReg with a cross-validated bandwidth, and evaluates
    the fit on a 100 x 100 grid. The figure 'Test2D' shows the true surface
    (left) and the fitted surface (right), each overlaid with the sample
    points. Returns nothing.
    """
    np.random.seed(12345)
    nobs = 500

    xy = np.random.uniform(0, 1, size=(nobs, 2))

    def tanh_fn(x, y):
        return math.tanh(x) * math.tanh(y)

    z_true = np.asarray([tanh_fn(x, y) for (x, y) in xy])

    # Binary outcome: True wherever the true surface exceeds a uniform draw.
    R = np.random.random(size=nobs)
    z = z_true > R

    # Pass the two regressors as explicit columns. zip(*xy) is a lazy
    # iterator on Python 3, so wrapping it in a list does not yield the
    # (x, y) columns that KernelReg needs.
    model = nparam.KernelReg(
        endog=[z],
        exog=[xy[:, 0], xy[:, 1]],
        reg_type='ll',  # 'lc' (local constant), 'll' (local linear)
        var_type='cc',  # 'c' (continuous) -- for each variable in exog list
        bw='cv_ls')  # cv_ls, aic

    # Evaluate the fitted mean on a regular grid over the unit square.
    X, Y = np.mgrid[0:1:100j, 0:1:100j]
    positions = np.vstack([X.ravel(), Y.ravel()]).T
    sm_mean, sm_mfx = model.fit(positions)
    Z = np.reshape(sm_mean, X.shape)
    Z_true = np.reshape([tanh_fn(x, y) for (x, y) in positions], X.shape)

    fig = plt.figure('Test2D', figsize=(12, 5))

    color_args = dict(cmap='afmhot', vmin=0, vmax=1, alpha=0.5)

    # Left panel: true surface with the true mean at each sample point.
    ax = fig.add_subplot(121)
    ax.pcolor(X, Y, Z_true, **color_args)
    points = ax.scatter(xy[:, 0], xy[:, 1], c=z_true, s=30, **color_args)
    ax.tick_params(direction='out')
    ax.set(xlim=[0, 1], ylim=[0, 1])
    fig.colorbar(points)

    # Right panel: fitted surface with the observed binary outcomes.
    ax = fig.add_subplot(122)
    ax.pcolor(X, Y, Z, **color_args)
    points = ax.scatter(xy[:, 0], xy[:, 1], c=z, s=20, **color_args)
    ax.tick_params(direction='out')
    ax.set(xlim=[0, 1], ylim=[0, 1])
    fig.colorbar(points)

    plt.tight_layout()
Ejemplo n.º 5
0
def interp_scatter(x,
                   y,
                   z,
                   ranges,
                   cmap='afmhot',
                   plot=True,
                   interp=True,
                   **kwargs):
    """Scatter-plot z over (x, y), optionally on top of a kernel-regression fit.

    :param x, y: coordinates of the sample points
    :param z: value at each sample point (used as the scatter colour and as
        the regression response)
    :param ranges: triple ``(xlim, ylim, (vmin, vmax))`` giving the axis
        limits and the colour scale limits
    :param cmap: colormap name used for both the mesh and the scatter
    :param plot: when true, draw on the current matplotlib axes
    :param interp: when true, fit a local-linear KernelReg of z on (x, y)
        and evaluate it on a 101 x 101 grid spanning xlim x ylim
    :param kwargs: forwarded to ``plt.scatter``; any ``cmap``, ``vmin``,
        ``vmax`` or ``alpha`` entries are overwritten by the values derived
        from ``cmap`` and ``ranges``, and ``s`` defaults to 20
    :return: ``(X, Y, Z)`` mesh arrays when ``plot`` is false (each is
        ``None`` if ``interp`` is false as well, since no mesh was computed);
        nothing when ``plot`` is true
    """

    xlim, ylim, (vmin, vmax) = ranges

    # Defined up front so the no-plot return below cannot hit unbound names
    # when interpolation is switched off.
    X = Y = Z = None

    if interp:
        logger.debug('Instantiating KernelReg')
        model = nparam.KernelReg(
            [z], [x, y],
            defaults=nparam.EstimatorSettings(efficient=True),
            reg_type='ll',
            var_type='cc',
            bw='cv_ls')
        X, Y = np.mgrid[slice(xlim[0], xlim[1], 101j),
                        slice(ylim[0], ylim[1], 101j)]
        positions = np.vstack([X.ravel(), Y.ravel()]).T

        logger.debug('Fitting kernel regression model...')
        sm_mean, sm_mfx = model.fit(positions)
        Z = np.reshape(sm_mean, X.shape)

    if not plot:
        return X, Y, Z

    color_args = dict(cmap=cmap, vmin=vmin, vmax=vmax, alpha=1)
    if interp:
        logger.debug('Plotting fitted model on mesh grid...')
        plt.pcolormesh(X, Y, Z, shading='gouraud', **color_args)

    # The shared colour settings take precedence over caller-supplied ones so
    # that the scatter and the mesh use the same colour scale.
    kwargs.update(color_args)

    logger.debug('Plotting scatter...')
    kwargs.setdefault('s', 20)
    plt.scatter(x, y, c=z, lw=0.5, edgecolor='darkgray', **kwargs)
    plt.gca().set(xlim=xlim, ylim=ylim)
    nobs = 200
    np.random.seed(1234)
    C1 = np.random.normal(size=(nobs, ))
    C2 = np.random.normal(2, 1, size=(nobs, ))
    noise = np.random.normal(size=(nobs, ))
    Y = 0.3 + 1.2 * C1 - 0.9 * C2 + noise
    #self.write2file('RegData.csv', (Y, C1, C2))

    #CODE TO PRODUCE BANDWIDTH ESTIMATION IN R
    #library(np)
    #data <- read.csv('RegData.csv', header=FALSE)
    #bw <- npregbw(formula=data$V1 ~ data$V2 + data$V3,
    #                bwmethod='cv.aic', regtype='lc')
    model = nparam.KernelReg(endog=[Y],
                             exog=[C1, C2],
                             reg_type='lc',
                             var_type='cc',
                             bw='aic')
    mean, marg = model.fit()
    #R_bw = [0.4017893, 0.4943397]  # Bandwidth obtained in R
    bw_expected = [0.3987821, 0.50933458]
    #npt.assert_allclose(model.bw, bw_expected, rtol=1e-3)
    print('bw')
    print(model.bw)
    print(bw_expected)

    print('\nsig_test - default')
    print(model.sig_test([1], nboot=100))
    t1 = time.time()
    res0 = smkr.TestRegCoefC(model, [1])
    print('pvalue')
Ejemplo n.º 7
0
        censor_val=c_val[:, None]
        #defaults=nparam.EstimatorSettings(efficient=True)
    )

    sm_bw = model.bw

    sm_mean, sm_mfx = model.fit()

    #    model1 = nparam.KernelReg(endog=[y],
    #                             exog=[x], reg_type='lc',
    #                             var_type='c', bw='cv_ls')
    #    mean1, mfx1 = model1.fit()

    model2 = nparam.KernelReg(endog=[y_cens],
                              exog=[x, x2],
                              reg_type='ll',
                              var_type='cc',
                              bw='aic')  #, 'cv_ls'

    mean2, mfx2 = model2.fit()

    print(model.bw)
    #print model1.bw
    print(model2.bw)

    ix = np.argsort(y_cens)
    ix_rev = np.zeros(nobs, int)
    ix_rev[ix] = np.arange(nobs)
    ix_rev = model.sortix_rev

    import matplotlib.pyplot as plt
Ejemplo n.º 8
0
import statsmodels.nonparametric.api as nparam

if __name__ == '__main__':

    np.random.seed(500)
    nobs = [250, 1000][0]
    sig_fac = 1
    x = np.random.uniform(-2, 2, size=nobs)
    x.sort()
    y_true = np.sin(x * 5) / x + 2 * x
    y = y_true + sig_fac * (np.sqrt(
        np.abs(3 + x))) * np.random.normal(size=nobs)

    model = nparam.KernelReg(endog=[y],
                             exog=[x],
                             reg_type='lc',
                             var_type='c',
                             bw='cv_ls',
                             defaults=nparam.EstimatorSettings(efficient=True))

    sm_bw = model.bw

    sm_mean, sm_mfx = model.fit()

    model1 = nparam.KernelReg(endog=[y],
                              exog=[x],
                              reg_type='lc',
                              var_type='c',
                              bw='cv_ls')
    mean1, mfx1 = model1.fit()

    model2 = nparam.KernelReg(endog=[y],
         4.672, 3.883, 3.065, 3.489, 3.635, 5.443, 6.302,
         9.054, 12.485, 9.896, 8.33, 6.161, 7.055, 8.717,
         6.95]

# Year label for each observation; used below as the single regressor.
italy_year = \
        [1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951,
       1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1952,
       1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952,
       1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1953, 1953,
       1953, 1953, 1953, 1953, 1953, 1953]

# Convert the year list to a float array before handing it to KernelReg.
italy_year = np.asarray(italy_year, float)

# Local-constant ('lc') kernel regression with the year treated as an
# ordered discrete variable ('o'); bandwidth chosen by least-squares
# cross-validation ('cv_ls').
# NOTE(review): italy_gdp is not defined in this section; it is expected to
# be defined earlier in the file -- confirm.
model = nparam.KernelReg(endog=[italy_gdp],
                         exog=[italy_year],
                         reg_type='lc',
                         var_type='o',
                         bw='cv_ls')

# Bandwidth selected by statsmodels and the reference value it is checked
# against below.
# NOTE(review): the R_ prefix suggests the reference values come from R --
# confirm the source.
sm_bw = model.bw
R_bw = 0.1390096

# Fitted mean and marginal effects; only the first five entries are kept in
# sm_mean2 and sm_mfx.
sm_mean, sm_mfx = model.fit()
sm_mean2 = sm_mean[0:5]
sm_mfx = sm_mfx[0:5]
R_mean = 6.190486

# R-squared of the fit and its reference value.
sm_R2 = model.r_squared()
R_R2 = 0.1435323

# Only the bandwidth is asserted here, to an absolute tolerance of 1e-2.
npt.assert_allclose(sm_bw, R_bw, atol=1e-2)