import numpy as np

from astroML.linear_model import NadarayaWatson  # assumed import for this snippet


def GridDetectibilites(period, amps, detectability, newAmps):
    """Put the detectability info onto a regular grid.

    Args:
        period: Periods for each data point.
        amps: Semi-amplitudes for each data point.
        detectability: Detected fraction for each data point.
        newAmps: Regular grid of semi-amplitudes to interpolate onto.

    Returns:
        2-D numpy array of detectability gridded onto period by newAmps.

    """
    periods = sorted(set(period))
    grid = np.zeros((len(newAmps), len(periods)))
    for i, p in enumerate(periods):
        # Select data for just this period and add endpoints
        ind = np.where(period == p)
        ampsThisP = np.hstack((0, amps[ind], 100))
        detectThisP = np.hstack((0, detectability[ind], 1))
        # Reshape the inputs to the column vectors the fitter expects
        # (reshape() rather than in-place .shape assignment, so the caller's
        # newAmps is not mutated as a side effect)
        ampsThisP = ampsThisP.reshape(-1, 1)
        newAmpsCol = newAmps.reshape(-1, 1)
        # Fit with Gaussian kernel regression
        model = NadarayaWatson('gaussian', h=1)
        model.fit(ampsThisP, detectThisP)
        gridDetectability = model.predict(newAmpsCol)
        # Clip everything to lie between the limits 0 and 1
        gridDetectability[gridDetectability < 1e-3] = 0
        gridDetectability[gridDetectability > 0.999] = 1
        grid[:, i] = gridDetectability
    return grid
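
A minimal usage sketch for the function above; all values below are made up for illustration and are not from the original source:

# usage sketch: made-up inputs, three distinct periods with five trials each
period = np.repeat([1.0, 2.0, 3.0], 5)
amps = np.tile(np.linspace(1., 50., 5), 3)
detectability = np.clip(amps / 50., 0., 1.)   # fake detected fractions
newAmps = np.linspace(0., 100., 25)           # regular amplitude grid
grid = GridDetectibilites(period, amps, detectability, newAmps)
print(grid.shape)  # (25, 3): len(newAmps) by number of distinct periods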
Example #2
import numpy as np
from numpy.testing import assert_allclose

from astroML.linear_model import NadarayaWatson  # assumed import for this snippet


def test_NW_simple():
    X = np.arange(11.)
    y = X + 1
    dy = 1

    # by symmetry, NW regression should get these exactly correct
    Xfit = np.array([4, 5, 6])[:, None]
    y_true = np.ravel(Xfit + 1)

    clf = NadarayaWatson(h=0.5).fit(X[:, None], y, dy)
    y_fit = clf.predict(Xfit)

    assert_allclose(y_fit, y_true)
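
Why this is exact: the NW prediction is the kernel-weighted mean y_hat(x) = sum_i K(x, x_i) y_i / sum_i K(x, x_i). At x = 4, 5, 6 the weights over the grid 0, ..., 10 are symmetric about x and y is linear, so the weighted mean collapses to y(x) regardless of the kernel's exact normalization. A quick standalone check at x = 5, reusing X and y from the test:

w = np.exp(-0.5 * ((5.0 - X) / 0.5) ** 2)  # gaussian weights about x = 5, h = 0.5
print(np.dot(w, y) / w.sum())              # 6.0 (up to floating point), since y = X + 1 is linear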
Example #3
import numpy as np
from numpy.testing import assert_allclose

from astroML.linear_model import NadarayaWatson  # assumed import for this snippet


def test_NW_simple_laplacian_kernel():
    X = np.arange(11.)
    y = X + 1
    dy = 1

    # by symmetry, NW regression should get these exactly correct
    Xfit = np.array([4, 5, 6])[:, None]
    y_true = np.ravel(Xfit + 1)

    kwargs = {'gamma': 10.}
    clf = NadarayaWatson(kernel='laplacian', **kwargs).fit(X[:, None], y, dy)
    y_fit = clf.predict(Xfit)

    assert_allclose(y_fit, y_true)
Example #4
import numpy as np
import pytest

from astroML.linear_model import NadarayaWatson  # assumed import for this snippet


def test_X_invalid_shape_exception():
    X = np.arange(11.)
    y = X + 1
    dy = 1

    clf = NadarayaWatson(h=0.5).fit(X[:, None], y, dy)

    # Xfit.shape[1] does not match the training dimension; should raise an exception
    Xfit = np.array([[4, 5, 6], [1, 2, 3]])

    with pytest.raises(Exception) as e:
        y_fit = clf.predict(Xfit)

    assert str(e.value) == "dimensions of X do not match training dimension"

    # Xfit is not two-dimensional; should raise an exception
    Xfit = np.array([4, 5, 6])

    with pytest.raises(Exception) as e:
        y_fit = clf.predict(Xfit)

    assert str(e.value) == "X must be two-dimensional"
Example #5
import numpy as np
from matplotlib import pyplot as plt

# imports below are assumed for this excerpt (astroML-style)
from astroML.cosmology import Cosmology
from astroML.linear_model import (LinearRegression, PolynomialRegression,
                                  BasisFunctionRegression, NadarayaWatson)

cosmo = Cosmology()
z = np.linspace(0.01, 2, 1000)
# list comprehension instead of map(): map() returns an iterator in Python 3
mu_true = np.asarray([cosmo.mu(zi) for zi in z])

#------------------------------------------------------------
# Define our classifiers
basis_mu = np.linspace(0, 2, 15)[:, None]
basis_sigma = 3 * (basis_mu[1] - basis_mu[0])

subplots = [221, 222, 223, 224]
classifiers = [
    LinearRegression(),
    PolynomialRegression(4),
    BasisFunctionRegression('gaussian', mu=basis_mu, sigma=basis_sigma),
    NadarayaWatson('gaussian', h=0.1)
]
text = [
    'Straight-line Regression', '4th degree Polynomial\n Regression',
    'Gaussian Basis Function\n Regression', 'Gaussian Kernel\n Regression'
]

# number of constraints of the model; because
# Nadaraya-Watson is just a weighted mean, it has only one constraint
n_constraints = [2, 5, len(basis_mu) + 1, 1]

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(8, 8))
fig.subplots_adjust(left=0.1,
                    right=0.95,
Example #6
def fit_NadarayaWatson(features_train, labels_train, features_pred,
                       kernel='gaussian', alpha=0.05):
    # alpha is passed as the second positional argument of NadarayaWatson,
    # i.e. the kernel bandwidth h in astroML's signature
    model = NadarayaWatson(kernel, alpha)
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    return labels_pred
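
A quick usage sketch for the helper above; the data is random and purely illustrative:

import numpy as np

rng = np.random.RandomState(0)
X_train = rng.rand(100, 2)                             # 100 samples, 2 features
y_train = X_train.sum(axis=1) + 0.01 * rng.randn(100)  # noisy linear target
X_pred = rng.rand(10, 2)
y_pred = fit_NadarayaWatson(X_train, y_train, X_pred)
print(y_pred.shape)  # (10,)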
Example #7
    y_tar_train = y_tar_train[idx]
    print('Ending with %i elements' % (x_vec_train.shape[0]))
    print('___________________________________________')

print('#### Final shapes of tables')
print(x_vec_train.shape, y_tar_train.shape)
'''  # closes a commented-out block whose opening quote precedes this excerpt

from sklearn.model_selection import ShuffleSplit  # updated from the removed sklearn.cross_validation

print('#### Running Kernel Regressor')
hs = np.arange(0.001, 0.1, 0.005)
mean_train, mean_test, std_train, std_test = [], [], [], []
for h in hs:
    print(h)
    NW_model = NadarayaWatson('gaussian', h=h)
    print('Fitting')
    scores_train, scores_test = [], []
    print('Predicting and doing cross-validation')
    # modern API: ShuffleSplit(n_splits=...) replaces
    # cross_validation.ShuffleSplit(n, n_iter=...)
    ss_train = ShuffleSplit(n_splits=5, test_size=1. / 3.)
    for train_idx, test_idx in ss_train.split(x_vec):
        NW_model.fit(x_vec[train_idx], y_tar[train_idx])

        y_pre_train = NW_model.predict(x_vec[train_idx])
        scores_train.append(((y_tar[train_idx] - y_pre_train) ** 2).sum()
                            / len(y_pre_train))

        y_pre_test = NW_model.predict(x_vec[test_idx])
        scores_test.append(((y_tar[test_idx] - y_pre_test) ** 2).sum()
                           / len(y_pre_test))
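    # The excerpt ends before the per-bandwidth lists are filled; the lines
    # below are a plausible continuation (an assumption, not the original
    # source), consistent with the mean_*/std_* lists initialized above.
    mean_train.append(np.mean(scores_train))
    std_train.append(np.std(scores_train))
    mean_test.append(np.mean(scores_test))
    std_test.append(np.std(scores_test))

# pick the bandwidth with the lowest mean test MSE
best_h = hs[int(np.argmin(mean_test))]
print('best h:', best_h)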
Example #8
File: fig8_2_mgs.py  Project: gully/SSC383D
fig = plt.figure(figsize=(9, 5))

fig.subplots_adjust(left=0.2, right=0.95,
                    bottom=0.15, top=0.95,
                    hspace=0.1, wspace=0.3)
ax = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

for i in range(NN):

    # Split for cross-validation:
    # 50 of the 100 points for the training set, 50 for validation
    subs = 50

    # fit the data
    clf = NadarayaWatson('gaussian', h=h_arr[i])
    clf.fit(z_sample[0:subs, None], mu_sample[0:subs], dmu[0:subs])

    mu_sample_fit = clf.predict(z_sample[subs:, None])
    mu_fit = clf.predict(z[:, None])

    crossval1 = (np.sum((mu_sample_fit - mu_sample[subs:]) ** 2)
                 / (len(mu_sample[subs:]) - 1))  # n-1 or n here?
    crossval[i] = crossval1

    ax.plot(z, mu_fit, '-', color='#DDDDDD')
    if abs(h_arr[i] - 0.1) < 0.02:
        ax.plot(z, mu_fit, '-', color='#0000FF')

    ax.plot(z, mu_true, '--', c='red')
    ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
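
# Sketch (an assumption, not the original source): after the loop, pick the
# bandwidth with the lowest cross-validation score and show the CV curve on ax2.
best = np.argmin(crossval)
print('best bandwidth h = %.3f (CV score %.4f)' % (h_arr[best], crossval[best]))
ax2.plot(h_arr, crossval, '.k')
ax2.set_xlabel('h')
ax2.set_ylabel('cross-validation score')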
Example #9
def run(self, dataSlice, slicePoint=None):
    data = Table.read('mastertrainingmatch.fits')  # read in quasar data
    # cut out negative fluxes in each filter band
    mask = ((data['PSFFLUX'][:, 0] > 0) & (data['PSFFLUX'][:, 1] > 0) &
            (data['PSFFLUX'][:, 2] > 0) & (data['PSFFLUX'][:, 3] > 0) &
            (data['PSFFLUX'][:, 4] > 0))
    data = data[mask]
    # array for holding DCR slopes
    tempDCRarray = []
    # calculate the DCR slope for each object in our table
    for x in data['ZSPEC_1']:
        # calculate tangent of zenith angle and parallactic offset (tan(Z) and R)
        tanZList, RList = astr.calcR(dataSlice[self.AMcol],
                                     dataSlice[self.Fcol],
                                     zshift=x)
        # calculate a slope and store it in tempDCRarray
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            tanZList, RList)
        tempDCRarray.append(slope)
    # add the column of DCR slopes to our table
    data['DCRSLOPE'] = tempDCRarray
    # make sure all the columns are correctly formatted for vstack
    data = data.filled()
    # colors data, properly formatted
    X = np.vstack([
        data['ug'], data['gr'], data['ri'], data['iz'], data['zs1'],
        data['s1s2']
    ]).T
    # spectroscopic redshift, properly formatted
    y = np.array(data['ZSPEC_1'])
    # split data into 80 percent training, 20 percent testing
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=73)
    # set up the NW model with a gaussian kernel of width 0.05
    model1 = NadarayaWatson('gaussian', 0.05)
    model1.fit(X_train, y_train)  # fit model to training set
    pred1 = model1.predict(X_test)  # predict based on fit
    # test what fraction of points is predicted to within 0.1 of the true value
    n = len(pred1)  # total number of points
    # is the difference between prediction and actual < 0.1?
    mask13 = (np.abs(pred1 - y_test) < 0.1)
    m13 = len(pred1[mask13])  # number of points within 0.1 of the actual value
    frac13 = 1.0 * m13 / n  # fraction of all points within 0.1 of the actual answer
    # colors and DCR, properly formatted
    X2 = np.vstack([
        data['ug'], data['gr'], data['ri'], data['iz'], data['zs1'],
        data['s1s2'], data['DCRSLOPE']
    ]).T
    # potentially unnecessary, given the existence of y
    y2 = np.array(data['ZSPEC_1'])
    # same split as above, so the four sets of objects are identical
    X2_train, X2_test, y2_train, y2_test = train_test_split(
        X2, y2, test_size=0.2, random_state=73)
    # potentially unnecessary given model1, but it is not clear
    # whether a model can be refit safely
    model2 = NadarayaWatson('gaussian', 0.05)
    # fit to the new training sets
    model2.fit(X2_train, y2_train)
    pred2 = model2.predict(X2_test)
    # same test as above: measure how many predictions are within 0.1
    n = len(pred2)
    mask23 = (np.abs(pred2 - y2_test) < 0.1)
    m23 = len(pred2[mask23])
    frac23 = 1.0 * m23 / n
    # fraction of points that moved to within 0.1 with DCR training
    improve = frac23 - frac13
    return improve
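
The "fraction within 0.1" check appears twice in the method above; a small hypothetical helper (not in the original) that computes the same quantity:

def frac_within(pred, truth, tol=0.1):
    # fraction of predictions within tol of the true values
    return np.mean(np.abs(pred - truth) < tol)

# equivalent to the body above:
# improve = frac_within(pred2, y2_test) - frac_within(pred1, y_test)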