def GridDetectibilites(period, amps, detectability, newAmps):
    """Put the detectability info onto a regular grid.

    For every distinct period, a Gaussian-kernel Nadaraya-Watson regression
    of detectability against semi-amplitude is fitted and evaluated at
    ``newAmps``.

    Args:
        period: Periods for each data point.
        amps: Semi-amplitudes for each datapoint.
        detectability: Detected fraction for each datapoint.
        newAmps: Semi-amplitudes at which the regression is evaluated; one
            grid row per entry. The caller's array is left untouched.

    Return:
        2-D numpy array of detectability with shape
        (len(newAmps), number of distinct periods); columns follow the
        distinct periods in increasing order.
    """
    period = np.asarray(period)
    amps = np.asarray(amps)
    detectability = np.asarray(detectability)

    # The fitter wants a column vector. reshape() hands back a new array
    # object, so the shape of the caller's newAmps is not changed (assigning
    # to .shape would have modified it in place).
    evalAmps = np.asarray(newAmps).reshape(-1, 1)

    periods = np.unique(period)  # distinct values, already sorted
    grid = np.zeros((evalAmps.shape[0], len(periods)))

    for i, p in enumerate(periods):
        # Select data for just this period and add endpoints
        ind = period == p
        ampsThisP = np.hstack((0, amps[ind], 100)).reshape(-1, 1)
        detectThisP = np.hstack((0, detectability[ind], 1))

        # Fit with Gaussian Kernel Regression
        model = NadarayaWatson('gaussian', h=1)
        model.fit(ampsThisP, detectThisP)
        gridDetectability = model.predict(evalAmps)

        # Make sure everything is between limits
        gridDetectability[gridDetectability < 1e-3] = 0
        gridDetectability[gridDetectability > 0.999] = 1
        grid[:, i] = gridDetectability

    return grid
def test_NW_simple():
    """Check Nadaraya-Watson predictions on the line y = x + 1.

    The model is trained on the integers 0..10 with a constant error of 1
    and h=0.5, then queried at the interior points 4, 5 and 6.
    """
    train_x = np.arange(11.)
    train_y = train_x + 1
    constant_error = 1

    # by symmetry, NW regression should get these exactly correct
    query = np.array([4, 5, 6])[:, None]
    expected = np.ravel(query + 1)

    regressor = NadarayaWatson(h=0.5)
    fitted = regressor.fit(train_x[:, None], train_y, constant_error)

    assert_allclose(fitted.predict(query), expected)
def test_NW_simple_laplacian_kernel():
    """Check Nadaraya-Watson predictions with the 'laplacian' kernel.

    Same setup as the plain y = x + 1 check, but the estimator is built with
    kernel='laplacian' and the extra keyword gamma=10.
    """
    train_x = np.arange(11.)
    train_y = train_x + 1
    constant_error = 1

    # by symmetry, NW regression should get these exactly correct
    query = np.array([4, 5, 6])[:, None]
    expected = np.ravel(query + 1)

    regressor = NadarayaWatson(kernel='laplacian', gamma=10.)
    fitted = regressor.fit(train_x[:, None], train_y, constant_error)

    assert_allclose(fitted.predict(query), expected)
def test_X_invalid_shape_exception():
    """Check that predict() rejects inputs of the wrong shape.

    A model trained on a single feature column is asked to predict on
    (a) a 2-D array with three columns and (b) a 1-D array; each call must
    raise, and the exception message is compared exactly.
    """
    X = np.arange(11.)
    y = X + 1
    dy = 1

    clf = NadarayaWatson(h=0.5).fit(X[:, None], y, dy)

    # not valid Xfit.shape[1] (three columns, trained on one),
    # should raise an exception
    Xfit = np.array([[4, 5, 6], [1, 2, 3]])
    with pytest.raises(Exception) as e:
        clf.predict(Xfit)
    # Checked after the with-block: statements placed after the raising call
    # inside the block would never execute.
    assert str(e.value) == "dimensions of X do not match training dimension"

    # Xfit is one-dimensional, should raise an exception
    Xfit = np.array([4, 5, 6])
    with pytest.raises(Exception) as e:
        clf.predict(Xfit)
    assert str(e.value) == "X must be two-dimensional"
cosmo = Cosmology() z = np.linspace(0.01, 2, 1000) mu_true = np.asarray(map(cosmo.mu, z)) #------------------------------------------------------------ # Define our classifiers basis_mu = np.linspace(0, 2, 15)[:, None] basis_sigma = 3 * (basis_mu[1] - basis_mu[0]) subplots = [221, 222, 223, 224] classifiers = [ LinearRegression(), PolynomialRegression(4), BasisFunctionRegression('gaussian', mu=basis_mu, sigma=basis_sigma), NadarayaWatson('gaussian', h=0.1) ] text = [ 'Straight-line Regression', '4th degree Polynomial\n Regression', 'Gaussian Basis Function\n Regression', 'Gaussian Kernel\n Regression' ] # number of constraints of the model. Because # Nadaraya-watson is just a weighted mean, it has only one constraint n_constraints = [2, 5, len(basis_mu) + 1, 1] #------------------------------------------------------------ # Plot the results fig = plt.figure(figsize=(8, 8)) fig.subplots_adjust(left=0.1, right=0.95,
def fit_NadarayaWatson(features_train, labels_train, features_pred,
                       kernel='gaussian', alpha=0.05):
    """Fit a NadarayaWatson model and predict labels for new features.

    Args:
        features_train: Training features handed to ``fit``.
        labels_train: Training labels handed to ``fit``.
        features_pred: Features handed to ``predict``.
        kernel: First positional argument of the NadarayaWatson constructor.
        alpha: Second positional argument of the NadarayaWatson constructor.

    Returns:
        Whatever ``predict`` returns for ``features_pred``.
    """
    regressor = NadarayaWatson(kernel, alpha)
    regressor.fit(features_train, labels_train)
    return regressor.predict(features_pred)
# Apply the idx selection to the training targets.
y_tar_train = y_tar_train[idx]
# Report the row count of the training features and the shapes of both tables.
print 'Ending with %i elements' % (x_vec_train.shape[0])
print '___________________________________________'
print '#### Final shapes of tables'
print x_vec_train.shape, y_tar_train.shape
# NOTE(review): the triple quote below has no partner inside this excerpt.
# If it opens a string, everything that follows is inert text (a disabled
# kernel-regression cross-validation loop over bandwidths h) rather than
# executed code -- confirm against the full file.
'''
print '#### Runing Kernel Regressor'
hs = np.arange(0.001, 0.1, 0.005)
#mse_test = []
#mse_train = []
mean_train, mean_test, std_train, std_test = [], [], [], []
for h in hs:
    print h
    NW_model = NadarayaWatson("gaussian", h = h)
    print 'Fitting'
    #NW_model.fit(x_vec_train, y_tar_train)
    scores_train, scores_test = [], []
    print 'Predicting and doing crossvalidation'
    #y_pre_train = NW_model.predict(x_vec_train[1000:2000])
    #mse_train.append(((y_tar_train[1000:2000] - y_pre_train)**2).sum()/len(y_pre_train))
    ss_train = cross_validation.ShuffleSplit(len(y_tar), n_iter=5, test_size=1./3.)
    for train_inx, test_idx in ss_train:
        NW_model.fit(x_vec[train_inx], y_tar[train_inx])
        y_pre_train = NW_model.predict(x_vec[train_inx])
        scores_train.append(((y_tar[train_inx] - y_pre_train)**2).sum()/len(y_pre_train))
        y_pre_test = NW_model.predict(x_vec[test_idx])
        scores_test.append(((y_tar[test_idx] - y_pre_test)**2).sum()/len(y_pre_test))
# One figure with two side-by-side axes; only `ax` is drawn on in this
# fragment, `ax2` is created but not used here.
fig = plt.figure(figsize=(9, 5))
fig.subplots_adjust(left=0.2, right=0.95, bottom=0.15, top=0.95, hspace=0.1, wspace=0.3)
ax = fig.add_subplot(121)
ax2= fig.add_subplot(122)
# For each bandwidth h_arr[i]: fit on the first `subs` samples, score on the
# remaining samples, and draw the fitted curve over the full z grid.
for i in range(0, NN,1):
    #Sub space for cross validation...
    #50/100 points for training set, 50/100 for validation
    subs=50
    # fit the data
    clf = NadarayaWatson('gaussian', h=h_arr[i])
    clf.fit(z_sample[0:subs:, None], mu_sample[0:subs], dmu[0:subs])
    # Predictions on the held-out samples (index subs onward) and on the z grid.
    mu_sample_fit = clf.predict(z_sample[subs:, None])
    mu_fit = clf.predict(z[:, None])
    # Sum of squared residuals on the held-out samples divided by (count - 1).
    crossval1 = (np.sum((mu_sample_fit - mu_sample[subs:]) ** 2) / (len(mu_sample[subs:]) - 1)) # n-1 or n here?
    crossval[i]=crossval1
    # Every fit is drawn in light grey ...
    ax.plot(z, mu_fit, '-', color='#DDDDDD')
    # ... and redrawn in blue when its bandwidth is within 0.02 of 0.1.
    if abs(h_arr[i]-0.1) < 0.02:
        ax.plot(z, mu_fit, '-', color='#0000FF')
# NOTE(review): the two calls below do not depend on i and are assumed to run
# once after the loop -- confirm against the original layout.
ax.plot(z, mu_true, '--', c='red')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
def run(self, dataSlice, slicePoint=None):
    """Return the gain in redshift-prediction accuracy from adding DCR slopes.

    Reads the quasar training table 'mastertrainingmatch.fits', computes a
    DCR slope per object from the ``self.AMcol`` and ``self.Fcol`` columns of
    ``dataSlice``, and trains two Gaussian-kernel NadarayaWatson models
    (second constructor argument 0.05) on an 80/20 split with
    random_state=73: one on the six colors alone, one on the colors plus the
    DCR slope.

    Args:
        dataSlice: Indexed with ``self.AMcol`` and ``self.Fcol``; both
            columns are passed to ``astr.calcR``.
        slicePoint: Not used by this method.

    Returns:
        Fraction of test objects predicted within 0.1 of ``ZSPEC_1`` with
        the DCR slope included, minus the same fraction with colors only.
    """
    catalog = Table.read('mastertrainingmatch.fits')  # quasar data

    # Cut out objects with a non-positive flux in any of the five bands.
    flux = catalog['PSFFLUX']
    positive = flux[:, 0] > 0
    for band in range(1, 5):
        positive = positive & (flux[:, band] > 0)
    catalog = catalog[positive]

    # One DCR slope per object: regress the parallactic offset R against
    # tan(Z) computed at that object's redshift.
    dcr_slopes = []
    for redshift in catalog['ZSPEC_1']:
        tan_z, offsets = astr.calcR(dataSlice[self.AMcol],
                                    dataSlice[self.Fcol],
                                    zshift=redshift)
        dcr_slopes.append(stats.linregress(tan_z, offsets)[0])
    catalog['DCRSLOPE'] = dcr_slopes

    # this just makes sure all the columns are correctly formatted for vstack
    catalog = catalog.filled()

    def _fraction_close(column_names):
        # Train on 80 percent, test on 20 percent, and report the share of
        # test predictions within 0.1 of the spectroscopic redshift.  The
        # fixed random_state keeps the split identical between calls.
        features = np.vstack([catalog[name] for name in column_names]).T
        targets = np.array(catalog['ZSPEC_1'])
        feat_train, feat_test, z_train, z_test = train_test_split(
            features, targets, test_size=0.2, random_state=73)
        regressor = NadarayaWatson('gaussian', 0.05)
        regressor.fit(feat_train, z_train)
        predicted = regressor.predict(feat_test)
        close = np.abs(predicted - z_test) < 0.1
        return 1.0 * len(predicted[close]) / len(predicted)

    colors = ['ug', 'gr', 'ri', 'iz', 'zs1', 's1s2']
    frac_colors_only = _fraction_close(colors)
    frac_with_dcr = _fraction_close(colors + ['DCRSLOPE'])

    # fraction of points that moved into within 0.1 w/ DCR training
    return frac_with_dcr - frac_colors_only