def lasso_cv(y, x=None, cv=10, max_deg=3):
    """Return the cross-validated LASSO fit of `y` against `x`.

    Note the argument order: the dependent variable `y` comes first and
    the independent variable `x` is an optional keyword.

    When `y` holds no non-NaN entry there is nothing to fit, so `y` itself
    (the very same object, not a copy) is returned. Otherwise the work is
    delegated to ``ap.lasso_cv(x, y, ...)`` with `cv`, `max_deg` and
    ``max_iter=1e3``, and its result is returned as is.
    """
    nothing_to_fit = np.isnan(y).all()
    if nothing_to_fit:
        return y
    return ap.lasso_cv(x, y, cv=cv, max_deg=max_deg, max_iter=1e3)
def std_series_filt(x, y, max_std=2, max_deg=3):
    """Filter entire vector if the detrended std > max_std.

    The non-NaN samples of `y` are detrended by subtracting the result of
    `lasso_cv` evaluated on those samples, and the standard deviation of
    the residuals is compared against `max_std`. When it is larger, every
    element of `y` is overwritten with NaN **in place**.

    Parameters
    ----------
    x : array
        Independent variable; indexed with the positions of the non-NaN
        samples of `y`.
    y : array
        Series to test. Modified in place when it is rejected.
    max_std : number
        Largest accepted standard deviation of the detrended series.
    max_deg : int
        Forwarded to `lasso_cv` as `max_deg`.

    Returns
    -------
    y : array
        The same object that was passed in.
    std : float or None
        Standard deviation of the detrended series, or None when `y` has
        no non-NaN sample (nothing is fitted in that case).
    """
    if np.isnan(y).all():
        # Nothing to detrend. Return early so None is never compared
        # with max_std (that comparison raises TypeError on Python 3).
        return y, None

    i_valid, = np.where(~np.isnan(y))
    # Keyword arguments on purpose: the lasso_cv defined in this file is
    # declared as (y, x=None, ...), so a positional (x, y) call would swap
    # the dependent and independent variables.
    trend = lasso_cv(x=x[i_valid], y=y[i_valid], max_deg=max_deg)
    std = np.nanstd(y[i_valid] - trend)  # std of detrended series
    if std > max_std:
        y[:] = np.nan
    return y, std
def _peak_filt(x, y, n_std=3, max_deg=3): """Filter spikes in a vector. See peak_filt() """ assert not np.isnan(y).all(), 'empty array' y2 = y.copy() i_notnan, = np.where(~np.isnan(y)) # detrend poly = lasso_cv(x[i_notnan], y[i_notnan], max_deg=max_deg) y2[i_notnan] = y[i_notnan] - poly # filter i_peaks, = np.where(np.abs(y2) > n_std * np.nanstd(y2)) y[i_peaks] = np.nan return len(i_peaks)
s = df[k] m, c = ap.linear_fit_robust(time, s.values, return_coef=True) x, y = ap.linear_fit_robust(time, s.values, return_coef=False) pol = np.polyfit(time, s.values, 2) yy = np.polyval(pol, time) #axs[i,j].fill_between(time, s.values+2*r, s.values-2*r, facecolor='0.5', # edgecolor='w', alpha=0.2) #axs[i,j].plot(time, zeros, ':', c='0.5', linewidth=0.5) axs[i, j].plot(time, y, c='0.2', linewidth=0.75, zorder=2) axs[i, j].plot(time, s.values, 's', c='0.5', markersize=4, zorder=1) if 0: axs[i, j].plot(time, yy, c='b', linewidth=1.5, zorder=2) if 1: axs[i, j].plot(time, ap.lasso_cv(time, s, max_deg=3), c='b', linewidth=1.75, zorder=4) # set plots #------------------------------------------------------------- if i == 0: ap.intitle('%s %.2f %s' % (k, m, UNITS), ax=axs[i, j], loc=8, pad=-1, borderalpha=0.7) else: ap.intitle('%s %.2f' % (k, m),
if 0: # subset print 'subsetting...' region = ap.dotson data, _, _ = ap.get_subset(region, data, lon, lat) nt, ny, nx = data.shape # i,j,k = t,y,x #------------------------------------------------------ if 0: # plot alphas and MSEs (the prediction error curve) N = 3 x = time y = data[:,5,2] # 6,2 -> PIG y = ap.referenced(y, to='mean') y_pred, lasso = ap.lasso_cv(x, y, cv=10, max_deg=N, max_iter=1e3, return_model=True) mse = lasso.mse_path_.mean(axis=1) std = lasso.mse_path_.std(axis=1, ddof=1) / np.sqrt(10) #plt.plot(np.log(lasso.alphas_), mse) plt.errorbar(np.log(lasso.alphas_), mse, yerr=std) plt.vlines(np.log(lasso.alpha_), ymin=mse.min(), ymax=mse.max(), color='r') plt.xlabel('log(alpha)') plt.ylabel('10-fold-average MSE') plt.show() exit() #------------------------------------------------------ data = as_frame(data, time, lat, lon) data = data.apply(ap.referenced, to='mean', raw=True)