# ---- Train/test split ----
# Columns 0 .. Ntrain-1 of `emg` / `lipacc` feed the training set,
# columns Ntrain .. 31 feed the test set.
Ntrain = 25
Ntest = 32 - Ntrain

# ---- Corrupted training set ----
# NOTE(review): judging only by the keyword names, corrupt_data presumably
# removes samples and adds noise on the input (emg) and output (lipacc) sides;
# confirm against its definition.
_corruption = dict(nmissingin=200, nmissingout=200, noisein=0.03, noiseout=0.08)
xtrainlist, vtrainlist = corrupt_data(
    emg[:, :Ntrain], lipacc[:, :Ntrain], timevec, **_corruption
)

# ---- Put the data in spatio-temporal format ----
Xtrain = spatiotemp.LocObsSet(xtrainlist)
Vtrain = spatiotemp.LocObsSet(vtrainlist)


def _column_obs(signals, col):
    """Return the pair (timevec, column `col` of `signals` as a (641, 1) array)."""
    return timevec, signals[:, col].reshape((641, 1))


# The test curves are taken as they are (they do not go through corrupt_data).
_test_cols = range(Ntrain, 32)
xtest = [_column_obs(emg, col) for col in _test_cols]
vtest = [_column_obs(lipacc, col) for col in _test_cols]
Xtest = spatiotemp.LocObsSet(xtest)
Vtest = spatiotemp.LocObsSet(vtest)

# ---- Visual check: all 32 curves, emg on the left axes, lipacc on the right ----
fig, ax = plt.subplots(ncols=2)
for i in range(32):
    ax[1].plot(lipacc[:, i])
    ax[0].plot(emg[:, i])

# ---- Test for the bandwidth parameter ----
# Check whether the input smoothing is able to represent the input functions well.
Dsmoothing, sigmasmoothing = 300, 45
import spatiotempovk.regressors as regressors

# Reload the project modules (in the order written) so that the current
# session picks up their latest source.
for _module in (spatiotemp, kernels, losses, regularizers, regressors, argp2d):
    importlib.reload(_module)

# ---- Create synthetic data ----
argp = argp2d.draw_ar1_gp2d(T=50)
obs = argp2d.draw_observations(20, argp)
# obs = argp2d.draw_observations_sameloc(20, argp)
nx, ny = argp[0].shape

# ---- Store this data in a LocObsSet instance and read back its sizes ----
data = spatiotemp.LocObsSet(obs)
Ms, T, barM = data.get_Ms(), data.get_T(), data.get_barM()

# ---- Look at the time series at a given location ----
# Entry 20 of the "y" observations is collected for every time step.
_probe = 20
test = np.array([data["y"][step][_probe] for step in range(data.get_T())])

# # Kernels for convolution
# gausskerx = kernels.GaussianKernel(sigma=10)
# gausskery = kernels.GaussianKernel(sigma=0.2)
# # Compute kernel matrices
# Kx = gausskerx.compute_K(data["x_flat"])
# Ky = gausskery.compute_K(data["y_flat"])
# convkers = kernels.ConvKernel(gausskerx, gausskery, Kx, Ky)
# ---- Determine the (fixed) locations ----
nlocs = 50

# ---- Dataset sizes and noise levels ----
Ntrain, Ntest = 50, 20
noisein, noiseout = 0.25, 2.5

# ---- Draw random Fourier functions ----
# The locations are nlocs evenly spaced points of [0, 1], stored as a column.
locs = np.linspace(0, 1, nlocs).reshape((nlocs, 1))
datain, dataout = funcs1d.generate_fourier_dataset(
    Ntrain, noisein, noiseout, norm01, 2, locs
)
# The test set is generated with both noise arguments set to 0.
dataintest, dataouttest = funcs1d.generate_fourier_dataset(
    Ntest, 0, 0, norm01, 2, locs
)

# Wrap the four raw datasets in LocObsSet containers.
datain, dataout = map(spatiotemp.LocObsSet, (datain, dataout))
dataintest, dataouttest = map(spatiotemp.LocObsSet, (dataintest, dataouttest))

# # Or load dataset
# with open(os.getcwd() + "/dumps/datasets.pkl", "rb") as i:
#     datain, dataout, dataintest, dataouttest = pickle.load(i)


def _scatter_sample(axis, dataset, idx):
    """Scatter observation `idx` of `dataset` on `axis` (flattened "x" against "y")."""
    axis.scatter(dataset["x"][idx].flatten(), dataset["y"][idx])


# ---- Plot two training samples (indices i and j) ----
i = 0
j = 2
fig, ax = plt.subplots(nrows=2, ncols=2)
_scatter_sample(ax[0, 0], datain, i)
_scatter_sample(ax[0, 1], dataout, i)
ax[0, 0].set_title("Function noisy evaluations")
ax[0, 1].set_title("Derivative noisy evaluations")
_scatter_sample(ax[1, 0], datain, j)
# ---- Build the data ----
Ntrain = 200
Ntest = 20
_nfuncs = Ntrain + Ntest

# ---- Draw random Fourier functions together with their derivatives ----
fouriers = funcs1d.random_fourier_func(norm01, nfreq=2, nsim=_nfuncs)
fouriers_prime = [func.prime() for func in fouriers]

# Every function (input) and its derivative (output) is evaluated at the same
# points: the first coordinate of each row of `locs`.
_abscissas = [x[0] for x in locs]
datain, dataout = [], []
for n in range(_nfuncs):
    Yin = np.array([fouriers[n](s) for s in _abscissas])
    Yout = np.array([fouriers_prime[n](s) for s in _abscissas])
    datain.append((locs, Yin))
    dataout.append((locs, Yout))

# ---- Store them in LocObsSet instances, then split into train / test ----
_full_in = spatiotemp.LocObsSet(datain)
_full_out = spatiotemp.LocObsSet(dataout)
# The last Ntest samples form the test set, the first Ntrain the training set.
dataintest = _full_in.extract_subseq(Ntrain, _nfuncs)
dataouttest = _full_out.extract_subseq(Ntrain, _nfuncs)
datain = _full_in.extract_subseq(0, Ntrain)
dataout = _full_out.extract_subseq(0, Ntrain)

# ---- Kernels ----
# One Gaussian kernel on the locations, one on the observed values; their
# matrices are handed to the convolution kernel.
kernelx = kernels.GaussianKernel(sigma=0.3)
kernely = kernels.GaussianKernel(sigma=0.5)
Kxin = kernelx.compute_K(locs)
Kyin = kernely.compute_K(datain["y_flat"])
kers = kernels.ConvKernel(kernelx, kernely, Kxin, Kyin, sameloc=True)
Ks = kers.compute_K_from_mat(datain.Ms)

# Build regressor
"TMP"].mean() datapd_sorted.loc[datapd_sorted["MONTH_LAT_LONG"] == (month, loc[0], loc[1]), "TMP"] -= avg # Dates contained in the data dates = pd.unique(datapd_sorted["DATE"]) # Extract data and put it in the right form for SpatioTempData extract = [datapd_sorted[datapd_sorted.DATE == d] for d in dates] extract = [(subtab.loc[:, ["LAT", "LONG"]].values, subtab.loc[:, ["TMP"]].values) for subtab in extract] # Create a SpatioTempData object from it data = spatiotemp.LocObsSet(extract) # Train test data ntrain = 10 Strain = data.extract_subseq(0, ntrain) Slast = data.extract_subseq(ntrain - 1, ntrain) Stest = data.extract_subseq(ntrain, ntrain + 1) Strain_input = data.extract_subseq(0, ntrain - 1) Strain_output = data.extract_subseq(1, ntrain) Ms = Strain.get_Ms() # ############# EXPLOITING SAME LOCATION ##################################################################### # Timer start = time.time() # Kernels