def test_select_obs2(self):
    """select_obs in "single_max" mode must pick exactly the maximum entry."""
    settings = config.Config()
    settings.OBS_MODE = "single_max"
    initializer = VDAInit(settings)

    # All entries land in [-1, 0); index 3 is then forced to be the maximum.
    state = random.rand(10, ) - 1
    state[3] = 1

    obs_vals, obs_indices, num_obs = initializer.select_obs(settings, state)

    assert num_obs == 1
    assert obs_indices == [3]
    assert obs_vals == [1]
def test_vda_init_normalized(self, tmpdir):
    """End-to-end setup test"""
    # Build a small known snapshot matrix: 3 state variables x 4 time steps
    # after the transpose below (rows become time steps, columns the state).
    X = np.zeros((3, 4))
    X[:, :2] = np.arange(6).reshape((3, 2))
    X[0, 3] = 1
    X = X.T #X = (m x n)

    # Persist X to a temp .npy so VDAInit loads it from disk like production.
    INTERMEDIATE_FP = "inter"
    p = tmpdir.mkdir(INTERMEDIATE_FP).join("X_fp.npy")
    p.dump(X)
    # NOTE(review): "allow_pickel" looks like a typo for "allow_pickle" and
    # setting it as an attribute on a py.path object is likely a no-op —
    # confirm whether anything reads this flag.
    p.allow_pickel = True

    settings = config.Config()
    settings.set_X_fp(str(p))
    settings.set_n(3)
    settings.FORCE_GEN_X = False          # use the X dumped above, don't regenerate
    settings.OBS_MODE = "single_max"      # observe only the single max entry
    settings.OBS_VARIANCE = 0.1
    settings.TDA_IDX_FROM_END = 0
    settings.HIST_FRAC = 0.5              # first half of snapshots -> training set
    settings.SHUFFLE_DATA = False         # keep row order deterministic for asserts
    settings.NORMALIZE = True             # the behaviour under test

    vda_initilizer = VDAInit(settings)
    data = vda_initilizer.run()

    X_ret = data.get("X")
    X_train = data.get("train_X")
    V = vda_initilizer.create_V_from_X(data["train_X"], settings)
    M, n = X_ret.shape
    u_c = data.get("u_c")
    u_0 = data.get("u_0")
    nobs = len(data["observations"])

    # Hand-computed normalization constants for the X above; with
    # std == 0.5 everywhere, (X - mean) / std is the same as (X - mean) * 2.
    mean_exp = np.array([0.5, 2.5, 4.5])
    std_exp = np.array([0.5, 0.5, 0.5])
    X_exp = ((X - mean_exp) * 2)
    X_00_exp = -1

    # NOTE(review): leftover debug prints — consider removing.
    print("X_exp", X_exp.shape)
    print("X_ret", X_ret.shape)

    assert X_00_exp == X_train[0, 0] and X_00_exp == X_ret[0, 0]
    assert np.array_equal(X_exp, X_ret)
    assert (3, 4) == (n, M)
    # First two (time) rows form the training split; last row is the control state.
    assert np.array_equal(np.array(X_exp[:2, :]), data.get("train_X"))
    assert np.array_equal(np.array(X_exp[-1, :]), u_c)
    assert np.array_equal(X_exp[:2, :], V)
    # With NORMALIZE=True the background state u_0 should be the zero vector.
    assert np.allclose(np.zeros((3)), u_0)
    assert data.get("observations") == [1.]
    assert nobs == 1
    #assert np.allclose(np.array([1, 0, 0]), data.get("G"))
    assert [1.] == data.get("d")
    assert np.array_equal(std_exp, data.get("std"))
    assert np.array_equal(mean_exp, data.get("mean"))
def test_select_obs1(self):
    """select_obs in "rand" mode: every returned value must match the state
    at its returned index, and OBS_FRAC=1/3 of a 10-vector gives 3 obs.

    Fix: the original loop (`for idx, obs_idx in enumerate(obs_idx)`)
    rebound the name `obs_idx` to each scalar element while iterating it,
    shadowing the sequence. It only worked because `enumerate` captures the
    iterator up front; the rename removes the shadowing hazard.
    """
    settings = config.Config()
    settings.OBS_MODE = "rand"
    settings.OBS_FRAC = 1.0 / 3.0
    initializer = VDAInit(settings)
    u_c = random.rand(10, )
    observations, obs_idx, nobs = initializer.select_obs(settings, u_c)
    # Each observation must equal the state value at its selected index.
    for idx, single_idx in enumerate(obs_idx):
        assert u_c[single_idx] == observations[idx]
    assert nobs == 3, "nobs should be 3"
def __settings(self, tmpdir, normalize, force_init=False):
    """Build (or return cached) settings + DA data fixture for these tests.

    Memoizes on `self.settings`: unless force_init is True, a second call
    returns the settings created by the first call, ignoring the new
    tmpdir/normalize arguments — callers must pass force_init=True to get
    a fresh fixture.
    """
    if hasattr(self, "settings") and not force_init:
        return self.settings
    else:
        # Small known snapshot matrix (see the VDAInit tests): after the
        # transpose, rows are time steps and columns are state variables.
        X = np.zeros((3, 4))
        X[:, :2] = np.arange(6).reshape((3, 2))
        X[0, 3] = 1
        X = X.T
        INTERMEDIATE_FP = "inter"
        p = tmpdir.mkdir(INTERMEDIATE_FP).join("X_fp.npy")
        p.dump(X)
        # NOTE(review): "allow_pickel" looks like a typo for "allow_pickle"
        # and setting it on a py.path object is likely a no-op — confirm.
        p.allow_pickel = True
        settings = config.Config()
        settings.set_X_fp(str(p))
        # NOTE(review): leftover debug prints — consider removing.
        print(str(p))
        print(settings.get_X_fp())
        settings.set_n(3)
        settings.FORCE_GEN_X = False
        settings.OBS_MODE = "single_max"
        settings.OBS_VARIANCE = 0.5
        settings.TDA_IDX_FROM_END = 0
        settings.HIST_FRAC = 0.5
        settings.ALPHA = 1.0
        settings.COMPRESSION_METHOD = "SVD" #we aren't actually using SVD truncated matrix here
        settings.SAVE = False
        settings.TOL = 1e-8
        settings.NORMALIZE = normalize
        settings.SHUFFLE_DATA = False

        vda_initilizer = VDAInit(settings)
        data = vda_initilizer.run()
        # V is (n x M) after the transpose; G_V selects the observed rows.
        data["V"] = vda_initilizer.create_V_from_X(data["train_X"], settings).T
        data["G_V"] = (data["V"][data["obs_idx"]]).astype(float)

        # Cache pieces the tests read directly on self.
        self.G_V = data["G_V"]
        self.u_0 = data.get("u_0")
        self.d = data.get("d")
        nobs = len(data["observations"])
        # Diagonal inverse observation-error covariance, R^-1 = I / sigma^2.
        self.R_inv = (1 / settings.OBS_VARIANCE) * np.eye(nobs)
        data["R_inv"] = self.R_inv
        self.nobs = nobs
        self.settings = settings
        self.data = data
        return settings
def DA_SVD(self, force_init=False, save_vtu=False):
    """Run variational DA with a truncated-SVD reduced space.

    Lazily builds V, its truncation V_trunc, the initial reduced state w_0
    and the observed rows G_V on first call (or when force_init is True),
    then delegates to perform_VarDA.

    Fixes vs original: `self.data.get("G") is 1` identity-compared an int
    literal (a SyntaxWarning on CPython >= 3.8 and implementation-dependent
    behaviour) — replaced with `== 1`; error message grammar corrected.
    """
    if self.data.get("V") is None or force_init:
        V = VDAInit.create_V_from_X(self.data.get("train_X"), self.settings)
        if self.settings.THREE_DIM:
            #(M x nx x ny x nz) -> flatten spatial dims, then transpose
            V = V.reshape((V.shape[0], -1)).T #(n x M)
        else:
            #(M x n)
            V = V.T #(n x M)
        V_trunc, U, s, W = SVD.TSVD(V, self.settings,
                                    self.settings.get_number_modes())

        # Define initial w_0 via the pseudo-inverse of the truncated basis.
        V_trunc_plus = SVD.SVD_V_trunc_plus(U, s, W,
                                            self.settings.get_number_modes())
        if self.settings.NORMALIZE:
            # i.e. this is the value given in Rossella et al (2019).
            w_0 = V_trunc_plus @ np.zeros_like(self.data["u_0"].flatten())
        else:
            w_0 = V_trunc_plus @ self.data["u_0"].flatten()
        #w_0 = np.zeros((W.shape[-1],)) #TODO - I'm not sure about this - can we assume is it 0?

        self.data["V_trunc"] = V_trunc
        self.data["V"] = V
        self.data["w_0"] = w_0
        self.data["V_grad"] = None

        if self.data.get("G") == 1:
            # Identity observation operator: observe everything.
            self.data["G_V"] = self.data["V_trunc"]
        elif self.data.get("G") is None:
            # Preferred path: select observed rows by index.
            assert self.data.get("obs_idx") is not None
            self.data["G_V"] = self.data["V_trunc"][self.data.get("obs_idx")]
        else:
            raise ValueError("G has been deprecated in favour of `obs_idx`. It should be None")

    DA_results = self.perform_VarDA(self.data, self.settings, save_vtu=save_vtu)
    return DA_results
def DA_AE(self, force_init=False, save_vtu=False):
    """Run variational DA with an autoencoder-defined latent space.

    Loads (or reuses) the model, puts it in eval mode, prepares the reduced
    basis when settings.REDUCED_SPACE, otherwise wires up the explicit
    Jacobian, then delegates to perform_VarDA.

    Fixes vs original: `== None` replaced with the idiomatic `is None`;
    `self.data["G"] is 1` identity-compared an int literal (a SyntaxWarning
    on CPython >= 3.8 and implementation-dependent) — replaced with `== 1`.
    """
    if self.data.get("model") is None or force_init:
        self.model = ML_utils.load_model_from_settings(self.settings,
                                                       self.data.get("device"))
        self.data["model"] = self.model
    else:
        self.model = self.data.get("model")

    # Inference only: disable dropout/batch-norm training behaviour.
    self.data["model"].eval()

    if self.settings.REDUCED_SPACE:
        if self.data.get("V_trunc") is None or force_init: #only init if not already init
            V_red = VDAInit.create_V_red(self.data.get("train_X"),
                                         self.data.get("encoder"),
                                         self.settings)
            self.data["V_trunc"] = V_red.T #tau x M
            self.data["w_0"] = np.zeros((V_red.shape[0]))
            if self.data["G"] == 1:
                # Identity observation operator: observe everything.
                self.data["G_V"] = self.data["V_trunc"]
            else:
                self.data["G_V"] = (self.data["G"] @ self.data["V_trunc"]).astype(float)
        self.data["V_grad"] = None
    else:
        # Now access explicit gradient function
        self.data["V_grad"] = self.__maybe_get_jacobian()

    DA_results = self.perform_VarDA(self.data, self.settings, save_vtu=save_vtu)
    return DA_results
def test_create_H(self):
    """create_H builds the observation matrix selecting the max entry.

    Fix: the original constructed `initializer = VDAInit(settings)` twice;
    the redundant second construction is removed.
    """
    settings = config.Config()
    settings.OBS_MODE = "single_max"
    initializer = VDAInit(settings)
    n = 3
    # All entries in [-1, 0); index 2 is then forced to be the maximum.
    u_c = random.rand(n, ) - 1
    u_c[2] = 1 #this will be max value
    observations, obs_idx, nobs = initializer.select_obs(settings, u_c)
    H = VDAInit.create_H(obs_idx, n, nobs)
    # H applied to the state must recover exactly the observed (max) value.
    assert H @ u_c == [1]
    assert H.shape == (1, 3)
    assert np.array_equal(H, np.array([[0, 0, 1]]))
def run(self, print_every=10, print_small=True):
    """Batch-evaluate DA over all control states and return a results DataFrame.

    For each state in self.control_states: injects it into the pipeline data,
    runs AE or SVD data assimilation, optionally computes reconstruction L1/L2
    losses, aggregates per-state metrics, optionally saves averaged MAE .vtu
    files and a .csv of results.

    Args:
        print_every: print running totals every N states.
        print_small: compact progress output when True.

    Returns:
        pandas.DataFrame with one row of metrics per control state.
    """
    shuffle = self.settings.SHUFFLE_DATA #save value
    # Evaluation must be deterministic w.r.t. data order; restored below.
    self.settings.SHUFFLE_DATA = False

    if self.settings.COMPRESSION_METHOD == "SVD":
        if self.settings.REDUCED_SPACE:
            raise NotImplementedError("Cannot have reduced space SVD")
        # Reconstruct the U/s/W file names from the X file path
        # (drops the leading character of the basename).
        fp_base = self.settings.get_X_fp().split("/")[-1][1:]

        U = np.load(self.settings.INTERMEDIATE_FP + "U" + fp_base)
        s = np.load(self.settings.INTERMEDIATE_FP + "s" + fp_base)
        W = np.load(self.settings.INTERMEDIATE_FP + "W" + fp_base)

        num_modes = self.settings.get_number_modes()

        V_trunc = SVD.SVD_V_trunc(U, s, W, modes=num_modes)
        V_trunc_plus = SVD.SVD_V_trunc_plus(U, s, W, modes=num_modes)

        self.DA_pipeline = DAPipeline(self.settings)
        DA_data = self.DA_pipeline.data

        DA_data["V_trunc"] = V_trunc
        DA_data["V"] = None
        # Initial reduced state: project u_0 through the pseudo-inverse.
        DA_data["w_0"] = V_trunc_plus @ DA_data.get("u_0").flatten()
        DA_data["V_grad"] = None
    elif self.settings.COMPRESSION_METHOD == "AE":
        if self.model is None:
            raise ValueError(
                "Must provide an AE torch.nn model if settings.COMPRESSION_METHOD == 'AE'"
            )
        self.DA_pipeline = DAPipeline(self.settings, self.model)
        DA_data = self.DA_pipeline.data
        if self.reconstruction:
            encoder = DA_data.get("encoder")
            decoder = DA_data.get("decoder")
    else:
        raise ValueError(
            "settings.COMPRESSION_METHOD must be in ['AE', 'SVD']")

    self.settings.SHUFFLE_DATA = shuffle  # restore caller's setting

    if self.reconstruction:
        # Summed (not averaged) losses so totals scale with element count.
        L1 = torch.nn.L1Loss(reduction='sum')
        L2 = torch.nn.MSELoss(reduction="sum")

    # Running sums, later divided by counts in __print_totals.
    totals = {
        "percent_improvement": 0,
        "ref_MAE_mean": 0,
        "da_MAE_mean": 0,
        "mse_DA": 0,
        "mse_ref": 0,
        "counts": 0,
        "l1_loss": 0,
        "l2_loss": 0,
        "time": 0,
        "time_online": 0
    }

    # Per-node MAE accumulators, averaged over states for the .vtu output.
    tot_DA_MAE = np.zeros_like(self.control_states[0]).flatten()
    tot_ref_MAE = np.zeros_like(self.control_states[0]).flatten()
    results = []

    if len(self.control_states.shape) in [1, 3]:
        raise ValueError("This is not batched control_state input")
    else:
        num_states = self.control_states.shape[0]

    for idx in range(num_states):
        u_c = self.control_states[idx]

        # Swap the current control state into the shared pipeline data.
        if self.settings.REDUCED_SPACE:
            self.DA_pipeline.data = VDAInit.provide_u_c_update_data_reduced_AE(
                DA_data, self.settings, u_c)
        else:
            self.DA_pipeline.data = VDAInit.provide_u_c_update_data_full_space(
                DA_data, self.settings, u_c)

        t1 = time.time()
        if self.settings.COMPRESSION_METHOD == "AE":
            DA_results = self.DA_pipeline.DA_AE(save_vtu=self.save_vtu)
        elif self.settings.COMPRESSION_METHOD == "SVD":
            DA_results = self.DA_pipeline.DA_SVD(save_vtu=self.save_vtu)
        t2 = time.time()
        t_tot = t2 - t1
        #print("time_online {:.4f}s".format(DA_results["time_online"]))

        if self.reconstruction:
            data_tensor = torch.Tensor(u_c)

            if self.settings.COMPRESSION_METHOD == "AE":
                device = ML_utils.get_device()
                #device = ML_utils.get_device(True, 1)
                data_tensor = data_tensor.to(device)
                # Round-trip the state through the autoencoder.
                data_hat = decoder(encoder(u_c))
                data_hat = torch.Tensor(data_hat)
                data_hat = data_hat.to(device)
            elif self.settings.COMPRESSION_METHOD == "SVD":
                data_hat = SVD.SVD_reconstruction_trunc(
                    u_c, U, s, W, num_modes)
                data_hat = torch.Tensor(data_hat)

            with torch.no_grad():
                l1 = L1(data_hat, data_tensor)
                l2 = L2(data_hat, data_tensor)
        else:
            l1, l2 = None, None

        result = {}
        result["percent_improvement"] = DA_results["percent_improvement"]
        result["ref_MAE_mean"] = DA_results["ref_MAE_mean"]
        result["da_MAE_mean"] = DA_results["da_MAE_mean"]
        result["counts"] = DA_results["counts"]
        result["mse_ref"] = DA_results["mse_ref"]
        result["mse_DA"] = DA_results["mse_DA"]
        if self.reconstruction:
            result["l1_loss"] = l1.detach().cpu().numpy()
            result["l2_loss"] = l2.detach().cpu().numpy()
        result["time"] = t2 - t1
        result["time_online"] = DA_results["time_online"]

        if self.save_vtu:
            tot_DA_MAE += DA_results.get("da_MAE")
            tot_ref_MAE += DA_results.get("ref_MAE")

        #add to results list (that will become a .csv)
        results.append(result)

        #add to aggregated dict results
        totals = self.__add_result_to_totals(result, totals)

        # Periodic progress report (skips idx == 0).
        if idx % print_every == 0 and idx > 0:
            if not print_small:
                print("idx:", idx)
            self.__print_totals(totals, idx + 1, print_small)
            if not print_small:
                print("------------")

    # Final summary over all states.
    self.__print_totals(totals, num_states, print_small)
    if not print_small:
        print("------------")

    results_df = pd.DataFrame(results)

    if self.save_vtu:
        # Average the accumulated per-node MAEs and write them out.
        tot_DA_MAE /= num_states
        tot_ref_MAE /= num_states
        out_fp_ref = self.save_vtu_fp + "av_ref_MAE.vtu"
        out_fp_DA = self.save_vtu_fp + "av_da_MAE.vtu"
        fluidity.utils.save_vtu(self.settings, out_fp_ref, tot_ref_MAE)
        fluidity.utils.save_vtu(self.settings, out_fp_DA, tot_DA_MAE)

    #save to csv
    if self.csv_fp:
        results_df.to_csv(self.csv_fp)

    if self.plot:
        raise NotImplementedError(
            "plotting functionality not implemented yet")

    return results_df
def __init__(self, settings, AEmodel=None, u_c=None):
    """Store settings and eagerly initialize the DA working data.

    Args:
        settings: experiment configuration object.
        AEmodel: optional autoencoder model passed through to VDAInit.
        u_c: optional control state passed through to VDAInit.
    """
    self.settings = settings
    self.data = VDAInit(self.settings, AEmodel, u_c=u_c).run()
def test_check_import(self):
    """Smoke test: VDAInit must expose a callable `run` entry point."""
    run_method = VDAInit(config.Config()).run
    assert callable(run_method), "Should be able to import DA method"