def __init__(self,
                 AE_settings,
                 expdir,
                 batch_sz=BATCH,
                 model=None,
                 start_epoch=None):
        """Initializes the AE training class.

        ::AE_settings - a settings.config.Config class with the DA settings.
            Its COMPRESSION_METHOD must be "AE"
        ::expdir - a directory of form `experiments/<possible_path>` to keep logs
        ::batch_sz - batch size. It is also stored on the settings object
            as `settings.batch_sz`
        ::model - an existing model to continue training. If None, a new
            model is created from AE_settings
        ::start_epoch - epoch to resume from. Must be given (and be >= 0)
            whenever `model` is given; otherwise training starts at epoch 0
        """

        self.settings = AE_settings

        err_msg = """AE_settings must be an AE configuration class"""
        assert self.settings.COMPRESSION_METHOD == "AE", err_msg

        if model is not None:  #for retraining
            assert start_epoch is not None, "If you are RE-training model you must pass start_epoch"
            assert start_epoch >= 0
            self.start_epoch = start_epoch
            self.model = model
            print("Loaded model, ", end="")
        else:
            self.start_epoch = 0
            self.model = ML_utils.load_model_from_settings(AE_settings)
            print("Initialized model, ", end="")

        print("Number of parameters:",
              sum(p.numel() for p in self.model.parameters()))

        self.batch_sz = batch_sz
        self.settings.batch_sz = batch_sz

        #init_expdir returns the path that is used as the experiment directory
        self.expdir = init_expdir(expdir)
        self.settings_fp = self.expdir + "settings.txt"

        if self.settings.SAVE:
            #the settings object is pickled (binary), despite the .txt suffix
            with open(self.settings_fp, "wb") as f:
                pickle.dump(self.settings, f)

        # NOTE(review): the seeds are set here, i.e. after the model above has
        # already been created - confirm whether they were meant to be set
        # before the model is initialized
        ML_utils.set_seeds()

        self.device = ML_utils.get_device()
        #column names used for the per-epoch results
        self.columns = [
            "epoch", "reconstruction_err", "DA_MAE", "DA_ratio_improve_MAE",
            "time_DA(s)", "time_epoch(s)"
        ]
예제 #2
0
def check_init(config, config_kwargs, prnt, activation=None):
    """Instantiate `config`, build its model and print a short summary.

    config        - callable (settings class) called as config(**config_kwargs);
                    the result must be a Config instance
    config_kwargs - dict of keyword arguments for `config` (a falsy value
                    means "no arguments")
    prnt          - if truthy, also print model.layers_encode
    activation    - currently unused
    """
    kwargs = config_kwargs if config_kwargs else {}
    assert isinstance(kwargs, dict)

    settings = config(**kwargs)
    settings.DEBUG = False
    assert isinstance(settings, Config)

    model = ML_utils.load_model_from_settings(settings)

    # Header: class name followed by the arguments it was built with
    print(settings.__class__.__name__)
    if kwargs:
        for key, value in kwargs.items():
            print("{}: {}".format(key, value), end=", ")
        print(end="\n")

    if prnt:
        print(model.layers_encode)

    print("num params", sum(p.numel() for p in model.parameters()))
    print("CHANNELS", settings.get_channels())
    def DA_AE(self, force_init=False, save_vtu=False):
        """Runs VarDA with the AE model held in self.data.

        ::force_init - if True, the model and the reduced space matrices are
            (re)created even if they are already present in self.data
        ::save_vtu - passed through to self.perform_VarDA

        Returns the result of self.perform_VarDA.
        """
        if self.data.get("model") is None or force_init:
            self.model = ML_utils.load_model_from_settings(self.settings, self.data.get("device"))
            self.data["model"] = self.model
        else:
            self.model = self.data.get("model")

        self.data["model"].eval()
        if self.settings.REDUCED_SPACE:
            if self.data.get("V_trunc") is None or force_init: #only init if not already init
                V_red = VDAInit.create_V_red(self.data.get("train_X"),
                                            self.data.get("encoder"),
                                            self.settings)
                self.data["V_trunc"] = V_red.T #tau x M

                self.data["w_0"] = np.zeros((V_red.shape[0]))

                G = self.data["G"]
                #G may be the scalar 1 (treated as identity) or a matrix.
                #Compare by value, and only for scalars: `G is 1` relies on
                #int caching and `G == 1` on an array is not a single bool
                if np.isscalar(G) and G == 1:
                    self.data["G_V"] = self.data["V_trunc"]
                else:
                    self.data["G_V"] = (G @ self.data["V_trunc"]).astype(float)

            self.data["V_grad"] = None
        else:

            # Now access explicit gradient function
            self.data["V_grad"] = self.__maybe_get_jacobian()

        DA_results = self.perform_VarDA(self.data, self.settings, save_vtu=save_vtu)
        return DA_results
def calc_DA_dir(dir,
                params,
                expdir,
                prnt=True,
                all_data=True,
                epoch=None,
                save_vtu=False,
                gpu_device=0,
                return_df=False):
    """Load the model stored in `dir`, run batched DA and summarise the result.

    dir        - directory passed to ML_utils.load_model_and_settings_from_dir
    params     - forwarded to run_DA_batch
    expdir     - forwarded to run_DA_batch
    prnt       - if truthy, print the summary and the last 5 result rows
    all_data   - forwarded to run_DA_batch
    epoch      - forwarded to the loader as `choose_epoch`
    save_vtu   - forwarded to run_DA_batch
    gpu_device - device index, or the string "CPU" to disable the GPU
    return_df  - if truthy, the results DataFrame is returned as well

    Returns (mse_DA, model_data) or (mse_DA, model_data, df), where mse_DA is
    the mean of the "mse_DA" column and model_data additionally holds the
    model's parameter count under "num_params".
    """
    # Compare by value: `is not "CPU"` tests object identity, which is not
    # guaranteed to hold for equal strings.
    gpu = gpu_device != "CPU"

    model, settings = ML_utils.load_model_and_settings_from_dir(
        dir, device_idx=gpu_device, choose_epoch=epoch, gpu=gpu)

    df = run_DA_batch(settings,
                      model,
                      all_data,
                      expdir,
                      params,
                      save_vtu,
                      gpu_device=gpu_device)
    mse_DA = df["mse_DA"].mean()
    model_data = get_model_specific_data(settings, dir, model=model)
    model_data["num_params"] = sum(p.numel() for p in model.parameters())
    if prnt:
        print(mse_DA, model_data, expdir)
        print(df.tail(5))

    if return_df:
        return mse_DA, model_data, df
    return mse_DA, model_data
예제 #5
0
def main():
    """Load settings from `exp_load`, then train an AE with TrainAE.

    All configuration (exp_base, exp_load, model_fp, GPU_DEVICE, EPOCHS, LR,
    ...) is read from module-level names.
    """
    idx = 0
    layer = 3
    cardinality = 1

    print("Layers", layer)
    print("Cardinality", cardinality)

    # Only the settings are needed here; the loaded model is discarded.
    _, settings = ML_utils.load_model_and_settings_from_dir(exp_load)
    settings.AE_MODEL_FP = model_fp
    settings.GPU_DEVICE = GPU_DEVICE
    settings.export_env_vars()

    expdir = exp_base + str(idx) + "/"

    # NOTE(review): calc_DA_MAE is passed as the third positional argument -
    # confirm this matches TrainAE's signature (elsewhere in this file TrainAE
    # is called with batch_sz / model / start_epoch keywords).
    trainer = TrainAE(settings, expdir, calc_DA_MAE)

    trainer.train(EPOCHS,
                  test_every=test_every,
                  num_epochs_cv=num_epochs_cv,
                  learning_rate=LR,
                  print_every=print_every,
                  small_debug=SMALL_DEBUG_DOM)
예제 #6
0
    def train_test_DA_split_maybe_normalize(X, settings):
        """Split X into train data, test data and a DA control state.

        The first int(M * HIST_FRAC) samples are the historical (training)
        data. The mean and std are computed from those samples only and, if
        settings.NORMALIZE is set, applied to the whole of X. The control
        state u_c is the sample TDA_IDX_FROM_END places before the last one;
        the test data lies between the training data and u_c.

        Returns (train_X, test_X, u_c, X, mean, std), where X is the
        (possibly normalized) full data.
        """
        num_samples, _ = SplitData.get_dim_X(X, settings)

        #historical data == training set in ML terminology
        split_at = int(num_samples * settings.HIST_FRAC)
        historical = X[:split_at]

        #statistics come from the training set only
        mean = np.mean(historical, axis=0)
        std = np.std(historical, axis=0)

        #a zero std is replaced by 1 so that the feature is zero post-normalization
        std = np.where(std <= 0., 1, std)

        if settings.NORMALIZE:
            X = (X - mean) / std

        # "Observations" are assimilated at the single timestep t_DA, which
        # corresponds to the control state u_c
        t_DA = num_samples - (settings.TDA_IDX_FROM_END + 1)
        assert t_DA >= split_at, (
            "Cannot select observation from historical data."
            "Reduce HIST_FRAC or reduce TDA_IDX_FROM_END to prevent overlap.\n"
            "t_DA = {} and hist_idx = {}".format(t_DA, split_at))
        assert t_DA > split_at, ("Test set cannot have zero size")

        train_X, test_X, u_c = X[:split_at], X[split_at:t_DA], X[t_DA]

        if settings.SHUFFLE_DATA:
            ML_utils.set_seeds()
            #in-place shuffle of each subset (train first, then test)
            for subset in (train_X, test_X):
                np.random.shuffle(subset)

        return train_X, test_X, u_c, X, mean, std
예제 #7
0
def get_attenuation_from_dir(dir, model=None):
    """Return the (encode, decode) "attenuate_res" parameter values of a model.

    dir   - directory to load the model from; only used when `model` is None
    model - an already loaded model (anything with `named_parameters()`)

    Every parameter whose name contains "attenuate_res" is inspected: if the
    name also contains "encode" its value is the encode result, otherwise the
    decode result. A value that is never found is returned as None.
    """
    # Test against None explicitly: container-style modules define __len__,
    # so an empty one is falsy and would wrongly trigger a reload.
    if model is None:
        model, _ = ML_utils.load_model_and_settings_from_dir(dir)

    encode, decode = None, None
    for name, param in model.named_parameters():
        if "attenuate_res" not in name:
            continue
        if "encode" in name:
            encode = param.item()
        else:
            decode = param.item()
    return encode, decode
예제 #8
0
def check_DA_dir(dir, kwargs, all_data, expdir, params, prnt):
    """Run batched DA for the model in `dir`; remove `expdir` if it fails.

    dir      - directory passed to ML_utils.load_model_and_settings_from_dir
    kwargs   - unused
    all_data, expdir, params - forwarded to run_DA_batch
    prnt     - if truthy, print the last 10 rows of the results

    Raises whatever exception the load / DA run raised, after `expdir` has
    been removed on a best-effort basis.
    """
    try:
        model, settings = ML_utils.load_model_and_settings_from_dir(dir)
        df = run_DA_batch(settings, model, all_data, expdir, params)
        if prnt:
            print(df.tail(10))
    except Exception:
        # Cleanup must never mask the original failure, so errors while
        # deleting are ignored and the original exception is re-raised with
        # its traceback intact.
        shutil.rmtree(expdir, ignore_errors=True)
        raise
    def training_loop_AE(self,
                         device=None,
                         print_every=2,
                         test_every=5,
                         save_every=5,
                         model_dir=None):
        """Runs a torch AE model training loop.
        NOTE: Ensure that the loss_fn is in mode "sum"

        ::device - device to train on. If None, ML_utils.get_device() is used
        ::print_every, test_every - passed to self.train_one_epoch
        ::save_every - only used to decide whether the final model still
            needs saving at the end of the loop
        ::model_dir - directory prefix for the saved model. If None the final
            model is not saved here

        Epochs run from (start_epoch + num_epochs_cv) up to
        (start_epoch + num_epoch), exclusive.

        Returns (train_losses, test_losses): one train entry per epoch, and
        one test entry for every epoch that produced a truthy test loss.
        """
        model = self.model
        self.model_dir = model_dir

        if device is None:
            device = ML_utils.get_device()
        self.device = device

        ML_utils.set_seeds()
        train_losses = []
        test_losses = []

        self.start = self.num_epochs_cv + self.start_epoch
        self.end = self.start_epoch + self.num_epoch
        epoch = self.end - 1  #for case where no training occurs

        for epoch in range(self.start, self.end):

            self.epoch = epoch

            train_loss, test_loss = self.train_one_epoch(
                epoch, print_every, test_every)
            train_losses.append(train_loss)
            if test_loss:
                test_losses.append(test_loss)

        if epoch % save_every != 0 and self.model_dir is not None:
            #Save model (if new model hasn't just been saved)
            model_fp_new = "{}{}.pth".format(self.model_dir, epoch)
            torch.save(model.state_dict(), model_fp_new)

        return train_losses, test_losses
예제 #10
0
 def act_constr(activation_fn):
     """Return a two-argument factory that builds the requested activation.

     activation_fn is one of "relu", "lrelu", "GDN", "prelu", or a callable
     (which the factory returns unchanged). The factory is called as
     factory(x, y); only "GDN" (x and y) and "prelu" (x) use the arguments.
     Raises NotImplementedError for anything else.
     """
     if activation_fn == "relu":

         def make_relu(x, y):
             return nn.ReLU()

         return make_relu

     if activation_fn == "lrelu":

         def make_leaky_relu(x, y):
             return nn.LeakyReLU(0.05)

         return make_leaky_relu

     if activation_fn == "GDN":

         def make_gdn(x, y):
             return GDN(x, ML_utils.get_device(), y)

         return make_gdn

     if callable(activation_fn):

         def passthrough(x, y):
             return activation_fn

         return passthrough

     if activation_fn == "prelu":
         # must be initialized in situ: PReLU is built with x

         def make_prelu(x, y):
             return nn.PReLU(x)

         return make_prelu

     raise NotImplementedError("Activation function not implemented")
    def test_CAE_forward_nobatch(self):
        """A CAE_3D forward pass must work on a single, non-batched input."""
        settings = CAEConfig()
        Cin = settings.get_channels()[0]
        #no leading batch dimension: (channels,) + spatial dims
        size = (Cin,) + settings.get_n()
        device = ML.get_device()
        x = torch.rand(size, requires_grad=True, device=device)

        model = CAE_3D(**settings.get_kwargs())
        model.to(device)

        try:
            model(x)
        except Exception:
            #`except Exception` rather than a bare except, so that
            #KeyboardInterrupt / SystemExit are not turned into test failures
            pytest.fail("Unable to do forward pass")
def retrain(dir, gpu_device, new_expdir, batch_sz=None):
    """Build a TrainAE that resumes training of the model stored in `dir`.

    The model, its settings and the epoch it was saved at are loaded from
    `dir` (on device index `gpu_device`). Training restarts at the following
    epoch and logs to `new_expdir`. If `batch_sz` is None, the batch size
    stored on the loaded settings is used.
    """
    loaded = ML_utils.load_model_and_settings_from_dir(
        dir, device_idx=gpu_device, return_epoch=True)
    model, settings, prev_epoch = loaded

    if batch_sz is None:
        batch_sz = settings.batch_sz

    return TrainAE(settings,
                   new_expdir,
                   batch_sz=batch_sz,
                   model=model,
                   start_epoch=prev_epoch + 1)
    def test_CAE_linear_latent_nonbatched(self):
        """Encoding a single, non-batched input must give a 1D latent vector
        whose length is settings.get_number_modes()."""
        settings = CAEConfig()
        Cin = settings.get_channels()[0]
        #no leading batch dimension: (channels,) + spatial dims
        size = (Cin, ) + settings.get_n()
        device = ML.get_device()
        x = torch.rand(size, requires_grad=True, device=device)

        model = CAE_3D(**settings.get_kwargs())
        model.to(device)
        encode = model.encode

        try:
            w = encode(x)
        except Exception:
            #`except Exception` rather than a bare except, so that
            #KeyboardInterrupt / SystemExit are not turned into test failures
            pytest.fail("Unable to do forward pass")

        assert len(w.shape) == 1, "There should only be one dimension"
        assert w.shape[0] == settings.get_number_modes()
예제 #14
0
    def run(self):
        """Generates matrices for VarDA. All returned matrices are in the
        (M X n) or (M x nx x ny x nz) format

        Returns a dict with the keys "d", "G", "observations", "model",
        "obs_idx", "encoder", "decoder", "u_c", "u_0", "X", "train_X",
        "test_X", "std", "mean" and "device", plus "w_0" when it is not None.
        "encoder"/"decoder" are None unless COMPRESSION_METHOD is "AE".
        """
        settings = self.settings
        loader = settings.get_loader()
        splitter = SplitData()

        X = loader.get_X(settings)

        train_X, test_X, u_c_std, X, mean, std = splitter.train_test_DA_split_maybe_normalize(
            X, settings)

        #a control state supplied by the caller takes precedence
        if self.u_c is None:
            self.u_c = u_c_std

        # We will take initial condition u_0, as mean of historical data
        if settings.NORMALIZE:
            u_0 = np.zeros_like(mean)  #since the data is mean centred
        else:
            u_0 = mean

        encoder = None
        decoder = None

        device = ML_utils.get_device()
        model = self.AEmodel
        #explicit None test: container-style modules define __len__, so an
        #empty one would be falsy and silently skip the device transfer
        if model is not None:
            model.to(device)

        if self.settings.COMPRESSION_METHOD == "AE":
            #get encoder
            if model is None:
                model = ML_utils.load_model_from_settings(settings)

            def __create_encoderOrDecoder(fn):
                """This returns a function that deals with encoder/decoder
                input dimensions (e.g. adds channel dim for 3D case)"""
                def ret_fn(vec):
                    vec = torch.Tensor(vec).to(device)

                    #for 3D case, unsqueeze for channel
                    if self.settings.THREE_DIM:
                        dims = len(vec.shape)
                        if dims == 3:
                            vec = vec.unsqueeze(0)
                        elif dims == 4:
                            #batched input
                            vec = vec.unsqueeze(1)
                    with torch.no_grad():
                        res = fn(vec).detach().cpu()
                    #for 3D case, squeeze for channel
                    dims = len(res.shape)
                    if self.settings.THREE_DIM and dims > 2:
                        if dims == 4:
                            res = res.squeeze(0)
                        elif dims == 5:  #batched input
                            res = res.squeeze(1)
                    return res.numpy()

                return ret_fn

            encoder = __create_encoderOrDecoder(model.encode)
            decoder = __create_encoderOrDecoder(model.decode)

        H_0, obs_idx = None, None

        if self.settings.REDUCED_SPACE:
            if self.settings.COMPRESSION_METHOD == "SVD":
                raise NotImplementedError(
                    "SVD in reduced space not implemented")

            #the reduced space case always runs with OBS_MODE "all"
            self.settings.OBS_MODE = "all"

            observations, H_0, w_0, d = self.__get_obs_and_d_reduced_space(
                self.settings, self.u_c, u_0, encoder)

        else:
            observations, w_0, d, obs_idx = self.__get_obs_and_d_not_reduced(
                self.settings, self.u_c, u_0, encoder)

        #TODO - **maybe** get rid of this monstrosity...:
        #i.e. you could return a class that has these attributes:

        data = {
            "d": d,
            "G": H_0,
            "observations": observations,
            "model": model,
            "obs_idx": obs_idx,
            "encoder": encoder,
            "decoder": decoder,
            "u_c": self.u_c,
            "u_0": u_0,
            "X": X,
            "train_X": train_X,
            "test_X": test_X,
            "std": std,
            "mean": mean,
            "device": device
        }

        if w_0 is not None:
            data["w_0"] = w_0

        return data
    def __maybe_cross_val_lr(self, test_every, num_epochs_cv=8):
        """Optionally picks a learning rate by briefly training with several
        candidate rates.

        ::test_every - passed to self.train_one_epoch
        ::num_epochs_cv - number of epochs to train each candidate for. It is
            capped at self.num_epoch. If falsy, no search is done.

        For each candidate a fresh model is created from self.settings and
        trained for num_epochs_cv epochs. The candidate with the lowest
        `reconstruction_err` in its final training epoch wins; its model and
        optimizer are kept on self and self.learning_rate is set to 0.8 times
        the winning rate.

        Returns (learning_rate, train_loss, test_loss) of the best candidate.
        NOTE(review): when num_epochs_cv is falsy only self.learning_rate is
        returned (not a 3-tuple) - confirm callers handle both shapes.
        """
        if not num_epochs_cv:
            self.num_epochs_cv = 0
            return self.learning_rate
        elif self.num_epoch < num_epochs_cv:
            #cannot cross-validate for longer than the whole training run
            self.num_epochs_cv = self.num_epoch
        else:
            self.num_epochs_cv = num_epochs_cv

        mult = 1
        if self.settings.BATCH_NORM:  #i.e. generally larger learning_rate with BN
            mult = 5

        mult *= BATCH_MULT  #linear multiply by size of batch: https://arxiv.org/abs/1706.02677

        #candidate learning rates, scaled by the multiplier above
        lrs_base = [0.0001, 0.0003, 0.001]
        lrs = [mult * x for x in lrs_base]

        res = []  #final-epoch training reconstruction_err, one per candidate
        optimizers = []

        for idx, lr in enumerate(lrs):

            ML_utils.set_seeds()  #set seeds before init model
            self.model = ML_utils.load_model_from_settings(self.settings)
            self.optimizer = optim.Adam(self.model.parameters(), lr)
            test_losses = []
            train_losses = []
            print("learning rate:", lr)
            for epoch in range(self.start_epoch,
                               self.num_epochs_cv + self.start_epoch):
                self.epoch = epoch
                train, test = self.train_one_epoch(epoch, self.print_every,
                                                   test_every,
                                                   self.num_epochs_cv)
                if test:
                    test_losses.append(test)
                train_losses.append(train)

            #the score of a candidate is the reconstruction_err of its last epoch
            df = pd.DataFrame(train_losses, columns=self.columns)
            train_final = df.tail(1).reconstruction_err

            res.append(train_final.values[0])
            optimizers.append(self.optimizer)

            #save model if best so far

            if res[-1] == min(res):
                best_test = test_losses
                best_train = train_losses
                best_idx = idx
                #file name is <model_dir><last epoch>-<lr>.pth
                model_fp_new = "{}{}-{}.pth".format(self.model_dir, epoch, lr)
                torch.save(self.model.state_dict(), model_fp_new)
                best_model = self.model

        #continue training with a slightly reduced version of the best rate
        self.learning_rate = lrs[best_idx] * 0.8
        # NOTE(review): the optimizer kept here was created with the
        # un-reduced rate lrs[best_idx] - confirm that is intended
        self.optimizer = optimizers[best_idx]
        self.model = best_model
        test_loss = best_test
        train_loss = best_train
        return self.learning_rate, train_loss, test_loss
예제 #16
0
 def test_set_seeds_raiseNameError(self):
     """set_seeds() must raise NameError when the SEED env variable is unset."""
     env = os.environ
     # pop() also removes an empty-string SEED, which a truthiness check
     # would leave in place
     saved = env.pop("SEED", None)
     try:
         with pytest.raises(NameError):
             ML_utils.set_seeds()
     finally:
         # restore the variable so that other tests see the original environment
         if saved is not None:
             env["SEED"] = saved
예제 #17
0
    def run(self, print_every=10, print_small=True):
        """Runs DA for every control state in self.control_states.

        ::print_every - running totals are printed every `print_every` states
        ::print_small - passed to self.__print_totals; when False, extra
            separator / index lines are printed as well

        For each control state the DA pipeline is updated and run (DA_AE or
        DA_SVD, depending on settings.COMPRESSION_METHOD). If
        self.reconstruction is set, the summed L1 and L2 reconstruction losses
        of the compression are recorded too. Results are optionally written to
        self.csv_fp and, if self.save_vtu is set, averaged MAE fields are
        written as .vtu files.

        Returns a pandas DataFrame with one row per control state.
        Raises ValueError for an unknown COMPRESSION_METHOD, a missing AE
        model, or non-batched control states; NotImplementedError for reduced
        space SVD or if self.plot is set.
        """

        #shuffling is switched off while the DA pipeline is constructed
        shuffle = self.settings.SHUFFLE_DATA  #save value
        self.settings.SHUFFLE_DATA = False

        if self.settings.COMPRESSION_METHOD == "SVD":
            if self.settings.REDUCED_SPACE:
                raise NotImplementedError("Cannot have reduced space SVD")

            #file name of X without its first character; the U/s/W files are
            #stored under INTERMEDIATE_FP with that same suffix
            fp_base = self.settings.get_X_fp().split("/")[-1][1:]

            U = np.load(self.settings.INTERMEDIATE_FP + "U" + fp_base)
            s = np.load(self.settings.INTERMEDIATE_FP + "s" + fp_base)
            W = np.load(self.settings.INTERMEDIATE_FP + "W" + fp_base)

            num_modes = self.settings.get_number_modes()

            V_trunc = SVD.SVD_V_trunc(U, s, W, modes=num_modes)
            V_trunc_plus = SVD.SVD_V_trunc_plus(U, s, W, modes=num_modes)

            self.DA_pipeline = DAPipeline(self.settings)
            DA_data = self.DA_pipeline.data
            DA_data["V_trunc"] = V_trunc
            DA_data["V"] = None
            DA_data["w_0"] = V_trunc_plus @ DA_data.get("u_0").flatten()
            DA_data["V_grad"] = None

        elif self.settings.COMPRESSION_METHOD == "AE":
            if self.model is None:
                raise ValueError(
                    "Must provide an AE torch.nn model if settings.COMPRESSION_METHOD == 'AE'"
                )

            self.DA_pipeline = DAPipeline(self.settings, self.model)
            DA_data = self.DA_pipeline.data

            #encoder/decoder are only needed for the reconstruction losses
            if self.reconstruction:
                encoder = DA_data.get("encoder")
                decoder = DA_data.get("decoder")

        else:
            raise ValueError(
                "settings.COMPRESSION_METHOD must be in ['AE', 'SVD']")

        # NOTE(review): if anything above raises, SHUFFLE_DATA is left as False
        self.settings.SHUFFLE_DATA = shuffle

        if self.reconstruction:
            L1 = torch.nn.L1Loss(reduction='sum')
            L2 = torch.nn.MSELoss(reduction="sum")

        #running aggregates, updated via self.__add_result_to_totals
        totals = {
            "percent_improvement": 0,
            "ref_MAE_mean": 0,
            "da_MAE_mean": 0,
            "mse_DA": 0,
            "mse_ref": 0,
            "counts": 0,
            "l1_loss": 0,
            "l2_loss": 0,
            "time": 0,
            "time_online": 0
        }

        #accumulators for the MAE fields (only filled if self.save_vtu)
        tot_DA_MAE = np.zeros_like(self.control_states[0]).flatten()
        tot_ref_MAE = np.zeros_like(self.control_states[0]).flatten()
        results = []

        # NOTE(review): this batched-input check runs only after
        # self.control_states[0] has already been indexed above
        if len(self.control_states.shape) in [1, 3]:
            raise ValueError("This is not batched control_state input")
        else:
            num_states = self.control_states.shape[0]

        for idx in range(num_states):
            u_c = self.control_states[idx]
            #update the pipeline data for this control state
            if self.settings.REDUCED_SPACE:
                self.DA_pipeline.data = VDAInit.provide_u_c_update_data_reduced_AE(
                    DA_data, self.settings, u_c)
            else:
                self.DA_pipeline.data = VDAInit.provide_u_c_update_data_full_space(
                    DA_data, self.settings, u_c)
            #time the DA call itself
            t1 = time.time()
            if self.settings.COMPRESSION_METHOD == "AE":
                DA_results = self.DA_pipeline.DA_AE(save_vtu=self.save_vtu)
            elif self.settings.COMPRESSION_METHOD == "SVD":
                DA_results = self.DA_pipeline.DA_SVD(save_vtu=self.save_vtu)
            t2 = time.time()
            # NOTE(review): t_tot is never used; result["time"] recomputes t2 - t1
            t_tot = t2 - t1
            #print("time_online {:.4f}s".format(DA_results["time_online"]))

            if self.reconstruction:
                #compare u_c with its compressed-then-reconstructed version
                data_tensor = torch.Tensor(u_c)
                if self.settings.COMPRESSION_METHOD == "AE":
                    device = ML_utils.get_device()
                    #device = ML_utils.get_device(True, 1)

                    data_tensor = data_tensor.to(device)

                    data_hat = decoder(encoder(u_c))
                    data_hat = torch.Tensor(data_hat)
                    data_hat = data_hat.to(device)

                elif self.settings.COMPRESSION_METHOD == "SVD":

                    data_hat = SVD.SVD_reconstruction_trunc(
                        u_c, U, s, W, num_modes)

                    data_hat = torch.Tensor(data_hat)
                with torch.no_grad():
                    l1 = L1(data_hat, data_tensor)
                    l2 = L2(data_hat, data_tensor)
            else:
                l1, l2 = None, None

            #one row of the results table
            result = {}
            result["percent_improvement"] = DA_results["percent_improvement"]
            result["ref_MAE_mean"] = DA_results["ref_MAE_mean"]
            result["da_MAE_mean"] = DA_results["da_MAE_mean"]
            result["counts"] = DA_results["counts"]
            result["mse_ref"] = DA_results["mse_ref"]
            result["mse_DA"] = DA_results["mse_DA"]
            if self.reconstruction:
                result["l1_loss"] = l1.detach().cpu().numpy()
                result["l2_loss"] = l2.detach().cpu().numpy()
            result["time"] = t2 - t1
            result["time_online"] = DA_results["time_online"]
            if self.save_vtu:
                tot_DA_MAE += DA_results.get("da_MAE")
                tot_ref_MAE += DA_results.get("ref_MAE")
            #add to results list (that will become a .csv)
            results.append(result)

            #add to aggregated dict results
            totals = self.__add_result_to_totals(result, totals)

            #periodic progress report (never for the first state)
            if idx % print_every == 0 and idx > 0:
                if not print_small:
                    print("idx:", idx)
                self.__print_totals(totals, idx + 1, print_small)
        #final report over all states
        if not print_small:
            print("------------")
        self.__print_totals(totals, num_states, print_small)
        if not print_small:
            print("------------")

        results_df = pd.DataFrame(results)
        if self.save_vtu:
            #average the accumulated MAE fields over all states and save them
            tot_DA_MAE /= num_states
            tot_ref_MAE /= num_states
            out_fp_ref = self.save_vtu_fp + "av_ref_MAE.vtu"
            out_fp_DA = self.save_vtu_fp + "av_da_MAE.vtu"
            fluidity.utils.save_vtu(self.settings, out_fp_ref, tot_ref_MAE)
            fluidity.utils.save_vtu(self.settings, out_fp_DA, tot_DA_MAE)

        #save to csv
        if self.csv_fp:
            results_df.to_csv(self.csv_fp)

        # NOTE(review): this is only raised after all the work above is done;
        # with self.plot set the results are computed (and saved) but never returned
        if self.plot:
            raise NotImplementedError(
                "plotting functionality not implemented yet")
        return results_df