def __init__(self, simulator, quick_scale, noise_model, sparse, init_mode):
    """Build a GLM Estimator around the simulator's input data.

    :param simulator: Simulator object exposing ``input_data`` (design matrices,
        constraint matrices, size factors) — schema assumed from usage below.
    :param quick_scale: Forwarded to the Estimator constructor.
    :param noise_model: One of "nb", "norm", "beta"; selects which backend
        Estimator / InputDataGLM classes are imported.
    :param sparse: If True, wrap the data matrix as a scipy CSR matrix.
    :param init_mode: Initialization mode forwarded as both ``init_a`` and ``init_b``.
    :raises ValueError: If ``noise_model`` is None or not recognized.
    """
    if noise_model is None:
        raise ValueError("noise_model is None")
    if noise_model == "nb":
        from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM
    elif noise_model == "norm":
        from batchglm.api.models import Estimator, InputDataGLM
    elif noise_model == "beta":
        from batchglm.api.models.tf1.glm_beta import Estimator, InputDataGLM
    else:
        raise ValueError("noise_model not recognized")

    batch_size = 2000
    # IRLS-family optimizers and the FIM are only enabled for nb/norm
    # (hoisted: this predicate was recomputed five times in the original).
    has_irls = noise_model in ["nb", "norm"]
    provide_optimizers = {
        "gd": True, "adam": True, "adagrad": True, "rmsprop": True,
        "nr": True, "nr_tr": True,
        "irls": has_irls, "irls_gd": has_irls,
        "irls_tr": has_irls, "irls_gd_tr": has_irls
    }

    # Fix: the sparse/dense branches previously duplicated the entire
    # InputDataGLM call; only `data` differs between them.
    data = simulator.input_data.x
    if sparse:
        data = scipy.sparse.csr_matrix(data)
    input_data = InputDataGLM(
        data=data,
        design_loc=simulator.input_data.design_loc,
        design_scale=simulator.input_data.design_scale,
        design_loc_names=simulator.input_data.design_loc_names,
        design_scale_names=simulator.input_data.design_scale_names,
        constraints_loc=simulator.input_data.constraints_loc,
        constraints_scale=simulator.input_data.constraints_scale,
        size_factors=simulator.input_data.size_factors,
        as_dask=False
    )

    self.estimator = Estimator(
        input_data=input_data,
        batch_size=batch_size,
        quick_scale=quick_scale,
        provide_optimizers=provide_optimizers,
        provide_batched=True,
        provide_fim=has_irls,
        provide_hessian=True,
        init_a=init_mode,
        init_b=init_mode
    )
    self.sim = simulator
def get_hessians(self, input_data: InputDataGLM):
    """Evaluate a hessian proxy for ``input_data`` at model initialization.

    No training is performed: the estimator is initialized and immediately
    finalized, and the result is read off the estimator store.

    NOTE(review): this returns the negative *inverse* Fisher matrix as the
    "hessian" — matches the original behavior; confirm this is intended.
    """
    if self.noise_model is None:
        raise ValueError("noise_model is None")
    if self.noise_model == "nb":
        from batchglm.api.models.tf1.glm_nb import Estimator
    elif self.noise_model == "norm":
        from batchglm.api.models import Estimator
    elif self.noise_model == "beta":
        from batchglm.api.models.tf1.glm_beta import Estimator
    else:
        raise ValueError("noise_model not recognized")

    # Only first-order optimizers are provided; all Newton-type ones are off.
    first_order = ("gd", "adam", "adagrad", "rmsprop")
    newton_like = ("nr", "nr_tr", "irls", "irls_gd", "irls_tr", "irls_gd_tr")
    provide_optimizers = dict.fromkeys(first_order, True)
    provide_optimizers.update(dict.fromkeys(newton_like, False))

    estim = Estimator(
        input_data=input_data,
        quick_scale=False,
        provide_optimizers=provide_optimizers,
        provide_fim=False,
        provide_hessian=False,
        init_a="standard",
        init_b="standard"
    )
    estim.initialize()
    store = estim.finalize()
    return -store.fisher_inv
def __init__(self, simulator, quick_scale, algo, batched, noise_model, sparse):
    """Build a GLM Estimator configured to use exactly one optimizer.

    :param simulator: Simulator exposing ``input_data`` with ``x``,
        ``design_loc`` and ``design_scale`` — schema assumed from usage below.
    :param quick_scale: Forwarded to the Estimator constructor.
    :param algo: Optimizer name; lower-cased and enabled as the only optimizer.
    :param batched: Forwarded as ``provide_batched``.
    :param noise_model: One of "nb", "norm", "beta"; selects the backend classes.
    :param sparse: If True, wrap the data matrix as a scipy CSR matrix.
    :raises ValueError: If ``noise_model`` is None or not recognized.
    """
    if noise_model is None:
        raise ValueError("noise_model is None")
    if noise_model == "nb":
        from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM
    elif noise_model == "norm":
        from batchglm.api.models import Estimator, InputDataGLM
    elif noise_model == "beta":
        from batchglm.api.models.tf1.glm_beta import Estimator, InputDataGLM
    else:
        raise ValueError("noise_model not recognized")

    batch_size = 200
    # Start with every optimizer disabled, then enable only the one under test.
    provide_optimizers = {
        "gd": False, "adam": False, "adagrad": False, "rmsprop": False,
        "nr": False, "nr_tr": False,
        "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False
    }
    provide_optimizers[algo.lower()] = True

    # Fix: the sparse/dense branches previously duplicated the whole
    # InputDataGLM call; only `data` differs between them.
    data = simulator.input_data.x
    if sparse:
        data = scipy.sparse.csr_matrix(data)
    input_data = InputDataGLM(
        data=data,
        design_loc=simulator.input_data.design_loc,
        design_scale=simulator.input_data.design_scale
    )

    estimator = Estimator(
        input_data=input_data,
        batch_size=batch_size,
        quick_scale=quick_scale,
        provide_optimizers=provide_optimizers,
        provide_batched=batched,
        optim_algos=[algo.lower()]
    )
    self.estimator = estimator
    self.sim = simulator
    self.algo = algo.lower()
def get_jacs(self, input_data: InputDataGLM):
    """Evaluate the model gradients for ``input_data`` at initialization.

    A zero-step training sequence is run so the graph is built and the
    gradients are populated, then the values are read from the finalized store.
    """
    if self.noise_model is None:
        raise ValueError("noise_model is None")
    if self.noise_model == "nb":
        from batchglm.api.models.tf1.glm_nb import Estimator
    elif self.noise_model == "norm":
        from batchglm.api.models import Estimator
    elif self.noise_model == "beta":
        from batchglm.api.models.tf1.glm_beta import Estimator
    else:
        raise ValueError("noise_model not recognized")

    # First-order optimizers on, Newton-type optimizers off.
    provide_optimizers = dict.fromkeys(("gd", "adam", "adagrad", "rmsprop"), True)
    provide_optimizers.update(
        dict.fromkeys(("nr", "nr_tr", "irls", "irls_gd", "irls_tr", "irls_gd_tr"), False)
    )

    estim = Estimator(
        input_data=input_data,
        quick_scale=False,
        provide_optimizers=provide_optimizers,
        provide_fim=False,
        provide_hessian=False,
        init_a="standard",
        init_b="standard"
    )
    estim.initialize()
    # Do not train, evaluate at initialization!
    estim.train_sequence(training_strategy=[
        {
            "convergence_criteria": "step",
            "stopping_criteria": 0,
            "use_batching": False,
            "optim_algo": "gd",
            "train_mu": False,
            "train_r": False
        },
    ])
    store = estim.finalize()
    return store.gradients.values
class _TestAccuracyGlmAllEstim: def __init__(self, simulator, quick_scale, noise_model, sparse, init_mode): if noise_model is None: raise ValueError("noise_model is None") else: if noise_model == "nb": from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM elif noise_model == "norm": from batchglm.api.models import Estimator, InputDataGLM elif noise_model == "beta": from batchglm.api.models.tf1.glm_beta import Estimator, InputDataGLM else: raise ValueError("noise_model not recognized") batch_size = 2000 provide_optimizers = { "gd": True, "adam": True, "adagrad": True, "rmsprop": True, "nr": True, "nr_tr": True, "irls": noise_model in ["nb", "norm"], "irls_gd": noise_model in ["nb", "norm"], "irls_tr": noise_model in ["nb", "norm"], "irls_gd_tr": noise_model in ["nb", "norm"] } if sparse: input_data = InputDataGLM( data=scipy.sparse.csr_matrix(simulator.input_data.x), design_loc=simulator.input_data.design_loc, design_scale=simulator.input_data.design_scale, design_loc_names=simulator.input_data.design_loc_names, design_scale_names=simulator.input_data.design_scale_names, constraints_loc=simulator.input_data.constraints_loc, constraints_scale=simulator.input_data.constraints_scale, size_factors=simulator.input_data.size_factors, as_dask=False) else: input_data = InputDataGLM( data=simulator.input_data.x, design_loc=simulator.input_data.design_loc, design_scale=simulator.input_data.design_scale, design_loc_names=simulator.input_data.design_loc_names, design_scale_names=simulator.input_data.design_scale_names, constraints_loc=simulator.input_data.constraints_loc, constraints_scale=simulator.input_data.constraints_scale, size_factors=simulator.input_data.size_factors, as_dask=False) self.estimator = Estimator(input_data=input_data, batch_size=batch_size, quick_scale=quick_scale, provide_optimizers=provide_optimizers, provide_batched=True, provide_fim=noise_model in ["nb", "norm"], provide_hessian=True, init_a=init_mode, init_b=init_mode) self.sim = 
simulator def estimate(self, algo, batched, acc, lr): self.estimator.initialize() self.estimator.train_sequence(training_strategy=[ { "learning_rate": lr, "convergence_criteria": "all_converged", "stopping_criteria": acc, "use_batching": batched, "optim_algo": algo, }, ]) def eval_estimation(self, batched, train_loc, train_scale): if batched: threshold_dev_a = 0.4 threshold_dev_b = 0.4 threshold_std_a = 2 threshold_std_b = 2 else: threshold_dev_a = 0.2 threshold_dev_b = 0.2 threshold_std_a = 1 threshold_std_b = 1 success = True if train_loc: mean_rel_dev_a = np.mean( (self.estimator.model.a_var - self.sim.a_var) / self.sim.a_var) std_rel_dev_a = np.std( (self.estimator.model.a_var - self.sim.a_var) / self.sim.a_var) logging.getLogger("batchglm").info("mean_rel_dev_a %f" % mean_rel_dev_a) logging.getLogger("batchglm").info("std_rel_dev_a %f" % std_rel_dev_a) if np.abs(mean_rel_dev_a ) > threshold_dev_a or std_rel_dev_a > threshold_std_a: success = False if train_scale: mean_rel_dev_b = np.mean( (self.estimator.model.b_var - self.sim.b_var) / self.sim.b_var) std_rel_dev_b = np.std( (self.estimator.model.b_var - self.sim.b_var) / self.sim.b_var) logging.getLogger("batchglm").info("mean_rel_dev_b %f" % mean_rel_dev_b) logging.getLogger("batchglm").info("std_rel_dev_b %f" % std_rel_dev_b) if np.abs(mean_rel_dev_b ) > threshold_dev_b or std_rel_dev_b > threshold_std_b: success = False return success