def update(self, data):
    # create the input_data tensor
    data_loader = build_data_loader(data)
    input_data = self.create_input_data_tensor(data_loader)

    t = []
    sess = util.get_session()
    for i in range(self.epochs):
        for j in range(self.batches):
            # evaluate the data tensor to get concrete values which can be used to observe variables
            local_input_data = sess.run(input_data)

            # reshape data in case it does not match exactly with the shape used when
            # building the random variable, i.e., a trailing (..., 1) dimension
            clean_local_input_data = {
                k: np.reshape(v, self.expanded_variables["p"][k].observed_value.shape.as_list())
                for k, v in local_input_data.items()
            }

            with contextmanager.observe(self.expanded_variables["p"], clean_local_input_data):
                with contextmanager.observe(self.expanded_variables["q"], clean_local_input_data):
                    sess.run(self.train_tensor)

                    t.append(sess.run(self.debug.loss_tensor))
                    if j == 0 and i % 200 == 0:
                        print("\n {} epochs\t {}".format(i, t[-1]), end="", flush=True)
                    if j == 0 and i % 20 == 0:
                        print(".", end="", flush=True)

    # set the protected _losses attribute for the losses property
    self.debug.losses += t
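# A minimal standalone sketch of the (..., 1) reshape normalization performed in
# `update` above; the shapes and data here are made-up assumptions, not taken
# from the library.
import numpy as np

observed_shape = [500, 1]          # shape the random variable was built with
data = np.random.rand(500)         # user data missing the trailing singleton axis
clean = np.reshape(data, observed_shape)
print(clean.shape)                 # (500, 1): now matches observed_value's shape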
def run(self, pmodel, sample_dict):
    # NOTE: right now we use a session in a with context, so it is opened and closed here.
    # If we want to run consecutive inference, we need to reuse the same session so that
    # the same variables are shared. In that case, the build_in_session function from
    # RandomVariables should not be used.

    # get the plate size
    plate_size = util.iterables.get_plate_size(pmodel.vars, sample_dict)

    # create the loss function tensor
    loss_tensor = self.loss_fn(pmodel, self.qmodel, plate_size=plate_size)

    train = self.optimizer.minimize(loss_tensor)

    t = []
    sess = inf.get_session()
    # initialize all variables which are not in the probmodel p, because those have been initialized before
    model_variables = [
        v for v in itertools.chain(
            pmodel.params.values(),
            (pmodel._last_expanded_params or {}).values(),
            (pmodel._last_fitted_params or {}).values(),
            self.qmodel.params.values(),
            (self.qmodel._last_expanded_params or {}).values(),
            (self.qmodel._last_fitted_params or {}).values())
    ]
    sess.run(tf.variables_initializer([
        v for v in tf.global_variables()
        if v not in model_variables and not v.name.startswith("inferpy-")
    ]))

    with contextmanager.observe(pmodel._last_expanded_vars, sample_dict):
        with contextmanager.observe(self.qmodel._last_expanded_vars, sample_dict):
            for i in range(self.epochs):
                sess.run(train)

                t.append(sess.run(loss_tensor))
                if i % 200 == 0:
                    print("\n {} epochs\t {}".format(i, t[-1]), end="", flush=True)
                if i % 10 == 0:
                    print(".", end="", flush=True)

    # set the private __losses attribute for the losses property
    self.__losses = t

    return self.qmodel._last_expanded_vars, self.qmodel._last_expanded_params
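# A standalone TF 1.x sketch of the selective initialization done in `run` above:
# variables belonging to the model (simulated here with an "inferpy-" name prefix)
# are skipped, while everything else (e.g. optimizer slot variables) gets
# initialized. The variable names are illustrative assumptions.
import tensorflow as tf

model_var = tf.get_variable("inferpy-qloc", initializer=0.0)  # assumed initialized elsewhere
slot_var = tf.get_variable("adam_slot", initializer=1.0)      # fresh, needs initialization

to_init = [v for v in tf.global_variables() if not v.name.startswith("inferpy-")]
with tf.Session() as sess:
    sess.run(tf.variables_initializer(to_init))               # touches only slot_var
    print(sess.run(slot_var))                                 # 1.0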
def log_prob(self, data):
    """ Computes the log probabilities of a (set of) sample(s) """
    with contextmanager.observe(self.vars, data):
        return {k: self.vars[k].log_prob(tf.convert_to_tensor(v))
                for k, v in data.items()}
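# A standalone TF 1.x sketch of the underlying log_prob computation, using a
# tf.distributions.Normal in place of an inferpy Random Variable; the values
# are toy data for illustration.
import tensorflow as tf

x = tf.distributions.Normal(loc=0.0, scale=1.0)
lp = x.log_prob(tf.convert_to_tensor([0.0, 1.0]))
with tf.Session() as sess:
    print(sess.run(lp))  # approx. [-0.9189 -1.4189]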
def log_prob(self):
    """ Computes the log probabilities of a (set of) sample(s) """
    with util.interceptor.enable_interceptor(*self.enable_interceptor_variables):
        with contextmanager.observe(self.observed_variables, self.data):
            result = util.runtime.try_run(
                {k: v.log_prob(v.value) for k, v in self.target_variables.items()})

    return result
def update(self, data):
    # data must be a sample dictionary
    sample_dict = build_sample_dict(data)
    # ensure that the size of the data matches the self.plate_size
    data_size = util.iterables.get_plate_size(self.pmodel.vars, sample_dict)
    if data_size != self.plate_size:
        raise ValueError(
            "The size of the data must be equal to the plate size: {}".format(self.plate_size))

    t = []
    sess = util.get_session()
    # reshape data in case it does not match exactly with the shape used when
    # building the random variable, i.e., a trailing (..., 1) dimension
    clean_sample_dict = {
        k: np.reshape(v, self.expanded_variables["p"][k].observed_value.shape.as_list())
        for k, v in sample_dict.items()
    }
    with contextmanager.observe(self.expanded_variables["p"], clean_sample_dict):
        with contextmanager.observe(self.expanded_variables["q"], clean_sample_dict):
            for i in range(self.epochs):
                sess.run(self.train_tensor)

                t.append(sess.run(self.debug.loss_tensor))
                if i % 200 == 0:
                    print("\n {} epochs\t {}".format(i, t[-1]), end="", flush=True)
                if i % 10 == 0:
                    print(".", end="", flush=True)

    # set the protected _losses attribute for the losses property
    self.debug.losses += t
def post_predictive_sample(self):
    """ Sample from the posterior predictive distribution, i.e., from the observed
    variables with the inferred posterior fixed in the model.

    :return: dictionary of samples with an entry for each observed variable.
    """
    sess = util.session.get_session()
    post = {k: sess.run(v.loc) for k, v in self.posterior.items()}

    with contextmanager.observe(self.posterior, post):
        samples = {
            var: self._last_expanded_vars[var].sample()
            for var in self.vars.keys() if var not in self.posterior.keys()
        }
    return samples
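# A standalone TF 1.x sketch of the idea above: evaluate the posterior mean (`loc`)
# of a latent variable, fix it, and sample the observed variable given it. The
# distributions and numbers are assumptions for illustration only.
import tensorflow as tf

q_theta = tf.distributions.Normal(loc=0.5, scale=0.1)      # stand-in for a posterior entry
with tf.Session() as sess:
    theta_hat = sess.run(q_theta.loc)                      # plays the role of `post`
    x = tf.distributions.Normal(loc=theta_hat, scale=1.0)  # observed variable given theta_hat
    print(sess.run(x.sample()))                            # one posterior-predictive draw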
def parameters(self, names=None):
    """ Return the parameters of the Random Variables of the model.
    If `names` is None, return all the parameters of all the Random Variables.
    If `names` is a list, return the parameters specified in the list (if they exist)
    for all the Random Variables.
    If `names` is a dict, return all the parameters specified (value) for each
    Random Variable (key).

    Note: if `tf_run=True` but any of the returned parameters is not a Tensor
    (and therefore cannot be evaluated), this returns a non-evaluated dict,
    because the evaluation would raise an Exception.

    Args:
        names: A list, a dict or None. Specifies the parameters of the Random
            Variables to be obtained.

    Returns:
        A dict, where the keys are the names of the Random Variables and the
        values are dicts of parameters (name-value).
    """
    # argument type checking
    if not (names is None or isinstance(names, (list, dict))):
        raise TypeError(
            "The argument 'names' must be None, a list or a dict, not {}.".format(type(names)))
    # now we can assume that names is None, a list or a dict

    # function to filter the parameters of each Random Variable
    def filter_parameters(varname, parameters):
        parameter_names = list(parameters.keys())
        if names is None:
            # use all the parameters
            selected_parameters = parameter_names
        else:
            # filter by names; if names is a dict and varname is not a key, use all the parameters
            selected_parameters = set(
                names if isinstance(names, list) else names.get(varname, parameters))

        return {k: util.runtime.try_run(v)
                for k, v in parameters.items() if k in selected_parameters}

    with contextmanager.observe(self.observed_variables, self.data):
        result = {k: filter_parameters(k, v.parameters)
                  for k, v in self.target_variables.items()}
    return result
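# A standalone sketch of the filtering rule implemented by `filter_parameters`
# above, using plain dicts instead of Random Variables; the variable and
# parameter names are made up.
import numpy as np

variables = {"theta": {"loc": 0.0, "scale": 1.0},
             "x": {"loc": np.zeros(3), "scale": np.ones(3)}}

def filter_parameters(varname, parameters, names):
    if names is None:
        selected = set(parameters.keys())  # no filter: keep everything
    else:
        # a list applies to every variable; a dict is looked up per variable and
        # falls back to all parameters when the variable has no entry
        selected = set(names if isinstance(names, list) else names.get(varname, parameters))
    return {k: v for k, v in parameters.items() if k in selected}

print({k: filter_parameters(k, v, {"theta": ["loc"]}) for k, v in variables.items()})
# {'theta': {'loc': 0.0}, 'x': {'loc': array([0., 0., 0.]), 'scale': array([1., 1., 1.])}}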
def sample(self, size=1):
    """ Generates a sample for each variable in the model """
    with util.interceptor.enable_interceptor(*self.enable_interceptor_variables):
        with contextmanager.observe(self.observed_variables, self.data):
            # each of the `size` iterations runs the dict in the session, so if there are
            # dependencies among random variables, they are computed in the same graph
            # operations and reflected in the results
            samples = [util.runtime.try_run(self.target_variables) for _ in range(size)]

    if size == 1:
        result = samples[0]
    else:
        # compact all samples into one single dict
        result = {k: np.array([sample[k] for sample in samples])
                  for k in self.target_variables.keys()}
    return result
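# A standalone sketch of how `size` independent draws are compacted into a single
# dict at the end of `sample` above (the per-draw dicts are toy data).
import numpy as np

samples = [{"x": np.array([1.0, 2.0])}, {"x": np.array([3.0, 4.0])}]  # size == 2
result = {k: np.array([s[k] for s in samples]) for k in samples[0]}
print(result["x"].shape)  # (2, 2): the leading axis indexes the draw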
def predict(self, observations={}):
    # TODO: this function is under design and should not be used as it is right now
    sess = util.session.get_session()
    with contextmanager.observe(self.posterior, observations):
        return sess.run({k: v for k, v in self.posterior.items()})
def run(self, pmodel, sample_dict):
    # create a tf dataset and an iterator, specifying the batch size
    plate_size = util.iterables.get_plate_size(pmodel.vars, sample_dict)

    batches = int(plate_size / self.batch_size)  # M/N
    batch_weight = self.batch_size / plate_size  # N/M

    tfdataset = (
        tf.data.Dataset.from_tensor_slices(sample_dict)
        # use the size of the complete dataset as the shuffle buffer, so we get a perfect shuffle
        .shuffle(plate_size)
        # discard the remainder batch with fewer elements, if it exists
        .batch(self.batch_size, drop_remainder=True)
        .repeat()
    )
    iterator = tfdataset.make_one_shot_iterator()
    # each time this tensor is evaluated in a session it contains new data
    input_data = iterator.get_next()

    # create the loss function tensor
    loss_tensor = self.loss_fn(pmodel, self.qmodel,
                               plate_size=self.batch_size, batch_weight=batch_weight)

    train = self.optimizer.minimize(loss_tensor)

    t = []
    sess = inf.get_session()
    # initialize all variables which are not in the probmodel p, because those have been initialized before
    model_variables = set(itertools.chain(
        pmodel.params.values(),
        (pmodel._last_expanded_params or {}).values(),
        (pmodel._last_fitted_params or {}).values(),
        self.qmodel.params.values(),
        (self.qmodel._last_expanded_params or {}).values(),
        (self.qmodel._last_fitted_params or {}).values()))
    sess.run(tf.variables_initializer([
        v for v in tf.global_variables()
        if v not in model_variables and not v.name.startswith("inferpy-")
    ]))

    for i in range(self.epochs):
        for j in range(batches):
            # evaluate the data tensor to get concrete values which can be used to observe variables
            local_input_data = sess.run(input_data)

            with contextmanager.observe(pmodel._last_expanded_vars, local_input_data):
                with contextmanager.observe(self.qmodel._last_expanded_vars, local_input_data):
                    sess.run(train)

                    t.append(sess.run(loss_tensor))
                    if i % 200 == 0:
                        print("\n {} epochs\t {}".format(i, t[-1]), end="", flush=True)
                    if i % 20 == 0:
                        print(".", end="", flush=True)

    # set the private __losses attribute for the losses property
    self.__losses = t

    return self.qmodel._last_expanded_vars, self.qmodel._last_expanded_params
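# A minimal, self-contained sketch of the tf.data pipeline built in `run` above
# (TF 1.x style); the dataset, sizes and names are made-up assumptions.
import numpy as np
import tensorflow as tf

sample_dict = {"x": np.arange(10, dtype=np.float32)}
plate_size = 10                          # M: size of the complete dataset
batch_size = 4                           # N
batches = plate_size // batch_size       # M/N = 2 full batches; the remainder is dropped
batch_weight = batch_size / plate_size   # N/M = 0.4, rescales the per-batch loss

dataset = (tf.data.Dataset.from_tensor_slices(sample_dict)
           .shuffle(plate_size)                     # buffer == dataset size: perfect shuffle
           .batch(batch_size, drop_remainder=True)
           .repeat())
input_data = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    print(sess.run(input_data))  # e.g. {'x': array([3., 0., 7., 5.], dtype=float32)}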