def calculate_log_probability(self, input_values, reevaluate=True, for_gradient=False,
                              include_parents=True, normalized=True):
    """
    Method. It returns the log probability of this variable's value under its own
    distribution, optionally added to the log probabilities of its parents.

    Args:
        input_values: Dictionary of values, passed unchanged to
            self._get_its_own_value_from_input, self._get_parameters_from_input_values
            and to the parents' calculate_log_probability.

        reevaluate: Bool. If False and this variable was already evaluated, the method
            returns 0. immediately, so a variable reached through several children is
            not computed (or counted) more than once.

        for_gradient: Bool. Only forwarded to the parents.

        include_parents: Bool. If True the parents' log probabilities are added to the
            result; if False only this variable's own term is returned. The parents are
            evaluated in both cases.

        normalized: Bool. Only forwarded to the parents.

    Returns:
        0. on the already-evaluated path, otherwise the log probability
        (presumably a torch.Tensor -- depends on self.distribution).
    """
    # Cached path: the variable was already accounted for during this evaluation.
    if self._evaluated and not reevaluate:
        return 0.

    own_value = self._get_its_own_value_from_input(input_values, reevaluate)
    self._evaluated = True
    distribution_parameters = self._get_parameters_from_input_values(input_values)
    own_log_prob = self.distribution.calculate_log_probability(own_value, **distribution_parameters)

    # Parents are always evaluated, even when their contribution is not returned.
    parent_terms = []
    for parent in self.parents:
        parent_terms.append(
            parent.calculate_log_probability(input_values, reevaluate, for_gradient, normalized=normalized)
        )
    parents_log_prob = sum(parent_terms)

    if self.is_observed:
        # Observed variables have their log probability summed over dim 1.
        own_log_prob = own_log_prob.sum(dim=1, keepdim=True)

    if is_tensor(own_log_prob) and is_tensor(parents_log_prob):
        own_log_prob, parents_log_prob = partial_broadcast(own_log_prob, parents_log_prob)

    if not include_parents:
        return own_log_prob
    return own_log_prob + parents_log_prob
def _get_sample(self, differentiable, **parameters):
    """
    Select a batch of entries from an empirical dataset, without replacement.

    Parameters
    ----------
    differentiable : not used by this method.
    **parameters : must contain "dataset". May contain "indices" (used as given
        instead of drawing new ones) and "weights" (normalised to sum to one and
        used as selection probabilities when indices are drawn).

    Returns
    -------
    For a tensor dataset, the tensor entries selected by the indices; otherwise a
    list with the selected elements of the dataset.

    Raises
    ------
    ValueError : if the dataset holds fewer entries than self.batch_size.
    IndexError : if the dataset is a tensor and the indices are neither a list of
        arrays nor a list of integers.
    """
    dataset = parameters["dataset"]

    if "indices" in parameters:
        indices = parameters["indices"]
    else:
        # Selection probabilities (None lets numpy sample uniformly).
        if "weights" in parameters:
            p = np.array(parameters["weights"]).astype("float64")
            p = p/np.sum(p)
        else:
            p = None

        # The axis holding the data points depends on whether the variable is observed.
        if not is_tensor(dataset):
            dataset_size = len(dataset)
        elif self.is_observed:
            dataset_size = dataset.shape[1]
        else:
            dataset_size = dataset.shape[2]

        if dataset_size < self.batch_size:
            raise ValueError("It is impossible to have more samples than the size of the dataset without replacement")

        if is_discrete(dataset):
            # Discrete data: a single array of indices.
            indices = np.random.choice(range(dataset_size), size=self.batch_size, replace=False, p=p)
        else:
            # One independent draw of indices per entry along the first axis.
            number_samples = dataset.shape[0]
            indices = []
            for _ in range(number_samples):
                indices.append(np.random.choice(range(dataset_size), size=self.batch_size, replace=False, p=p))

    if not is_tensor(dataset):
        return list(np.array(dataset)[indices])

    indices_are_list = isinstance(indices, list)
    if indices_are_list and isinstance(indices[0], np.ndarray):
        # A separate index array for every entry along the first axis.
        selected = []
        for n, k in enumerate(indices):
            if self.is_observed:
                selected.append(dataset[n, k, :].unsqueeze(dim=0))
            else:
                selected.append(dataset[n, :, k, :].unsqueeze(dim=0))
        sample = torch.cat(selected, dim=0)
    elif indices_are_list and isinstance(indices[0], (int, np.int32, np.int64)):
        # The same integer indices for every entry along the first axis.
        sample = dataset[:, indices, :] if self.is_observed else dataset[:, :, indices, :]
    else:
        raise IndexError("The indices of an empirical variable should be either a list of integers or a list of arrays")
    return sample
def _get_entropy(self, **parameters):
    """
    Return the entropy of a categorical distribution over the dataset entries.

    If "weights" is given it is used directly as the categorical probabilities.
    Otherwise a vector of ones is used, whose length is the first dimension of a
    tensor dataset or len(dataset) for any other dataset.
    """
    if "weights" not in parameters:
        dataset = parameters["dataset"]
        n = int(dataset.shape[0]) if is_tensor(dataset) else len(dataset)
        # Equal (unnormalised) weight for every entry.
        probs = torch.Tensor(np.ones((n, ))).float().to(device)
    else:
        probs = parameters["weights"]
    categorical = distributions.categorical.Categorical(probs=probs)
    return categorical.entropy()
def update_locations(self):
    """
    Refresh self.locations from self.particles.

    self.particles must be a list. The type of its first element decides how every
    element is read: tensors contribute their .data, DeterministicVariable
    instances contribute value[0, 0, :].data.

    Raises:
        ValueError: if self.particles is not a list, or if its first element is
            neither a tensor nor a DeterministicVariable.
    """
    if not isinstance(self.particles, list):
        raise ValueError("The location of the particles should be inserted as a list of locations")

    # Only the first particle is inspected; the rest are assumed to be of the same kind.
    if is_tensor(self.particles[0]):
        locations = []
        for particle in self.particles:
            locations.append(particle.data)
        self.locations = locations
    elif isinstance(self.particles[0], DeterministicVariable):
        locations = []
        for particle in self.particles:
            locations.append(particle.value[0, 0, :].data)
        self.locations = locations
    else:
        raise ValueError("The location of the particles should be either deterministic brancher variables, chainer variables or np.array")
def reformat_value(value, index):
    """
    Convert a value into plain Python/numpy form.

    Tensors:
        * if value[index, :] holds a single element, it is returned as a float;
        * else if the second dimension of the tensor has size 1, the numpy version
          of value[index, :] is returned with that dimension dropped;
        * otherwise the whole tensor is returned as a numpy array (index is not
          applied in this case).
    Other iterables are processed element by element through map_iterable.
    Anything else is returned unchanged.
    """
    if not is_tensor(value):
        if isinstance(value, Iterable):
            return map_iterable(lambda item: reformat_value(item, index), value)
        return value

    selected = value[index, :]
    if np.prod(selected.shape) == 1:
        return float(selected.cpu().detach().numpy())
    if value.shape[1] == 1:
        return selected.cpu().detach().numpy()[0, :]
    return value.cpu().detach().numpy()
def _apply_link(self, parents_values):
    """
    Apply self.link to the parents' values and restore the leading dimensions.

    Parent values that are not discrete, or that contain tensors, are passed
    through broadcast_and_reshape_parent_value before the link is called; the
    remaining values are handed to the link untouched. Every tensor in the link
    output (directly, or nested in a structure containing tensors) is then viewed
    with shape (number_samples, number_datapoints) + its trailing dimensions.

    Returns:
        Dictionary with the same keys as the link output.
    """
    number_samples, number_datapoints = get_number_samples_and_datapoints(parents_values)

    def needs_reshaping(key, val):
        return not is_discrete(val) or contains_tensors(val)

    def reshape_parent(parent_value):
        return broadcast_and_reshape_parent_value(parent_value, number_samples, number_datapoints)

    def restore_leading_dims(tensor):
        return tensor.view(size=(number_samples, number_datapoints) + tensor.shape[1:])

    cont_values, discrete_values = split_dict(parents_values, condition=needs_reshaping)

    # The discrete values dictionary is extended in place with the reshaped values.
    link_inputs = discrete_values
    if cont_values:
        link_inputs.update(map_iterable(reshape_parent, cont_values, recursive=True))

    link_outputs = self.link(link_inputs)

    output = {}
    for key, val in link_outputs.items():
        if is_tensor(val):
            output[key] = restore_leading_dims(val)
        elif contains_tensors(val):
            output[key] = map_iterable(restore_leading_dims, val)
        else:
            output[key] = val
    return output
def __getitem__(self, key):
    """
    Build a PartialLink whose function indexes the output of self.fn.

    Args:
        key: One of
            * an iterable of ints -> tensor outputs are indexed with
              (slice(None), *key);
            * a single int -> tensor outputs are indexed with (slice(None), key);
            * any other hashable object -> tensor outputs are indexed with key.
            Non-tensor outputs of self.fn are always indexed with key itself.

    Returns:
        PartialLink with the same vars and links as this object.

    Raises:
        ValueError: if key is neither numeric nor hashable.
    """
    # The collections.Iterable / collections.Hashable aliases were removed in
    # Python 3.10; the ABCs live in collections.abc.
    from collections.abc import Hashable, Iterable

    if isinstance(key, Iterable) and all(isinstance(k, int) for k in key):
        variable_slice = (slice(None, None, None), *key)
    elif isinstance(key, int):
        variable_slice = (slice(None, None, None), key)
    elif isinstance(key, Hashable):
        variable_slice = key
    else:
        raise ValueError("The input to __getitem__ is neither numeric nor a hashabble key")

    def fn(values):
        # Evaluate the wrapped function once, so the type check and the indexing
        # are applied to the same result.
        result = self.fn(values)
        if is_tensor(result):
            return result[variable_slice]
        return result[key]

    return PartialLink(vars=self.vars, fn=fn, links=self.links)
def _apply_link(self, parents_values):
    """
    Apply self.link to the parents' values and restore the leading dimensions.

    Non-discrete parent values go through broadcast_parent_values, which also
    provides number_samples and number_datapoints; discrete values are handed to
    the link untouched. Tensor outputs of the link are viewed with shape
    (number_samples, number_datapoints) + their trailing dimensions; any other
    output is returned unchanged.

    Returns:
        Dictionary with the same keys as the link output.

    Raises:
        ValueError: if the link returns a tensor but there were no continuous
            parent values, so the leading dimensions are unknown.
    """
    # TODO: This is for allowing discrete data, temporary? (for julia)
    # For Julia: Very important method
    cont_values, discrete_values = split_dict(
        parents_values, condition=lambda key, val: not is_discrete(val))

    if cont_values:
        reshaped_dict, number_samples, number_datapoints = broadcast_parent_values(cont_values)
        reshaped_dict.update(discrete_values)
        leading_shape = (number_samples, number_datapoints)
    else:
        reshaped_dict = discrete_values
        # Without continuous parents nothing tells us the sample/datapoint counts.
        leading_shape = None

    reshaped_output = self.link(reshaped_dict)

    output = {}
    for key, val in reshaped_output.items():
        if is_tensor(val):
            if leading_shape is None:
                # Previously this surfaced as an UnboundLocalError on number_samples.
                raise ValueError(
                    "Cannot reshape the tensor output '{}' of the link: the number of samples "
                    "and datapoints is unknown because there are no continuous parent values".format(key))
            val = val.view(size=leading_shape + val.shape[1:])
        output[key] = val
    return output
def truncated_calculate_log_probability(rv_values, for_gradient=False, normalized=True):
    """
    Log probability of the enclosing `model`, corrected for truncation.

    Args:
        rv_values: Dictionary mapping variables to their values.
        for_gradient: Bool. Selects which normalisation term is used (see Returns).
        normalized: Bool. If False, the model's log probability is returned as is.

    Returns:
        * normalized is False: model.calculate_log_probability(rv_values, ...).
        * normalized and for_gradient: that value minus the mean of the model's
          log probability evaluated on detached copies of the tensor values.
        * normalized and not for_gradient: that value minus the log of
          get_acceptance_probability, called with the size of the first dimension
          of the first entry of rv_values as number_samples.
    """
    log_probability = model.calculate_log_probability(rv_values,
                                                      normalized=normalized,
                                                      for_gradient=for_gradient)
    if not normalized:
        return log_probability

    if not for_gradient:
        first_value = list(rv_values.values())[0]
        number_samples = first_value.shape[0]
        acceptance_ratio = get_acceptance_probability(number_samples=number_samples)
        return log_probability - np.log(acceptance_ratio)

    # Detach tensors so the normalisation term is computed on values cut off from
    # the graph of rv_values.
    nondiff_values = {}
    for var, value in rv_values.items():
        nondiff_values[var] = value.detach() if is_tensor(value) else value
    normalization = -model.calculate_log_probability(nondiff_values,
                                                     for_gradient=False,
                                                     normalized=True).mean()
    return log_probability + normalization
def get_sample(self, dataset, indices, number_samples, weights=None):
    """
    Select a batch of entries from an empirical dataset, without replacement.

    Parameters
    ----------
    dataset : tensor or other sequence holding the data.
    indices : indices to select. When None or empty, new indices are drawn.
    number_samples : number of independent index draws made for non-discrete data.
    weights : optional selection weights; normalised to sum to one. None or empty
        means uniform selection.

    Returns
    -------
    For a tensor dataset, the tensor entries selected by the indices; otherwise a
    list with the selected elements of the dataset.

    Raises
    ------
    ValueError : if the dataset holds fewer entries than self.batch_size.
    IndexError : if the dataset is a tensor and the indices are neither a list of
        arrays nor a list of integers.
    """
    def _is_unset(value):
        # `not value` raises for arrays/tensors with more than one element, so
        # check None and length explicitly; objects without a length keep the
        # previous plain-truthiness behaviour.
        if value is None:
            return True
        try:
            return len(value) == 0
        except TypeError:
            return not value

    if _is_unset(indices):
        if _is_unset(weights):
            p = None
        else:
            p = np.array(weights).astype("float64")
            p = p / np.sum(p)

        # The axis holding the data points depends on whether the variable is observed.
        if is_tensor(dataset):
            dataset_size = dataset.shape[1] if self.is_observed else dataset.shape[2]
        else:
            dataset_size = len(dataset)

        if dataset_size < self.batch_size:
            raise ValueError(
                "It is impossible to have more samples than the size of the dataset without replacement"
            )

        if is_discrete(dataset):  # TODO: This is for allowing discrete data, temporary?
            indices = np.random.choice(range(dataset_size),
                                       size=self.batch_size,
                                       replace=False,
                                       p=p)
        else:
            # One independent draw of indices per sample.
            indices = [
                np.random.choice(range(dataset_size),
                                 size=self.batch_size,
                                 replace=False,
                                 p=p) for _ in range(number_samples)
            ]

    if is_tensor(dataset):
        if isinstance(indices, list) and isinstance(indices[0], np.ndarray):
            # A separate index array for every entry along the first axis.
            if self.is_observed:
                sample = torch.cat([
                    dataset[n, k, :].unsqueeze(dim=0)
                    for n, k in enumerate(indices)
                ], dim=0)
            else:
                sample = torch.cat([
                    dataset[n, :, k, :].unsqueeze(dim=0)
                    for n, k in enumerate(indices)
                ], dim=0)
        elif isinstance(indices, list) and isinstance(indices[0], (int, np.int32, np.int64)):
            # The same integer indices for every entry along the first axis.
            if self.is_observed:
                sample = dataset[:, indices, :]
            else:
                sample = dataset[:, :, indices, :]
        else:
            raise IndexError(
                "The indices of an empirical variable should be either a list of integers or a list of arrays"
            )
    else:
        # TODO: This is for allowing discrete data, temporary? For julia
        sample = list(np.array(dataset)[indices])
    return sample