def _log_likelihood_inner(self, i_batch, params, dsetname, autograph):
    # Loops over the sources of one dataset, not over datasets or batches
    # Sum over sources is first in likelihood

    # Compute differential rates from all sources
    # drs = list[n_sources] of [n_events] tensors
    drs = tf.zeros((self.batch_size[dsetname],), dtype=fd.float_type())
    for sname, s in self.sources.items():
        if self.d_for_s[sname] != dsetname:
            continue
        rate_mult = self._get_rate_mult(sname, params)
        dr = s.differential_rate(
            s.data_tensor[i_batch],
            autograph=autograph,
            **self._filter_source_kwargs(params, sname))
        drs += dr * rate_mult

    # Sum over events and remove padding
    n = tf.where(
        tf.equal(i_batch,
                 tf.constant(self.n_batches[dsetname] - 1,
                             dtype=fd.int_type())),
        self.batch_size[dsetname] - self.n_padding[dsetname],
        self.batch_size[dsetname])
    ll = tf.reduce_sum(tf.math.log(drs[:n]))

    # Add mu once (to the first batch)
    # and constraint really only once (to first batch of first dataset)
    ll += tf.where(
        tf.equal(i_batch, tf.constant(0, dtype=fd.int_type())),
        -self.mu(dsetname, **params)
        + (self.log_constraint(**params)
           if dsetname == self.dsetnames[0] else 0.),
        0.)

    return ll
def log_likelihood(self, autograph=True, second_order=False,
                   omit_grads=tuple(), **kwargs):
    if second_order:
        # Compute the likelihood, jacobian and hessian.
        # Use only the non-tf.function version; in principle this also
        # works with the tf.function version, but that leads to very long
        # tracing times and we only need the hessian once.
        f = self._log_likelihood_grad2
    else:
        # Computes the likelihood and jacobian
        f = self._log_likelihood_tf if autograph else self._log_likelihood

    params = self.prepare_params(kwargs)
    n_grads = len(self.param_defaults) - len(omit_grads)
    ll = tf.constant(0., dtype=fd.float_type())
    llgrad = tf.zeros(n_grads, dtype=fd.float_type())
    llgrad2 = tf.zeros((n_grads, n_grads), dtype=fd.float_type())

    for dsetname in self.dsetnames:
        for i_batch in tf.range(self.n_batches[dsetname],
                                dtype=fd.int_type()):
            v = f(i_batch, dsetname, autograph,
                  omit_grads=omit_grads, **params)
            ll += v[0]
            llgrad += v[1]
            if second_order:
                llgrad2 += v[2]

    if second_order:
        return ll, llgrad, llgrad2
    return ll, llgrad
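# Illustrative sketch (an assumption, not from the source): the per-batch
# helper f above is expected to return (ll, gradient[, hessian]). A minimal
# value-and-gradient wrapper around an inner log likelihood could look like
# the function below; `inner_ll` and `ptensor` are hypothetical stand-ins
# for the inner likelihood callable and the parameter tensor.
import tensorflow as tf

def _example_value_and_grad(inner_ll, ptensor):
    """Return (log likelihood, gradient w.r.t. ptensor) for one batch."""
    with tf.GradientTape() as tape:
        tape.watch(ptensor)
        ll = inner_ll(ptensor)
    return ll, tape.gradient(ll, ptensor)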
def log_likelihood(self, second_order=False, omit_grads=tuple(), **kwargs):
    params = self.prepare_params(kwargs)
    n_grads = len(self.param_defaults) - len(omit_grads)
    ll = 0.
    llgrad = np.zeros(n_grads, dtype=np.float64)
    llgrad2 = np.zeros((n_grads, n_grads), dtype=np.float64)

    for dsetname in self.dsetnames:
        # Getting this from the batch_info tensor is much slower
        n_batches = self.sources[
            self.sources_in_dset[dsetname][0]].n_batches
        for i_batch in range(n_batches):
            # Iterating over tf.range seems much slower!
            results = self._log_likelihood(
                tf.constant(i_batch, dtype=fd.int_type()),
                dsetname=dsetname,
                data_tensor=self.data_tensors[dsetname][i_batch],
                batch_info=self.batch_info,
                omit_grads=omit_grads,
                second_order=second_order,
                **params)
            ll += results[0].numpy().astype(np.float64)
            if len(self.param_names):
                llgrad += results[1].numpy().astype(np.float64)
                if second_order:
                    llgrad2 += results[2].numpy().astype(np.float64)

    if second_order:
        return ll, llgrad, llgrad2
    return ll, llgrad, None
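# Usage sketch (hypothetical, not from the source): `lh` stands for an
# already-constructed likelihood object with the log_likelihood variant
# directly above; the keyword arguments stand for its fit parameters.
def example_log_likelihood_call(lh, **fit_params):
    # Summed log likelihood and its gradient over all datasets and batches
    ll, llgrad, _ = lh.log_likelihood(**fit_params)
    # The second-order call additionally returns the Hessian
    ll2, llgrad2, llhess = lh.log_likelihood(second_order=True, **fit_params)
    return ll, llgrad, llhess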
def _fetch(self, x, data_tensor=None):
    """Return a tensor column from the original dataframe (self.data)

    :param x: column name
    :param data_tensor: Data tensor, columns as in self.name_id
    """
    if data_tensor is None:
        # We're inside annotate, just return the column
        return fd.np_to_tf(self.data[x].values)

    col_id = tf.dtypes.cast(self.name_id.lookup(tf.constant(x)),
                            fd.int_type())
    # if i_batch is None:
    #     return tf.reshape(self.data_tensor[:, :, col_id], [-1])
    # else:
    return data_tensor[:, col_id]
def _log_likelihood_inner(self, i_batch, params, dsetname, data_tensor,
                          batch_info):
    """Return log likelihood contribution of one batch in a dataset

    This loops over sources in the dataset and events in the batch,
    but not over datasets or batches.
    """
    # Retrieve batching info. Cannot use tuple-unpacking, tensorflow
    # doesn't like it when you iterate over tensors
    dataset_index = self.dsetnames.index(dsetname)
    n_batches = batch_info[dataset_index, 0]
    batch_size = batch_info[dataset_index, 1]
    n_padding = batch_info[dataset_index, 2]

    # Compute differential rates from all sources
    # drs = list[n_sources] of [n_events] tensors
    drs = tf.zeros((batch_size,), dtype=fd.float_type())
    for source_i, sname in enumerate(self.sources_in_dset[dsetname]):
        s = self.sources[sname]
        rate_mult = self._get_rate_mult(sname, params)
        col_start, col_stop = self.column_indices[dsetname][source_i]
        dr = s.differential_rate(
            data_tensor[:, col_start:col_stop],
            # We are already tracing; if we call the traced function here
            # it breaks the Hessian (it will give NaNs)
            autograph=False,
            **self._filter_source_kwargs(params, sname))
        drs += dr * rate_mult

    # Sum over events and remove padding
    n = tf.where(tf.equal(i_batch, n_batches - 1),
                 batch_size - n_padding,
                 batch_size)
    ll = tf.reduce_sum(tf.math.log(drs[:n]))

    # Add mu once (to the first batch)
    # and constraint really only once (to first batch of first dataset)
    ll += tf.where(
        tf.equal(i_batch, tf.constant(0, dtype=fd.int_type())),
        -self.mu(dsetname, **params)
        + (self.log_constraint(**params)
           if dsetname == self.dsetnames[0] else 0.),
        0.)

    return ll
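# Illustrative sketch of the padding removal above: with a (made-up) batch
# size of 4, 3 batches and 1 padding event, only the final batch drops its
# padded entry from the sum over events.
import tensorflow as tf

batch_size, n_batches, n_padding = 4, 3, 1
for i_batch in range(n_batches):
    n = tf.where(tf.equal(i_batch, n_batches - 1),
                 batch_size - n_padding,
                 batch_size)
    print(i_batch, int(n))   # prints: 0 4, 1 4, 2 3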
def _fetch_param(self, param, ptensor):
    if ptensor is None:
        return self.defaults[param]
    idx = tf.dtypes.cast(self.param_id[param], dtype=fd.int_type())
    return ptensor[idx]
def _fetch_param(self, param, ptensor):
    if ptensor is None:
        return self.defaults[param]
    idx = tf.dtypes.cast(self.param_id.lookup(tf.constant(param)),
                         dtype=fd.int_type())
    return ptensor[idx]
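# Sketch (an assumption, not from the source): in the variant above,
# param_id appears to be a TensorFlow lookup table mapping parameter names
# to positions in ptensor. One way to build such a table; the parameter
# names below are hypothetical.
import tensorflow as tf

param_names = ['er_rate_multiplier', 'elife']
param_id = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(param_names),
        values=tf.range(len(param_names), dtype=tf.int64)),
    default_value=-1)

idx = param_id.lookup(tf.constant('elife'))   # == 1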
def set_data(self,
             data: ty.Union[pd.DataFrame, ty.Dict[str, pd.DataFrame]]):
    """Set new data for the sources in the likelihood.

    Data is passed in the same format as for __init__.
    Data can contain any subset of the original data keys, to update
    only specific datasets.
    """
    if isinstance(data, pd.DataFrame):
        assert len(self.dsetnames) == 1, \
            "You passed one DataFrame but there are multiple datasets"
        data = {DEFAULT_DSETNAME: data}

    is_none = [d is None for d in data.values()]
    if any(is_none):
        if not all(is_none):
            warnings.warn(
                "Cannot set only one dataset to None: "
                "setting all to None instead.",
                UserWarning)
        for s in self.sources.values():
            s.set_data(None)
        return

    # Note: np.int is removed in modern NumPy; plain int is equivalent
    batch_info = np.zeros((len(self.dsetnames), 3), dtype=int)
    for sname, source in self.sources.items():
        dname = self.dset_for_source[sname]
        if dname not in data:
            warnings.warn(f"Dataset {dname} not provided in set_data")
            continue
        # Copy ensures annotations don't clobber
        source.set_data(deepcopy(data[dname]))

        dset_index = self.dsetnames.index(dname)
        batch_info[dset_index, :] = [
            source.n_batches, source.batch_size, source.n_padding]

    self.batch_info = tf.convert_to_tensor(batch_info,
                                           dtype=fd.int_type())

    # Build a big data tensor for each dataset.
    # Each source has an [n_batches, batch_size, n_columns] tensor.
    # Since the numbers of columns differ, we must concat along axis=2
    # and track which column indices belong to which source.
    self.data_tensors = {
        dsetname: tf.concat([
            self.sources[sname].data_tensor
            for sname in self.sources_in_dset[dsetname]],
            axis=2)
        for dsetname in self.dsetnames}

    self.column_indices = dict()
    for dsetname in self.dsetnames:
        # Do not use len(cols_to_cache); some sources have extra columns...
        stop_idx = np.cumsum([
            self.sources[sname].data_tensor.shape[2]
            for sname in self.sources_in_dset[dsetname]])
        self.column_indices[dsetname] = np.transpose(
            [np.concatenate([[0], stop_idx[:-1]]), stop_idx])
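# Illustrative sketch of the column bookkeeping at the end of set_data:
# with hypothetical per-source column counts of 5, 3 and 4, the sources
# occupy columns [0:5], [5:8] and [8:12] of the concatenated data tensor.
import numpy as np

n_columns = [5, 3, 4]                    # hypothetical per-source widths
stop_idx = np.cumsum(n_columns)          # array([ 5,  8, 12])
column_indices = np.transpose(
    [np.concatenate([[0], stop_idx[:-1]]), stop_idx])
# column_indices == [[0, 5], [5, 8], [8, 12]]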
def energy_spectrum(self, i_batch):
    """Return (energies in keV, events at these energies)"""
    batch = tf.dtypes.cast(i_batch[0], dtype=fd.int_type())
    return self.all_es_centers, self.energy_tensor[batch, :, :]
def spatial_rate_mult(self, i_batch):
    batch = tf.dtypes.cast(i_batch[0], dtype=fd.int_type())
    return self.spatial_rate_tensor[batch]