class LFIRE(ParameterInference):
    """Likelihood-Free Inference by Ratio Estimation (LFIRE).

    For a description of the LFIRE, see e.g. Thomas et al. 2018.

    References
    ----------
    O. Thomas, R. Dutta, J. Corander, S. Kaski, and M. U. Gutmann,
    Likelihood-Free Inference by Ratio Estimation,
    arXiv preprint arXiv:1611.10242, 2018.

    """

    def __init__(self,
                 model,
                 params_grid,
                 marginal=None,
                 logreg_config=None,
                 output_names=None,
                 parallel_cv=True,
                 seed_marginal=None,
                 **kwargs):
        """Initializes LFIRE.

        Parameters
        ----------
        model: ElfiModel
            The elfi graph used by the algorithm.
        params_grid: np.ndarray
            A grid over which posterior values are evaluated.
        marginal: np.ndarray, optional
            Marginal data.
        logreg_config: dict, optional
            A config dictionary for logistic regression.
        output_names: list, optional
            Names of the nodes whose outputs are included in the batches.
        parallel_cv: bool, optional
            Either cross-validation or elfi can be run in parallel.
        batch_size: int, optional
            A size of training data.
        seed_marginal: int, optional
            Seed for marginal data generation.
        kwargs:
            See InferenceMethod.

        """
        super(LFIRE, self).__init__(model, output_names, **kwargs)

        # LFIRE operates on summary statistics; a model without Summary
        # nodes cannot be used.
        self.summary_names = self._get_summary_names()
        if len(self.summary_names) == 0:
            raise NotImplementedError(
                'Your model must have at least one Summary node.')

        self.params_grid = self._resolve_params_grid(params_grid)
        self.marginal = self._resolve_marginal(marginal, seed_marginal)
        self.observed = self._get_observed_summary_values()
        self.joint_prior = ModelPrior(self.model)
        self.logreg_config = self._resolve_logreg_config(
            logreg_config, parallel_cv)

        # Either elfi or the classifier's cross-validation runs in
        # parallel, never both (see _resolve_elfi_client).
        self._resolve_elfi_client(parallel_cv)

        # One classifier fit -- and thus one posterior value -- per
        # grid point, so all result arrays are sized by the grid.
        n_batches = self.params_grid.shape[0]
        self.state['posterior'] = np.empty(n_batches)
        self.state['lambda'] = np.empty(n_batches)
        self.state['coef'] = np.empty((n_batches, self.observed.shape[1]))
        self.state['intercept'] = np.empty(n_batches)
        # Parameter values whose posterior came out non-finite are
        # recorded here (see update()).
        self.state['infinity'] = {
            parameter_name: []
            for parameter_name in self.parameter_names
        }
        for parameter_name in self.parameter_names:
            self.state[parameter_name] = np.empty(n_batches)

    def set_objective(self):
        """Sets objective for inference."""
        self.objective['n_batches'] = self.params_grid.shape[0]
        self.objective['n_sim'] = self.params_grid.shape[0] * self.batch_size

    def extract_result(self):
        """Extracts the result from the current state.

        Returns
        -------
        LFIREPosterior

        """
        # NOTE(review): `outputs` aliases the live state dict, so the
        # returned result keeps changing while inference continues --
        # confirm this is intended (a later revision deep-copies here).
        return LFIREPosterior(method_name='LFIRE',
                              outputs=self.state,
                              parameter_names=self.parameter_names)

    def update(self, batch, batch_index):
        """Updates the inference state with a new batch and performs LFIRE.

        Parameters
        ----------
        batch: dict
        batch_index: int

        """
        # TODO: beautify this
        super(LFIRE, self).update(batch, batch_index)

        # Parse likelihood values
        likelihood = [
            batch[summary_name] for summary_name in self.summary_names
        ]
        likelihood = np.column_stack(likelihood)

        # Create training data: +1 labels for simulations from the
        # current parameter value, -1 labels for the marginal data.
        X = np.vstack((likelihood, self.marginal))
        y = np.concatenate((np.ones(likelihood.shape[0]),
                            -1 * np.ones(self.marginal.shape[0])))

        # Logistic regression
        m = LogitNet(**self.logreg_config)
        m.fit(X, y)

        # Likelihood value: the fitted log-ratio evaluated at the
        # observed summary statistics.
        log_likelihood_value = m.intercept_ + np.sum(
            np.multiply(m.coef_, self.observed))
        likelihood_value = np.exp(log_likelihood_value)

        # Joint prior value
        parameter_values = [
            batch[parameter_name] for parameter_name in self.parameter_names
        ]
        joint_prior_value = self.joint_prior.pdf(parameter_values)

        # Posterior value
        posterior_value = joint_prior_value * likelihood_value

        # Check if posterior value is non-finite; replace with zero and
        # record the offending parameter values under state['infinity'].
        if np.isinf(posterior_value):
            params = self.params_grid[batch_index]
            warnings.warn(
                f'Posterior value is not finite for parameters \
{self.parameter_names} = {params} and thus will be replaced with zero!',
                RuntimeWarning)
            posterior_value = 0
            for i, parameter_name in enumerate(self.parameter_names):
                self.state['infinity'][parameter_name] += [params[i]]

        # Update state dictionary
        self.state['posterior'][batch_index] = posterior_value
        self.state['lambda'][batch_index] = m.lambda_best_
        self.state['coef'][batch_index, :] = m.coef_
        self.state['intercept'][batch_index] = m.intercept_
        for parameter_name in self.parameter_names:
            self.state[parameter_name][batch_index] = batch[parameter_name]

    def prepare_new_batch(self, batch_index):
        """Prepares a new batch for elfi.

        Parameters
        ----------
        batch_index: int

        Returns
        -------
        dict

        """
        params = self.params_grid[batch_index]
        names = self.parameter_names
        batch = {p: params[i] for i, p in enumerate(names)}
        return batch

    def _resolve_params_grid(self, params_grid):
        """Resolves parameters grid.

        Parameters
        ----------
        params_grid: np.ndarray

        Returns
        -------
        np.ndarray

        """
        if isinstance(params_grid, np.ndarray) and len(params_grid.shape) == 2:
            return params_grid
        else:
            raise TypeError('params_grid must be 2d numpy array.')

    def _resolve_marginal(self, marginal, seed_marginal=None):
        """Resolves marginal data.

        Parameters
        ----------
        marginal: np.ndarray
        seed_marginal: int, optional

        Returns
        -------
        np.ndarray

        """
        if marginal is None:
            # No marginal data given: simulate it from the model.
            marginal = self._generate_marginal(seed_marginal)
            x, y = marginal.shape
            logger.info(f'New marginal data ({x} x {y}) are generated.')
            return marginal
        elif isinstance(marginal, np.ndarray) and len(marginal.shape) == 2:
            return marginal
        else:
            raise TypeError('marginal must be 2d numpy array.')

    def _generate_marginal(self, seed_marginal=None):
        """Generates marginal data.

        Parameters
        ----------
        seed_marginal: int, optional

        Returns
        -------
        np.ndarray

        """
        if seed_marginal is None:
            batch = self.model.generate(self.batch_size)
        else:
            batch = self.model.generate(self.batch_size, seed=seed_marginal)
        marginal = [batch[summary_name] for summary_name in self.summary_names]
        marginal = np.column_stack(marginal)
        return marginal

    def _get_summary_names(self):
        """Gets the names of summary statistics.

        Returns
        -------
        list

        """
        summary_names = []
        for node in self.model.nodes:
            # Underscore-prefixed nodes are treated as private/internal
            # and excluded from the summary set.
            if isinstance(self.model[node], Summary) and not node.startswith('_'):
                summary_names.append(node)
        return summary_names

    def _get_observed_summary_values(self):
        """Gets observed values for summary statistics.

        Returns
        -------
        np.ndarray

        """
        observed_ss = [
            self.model[summary_name].observed
            for summary_name in self.summary_names
        ]
        observed_ss = np.column_stack(observed_ss)
        return observed_ss

    def _resolve_logreg_config(self, logreg_config, parallel_cv):
        """Resolves logistic regression config.

        Parameters
        ----------
        logreg_config: dict
            Config dictionary for logistic regression.
        parallel_cv: bool

        Returns
        -------
        dict

        """
        if isinstance(logreg_config, dict):
            # TODO: check valid kwargs
            return logreg_config
        else:
            return self._get_default_logreg_config(parallel_cv)

    def _get_default_logreg_config(self, parallel_cv):
        """Creates logistic regression config.

        Parameters
        ----------
        parallel_cv: bool

        Returns
        -------
        dict

        """
        logreg_config = {
            'alpha': 1,
            'n_splits': 10,
            # Use every core for cross-validation only when elfi itself
            # is not running in parallel.
            'n_jobs': cpu_count() if parallel_cv else 1,
            'cut_point': 0
        }
        return logreg_config

    def _resolve_elfi_client(self, parallel_cv):
        """Resolves elfi client.

        Either elfi or cross-validation can be run in parallel.

        Parameters
        ----------
        parallel_cv: bool

        """
        if parallel_cv:
            # Force a serial (native) elfi client so that parallelism
            # is spent on the classifier's cross-validation instead.
            set_client('native')
def test_pdf(self, ma2):
    """Verify that the joint prior's pdf equals exp(logpdf) on sampled points."""
    joint_prior = ModelPrior(ma2)
    samples = joint_prior.rvs(size=10)
    densities = joint_prior.pdf(samples)
    log_densities = joint_prior.logpdf(samples)
    assert np.allclose(densities, np.exp(log_densities))
class LFIRE(ParameterInference):
    """Likelihood-Free Inference by Ratio Estimation (LFIRE).

    For a description of the LFIRE, see e.g. Thomas et al. 2018.

    References
    ----------
    O. Thomas, R. Dutta, J. Corander, S. Kaski, and M. U. Gutmann,
    Likelihood-Free Inference by Ratio Estimation,
    arXiv preprint arXiv:1611.10242, 2018.

    """

    def __init__(self,
                 model,
                 params_grid,
                 marginal=None,
                 classifier=None,
                 output_names=None,
                 seed_marginal=None,
                 precomputed_models=None,
                 **kwargs):
        """Initializes LFIRE.

        Parameters
        ----------
        model: ElfiModel
            The elfi graph used by the algorithm.
        params_grid: np.ndarray
            A grid over which posterior values are evaluated.
        marginal: np.ndarray, optional
            Marginal data.
        classifier: Classifier, optional
            Classifier instance to be used. Default LogisticRegression.
        output_names: list, optional
            Names of the nodes whose outputs are included in the batches.
        batch_size: int, optional
            A size of training data.
        seed_marginal: int, optional
            Seed for marginal data generation.
        precomputed_models: file or str, optional
            Precomputed classifier parameters file.
        kwargs:
            See InferenceMethod.

        """
        super(LFIRE, self).__init__(model, output_names, **kwargs)

        # 1. parse model: LFIRE operates on summary statistics, so the
        # model must expose at least one Summary node.
        self.summary_names = self._get_summary_names()
        if len(self.summary_names) == 0:
            raise NotImplementedError(
                'Your model must have at least one Summary node.')
        self.joint_prior = ModelPrior(self.model)

        # 2. LFIRE setup: one ratio-estimation model per grid point.
        self.params_grid = self._resolve_params_grid(params_grid)
        self.classifier = self._resolve_classifier(classifier)
        self._resolve_elfi_client(self.classifier.parallel_cv)
        n_batches = self.params_grid.shape[0]

        # 3. initialise results containers:
        self.state['posterior'] = np.empty(n_batches)
        # Parameter values whose posterior came out non-finite are
        # recorded here (see update()).
        self.state['infinity'] = {
            parameter_name: []
            for parameter_name in self.parameter_names
        }

        # 4. initialise or load likelihood ratio models: marginal data
        # are only needed when the classifiers still have to be fitted.
        if precomputed_models is None:
            self.marginal = self._resolve_marginal(marginal, seed_marginal)
            for parameter_name in self.parameter_names:
                self.state[parameter_name] = np.empty(n_batches)
        else:
            self.load_models(precomputed_models)

        # 5. calculate prior probabilities over the whole grid up front;
        # update() and _evaluate_posterior() index into this array.
        self.state['prior'] = self.joint_prior.pdf(params_grid)

    def set_objective(self):
        """Sets objective for inference."""
        self.objective['n_batches'] = self.params_grid.shape[0]
        self.objective['n_sim'] = self.params_grid.shape[0] * self.batch_size

    def extract_result(self):
        """Extracts the result from the current state.

        Returns
        -------
        LFIREPosterior

        """
        # Deep-copy so the returned result is a snapshot that does not
        # mutate while inference continues.
        return LFIREPosterior(method_name='LFIRE',
                              outputs=copy.deepcopy(self.state),
                              parameter_names=self.parameter_names)

    def update(self, batch, batch_index):
        """Updates the inference state with a new batch and performs LFIRE.

        Parameters
        ----------
        batch: dict
        batch_index: int

        """
        # TODO: beautify this
        super(LFIRE, self).update(batch, batch_index)

        # Parse likelihood values
        likelihood = [
            batch[summary_name] for summary_name in self.summary_names
        ]
        likelihood = np.column_stack(likelihood)

        # Create training data: +1 labels for simulations from the
        # current parameter value, -1 labels for the marginal data.
        X = np.vstack((likelihood, self.marginal))
        y = np.concatenate((np.ones(likelihood.shape[0]),
                            -1 * np.ones(self.marginal.shape[0])))

        # Classification: fit the ratio model for this grid point.
        self.classifier.fit(X, y, index=batch_index)

        # Likelihood value: the fitted ratio evaluated at the observed
        # summary statistics (self.observed is set in infer()).
        likelihood_ratio = self.classifier.predict_likelihood_ratio(
            self.observed, index=batch_index)

        # Posterior value
        posterior_value = self.state['prior'][batch_index] * likelihood_ratio

        # Check if posterior value is non-finite; replace with zero and
        # record the offending parameter values under state['infinity'].
        if np.isinf(posterior_value):
            params = self.params_grid[batch_index]
            warnings.warn(
                f'Posterior value is not finite for parameters \
{self.parameter_names} = {params} and thus will be replaced with zero!',
                RuntimeWarning)
            posterior_value = 0
            for i, parameter_name in enumerate(self.parameter_names):
                self.state['infinity'][parameter_name] += [params[i]]

        # Update state dictionary
        self.state['posterior'][batch_index] = posterior_value
        for parameter_name in self.parameter_names:
            self.state[parameter_name][batch_index] = batch[parameter_name]

    def prepare_new_batch(self, batch_index):
        """Prepares a new batch for elfi.

        Parameters
        ----------
        batch_index: int

        Returns
        -------
        dict

        """
        params = self.params_grid[batch_index]
        names = self.parameter_names
        batch = {p: params[i] for i, p in enumerate(names)}
        return batch

    def infer(self, *args, observed=None, **kwargs):
        """Set the objective and start the iterate loop until the inference is finished.

        See the other arguments from the `set_objective` method.

        Parameters
        ----------
        observed: dict, optional
            Observed data with node names as keys.
        bar : bool, optional
            Flag to remove (False) or keep (True) the progress bar from/in output.

        Returns
        -------
        LFIREPosterior

        """
        # 1. extract observed sum stats; update() and
        # _evaluate_posterior() rely on self.observed being set here.
        if observed is not None:
            self.model.observed = observed
        self.observed = self._get_observed_summary_values()

        # 2. evaluate posterior: run the simulation loop only if some
        # grid points still lack a fitted classifier (e.g. models were
        # not precomputed); otherwise reuse the stored models.
        if self.state['n_batches'] < self.params_grid.shape[0]:
            post = super(LFIRE, self).infer(*args, **kwargs)
        else:
            post = self._evaluate_posterior()

        return post

    def save_models(self, file):
        """Save parameter grid and classifier parameters.

        Parameters
        ----------
        file: file or str
            File or filename to which the data is saved.

        """
        data = defaultdict(list)

        # 1. parameter grid
        for parameter_name in self.parameter_names:
            data[parameter_name] = self.state[parameter_name]

        # 2. classifier parameters
        # store classifier parameters in the same order as parameter names:
        for batch_index in range(self.state['n_batches']):
            params = self.classifier.get(batch_index)
            for param in params.keys():
                data[param].append(params[param])

        np.savez(file, **data)

    def load_models(self, file):
        """Load parameter grid and classifier parameters.

        Parameters
        ----------
        file: file or str
            File or filename that contains the data.

        Raises
        ------
        KeyError
            If a model parameter is missing from the saved data.
        ValueError
            If the saved parameter values do not match the input grid.

        """
        data = np.load(file)

        # 1. load parameter grid:
        for index, parameter_name in enumerate(self.parameter_names):
            if parameter_name not in data:
                raise KeyError('Model parameter {} '.format(parameter_name)
                               + 'not found in saved data')
            # BUG FIX: the original condition `np.all(... != ...)` only
            # raised when *every* grid value differed, silently accepting
            # partially mismatching grids. Any single mismatch must fail.
            if np.any(self.params_grid[:, index] != data[parameter_name]):
                raise ValueError(
                    'Parameter values in saved data do not match ' +
                    'the input parameter grid.')
            self.state[parameter_name] = data[parameter_name]

        # 2. load classifier parameters:
        n_batches = self.params_grid.shape[0]
        for batch_index in range(n_batches):
            params = {param: data[param][batch_index] for param in data.files}
            self.classifier.set(params, batch_index)

        # 3. update inference state so infer() skips re-simulation:
        self.state['n_batches'] = n_batches

    def _evaluate_posterior(self):
        """Evaluates posterior probabilities.

        Returns
        -------
        LFIREPosterior

        """
        for ii in range(self.state['n_batches']):
            # evaluate likelihood ratio with precomputed classifier parameters
            ratio = self.classifier.predict_likelihood_ratio(self.observed,
                                                             index=ii)
            # calculate posterior value
            self.state['posterior'][ii] = self.state['prior'][ii] * ratio

        return self.extract_result()

    def _resolve_params_grid(self, params_grid):
        """Resolves parameters grid.

        Parameters
        ----------
        params_grid: np.ndarray

        Returns
        -------
        np.ndarray

        """
        if isinstance(params_grid, np.ndarray) and len(params_grid.shape) == 2:
            return params_grid
        else:
            raise TypeError('params_grid must be 2d numpy array.')

    def _resolve_marginal(self, marginal, seed_marginal=None):
        """Resolves marginal data.

        Parameters
        ----------
        marginal: np.ndarray
        seed_marginal: int, optional

        Returns
        -------
        np.ndarray

        """
        if marginal is None:
            # No marginal data given: simulate it from the model.
            marginal = self._generate_marginal(seed_marginal)
            x, y = marginal.shape
            logger.info(f'New marginal data ({x} x {y}) are generated.')
            return marginal
        elif isinstance(marginal, np.ndarray) and len(marginal.shape) == 2:
            return marginal
        else:
            raise TypeError('marginal must be 2d numpy array.')

    def _generate_marginal(self, seed_marginal=None):
        """Generates marginal data.

        Parameters
        ----------
        seed_marginal: int, optional

        Returns
        -------
        np.ndarray

        """
        if seed_marginal is None:
            batch = self.model.generate(self.batch_size)
        else:
            batch = self.model.generate(self.batch_size, seed=seed_marginal)
        marginal = [batch[summary_name] for summary_name in self.summary_names]
        marginal = np.column_stack(marginal)
        return marginal

    def _get_summary_names(self):
        """Gets the names of summary statistics.

        Returns
        -------
        list

        """
        summary_names = []
        for node in self.model.nodes:
            # Underscore-prefixed nodes are treated as private/internal
            # and excluded from the summary set.
            if isinstance(self.model[node], Summary) and not node.startswith('_'):
                summary_names.append(node)
        return summary_names

    def _get_observed_summary_values(self):
        """Gets observed values for summary statistics.

        Returns
        -------
        np.ndarray

        """
        observed_ss = [
            self.model[summary_name].observed
            for summary_name in self.summary_names
        ]
        observed_ss = np.column_stack(observed_ss)
        return observed_ss

    def _resolve_classifier(self, classifier):
        """Resolves classifier.

        Parameters
        ----------
        classifier: Classifier or None

        Returns
        -------
        Classifier

        Raises
        ------
        ValueError
            If classifier is not a Classifier instance.

        """
        if classifier is None:
            return LogisticRegression()
        elif isinstance(classifier, Classifier):
            return classifier
        else:
            raise ValueError('classifier must be an instance of Classifier.')

    def _resolve_elfi_client(self, parallel_cv):
        """Resolves elfi client.

        Either elfi or cross-validation can be run in parallel.

        Parameters
        ----------
        parallel_cv: bool

        """
        if parallel_cv:
            # Force a serial (native) elfi client so that parallelism
            # is spent on the classifier's cross-validation instead.
            set_client('native')