Example #1
File: learn.py Project: dirko/pyugm

import numpy
import scipy.optimize

# FloodingProtocol and LoopyBeliefUpdateInference come from pyugm's inference
# module (module path assumed from the project layout).
from pyugm.infer_message import FloodingProtocol, LoopyBeliefUpdateInference


class LearnMrfParameters(object):
    """
    Find a Gaussian approximation to the posterior given a model and prior.
    """
    def __init__(self, model, prior=1.0, initial_noise=10.0**-12, update_order=None):
        """
        The learner object.

        :param model: The Model whose parameters are to be learned.
        :param prior: Float placed on the diagonal of the prior precision matrix over all parameters.
        :param initial_noise: Scale of the random values used to initialise the parameter vector.
        :param update_order: Message update order for inference; defaults to FloodingProtocol(model).
        """
        self._model = model

        # Get number of parameters and map parameters to position in parameter vector
        parameter_set = set()
        for factor in self._model.factors:
            if factor.parameters is not None:
                for parameter in factor.parameters.reshape(-1):
                    if isinstance(parameter, str):
                        parameter_set.add(parameter)
        self._index_to_parameters = {}
        self._parameters_to_index = {}
        for index, key in enumerate(sorted(parameter_set)):
            self._index_to_parameters[index] = key
            self._parameters_to_index[key] = index

        self._dimension = len(self._index_to_parameters)
        self._parameters = numpy.random.randn(self._dimension) * initial_noise
        if self._dimension > 0:
            self._prior_location = numpy.zeros(self._dimension)
            self._prior_precision = numpy.eye(self._dimension) * prior
            self._prior_normaliser = (-0.5 * self._dimension * numpy.log(2.0 * numpy.pi)
                                      + 0.5 * numpy.log(numpy.linalg.det(self._prior_precision)))
        self._update_order = update_order
        if not self._update_order:
            self._update_order = FloodingProtocol(self._model)

        # Results
        self._iterations = []
        self._optimizer_result = None

    @property
    def parameters(self):
        """
        The current learned parameter values.
        :returns: Dictionary mapping each parameter name to its current value.
        """
        parameter_dictionary = {}
        for i, value in enumerate(self._parameters):
            parameter_dictionary[self._index_to_parameters[i]] = value
        return parameter_dictionary

    def log_likelihood_and_gradient(self, evidence):
        """
        Run inference on the model to find the log-likelihood given the evidence, and its gradient with respect
            to the model parameters.
        :param evidence: A dictionary where the key is a variable name and the value its observed value.
        :returns: The log-likelihood and a vector of derivatives.
        """
        self._update_order.reset()
        inference = LoopyBeliefUpdateInference(self._model, update_order=self._update_order)
        inference.calibrate(parameters=self.parameters)
        log_z_total = inference.partition_approximation()
        model_expected_counts = self._accumulate_expected_counts(inference)

        self._update_order.reset()
        inference = LoopyBeliefUpdateInference(self._model, update_order=self._update_order)
        inference.calibrate(evidence, parameters=self.parameters)
        log_z_observed = inference.partition_approximation()
        empirical_expected_counts = self._accumulate_expected_counts(inference)

        log_likelihood = log_z_observed - log_z_total
        derivative = empirical_expected_counts - model_expected_counts

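        # Fold in the Gaussian prior: its gradient with respect to the
        # parameters, the quadratic log-density term, and the normaliser.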
        if self._dimension > 0:
            derivative += -numpy.dot(self._prior_precision, (self._parameters - self._prior_location))
            log_likelihood += -0.5 * numpy.dot(numpy.dot((self._parameters - self._prior_location).T,
                                                         self._prior_precision),
                                               (self._parameters - self._prior_location))
            log_likelihood += self._prior_normaliser
        return log_likelihood, derivative

    def _accumulate_expected_counts(self, inference):
        """
        Iterate through beliefs and add parameter values.
        :returns: Vector of expected counts for each parameter.
        """
        expected_counts = numpy.zeros(self._parameters.shape)
        for belief in inference.beliefs.values():
            factor_sum = numpy.sum(belief.data)
            for parameter, value in zip(belief.parameters.flatten(), belief.data.flatten()):
                if isinstance(parameter, str):
                    expected_counts[self._parameters_to_index[parameter]] += value / factor_sum
        return expected_counts

    def _parameter_dictionary_to_vector(self, dictionary):
        """
        Helper to convert a dictionary representation of parameter values to the vector representation.
        :param dictionary: A dictionary where the key is a parameter name and the value its value.
        :returns: A numpy array.
        """
        # One slot per named parameter, filled in index order.
        return_vector = numpy.zeros(len(self._parameters_to_index))
        for i in range(len(return_vector)):
            return_vector[i] = dictionary[self._index_to_parameters[i]]
        return return_vector

    def fit(self,
            evidence,
            initial_parameters=None,
            optimizer=scipy.optimize.fmin_l_bfgs_b,
            optimizer_kwargs=None):
        """
        Fit the model to the data.
        :param evidence: Dictionary where the key is a variable name and the value the observed value of that variable.
        :param initial_parameters: Dictionary of initial parameter values, keyed by parameter name.
            If None, the small random values chosen at construction are used.
        :param optimizer: The optimization function to use.
        :param optimizer_kwargs: Keyword arguments that are passed to the optimizer.
        :returns: The learner object.
        """
        initial_parameter_vector = self._parameters
        if initial_parameters is not None:
            initial_parameter_vector = self._parameter_dictionary_to_vector(initial_parameters)
        if not optimizer_kwargs:
            optimizer_kwargs = {'pgtol': 10.0**-10}

        def objective_function(parameter_vector):
            """
            Function that is passed to the optimizer.
            :param parameter_vector: Parameter vector.
            :returns: Negative log-likelihood and negative gradient.
            """
            self._parameters = parameter_vector
            log_likelihood, grad = self.log_likelihood_and_gradient(evidence)
            self._iterations.append([log_likelihood, parameter_vector])
            return -log_likelihood, -grad

        self._optimizer_result = optimizer(objective_function, initial_parameter_vector, **optimizer_kwargs)
        return self

    def result(self):
        """
        Get the learning results.
        :returns: The minimised negative log-likelihood and the parameter vector found by the optimizer.
        """
        if self._optimizer_result is not None:
            return self._optimizer_result[1], self._optimizer_result[0]
        else:
            raise RuntimeError('No result yet - run fit first.')
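
A minimal usage sketch, assuming pyugm's Model and DiscreteFactor APIs (the module paths, variable names, and data below are hypothetical; string entries in a factor's parameter array name the parameters the learner estimates):

import numpy
from pyugm.factor import DiscreteFactor  # module path assumed
from pyugm.model import Model            # module path assumed

# Two binary variables joined by one pairwise factor whose entries are the
# named parameters 'a' and 'b'.
factor = DiscreteFactor([(0, 2), (1, 2)],
                        parameters=numpy.array([['a', 'b'], ['b', 'a']]))
model = Model([factor])

learner = LearnMrfParameters(model, prior=1.0)
learner.fit({0: 0, 1: 0})  # evidence: both variables observed in state 0
print(learner.parameters)  # e.g. {'a': ..., 'b': ...}
negative_log_likelihood, parameter_vector = learner.result()

Because fit maximises the likelihood plus the Gaussian log-prior, result() reports the final objective value reached along with the corresponding parameter vector.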