# Beispiel #1 (example 1)
# 0
class MaxEnt:
    """Multinomial logistic-regression (maximum-entropy) classifier
    trained with mini-batch stochastic gradient ascent.

    Parameters are stored as a 2-D array indexed [feature, label]; the
    project-defined ``Codebook`` maps features and labels to indices.
    """

    def __init__(self):
        # Empty parameter matrix; train() resizes it to the codebook's
        # (n_features, n_labels) shape.
        self.parameters = np.zeros((0, 0))

    def train(self,
              instances,
              dev_set=None,
              max_epoch=30,
              learning_rate=.5,
              batch_size=30):
        """Construct a statistical model from labeled instances.

        instances     -- training data; each item provides .features()
                         and .label (project-defined interface).
        dev_set       -- optional held-out data; accuracy is printed
                         after each epoch when given.
        max_epoch     -- number of passes over the training data.
        learning_rate -- step size for each gradient update.
        batch_size    -- mini-batch size.
        """
        self.codebook = Codebook()
        self.codebook.supervised_populate(instances)

        # codebook.dimension() presumably returns the 2-D shape
        # (n_features, n_labels) -- inferred from the [feature, label]
        # indexing below; TODO confirm against Codebook.
        self.parameters = np.zeros((self.codebook.dimension()))
        self._train_sgd(instances, dev_set, max_epoch, learning_rate,
                        batch_size)

    def _mini_batch(self, instances, batch_size):
        """Yield successive mini-batches in a fresh random order.

        Fix: shuffles a copy, so the caller's list is no longer
        mutated in place.
        """
        order = list(instances)
        shuffle(order)
        for start in range(0, len(order), batch_size):
            yield order[start:start + batch_size]

    def _compute_gradient(self, batch):
        """Return (gradient, log_likelihood) for one mini-batch.

        gradient = observed feature counts - expected feature counts,
        i.e. the gradient of the conditional log-likelihood, so callers
        ascend by adding learning_rate * gradient.

        Fixes relative to the original:
        * expected counts now accumulate the posterior over ALL labels
          (the original only added mass at the gold label's column, so
          competing labels' parameters were never updated);
        * log_likelihood is the actual sum of log P(gold | datapoint)
          (the original computed logsumexp(lambda) - sum(lambda)).
        """
        log_likelihood = 0.0
        observed_count = np.zeros(self.codebook.dimension())
        expected_count = np.zeros(self.codebook.dimension())

        for datapoint in batch:
            feature_map = [
                self.codebook.feature_index(feature)
                for feature in datapoint.features()
            ]
            label_idx = self.codebook.label_index(datapoint.label)

            # Unnormalized log scores for every label given the
            # datapoint's active features.
            lambda_vector = self.parameters[feature_map, :].sum(0)
            log_normalizer = logsumexp(lambda_vector)

            observed_count[feature_map, label_idx] += 1
            # Posterior P(label | datapoint) for every label.
            posteriors = np.exp(lambda_vector - log_normalizer)
            expected_count[feature_map, :] += posteriors

            log_likelihood += lambda_vector[label_idx] - log_normalizer

        return observed_count - expected_count, log_likelihood

    def _train_sgd(self, train_instances, dev_set, max_epoch, learning_rate,
                   batch_size):
        """Train the model with mini-batch gradient ascent."""
        for epoch in range(1, max_epoch + 1):
            for batch in self._mini_batch(train_instances, batch_size):
                gradient, _log_likelihood = self._compute_gradient(batch)
                # Ascend the log-likelihood: gradient = observed - expected.
                self.parameters += gradient * learning_rate
            if dev_set:
                print("(Epoch, accuracy):", (epoch, self.accuracy(dev_set)))

    def accuracy(self, instances):
        """Fraction of instances whose predicted label matches .label.

        Fix: returns 0.0 for an empty collection instead of raising
        ZeroDivisionError.
        """
        if not instances:
            return 0.0
        correct = [self.classify(x) == x.label for x in instances]
        return float(sum(correct)) / len(correct)

    def classify(self, instance):
        """Return the most probable label for a single instance.

        Features unseen during training (absent from the codebook) are
        skipped so they contribute no score.
        """
        feature_map = [
            self.codebook.feature_index(feature)
            for feature in instance.features()
            if feature in self.codebook._features2index
        ]

        lambda_vector = self.parameters[feature_map, :].sum(0)
        posteriors = np.exp(lambda_vector - logsumexp(lambda_vector))
        return self.codebook.get_label(np.argmax(posteriors))