class MaxEnt:
    """Maximum-entropy (multinomial logistic regression) classifier.

    Weights are a (num_features, num_labels) matrix; features are treated as
    binary (present/absent), so an instance's log-score for each label is the
    column-sum of the rows of its active features.
    """

    def __init__(self):
        # Placeholder weights; the real shape is set in train() once the
        # codebook has been populated from the training data.
        self.parameters = np.zeros((0, 0))

    def train(self, instances, dev_set=None, max_epoch=30, learning_rate=.5,
              batch_size=30):
        """Construct a statistical model from labeled instances.

        instances: training data; each must expose .features() and .label.
        dev_set: optional held-out data; per-epoch accuracy is printed.
        max_epoch / learning_rate / batch_size: SGD hyperparameters.
        """
        self.codebook = Codebook()
        self.codebook.supervised_populate(instances)
        # dimension() is presumably the (num_features, num_labels) shape,
        # given how parameters are indexed below — TODO confirm in Codebook.
        self.parameters = np.zeros((self.codebook.dimension()))
        self._train_sgd(instances, dev_set, max_epoch, learning_rate, batch_size)

    def _mini_batch(self, instances, batch_size):
        """Yield successive mini-batches from the data.

        NOTE: shuffles the caller's list in place, so each epoch sees a
        fresh random order (and the caller's sequence is reordered).
        """
        shuffle(instances)
        for i in range(0, len(instances), batch_size):
            yield instances[i:i + batch_size]

    def _compute_gradient(self, batch):
        """Return (gradient, log_likelihood) for one mini-batch.

        The gradient of the conditional log-likelihood is observed feature
        counts minus expected feature counts, where the expectation is taken
        under the model's posterior over ALL labels.
        """
        log_likelihood = 0.0
        observed_count = np.zeros(self.codebook.dimension())
        expected_count = np.zeros(self.codebook.dimension())
        for datapoint in batch:
            feature_map = [
                self.codebook.feature_index(feature)
                for feature in datapoint.features()
            ]
            gold = self.codebook.label_index(datapoint.label)
            observed_count[feature_map, gold] += 1
            # Unnormalized log-scores for every label given active features.
            lambda_vector = self.parameters[feature_map, :].sum(0)
            log_normalizer = logsumexp(lambda_vector)
            # Conditional log-likelihood of the gold label:
            # log P(y|x) = lambda[gold] - log Z.
            # (Original computed logsumexp - sum(lambda), which is not the CLL.)
            log_likelihood += lambda_vector[gold] - log_normalizer
            # BUG FIX: expected counts must accumulate the full posterior over
            # every label, not just P(gold|x) at the gold column — otherwise
            # non-gold label weights are never decreased by the gradient.
            posteriors = np.exp(lambda_vector - log_normalizer)
            expected_count[feature_map, :] += posteriors
        return observed_count - expected_count, log_likelihood

    def _train_sgd(self, train_instances, dev_set, max_epoch, learning_rate,
                   batch_size):
        """Train the model with mini-batch gradient ascent on the CLL."""
        for epoch in range(1, max_epoch + 1):
            for batch in self._mini_batch(train_instances, batch_size):
                gradient, _log_likelihood = self._compute_gradient(batch)
                # Ascent: gradient points toward higher log-likelihood.
                self.parameters += gradient * learning_rate
            if dev_set:
                print("(Epoch, accuracy):", (epoch, self.accuracy(dev_set)))

    def accuracy(self, instances):
        """Fraction of instances whose predicted label matches the gold label."""
        if not instances:
            # Guard: avoid ZeroDivisionError on an empty dev set.
            return 0.0
        correct = [self.classify(x) == x.label for x in instances]
        return float(sum(correct)) / len(correct)

    def classify(self, instance):
        """Return the most probable label for a single instance.

        Features never seen during training are silently skipped.
        """
        feature_map = [
            self.codebook.feature_index(feature)
            for feature in instance.features()
            if feature in self.codebook._features2index
        ]
        lambda_vector = self.parameters[feature_map, :].sum(0)
        # Softmax is monotonic, so argmax of posteriors equals argmax of
        # raw scores; posteriors kept for readability.
        posteriors = np.exp(lambda_vector - logsumexp(lambda_vector))
        return self.codebook.get_label(np.argmax(posteriors))