Example #1
    def value_and_gradient(self, weights, verbose=False):
        if weights == self.last_vg_weights:
            return self.last_vg
        objective = 0.0
        gradient = CounterMap()

        if verbose:
            print "Calculating log probabilities and objective..."

        # Per-datum log P(label | features) under the current weights
        log_probs = list()
        for pos, (label, features) in enumerate(self.labeled_extracted_features):
            log_probs.append(get_log_probs(features, weights, self.labels))
            total = sum(exp(log_probs[pos][lbl]) for lbl in self.labels)
            assert abs(total - 1.0) < 0.0001, \
                "Not a distribution: P[any | features] = %f" % total

        objective = -sum(log_prob[label] for (log_prob, (label, _)) in zip(log_probs, self.labeled_extracted_features))

        if verbose:
            print "Raw objective: %f" % objective

        if verbose:
            print "Calculating expected counts..."

        expected_counts = get_expected_counts(self.labeled_extracted_features, self.labels, log_probs, CounterMap())

        if verbose:
            print "Calculating gradient..."

        gradient = expected_counts - self.empirical_counts

        if verbose:
            print "Applying penalty"

        # Apply an L2 penalty (Gaussian prior with variance sigma^2) to smooth the weights
        if self.sigma:
            penalty = 0.0

            for label, feature_weights in gradient.items():
                for feature in feature_weights:
                    weight = weights[label][feature]
                    penalty += weight ** 2
                    gradient[label][feature] += weight / (self.sigma ** 2)

            penalty /= 2 * self.sigma ** 2
            objective += penalty
            if verbose:
                print "Penalized objective: %f" % objective

        self.last_vg_weights = weights
        self.last_vg = (objective, gradient)
        return (objective, gradient)
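
The (objective, gradient) pair returned above is shaped for a generic first-order optimizer. As a minimal sketch of such a driver (the minimize helper, the step size, and the assumption that CounterMap supports multiplication by a scalar are illustrative, not part of the original code):

def minimize(model, weights, learning_rate=0.1, iterations=100):
    # Hypothetical driver loop: step against the gradient of the
    # penalized negative log-likelihood from value_and_gradient.
    # Assumes CounterMap supports subtraction (used above) and
    # multiplication by a scalar.
    for _ in range(iterations):
        objective, gradient = model.value_and_gradient(weights)
        weights = weights - gradient * learning_rate
    return weights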
Example #2
    def test_fast_slow_equal(self):
        weights = CounterMap()
        weights['cat'] = Counter(
            (key, 1.0)
            for key in ('fuzzy', 'claws', 'small', 'medium', 'large'))
        weights['bear'] = Counter(
            (key, 1.0)
            for key in ('fuzzy', 'claws', 'small', 'medium', 'large'))

        log_probs = [
            maxent.get_log_probabilities(datum[1], weights, self.labels)
            for datum in self.labeled_extracted_features
        ]

        slow_expectation = maximumentropy.slow_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs)
        fast_expectation = maxent.get_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs,
            CounterMap())

        self.assertEqual(slow_expectation, fast_expectation)

        # And try again with different weights
        weights['cat'] = Counter(
            (key, 1.0) for key in ('fuzzy', 'claws', 'small', 'medium'))
        weights['bear'] = Counter(
            (key, 1.0) for key in ('fuzzy', 'claws', 'big'))

        log_probs = [
            maxent.get_log_probabilities(datum[1], weights, self.labels)
            for datum in self.labeled_extracted_features
        ]

        slow_expectation = maximumentropy.slow_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs)
        fast_expectation = maxent.get_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs,
            CounterMap())

        self.assertEqual(slow_expectation, fast_expectation)
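
For context, both implementations compute the model's expected feature counts: each datum's feature counts weighted by P(label | features) for every candidate label. A minimal reference sketch of that quantity (the function name is hypothetical, and it assumes the features objects expose an items() method like the Counter instances built above):

from math import exp

def expected_counts_reference(labeled_features, labels, log_probs):
    # E[count(label, feature)] under the current model: each observed
    # feature count, weighted by the label's posterior probability.
    expectation = CounterMap()
    for (_, features), log_prob in zip(labeled_features, log_probs):
        for label in labels:
            probability = exp(log_prob[label])
            for feature, count in features.items():
                expectation[label][feature] += probability * count
    return expectation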
from itertools import repeat

from maxent import get_log_probabilities, get_expected_counts
from countermap import CounterMap
from counter import Counter


def cnter(l):
    # Build a counter mapping each item of l to 1.0
    return Counter(zip(l, repeat(1.0)))


training_data = (('cat', cnter(('fuzzy', 'claws', 'small'))),
                 ('bear', cnter(('fuzzy', 'claws', 'big'))),
                 ('cat', cnter(('claws', 'medium'))))

labels = {label for label, _ in training_data}
features = set()
for _, counter in training_data:
    features.update(set(counter.keys()))

weights = CounterMap()

log_probs = list()
for _, feature_counts in training_data:
    log_probs.append(get_log_probabilities(feature_counts, weights, labels))

test = get_expected_counts(training_data, labels, log_probs, CounterMap())

print(test)
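
For reference: assuming an empty CounterMap reads absent weights as 0.0, every label scores zero here, so each datum's distribution should be uniform, with P('cat' | features) = P('bear' | features) = 0.5. The expected count of a feature under either label is then half its total count in the data; 'claws' appears in all three data points, for example, so the printed CounterMap should show 1.5 for it under both 'cat' and 'bear'.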