コード例 #1
0
ファイル: test-maxent.py プロジェクト: tensorspace/nlp-python
    def test_extraneous_label(self):
        """A label with no entry in the weight map gets probability 0 (log -inf)."""
        dog_weights = Counter({'warm': 2.0, 'fuzzy': 0.5})
        weight_map = CounterMap()
        weight_map['dog'] = dog_weights
        known_labels = set(weight_map.iterkeys())
        log_probs = maxent.get_log_probabilities(self.features, weight_map, known_labels)
        self.assertEqual(float('-inf'), log_probs['cat'])
コード例 #2
0
ファイル: test-maxent.py プロジェクト: tensorspace/nlp-python
    def test_zero_weight(self):
        """With a single label, normalization forces its log probability to 0.0."""
        weight_map = CounterMap()
        weight_map['dog'] = Counter({'warm': 2.0})
        logp = maxent.get_log_probabilities(
            self.features, weight_map, set(weight_map.iterkeys()))
        self.assertEqual(0.0, logp['dog'])
コード例 #3
0
ファイル: test-maxent.py プロジェクト: beckgom/python-nlp
	def test_zero_weight(self):
		"""A lone label must absorb all probability mass: log p == 0.0."""
		only_weights = CounterMap()
		only_weights['dog'] = Counter({'warm': 2.0})
		label_set = set(only_weights.iterkeys())
		result = maxent.get_log_probabilities(self.features, only_weights, label_set)
		self.assertEqual(result['dog'], 0.0)
コード例 #4
0
ファイル: test-maxent.py プロジェクト: beckgom/python-nlp
	def test_extraneous_label(self):
		"""Asking for a label the weights never mention yields log(0) == -inf."""
		weight_map = CounterMap()
		weight_map['dog'] = Counter({'warm': 2.0, 'fuzzy': 0.5})
		logp = maxent.get_log_probabilities(
			self.features, weight_map, set(weight_map.iterkeys()))
		self.assertEqual(logp['cat'], float('-inf'))
コード例 #5
0
ファイル: test-maxent.py プロジェクト: beckgom/python-nlp
	def setUp(self):
		"""Shared fixture: two features, a two-label weight map, and cached log probs."""
		self.features = Counter((key, 1.0) for key in ['warm', 'fuzzy'])

		# Build both label rows from one table of (label, warm, fuzzy) weights.
		self.weights = CounterMap()
		for label, warm, fuzzy in [('dog', 2.0, 0.5), ('cat', 0.5, 2.0)]:
			self.weights[label] = Counter({'warm': warm, 'fuzzy': fuzzy})

		self.labels = set(self.weights.iterkeys())
		self.logp = maxent.get_log_probabilities(self.features, self.weights, self.labels)
コード例 #6
0
ファイル: test-maxent.py プロジェクト: tensorspace/nlp-python
    def setUp(self):
        """Common fixture: features, a dog/cat weight map, labels, and log probs."""
        self.features = Counter([('warm', 1.0), ('fuzzy', 1.0)])

        weight_map = CounterMap()
        weight_map['dog'] = Counter({'warm': 2.0, 'fuzzy': 0.5})
        weight_map['cat'] = Counter({'warm': 0.5, 'fuzzy': 2.0})
        self.weights = weight_map

        self.labels = set(weight_map.iterkeys())
        self.logp = maxent.get_log_probabilities(
            self.features, self.weights, self.labels)
コード例 #7
0
ファイル: test-maxent.py プロジェクト: tensorspace/nlp-python
    def test_fast_slow_equal(self):
        """The fast expected-count path must agree with the slow reference path."""

        def check_expectations(weights):
            # Recompute per-datum log probabilities under the given weights,
            # then compare the two expected-count implementations.
            log_probs = []
            for _, extracted in self.labeled_extracted_features:
                log_probs.append(
                    maxent.get_log_probabilities(extracted, weights, self.labels))

            slow_expectation = maximumentropy.slow_expected_counts(
                self.labeled_extracted_features, self.labels, log_probs)
            fast_expectation = maxent.get_expected_counts(
                self.labeled_extracted_features, self.labels, log_probs,
                CounterMap())
            self.assertEqual(slow_expectation, fast_expectation)

        weights = CounterMap()
        weights['cat'] = Counter(
            (key, 1.0)
            for key in ('fuzzy', 'claws', 'small', 'medium', 'large'))
        weights['bear'] = Counter(
            (key, 1.0)
            for key in ('fuzzy', 'claws', 'small', 'medium', 'large'))
        check_expectations(weights)

        # And try again with different weights
        weights['cat'] = Counter(
            (key, 1.0) for key in ('fuzzy', 'claws', 'small', 'medium'))
        weights['bear'] = Counter(
            (key, 1.0) for key in ('fuzzy', 'claws', 'big'))
        check_expectations(weights)
コード例 #8
0
ファイル: test-maxent.py プロジェクト: beckgom/python-nlp
	def test_fast_slow_equal(self):
		"""Optimized expected counts must match the reference implementation."""
		full_features = ('fuzzy', 'claws', 'small', 'medium', 'large')
		weights = CounterMap()
		weights['cat'] = Counter((key, 1.0) for key in full_features)
		weights['bear'] = Counter((key, 1.0) for key in full_features)

		log_probs = [maxent.get_log_probabilities(datum[1], weights, self.labels)
					 for datum in self.labeled_extracted_features]
		self.assertEqual(
			maximumentropy.slow_expected_counts(self.labeled_extracted_features, self.labels, log_probs),
			maxent.get_expected_counts(self.labeled_extracted_features, self.labels, log_probs, CounterMap()))

		# And try again with different weights
		weights['cat'] = Counter((key, 1.0) for key in ('fuzzy', 'claws', 'small', 'medium'))
		weights['bear'] = Counter((key, 1.0) for key in ('fuzzy', 'claws', 'big'))

		log_probs = [maxent.get_log_probabilities(datum[1], weights, self.labels)
					 for datum in self.labeled_extracted_features]
		self.assertEqual(
			maximumentropy.slow_expected_counts(self.labeled_extracted_features, self.labels, log_probs),
			maxent.get_expected_counts(self.labeled_extracted_features, self.labels, log_probs, CounterMap()))
コード例 #9
0
ファイル: test-maxent.py プロジェクト: beckgom/python-nlp
	def test_performance(self):
		"""The C-backed implementation should outrun the pure-python one.

		Timing-based, so potentially flaky depending on system load.
		"""
		def elapsed_over_calls(log_prob_fn):
			# Time 100000 back-to-back invocations of the given implementation.
			begin = time.time()
			for _ in xrange(100000):
				log_prob_fn(self.features, self.weights, self.labels)
			return time.time() - begin

		slow_time = elapsed_over_calls(maximumentropy.slow_log_probs)
		fast_time = elapsed_over_calls(maxent.get_log_probabilities)

		self.assertTrue(fast_time < slow_time)
コード例 #10
0
ファイル: test-maxent.py プロジェクト: tensorspace/nlp-python
    def test_performance(self):
        """C api should beat the python api (timing test; may be flaky under load)."""
        iterations = 100000

        start = time.time()
        for _ in xrange(iterations):
            maximumentropy.slow_log_probs(self.features, self.weights,
                                          self.labels)
        slow_time = time.time() - start

        start = time.time()
        for _ in xrange(iterations):
            maxent.get_log_probabilities(self.features, self.weights,
                                         self.labels)
        fast_time = time.time() - start

        self.assertTrue(fast_time < slow_time)
コード例 #11
0
ファイル: test-maxent.py プロジェクト: tensorspace/nlp-python
    def test_uneven_weights(self):
        """Log probabilities must equal the log-normalized weight-feature scores."""
        weights = CounterMap()
        weights['dog'] = Counter({'warm': 2.0, 'fuzzy': 1.0})
        weights['cat'] = Counter({'warm': 1.0, 'fuzzy': 1.0})
        logp = maxent.get_log_probabilities(
            self.features, weights, set(weights.iterkeys()))

        # Hand-computed scores: dot product of each label's weights with
        # the unit feature vector, then log-normalized.
        expected = Counter()
        expected['dog'] = 3.0  # 2.0*1.0 + 1.0*1.0
        expected['cat'] = 2.0  # 1.0*1.0 + 1.0*1.0
        expected.log_normalize()

        # spot-check the hand-computed normalization
        self.assertAlmostEqual(expected['dog'], log(0.731), 3)
        self.assertAlmostEqual(expected['cat'], log(0.269), 3)

        # the implementation under test must agree exactly
        for label in ('dog', 'cat'):
            self.assertEqual(logp[label], expected[label])
コード例 #12
0
ファイル: test-maxent.py プロジェクト: beckgom/python-nlp
	def test_uneven_weights(self):
		"""With unequal weights the log probs must match a softmax of the scores."""
		weights = CounterMap()
		weights['dog'] = Counter({'warm': 2.0, 'fuzzy': 1.0})
		weights['cat'] = Counter({'warm': 1.0, 'fuzzy': 1.0})
		labels = set(weights.iterkeys())
		logp = maxent.get_log_probabilities(self.features, weights, labels)

		# raw scores are the weight/feature dot products
		scores = Counter()
		scores['dog'] = 2.0 + 1.0
		scores['cat'] = 1.0 + 1.0
		scores.log_normalize()

		# sanity-check the normalization against known values
		self.assertAlmostEqual(scores['dog'], log(0.731), 3)
		self.assertAlmostEqual(scores['cat'], log(0.269), 3)

		# and the implementation must reproduce them exactly
		self.assertEqual(logp['dog'], scores['dog'])
		self.assertEqual(logp['cat'], scores['cat'])
コード例 #13
0
from itertools import izip, repeat, chain

from maxent import get_log_probabilities, get_expected_counts
from countermap import CounterMap
from counter import Counter


def cnter(l):
    """Return a Counter assigning weight 1.0 to every element of l."""
    unit_weights = repeat(1.0, len(l))
    return Counter(izip(l, unit_weights))


training_data = (('cat', cnter(
    ('fuzzy', 'claws', 'small'))), ('bear', cnter(
        ('fuzzy', 'claws', 'big'))), ('cat', cnter(('claws', 'medium'))))

labels = set([label for label, _ in training_data])
features = set()
for _, counter in training_data:
    features.update(set(counter.keys()))

weights = CounterMap()

log_probs = list()
for pos, (label, features) in enumerate(training_data):
    log_probs.append(get_log_probabilities(features, weights, labels))

test = get_expected_counts(training_data, labels, log_probs, CounterMap())

print test
コード例 #14
0
from itertools import izip, repeat, chain

from maxent import get_log_probabilities, get_expected_counts
from countermap import CounterMap
from counter import Counter

def cnter(l):
	"""Build a Counter that maps each element of l to the weight 1.0."""
	return Counter(izip(l, repeat(1.0, len(l))))

training_data = (('cat', cnter(('fuzzy', 'claws', 'small'))),
				 ('bear', cnter(('fuzzy', 'claws', 'big'))),
				 ('cat', cnter(('claws', 'medium'))))

labels = set([label for label, _ in training_data])
features = set()
for _, counter in training_data:
	features.update(set(counter.keys()))

weights = CounterMap()

log_probs = list()
for pos, (label, features) in enumerate(training_data):
	log_probs.append(get_log_probabilities(features, weights, labels))

test = get_expected_counts(training_data, labels, log_probs, CounterMap())

print test