コード例 #1
0
    def label(self, datum):
        """Return ``arg_max()`` of the log-probabilities computed for ``datum``.

        Each feature yielded by ``ngrams(datum, 1)`` is tallied with a weight
        of 1.0 per occurrence; the tallies are passed to
        ``self.get_log_probabilities`` and ``arg_max()`` of that result is
        returned.
        """
        counts = Counter()
        for gram in ngrams(datum, 1):
            counts[gram] += 1.0

        return self.get_log_probabilities(counts).arg_max()
コード例 #2
0
    def label_distribution(self, datum):
        """Return the log-probabilities computed for ``datum``.

        Each feature yielded by ``ngrams(datum, 1)`` is tallied with a weight
        of 1.0 per occurrence, and the tallies are passed to
        ``self.get_log_probabilities``, whose result is returned unchanged.
        """
        counts = Counter()
        for gram in ngrams(datum, 1):
            counts[gram] += 1.0

        return self.get_log_probabilities(counts)
コード例 #3
0
    def label_distribution(self, datum):
        """Compute the log-probabilities for ``datum`` from its feature counts.

        The features come from ``ngrams(datum, 1)``; every occurrence adds 1.0
        to that feature's tally. The tallies are then given to
        ``self.get_log_probabilities`` and its result is returned as-is.
        """
        feature_counts = Counter()
        for item in ngrams(datum, 1):
            feature_counts[item] += 1.0

        return self.get_log_probabilities(feature_counts)
コード例 #4
0
    def label(self, datum):
        """Pick ``arg_max()`` of the log-probabilities for ``datum``.

        The features come from ``ngrams(datum, 1)``; every occurrence adds 1.0
        to that feature's tally. The tallies are given to
        ``self.get_log_probabilities`` and ``arg_max()`` is called on the
        result.
        """
        feature_counts = Counter()
        for item in ngrams(datum, 1):
            feature_counts[item] += 1.0

        return self.get_log_probabilities(feature_counts).arg_max()
コード例 #5
0
	def label(self, datum):
		"""Return ``arg_max()`` of the summed per-feature distributions of ``datum``.

		For every feature yielded by ``ngrams(datum, 3)`` the matching entry of
		``self.feature_distribution`` is accumulated with ``+=``; ``arg_max()``
		of the accumulated distribution is returned.

		If ``ngrams`` yields nothing, no distribution is ever created and the
		final ``arg_max()`` call raises ``AttributeError`` on ``None``.
		"""
		distribution = None

		for feature in ngrams(datum, 3):
			# Compare against None explicitly: an accumulated distribution that
			# happens to be empty is falsy and must not be mistaken for
			# "nothing accumulated yet".
			if distribution is None:
				# Start from a copy -- presumably so that the += below cannot
				# modify the stored per-feature entry.
				distribution = copy(self.feature_distribution[feature])
			else:
				distribution += self.feature_distribution[feature]

		return distribution.arg_max()
コード例 #6
0
ファイル: test_features.py プロジェクト: beckgom/python-nlp
	def test_one_gram(self):
		"""``features.ngrams("hello", 1)`` must match the characters of "hello".

		Both sides are converted to sets of tuples and checked for mutual
		containment.
		"""
		word = "hello"

		actual = set(map(tuple, features.ngrams(word, 1)))
		wanted = set(map(tuple, "hello"))

		# Nothing expected may be missing ...
		for gram in wanted:
			self.assertTrue(gram in actual)
		# ... and nothing unexpected may be present.
		for gram in actual:
			self.assertTrue(gram in wanted)
コード例 #7
0
    def test_one_gram(self):
        """``features.ngrams("hello", 1)`` must match the characters of "hello".

        Both sides are converted to sets of tuples and checked for mutual
        containment.
        """
        word = "hello"

        produced = set(map(tuple, features.ngrams(word, 1)))
        required = set(map(tuple, "hello"))

        # Nothing expected may be missing ...
        for gram in required:
            self.assertTrue(gram in produced)
        # ... and nothing unexpected may be present.
        for gram in produced:
            self.assertTrue(gram in required)
コード例 #8
0
	def label_distribution(self, datum):
		"""Return the log-normalized sum of per-feature distributions of ``datum``.

		For every feature yielded by ``ngrams(datum, 3)`` the matching entry of
		``self.feature_distribution`` is accumulated with ``+=``. The result has
		``log_normalize()`` called on it before being returned.

		If ``ngrams`` yields nothing, no distribution is ever created and the
		``log_normalize()`` call raises ``AttributeError`` on ``None``.
		"""
		distribution = None

		for feature in ngrams(datum, 3):
			# Compare against None explicitly: an accumulated distribution that
			# happens to be empty is falsy and must not be mistaken for
			# "nothing accumulated yet".
			if distribution is None:
				# Start from a copy -- presumably so that the += below cannot
				# modify the stored per-feature entry.
				distribution = copy(self.feature_distribution[feature])
			else:
				distribution += self.feature_distribution[feature]

		distribution.log_normalize()

		return distribution
コード例 #9
0
ファイル: test_features.py プロジェクト: beckgom/python-nlp
	def test_three_grams(self):
		"""``features.ngrams("hello", 3, start, stop)`` must yield the expected set.

		The produced features and the hand-written expectation are both
		converted to sets of tuples and compared for equality.
		"""
		test_string = "hello"
		start = "<START>"
		stop = "<STOP>"

		test_features = set(tuple(x) for x in features.ngrams(test_string, 3, start, stop))
		expected_features = set(tuple(x) for x in (
			[start, start, 'h'], [start, 'h'], ['h'],
			[start, 'h', 'e'], ['h', 'e'], ['e'],
			['h', 'e', 'l'], ['e', 'l'], ['l'],
			['e', 'l', 'l'], ['l', 'l'], ['l'],
			['l', 'l', 'o'], ['l', 'o'], ['o'],
			['l', 'o', stop], ['o', stop],
			['o', stop, stop],
		))

		# Without an assertion this test could never fail; set equality checks
		# both "nothing missing" and "nothing extra".
		self.assertEqual(expected_features, test_features)
コード例 #10
0
	def train(self, labeled_data):
		"""Build ``self.feature_distribution`` from ``(label, datum)`` pairs.

		For each pair, every feature yielded by ``ngrams(datum, 3)`` has its
		count for ``label`` incremented by 1. Afterwards every per-feature
		entry gets ``default = 0.01``, and the whole map has ``normalize()``
		followed by ``log()`` called on it.

		labeled_data: iterable of ``(label, datum)`` pairs.
		"""
		self.feature_distribution = CounterMap()
		# NOTE(review): labels are collected here but not used anywhere else
		# in this method.
		labels = set()

		for label, datum in labeled_data:
			labels.add(label)
			for feature in ngrams(datum, 3):
				self.feature_distribution[feature][label] += 1

		# Value returned for labels never counted with a feature; presumably a
		# smoothing constant -- confirm against the Counter implementation.
		for feature in self.feature_distribution.iterkeys():
			self.feature_distribution[feature].default = 0.01

		self.feature_distribution.normalize()
		self.feature_distribution.log()
コード例 #11
0
    def test_three_grams(self):
        """``features.ngrams("hello", 3, start, stop)`` must yield the expected set.

        The produced features and the hand-written expectation are both
        converted to sets of tuples and compared for equality.
        """
        test_string = "hello"
        start = "<START>"
        stop = "<STOP>"

        test_features = set(
            tuple(x) for x in features.ngrams(test_string, 3, start, stop))
        expected_features = set(
            tuple(x)
            for x in ([start, start, 'h'], [start, 'h'], ['h'],
                      [start, 'h', 'e'], ['h', 'e'], ['e'], ['h', 'e', 'l'],
                      ['e', 'l'], ['l'], ['e', 'l', 'l'], ['l', 'l'], ['l'],
                      ['l', 'l', 'o'], ['l', 'o'], ['o'], ['l', 'o', stop],
                      ['o', stop], ['o', stop, stop]))

        # Without an assertion this test could never fail; set equality checks
        # both "nothing missing" and "nothing extra".
        self.assertEqual(expected_features, test_features)
コード例 #12
0
    def train(self, labeled_data):
        """Extract features from ``(label, datum)`` pairs and train on them.

        Resets ``self.labels`` and ``self.features`` to empty sets, then for
        each pair records the label and counts every feature yielded by
        ``ngrams(datum, 1)`` (1.0 per occurrence), adding each feature to
        ``self.features``. Progress is printed to standard output, and the
        resulting ``(label, feature_counts)`` list is passed to
        ``self.train_with_features``.

        labeled_data: iterable of ``(label, datum)`` pairs.
        """
        self.labels, self.features = set(), set()

        # Parenthesised single-argument print emits the same text whether it
        # is parsed as a Python 2 statement or a Python 3 function call.
        print("Building features...")
        labeled_features = []
        for label, datum in labeled_data:
            self.labels.add(label)
            datum_features = Counter()

            for feature in ngrams(datum, 1):
                datum_features[feature] += 1.0
                self.features.add(feature)

            labeled_features.append((label, datum_features))

        print("%d features" % len(self.features))
        print("%d labels" % len(self.labels))

        self.train_with_features(labeled_features)
コード例 #13
0
    def train(self, labeled_data):
        """Extract features from ``(label, datum)`` pairs and train on them.

        Resets ``self.labels`` and ``self.features`` to empty sets, then for
        each pair records the label and counts every feature yielded by
        ``ngrams(datum, 1)`` (1.0 per occurrence), adding each feature to
        ``self.features``. Progress is printed to standard output, and the
        resulting ``(label, feature_counts)`` list is passed to
        ``self.train_with_features``.

        labeled_data: iterable of ``(label, datum)`` pairs.
        """
        self.labels, self.features = set(), set()

        # Parenthesised single-argument print emits the same text whether it
        # is parsed as a Python 2 statement or a Python 3 function call.
        print("Building features...")
        labeled_features = []
        for label, datum in labeled_data:
            self.labels.add(label)
            datum_features = Counter()

            for feature in ngrams(datum, 1):
                datum_features[feature] += 1.0
                self.features.add(feature)

            labeled_features.append((label, datum_features))

        print("%d features" % len(self.features))
        print("%d labels" % len(self.labels))

        self.train_with_features(labeled_features)