Example #1
0
class NaiveBayes(Classifier):
    """
    Naive Bayes classifier: assumes the attributes are conditionally
    independent given the class and applies Bayes' theorem.  All
    probabilities are kept in log-space to avoid floating-point underflow.
    """

    def learn(self):
        """Estimate class priors P(C) and conditional frequencies P(Fi|C)
        from ``self.dataset``.

        Raises:
            ValueError: if the dataset yields no examples.
        """
        # Frequency count of target classes (log-space prior P(C)).
        self.C = OnlineLogProbability()
        # Frequency count of P(Fi|C):
        self.Fi = defaultdict(lambda:  # For each class,
                      defaultdict(lambda:  # For each attribute,
                          OnlineLogProbability()))  # For each value, count it

        for example in self.dataset:
            class_ = self.target(example)
            self.C.add(class_)
            for attribute in self.attributes:
                value = attribute(example)
                self.Fi[class_][attribute].add(value)
        if not self.C:
            raise ValueError("Dataset is empty")

        # Cripple defaultdict to a regular dict, so now it can raise
        # KeyError for attribute values never seen during training.
        self.Fi.default_factory = None
        # .values() iterates on both Python 2 and 3 (itervalues() is
        # Python-2-only and breaks under Python 3).
        for d in self.Fi.values():
            d.default_factory = None

    def classify(self, example):
        """Return ``(best_class, probability)`` for *example*.

        The probability is normalized over the candidate classes that had
        every attribute value seen in training; if no class qualifies,
        fall back to the best a-priori class (unnormalized prior).
        """
        values = [(attribute, attribute(example))
                  for attribute in self.attributes]
        hypotheses = []
        for class_ in self.C:
            try:
                ps = [self.Fi[class_][attr][val] for attr, val in values]
            except KeyError:
                continue  # A value not seen in training, so Prob(class) == 0
            ps.append(self.C[class_])  # add the log-prior of the class
            hypotheses.append((sum(ps), class_))

        if hypotheses:
            logprob, best = max(hypotheses)
            # Normalization constant: log of the summed (exp'd) scores.
            Z = numpy.logaddexp.reduce([p for p, class_ in hypotheses])
            logprob = logprob - Z
        else:  # Something not at all seen in training, return best a priori
            # .items() works on both Python 2 and 3 (iteritems() is Py2-only).
            logprob, best = max((p, class_) for class_, p in self.C.items())
        p = numpy.exp(logprob)
        assert 0.0 <= p <= 1.0
        return best, p
Example #2
0
class NaiveBayes(Classifier):
    """
    Classifier based on Bayes' theorem with a naive independence
    assumption between attributes.  Works entirely in log-space so that
    products of many small probabilities do not underflow.
    """
    def learn(self):
        """Build the frequency tables P(C) and P(Fi|C) from the dataset.

        Raises:
            ValueError: if ``self.dataset`` produced no examples.
        """
        # Frequency count of target classes (log-space prior P(C)).
        self.C = OnlineLogProbability()
        # Frequency count of P(Fi|C):
        self.Fi = defaultdict(
            lambda:  # For each class,
            defaultdict(lambda:  # For each attribute,
                        OnlineLogProbability()))  # For each value, count it

        for example in self.dataset:
            class_ = self.target(example)
            self.C.add(class_)
            for attribute in self.attributes:
                value = attribute(example)
                self.Fi[class_][attribute].add(value)
        if not self.C:
            raise ValueError("Dataset is empty")

        # Cripple defaultdict to a regular dict, so now it can raise
        # KeyError for values never observed during training.
        self.Fi.default_factory = None
        # .values() is Py2/Py3-portable; itervalues() exists only in Py2.
        for d in self.Fi.values():
            d.default_factory = None

    def classify(self, example):
        """Return the ``(class, probability)`` pair that best explains
        *example*, normalizing over all viable candidate classes.  If no
        class saw every attribute value in training, fall back to the
        best a-priori class.
        """
        values = [(attribute, attribute(example))
                  for attribute in self.attributes]
        hypotheses = []
        for class_ in self.C:
            try:
                ps = [self.Fi[class_][attr][val] for attr, val in values]
            except KeyError:
                continue  # A value not seen in training, so Prob(class) == 0
            ps.append(self.C[class_])  # add the class log-prior
            hypotheses.append((sum(ps), class_))

        if hypotheses:
            logprob, best = max(hypotheses)
            # Z normalizes the winning score against all candidates.
            Z = numpy.logaddexp.reduce([p for p, class_ in hypotheses])
            logprob = logprob - Z
        else:  # Something not at all seen in training, return best a priori
            # .items() is Py2/Py3-portable; iteritems() exists only in Py2.
            logprob, best = max(
                (p, class_) for class_, p in self.C.items())
        p = numpy.exp(logprob)
        assert 0.0 <= p <= 1.0
        return best, p
Example #3
0
    def learn(self):
        """Estimate class priors P(C) and conditional frequency tables
        P(Fi|C) from ``self.dataset``.

        Raises:
            ValueError: if the dataset is empty.
        """
        # Frequency count of target classes (log-space prior P(C)).
        self.C = OnlineLogProbability()
        # Frequency count of P(Fi|C):
        self.Fi = defaultdict(
            lambda:  # For each class,
            defaultdict(lambda:  # For each attribute,
                        OnlineLogProbability()))  # For each value, count it

        for example in self.dataset:
            class_ = self.target(example)
            self.C.add(class_)
            for attribute in self.attributes:
                value = attribute(example)
                self.Fi[class_][attribute].add(value)
        if not self.C:
            raise ValueError("Dataset is empty")

        # Cripple defaultdict to a regular dict, so now it can raise
        # KeyError for values never seen in training.
        self.Fi.default_factory = None
        # .values() iterates on both Python 2 and 3; itervalues() is
        # Python-2-only and breaks under Python 3.
        for d in self.Fi.values():
            d.default_factory = None
Example #4
0
    def learn(self):
        """Build the frequency tables P(C) and P(Fi|C) from the dataset.

        Raises:
            ValueError: if ``self.dataset`` produced no examples.
        """
        # Frequency count of target classes (log-space prior P(C)).
        self.C = OnlineLogProbability()
        # Frequency count of P(Fi|C):
        self.Fi = defaultdict(lambda:  # For each class,
                      defaultdict(lambda:  # For each attribute,
                          OnlineLogProbability()))  # For each value, count it

        for example in self.dataset:
            class_ = self.target(example)
            self.C.add(class_)
            for attribute in self.attributes:
                value = attribute(example)
                self.Fi[class_][attribute].add(value)
        if not self.C:
            raise ValueError("Dataset is empty")

        # Cripple defaultdict to a regular dict, so now it can raise
        # KeyError for attribute values never observed in training.
        self.Fi.default_factory = None
        # .values() is portable across Python 2 and 3 (itervalues() is not).
        for d in self.Fi.values():
            d.default_factory = None