def train(self, X, Y):
     """
     Fit the decision threshold on the first feature of every observation.

     Element 0 of each row in X is collected into one list, and a parallel
     list of booleans records which labels in Y equal ``self.hit``. Both
     lists are handed to ``Threshold`` and the result is kept on
     ``self.thresh``.
     """
     first_feature = [row[0] for row in X]
     is_hit = [label == self.hit for label in Y]
     self.thresh = Threshold(first_feature, is_hit)
Example #2
0
 def train(self, X, Y):
     """
     Fit the weight vector ``self.W`` and the decision threshold.

     X is an iterable of feature rows and Y the matching labels. Every
     label must be either ``self.hit`` or ``self.miss``; any other label
     raises ``KeyError`` when the rows are grouped.

     Assuming ``mean``, ``cov``, ``asmatrix`` and ``ravel`` are the NumPy
     functions of those names, the weights are
     ``inv(cov_hit + cov_miss) . (mean_hit - mean_miss)``.
     """
     # group the feature rows by label
     table = {self.hit: [], self.miss: []}
     for (x, y) in zip(X, Y):
         table[y].append(x)
     # transpose so each entry holds one sequence per feature; the result
     # is materialised because zip() is a one-shot iterator on Python 3
     # and each table is read twice below (once by mean, once by cov)
     for col in (self.hit, self.miss):
         table[col] = list(zip(*table[col]))
     # compute weights
     delta = mean(table[self.hit], 1) - mean(table[self.miss], 1)
     covar = asmatrix(cov(table[self.hit]) + cov(table[self.miss]))
     self.W = ravel(covar.I.dot(delta))
     # compute threshold using this weight function
     self.thresh = Threshold((self.score(x) for x in X), Y, self.hit)
Example #3
0
class LDA(BinaryClassifier):
    """
    Compute a linear discriminant classifier

    An observation is scored as the dot product of its features with the
    fitted weight vector ``W``; the score is then tested against the
    fitted ``Threshold`` to choose between ``hit`` and ``miss``.

    >>> from csv import DictReader
    >>> X = []
    >>> Y = []
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     X.append([float(row['Sepal.Length']),
    ...               float(row['Sepal.Width']),
    ...               float(row['Petal.Length']),
    ...               float(row['Petal.Width'])])
    ...     Y.append(row['Species'])
    >>> L = LDA(X, Y, 'versicolor')
    >>> cm = L.leave_one_out(X, Y)
    >>> round(cm.accuracy, 2)
    0.96
    >>> round(AUC(LDA, X, Y), 2)
    1.0
    """

    def __repr__(self):
        """Return the class name, the formatted weights and the threshold."""
        return "{}(weights=[{}], {})".format(
            self.__class__.__name__, ", ".join("{: 02.3f}".format(w) for w in self.W), self.thresh
        )

    def train(self, X, Y):
        """
        Fit the weight vector ``self.W`` and the decision threshold.

        X is an iterable of feature rows and Y the matching labels. Every
        label must be either ``self.hit`` or ``self.miss``; any other
        label raises ``KeyError`` when the rows are grouped.

        Assuming ``mean``, ``cov``, ``asmatrix`` and ``ravel`` are the
        NumPy functions of those names, the weights are
        ``inv(cov_hit + cov_miss) . (mean_hit - mean_miss)``.
        """
        # group the feature rows by label
        table = {self.hit: [], self.miss: []}
        for (x, y) in zip(X, Y):
            table[y].append(x)
        # transpose so each entry holds one sequence per feature; the
        # result is materialised because zip() is a one-shot iterator on
        # Python 3 and each table is read twice below (mean, then cov)
        for col in (self.hit, self.miss):
            table[col] = list(zip(*table[col]))
        # compute weights
        delta = mean(table[self.hit], 1) - mean(table[self.miss], 1)
        covar = asmatrix(cov(table[self.hit]) + cov(table[self.miss]))
        self.W = ravel(covar.I.dot(delta))
        # compute threshold using this weight function
        self.thresh = Threshold((self.score(x) for x in X), Y, self.hit)

    def score(self, x):
        """Return the dot product of the weights ``self.W`` with row ``x``."""
        return sum(w * f for w, f in zip(self.W, x))

    def classify(self, x):
        """Return ``self.hit`` if the score of ``x`` passes the threshold, else ``self.miss``."""
        return self.hit if self.thresh.is_hit(self.score(x)) else self.miss
class Stump(BinaryClassifier):
    """
    Single-cut classifier over one continuous predictor.

    Only the first element of each predictor row is used. A ``Threshold``
    fitted on those values decides on which side of the cut an observation
    counts as "hit" and on which side as "miss"; the stated aim is to
    maximize the number of correct classifications.

    NOTE(review): ``train`` calls ``Threshold(values, flags)`` whereas
    ``LDA.train`` calls ``Threshold(scores, Y, self.hit)`` -- confirm that
    ``Threshold`` accepts both forms.

    >>> from csv import DictReader
    >>> X = []
    >>> Y = []
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     X.append([float(row['Petal.Width'])])
    ...     Y.append(row['Species'])
    >>> s = Stump(X, Y, 'versicolor')
    >>> s.leave_one_out(X, Y)
    >>> round(s.accuracy(), 2)
    0.88
    >>> round(s.AUC(X, Y), 2)
    0.99
    """

    def __repr__(self):
        """Render as ``Stump(<low label> < <split> < <high label>)``."""
        if self.thresh.hit_upper:
            below, above = self.miss, self.hit
        else:
            # hits fall below the split, so the labels trade places
            below, above = self.hit, self.miss
        template = 'Stump({} < {: 02.3f} < {})'
        return template.format(below, self.thresh.split, above)

    def train(self, X, Y):
        """
        Fit the threshold on the first feature of every row in X.

        Labels in Y are reduced to booleans (``label == self.hit``) before
        being passed to ``Threshold``.
        """
        first_feature = [row[0] for row in X]
        is_hit = [label == self.hit for label in Y]
        self.thresh = Threshold(first_feature, is_hit)

    def score(self, x):
        """The score of an observation is simply its first element."""
        return x[0]

    def classify(self, x):
        """Label ``x`` as ``self.hit`` or ``self.miss`` according to the threshold."""
        if self.thresh.is_hit(self.score(x)):
            return self.hit
        return self.miss
class Stump(BinaryClassifier):
    """
    Classifier that makes one "cut" in a continuous predictor.

    The predictor is element 0 of each row of X. The cut itself is found
    by ``Threshold``, with the stated aim of separating "hit" from "miss"
    outcomes so that as many observations as possible are classified
    correctly.

    NOTE(review): ``train`` calls ``Threshold(values, flags)`` whereas
    ``LDA.train`` calls ``Threshold(scores, Y, self.hit)`` -- confirm that
    ``Threshold`` accepts both forms.

    >>> from csv import DictReader
    >>> X = []
    >>> Y = []
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     X.append([float(row['Petal.Width'])])
    ...     Y.append(row['Species'])
    >>> s = Stump(X, Y, 'versicolor')
    >>> s.leave_one_out(X, Y)
    >>> round(s.accuracy(), 2)
    0.88
    >>> round(s.AUC(X, Y), 2)
    0.99
    """

    def __repr__(self):
        """Show the two labels on either side of the fitted split value."""
        ordered = [self.miss, self.hit]
        if not self.thresh.hit_upper:
            # hits lie below the split: list the hit label first
            ordered.reverse()
        low_label, high_label = ordered
        return "Stump({} < {: 02.3f} < {})".format(
            low_label, self.thresh.split, high_label
        )

    def train(self, X, Y):
        """
        Find the split point.

        Passes the first element of each row of X, together with a
        per-label ``== self.hit`` flag, to ``Threshold`` and stores the
        result on ``self.thresh``.
        """
        values = [features[0] for features in X]
        flags = [outcome == self.hit for outcome in Y]
        self.thresh = Threshold(values, flags)

    def score(self, x):
        """Return the first element of ``x``, which serves as its score."""
        return x[0]

    def classify(self, x):
        """Return ``self.hit`` when the threshold accepts the score of ``x``, otherwise ``self.miss``."""
        accepted = self.thresh.is_hit(self.score(x))
        if accepted:
            return self.hit
        return self.miss
 def train(self, X, Y):
     """
     Find the split point on the first feature.

     Passes element 0 of each row of X, together with a per-label
     ``== self.hit`` flag, to ``Threshold`` and stores the result on
     ``self.thresh``.
     """
     values = [features[0] for features in X]
     flags = [outcome == self.hit for outcome in Y]
     self.thresh = Threshold(values, flags)