class Least_Square_Discriminant(Classifier):
    """Multi-class classifier fitted by regularised least squares.

    The labels are one-hot encoded and regressed on the preprocessed design
    matrix; an observation is assigned to the class whose fitted score is
    the largest.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): sorted unique class labels seen in training.
        weight (numpy.matrix): coefficient matrix, one column per class.
    """

    def __init__(self, X, y):
        """Build the preprocessor from X and fit the weights.

        Args:
            X (numpy.ndarray): training design matrix, one row per
                observation.
            y (numpy.ndarray): training class labels.
        """
        # Original note: Data_Preprocessor will copy X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        self.y_vals = np.unique(y)
        y_recode = self.recode_y_to_bit_vector(y)
        self.weight = self.calc_weight(X, y_recode)

    def predict(self, X_new, output=0):
        """Return the predicted class label for every row of X_new.

        Args:
            X_new (numpy.ndarray): new design matrix.
            output (int, optional): unused; kept for interface
                compatibility.

        Returns:
            numpy.ndarray: predicted labels as an (n, 1) column.
        """
        X_new = self.data_preprocessor.predict(X_new)
        predicted_score = self.predict_score(X_new, self.weight)
        return self.predict_class(predicted_score, self.y_vals)

    def validate(self, X_new, y_new, output=0):
        """Return the misclassification rate on (X_new, y_new).

        Args:
            X_new (numpy.ndarray): new design matrix.
            y_new (numpy.ndarray): true labels for X_new.
            output (int, optional): unused; kept for interface
                compatibility.

        Returns:
            float: fraction of rows whose prediction differs from y_new.
        """
        # Reuse predict() so the two code paths cannot drift apart.
        predicted_class = self.predict(X_new, output)
        return self.calc_predict_error(predicted_class, y_new)

    def recode_y_to_bit_vector(self, y):
        """One-hot encode y against self.y_vals.

        Returns:
            numpy.ndarray: (y.size, n_classes) array with a single 1 per row,
            placed in the column of the first matching entry of self.y_vals
            (column 0 when nothing matches, as argmax of all-False is 0).
        """
        y_vals = self.y_vals
        labels = np.ravel(y)
        matches = labels[:, None] == y_vals[None, :]
        columns = np.argmax(matches, axis=1)
        y_new = np.zeros((labels.size, y_vals.size))
        y_new[np.arange(labels.size), columns] = 1
        return y_new

    def calc_weight(self, X, y):
        """Solve the regularised normal equations (X'X + I) W = X'y.

        The identity matrix is added to X'X to fix its condition.  The
        system is solved directly rather than by forming the inverse, which
        is cheaper and numerically more stable.

        Returns:
            numpy.matrix: (n_features, n_classes) coefficient matrix.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        gram = np.dot(X.T, X) + np.identity(X.shape[1])
        return mat(np.linalg.solve(gram, np.dot(X.T, y)))

    def predict_score(self, X, weight):
        """Return the matrix product X * weight as a numpy.matrix."""
        return mat(X) * mat(weight)

    def predict_class(self, score, y_vals):
        """Map each row of score to the label with the largest score.

        Args:
            score: (n, n_classes) array or matrix of class scores.
            y_vals (numpy.ndarray): class labels, one per score column.

        Returns:
            numpy.ndarray: labels as an (n, 1) column.  The column shape is
            what the original matrix-based code produced and is kept so that
            existing callers see the same shape.
        """
        # Flatten explicitly instead of relying on how np.matrix iterates.
        best_column = np.asarray(np.argmax(score, axis=1)).ravel()
        return np.asarray(y_vals)[best_column].reshape(-1, 1)

    def calc_predict_error(self, predicted_class, y):
        """Return 1 - accuracy of predicted_class against y.

        Only the first y.size predictions are compared, as before.
        """
        y = np.asarray(y)
        predicted = np.ravel(predicted_class)[:y.size]
        hits = predicted == np.ravel(y)
        # float() guards against integer division on Python 2.
        return 1 - np.sum(hits) / float(y.size)
class Least_Square_Discriminant(Classifier):
    """Multi-class classifier fitted by regularised least squares.

    The labels are one-hot encoded and regressed on the preprocessed design
    matrix; an observation is assigned to the class whose fitted score is
    the largest.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): sorted unique class labels seen in training.
        weight (numpy.matrix): coefficient matrix, one column per class.
    """

    def __init__(self, X, y):
        """Build the preprocessor from X and fit the weights.

        Args:
            X (numpy.ndarray): training design matrix, one row per
                observation.
            y (numpy.ndarray): training class labels.
        """
        # Original note: Data_Preprocessor will copy X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        self.y_vals = np.unique(y)
        y_recode = self.recode_y_to_bit_vector(y)
        self.weight = self.calc_weight(X, y_recode)

    def predict(self, X_new, output=0):
        """Return the predicted class label for every row of X_new.

        Args:
            X_new (numpy.ndarray): new design matrix.
            output (int, optional): unused; kept for interface
                compatibility.

        Returns:
            numpy.ndarray: predicted labels as an (n, 1) column.
        """
        X_new = self.data_preprocessor.predict(X_new)
        predicted_score = self.predict_score(X_new, self.weight)
        return self.predict_class(predicted_score, self.y_vals)

    def validate(self, X_new, y_new, output=0):
        """Return the misclassification rate on (X_new, y_new).

        Args:
            X_new (numpy.ndarray): new design matrix.
            y_new (numpy.ndarray): true labels for X_new.
            output (int, optional): unused; kept for interface
                compatibility.

        Returns:
            float: fraction of rows whose prediction differs from y_new.
        """
        # Reuse predict() so the two code paths cannot drift apart.
        predicted_class = self.predict(X_new, output)
        return self.calc_predict_error(predicted_class, y_new)

    def recode_y_to_bit_vector(self, y):
        """One-hot encode y against self.y_vals.

        Returns:
            numpy.ndarray: (y.size, n_classes) array with a single 1 per row,
            placed in the column of the first matching entry of self.y_vals
            (column 0 when nothing matches, as argmax of all-False is 0).
        """
        y_vals = self.y_vals
        labels = np.ravel(y)
        matches = labels[:, None] == y_vals[None, :]
        columns = np.argmax(matches, axis=1)
        y_new = np.zeros((labels.size, y_vals.size))
        y_new[np.arange(labels.size), columns] = 1
        return y_new

    def calc_weight(self, X, y):
        """Solve the regularised normal equations (X'X + I) W = X'y.

        The identity matrix is added to X'X to fix its condition.  The
        system is solved directly rather than by forming the inverse, which
        is cheaper and numerically more stable.

        Returns:
            numpy.matrix: (n_features, n_classes) coefficient matrix.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        gram = np.dot(X.T, X) + np.identity(X.shape[1])
        return mat(np.linalg.solve(gram, np.dot(X.T, y)))

    def predict_score(self, X, weight):
        """Return the matrix product X * weight as a numpy.matrix."""
        return mat(X) * mat(weight)

    def predict_class(self, score, y_vals):
        """Map each row of score to the label with the largest score.

        Args:
            score: (n, n_classes) array or matrix of class scores.
            y_vals (numpy.ndarray): class labels, one per score column.

        Returns:
            numpy.ndarray: labels as an (n, 1) column.  The column shape is
            what the original matrix-based code produced and is kept so that
            existing callers see the same shape.
        """
        # Flatten explicitly instead of relying on how np.matrix iterates.
        best_column = np.asarray(np.argmax(score, axis=1)).ravel()
        return np.asarray(y_vals)[best_column].reshape(-1, 1)

    def calc_predict_error(self, predicted_class, y):
        """Return 1 - accuracy of predicted_class against y.

        Only the first y.size predictions are compared, as before.
        """
        y = np.asarray(y)
        predicted = np.ravel(predicted_class)[:y.size]
        hits = predicted == np.ravel(y)
        # float() guards against integer division on Python 2.
        return 1 - np.sum(hits) / float(y.size)
class Fisher_Projection(Classifier):
    """Fisher discriminant projection onto (n_classes - 1) directions.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): sorted unique class labels seen in training.
        weight (numpy.ndarray): projection matrix, one column per
            discriminant direction.
    """

    def __init__(self, X, y):
        """Build the preprocessor from X and compute the projection.

        Args:
            X (numpy.ndarray): training design matrix.
            y (numpy.ndarray): training class labels, used as a boolean-mask
                source (``y == label``) to select the rows of each class.
        """
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        y = np.copy(y)
        self.y_vals = np.unique(y)
        self.weight = self.calc_fisher_weight_vector(X, y)

    def predict(self, X):
        """Return the preprocessed X projected onto the Fisher directions."""
        X = self.data_preprocessor.predict(X)
        return np.dot(X, self.weight)

    def validate(self):
        """Placeholder; does nothing and returns None."""
        pass

    def calc_between_class_variance(self, X, y):
        """Return the between-class scatter matrix.

        Each class contributes n_k * outer(mu_k - mu, mu_k - mu), where mu_k
        is the class mean, mu the overall mean and n_k the class size.
        """
        num_features = X.shape[1]
        mu_all = np.mean(X, axis=0)
        between_class_variance = np.zeros((num_features, num_features))
        for label in self.y_vals:
            X_sub = X[y == label, :]
            offset = np.mean(X_sub, axis=0) - mu_all
            between_class_variance += X_sub.shape[0] * np.outer(offset, offset)
        return between_class_variance

    def calc_within_class_variance(self, X, y):
        """Return the within-class scatter matrix plus the identity.

        Each class contributes n_k times its biased covariance matrix.
        """
        num_features = X.shape[1]
        within_class_var = np.zeros((num_features, num_features))
        for label in self.y_vals:
            X_sub = X[y == label, :]
            within_class_var += X_sub.shape[0] * np.cov(
                X_sub, rowvar=0, bias=1)
        # add an identity matrix to the variance matrix to fix its condition
        within_class_var += np.identity(num_features)
        return within_class_var

    def calc_fisher_weight_vector(self, X, y):
        """Return the real parts of the eigenvectors of inv(Sw) * Sb.

        inv(Sw) * Sb is obtained by solving Sw * M = Sb instead of forming
        the inverse explicitly, which is cheaper and numerically more
        stable.  ``eigs`` is asked for n_classes - 1 eigenpairs with its
        default selection rule.
        NOTE(review): presumably ``eigs`` is scipy.sparse.linalg.eigs, whose
        default picks the largest-magnitude eigenvalues - confirm against
        the file's imports.
        """
        between_class_variance = self.calc_between_class_variance(X, y)
        within_class_variance = self.calc_within_class_variance(X, y)
        tmp_matrix = np.linalg.solve(
            within_class_variance, between_class_variance)
        _, eigenvectors = eigs(tmp_matrix, k=self.y_vals.size - 1)
        return eigenvectors.real
class Fisher_Projection(Classifier):
    """Fisher discriminant projection onto (n_classes - 1) directions.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): sorted unique class labels seen in training.
        weight (numpy.ndarray): projection matrix, one column per
            discriminant direction.
    """

    def __init__(self, X, y):
        """Build the preprocessor from X and compute the projection.

        Args:
            X (numpy.ndarray): training design matrix.
            y (numpy.ndarray): training class labels, used as a boolean-mask
                source (``y == label``) to select the rows of each class.
        """
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        y = np.copy(y)
        self.y_vals = np.unique(y)
        self.weight = self.calc_fisher_weight_vector(X, y)

    def predict(self, X):
        """Return the preprocessed X projected onto the Fisher directions."""
        X = self.data_preprocessor.predict(X)
        return np.dot(X, self.weight)

    def validate(self):
        """Placeholder; does nothing and returns None."""
        pass

    def calc_between_class_variance(self, X, y):
        """Return the between-class scatter matrix.

        Each class contributes n_k * outer(mu_k - mu, mu_k - mu), where mu_k
        is the class mean, mu the overall mean and n_k the class size.
        """
        num_features = X.shape[1]
        mu_all = np.mean(X, axis=0)
        between_class_variance = np.zeros((num_features, num_features))
        for label in self.y_vals:
            X_sub = X[y == label, :]
            offset = np.mean(X_sub, axis=0) - mu_all
            between_class_variance += X_sub.shape[0] * np.outer(offset, offset)
        return between_class_variance

    def calc_within_class_variance(self, X, y):
        """Return the within-class scatter matrix plus the identity.

        Each class contributes n_k times its biased covariance matrix.
        """
        num_features = X.shape[1]
        within_class_var = np.zeros((num_features, num_features))
        for label in self.y_vals:
            X_sub = X[y == label, :]
            within_class_var += X_sub.shape[0] * np.cov(
                X_sub, rowvar=0, bias=1)
        # add an identity matrix to the variance matrix to fix its condition
        within_class_var += np.identity(num_features)
        return within_class_var

    def calc_fisher_weight_vector(self, X, y):
        """Return the real parts of the eigenvectors of inv(Sw) * Sb.

        inv(Sw) * Sb is obtained by solving Sw * M = Sb instead of forming
        the inverse explicitly, which is cheaper and numerically more
        stable.  ``eigs`` is asked for n_classes - 1 eigenpairs with its
        default selection rule.
        NOTE(review): presumably ``eigs`` is scipy.sparse.linalg.eigs, whose
        default picks the largest-magnitude eigenvalues - confirm against
        the file's imports.
        """
        between_class_variance = self.calc_between_class_variance(X, y)
        within_class_variance = self.calc_within_class_variance(X, y)
        tmp_matrix = np.linalg.solve(
            within_class_variance, between_class_variance)
        _, eigenvectors = eigs(tmp_matrix, k=self.y_vals.size - 1)
        return eigenvectors.real
# Example no. 5
# 0
class Logistic_Regression(Classifier):
    """Binary logistic regression with an L2 penalty on the non-intercept
    weights.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): the two sorted class labels; y_vals[0] is
            coded as -1 and y_vals[1] as +1 during training.
        weight: weight vector returned by the optimiser (intercept first).
    """

    def __init__(self, X, y, lambda_):
        """Preprocess the data and train the model.

        Args:
            X (numpy.ndarray): training design matrix.
            y (numpy.ndarray): training labels; exactly two distinct values.
            lambda_ (float): L2 regularisation strength.
        """
        # Original note: Data_Preprocessor will copy X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)
        self.y_vals = np.unique(y)
        # check number of classes here
        assert self.y_vals.size == 2
        # Build the {-1, +1} coding in a single step from the untouched
        # labels.  Recoding in place in two passes turned every label into
        # +1 whenever y_vals[1] happened to be -1 (e.g. labels {-2, -1}), and
        # could not store -1 in unsigned or string label arrays.
        y = np.where(np.asarray(y) == self.y_vals[1], 1.0, -1.0)
        self.weight = self.lr_train(X, y, lambda_)

    def predict(self, X, output=0):
        """Return the list of predicted class labels for the rows of X.

        Args:
            X (numpy.ndarray): new design matrix.
            output (int, optional): unused; kept for interface
                compatibility.
        """
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)
        predicted_score = self.predict_score(X)
        return self.predict_class(predicted_score)

    def validate(self, X, y, output=0):
        """Return the misclassification rate of the model on (X, y)."""
        # Reuse predict() so the two code paths cannot drift apart.
        predicted_class = self.predict(X, output)
        return self.calc_predict_error(predicted_class, y)

    def add_intercept(self, X):
        """Return X with a leading column of ones."""
        num_obs = X.shape[0]
        return np.concatenate((np.ones((num_obs, 1)), X), axis=1)

    def lr_loss(self, w, X, y, lambda_):
        """Return the penalised negative log-likelihood.

        loss = sum(log(1 + exp(-y * Xw))) + lambda_ / 2 * ||w[1:]||^2

        y must be in {-1, +1} and the first column of X should be all 1, so
        that w[0] is the (unpenalised) intercept.  log(1 + exp(h)) is
        evaluated with np.logaddexp, which is exact and overflow-safe; the
        previous code substituted h for it once h exceeded 10.
        """
        margins = -y * np.dot(X, w)
        data_term = np.sum(np.logaddexp(0, margins))
        penalty = lambda_ / 2.0 * np.dot(w[1:], w[1:])
        return data_term + penalty

    def lr_gradient(self, w, X, y, lambda_):
        """Return the gradient of lr_loss with respect to w (1-D array).

        y must be in {-1, +1}.
        """
        coef = (1 - sigmoid(y * np.dot(X, w))) * y
        grad = np.asarray(np.dot(coef, X), dtype=float).ravel()
        # do not regularize intercept
        grad = grad - lambda_ * np.concatenate(([0], w[1:]))
        return -grad

    def grad_check(self, w, X, y, lambda_):
        """Print the analytic gradient and its relative distance to a
        central finite-difference estimate of lr_loss."""
        num_features = X.shape[1]
        # These are methods, so they must be reached through self; the bare
        # names raised NameError.
        grad0 = self.lr_gradient(w, X, y, lambda_)
        print(grad0)
        eps = 1e-05
        grad1 = np.zeros_like(grad0)
        for i in range(num_features):
            delta = np.zeros_like(w)
            delta[i] = eps
            upper = self.lr_loss(w + delta, X, y, lambda_)
            lower = self.lr_loss(w - delta, X, y, lambda_)
            grad1[i] = (upper - lower) / 2 / eps
        print(np.linalg.norm(grad1 - grad0) / np.linalg.norm(grad0))

    def lr_train(self, X, y, lambda_):
        """Minimise lr_loss from a random start in [-1, 1) and return the
        optimiser's result.

        NOTE(review): ``fmin`` is whatever optimiser the file binds to that
        name; the keywords used here (f, x0, fprime, args, maxiter, disp)
        match scipy.optimize.fmin_bfgs / fmin_cg - confirm against the
        imports.
        """
        # random initialization
        num_features = X.shape[1]
        w = (np.random.rand(num_features) - 0.5) * 2
        return fmin(f=self.lr_loss,
                    x0=w,
                    fprime=self.lr_gradient,
                    args=(X, y, lambda_),
                    maxiter=50,
                    disp=False)

    def predict_score(self, X):
        """Return the linear scores X * weight' as a column numpy.matrix."""
        return mat(X) * mat(self.weight).T

    def predict_class(self, predicted_score):
        """Return a list holding y_vals[0] for negative scores and y_vals[1]
        otherwise."""
        low, high = self.y_vals[0], self.y_vals[1]
        # Flatten first so that matrix and array scores are handled alike.
        scores = np.asarray(predicted_score).ravel()
        return [low if s < 0 else high for s in scores]

    def calc_predict_error(self, predicted_class, y):
        """Return 1 - accuracy of predicted_class against y.

        Only the first y.size predictions are compared, as before.
        """
        y = np.asarray(y)
        predicted = np.ravel(predicted_class)[:y.size]
        hits = predicted == np.ravel(y)
        # float() guards against integer division on Python 2.
        return 1 - np.sum(hits) / float(y.size)
class Logistic_Regression(Classifier):
    """Binary logistic regression with an L2 penalty on the non-intercept
    weights.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): the two sorted class labels; y_vals[0] is
            coded as -1 and y_vals[1] as +1 during training.
        weight: weight vector returned by the optimiser (intercept first).
    """

    def __init__(self, X, y, lambda_):
        """Preprocess the data and train the model.

        Args:
            X (numpy.ndarray): training design matrix.
            y (numpy.ndarray): training labels; exactly two distinct values.
            lambda_ (float): L2 regularisation strength.
        """
        # Original note: Data_Preprocessor will copy X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)
        self.y_vals = np.unique(y)
        # check number of classes here
        assert self.y_vals.size == 2
        # Build the {-1, +1} coding in a single step from the untouched
        # labels.  Recoding in place in two passes turned every label into
        # +1 whenever y_vals[1] happened to be -1 (e.g. labels {-2, -1}), and
        # could not store -1 in unsigned or string label arrays.
        y = np.where(np.asarray(y) == self.y_vals[1], 1.0, -1.0)
        self.weight = self.lr_train(X, y, lambda_)

    def predict(self, X, output=0):
        """Return the list of predicted class labels for the rows of X.

        Args:
            X (numpy.ndarray): new design matrix.
            output (int, optional): unused; kept for interface
                compatibility.
        """
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)
        predicted_score = self.predict_score(X)
        return self.predict_class(predicted_score)

    def validate(self, X, y, output=0):
        """Return the misclassification rate of the model on (X, y)."""
        # Reuse predict() so the two code paths cannot drift apart.
        predicted_class = self.predict(X, output)
        return self.calc_predict_error(predicted_class, y)

    def add_intercept(self, X):
        """Return X with a leading column of ones."""
        num_obs = X.shape[0]
        return np.concatenate((np.ones((num_obs, 1)), X), axis=1)

    def lr_loss(self, w, X, y, lambda_):
        """Return the penalised negative log-likelihood.

        loss = sum(log(1 + exp(-y * Xw))) + lambda_ / 2 * ||w[1:]||^2

        y must be in {-1, +1} and the first column of X should be all 1, so
        that w[0] is the (unpenalised) intercept.  log(1 + exp(h)) is
        evaluated with np.logaddexp, which is exact and overflow-safe; the
        previous code substituted h for it once h exceeded 10.
        """
        margins = -y * np.dot(X, w)
        data_term = np.sum(np.logaddexp(0, margins))
        penalty = lambda_ / 2.0 * np.dot(w[1:], w[1:])
        return data_term + penalty

    def lr_gradient(self, w, X, y, lambda_):
        """Return the gradient of lr_loss with respect to w (1-D array).

        y must be in {-1, +1}.
        """
        coef = (1 - sigmoid(y * np.dot(X, w))) * y
        grad = np.asarray(np.dot(coef, X), dtype=float).ravel()
        # do not regularize intercept
        grad = grad - lambda_ * np.concatenate(([0], w[1:]))
        return -grad

    def grad_check(self, w, X, y, lambda_):
        """Print the analytic gradient and its relative distance to a
        central finite-difference estimate of lr_loss."""
        num_features = X.shape[1]
        # These are methods, so they must be reached through self; the bare
        # names raised NameError.
        grad0 = self.lr_gradient(w, X, y, lambda_)
        print(grad0)
        eps = 1e-05
        grad1 = np.zeros_like(grad0)
        for i in range(num_features):
            delta = np.zeros_like(w)
            delta[i] = eps
            upper = self.lr_loss(w + delta, X, y, lambda_)
            lower = self.lr_loss(w - delta, X, y, lambda_)
            grad1[i] = (upper - lower) / 2 / eps
        print(np.linalg.norm(grad1 - grad0) / np.linalg.norm(grad0))

    def lr_train(self, X, y, lambda_):
        """Minimise lr_loss from a random start in [-1, 1) and return the
        optimiser's result.

        NOTE(review): ``fmin`` is whatever optimiser the file binds to that
        name; the keywords used here (f, x0, fprime, args, maxiter, disp)
        match scipy.optimize.fmin_bfgs / fmin_cg - confirm against the
        imports.
        """
        # random initialization
        num_features = X.shape[1]
        w = (np.random.rand(num_features) - 0.5) * 2
        return fmin(f=self.lr_loss,
                    x0=w,
                    fprime=self.lr_gradient,
                    args=(X, y, lambda_),
                    maxiter=50,
                    disp=False)

    def predict_score(self, X):
        """Return the linear scores X * weight' as a column numpy.matrix."""
        return mat(X) * mat(self.weight).T

    def predict_class(self, predicted_score):
        """Return a list holding y_vals[0] for negative scores and y_vals[1]
        otherwise."""
        low, high = self.y_vals[0], self.y_vals[1]
        # Flatten first so that matrix and array scores are handled alike.
        scores = np.asarray(predicted_score).ravel()
        return [low if s < 0 else high for s in scores]

    def calc_predict_error(self, predicted_class, y):
        """Return 1 - accuracy of predicted_class against y.

        Only the first y.size predictions are compared, as before.
        """
        y = np.asarray(y)
        predicted = np.ravel(predicted_class)[:y.size]
        hits = predicted == np.ravel(y)
        # float() guards against integer division on Python 2.
        return 1 - np.sum(hits) / float(y.size)
# Example no. 7
# 0
class SVM_SMO(Classifier):
    """class SVM_SMO which implements the SMO algorithm for training (linear) SVM

    Attributes:
        data_preprocessor (Data_Preprocessor): a Data_Preprocessor instance
        loglist (list): store dual objective function value for each iteration
        X (numpy.array): preprocessed training Design Matrix
        y (numpy.array): training Response Vector recoded to {-1, +1}
        y_vals (numpy.array): the two class labels (sorted)
        alpha_array (numpy.array): trained dual variables, one per training row
        b (list): one-element list holding the trained threshold
    """

    def __init__(self, X, y, C, iter_max):
        """Initialize the classifier and train the model.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector with exactly two distinct labels
            C (float): penalty parameter
            iter_max (int): maximum number of iterations for the algorithm

        Raises:
            ValueError: if y does not contain exactly two distinct labels.
        """
        self.data_preprocessor = Data_Preprocessor(X)
        self.X = self.data_preprocessor.predict(X)
        labels = copy(y)
        self.y_vals = unique(labels)
        if self.y_vals.size != 2:
            raise ValueError(
                "SVM_SMO needs exactly two class labels, got %d"
                % self.y_vals.size)
        # Recode into a fresh array using masks taken from the untouched
        # labels.  Recoding in place in two passes turned every label into
        # +1 whenever y_vals[1] happened to be -1 (e.g. labels {-2, -1}).
        self.y = zeros(labels.shape)
        self.y[labels == self.y_vals[0]] = -1
        self.y[labels == self.y_vals[1]] = 1
        self.loglist = []
        self.alpha_array, self.b = self.train_svm(self.X, self.y, C, iter_max)

    def predict(self, X, output=0):
        """Make prediction for the new Design Matrix X.

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            list: predicted class label for each row of X
        """
        X = self.data_preprocessor.predict(X)
        predicted_score = self.predict_score(X)
        return self.predict_class(predicted_score)

    def validate(self, X, y, output=0):
        """Validate prediction result for the new Design Matrix X and new
        Response Vector y.

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): new Response Vector
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: prediction error on the new inputs
        """
        # Reuse predict() so the two code paths cannot drift apart.
        predicted_class = self.predict(X, output)
        return self.calc_predict_error(predicted_class, y)

    def predict_score(self, X):
        """Calculate prediction score for new (preprocessed) Design Matrix X.

        Args:
            X (numpy.array): new Design Matrix

        Returns:
            numpy.array: vector of prediction score
        """
        return self.get_output(X, self.alpha_array, self.X, self.y, self.b)

    def predict_class(self, predicted_score):
        """Predict the class label for each prediction score.

        Args:
            predicted_score (numpy.array): vector of prediction score

        Returns:
            list: y_vals[0] where the score is negative, y_vals[1] otherwise
        """
        negative_label, positive_label = self.y_vals[0], self.y_vals[1]
        return [negative_label if s < 0 else positive_label
                for s in predicted_score]

    def calc_predict_error(self, predicted_class, y):
        """Calculate the prediction error.

        Args:
            predicted_class (list or numpy.array): predicted class labels
            y (numpy.array): vector of true class label

        Returns:
            float: overall error rate
        """
        hits = array([predicted_class[i] == y[i] for i in range(y.size)])
        # float() guards against integer division on Python 2.
        return 1 - sum(hits) / float(y.size)

    def get_output(self, x_new, alpha_array, X, y, b):
        """Calculate f(x_new) = sum_i alpha_i * y_i * <x_i, x_new> - b.

        Args:
            x_new (numpy.array): new design matrix (or a single row)
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): threshold, stored as a one-element list

        Returns:
            numpy.array: f(x_new); because b is a one-element list the result
            is always an array, of length 1 when x_new is a single row
        """
        signed_y = y * alpha_array
        return dot(dot(signed_y, X), x_new.T) - b

    def calc_objective_fast(self, alpha_array, y, K):
        """Calculate the dual objective function.

        Args:
            alpha_array (numpy.array): current alpha array
            y (numpy.array): Response Vector
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            float: dual objective function value
        """
        signed_y = y * alpha_array
        return sum(alpha_array) - 0.5 * dot(dot(signed_y, K), signed_y)

    def calc_LH(self, alpha1, alpha2, y1, y2, C):
        """Calculate the lower and upper bound for the new alpha2.

        Args:
            alpha1 (float): current value of alpha1
            alpha2 (float): current value of alpha2
            y1 (float): label of example 1
            y2 (float): label of example 2
            C (float): penalty parameter

        Returns:
            (L, H): the feasible interval for the new alpha2
        """
        if y1 != y2:
            L = maximum(0, alpha2 - alpha1)
            H = minimum(C, C + alpha2 - alpha1)
        else:
            L = maximum(0, alpha1 + alpha2 - C)
            H = minimum(C, alpha1 + alpha2)
        return (L, H)

    def choose_i1(self, i2, alpha_index_nonbound, alpha_array, X, y, b):
        """Heuristically choose alpha1 to optimize given alpha2.

        The non-bound example whose error differs most from E2 is chosen.

        Args:
            i2 (int): index of alpha2
            alpha_index_nonbound (list): indices of the nonbound alphas
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): threshold

        Returns:
            int: index of alpha1
        """
        E2 = self.get_output(X[i2, :], alpha_array, X, y, b) - y[i2]
        E1_array = self.get_output(
            X[alpha_index_nonbound, :], alpha_array, X, y, b
        ) - y[alpha_index_nonbound]
        index = argmax(fabs(E1_array - E2[0]))
        return alpha_index_nonbound[index]

    def update_b(self, b_old, alpha1, alpha2, a1, a2, x1, x2, y1, y2, E1, E2, C):
        """Update the threshold parameter b.

        Args:
            b_old (float): current threshold
            alpha1 (float): old value of alpha1
            alpha2 (float): old value of alpha2
            a1 (float): new value of alpha1
            a2 (float): new value of alpha2
            x1 (numpy.array): example 1
            x2 (numpy.array): example 2
            y1 (int): label of example 1
            y2 (int): label of example 2
            E1 (float): error on example 1 before the update
            E2 (float): error on example 2 before the update
            C (float): penalty parameter

        Returns:
            float: updated parameter b
        """
        b1 = (E1 + y1 * (a1 - alpha1) * dot(x1, x1)
              + y2 * (a2 - alpha2) * dot(x1, x2) + b_old)
        b2 = (E2 + y1 * (a1 - alpha1) * dot(x1, x2)
              + y2 * (a2 - alpha2) * dot(x2, x2) + b_old)
        a1_at_bound = a1 == C or a1 == 0
        a2_at_bound = a2 == C or a2 == 0
        if a1_at_bound and a2_at_bound:
            return (b1 + b2) / 2
        elif a1_at_bound:
            return b2
        else:
            return b1

    def take_step(self, i1, i2, alpha_array, X, y, C, b, K):
        """Optimize given alpha1 and alpha2.

        On success alpha_array[i1], alpha_array[i2] and b[0] are updated in
        place.

        Args:
            i1 (int): index of alpha1
            i2 (int): index of alpha2
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): threshold, one-element list
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize the alpha pairs, otherwise False
        """
        if i1 == i2:
            return False

        eps = 1e-05
        alpha1, alpha2 = alpha_array[[i1, i2]]
        y1, y2 = y[[i1, i2]]
        X1, X2 = X[[i1, i2]]
        # get_output returns a length-1 array here (b is a one-element list);
        # take the scalar so that the new alphas and b are plain scalars and
        # can be stored into single array slots.
        E1 = (self.get_output(X1, alpha_array, X, y, b) - y1)[0]
        E2 = (self.get_output(X2, alpha_array, X, y, b) - y2)[0]
        s = y1 * y2
        L, H = self.calc_LH(alpha1, alpha2, y1, y2, C)

        if fabs(L - H) < eps:
            return False

        k11 = dot(X1, X1)
        k12 = dot(X1, X2)
        k22 = dot(X2, X2)
        eta = 2 * k12 - k11 - k22

        if eta < 0:
            # Unconstrained optimum along the constraint line, then clip.
            a2 = alpha2 - y2 * (E1 - E2) / eta
            if a2 < L:
                a2 = L
            elif a2 > H:
                a2 = H
        else:
            # Degenerate curvature: compare the objective at both ends of
            # the feasible segment.  calc_objective_fast takes
            # (alpha_array, y, K); the previous call also passed X and
            # raised TypeError.
            alpha_array[i2] = L
            alpha_array[i1] = alpha1 + s * (alpha2 - L)
            Lobj = self.calc_objective_fast(alpha_array, y, K)

            alpha_array[i2] = H
            alpha_array[i1] = alpha1 + s * (alpha2 - H)
            Hobj = self.calc_objective_fast(alpha_array, y, K)

            # restore the trial values
            alpha_array[i1] = alpha1
            alpha_array[i2] = alpha2

            if Lobj > Hobj + eps:
                a2 = L
            elif Lobj < Hobj - eps:
                a2 = H
            else:
                a2 = alpha2

        # snap to the bounds
        if a2 < eps:
            a2 = 0
        elif a2 > C - eps:
            a2 = C

        if fabs(a2 - alpha2) < eps * (a2 + alpha2 + eps):
            return False

        a1 = alpha1 + s * (alpha2 - a2)

        # update b
        b[0] = self.update_b(b[0], alpha1, alpha2, a1,
                             a2, X1, X2, y1, y2, E1, E2, C)

        alpha_array[i1] = a1
        alpha_array[i2] = a2
        return True

    def examine_example(self, i2, alpha_array, X, y, C, b, K):
        """Given alpha2, find alpha1 and optimize them.

        Args:
            i2 (index): index of alpha2
            alpha_array (numpy.array): current array of alpha
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): threshold, one-element list
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize alpha2, otherwise False
        """
        n = X.shape[0]
        y2 = y[i2]
        alpha2 = alpha_array[i2]
        X2 = X[i2]
        E2 = (self.get_output(X2, alpha_array, X, y, b) - y2)[0]
        r2 = E2 * y2
        tol = 1e-03

        # Only examples violating the KKT conditions (within tol) are worth
        # optimizing.
        if not ((r2 < -tol and alpha2 < C) or (r2 > tol and alpha2 > 0)):
            return False

        # find those nonbound alphas
        alpha_index_nonbound = [i for i in range(n)
                                if alpha_array[i] != 0 and alpha_array[i] != C]

        if len(alpha_index_nonbound) > 1:
            # heuristically choose i1
            i1 = self.choose_i1(
                i2, alpha_index_nonbound, alpha_array, X, y, b)
            if i1 >= 0 and self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                return True

            # iterate over all nonbound alphas using random start index.
            # choice(..., 1) returns a length-1 array, so extract a plain int
            # before slicing; list concatenation keeps the indices integers
            # (concatenate() promoted them to float when one part was empty).
            start_index = int(choice(len(alpha_index_nonbound), 1)[0])
            rotated = (alpha_index_nonbound[start_index:]
                       + alpha_index_nonbound[:start_index])
            for i1 in rotated:
                if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                    return True

        # iterate over all alphas using random start index, wrapping around
        # so that indices 0..start_index-1 are visited too (the previous
        # arange(start_index, 2) skipped them).
        start_index = int(choice(n, 1)[0])
        for i1 in list(range(start_index, n)) + list(range(start_index)):
            if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                return True

        return False

    def train_svm(self, X, y, C, iter_max):
        """Train SVM using SMO algorithm.

        Alternates between a sweep over all examples and sweeps over the
        non-bound examples only, logging the dual objective after every
        successful step.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector {-1, 1}
            C (float): penalty parameter
            iter_max (int): maximum number of iterations

        Returns:
            (numpy.array, list): (alpha_array, b), trained parameters
        """
        n = X.shape[0]
        K = dot(X, X.T)
        alpha_array = zeros(n)
        b = [0]
        num_changed = 0
        examine_all = True

        while num_changed > 0 or examine_all:
            num_changed = 0

            if examine_all:
                candidates = permutation(range(n))
            else:
                alpha_index_nonbound = [
                    i for i in range(n)
                    if alpha_array[i] != 0 and alpha_array[i] != C]
                candidates = permutation(alpha_index_nonbound)

            for i2 in candidates:
                if self.examine_example(i2, alpha_array, X, y, C, b, K):
                    num_changed += 1
                    self.loglist.append(
                        self.calc_objective_fast(alpha_array, y, K))
                    iter_max -= 1
                    if iter_max < 0:
                        break
            if iter_max < 0:
                break

            # stop if the number of changed alphas are less than n / 10
            if num_changed < n / 10:
                break

            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True

        return (alpha_array, b)
# Example no. 8
# 0
class Naive_Bayes(Classifier):
    """Gaussian naive Bayes classifier.

    Every feature is modelled, per class, by an independent normal
    distribution; prediction picks the class with the largest log-posterior.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): sorted unique class labels seen in training.
        mean_array (numpy.ndarray): (n_classes, n_features) feature means.
        std_array (numpy.ndarray): (n_classes, n_features) feature standard
            deviations, floored at 1e-03.
        prior (list): empirical class frequencies, ordered like y_vals.
    """

    def __init__(self, X, y):
        """Build the preprocessor from X and estimate the model parameters.

        Args:
            X (numpy.ndarray): training design matrix.
            y (numpy.ndarray): training class labels.
        """
        # Original note: Data_Preprocessor will copy X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        y = np.copy(y)
        self.y_vals = np.unique(y)
        self.mean_array, self.std_array = self.estimate_mean_std(X, y)
        self.prior = self.calc_prior(y)

    def predict(self, X_new, output=0):
        """Return the predicted class label for every row of X_new.

        Args:
            X_new (numpy.ndarray): new design matrix.
            output (int, optional): unused; kept for interface
                compatibility.
        """
        X_new = self.data_preprocessor.predict(X_new)
        predicted_score = self.predict_score(X_new)
        return self.predict_class(predicted_score)

    def validate(self, X_new, y_new, output=0):
        """Return the misclassification rate on (X_new, y_new)."""
        # Reuse predict() so the two code paths cannot drift apart.
        predicted_class = self.predict(X_new, output)
        return self.calc_predict_error(predicted_class, y_new)

    def estimate_mean_std(self, X, y):
        """Estimate per-class feature means and sample standard deviations.

        Returns:
            (numpy.ndarray, numpy.ndarray): (mean_array, std_array), both of
            shape (n_classes, n_features).
        """
        num_features = X.shape[1]
        num_classes = self.y_vals.size
        mean_array = np.zeros((num_classes, num_features))
        std_array = np.zeros((num_classes, num_features))
        for k, label in enumerate(self.y_vals):
            X_sub = X[y == label, :]
            mean_array[k, :] = np.mean(X_sub, axis=0)
            std_array[k, :] = np.std(X_sub, axis=0, ddof=1)
        # Floor the deviations once, after the loop.  The negated ">=" test
        # also catches the NaN produced by ddof=1 for a class with a single
        # observation, which a plain "< 1e-03" test lets through.
        std_array[~(std_array >= 1e-03)] = 1e-03
        return (mean_array, std_array)

    def calc_prior(self, y):
        """Return the list of empirical class frequencies, ordered like
        y_vals."""
        # np.mean always divides in floating point (Python 2 safe).
        return [np.mean(y == y_val) for y_val in self.y_vals]

    def predict_score(self, X_new):
        """Return the (n_obs, n_classes) matrix of unnormalised log-posteriors.

        Entry (i, k) is log prior_k plus the sum over features of the normal
        log-density of X_new[i, j] under class k.
        """
        num_obs = X_new.shape[0]
        num_classes = self.mean_array.shape[0]
        ans = np.zeros((num_obs, num_classes))
        for k in range(num_classes):
            # loc/scale broadcast across the feature axis, replacing the
            # per-feature Python loop.
            log_density = norm.logpdf(X_new,
                                      loc=self.mean_array[k, :],
                                      scale=self.std_array[k, :])
            ans[:, k] = np.sum(log_density, axis=1)
        ans += np.log(self.prior)
        return ans

    def predict_class(self, predicted_score):
        """Return, for every row, the label with the largest score."""
        max_indicator = np.argmax(predicted_score, axis=1)
        return np.array([self.y_vals[i] for i in max_indicator])

    def calc_predict_error(self, predicted_class, y):
        """Return 1 - accuracy of predicted_class against y.

        Only the first y.size predictions are compared, as before.
        """
        y = np.asarray(y)
        predicted = np.ravel(predicted_class)[:y.size]
        hits = predicted == np.ravel(y)
        # float() guards against integer division on Python 2.
        return 1 - np.sum(hits) / float(y.size)
# Example no. 9
# 0
class Naive_Bayes(Classifier):
    """Gaussian naive Bayes classifier.

    Every feature is modelled, per class, by an independent normal
    distribution; prediction picks the class with the largest log-posterior.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training design matrix and re-applied to every new X.
        y_vals (numpy.ndarray): sorted unique class labels seen in training.
        mean_array (numpy.ndarray): (n_classes, n_features) feature means.
        std_array (numpy.ndarray): (n_classes, n_features) feature standard
            deviations, floored at 1e-03.
        prior (list): empirical class frequencies, ordered like y_vals.
    """

    def __init__(self, X, y):
        """Build the preprocessor from X and estimate the model parameters.

        Args:
            X (numpy.ndarray): training design matrix.
            y (numpy.ndarray): training class labels.
        """
        # Original note: Data_Preprocessor will copy X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        y = np.copy(y)
        self.y_vals = np.unique(y)
        self.mean_array, self.std_array = self.estimate_mean_std(X, y)
        self.prior = self.calc_prior(y)

    def predict(self, X_new, output=0):
        """Return the predicted class label for every row of X_new.

        Args:
            X_new (numpy.ndarray): new design matrix.
            output (int, optional): unused; kept for interface
                compatibility.
        """
        X_new = self.data_preprocessor.predict(X_new)
        predicted_score = self.predict_score(X_new)
        return self.predict_class(predicted_score)

    def validate(self, X_new, y_new, output=0):
        """Return the misclassification rate on (X_new, y_new)."""
        # Reuse predict() so the two code paths cannot drift apart.
        predicted_class = self.predict(X_new, output)
        return self.calc_predict_error(predicted_class, y_new)

    def estimate_mean_std(self, X, y):
        """Estimate per-class feature means and sample standard deviations.

        Returns:
            (numpy.ndarray, numpy.ndarray): (mean_array, std_array), both of
            shape (n_classes, n_features).
        """
        num_features = X.shape[1]
        num_classes = self.y_vals.size
        mean_array = np.zeros((num_classes, num_features))
        std_array = np.zeros((num_classes, num_features))
        for k, label in enumerate(self.y_vals):
            X_sub = X[y == label, :]
            mean_array[k, :] = np.mean(X_sub, axis=0)
            std_array[k, :] = np.std(X_sub, axis=0, ddof=1)
        # Floor the deviations once, after the loop.  The negated ">=" test
        # also catches the NaN produced by ddof=1 for a class with a single
        # observation, which a plain "< 1e-03" test lets through.
        std_array[~(std_array >= 1e-03)] = 1e-03
        return (mean_array, std_array)

    def calc_prior(self, y):
        """Return the list of empirical class frequencies, ordered like
        y_vals."""
        # np.mean always divides in floating point (Python 2 safe).
        return [np.mean(y == y_val) for y_val in self.y_vals]

    def predict_score(self, X_new):
        """Return the (n_obs, n_classes) matrix of unnormalised log-posteriors.

        Entry (i, k) is log prior_k plus the sum over features of the normal
        log-density of X_new[i, j] under class k.
        """
        num_obs = X_new.shape[0]
        num_classes = self.mean_array.shape[0]
        ans = np.zeros((num_obs, num_classes))
        for k in range(num_classes):
            # loc/scale broadcast across the feature axis, replacing the
            # per-feature Python loop.
            log_density = norm.logpdf(X_new,
                                      loc=self.mean_array[k, :],
                                      scale=self.std_array[k, :])
            ans[:, k] = np.sum(log_density, axis=1)
        ans += np.log(self.prior)
        return ans

    def predict_class(self, predicted_score):
        """Return, for every row, the label with the largest score."""
        max_indicator = np.argmax(predicted_score, axis=1)
        return np.array([self.y_vals[i] for i in max_indicator])

    def calc_predict_error(self, predicted_class, y):
        """Return 1 - accuracy of predicted_class against y.

        Only the first y.size predictions are compared, as before.
        """
        y = np.asarray(y)
        predicted = np.ravel(predicted_class)[:y.size]
        hits = predicted == np.ravel(y)
        # float() guards against integer division on Python 2.
        return 1 - np.sum(hits) / float(y.size)
# Example no. 10
# 0
class SVM_SMO(Classifier):
    """class SVM_SMO which implements the SMO algorithm for training (linear) SVM

    The two distinct labels found in the training response are recoded
    internally: the first entry of ``unique(y)`` becomes -1 and the second
    becomes +1.

    Attributes:
        data_preprocessor (Data_Preprocessor): a Data_Preprocessor instance
        loglist (list): store dual objective function value for each iteration
        X (numpy.array): Design Matrix (output of data_preprocessor.predict)
        y (numpy.array): Response Vector recoded to {-1, 1}
        y_vals (numpy.array): class labels
        alpha_array (numpy.array): trained dual variables
        b (list): one-element list holding the trained threshold
    """
    def __init__(self, X, y, C, iter_max):
        """initialize the classifier and train the model

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            iter_max (int): maximum iteration for the algorithm
        """
        self.data_preprocessor = Data_Preprocessor(X)
        self.X = self.data_preprocessor.predict(X)
        self.y = copy(y)
        self.y_vals = unique(self.y)
        # Build both masks before overwriting anything: relabelling in two
        # sequential steps would turn every entry into +1 whenever the second
        # class label is itself -1.
        is_first = self.y == self.y_vals[0]
        is_second = self.y == self.y_vals[1]
        self.y[is_first] = -1
        self.y[is_second] = 1
        self.loglist = []
        self.alpha_array, self.b = self.train_svm(self.X, self.y, C, iter_max)

    def predict(self, X, output=0):
        """make prediction for the new Design Matrix X

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused by this method

        Returns:
            list: predicted class label for each row of X
        """
        X = self.data_preprocessor.predict(X)
        predicted_score = self.predict_score(X)
        predicted_class = self.predict_class(predicted_score)
        return predicted_class

    def validate(self, X, y, output=0):
        """validate prediction result for the new Design Matrix X and new Response Vector y

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): new Response Vector
            output (int, optional): unused by this method

        Returns:
            float: prediction error on the new Inputs
        """
        X = self.data_preprocessor.predict(X)
        predicted_score = self.predict_score(X)
        predicted_class = self.predict_class(predicted_score)
        prediction_error = self.calc_predict_error(predicted_class, y)
        return prediction_error

    def predict_score(self, X):
        """calculate prediction score for new Design Matrix X

        Args:
            X (numpy.array): new (already preprocessed) Design Matrix

        Returns:
            numpy.array: vector of prediction score
        """
        return self.get_output(X, self.alpha_array, self.X, self.y, self.b)

    def predict_class(self, predicted_score):
        """predict the class label for each observation in the new Design Matrix X

        Args:
            predicted_score (numpy.array): vector of prediction score

        Returns:
            list: self.y_vals[0] where the score is negative, otherwise
                self.y_vals[1]
        """
        return [
            self.y_vals[0] if s < 0 else self.y_vals[1]
            for s in predicted_score
        ]

    def calc_predict_error(self, predicted_class, y):
        """Calculate the prediction error

        Args:
            predicted_class (numpy.array): vector of predicted class label
            y (numpy.array): vector of true class label

        Returns:
            float: overall error rate
        """
        predicted_indicator = array(
            [predicted_class[i] == y[i] for i in range(0, y.size)])
        return 1 - sum(predicted_indicator) / y.size

    def get_output(self, x_new, alpha_array, X, y, b):
        """Calculate f(x_new) = sum_i alpha_i * y_i * <X_i, x_new> - b

        Args:
            x_new (numpy.array): new design matrix (or a single row)
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): threshold, stored as a one-element list

        Returns:
            numpy.array: f(x_new); because b is a list, a single row yields a
                one-element array
        """
        signed_y = y * alpha_array
        return dot(dot(signed_y, X), x_new.T) - b

    def calc_objective_fast(self, alpha_array, y, K):
        """Calculate the dual objective function

        Args:
            alpha_array (numpy.array): current alpha array
            y (numpy.array): Response Vector
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            float: dual objective function value
        """
        signed_y = y * alpha_array
        return sum(alpha_array) - 0.5 * dot(dot(signed_y, K), signed_y)

    def calc_LH(self, alpha1, alpha2, y1, y2, C):
        """Calculate the lower and upper bound for new alpha2

        Args:
            alpha1 (float): current value of alpha1
            alpha2 (float): current value of alpha2
            y1 (int): label paired with alpha1
            y2 (int): label paired with alpha2
            C (float): penalty parameter

        Returns:
            (L, H): lower and upper bound for the new alpha2
        """
        if y1 != y2:
            L = maximum(0, alpha2 - alpha1)
            H = minimum(C, C + alpha2 - alpha1)
        else:
            L = maximum(0, alpha1 + alpha2 - C)
            H = minimum(C, alpha1 + alpha2)
        return (L, H)

    def choose_i1(self, i2, alpha_index_nonbound, alpha_array, X, y, b):
        """Heuristically choose alpha1 to optimize given alpha2

        Picks the nonbound index whose error differs most from the error of
        i2.

        Args:
            i2 (int): index of alpha2
            alpha_index_nonbound (numpy.array): array of nonbound alpha's index
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): threshold, stored as a one-element list

        Returns:
            int: index of alpha1
        """
        E2 = self.get_output(X[i2, :], alpha_array, X, y, b) - y[i2]
        E1_array = self.get_output(X[alpha_index_nonbound, :], alpha_array, X,
                                   y, b) - y[alpha_index_nonbound]
        index = argmax(fabs(E1_array - E2[0]))
        return alpha_index_nonbound[index]

    def update_b(self, b_old, alpha1, alpha2, a1, a2, x1, x2, y1, y2, E1, E2,
                 C):
        """update the threshold parameter b

        Args:
            b_old (float): current threshold value
            alpha1 (float): old value of alpha1
            alpha2 (float): old value of alpha2
            a1 (float): new value of alpha1
            a2 (float): new value of alpha2
            x1 (numpy.array): observation paired with alpha1
            x2 (numpy.array): observation paired with alpha2
            y1 (int): label paired with alpha1
            y2 (int): label paired with alpha2
            E1 (float): prediction error on x1
            E2 (float): prediction error on x2
            C (float): penalty parameter

        Returns:
            float: updated parameter b
        """
        b1 = E1 + y1 * (a1 - alpha1) * dot(x1, x1) + y2 * \
            (a2 - alpha2) * dot(x1, x2) + b_old
        b2 = E2 + y1 * (a1 - alpha1) * dot(x1, x2) + y2 * \
            (a2 - alpha2) * dot(x2, x2) + b_old
        a1_at_bound = a1 == C or a1 == 0
        a2_at_bound = a2 == C or a2 == 0
        if a1_at_bound and a2_at_bound:
            return (b1 + b2) / 2
        elif a1_at_bound:
            return b2
        else:
            return b1

    def take_step(self, i1, i2, alpha_array, X, y, C, b, K):
        """Optimize given alpha1 and alpha2

        On success alpha_array and b are updated in place.

        Args:
            i1 (int): index of alpha1
            i2 (int): index of alpha2
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): threshold, stored as a one-element list
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize the alpha pairs, otherwise False
        """
        if i1 == i2:
            return False

        eps = 1e-05
        alpha1, alpha2 = alpha_array[[i1, i2]]
        y1, y2 = y[[i1, i2]]
        X1, X2 = X[[i1, i2]]
        # get_output yields a one-element array for a single row (b is a
        # list); take the scalar so that the new alphas stay scalars as well.
        E1 = self.get_output(X1, alpha_array, X, y, b)[0] - y1
        E2 = self.get_output(X2, alpha_array, X, y, b)[0] - y2
        s = y1 * y2
        L, H = self.calc_LH(alpha1, alpha2, y1, y2, C)

        if fabs(L - H) < eps:
            return False

        k11 = dot(X1, X1)
        k12 = dot(X1, X2)
        k22 = dot(X2, X2)
        eta = 2 * k12 - k11 - k22

        if eta < 0:
            # regular case: unconstrained optimum clipped to [L, H]
            a2 = alpha2 - y2 * (E1 - E2) / eta
            if a2 < L:
                a2 = L
            elif a2 > H:
                a2 = H
        else:
            # degenerate case: compare the objective at both ends of the
            # segment, then restore the alphas that were modified to do so
            alpha_array[i2] = L
            alpha_array[i1] = alpha1 + s * (alpha2 - L)
            Lobj = self.calc_objective_fast(alpha_array, y, K)

            alpha_array[i2] = H
            alpha_array[i1] = alpha1 + s * (alpha2 - H)
            Hobj = self.calc_objective_fast(alpha_array, y, K)

            alpha_array[i1] = alpha1
            alpha_array[i2] = alpha2

            if Lobj > Hobj + eps:
                a2 = L
            elif Lobj < Hobj - eps:
                a2 = H
            else:
                a2 = alpha2

        # snap values that are numerically at a bound onto the bound
        if a2 < eps:
            a2 = 0
        elif a2 > C - eps:
            a2 = C

        if fabs(a2 - alpha2) < eps * (a2 + alpha2 + eps):
            return False

        a1 = alpha1 + s * (alpha2 - a2)

        # update b in place so that the caller sees the new threshold
        b[0] = self.update_b(b[0], alpha1, alpha2, a1, a2, X1, X2, y1, y2,
                             E1, E2, C)

        alpha_array[i1] = a1
        alpha_array[i2] = a2
        return True

    def examine_example(self, i2, alpha_array, X, y, C, b, K):
        """Given alpha2, find alpha1 and optimize them

        Args:
            i2 (index): index of alpha2
            alpha_array (numpy.array): current array of alpha
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): threshold, stored as a one-element list
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize alpha2, otherwise False
        """
        n = X.shape[0]
        y2 = y[i2]
        alpha2 = alpha_array[i2]
        E2 = self.get_output(X[i2], alpha_array, X, y, b)[0] - y2
        r2 = E2 * y2
        tol = 1e-03

        # only examples violating the KKT conditions (within tol) are worked on
        if (r2 < -tol and alpha2 < C) or (r2 > tol and alpha2 > 0):
            # find those nonbound alphas
            alpha_index_nonbound = [
                i for i in range(n)
                if alpha_array[i] != 0 and alpha_array[i] != C
            ]
            num_nonbound = len(alpha_index_nonbound)

            if num_nonbound > 1:
                # heuristically choose i1
                i1 = self.choose_i1(i2, alpha_index_nonbound, alpha_array, X,
                                    y, b)
                if i1 >= 0 and self.take_step(i1, i2, alpha_array, X, y, C, b,
                                              K):
                    return True

                # iterate over all nonbound alphas using random start index;
                # an integer array keeps the indices integral even when one
                # of the two slices is empty
                nonbound = array(alpha_index_nonbound, dtype=int)
                start_index = int(choice(num_nonbound))
                alpha_index_nonbound_modified = concatenate(
                    (nonbound[start_index:], nonbound[:start_index]))
                for i1 in alpha_index_nonbound_modified:
                    if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                        return True

            # iterate over all alphas using random start index, wrapping
            # around so that the indices below start_index are visited too
            start_index = int(choice(n))
            alpha_index_modified = concatenate(
                (arange(start_index, n), arange(0, start_index)))
            for i1 in alpha_index_modified:
                if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                    return True

        return False

    def train_svm(self, X, y, C, iter_max):
        """Train SVM using SMO algorithm

        Alternates between a sweep over all examples and sweeps over the
        nonbound examples only. After every successful update the dual
        objective is appended to self.loglist.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector {-1, 1}
            C (float): penalty parameter
            iter_max (int): maximum number of iterations

        Returns:
            (numpy.array, list): (alpha_array, b), trained parameters
        """
        n = X.shape[0]
        K = dot(X, X.T)
        alpha_array = zeros(n)
        b = [0]
        num_changed = 0
        examine_all = True

        while num_changed > 0 or examine_all:
            num_changed = 0

            if examine_all:
                candidates = permutation(range(n))
            else:
                candidates = permutation([
                    i for i in range(n)
                    if alpha_array[i] != 0 and alpha_array[i] != C
                ])

            for i2 in candidates:
                if self.examine_example(i2, alpha_array, X, y, C, b, K):
                    num_changed += 1
                    self.loglist.append(
                        self.calc_objective_fast(alpha_array, y, K))
                    iter_max -= 1
                    if iter_max < 0:
                        break
            if iter_max < 0:
                break

            # stop if the number of changed alphas are less than n / 10
            if num_changed < n / 10:
                break

            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True

        return (alpha_array, b)
# Example no. 11
# 0
class SVM_SGD(Classifier):
    """class SVM_SGD which implements the Pegasos algorithm for training linear SVM

    The two distinct labels found in the training response are recoded
    internally: the first entry of ``unique(y)`` becomes -1 and the second
    becomes +1.

    Attributes:
        data_preprocessor (Data_Preprocessor): a Data_Preprocessor instance
        loglist (list): store primal objective function value for each iteration
        weight (numpy.array): trained weight
        y_vals (numpy.array): class labels
    """

    def __init__(self, X, y, para_lambda, k):
        """initialize the classifier and train the model

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            para_lambda (float): regularization parameter
            k (int): maximum training sample size for each iteration
        """
        assert k > 0
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        y = copy(y)
        self.y_vals = unique(y)
        # Build both masks before overwriting anything: relabelling in two
        # sequential steps would turn every entry into +1 whenever the second
        # class label is itself -1.
        is_first = y == self.y_vals[0]
        is_second = y == self.y_vals[1]
        y[is_first] = -1
        y[is_second] = 1
        self.loglist = []
        self.weight = self.calc_weight(X, y, para_lambda, k)

    def predict(self, X, output=0):
        """make prediction for the new Design Matrix X

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused by this method

        Returns:
            list: predicted class label for each row of X
        """
        X = self.data_preprocessor.predict(X)
        predicted_score = self.predict_score(X)
        predicted_class = self.predict_class(predicted_score)
        return predicted_class

    def validate(self, X, y, output=0):
        """validate prediction result for the new Design Matrix X and new Response Vector y

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): new Response Vector
            output (int, optional): unused by this method

        Returns:
            float: prediction error on the new Inputs
        """
        X = self.data_preprocessor.predict(X)
        predicted_score = self.predict_score(X)
        predicted_class = self.predict_class(predicted_score)
        prediction_error = self.calc_predict_error(predicted_class, y)
        return prediction_error

    def predict_score(self, X):
        """calculate prediction score for new Design Matrix X

        Args:
            X (numpy.array): new (already preprocessed) Design Matrix

        Returns:
            numpy.array: vector of prediction score
        """
        return dot(X, self.weight)

    def predict_class(self, predicted_score):
        """predict the class label for each observation in the new Design Matrix X

        Args:
            predicted_score (numpy.array): vector of prediction score

        Returns:
            list: self.y_vals[0] where the score is negative, otherwise
                self.y_vals[1]
        """
        return [
            self.y_vals[0] if s < 0 else self.y_vals[1]
            for s in predicted_score
        ]

    def calc_predict_error(self, predicted_class, y):
        """Calculate the prediction error

        Args:
            predicted_class (numpy.array): vector of predicted class label
            y (numpy.array): vector of true class label

        Returns:
            float: overall error rate
        """
        predicted_indicator = array(
            [predicted_class[i] == y[i] for i in range(0, y.size)])
        return 1 - sum(predicted_indicator) / y.size

    def calc_weight(self, X, y, para_lambda, k):
        """estimate the weight vector given X, y, para_lambda and k

        Runs at most 9999 updates and stops early once the squared change of
        the weight vector drops below 0.01. The primal objective of the
        current weight is appended to self.loglist before every update.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            para_lambda (float): regularization parameter
            k (int): maximum training sample size for each iteration

        Returns:
            numpy.array: the trained weight vector
        """
        n, p = X.shape
        weight = self.initialize_weight(p, para_lambda)
        for i in range(1, 10000):
            X_work, y_work = self.select_workset(X, y, weight, k)
            self.loglist.append(self.calc_loss_function(
                X, y, weight, para_lambda))
            weight_new = self.update_weight(
                X_work, y_work, weight, para_lambda, k, i)
            if sum((weight_new - weight) ** 2) < 0.01:
                break
            else:
                weight = weight_new
        return weight

    def select_workset(self, X, y, weight, k):
        """Select training set for each iteration

        Draws k row indices (with replacement) and keeps those whose margin
        y * <x, weight> is below 1, redrawing while none qualifies. If no row
        of the whole training set violates the margin, an empty working set
        is returned instead of redrawing forever.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            weight (numpy.array): weight vector
            k (int): maximum training sample size for each iteration

        Returns:
            (numpy.array, numpy.array): (X_train, y_train)
        """
        n, p = X.shape

        violation_checked = False
        while True:
            index = choice(n, k)
            X_sub = X[index, :]
            y_sub = y[index]
            sub_index = (dot(X_sub, weight) * y_sub) < 1
            index = index[sub_index]
            if index.size > 0:
                return (X[index, :], y[index])
            # The full scan is only paid for after a failed draw, and only
            # once per call: it tells whether redrawing can ever succeed.
            if not violation_checked:
                if not ((dot(X, weight) * y) < 1).any():
                    return (X[:0, :], y[:0])
                violation_checked = True

    def initialize_weight(self, p, para_lambda):
        """initialize the weight vector

        Every entry has magnitude sqrt(1 / (p * para_lambda)); the sign of up
        to p // 2 randomly drawn entries (drawn with replacement) is flipped.

        Args:
            p (int): number of features in the Design Matrix
            para_lambda (float): regularization parameter

        Returns:
            numpy.array: a satisfactory weight vector
        """
        weight = zeros(p)
        weight.fill(sqrt(1 / (p * para_lambda)))
        neg_index = choice(p, size=int(p / 2))
        weight[neg_index] = -weight[neg_index]
        return weight

    def update_weight(self, X, y, weight, para_lambda, k, iter_num):
        """update the weight vector

        Takes one sub-gradient step with step size 1 / (para_lambda *
        iter_num), then rescales the result so that its norm does not exceed
        1 / sqrt(para_lambda).

        Args:
            X (numpy.array): Design Matrix (working set)
            y (numpy.array): Response Vector (working set)
            weight (numpy.array): weight vector
            para_lambda (float): regularization parameter
            k (int): maximum training sample size for each iteration
            iter_num (int): current iteration number

        Returns:
            numpy.array: an updated weight vector
        """
        eta = 1 / (para_lambda * iter_num)  # step size
        weight_half = (1 - eta * para_lambda) * weight + eta / k * dot(y, X)
        # keep the norm away from zero so the rescaling below cannot divide
        # by zero
        if sum(weight_half ** 2) < 1e-07:
            weight_half = maximum(weight_half, 1e-04)
        weight_new = minimum(1, 1 / sqrt(para_lambda) /
                             sqrt(sum(weight_half ** 2))) * weight_half
        return weight_new

    def calc_loss_function(self, X, y, weight, para_lambda):
        """calculate the primal objective function for linear SVM

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            weight (numpy.array): weight parameter
            para_lambda (float): regularization parameter

        Returns:
            float: current value of the primal objective function
        """
        n, p = X.shape
        tmp_loss = 1 - y * dot(X, weight)
        loss = sum(tmp_loss[tmp_loss > 0]) / n + \
            para_lambda / 2 * dot(weight, weight)
        return loss