def train(self, features: List[List[float]], labels: List[int]):
        X = np.array(features)
        N = X.shape[0]
        pi = np.full(N, 0.5)          # initial probabilities pi_0 = 1/2
        y = np.array(labels)
        f = np.zeros(N)               # ensemble score F(x)
        hx = np.zeros(N)              # predictions of the best stump in the current round

        for t in range(self.T):
            num = ((y + 1) / 2) - pi
            den = np.multiply(pi, 1 - pi)
            z = num / den
            w = np.multiply(pi, 1 - pi)

            # step 5: pick the stump minimizing the weighted squared error to z
            min_err = float('inf')
            for clf in self.clfs:
                decstump = DecisionStump(clf.s, clf.b, clf.d)
                hxpred = np.array(decstump.predict(features))
                check = np.sum(
                    np.multiply(w, np.multiply(z - hxpred, z - hxpred)))

                if check < min_err:
                    min_clf = clf
                    hx = hxpred
                    min_err = check

            self.clfs_picked.append(min_clf)
            self.betas.append(0.5)

            f = f + (1 / 2) * hx
            den = 1 + np.exp(-2 * f)
            pi = 1 / den
    def predict(self, features: List[List[float]]) -> List[int]:
        x = np.array(features)

        f = np.zeros(x.shape[0])
        for t in range(self.T):
            decstump = DecisionStump(self.clfs_picked[t].s,
                                     self.clfs_picked[t].b,
                                     self.clfs_picked[t].d)
            f = f + (self.betas[t] * np.array(decstump.predict(features)))

        predictions = np.ones(f.shape, dtype=int)
        predictions[np.where(f < 0)[0]] = -1

        return predictions.tolist()
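Most of the train methods in this listing assume the same stump interface: a DecisionStump carries a sign s in {+1, -1}, a threshold b, and a feature index d, and predicts s when x[d] > b and -s otherwise (see the explicit per-example checks further below). A minimal sketch of that assumed interface, with names taken from the snippets rather than from any original implementation:

import numpy as np
from typing import List


class DecisionStump:
    # Sketch of the stump interface assumed by the boosting code in this listing:
    # s: sign in {+1, -1}, b: threshold, d: feature dimension.
    def __init__(self, s: int, b: float, d: int):
        self.s = s
        self.b = b
        self.d = d

    def predict(self, features: List[List[float]]) -> List[int]:
        X = np.array(features)
        # h(x) = s if x[d] > b else -s, matching the per-example checks in the snippets.
        return np.where(X[:, self.d] > self.b, self.s, -self.s).tolist()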
Example #3
File: hw2.py  Project: leonhx/codebase
def q14():
    T = 1
    clf = AdaBoost(T, lambda u: DecisionStump(u), lambda dd: dd.err_)
    X, y = load_ada_boost_train()
    clf.fit(X, y)
    print(clf.u)
    print(np.sum(clf.u))
    print('AND SEE q13')
Example #4
    def build_tree(self, X, Y, w, depth, curr_depth):
        # See if we can do any splitting at all
        tree = Node()
        yw = Y * w
        if len(X) < 2 or len(unique(Y)) < 2 or curr_depth >= depth:
            # Leaf node: store the sign carrying the larger weighted label mass
            tree.stump = 1.0 if abs(sum(yw[yw >= 0])) > abs(sum(yw[yw < 0])) else -1.0
            return tree
        # TODO: check for inconsistent data

        # Learn the decision stump and split the data on its prediction
        stump = DecisionStump().fit(X, Y, w)
        side1 = stump.predict(X) >= 0
        side2 = stump.predict(X) < 0

        tree.stump = stump
        tree.left = self.build_tree(X[side1], Y[side1], w[side1], depth, curr_depth + 1)
        tree.right = self.build_tree(X[side2], Y[side2], w[side2], depth, curr_depth + 1)

        return tree
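A tree built this way can be evaluated by routing an example with each internal stump until a +/-1.0 leaf is reached. A hedged sketch of such a traversal (the classify helper is hypothetical and not part of the snippet; it assumes x is a NumPy row vector and that stump.predict takes a 2-D array, as in build_tree):

def classify(tree, x):
    # Hypothetical traversal matching build_tree: leaves store +/-1.0,
    # internal nodes store a fitted DecisionStump.
    if isinstance(tree.stump, float):
        return tree.stump
    # build_tree sends predict(X) >= 0 to the left child and < 0 to the right child.
    if tree.stump.predict(x.reshape(1, -1))[0] >= 0:
        return classify(tree.left, x)
    return classify(tree.right, x)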
    def train(self, features: List[List[float]], labels: List[int]):
        X = np.array(features)
        N = X.shape[0]
        w = np.full(N, 1 / N)      # uniform initial example weights
        labels = np.array(labels)
        hx = np.zeros(N)           # predictions of the best stump in the current round

        for t in range(self.T):

            # step 3: pick the stump with the smallest weighted classification error
            min_err = float('inf')
            for clf in self.clfs:
                decstump = DecisionStump(clf.s, clf.b, clf.d)

                hxpred = np.array(decstump.predict(features))
                indicator = np.zeros((N))
                indicator[np.where(labels != hxpred)[0]] = 1

                check = np.sum(np.multiply(w, indicator))

                if check < min_err:
                    min_clf = clf
                    hx = hxpred
                    min_err = check

            self.clfs_picked.append(min_clf)

            error = 0
            for i in range(N):
                if labels[i] != hx[i]:
                    error = error + w[i]

            beta = (1 / 2) * np.log((1 - error) / error)
            self.betas.append(beta)
            for i in range(N):
                if labels[i] == hx[i]:
                    w[i] = w[i] * np.exp((-1) * self.betas[t])
                else:
                    w[i] = w[i] * np.exp(self.betas[t])

            w_sum = np.sum(w)
            w = w / w_sum
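The per-example error computation and weight update inside the loop above can also be written in vectorized NumPy. A behavior-equivalent sketch, assuming labels and hx take values in {-1, +1}:

# Vectorized version of the per-example loops above (same result):
error = np.sum(w[labels != hx])          # weighted training error of the chosen stump
beta = 0.5 * np.log((1 - error) / error)
w = w * np.exp(-beta * labels * hx)      # exp(-beta) if correct, exp(+beta) if misclassified
w = w / np.sum(w)                        # renormalize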
Example #6
File: hw2.py  Project: leonhx/codebase
def q13():
    T = 300
    clf = AdaBoost(T, lambda u: DecisionStump(u), lambda dd: dd.err_)
    X, y = load_ada_boost_train()
    clf.fit(X, y)
    print(clf.e)
    print('Ein:', clf.score(X, y))
    print('%f <= U_T <= %f' % (np.min(clf.U), np.max(clf.U)))
    print('e_t >= %f' % np.min(clf.e))
    X, y = load_ada_boost_test()
    print('Eout:', clf.score(X, y))
    print('U(2):', clf.U[1])
Example #7
File: hw2.py  Project: leonhx/codebase
def q12():
    clf = DecisionStump()
    X, y = load_ada_boost_train()
    clf.fit(X, y)
    print('Ein:', clf.score(X, y))
    X, y = load_ada_boost_test()
    print('Eout:', clf.score(X, y))
Example #8
	def train(self, features: List[List[float]], labels: List[int]):
		############################################################
		# TODO: implement "train"
		############################################################
		N = len(features)
		w = [[0 for n in range(N)] for m in range(self.T + 1)]  ### w[t][n]: weight of example n at iteration t
		w[0] = [1 / N for n in range(N)]  ### each example starts with weight 1/N

		for t in range(self.T): ### find T classifiers(T iterations)
			min_w_sum = float('inf')
			best_d = 0
			best_b = 0.0
			best_s = 0

			for clf in self.clfs:  ### find the best classifier
				w_sum = 0  ### total weight of examples whose label disagrees with this classifier's prediction

				for j in range(len(features)): ### compute the error rate of each classifier
					a = 0
					if features[j][clf.d] > clf.b:
						a = clf.s
					else:
						a = -clf.s
					if a != labels[j]:
						w_sum += w[t][j]
				if w_sum < min_w_sum:
					min_w_sum = w_sum
					best_d = clf.d
					best_b = clf.b
					best_s = clf.s

			self.clfs_picked.append(DecisionStump(best_s,best_b,best_d))
			error = min_w_sum
			beta = 0.5 * np.log((1-error)/error)
			self.betas.append(beta)
			
			for i in range(len(features)):  # update the weights of the N examples
				b =0
				if features[i][best_d] > best_b:
					b = best_s
				else:
					b = -best_s
				if b == labels[i]:
					w[t+1][i] = w[t][i] * np.exp(-beta)
				else:
					w[t+1][i] = w[t][i] * np.exp(beta)
			
			w_total = 0  # normalize the weights
			for i in range(len(w[0])):
				w_total += w[t + 1][i]
			for i in range(len(w[0])):
				w[t + 1][i] /= w_total
Example #10
	def boostRound(self):
		weakLearners = []
		self.weights /= self.weights.sum()

		for feature in self.featuretbl:
			rec1 = feature[:4]
			rec2 = feature[2:]

			stump = DecisionStump(rec1, rec2)
			stump.fit(self.iimages, self.labels, self.weights)
			weakLearners.append(stump)

		errors = np.array([learner.error for learner in weakLearners])
		bestLearner = weakLearners[errors.argmin()]
		error = bestLearner.error

		beta = error / (1 - error)
		alpha = np.log(1 / beta)

		predictions = bestLearner.predict(self.iimages)
		# Viola-Jones update: drop the chosen feature and multiply correctly classified
		# examples by beta (< 1), so misclassified examples gain relative weight.
		self.featuretbl = np.delete(self.featuretbl, np.argmin(errors), 0)
		self.weights *= np.power(beta, np.equal(predictions, self.labels))

		return alpha, bestLearner
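A typical way to drive boostRound is to call it once per round and collect the returned (alpha, learner) pairs. A hypothetical usage sketch; the booster instance, its construction, and T are assumptions not shown in the snippet:

# Hypothetical driver loop; 'booster' is an instance of the class that owns boostRound().
alphas, learners = [], []
for _ in range(T):                       # T = number of boosting rounds (assumed)
    alpha, learner = booster.boostRound()
    alphas.append(alpha)
    learners.append(learner)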
Example #11
    def train(self, features: List[List[float]], labels: List[int]):
        '''
        Inputs:
        - features: the features of all examples
        - labels: the label of all examples

        Require:
        - store what you learn in self.clfs_picked and self.betas
        '''
        ############################################################
        # TODO: implement "train"
        ############################################################
        features = np.asarray(features)  # initialization
        if np.ndim(features) == 1:
            features = np.expand_dims(features, axis=0)
        labels = np.asarray(labels)
        h = list(self.clfs)
        h_list = []
        for n in range(len(h)):
            h_list.append(DecisionStump(h[n].s, h[n].b,
                                        h[n].d))  # import classifiers
        N = features.shape[0]
        D = np.full(N, 1 / N)    # example weights D_t, initialized uniformly
        h_t = []
        e_t = np.zeros(self.T)   # weighted error of the chosen stump at round t
        b_t = np.zeros(self.T)   # beta_t; note that 'e_t = b_t = np.zeros(...)' would alias the two arrays
        for t in range(0, self.T):
            h_t1 = np.array([
                np.multiply(
                    D,
                    (h_list[n].predict(features) != labels).astype(int)).sum()
                for n in range(len(h))
            ])
            h_t.append(h_list[np.argmin(h_t1)])
            e_t[t] = np.multiply(
                D, (h_t[t].predict(features) != labels).astype(int)).sum()
            b_t[t] = 0.5 * np.log((1 - e_t[t]) / e_t[t])

            sgn = (h_t[t].predict(features) != labels).astype(int)
            sgn[sgn == 0] = -1
            D = D * np.exp(b_t[t] * sgn)

            D = D / np.sum(D)
        self.clfs_picked = h_t
        self.betas = b_t.tolist()
        return
Example #12
	def train(self, features: List[List[float]], labels: List[int]):
		############################################################
		# TODO: implement "train"
		############################################################
		N = len(features)
		pi = [[0 for i in range(N)] for j in range(self.T + 1)]  ### pi is (T+1) x N; pi_0 = 1/2
		pi[0] = [0.5 for i in range(N)]
		z = [[0 for i in range(N)] for j in range(self.T)]  ### working responses z, T x N
		w = [[0 for i in range(N)] for j in range(self.T)]  ### weights w, T x N; recomputed from pi every iteration
		f = [[0 for i in range(N)] for j in range(self.T + 1)]  ### F(x), (T+1) x N; F_0(x) = 0
		for t in range(self.T):### T iterations
			for n in range(len(features)): ### update w and z
				z[t][n]=((labels[n]+1)/2 - pi[t][n])/(pi[t][n]*(1 - pi[t][n]))
				w[t][n] = pi[t][n] * (1 - pi[t][n])
			min_w_sum = float('inf')
			best_d = 0
			best_b = 0.0
			best_s = 0
			for clf in self.clfs:  ### find the best classifier
				w_sum = 0  ### weighted squared error of this classifier on the working response z
				for n in range(len(features)):
					h_xn = 0
					if features[n][clf.d] > clf.b:
						h_xn = clf.s
					else:
						h_xn = -clf.s
					w_sum += w[t][n] * np.square(z[t][n] - h_xn)
				if w_sum < min_w_sum:  ### keep the best classifier h_t
					min_w_sum = w_sum
					best_d = clf.d
					best_b = clf.b
					best_s = clf.s			
			self.clfs_picked.append(DecisionStump(best_s, best_b, best_d))  ### add the best classifier
			self.betas.append(0.5)  ### 0.5 matches the f += 0.5 * h_t update below; the predicted sign is the same either way
			for n in range(len(features)):
				h_t_x = 0
				if features[n][best_d] > best_b:
					h_t_x = best_s
				else:
					h_t_x = -best_s
				f[t+1][n] = f[t][n] + 0.5 * h_t_x

			for n in range(len(features)):  ### finally, use f to update pi
				pi[t+1][n] = 1/(1+ np.exp(-2 * f[t+1][n])) 
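For reference, this variant follows the LogitBoost recipe implied by the code itself: the working response is z_n = ((y_n + 1)/2 - pi_n) / (pi_n (1 - pi_n)) with weights w_n = pi_n (1 - pi_n), the chosen stump minimizes the weighted squared error sum_n w_n (z_n - h(x_n))^2, F is updated by 0.5 * h_t, and the probabilities are refreshed via pi_n = 1 / (1 + exp(-2 F(x_n))), exactly as in the first train snippet at the top of this listing.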
Example #13
File: main.py  Project: kthnd/ml-basics
        # PLOT RESULT
        utils.plotClassifier(model, X, y)

        fname = os.path.join("..", "figs", "q2_decisionBoundary.pdf")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    elif question == "2.2":
        # 1. Load citiesSmall dataset
        dataset = utils.load_dataset("citiesSmall")
        X = dataset["X"]
        y = dataset["y"]

        # 3. Evaluate decision stump
        model = DecisionStump()
        model.fit(X, y)
        y_pred = model.predict(X)

        error = np.mean(y_pred != y)
        print("Decision Stump with inequality rule error: %.3f" % error)

        # PLOT RESULT
        utils.plotClassifier(model, X, y)

        fname = os.path.join("..", "figs", "q2.2_decisionBoundary.pdf")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    elif question == "2.3":
        # 1. Load citiesSmall dataset
Example #14
                        '--module',
                        required=True,
                        choices=["1.1", "1.2", "1.3", "1.4", "1.5"])

    io_args = parser.parse_args()
    module = io_args.module

    # Decision Stump using inequalities/threshold
    if module == "1.1":
        # 1. Load citiesSmall dataset
        dataset = load_dataset("citiesSmall.pkl")
        X = dataset["X"]
        y = dataset["y"]

        # 2. Evaluate decision stump
        model = DecisionStump()
        model.fit(X, y)
        y_pred = model.predict(X)

        error = np.mean(y_pred != y)
        print("Decision Stump with inequality rule error: %.3f" % error)

        # PLOT RESULT
        utils.plotClassifier(model, X, y)

        fname = os.path.join("..", "figs", "decision_stump_boundary.pdf")
        plt.savefig(fname)
        print("\nFigure saved as '%s'" % fname)

    # Simple decision tree using decision stumps
    elif module == "1.2":
Example #15
            self.E.append(self.score(X, y))


    def predict(self, x):
        return sign(np.sum([a * g.predict(x)
                            for a, g in zip(self.alpha, self.g)]))


    def score(self, X, y):
        y_ = np.apply_along_axis(lambda x: self.predict(x), axis=1, arr=X)
        return np.sum(y != y_) * 1.0 / len(y)


if __name__ == '__main__':
    X = np.array([[ 1. ,  2.1],
                  [ 2. ,  1.1],
                  [ 1.3,  1. ],
                  [ 1. ,  1. ],
                  [ 2. ,  1. ]])
    y = np.array([1.0, 1.0, -1.0, -1.0, 1.0])
    T = 5
    from decision_stump import DecisionStump
    clf = AdaBoost(T, lambda u: DecisionStump(u), lambda dd: dd.err_)
    clf.fit(X, y)
    print(clf.score(X, y))
    print(clf.E)
    for a, g in zip(clf.alpha, clf.g):
        print(a, g)
    for i in range(len(X)):
        print(clf.predict(X[i]))
# -*- coding: utf-8 -*-
from decision_stump import DecisionStump

if __name__ == '__main__':
    q7 = DecisionStump(20, 1000)
    q7.run_and_render_histogram()

    q8 = DecisionStump(2000, 1000)
    q8.run_and_render_histogram()