def train(self, features: List[List[float]], labels: List[int]):
    """Fit the ensemble with LogitBoost-style boosting rounds.

    Each of the ``self.T`` rounds computes a working response ``z`` and
    weights ``w`` from the current probabilities ``pi``, picks the candidate
    from ``self.clfs`` with the smallest weighted squared error against
    ``z``, and adds half of its prediction to the additive score ``f``.

    Args:
        features: Training samples, one inner list per sample.
        labels: One label per sample. ``(y + 1) / 2`` is used as the 0/1
            target, so labels are assumed to be -1/+1 -- TODO confirm
            against callers.

    Side effects:
        Appends one classifier to ``self.clfs_picked`` and the constant
        ``0.5`` to ``self.betas`` per round.

    Raises:
        ValueError: If ``self.T > 0`` but ``self.clfs`` is empty, or if no
            candidate yields a comparable (non-NaN) weighted error.
    """
    if self.T <= 0:
        return

    y = np.array(labels)
    n_samples = np.array(features).shape[0]
    targets = (y + 1) / 2

    # A candidate's predictions on the training set do not depend on the
    # boosting round, so evaluate each one once instead of once per round.
    # NOTE(review): assumes DecisionStump.predict is deterministic.
    candidates = [
        (clf, np.array(DecisionStump(clf.s, clf.b, clf.d).predict(features)))
        for clf in self.clfs
    ]
    if not candidates:
        raise ValueError("train() requires at least one classifier in self.clfs")

    # Floor for the weights: once pi saturates to 0 or 1, pi * (1 - pi) is 0
    # and the working response would become inf/NaN, which makes every
    # candidate's score NaN and no candidate selectable.
    weight_floor = np.finfo(float).eps

    pi = np.full(n_samples, 1 / 2)
    f = np.zeros(n_samples)
    for _ in range(self.T):
        w = pi * (1 - pi)
        w[w < weight_floor] = weight_floor
        z = (targets - pi) / w

        # Weighted least-squares selection; the first candidate reaching the
        # minimum wins (strict '<'), as in a plain linear scan.
        best_clf, best_pred, best_loss = None, None, np.inf
        for clf, pred in candidates:
            residual = z - pred
            loss = np.sum(w * (residual * residual))
            if loss < best_loss:
                best_clf, best_pred, best_loss = clf, pred, loss
        if best_clf is None:
            raise ValueError("no candidate classifier produced a finite weighted error")

        self.clfs_picked.append(best_clf)
        self.betas.append(0.5)

        f = f + (1 / 2) * best_pred
        pi = 1 / (1 + np.exp(-2 * f))
def predict(self, features: List[List[float]]) -> List[int]:
    """Classify samples with the boosted ensemble.

    Sums ``self.betas[t]`` times the prediction of the ``t``-th picked
    classifier for the first ``self.T`` rounds and thresholds the total
    at zero.

    Args:
        features: Samples to classify, one inner list per sample.

    Returns:
        One label per sample: ``-1`` where the weighted sum is negative,
        ``1`` otherwise (a sum of exactly zero maps to ``1``).
    """
    n_samples = np.array(features).shape[0]
    score = np.zeros(n_samples)
    for t in range(self.T):
        picked = self.clfs_picked[t]
        stump = DecisionStump(picked.s, picked.b, picked.d)
        score = score + self.betas[t] * np.array(stump.predict(features))

    # The np.int alias was removed in NumPy 1.24; np.where with plain Python
    # ints yields the same default integer dtype without relying on it.
    return np.where(score < 0, -1, 1).tolist()
def build_tree(self, X, Y, w, depth, curr_depth):
    """Recursively grow a binary tree whose internal nodes are decision stumps.

    Args:
        X: Samples for this subtree; must support ``len`` and boolean-mask
            indexing (presumably a NumPy array -- confirm against callers).
        Y: Labels aligned with ``X``.
        w: Per-sample weights aligned with ``X``.
        depth: Maximum depth; a node at ``curr_depth >= depth`` is a leaf.
        curr_depth: Depth of the node being built.

    Returns:
        A ``Node``. For a leaf, ``stump`` is ``1.0`` or ``-1.0``. Otherwise
        ``stump`` is the fitted ``DecisionStump``, ``left`` holds the subtree
        for samples predicted ``>= 0`` and ``right`` the subtree for samples
        predicted ``< 0``.
    """
    tree = Node()

    # See if we can do any splitting at all
    if len(X) < 2 or len(unique(Y)) < 2 or curr_depth >= depth:
        # Leaf: take the sign whose weighted label mass is larger in
        # magnitude; ties (including an empty node) resolve to -1.0.
        yw = Y * w
        non_negative_mass = abs(sum(yw[yw >= 0]))
        negative_mass = abs(sum(yw[yw < 0]))
        tree.stump = 1.0 if non_negative_mass > negative_mass else -1.0
        return tree

    # TODO: check for inconsistent data

    # Learn the decision stump
    stump = DecisionStump().fit(X, Y, w)

    # Predict once and derive both partitions from the same output instead
    # of running the stump over X twice.
    predicted = stump.predict(X)
    side1 = predicted >= 0
    side2 = predicted < 0

    tree.stump = stump
    tree.left = self.build_tree(X[side1], Y[side1], w[side1], depth, curr_depth + 1)
    tree.right = self.build_tree(X[side2], Y[side2], w[side2], depth, curr_depth + 1)
    return tree
def train(self, features: List[List[float]], labels: List[int]):
    """Fit the ensemble with AdaBoost-style boosting rounds.

    Each of the ``self.T`` rounds picks the candidate from ``self.clfs``
    with the smallest weighted misclassification error, stores it with
    ``beta = 0.5 * log((1 - error) / error)``, then scales the weights of
    misclassified samples by ``exp(beta)`` and of correctly classified
    samples by ``exp(-beta)`` before renormalising them to sum to one.

    Args:
        features: Training samples, one inner list per sample.
        labels: One label per sample, compared for equality with the
            candidates' predictions.

    Side effects:
        Appends one classifier to ``self.clfs_picked`` and one coefficient
        to ``self.betas`` per round.

    Raises:
        ValueError: If ``self.T > 0`` but ``self.clfs`` is empty, or if no
            candidate yields a comparable (non-NaN) weighted error.
    """
    if self.T <= 0:
        return

    labels = np.array(labels)
    n_samples = np.array(features).shape[0]

    # Which samples a candidate misclassifies does not depend on the round,
    # so compute each mismatch mask once instead of once per round.
    # NOTE(review): assumes DecisionStump.predict is deterministic.
    candidates = [
        (clf, np.array(DecisionStump(clf.s, clf.b, clf.d).predict(features)) != labels)
        for clf in self.clfs
    ]
    if not candidates:
        raise ValueError("train() requires at least one classifier in self.clfs")

    # Keeps the error strictly inside (0, 1): a perfect candidate would
    # otherwise divide by zero, and an always-wrong one would take log(0).
    eps = np.finfo(float).eps

    w = np.full(n_samples, 1 / n_samples)
    for _ in range(self.T):
        # The first candidate reaching the minimum wins (strict '<').
        best_clf, best_miss, best_error = None, None, np.inf
        for clf, miss in candidates:
            weighted_error = np.sum(w * miss)
            if weighted_error < best_error:
                best_clf, best_miss, best_error = clf, miss, weighted_error
        if best_clf is None:
            raise ValueError("no candidate classifier produced a finite weighted error")
        self.clfs_picked.append(best_clf)

        error = min(max(best_error, eps), 1 - eps)
        beta = (1 / 2) * np.log((1 - error) / error)
        self.betas.append(beta)

        # Use this round's beta directly rather than indexing self.betas by
        # round number, which is only right when the list was empty on entry.
        w = w * np.where(best_miss, np.exp(beta), np.exp(-beta))
        w = w / np.sum(w)
def build_tree(self, X, Y, w, depth, curr_depth):
    """Recursively grow a binary tree whose internal nodes are decision stumps.

    Args:
        X: Samples for this subtree; must support ``len`` and boolean-mask
            indexing (presumably a NumPy array -- confirm against callers).
        Y: Labels aligned with ``X``.
        w: Per-sample weights aligned with ``X``.
        depth: Maximum depth; a node at ``curr_depth >= depth`` is a leaf.
        curr_depth: Depth of the node being built.

    Returns:
        A ``Node``. For a leaf, ``stump`` is ``1.0`` or ``-1.0``. Otherwise
        ``stump`` is the fitted ``DecisionStump``, ``left`` holds the subtree
        for samples predicted ``>= 0`` and ``right`` the subtree for samples
        predicted ``< 0``.
    """
    tree = Node()

    # See if we can do any splitting at all
    cannot_split = len(X) < 2 or len(unique(Y)) < 2 or curr_depth >= depth
    if cannot_split:
        # Leaf: take the sign whose weighted label mass is larger in
        # magnitude; ties (including an empty node) resolve to -1.0.
        yw = Y * w
        mass_non_negative = abs(sum(yw[yw >= 0]))
        mass_negative = abs(sum(yw[yw < 0]))
        tree.stump = 1.0 if mass_non_negative > mass_negative else -1.0
        return tree

    # TODO: check for inconsistent data

    # Learn the decision stump
    stump = DecisionStump().fit(X, Y, w)

    # Run the stump over X a single time; both partitions come from the
    # same prediction vector.
    outputs = stump.predict(X)
    side1 = outputs >= 0
    side2 = outputs < 0

    tree.stump = stump
    next_depth = curr_depth + 1
    tree.left = self.build_tree(X[side1], Y[side1], w[side1], depth, next_depth)
    tree.right = self.build_tree(X[side2], Y[side2], w[side2], depth, next_depth)
    return tree
choices=["1.1", "1.2", "1.3", "1.4", "1.5"]) io_args = parser.parse_args() module = io_args.module # Decision Stump using inequalities/threshold if module == "1.1": # 1. Load citiesSmall dataset dataset = load_dataset("citiesSmall.pkl") X = dataset["X"] y = dataset["y"] # 2. Evaluate decision stump model = DecisionStump() model.fit(X, y) y_pred = model.predict(X) error = np.mean(y_pred != y) print("Decision Stump with inequality rule error: %.3f" % error) # PLOT RESULT utils.plotClassifier(model, X, y) fname = os.path.join("..", "figs", "decision_stump_boundary.pdf") plt.savefig(fname) print("\nFigure saved as '%s'" % fname) # Simple decision tree using decision stumps elif module == "1.2": # 1. Load citiesSmall dataset dataset = load_dataset("citiesSmall.pkl")