    return Xtrain, Xtest, Ytrain, Ytest


if __name__ == '__main__':
    # np.random.seed(3)
    Xtrain, Xtest, Ytrain, Ytest = get_data()
    print("Possible labels:", set(Ytrain))

    # make sure the targets are (-1, +1)
    Ytrain[Ytrain == 0] = -1
    Ytest[Ytest == 0] = -1

    # scale the data
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xtest = scaler.transform(Xtest)

    # now we'll use our custom implementation
    model = SVM(kernel=linear)

    t0 = datetime.now()
    model.fit(Xtrain, Ytrain)
    print("train duration:", datetime.now() - t0)
    t0 = datetime.now()
    print("train score:", model.score(Xtrain, Ytrain), "duration:", datetime.now() - t0)
    t0 = datetime.now()
    print("test score:", model.score(Xtest, Ytest), "duration:", datetime.now() - t0)

    if Xtrain.shape[1] == 2:
        plot_decision_boundary(model)
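# The script above passes a `linear` function into SVM(kernel=linear) but its definition
# is not shown here. A minimal sketch of what such a kernel might look like (hypothetical;
# the actual signature may differ, and in the real script it would be defined before the
# __main__ block):
def linear(X1, X2, c=0.0):
    # linear kernel: K(x, z) = x . z + c, computed for every pair of rows in X1 and X2
    return X1.dot(X2.T) + c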
X[250:375] += np.array([-sep, -sep])
X[375:] += np.array([-sep, sep])
Y = np.array([0]*125 + [1]*125 + [0]*125 + [1]*125)

# plot the data
plt.scatter(X[:, 0], X[:, 1], s=100, c=Y, alpha=0.5)
plt.show()

# lone decision tree
model = DecisionTreeClassifier()
model.fit(X, Y)
print("score for 1 tree:", model.score(X, Y))

# plot data with boundary
plt.scatter(X[:, 0], X[:, 1], s=100, c=Y, alpha=0.5)
plot_decision_boundary(X, model)
plt.show()

# create the bagged model
class BaggedTreeClassifier:
    def __init__(self, B):
        self.B = B

    def fit(self, X, Y):
        N = len(X)
        self.models = []
        for b in range(self.B):
            idx = np.random.choice(N, size=N, replace=True)
            Xb = X[idx]
            Yb = Y[idx]
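# The fit loop above is cut off right after drawing the bootstrap sample. A minimal,
# self-contained sketch of how the class might be completed (hypothetical: one tree per
# bootstrap sample, majority vote over 0/1 labels at prediction time):
class BaggedTreeClassifierSketch:
    def __init__(self, B):
        self.B = B

    def fit(self, X, Y):
        N = len(X)
        self.models = []
        for b in range(self.B):
            idx = np.random.choice(N, size=N, replace=True)  # bootstrap sample
            model = DecisionTreeClassifier()
            model.fit(X[idx], Y[idx])
            self.models.append(model)

    def predict(self, X):
        # average the B tree predictions and round -> majority vote for binary labels
        predictions = np.zeros(len(X))
        for model in self.models:
            predictions += model.predict(X)
        return np.round(predictions / self.B)

    def score(self, X, Y):
        return np.mean(self.predict(X) == Y)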
    loss = data_loss + reg_loss
    if i % 10 == 0:
        print("iteration {}: loss: {}".format(i, loss))

    # gradient of the softmax loss: dL/dz_j = p_j - 1 for the correct class (j == y), p_j otherwise
    dz = probs
    dz[range(M), Y] -= 1
    dw = np.dot(X.T, dz)
    db = np.sum(dz, axis=0, keepdims=True)
    dw += reg * W

    W -= step_size * dw
    b -= step_size * db

print(W)
print(b)


def predict(X, W, b):
    scores = np.dot(X, W) + b
    predicted_class = np.argmax(scores, axis=1)
    return predicted_class


### evaluate on the training data
predicted_class = predict(X, W, b)
print('training accuracy: %.2f' % (np.mean(predicted_class == Y)))
util.plot_decision_boundary(X, Y, lambda x: predict(x, W, b))
plt.show()
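# The update above assumes `probs`, `data_loss` and `reg_loss` come from a forward pass
# earlier in the training loop, which is not shown here. A minimal sketch of that pass,
# assuming the loss is summed over the M examples (which is what the un-normalized
# gradient above implies; if the elided code averages instead, dz would also be / M):
def softmax_forward(X, Y, W, b, reg):
    M = X.shape[0]
    scores = np.dot(X, W) + b                                        # (M, C) class scores
    exp_scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)   # softmax probabilities
    data_loss = np.sum(-np.log(probs[range(M), Y]))                  # cross-entropy loss
    reg_loss = 0.5 * reg * np.sum(W * W)                             # L2 penalty (matches dw += reg*W)
    return probs, data_loss, reg_loss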
    dw2 = np.dot(hidden.T, dz2)
    dh = np.dot(dz2, W2.T)  # backprop into the hidden layer
    dz1 = dh
    dz1[hidden <= 0] = 0.0  # backprop the ReLU non-linearity
    dw1 = np.dot(X.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)

    dw2 += reg * W2
    dw1 += reg * W1

    W2 -= step_size * dw2
    b2 -= step_size * db2
    W1 -= step_size * dw1
    b1 -= step_size * db1


def predict(X, W1, b1, W2, b2):
    h = np.maximum(0.0, np.dot(X, W1) + b1)
    scores = np.dot(h, W2) + b2
    predicted_class = np.argmax(scores, axis=1)
    return predicted_class


### evaluate on the training data
predicted_class = predict(X, W1, b1, W2, b2)
print('training accuracy: %.2f' % (np.mean(predicted_class == Y)))
util.plot_decision_boundary(X, Y, lambda x: predict(x, W1, b1, W2, b2))
plt.show()
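# The backward pass above assumes `hidden`, `dz2` and `db2` were produced earlier in the
# loop by a forward pass through a one-hidden-layer ReLU network. A minimal sketch of
# that pass (hypothetical; as in the softmax example, dz2 would be probs with 1
# subtracted at the correct class):
def two_layer_forward(X, W1, b1, W2, b2):
    hidden = np.maximum(0.0, np.dot(X, W1) + b1)   # ReLU hidden layer
    scores = np.dot(hidden, W2) + b2               # class scores
    exp_scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return hidden, probs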
axes = plt.gca()
axes.set_xlim([-1.5, 1.5])
axes.set_ylim([-1.5, 1.5])
plot_decision_boundary(lambda x: predict_dec(params, x.T), train_X, train_Y)
'''


def initialize_parameters_he(layer_dims):
    np.random.seed(3)
    params = {}
    L = len(layer_dims)
    for l in range(1, L):
        # He initialization: scale by sqrt(2 / fan_in), suited to ReLU activations
        params['W' + str(l)] = np.random.randn(
            layer_dims[l], layer_dims[l - 1]) * np.sqrt(2 / layer_dims[l - 1])
        params['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return params


params = model(train_X, train_Y, initialization='he', print_cost=True)
print('on the train set')
predictions_train = predict(train_X, train_Y, params)
print('on the test set')
predictions_test = predict(test_X, test_Y, params)

plt.title("Model with He initialization")
axes = plt.gca()
axes.set_xlim([-1.5, 1.5])
axes.set_ylim([-1.5, 1.5])
plot_decision_boundary(lambda x: predict_dec(params, x.T), train_X, train_Y)
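# Quick sanity check of initialize_parameters_he (hypothetical layer sizes): a 2-4-1
# network should get W1 of shape (4, 2) scaled by sqrt(2/2), W2 of shape (1, 4) scaled
# by sqrt(2/4), and zero biases.
p = initialize_parameters_he([2, 4, 1])
print(p['W1'].shape, p['b1'].shape)   # (4, 2) (4, 1)
print(p['W2'].shape, p['b2'].shape)   # (1, 4) (1, 1)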
                           rnd_clf.feature_importances_):
        print(name, score)

"""
Boost
"""
if False:
    """ AdaBoost """
    ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                                 n_estimators=200,
                                 algorithm="SAMME.R",
                                 learning_rate=0.5,
                                 random_state=42)
    ada_clf.fit(X_train, y_train)
    util.plot_decision_boundary(ada_clf, X, y)

if True:
    """ Gradient Boosting """
    np.random.seed(42)
    X = np.random.rand(100, 1) - 0.5
    y = 3 * X[:, 0]**2 + 0.05 * np.random.randn(100)
    X_new = np.array([[0.8]])

    # tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
    # tree_reg1.fit(X, y)

    # y2 = y - tree_reg1.predict(X)
    # tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)
    # tree_reg2.fit(X, y2)
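    # The commented-out lines sketch manual gradient boosting: each tree fits the
    # residuals left by the previous ones, and predictions are summed. A minimal runnable
    # version of that idea (hypothetical continuation with a third tree; sklearn's
    # GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0) gives the
    # same kind of model):
    tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
    tree_reg1.fit(X, y)

    y2 = y - tree_reg1.predict(X)                 # residuals of the first tree
    tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)
    tree_reg2.fit(X, y2)

    y3 = y2 - tree_reg2.predict(X)                # residuals after two trees
    tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)
    tree_reg3.fit(X, y3)

    # the ensemble prediction is the sum of the three trees' predictions
    y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))
    print("manual gradient boosting prediction for X_new:", y_pred)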