Example #1
  return Xtrain, Xtest, Ytrain, Ytest


if __name__ == '__main__':
  # np.random.seed(3)
  Xtrain, Xtest, Ytrain, Ytest = get_data()
  print("Possible labels:", set(Ytrain))

  # Make sure the targets are (-1, +1)
  Ytrain[Ytrain == 0] = -1
  Ytest[Ytest == 0] = -1

  # Scale the data
  scaler = StandardScaler()
  Xtrain = scaler.fit_transform(Xtrain)
  Xtest = scaler.transform(Xtest)

  # Now we'll use our custom implementation
  model = SVM(kernel=linear)

  t0 = datetime.now()
  model.fit(Xtrain, Ytrain)
  print("train duration:", datetime.now() - t0)
  t0 = datetime.now()
  print("train score:", model.score(Xtrain, Ytrain), "duration:", datetime.now() - t0)
  t0 = datetime.now()
  print("test score:", model.score(Xtest, Ytest), "duration:", datetime.now() - t0)

  if Xtrain.shape[1] == 2:
    plot_decision_boundary(model)
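
The `linear` passed as the kernel above is defined elsewhere in the source; as a hedged sketch, a linear kernel function typically just returns the Gram matrix of dot products (the constant offset `c` here is an assumption, not from the original):

import numpy as np

def linear(X1, X2, c=0):
  # Gram matrix of pairwise dot products, plus an optional constant offset
  return X1.dot(X2.T) + c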
Example #2

X[250:375] += np.array([-sep, -sep])
X[375:] += np.array([-sep, sep])
Y = np.array([0]*125 + [1]*125 + [0]*125 + [1]*125)

# plot the data
plt.scatter(X[:,0], X[:,1], s=100, c=Y, alpha=0.5)
plt.show()

# lone decision tree
model = DecisionTreeClassifier()
model.fit(X, Y)
print "score for 1 tree:", model.score(X, Y)

# plot data with boundary
plt.scatter(X[:,0], X[:,1], s=100, c=Y, alpha=0.5)
plot_decision_boundary(X, model)
plt.show()


# create the bagged model
class BaggedTreeClassifier:
  def __init__(self, B):
    self.B = B

  def fit(self, X, Y):
    N = len(X)
    self.models = []
    for b in range(self.B):
      # bootstrap sample: draw N rows with replacement
      idx = np.random.choice(N, size=N, replace=True)
      Xb = X[idx]
      Yb = Y[idx]
      # fit one tree per bootstrap sample and keep it for voting
      model = DecisionTreeClassifier()
      model.fit(Xb, Yb)
      self.models.append(model)
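
  # The listing cuts off inside the class; a plausible completion (a sketch,
  # assuming binary 0/1 labels and majority voting over the bootstrapped trees):
  def predict(self, X):
    # majority vote: average the per-tree 0/1 predictions and round
    predictions = np.zeros(len(X))
    for model in self.models:
      predictions += model.predict(X)
    return np.round(predictions / self.B)

  def score(self, X, Y):
    return np.mean(self.predict(X) == Y)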
Example #3
      loss = data_loss + reg_loss

      if i % 10 == 0:
        print("iteration {}: loss:{}".format(i, loss))

      dz = probs
      dz[range(M), Y] -= 1  # softmax loss gradient: P_j - 1 at the true class (j == c), P_j otherwise
      dw = np.dot(X.T, dz)
      db = np.sum(dz, axis=0, keepdims=True)
      
      dw += reg*W

      W -= step_size * dw
      b -= step_size * db

print(W)
print(b)

def predict(X, W, b):
  scores = np.dot(X, W) + b
  predicted_class = np.argmax(scores, axis=1)
  return predicted_class


### Evaluate on the training data
predicted_class = predict(X, W, b)
print('training accuracy: %.2f' % (np.mean(predicted_class == Y)))

util.plot_decision_boundary(X, Y, lambda x:predict(x, W, b))
plt.show()
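
This snippet opens after the forward pass; the names it relies on (`probs`, `data_loss`, `reg_loss`, `M`) suggest lines like the following. This is a hedged sketch, not the original code, kept consistent with the gradient above (a summed, not averaged, cross-entropy):

# sketch of the forward pass the snippet presumes
scores = np.dot(X, W) + b                                    # (M, C) class scores
exp_scores = np.exp(scores - scores.max(axis=1, keepdims=True))
probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)   # softmax probabilities
data_loss = -np.sum(np.log(probs[range(M), Y]))              # summed cross-entropy
reg_loss = 0.5 * reg * np.sum(W * W)                         # L2 penalty; gives dw += reg*W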
Example #4
    dw2 = np.dot(hidden.T, dz2)
    dh = np.dot(dz2, W2.T)  # backprop into the hidden layer

    dz1 = dh
    dz1[hidden <= 0] = 0.0  # backprop relu
    dw1 = np.dot(X.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)

    dw2 += reg * W2
    dw1 += reg * W1

    W2 -= step_size * dw2
    b2 -= step_size * db2
    W1 -= step_size * dw1
    b1 -= step_size * db1


def predict(X, W1, b1, W2, b2):
    h = np.maximum(0.0, np.dot(X, W1) + b1)
    scores = np.dot(h, W2) + b2
    predicted_class = np.argmax(scores, axis=1)
    return predicted_class


### Evaluate on the training data
predicted_class = predict(X, W1, b1, W2, b2)
print('training accuracy: %.2f' % (np.mean(predicted_class == Y)))

util.plot_decision_boundary(X, Y, lambda x: predict(x, W1, b1, W2, b2))
plt.show()
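
As in Example #3, this snippet opens mid-backward-pass; `hidden`, `dz2`, and `db2` are never defined in what is shown. A hedged sketch of the lines it presumes:

# sketch only: forward pass and softmax gradient for the two-layer net
hidden = np.maximum(0.0, np.dot(X, W1) + b1)    # ReLU hidden layer
scores = np.dot(hidden, W2) + b2
exp_scores = np.exp(scores - scores.max(axis=1, keepdims=True))
probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
dz2 = probs
dz2[range(M), Y] -= 1                           # softmax gradient, as in Example #3
db2 = np.sum(dz2, axis=0, keepdims=True)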
Example #5
axes = plt.gca()
axes.set_xlim([-1.5,1.5])
axes.set_ylim([-1.5,1.5])
plot_decision_boundary(lambda x: predict_dec(params, x.T), train_X, train_Y)
'''


def initialize_parameters_he(layer_dims):
    np.random.seed(3)
    params = {}
    L = len(layer_dims)

    for l in range(1, L):
        params['W' + str(l)] = np.random.randn(
            layer_dims[l], layer_dims[l - 1]) * np.sqrt(2 / layer_dims[l - 1])
        params['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return params
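
# He initialization scales each weight matrix by sqrt(2 / n_prev), which keeps
# the variance of ReLU pre-activations roughly constant across layers. Quick
# sanity check (the layer sizes below are hypothetical, not from the original):
check = initialize_parameters_he([2, 4, 1])
print(check['W1'].shape)            # (4, 2)
print(np.allclose(check['b1'], 0))  # True: biases start at zero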


params = model(train_X, train_Y, initialization='he', print_cost=True)
print('on the train set')
predictions_train = predict(train_X, train_Y, params)
print('on the test set')
predictions_test = predict(test_X, test_Y, params)

plt.title("Model with He initialization")
axes = plt.gca()
axes.set_xlim([-1.5, 1.5])
axes.set_ylim([-1.5, 1.5])
plot_decision_boundary(lambda x: predict_dec(params, x.T), train_X, train_Y)
Example #6
                           rnd_clf.feature_importances_):
        print(name, score)
"""
Boost
"""
if False:
    """
    AdaBoost
    """
    ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                                 n_estimators=200,
                                 algorithm="SAMME.R",
                                 learning_rate=0.5,
                                 random_state=42)
    ada_clf.fit(X_train, y_train)
    util.plot_decision_boundary(ada_clf, X, y)

if True:
    """
    Gradient Boosting
    """
    np.random.seed(42)
    X = np.random.rand(100, 1) - 0.5
    y = 3 * X[:, 0]**2 + 0.05 * np.random.randn(100)
    X_new = np.array([[0.8]])

    # tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
    # tree_reg1.fit(X, y)
    # y2 = y - tree_reg1.predict(X)
    # tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)
    # tree_reg2.fit(X, y2)
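    # The commented lines above sketch gradient boosting by hand: each new tree
    # fits the residuals left by the ensemble so far. A minimal runnable
    # equivalent (a sketch; learning_rate=1.0 mirrors plain residual fitting):
    from sklearn.ensemble import GradientBoostingRegressor

    gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3,
                                     learning_rate=1.0, random_state=42)
    gbrt.fit(X, y)
    print(gbrt.predict(X_new))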