def grad_sanity():
    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    global W, b
    W, b = ll.create_classifier(3, 6)
    b = np.array(b, ndmin=2)

    def _loss_and_W_grad(W):
        global b
        x = np.array([[1, 2, 3]], np.double)
        loss, grads = ll.loss_and_gradients(x, 0, [W, b])
        return loss, grads[0]

    def _loss_and_b_grad(b):
        global W
        x = np.array([[1, 2, 3]], np.double)
        loss, grads = ll.loss_and_gradients(x, 0, [W, b])
        return loss, grads[1]

    for _ in range(10):
        W = np.random.randn(W.shape[0], W.shape[1])
        b = np.random.randn(b.shape[0], b.shape[1])
        gradient_check(_loss_and_b_grad, b)
        gradient_check(_loss_and_W_grad, W)
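# The sanity checks above rely on a numerical gradient_check imported from grad_check.
# A minimal sketch of what such a central-difference check typically looks like; this is
# an assumption for illustration -- the actual grad_check module in this repo may differ.
def numeric_gradient_check(f, x, eps=1e-4, tol=1e-5):
    """f maps x to (loss, analytic_gradient); compare the gradient to finite differences."""
    _, grad = f(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + eps
        loss_plus, _ = f(x)
        x[ix] = old - eps
        loss_minus, _ = f(x)
        x[ix] = old  # restore the original entry
        numeric = (loss_plus - loss_minus) / (2 * eps)
        rel_err = abs(numeric - grad[ix]) / max(1e-8, abs(numeric) + abs(grad[ix]))
        if rel_err > tol:
            print("gradient mismatch at {}: numeric {} vs analytic {}".format(ix, numeric, grad[ix]))
            return False
        it.iternext()
    return True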
def main():
    data, target = load_digits(return_X_y=True)
    data, target = shuffle(data, target)
    target = target.reshape(len(target), 1)
    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(target)
    target = enc.transform(target).toarray()
    data = data / 16.0  # VERY IMPORTANT: ALWAYS SCALE DATA

    loss = MSE("mse_loss")
    sgd_optimizer = SGD()
    sgd_optimizer.alpha = 0.1
    model = Model(name="mnist_test", loss_layer=loss, optimizer=sgd_optimizer)
    model.add(Dense(n_in=64, n_out=32, name="dense1"))
    model.add(Sigmoid(name="act1"))
    model.add(Dense(n_in=32, n_out=10, name="dense2"))
    model.add(Sigmoid(name="act2"))

    from grad_check import gradient_check
    model.feature_size = 64
    gradient_check(model)

    for epoch in range(500):
        print("Epoch: {}".format(epoch))
        epoch_loss = []
        for start_idx in range(0, len(data), 25):  # mini-batches of 25
            end_idx = min(len(data), start_idx + 25)
            batch_x = data[start_idx:end_idx, :]
            batch_y = target[start_idx:end_idx, :]
            # forward -> loss -> backward -> update
            _ = model.do_forward(batch_x)
            batch_loss = model.do_loss(batch_y)
            model.do_backward()
            model.do_update()
            epoch_loss.append(batch_loss)
        print("Loss: {}".format(sum(epoch_loss) / len(epoch_loss)))

    # Predict (note: these 200 samples come from the training data, not a held-out set)
    data_test, target_test = data[:200], target[:200]
    y_preds = model.do_forward(data_test)
    target_test = np.argmax(target_test, axis=1)
    y_preds = np.argmax(y_preds, axis=1)
    print((y_preds == target_test).mean())
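# The accuracy printed above is computed on the first 200 training samples. A hedged
# sketch of evaluating on a proper held-out split instead, assuming the same Model API
# (do_forward) used in main(); train_test_split is the standard scikit-learn helper.
def evaluate_on_holdout(model, data, target, test_size=0.2):
    from sklearn.model_selection import train_test_split
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=test_size)
    # ... train the model on (train_x, train_y) with the same epoch/batch loop as above ...
    test_preds = model.do_forward(test_x)
    return (np.argmax(test_preds, axis=1) == np.argmax(test_y, axis=1)).mean()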
def sanity_check():
    from grad_check import gradient_check

    global W, b, U, b_tag
    W, b, U, b_tag = create_classifier(3, 3, 4)

    def _loss_and_W_grad(W):
        global b, U, b_tag
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag])
        return loss, grads[0]

    def _loss_and_b_grad(b):
        global W, U, b_tag
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag])
        return loss, grads[1]

    def _loss_and_U_grad(U):
        global W, b, b_tag
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag])
        return loss, grads[2]

    def _loss_and_b_tag_grad(b_tag):
        global W, b, U
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag])
        return loss, grads[3]

    for _ in xrange(10):
        W = np.random.randn(W.shape[0], W.shape[1])
        b = np.random.randn(b.shape[0])
        U = np.random.randn(U.shape[0], U.shape[1])
        b_tag = np.random.randn(b_tag.shape[0])
        print 'W:'
        gradient_check(_loss_and_W_grad, W)
        print 'b:'
        gradient_check(_loss_and_b_grad, b)
        print 'U:'
        gradient_check(_loss_and_U_grad, U)
        print 'b_tag:'
        gradient_check(_loss_and_b_tag_grad, b_tag)
def mlpn_grad_sanity():
    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    W, b, U, b_tag = mlpn.create_classifier([3, 4, 6])

    def _loss_and_W_grad(W):
        x = np.array([1, 2, 3], np.double)
        loss, grads = mlpn.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[0]

    def _loss_and_b_grad(b):
        x = np.array([1, 2, 3], np.double)
        loss, grads = mlpn.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[1]

    def _loss_and_U_grad(U):
        x = np.array([1, 2, 3], np.double)
        loss, grads = mlpn.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[2]

    def _loss_and_b_tag_grad(b_tag):
        x = np.array([1, 2, 3], np.double)
        loss, grads = mlpn.loss_and_gradients(x, 0, [W, b, U, b_tag])
        return loss, grads[3]

    for _ in range(1):
        W = randomize_array(W)
        b = randomize_array(b)
        U = randomize_array(U)
        b_tag = randomize_array(b_tag)
        print("b_tag:")
        gradient_check(_loss_and_b_tag_grad, b_tag)
        print("U:")
        gradient_check(_loss_and_U_grad, U)
        print("b:")
        gradient_check(_loss_and_b_grad, b)
        print("W:")
        gradient_check(_loss_and_W_grad, W)
W, b = create_classifier([3, 4])

def _loss_and_W_grad(W):
    global b
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
    return loss, grads[0]

def _loss_and_b_grad(b):
    global W
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
    return loss, grads[1]

for _ in range(10):
    W = np.random.randn(W.shape[0], W.shape[1])
    b = np.random.randn(b.shape[0])
    gradient_check(_loss_and_b_grad, b)
    gradient_check(_loss_and_W_grad, W)

W1, b1, W2, b2, W3, b3 = create_classifier([3, 20, 30, 4])

def _loss_and_W1_grad(W1):
    global b1, W2, b2, W3, b3
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W1, b1, W2, b2, W3, b3])
    return loss, grads[0]

def _loss_and_b1_grad(b1):
    global W1, W2, b2, W3, b3
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W1, b1, W2, b2, W3, b3])
    return loss, grads[1]
def _loss_and_b_grad(b):
    global W, U, b_tag
    loss, grads = loss_and_gradients([1, 2], 0, [W, b, U, b_tag])
    return loss, grads[1]

def _loss_and_U_grad(U):
    global W, b, b_tag
    loss, grads = loss_and_gradients([1, 2], 0, [W, b, U, b_tag])
    return loss, grads[2]

def _loss_and_b_tag_grad(b_tag):
    global W, U, b
    loss, grads = loss_and_gradients([1, 2], 0, [W, b, U, b_tag])
    return loss, grads[3]

for _ in range(10):
    W = np.random.randn(W.shape[0], W.shape[1])
    U = np.random.randn(U.shape[0], U.shape[1])
    b = np.random.randn(b.shape[0])
    b_tag = np.random.randn(b_tag.shape[0])
    gradient_check(_loss_and_W_grad, W)
    gradient_check(_loss_and_b_grad, b)
    gradient_check(_loss_and_U_grad, U)
    gradient_check(_loss_and_b_tag_grad, b_tag)
if __name__ == '__main__':
    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    from grad_check import gradient_check

    dims = [5, 4, 7, 3]
    params = create_classifier(dims)

    def _loss_and_p_grad(p):
        """
        General function - return the loss and the gradient with respect to parameter p.
        p is matched to its slot in params by shape.
        """
        params_to_send = list(params)
        par_num = 0
        for i in range(len(params)):
            if p.shape == params[i].shape:
                params_to_send[i] = p
                par_num = i
        loss, grads = loss_and_gradients(np.array(range(dims[0])), 0, params_to_send)
        return loss, grads[par_num]

    for _ in xrange(10):
        my_params = create_classifier(dims)
        for p in my_params:
            gradient_check(_loss_and_p_grad, p)
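# Note: _loss_and_p_grad above locates p inside params by shape, which silently picks
# the wrong slot if two parameters happen to share a shape. A sketch of an index-based
# variant under the same loss_and_gradients signature (names here are illustrative):
def _loss_and_grad_at(i):
    def _inner(p):
        params_to_send = list(params)
        params_to_send[i] = p
        loss, grads = loss_and_gradients(np.array(range(dims[0])), 0, params_to_send)
        return loss, grads[i]
    return _inner

for i, p in enumerate(create_classifier(dims)):
    gradient_check(_loss_and_grad_at(i), p)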
                                     [W, b, U1, b_tag1, U2, b_tag2])
    return loss, grads[2]

def _loss_and_b_tag1_grad(b_tag1):
    global W, b, U1, U2, b_tag2
    loss, grads = loss_and_gradients([1, 2, 3], 0,
                                     [W, b, U1, b_tag1, U2, b_tag2])
    return loss, grads[3]

def _loss_and_U2_grad(U2):
    global W, b, U1, b_tag1, b_tag2
    loss, grads = loss_and_gradients([1, 2, 3], 0,
                                     [W, b, U1, b_tag1, U2, b_tag2])
    return loss, grads[4]

def _loss_and_b_tag2_grad(b_tag2):
    global W, b, U1, b_tag1, U2
    loss, grads = loss_and_gradients([1, 2, 3], 0,
                                     [W, b, U1, b_tag1, U2, b_tag2])
    return loss, grads[5]

for _ in xrange(10):
    W, b, U1, b_tag1, U2, b_tag2 = create_classifier([3, 5, 7, 9])
    gradient_check(_loss_and_b_tag2_grad, b_tag2)
    gradient_check(_loss_and_U2_grad, U2)
    gradient_check(_loss_and_b_tag1_grad, b_tag1)
    gradient_check(_loss_and_U1_grad, U1)
    gradient_check(_loss_and_b_grad, b)
    gradient_check(_loss_and_W_grad, W)
def check():
    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    from grad_check import gradient_check

    global W1, b1, W2, b2, W3, b3
    W1, b1, W2, b2, W3, b3 = init_net([3, 4, 8, 5])

    def _loss_and_W1_grad(W1):
        global b1, W2, b2, W3, b3
        loss, grads = backprop([-7.3, 5, 0], 0, [W1, b1, W2, b2, W3, b3])
        return loss, grads[0]

    def _loss_and_b1_grad(b1):
        global W1, W2, b2, W3, b3
        loss, grads = backprop([-9, 22, 3.2], 2, [W1, b1, W2, b2, W3, b3])
        return loss, grads[1]

    def _loss_and_W2_grad(W2):
        global W1, b1, b2, W3, b3
        loss, grads = backprop([-1, 7, 4], 1, [W1, b1, W2, b2, W3, b3])
        return loss, grads[2]

    def _loss_and_b2_grad(b2):
        global W1, b1, W2, W3, b3
        loss, grads = backprop([1, 2, 3], 0, [W1, b1, W2, b2, W3, b3])
        return loss, grads[3]

    def _loss_and_W3_grad(W3):
        global W1, b1, W2, b2, b3
        loss, grads = backprop([-1, 78, 4], 1, [W1, b1, W2, b2, W3, b3])
        return loss, grads[4]

    def _loss_and_b3_grad(b3):
        global W1, b1, W2, b2, W3
        loss, grads = backprop([1, 2, 7.25], 3, [W1, b1, W2, b2, W3, b3])
        return loss, grads[5]

    for _ in range(10):
        W1 = np.random.randn(W1.shape[0], W1.shape[1])
        b1 = np.random.randn(b1.shape[0])
        W2 = np.random.randn(W2.shape[0], W2.shape[1])
        b2 = np.random.randn(b2.shape[0])
        W3 = np.random.randn(W3.shape[0], W3.shape[1])
        b3 = np.random.randn(b3.shape[0])
        gradient_check(_loss_and_b1_grad, b1)
        gradient_check(_loss_and_W1_grad, W1)
        gradient_check(_loss_and_b2_grad, b2)
        gradient_check(_loss_and_W2_grad, W2)
        gradient_check(_loss_and_b3_grad, b3)
        gradient_check(_loss_and_W3_grad, W3)
test2 = softmax(np.array([1001, 1002]))
print test2
assert np.amax(np.fabs(test2 - np.array([0.26894142, 0.73105858]))) <= 1e-6

test3 = softmax(np.array([-1001, -1002]))
print test3
assert np.amax(np.fabs(test3 - np.array([0.73105858, 0.26894142]))) <= 1e-6

# Sanity checks. If these fail, your gradient calculation is definitely wrong.
# If they pass, it is likely, but not certainly, correct.
from grad_check import gradient_check

W1, b1 = create_classifier(3, 4)

def _loss_and_W_grad(W):
    global b1
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b1])
    return loss, grads[0]

def _loss_and_b_grad(b):
    global W1
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W1, b])
    return loss, grads[1]

for _ in xrange(1000):
    W1 = np.random.randn(W1.shape[0], W1.shape[1])
    b1 = np.random.randn(b1.shape[0])
    gradient_check(_loss_and_b_grad, b1)
    gradient_check(_loss_and_W_grad, W1)
test3 = softmax(np.array([-1001, -1002]))
print(test3)
assert np.amax(np.fabs(test3 - np.array([0.73105858, 0.26894142]))) <= 1e-6

# Sanity checks. If these fail, your gradient calculation is definitely wrong.
# If they pass, it is likely, but not certainly, correct.
from grad_check import gradient_check

W, b = create_classifier(3, 4)

def _loss_and_W_grad(W):
    global b
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
    return loss, grads[0]

def _loss_and_b_grad(b):
    global W
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
    return loss, grads[1]

for _ in range(1000):
    W = np.random.randn(W.shape[0], W.shape[1])
    b = np.random.randn(b.shape[0])
    if not gradient_check(_loss_and_b_grad, b):
        print("ERROR")
        exit()
    if not gradient_check(_loss_and_W_grad, W):
        print("ERROR")
        exit()
def _loss_and_W1_grad(W1):
    global b1
    loss, grads = loss_and_gradients(np.array([1, 2, 3]), 0, [W1, b1, W2, b2])
    return loss, grads[0]

def _loss_and_b1_grad(b1):
    global W1
    loss, grads = loss_and_gradients(np.array([1, 2, 3]), 0, [W1, b1, W2, b2])
    return loss, grads[1]

def _loss_and_b2_grad(b2):
    loss, grads = loss_and_gradients(np.array([1, 2, 3]), 0, [W1, b1, W2, b2])
    return loss, grads[3]

for _ in xrange(10):
    W1 = np.random.randn(W1.shape[0], W1.shape[1])
    b1 = np.random.randn(b1.shape[0])
    W2 = np.random.randn(W2.shape[0], W2.shape[1])
    b2 = np.random.randn(b2.shape[0])
    loss, grads = loss_and_gradients(np.array([1, 2, 3]), 0, [W1, b1, W2, b2])
    gradient_check(_loss_and_W2_grad, W2)
    gradient_check(_loss_and_W1_grad, W1)
    gradient_check(_loss_and_b1_grad, b1)
    gradient_check(_loss_and_b2_grad, b2)
    global b_tag
    global W
    global b
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag])
    return loss, grads[2]

def _loss_and_b_tag_grad(b_tag):
    global U
    global W
    global b
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag])
    return loss, grads[3]

for _ in xrange(100):
    U = np.random.randn(U.shape[0], U.shape[1])
    b_tag = np.random.randn(b_tag.shape[0])
    b = np.random.randn(b.shape[0])
    W = np.random.randn(W.shape[0], W.shape[1])
    if not gradient_check(_loss_and_b_grad, b):
        print("ERROR")
        exit()
    if not gradient_check(_loss_and_W_grad, W):
        print("ERROR")
        exit()
    if not gradient_check(_loss_and_U_grad, U):
        print("ERROR")
        exit()
    if not gradient_check(_loss_and_b_tag_grad, b_tag):
        print("ERROR")
        exit()
def sanity_check():
    from grad_check import gradient_check

    global W, b, U, b_tag, V, b_t
    W, b, U, b_tag, V, b_t = create_classifier([3, 3, 4, 4])

    def _loss_and_W_grad(W):
        global b, U, b_tag, V, b_t
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag, V, b_t])
        return loss, grads[0]

    def _loss_and_b_grad(b):
        global W, U, b_tag, V, b_t
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag, V, b_t])
        return loss, grads[1]

    def _loss_and_U_grad(U):
        global W, b, b_tag, V, b_t
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag, V, b_t])
        return loss, grads[2]

    def _loss_and_b_tag_grad(b_tag):
        global W, b, U, V, b_t
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag, V, b_t])
        return loss, grads[3]

    def _loss_and_V_grad(V):
        global W, b, U, b_tag, b_t
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag, V, b_t])
        return loss, grads[4]

    def _loss_and_b_t_grad(b_t):
        global W, b, U, b_tag, V
        loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b, U, b_tag, V, b_t])
        return loss, grads[5]

    for _ in xrange(2):
        print 'iteration:', _
        W = np.random.randn(W.shape[0], W.shape[1])
        b = np.random.randn(b.shape[0])
        U = np.random.randn(U.shape[0], U.shape[1])
        b_tag = np.random.randn(b_tag.shape[0])
        V = np.random.randn(V.shape[0], V.shape[1])
        b_t = np.random.randn(b_t.shape[0])
        print 'W:'
        gradient_check(_loss_and_W_grad, W)
        print 'b:'
        gradient_check(_loss_and_b_grad, b)
        print 'U:'
        gradient_check(_loss_and_U_grad, U)
        print 'b_tag:'
        gradient_check(_loss_and_b_tag_grad, b_tag)
        print 'V:'
        gradient_check(_loss_and_V_grad, V)
        print 'b_t:'
        gradient_check(_loss_and_b_t_grad, b_t)
U, W, b, b_prime = create_classifier(3, 2, 4)

def _loss_and_U_grad(U):
    loss, grads = loss_and_gradients([1, 2, 3], 0, [U, W, b, b_prime])
    return loss, grads[0]

def _loss_and_W_grad(W):
    global b
    loss, grads = loss_and_gradients([1, 2, 3], 0, [U, W, b, b_prime])
    return loss, grads[1]

def _loss_and_b_grad(b):
    global W
    loss, grads = loss_and_gradients([1, 2, 3], 0, [U, W, b, b_prime])
    return loss, grads[2]

def _loss_and_bprime_grad(b_prime):
    loss, grads = loss_and_gradients([1, 2, 3], 0, [U, W, b, b_prime])
    return loss, grads[3]

for _ in xrange(10):
    W = np.random.randn(W.shape[0], W.shape[1])
    b = np.random.randn(b.shape[0])
    U = np.random.randn(U.shape[0], U.shape[1])
    b_prime = np.random.randn(b_prime.shape[0])
    loss, grads = loss_and_gradients([1, 2, 3], 0, [U, W, b, b_prime])
    gradient_check(_loss_and_U_grad, U)
    gradient_check(_loss_and_W_grad, W)
    gradient_check(_loss_and_b_grad, b)
    gradient_check(_loss_and_bprime_grad, b_prime)
    loss, grads = loss_and_gradients([1, 2, 3], 0, [U, b2, W, b1])
    return loss, grads[2]

def _loss_and_b1_grad(b1):
    loss, grads = loss_and_gradients([1, 2, 3], 1, [U, b2, W, b1])
    return loss, grads[3]

def _loss_and_U_grad(U):
    loss, grads = loss_and_gradients([1, 2, 3], 0, [U, b2, W, b1])
    return loss, grads[0]

def _loss_and_b2_grad(b2):
    loss, grads = loss_and_gradients([1, 2, 3], 1, [U, b2, W, b1])
    return loss, grads[1]

for _ in xrange(10):
    U = np.random.randn(U.shape[0], U.shape[1])
    b1 = np.random.randn(b1.shape[0])
    b2 = np.random.randn(b2.shape[0])
    W = np.random.randn(W.shape[0], W.shape[1])
    # set dropout_rate=0 before the gradient test (see the sketch after this block)
    gradient_check(_loss_and_U_grad, U)
    gradient_check(_loss_and_W_grad, W)
    gradient_check(_loss_and_b1_grad, b1)
    gradient_check(_loss_and_b2_grad, b2)
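# The dropout note above matters because dropout makes the loss stochastic: the numeric
# and analytic gradients would otherwise be compared under different random masks. A
# hedged sketch, assuming a module-level dropout_rate that the forward pass reads
# (the variable name is illustrative, not this repo's actual API):
saved_rate = dropout_rate
dropout_rate = 0.0  # make the forward pass deterministic for the check
gradient_check(_loss_and_U_grad, U)
dropout_rate = saved_rate  # restore for training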
test2 = softmax(np.array([1001, 1002]))
print(test2)
assert np.amax(np.fabs(test2 - np.array([0.26894142, 0.73105858]))) <= 1e-6

test3 = softmax(np.array([-1001, -1002]))
print(test3)
assert np.amax(np.fabs(test3 - np.array([0.73105858, 0.26894142]))) <= 1e-6

# Sanity checks. If these fail, your gradient calculation is definitely wrong.
# If they pass, it is likely, but not certainly, correct.
from grad_check import gradient_check

W, b = create_classifier(3, 4)

def _loss_and_W_grad(W):
    global b
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
    return loss, grads[0]

def _loss_and_b_grad(b):
    global W
    loss, grads = loss_and_gradients([1, 2, 3], 0, [W, b])
    return loss, grads[1]

for _ in range(10):
    W = np.random.randn(W.shape[0], W.shape[1])
    b = np.random.randn(b.shape[0])
    gradient_check(_loss_and_b_grad, b)
    gradient_check(_loss_and_W_grad, W)
                                     [W, b, U, b_tag, W2, b2, W3, b3])
    return loss, grads[7]

def text_to_unigrams(text):
    return ["%s" % c1 for c1 in text]

for _ in range(100):
    W = np.random.randn(W.shape[0], W.shape[1])
    b = np.random.randn(b.shape[0])
    U = np.random.randn(U.shape[0], U.shape[1])
    b_tag = np.random.randn(b_tag.shape[0])
    W2 = np.random.randn(W2.shape[0], W2.shape[1])
    b2 = np.random.randn(b2.shape[0])
    W3 = np.random.randn(W3.shape[0], W3.shape[1])
    b3 = np.random.randn(b3.shape[0])
    gradient_check(_loss_and_b_grad, b)
    gradient_check(_loss_and_W_grad, W)
    gradient_check(_loss_and_U_grad, U)
    gradient_check(_loss_and_b_tag_grad, b_tag)
    gradient_check(_loss_and_W2_grad, W2)
    gradient_check(_loss_and_b2_grad, b2)
    gradient_check(_loss_and_W3_grad, W3)
    gradient_check(_loss_and_b3_grad, b3)

# Alternative (unused): randomize a generic parameter list instead of named variables.
# classifier_params = create_classifier([5, 10, 15, 20, 30, 10, 5, 3, 32, 11])
# for _ in range(10):
#     for i in range(0, len(classifier_params) - 1, 2):
#         classifier_params[i] = np.random.rand(classifier_params[i].shape[0],
#                                               classifier_params[i].shape[1])
#         classifier_params[i + 1] = np.random.rand(classifier_params[i + 1].shape[0])