def test_exp():
    x1 = ad.Variable(name="x1")
    y = 1 + 2 * ad.exp_op(ad.log_op(x1))
    x1_val = np.ones((2, 1))
    grad_y, = ad.gradients(y, [x1])
    executor = ad.Executor([y, grad_y])
    y_val, grad_y_val = executor.run(feed_dict={x1: x1_val})
    assert np.array_equal(y_val, 3 * np.ones_like(y_val))
    assert np.array_equal(grad_y_val, 2 * np.ones_like(grad_y_val))
def softmax_ce_loss(preds, truth):
    """
    Compute the softmax and cross-entropy loss in a numerically stable way.
    :param preds: the predictions output by the model
    :param truth: the true label, a one-hot vector
    :return: the loss
    """
    pred_max = ad.max_op(preds)
    preds_shift = ad.add_byscalar_op(ad.neg_op(pred_max), preds)
    exps = ad.exp_op(preds_shift)
    return ad.minus_op(ad.log_op(ad.sum_op(exps)),
                       ad.sum_op(ad.mul_op(preds_shift, truth)))
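# A minimal NumPy reference for the same quantity, useful for checking
# softmax_ce_loss numerically (the helper name below is illustrative, not from
# the original code). With s = preds - max(preds) and a one-hot truth vector,
# the loss equals log(sum(exp(s))) - sum(s * truth).
import numpy as np

def softmax_ce_loss_numpy(preds, truth):
    s = preds - np.max(preds)
    return np.log(np.sum(np.exp(s))) - np.sum(s * truth)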
def test_exp():
    x2 = ad.Variable(name="x2")
    y = ad.exp_op(x2)
    grad_x2, = ad.gradients(y, [x2])
    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.exp(x2_val))
    assert np.array_equal(grad_x2_val, np.exp(x2_val))
def test_exp():
    x2 = ad.Variable(name="x2")
    y = ad.exp_op(x2)
    grad_x2, = ad.gradients(y, [x2])
    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})
    epsilon = 1e-6
    zero_arr = np.zeros(3) + epsilon
    assert isinstance(y, ad.Node)
    assert np.all(np.less_equal(np.abs(y_val - np.exp(x2_val)), zero_arr))
    assert np.all(np.less_equal(np.abs(grad_x2_val - np.exp(x2_val)), zero_arr))
def test():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = (ad.sin_op(x1 + 1) + ad.cos_op(2 * x2)) * ad.tan_op(ad.log_op(x3)) \
        + ad.sin_op(x2 + 1) + ad.cos_op(2 * x1) * ad.exp_op(1 + ad.sin_op(x3))
    grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3])
    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3])
    x1_val = 1 * np.ones(1)
    x2_val = 2 * np.ones(1)
    x3_val = 3 * np.ones(1)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(
        feed_dict={x1: x1_val, x2: x2_val, x3: x3_val})
    print('x1=', x1_val[0])
    print('x2=', x2_val[0])
    print('x3=', x3_val[0])
    print('---------------------------------------------------------------')
    print('y0_val=', y_val[0])
    print('grad_x1_val= ', grad_x1_val[0])
    print('grad_x2_val= ', grad_x2_val[0])
    print('grad_x3_val= ', grad_x3_val[0])
    print('---------------------------------------------------------------')
    y_numerical, grad_numerical = numerical_diff(f, [x1_val, x2_val, x3_val], 1e-10)
    print('y0_numerical= ', y_numerical)
    grad_numerical_x1, grad_numerical_x2, grad_numerical_x3 = (
        grad_numerical[0], grad_numerical[1], grad_numerical[2])
    print('grad_numerical_x1 =', grad_numerical_x1)
    print('grad_numerical_x2 =', grad_numerical_x2)
    print('grad_numerical_x3 =', grad_numerical_x3)
    print('---------------------------------------------------------------')
    print('gradient offsets:')
    print('x1:', abs(grad_x1_val - grad_numerical_x1))
    assert abs(grad_x1_val - grad_numerical_x1) < 1e-5
    print('x2:', abs(grad_x2_val - grad_numerical_x2))
    assert abs(grad_x2_val - grad_numerical_x2) < 1e-5
    print('x3:', abs(grad_x3_val - grad_numerical_x3))
    assert abs(grad_x3_val - grad_numerical_x3) < 1e-5
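# The test above relies on `numerical_diff` and `f`, which are defined elsewhere
# in the original module. A minimal sketch under the assumption that `f` is the
# NumPy version of the expression built for `y` and that `numerical_diff` uses a
# central-difference approximation (both names and signatures are assumptions):
import numpy as np

def f(inputs):
    x1, x2, x3 = inputs
    return (np.sin(x1 + 1) + np.cos(2 * x2)) * np.tan(np.log(x3)) \
        + np.sin(x2 + 1) + np.cos(2 * x1) * np.exp(1 + np.sin(x3))

def numerical_diff(func, inputs, eps):
    """Approximate func's value and per-input gradients by central differences."""
    y = func(inputs)
    grads = []
    for i in range(len(inputs)):
        up = [v.copy() for v in inputs]
        down = [v.copy() for v in inputs]
        up[i] = up[i] + eps
        down[i] = down[i] - eps
        grads.append((func(up) - func(down)) / (2 * eps))
    return y, grads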
def test_sigmoid():
    x2 = ad.Variable(name="x2")
    y = 1 / (1 + ad.exp_op(-1 * x2))
    grad_x2, = ad.gradients(y, [x2])
    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})
    assert isinstance(y, ad.Node)
    assert isinstance(grad_x2, ad.Node)
    epsilon = 1e-10
    zero_arr = np.zeros(3) + epsilon
    assert np.all(
        np.less_equal(np.abs(1 / (1 + np.exp(-1 * x2_val)) - y_val), zero_arr))
    print(grad_x2_val)
    print(y_val * (1 - y_val))
    assert np.all(
        np.less_equal(np.abs(grad_x2_val - y_val * (1 - y_val)), zero_arr))
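# The gradient assertion above uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
# A quick NumPy sanity check of that identity (illustrative only, not part of the test):
import numpy as np

x = 2 * np.ones(3)
s = 1 / (1 + np.exp(-x))
numeric = (1 / (1 + np.exp(-(x + 1e-6))) - 1 / (1 + np.exp(-(x - 1e-6)))) / 2e-6
assert np.allclose(numeric, s * (1 - s))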
def fit(self, X, Y):
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')
    p = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))
    # cross entropy
    loss = 0 - y * ad.log_op(p) - (1 - y) * ad.log_op(1 - p)
    grad_w, = ad.gradients(loss, [w])
    # SGD
    length = np.shape(X)[0]
    self.num_feature = np.shape(X)[1]
    executor = ad.Executor([loss, grad_w])
    self.coef_ = np.random.rand(1, self.num_feature) / 1000.0
    for i in range(self.maxiter):
        grad = np.zeros((1, self.num_feature))
        loss = 0
        for j in range(self.batch):
            t = random.choice(range(length))
            x_val = X[t].reshape((self.num_feature, 1))
            if Y[t] == self.labels[0]:
                y_val = 0
            else:
                y_val = 1
            loss_val, grad_w_val = executor.run(
                feed_dict={x: x_val, w: self.coef_, y: y_val})
            grad = grad + grad_w_val
            loss = loss + loss_val
        self.coef_ = self.coef_ - self.learning_rate * grad / self.batch
        if i % 100 == 0:
            print(loss)
def get_model_params(x_train, y_train, class_1, class_2):
    '''Returns the weights after performing gradient descent.'''
    learning_rate = 0.01
    batch_size = 8
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')
    logistic_regression = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))
    cross_entropy = -1 * y * ad.log_op(logistic_regression) \
        - (1 - y) * ad.log_op(1 - logistic_regression)
    gradients = ad.gradients(cross_entropy, [w])[0]
    executor = ad.Executor([cross_entropy, gradients])
    weights = np.random.rand(1, np.shape(x_train)[1]) / 1000.0
    for i in range(5000):
        grad = np.zeros((1, np.shape(x_train)[1]))
        loss = 0
        # go randomly over examples in each batch
        for _ in range(batch_size):
            t = random.choice(range(np.shape(x_train)[0]))
            x_flat = x_train[t].reshape((np.shape(x_train)[1], 1))
            y_label = 0 if y_train[t] == class_1 else 1
            loss_delta, grad_delta = executor.run(
                feed_dict={x: x_flat, w: weights, y: y_label})
            grad += grad_delta
            loss += loss_delta
        weights = weights - (learning_rate * grad / batch_size)
        if i % 1000 == 0:
            print("loss = {:.3f} loss_delta = {:.3f}".format(loss[0][0], loss_delta[0][0]))
    return weights
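# For the cross-entropy loss used above, the gradient w.r.t. the weight
# row-vector has the closed form dL/dw = (p - y) * x.T, where p = sigmoid(w @ x).
# A hedged NumPy check of that formula against central differences (variable
# names here are illustrative, not from the original code):
import numpy as np

rng = np.random.default_rng(0)
w = rng.random((1, 3))
x = rng.random((3, 1))
y = 1.0

def loss(w):
    p = 1 / (1 + np.exp(-(w @ x)))
    return (-y * np.log(p) - (1 - y) * np.log(1 - p)).item()

p = 1 / (1 + np.exp(-(w @ x)))
analytic = (p - y) * x.T                      # shape (1, 3)

eps = 1e-6
numeric = np.zeros_like(w)
for j in range(w.shape[1]):
    w_up, w_down = w.copy(), w.copy()
    w_up[0, j] += eps
    w_down[0, j] -= eps
    numeric[0, j] = (loss(w_up) - loss(w_down)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-6)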
x = ad.Variable(name="x")
w = ad.Variable(name="w")
b = ad.Variable(name="b")
labels = ad.Variable(name="lables")


# Computation graph
def cross_entropy(output, labels):
    loss = -1.0 * ad.reduce_sum_op(
        labels * ad.log_op(output) + (1.0 - labels) * ad.log_op(1.0 - output),
        axis=1)
    return loss


# Output of the hypothesis of logistic regression
p = 1.0 / (1.0 + ad.exp_op(-1.0 * ad.matmul_op(w, x)))

# Loss node
loss = cross_entropy(p, labels)

# Gradient node of the loss with respect to w
grad_y_w, = ad.gradients(loss, [w])

num_features = 2
num_points = 200
num_iterations = 1000
learning_rate = 0.01

# The dummy dataset consists of two classes.
# The classes are modelled as random normal variables with different means.
class_1 = np.random.normal(2, 0.1, (int(num_points / 2), num_features))
class_2 = np.random.normal(4, 0.1, (int(num_points / 2), num_features))
def get_logistic_model(x, weight, bias):
    y = 1 / (1 + ad.exp_op(-1 * (ad.matmul_op(x, weight, trans_B=True) + bias)))
    # y = 1 / (1 + ad.exp_op(-1 * (ad.mul_op(x, weight) + bias)))
    return y
import autodiff as ad
import numpy as np

w = ad.Variable(name="w")
x = ad.Variable(name="x")
b = ad.Variable(name="b")
labels = ad.Variable(name="lables")

out = 1.0 / (1.0 + ad.exp_op(-1.0 * (w * x + b)))
ce_loss = -1.0 * ((labels * ad.log_op(out)) + ((1.0 - labels) * ad.log_op(1.0 - out)))
grad_w, grad_b = ad.gradients(ce_loss, [w, b])

# weights our model initially starts at
w_val = 10
b_val = 1

# weights our model should reach
w_required = 5
b_required = 20

# We simulate the training dataset for logistic regression,
# taking x as a continuous array from -10 to 6 with a step size of 0.01.
x_val = np.arange(-10, 6, 0.01)

# Compute the labels exactly with NumPy using the target weights.
labels_val = 1 / (1 + np.exp(-(w_required * x_val + b_required)))
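# The original script presumably continues with a gradient-descent loop that
# drives (w_val, b_val) toward (w_required, b_required). A minimal sketch of
# such a loop, assuming the Executor API shown in the other snippets; the
# learning rate, iteration count, and gradient averaging are assumptions:
executor = ad.Executor([ce_loss, grad_w, grad_b])
learning_rate = 0.01
for i in range(10000):
    loss_val, grad_w_val, grad_b_val = executor.run(
        feed_dict={w: w_val, x: x_val, b: b_val, labels: labels_val})
    # Average the per-sample gradients before taking a step.
    w_val = w_val - learning_rate * np.mean(grad_w_val)
    b_val = b_val - learning_rate * np.mean(grad_b_val)
    if i % 1000 == 0:
        print("iteration {}: loss = {:.4f}".format(i, np.mean(loss_val)))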
def softmax(x):
    x_max = ad.max_op(x)
    x_shift = ad.add_byscalar_op(ad.neg_op(x_max), x)
    exps = ad.exp_op(x_shift)
    return ad.mul_byscalar_op(ad.reciprocal_op(ad.sum_op(exps)), exps)
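# The same numerically stable softmax written directly in NumPy, handy for
# checking the graph version above (illustrative helper, not part of the
# original code):
import numpy as np

def softmax_numpy(x):
    shifted = x - np.max(x)     # subtract the max for numerical stability
    exps = np.exp(shifted)
    return exps / np.sum(exps)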