def main():
    # Generate dataset and initial weight
    x_t, y_t = generate_dataset(1, 1, -5, point_num=100)
    # add extra dim to build homogeneous coordinates
    x_t = np.concatenate((x_t, np.ones((x_t.shape[0], 1))), axis=1)
    W_val = np.random.rand(3, 1)

    # draw initial decision hyperplane
    draw(W_val, x_t, y_t)

    # Create the model
    x = ad.Variable(name='x')
    W = ad.Variable(name='W')
    y = ad.sigmoid_op(ad.matmul_op(x, W))

    # Define loss
    y_ = ad.Variable(name='y_')
    cross_entropy = ad.reduce_mean_op(-ad.reduce_sum_op(
        y_ * ad.log_op(y) + (1 - y_) * ad.log_op(1 - y),
        reduction_indices=[1]))

    # Update rule
    learning_rate = 0.5
    W_grad, = ad.gradients(cross_entropy, [W])
    W_train_step = W - learning_rate * W_grad

    # Training
    executor = ad.Executor([cross_entropy, y, W_train_step])
    steps = 200

    plt.ion()
    for i in range(steps):
        plt.cla()

        loss_val, y_val, W_val = executor.run(feed_dict={
            x: x_t,
            y_: y_t,
            W: W_val,
        })

        print("Step {}: loss: {}".format(i + 1, loss_val))

        # draw trained decision hyperplane
        draw(W_val, x_t, y_t)

        plt.pause(0.1)
    plt.ioff()
    plt.show()
def test_exp():
    x1 = ad.Variable(name="x1")
    y = 1 + 2 * ad.exp_op(ad.log_op(x1))

    x1_val = np.ones((2, 1))

    grad_y, = ad.gradients(y, [x1])
    executor = ad.Executor([y, grad_y])
    y_val, grad_y_val = executor.run(feed_dict={x1: x1_val})

    assert np.array_equal(y_val, 3 * np.ones_like(y_val))
    assert np.array_equal(grad_y_val, 2 * np.ones_like(grad_y_val))
def softmax_ce_loss(preds, truth):
    """
    Calculate the softmax and cross-entropy loss in a more efficient,
    numerically stable way (log-sum-exp with a max shift).
    :param preds: the predictions output by the model
    :param truth: the true label, a one-hot vector
    :return: the loss
    """
    pred_max = ad.max_op(preds)
    preds_shift = ad.add_byscalar_op(ad.neg_op(pred_max), preds)
    exps = ad.exp_op(preds_shift)
    return ad.minus_op(ad.log_op(ad.sum_op(exps)),
                       ad.sum_op(ad.mul_op(preds_shift, truth)))
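# A NumPy reference for the shifted log-sum-exp computation above, useful when
# sanity-checking softmax_ce_loss; numpy_softmax_ce is a hypothetical helper,
# not part of the original code.
import numpy as np

def numpy_softmax_ce(preds, truth):
    # subtract the maximum before exponentiating for numerical stability
    preds_shift = preds - np.max(preds)
    # loss = log(sum(exp(shifted))) - sum(shifted * one_hot_truth)
    return np.log(np.sum(np.exp(preds_shift))) - np.sum(preds_shift * truth)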
def test_log():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    y = ad.log_op(x1) / x2

    grad_y, = ad.gradients(y, [x1])

    x1_val = 2 * np.ones((2, 1))
    x2_val = np.ones((1, 1))
    executor = ad.Executor([y, grad_y])
    y_val, grad_y_val = executor.run(feed_dict={x1: x1_val, x2: x2_val})

    assert np.array_equal(y_val, np.log(x1_val))
    assert np.array_equal(grad_y_val, 0.5 * np.ones_like(grad_y_val))
def test_log():  # P
    x2 = ad.Variable(name="x2")
    y = ad.log_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = np.linspace(0.1, 2, 5)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.log(x2_val))
    assert np.array_equal(grad_x2_val, 1 / x2_val)
def fit(self, X, Y):
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    p = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))

    # cross entropy
    loss = 0 - y * ad.log_op(p) - (1 - y) * ad.log_op(1 - p)

    grad_w, = ad.gradients(loss, [w])

    # SGD
    length = np.shape(X)[0]
    self.num_feature = np.shape(X)[1]
    executor = ad.Executor([loss, grad_w])
    self.coef_ = np.random.rand(1, self.num_feature) / 1000.0

    for i in range(self.maxiter):
        grad = np.zeros((1, self.num_feature))
        loss = 0

        for j in range(self.batch):
            t = random.choice(range(length))
            x_val = X[t].reshape((self.num_feature, 1))
            if Y[t] == self.labels[0]:
                y_val = 0
            else:
                y_val = 1
            loss_val, grad_w_val = executor.run(feed_dict={
                x: x_val,
                w: self.coef_,
                y: y_val
            })
            grad = grad + grad_w_val
            loss = loss + loss_val

        self.coef_ = self.coef_ - self.learning_rate * grad / self.batch
        if i % 100 == 0:
            print(loss)
def get_model_params(x_train, y_train, class_1, class_2):
    '''Returns the weights after performing gradient descent.'''
    learning_rate = 0.01
    batch_size = 8

    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    logistic_regression = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))
    cross_entropy = -1 * y * ad.log_op(logistic_regression) \
        - (1 - y) * ad.log_op(1 - logistic_regression)
    gradients = ad.gradients(cross_entropy, [w])[0]

    executor = ad.Executor([cross_entropy, gradients])
    weights = np.random.rand(1, np.shape(x_train)[1]) / 1000.0

    for i in range(5000):
        grad = np.zeros((1, np.shape(x_train)[1]))
        loss = 0
        # go randomly over examples in each batch
        for _ in range(batch_size):
            t = random.choice(range(np.shape(x_train)[0]))
            x_flat = x_train[t].reshape((np.shape(x_train)[1], 1))
            y_label = 0 if y_train[t] == class_1 else 1
            loss_delta, grad_delta = executor.run(
                feed_dict={x: x_flat, w: weights, y: y_label})
            grad += grad_delta
            loss += loss_delta
        weights = weights - (learning_rate * grad / batch_size)
        if i % 1000 == 0:
            print("loss = {:.3f} loss_delta = {:.3f}".format(
                loss[0][0], loss_delta[0][0]))
    return weights
def test_log():
    x2 = ad.Variable(name="x2")
    y = ad.log_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    epsilon = 1e-6
    zero_arr = np.zeros(3) + epsilon
    assert isinstance(y, ad.Node)
    assert np.all(np.less_equal(np.abs(y_val - np.log(x2_val)), zero_arr))
    assert np.all(np.less_equal(np.abs(1 / x2_val - grad_x2_val), zero_arr))
def test():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")

    y = ((ad.sin_op(x1 + 1) + ad.cos_op(2 * x2)) * ad.tan_op(ad.log_op(x3))
         + ad.sin_op(x2 + 1)
         + ad.cos_op(2 * x1) * ad.exp_op(1 + ad.sin_op(x3)))

    grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3])
    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3])

    x1_val = 1 * np.ones(1)
    x2_val = 2 * np.ones(1)
    x3_val = 3 * np.ones(1)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val,
        x3: x3_val
    })

    print('x1=', x1_val[0])
    print('x2=', x2_val[0])
    print('x3=', x3_val[0])
    print('---------------------------------------------------------------')
    print('y0_val=', y_val[0])
    print('grad_x1_val= ', grad_x1_val[0])
    print('grad_x2_val= ', grad_x2_val[0])
    print('grad_x3_val= ', grad_x3_val[0])
    print('---------------------------------------------------------------')

    y_numerical, grad_numerical = numerical_diff(f, [x1_val, x2_val, x3_val],
                                                 1e-10)
    print('y0_numerical= ', y_numerical)
    grad_numerical_x1, grad_numerical_x2, grad_numerical_x3 = (
        grad_numerical[0], grad_numerical[1], grad_numerical[2])
    print('grad_numerical_x1 =', grad_numerical_x1)
    print('grad_numerical_x2 =', grad_numerical_x2)
    print('grad_numerical_x3 =', grad_numerical_x3)
    print('---------------------------------------------------------------')

    print('gradients Offset:')
    print('x1:', abs(grad_x1_val - grad_numerical_x1))
    assert abs(grad_x1_val - grad_numerical_x1) < 1e-5
    print('x2:', abs(grad_x2_val - grad_numerical_x2))
    assert abs(grad_x2_val - grad_numerical_x2) < 1e-5
    print('x3:', abs(grad_x3_val - grad_numerical_x3))
    assert abs(grad_x3_val - grad_numerical_x3) < 1e-5
def cross_entropy(output, labels):
    loss = -1.0 * ad.reduce_sum_op(
        labels * ad.log_op(output) + (1.0 - labels) * ad.log_op(1.0 - output),
        axis=1)
    return loss
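# A minimal usage sketch for cross_entropy, assuming the same autodiff API as
# the surrounding snippets (ad.Variable, ad.exp_op, ad.matmul_op, ad.gradients,
# ad.Executor); the variable names here are illustrative, not from the
# original code.
import autodiff as ad

x = ad.Variable(name="x")
w = ad.Variable(name="w")
labels = ad.Variable(name="labels")
# sigmoid model output for a weight row-vector w and column inputs x
output = 1.0 / (1.0 + ad.exp_op(-1.0 * ad.matmul_op(w, x)))
loss = cross_entropy(output, labels)
grad_w, = ad.gradients(loss, [w])
executor = ad.Executor([loss, grad_w])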
import numpy as np
import autodiff as ad

x = ad.Variable(name="x")
w = ad.Variable(name="w")
b = ad.Variable(name="b")
labels = ad.Variable(name="labels")

# Define computation graph
p = 1.0 / (1.0 + ad.exp_op(-1.0 * ad.matmul_op(w, x)))
loss = -1.0 * ad.reduce_sum_op(
    labels * ad.log_op(p) + (1.0 - labels) * ad.log_op(1.0 - p), axis=1)
grad_y_w, = ad.gradients(loss, [w])

num_features = 2
num_points = 500
num_iterations = 1000
learning_rate = 0.001

# The dummy dataset consists of two classes.
# The classes are modelled as random normal variables with different means.
class_1 = np.random.normal(2, 0.1, (num_points // 2, num_features))
class_2 = np.random.normal(4, 0.1, (num_points // 2, num_features))
import numpy as np
import autodiff as ad

x = ad.Variable(name="x")
w = ad.Variable(name="w")
b = ad.Variable(name="b")
labels = ad.Variable(name="labels")

# Define computation graph
p = 1.0 / (1.0 + ad.exp_op(-1.0 * ad.matmul_op(w, x)))
loss = -1.0 * ad.reduce_sum_op(
    labels * ad.log_op(p) + (1.0 - labels) * ad.log_op(1.0 - p), axis=1)
grad_y_w, = ad.gradients(loss, [w])

num_features = 2
num_points = 200
num_iterations = 1000
learning_rate = 0.01

# The dummy dataset consists of two classes.
# The classes are modelled as random normal variables with different means.
class_1 = np.random.normal(2, 0.1, (num_points // 2, num_features))
class_2 = np.random.normal(4, 0.1, (num_points // 2, num_features))

x_val = np.concatenate((class_1, class_2), axis=0).T
x_val = np.concatenate((x_val, np.ones((1, num_points))), axis=0)
w_val = np.random.normal(size=(1, num_features + 1))
import autodiff as ad
import numpy as np

w = ad.Variable(name="w")
x = ad.Variable(name="x")
b = ad.Variable(name="b")
labels = ad.Variable(name="labels")

out = 1.0 / (1.0 + ad.exp_op(-1.0 * (w * x + b)))
ce_loss = -1.0 * ((labels * ad.log_op(out)) +
                  ((1.0 - labels) * ad.log_op(1.0 - out)))
grad_w, grad_b = ad.gradients(ce_loss, [w, b])

# weights our model initially starts at
w_val = 10
b_val = 1

# weights our model should converge to
w_required = 5
b_required = 20

# We simulate the training dataset for logistic regression,
# taking x as a continuous array from -10 to 6 with a step size of 0.01.
x_val = np.arange(-10, 6, 0.01)
# Compute the labels with the exact logistic regression formula in NumPy.
labels_val = 1 / (1 + np.exp(-(w_required * x_val + b_required)))
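# A minimal sketch of the gradient-descent loop that could follow this setup,
# assuming the ad.Executor API used in the other snippets and that scalar
# parameters can be fed as 0-d NumPy arrays; the learning rate and step count
# are illustrative, not taken from the original code.
executor = ad.Executor([ce_loss, grad_w, grad_b])
learning_rate = 0.01
w_cur, b_cur = np.array(float(w_val)), np.array(float(b_val))
for step in range(1000):
    loss_val, grad_w_val, grad_b_val = executor.run(
        feed_dict={x: x_val, w: w_cur, b: b_cur, labels: labels_val})
    # average the per-sample gradients and step toward w_required, b_required
    w_cur = w_cur - learning_rate * np.mean(grad_w_val)
    b_cur = b_cur - learning_rate * np.mean(grad_b_val)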
import autodiff as ad
import numpy as np

# construct the computation graph
x = ad.Variable(name="x")
w = ad.Variable(name="w")
y_ = ad.Variable(name="labels")

prob = 1.0 / (1.0 + ad.exp_op(-1.0 * ad.matmul_op(w, x)))
loss = -1.0 * ad.reduce_sum_op(
    y_ * ad.log_op(prob) + (1.0 - y_) * ad.log_op(1.0 - prob), axis=1)
# note the trailing ',': ad.gradients returns a list
grad_w, = ad.gradients(loss, [w])

# Data
data1 = np.random.normal(1, 0.1, size=(100, 10))
data2 = np.random.normal(5, 0.4, size=(200, 10))
data = np.concatenate((data1, data2), axis=0).T
x_val = np.concatenate((data, np.ones((1, 300))), axis=0)
y_val = np.concatenate((np.zeros((data1.shape[0], 1)),
                        np.ones((data2.shape[0], 1))), axis=0).T

# Variables
w_val = np.random.normal(size=(1, 11))

# Params
learning_rate = 0.0001

# Execute
executor = ad.Executor([loss, grad_w])
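# A minimal sketch of full-batch gradient descent using the executor above;
# the iteration count and print interval are illustrative, not from the
# original code.
for step in range(1000):
    loss_val, grad_w_val = executor.run(
        feed_dict={x: x_val, w: w_val, y_: y_val})
    w_val = w_val - learning_rate * grad_w_val
    if step % 100 == 0:
        print("step {}: loss = {}".format(step, np.sum(loss_val)))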