Example #1
def main():
    # Generate dataset and initial weight
    x_t, y_t = generate_dataset(1, 1, -5, point_num=100)

    # add a bias column to build homogeneous coordinates
    x_t = np.concatenate((x_t, np.ones((x_t.shape[0], 1))), axis=1)
    W_val = np.random.rand(3, 1)

    # draw the initial decision boundary
    draw(W_val, x_t, y_t)

    # Create the model
    x = ad.Variable(name='x')
    W = ad.Variable(name='W')
    y = ad.sigmoid_op(ad.matmul_op(x, W))

    # Define loss
    y_ = ad.Variable(name='y_')
    cross_entropy = ad.reduce_mean_op(-ad.reduce_sum_op(
        y_ * ad.log_op(y) +
        (1 - y_) * ad.log_op(1 - y), reduction_indices=[1]))

    # Update rule
    learning_rate = 0.5
    W_grad, = ad.gradients(cross_entropy, [W])
    W_train_step = W - learning_rate * W_grad

    # Training
    executor = ad.Executor([cross_entropy, y, W_train_step])
    steps = 200

    plt.ion()

    for i in range(steps):
        plt.cla()

        loss_val, y_val, W_val = executor.run(feed_dict={
            x: x_t,
            y_: y_t,
            W: W_val,
        })

        print("Step {}: loss: {}".format(i + 1, loss_val))

        # draw the current decision boundary
        draw(W_val, x_t, y_t)

        plt.pause(0.1)

    plt.ioff()
    plt.show()
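For reference, W_train_step above encodes one step of plain batch gradient descent on the mean sigmoid cross-entropy. A NumPy-only sketch of the same update, independent of the ad graph (illustrative, not part of the original example):

import numpy as np

def gd_step(W_val, x_t, y_t, lr=0.5):
    # sigmoid predictions, shape (N, 1)
    pred = 1.0 / (1.0 + np.exp(-x_t @ W_val))
    # gradient of the mean cross-entropy: x^T (pred - y) / N
    grad = x_t.T @ (pred - y_t) / x_t.shape[0]
    return W_val - lr * grad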
Example #2
def test_exp():
    x1 = ad.Variable(name = "x1")
    y = 1 + 2 * ad.exp_op(ad.log_op(x1))

    x1_val = np.ones((2, 1))
    grad_y, = ad.gradients(y, [x1])
    executor = ad.Executor([y, grad_y])
    y_val, grad_y_val, = executor.run(feed_dict = {x1: x1_val})
    assert np.array_equal(y_val, 3 * np.ones_like(y_val))
    assert np.array_equal(grad_y_val, 2 * np.ones_like(grad_y_val))
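Since exp_op(log_op(x1)) simplifies to x1, the expression reduces to y = 1 + 2 * x1, which is why the test expects y_val == 3 and a gradient of 2 at x1 = 1.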
Example #3
def softmax_ce_loss(preds, truth):
    """
    Compute the softmax cross-entropy loss in a numerically stable way.
    :param preds: the raw model outputs (logits)
    :param truth: the true label, a one-hot vector
    :return: the loss
    """
    pred_max = ad.max_op(preds)
    preds_shift = ad.add_byscalar_op(ad.neg_op(pred_max), preds)
    exps = ad.exp_op(preds_shift)
    return ad.minus_op(ad.log_op(ad.sum_op(exps)),
                       ad.sum_op(ad.mul_op(preds_shift, truth)))
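The shift by max_op does not change the loss; a quick NumPy check of the identity used above (plain NumPy, independent of the ad ops):

import numpy as np

logits = np.array([2.0, 1.0, 0.1])
truth = np.array([1.0, 0.0, 0.0])  # one-hot label

# direct softmax cross-entropy
softmax = np.exp(logits) / np.sum(np.exp(logits))
loss_direct = -np.sum(truth * np.log(softmax))

# shifted formulation used in softmax_ce_loss
shifted = logits - np.max(logits)
loss_shifted = np.log(np.sum(np.exp(shifted))) - np.sum(shifted * truth)

assert np.isclose(loss_direct, loss_shifted)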
Example #4
def test_log():
    x1 = ad.Variable(name = "x1")
    x2 = ad.Variable(name = "x2")
    y = ad.log_op(x1) / x2
    grad_y, = ad.gradients(y, [x1])

    x1_val = 2 * np.ones((2, 1))
    x2_val = np.ones((1,1))
    executor = ad.Executor([y, grad_y])

    y_val, grad_y_val, = executor.run(feed_dict = {x1: x1_val, x2:x2_val})
    assert np.array_equal(y_val, np.log(x1_val))
    assert np.array_equal(grad_y_val, 0.5 * np.ones_like(grad_y_val))
Example #5
def test_log():
    x2 = ad.Variable(name="x2")
    y = ad.log_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = np.linspace(0.1, 2, 5)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.log(x2_val))
    assert np.array_equal(grad_x2_val, 1 / x2_val)
Example #6
    def fit(self, X, Y):
        x = ad.Variable(name='x')
        w = ad.Variable(name='w')
        y = ad.Variable(name='y')

        p = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))

        # cross entropy
        loss = 0 - y * ad.log_op(p) - (1 - y) * ad.log_op(1 - p)

        grad_w, = ad.gradients(loss, [w])

        # SGD
        length = np.shape(X)[0]
        self.num_feature = np.shape(X)[1]
        executor = ad.Executor([loss, grad_w])
        self.coef_ = np.random.rand(1, self.num_feature) / 1000.0
        for i in range(self.maxiter):
            grad = np.zeros((1, self.num_feature))
            loss = 0
            for j in range(self.batch):
                t = random.choice(range(length))
                x_val = X[t].reshape((self.num_feature, 1))
                if Y[t] == self.labels[0]:
                    y_val = 0
                else:
                    y_val = 1
                loss_val, grad_w_val = executor.run(feed_dict={
                    x: x_val,
                    w: self.coef_,
                    y: y_val
                })
                grad = grad + grad_w_val
                loss = loss + loss_val
            self.coef_ = self.coef_ - self.learning_rate * grad / self.batch
            if i % 100 == 0:
                print(loss)
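The fit method above reads several attributes that are set elsewhere in its class. A minimal wrapper sketch showing the assumed fields (the class name and defaults are hypothetical, chosen only to match what fit uses):

class LogisticRegressionAD:
    def __init__(self, labels, learning_rate=0.01, maxiter=1000, batch=16):
        self.labels = labels              # the two class labels, e.g. [0, 1]
        self.learning_rate = learning_rate
        self.maxiter = maxiter            # number of SGD iterations
        self.batch = batch                # samples drawn per iteration
        self.num_feature = None           # set by fit
        self.coef_ = None                 # learned weights, set by fit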
def get_model_params(x_train, y_train, class_1, class_2):
    '''Returns the weights after performing gradient descent.'''
    learning_rate = 0.01
    batch_size = 8

    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    logistic_regression = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))
    cross_entropy = -1 * y * ad.log_op(logistic_regression) - (1 - y) * ad.log_op(1 - logistic_regression)

    gradients = ad.gradients(cross_entropy, [w])[0]
    executor = ad.Executor([cross_entropy, gradients])
    weights = np.random.rand(1, np.shape(x_train)[1]) / 1000.0

    #batch = 0
    #previous_loss = 0
    for i in range(5000):
        grad = np.zeros((1, np.shape(x_train)[1]))
        loss = 0

        # go randomly over the examples in each batch
        for _ in range(batch_size):
            t = random.choice(range(np.shape(x_train)[0]))
            x_flat = x_train[t].reshape((np.shape(x_train)[1], 1))
            y_label = 0 if y_train[t] == class_1 else 1

            loss_delta, grad_delta = executor.run(feed_dict={x : x_flat, w : weights, y : y_label})
            grad += grad_delta
            loss += loss_delta
        weights = weights - (learning_rate * grad / batch_size)
        if i % 1000 == 0:
            print("loss = {:.3f} loss_delta = {:.3f}".format(loss[0][0], loss_delta[0][0]))

    return weights
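A minimal usage sketch for get_model_params with two synthetic Gaussian classes (the data shapes and label values are illustrative, not part of the original snippet):

import numpy as np

x_train = np.vstack([np.random.normal(2, 0.1, (50, 2)),
                     np.random.normal(4, 0.1, (50, 2))])
y_train = np.array([0] * 50 + [1] * 50)

weights = get_model_params(x_train, y_train, class_1=0, class_2=1)
print(weights.shape)  # (1, 2)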
Example #8
def test_log():
    x2 = ad.Variable(name="x2")
    y = ad.log_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])

    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    epsilon = 1e-6
    zero_arr = np.zeros(3) + epsilon
    assert isinstance(y, ad.Node)
    assert np.all(np.less_equal(np.abs(y_val - np.log(x2_val)), zero_arr))
    assert np.all(np.less_equal(np.abs(1 / x2_val - grad_x2_val), zero_arr))
Example #9
def test():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")

    y = (ad.sin_op(x1 + 1) + ad.cos_op(2 * x2)) * ad.tan_op(ad.log_op(x3)) + (
        ad.sin_op(x2 + 1)) + ad.cos_op(2 * x1) * ad.exp_op(1 + ad.sin_op(x3))
    grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3])

    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3])
    x1_val = 1 * np.ones(1)
    x2_val = 2 * np.ones(1)
    x3_val = 3 * np.ones(1)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val,
        x3: x3_val
    })

    print('x1=', x1_val[0])
    print('x2=', x2_val[0])
    print('x3=', x3_val[0])
    print('---------------------------------------------------------------')

    print('y0_val=', y_val[0])
    print('grad_x1_val= ', grad_x1_val[0])
    print('grad_x2_val= ', grad_x2_val[0])
    print('grad_x3_val= ', grad_x3_val[0])
    print('---------------------------------------------------------------')
    y_numerical, grad_numerical = numerical_diff(f, [x1_val, x2_val, x3_val],
                                                 1e-10)
    print('y0_numerical= ', y_numerical)
    grad_numerical_x1, grad_numerical_x2, grad_numerical_x3 = grad_numerical
    print('grad_numerical_x1 =', grad_numerical_x1)
    print('grad_numerical_x2 =', grad_numerical_x2)
    print('grad_numerical_x3 =', grad_numerical_x3)
    print('---------------------------------------------------------------')
    print('gradient offsets vs. numerical:')
    print('x1:', abs(grad_x1_val - grad_numerical_x1))
    assert abs(grad_x1_val - grad_numerical_x1) < 1e-5
    print('x2:', abs(grad_x2_val - grad_numerical_x2))
    assert abs(grad_x2_val - grad_numerical_x2) < 1e-5
    print('x3:', abs(grad_x3_val - grad_numerical_x3))
    assert abs(grad_x3_val - grad_numerical_x3) < 1e-5
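f and numerical_diff are not shown in this snippet. A plausible sketch of both, assuming f mirrors the expression built above in plain NumPy and numerical_diff takes a central difference per input (names and exact behaviour are assumptions):

import numpy as np

def f(x1, x2, x3):
    # NumPy version of the expression built with ad ops above
    return (np.sin(x1 + 1) + np.cos(2 * x2)) * np.tan(np.log(x3)) \
        + np.sin(x2 + 1) + np.cos(2 * x1) * np.exp(1 + np.sin(x3))

def numerical_diff(f, inputs, eps):
    # central-difference estimate of df/dx_i for each input array
    y = f(*inputs)
    grads = []
    for i in range(len(inputs)):
        plus = [v.copy() for v in inputs]
        minus = [v.copy() for v in inputs]
        plus[i] = plus[i] + eps
        minus[i] = minus[i] - eps
        grads.append((f(*plus) - f(*minus)) / (2 * eps))
    return y, grads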
Example #10
def cross_entropy(output, labels):
    loss = -1.0 * ad.reduce_sum_op(labels * ad.log_op(output) +
                                   (1.0 - labels) * ad.log_op(1.0 - output),
                                   axis=1)
    return loss
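A plain NumPy reference of the same elementwise binary cross-entropy, summed over the class axis (illustrative; it assumes output already holds probabilities strictly between 0 and 1):

import numpy as np

def cross_entropy_np(output, labels):
    # output, labels: arrays of shape (batch, classes)
    return -np.sum(labels * np.log(output)
                   + (1.0 - labels) * np.log(1.0 - output), axis=1)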
Example #11
import numpy as np
import autodiff as ad



x = ad.Variable(name = "x")
w = ad.Variable(name = "w")
b = ad.Variable(name = "b")
labels = ad.Variable(name = "labels")


# Define Computation graph

p = 1.0 / (1.0 + ad.exp_op((-1.0 * ad.matmul_op(w, x))))

loss = -1.0 * ad.reduce_sum_op(labels * ad.log_op(p) + (1.0 - labels) * ad.log_op(1.0 - p), axis = 1)

grad_y_w, = ad.gradients(loss, [w])



num_features = 2
num_points = 500
num_iterations = 1000
learning_rate = 0.001

# The dummy dataset consists of two classes.
# The classes are modelled as random normal variables with different means.

class_1 = np.random.normal(2, 0.1, (num_points // 2, num_features))
class_2 = np.random.normal(4, 0.1, (num_points // 2, num_features))
Example #12
import numpy as np
import autodiff as ad

x = ad.Variable(name="x")
w = ad.Variable(name="w")
b = ad.Variable(name="b")
labels = ad.Variable(name="labels")

# Define Computation graph

p = 1.0 / (1.0 + ad.exp_op((-1.0 * ad.matmul_op(w, x))))

loss = -1.0 * ad.reduce_sum_op(
    labels * ad.log_op(p) + (1.0 - labels) * ad.log_op(1.0 - p), axis=1)

grad_y_w, = ad.gradients(loss, [w])

num_features = 2
num_points = 200
num_iterations = 1000
learning_rate = 0.01

# The dummy dataset consists of two classes.
# The classes are modelled as random normal variables with different means.

class_1 = np.random.normal(2, 0.1, (num_points // 2, num_features))
class_2 = np.random.normal(4, 0.1, (num_points // 2, num_features))
x_val = np.concatenate((class_1, class_2), axis=0).T

x_val = np.concatenate((x_val, np.ones((1, num_points))), axis=0)
w_val = np.random.normal(size=(1, num_features + 1))
Example #13
import autodiff as ad
import numpy as np

w = ad.Variable(name="w")
x = ad.Variable(name="x")
b = ad.Variable(name="b")

labels = ad.Variable(name="labels")

out = 1.0 / (1.0 + ad.exp_op((-1.0 * (w * x + b))))

ce_loss = -1.0 * ((labels * ad.log_op(out)) +
                  ((1.0 - labels) * ad.log_op(1.0 - out)))

grad_w, grad_b = ad.gradients(ce_loss, [w, b])

# weights our model initially starts at
w_val = 10
b_val = 1

# parameter values the model should converge to
w_required = 5
b_required = 20

# we are simulating the training dataset for logistic regression

# taking x as an evenly spaced array from -10 to 6 with a step size of 0.01
x_val = np.arange(-10, 6, 0.01)

# generate the labels with the exact logistic function in NumPy
labels_val = 1 / (1 + np.exp(-(w_required * x_val + b_required)))
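The snippet stops before the update loop. A minimal gradient-descent sketch that could follow, reusing the nodes defined above (the learning rate, iteration count, and the averaging over per-sample gradients are assumptions):

executor = ad.Executor([ce_loss, grad_w, grad_b])
learning_rate = 0.01

for i in range(10000):
    loss_val, grad_w_val, grad_b_val = executor.run(feed_dict={
        x: x_val, w: w_val, b: b_val, labels: labels_val})
    # gradients come back per sample here; average them before stepping
    w_val = w_val - learning_rate * np.mean(grad_w_val)
    b_val = b_val - learning_rate * np.mean(grad_b_val)
    if i % 1000 == 0:
        print("loss:", np.mean(loss_val))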
Example #14
import autodiff as ad
import numpy as np

# construct the computation graph
x = ad.Variable(name="x")
w = ad.Variable(name="w")
y_ = ad.Variable(name="labels")

prob = 1.0 / (1.0 + ad.exp_op((-1.0 * ad.matmul_op(w, x))))
loss = -1.0 * ad.reduce_sum_op(
    y_ * ad.log_op(prob) + (1.0 - y_) * ad.log_op(1.0 - prob), axis=1)

# note the trailing comma: ad.gradients returns a list
grad_w, = ad.gradients(loss, [w])

# Data
data1 = np.random.normal(1, 0.1, size=(100, 10))
data2 = np.random.normal(5, 0.4, size=(200, 10))
data = np.concatenate((data1, data2), axis=0).T
x_val = np.concatenate((data, np.ones((1, 300))), axis=0)
y_val = np.concatenate(
    (np.zeros((data1.shape[0], 1)), np.ones((data2.shape[0], 1))), axis=0).T
# Variables
w_val = np.random.normal(size=(1, 11))

# Params
learning_rate = 0.0001

# Execute
executor = ad.Executor([loss, grad_w])
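The snippet ends after building the executor. A full-batch training loop sketch using the nodes and values above (the iteration count is illustrative):

for i in range(1000):
    loss_val, grad_w_val = executor.run(feed_dict={x: x_val, w: w_val, y_: y_val})
    # one gradient-descent step on the weights
    w_val = w_val - learning_rate * grad_w_val
    if i % 100 == 0:
        print("iteration {}, loss {}".format(i, loss_val.sum()))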