Example #1
def test_logistic_regression_2():
    x = ad.Variable(name = "input")  # shape (1, n)
    w = ad.Variable(name = "weight") # shape (n, k)
    b = ad.Variable(name = "bias")   # shape (k,)
    logits = ad.matmul_op(x, w) + b
    y = ad.softmax_with_cross_entropy_op(logits)

    x_val = np.array([[2, 2, 2]])
    w_val = np.array([[1, 1, 1], [2, 2, 2]]).transpose()
    b_val = 5 * np.ones(2)
    # print(x_val, w_val, b_val, np.matmul(x_val, w_val) + b_val)

    grad_x, grad_w, grad_b = ad.gradients(y, [x, w, b])
    executor = ad.Executor([y, grad_x, grad_w, grad_b])
    y_val, grad_x_val, grad_w_val, grad_b_val = \
        executor.run(feed_dict = {x: x_val, w: w_val, b: b_val})

    logits_val = np.matmul(x_val, w_val) + b_val
    x_row_max = logits_val.max(axis=-1, keepdims=True)
    e_x = np.exp(logits_val - x_row_max)
    expected_yval = e_x / e_x.sum(axis=-1, keepdims=True)
    y_base = np.ones_like(expected_yval)
    expected_grad_x_val = np.matmul(y_base*(expected_yval - 1), w_val.transpose())
    expected_grad_w_val = np.matmul(x_val.transpose(), y_base*(expected_yval - 1))
    expected_grad_b_val = y_base*(expected_yval - 1)
    # print(grad_b_val)
    # print(expected_grad_b_val)
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x_val, expected_grad_x_val)
    assert np.array_equal(grad_w_val, expected_grad_w_val)
    assert np.array_equal(grad_b_val, expected_grad_b_val)
Example #2
def test_matmul_two_vars():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = ad.matmul_op(x2, x3)

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = np.array([[1, 2], [3, 4], [5, 6]])  # 3x2
    x3_val = np.array([[7, 8, 9], [10, 11, 12]])  # 2x3

    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x2: x2_val,
        x3: x3_val
    })

    expected_yval = np.matmul(x2_val, x3_val)
    expected_grad_x2_val = np.matmul(
        np.ones_like(expected_yval), np.transpose(x3_val))
    expected_grad_x3_val = np.matmul(
        np.transpose(x2_val), np.ones_like(expected_yval))

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
Example #3
def test_matmul_mix_add_1():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    x4 = ad.Variable(name="x4")
    y = ad.matmul_op(x2, x3)
    z = 2 * (y + x4)

    grad_x2, grad_x3, grad_x4 = ad.gradients(z, [x2, x3, x4])

    executor = ad.Executor([z, grad_x2, grad_x3, grad_x4])
    x2_val = np.array([[1, 2], [3, 4], [5, 6]])  # 3x2
    x3_val = np.array([[7, 8, 9], [10, 11, 12]])  # 2x3
    x4_val = np.arange(9).reshape((3, 3))

    z_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(feed_dict={
        x2: x2_val,
        x3: x3_val,
        x4: x4_val
    })

    expected_zval = 2 * (np.matmul(x2_val, x3_val) + x4_val)

    expected_grad_x2_val = 2 * np.matmul(np.ones_like(expected_zval),
                                         np.transpose(x3_val))
    expected_grad_x3_val = 2 * np.matmul(np.transpose(x2_val),
                                         np.ones_like(expected_zval))
    expected_grad_x4_val = 2 * np.ones((3, 3))

    assert isinstance(z, ad.Node)
    assert np.array_equal(z_val, expected_zval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
    assert np.array_equal(grad_x4_val, expected_grad_x4_val)
Example #4
def main():
    # Generate dataset and initial weight
    x_t, y_t = generate_dataset(1, 1, -5, point_num=100)

    # add an extra dimension to build homogeneous coordinates
    x_t = np.concatenate((x_t, np.ones((x_t.shape[0], 1))), axis=1)
    W_val = np.random.rand(3, 1)

    # draw the initial decision boundary
    draw(W_val, x_t, y_t)

    # Create the model
    x = ad.Variable(name='x')
    W = ad.Variable(name='W')
    y = ad.sigmoid_op(ad.matmul_op(x, W))

    # Define loss
    y_ = ad.Variable(name='y_')
    cross_entropy = ad.reduce_mean_op(-ad.reduce_sum_op(
        y_ * ad.log_op(y) +
        (1 - y_) * ad.log_op(1 - y), reduction_indices=[1]))

    # Update rule
    learning_rate = 0.5
    W_grad, = ad.gradients(cross_entropy, [W])
    W_train_step = W - learning_rate * W_grad

    # Training
    executor = ad.Executor([cross_entropy, y, W_train_step])
    steps = 200

    plt.ion()

    for i in range(steps):
        plt.cla()

        loss_val, y_val, W_val = executor.run(feed_dict={
            x: x_t,
            y_: y_t,
            W: W_val,
        })

        print("Step {}: loss: {}".format(i + 1, loss_val))

        # draw the trained decision boundary
        draw(W_val, x_t, y_t)

        plt.pause(0.1)

    plt.ioff()
    plt.show()
Example #5
    def fit(self, X, Y):
        x = ad.Variable(name='x')
        w = ad.Variable(name='w')
        y = ad.Variable(name='y')

        p = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))

        # cross entropy
        loss = 0 - y * ad.log_op(p) - (1 - y) * ad.log_op(1 - p)

        grad_w, = ad.gradients(loss, [w])

        # SGD
        length = np.shape(X)[0]
        self.num_feature = np.shape(X)[1]
        executor = ad.Executor([loss, grad_w])
        self.coef_ = np.random.rand(1, self.num_feature) / 1000.0
        for i in range(self.maxiter):
            grad = np.zeros((1, self.num_feature))
            loss = 0
            for j in range(self.batch):
                t = random.choice(range(length))
                x_val = X[t].reshape((self.num_feature, 1))
                if Y[t] == self.labels[0]:
                    y_val = 0
                else:
                    y_val = 1
                loss_val, grad_w_val = executor.run(feed_dict={
                    x: x_val,
                    w: self.coef_,
                    y: y_val
                })
                grad = grad + grad_w_val
                loss = loss + loss_val
            self.coef_ = self.coef_ - self.learning_rate * grad / self.batch
            if i % 100 == 0:
                print(loss)
Example #6
def linear_regression():
    x = ad.Variable(name='x')
    y = ad.Variable(name='y')
    W = ad.Variable(name='W')
    output = ad.matmul_op(x, W)
    # loss function
    cost = 0.5 * ad.reduce_sum_op((y - output) * (y - output), axis=0)
    # cost = 0.5 * ad.matmul_op((y - output), (y - output), True, False)
    # gradient
    grad_cost_w, = ad.gradients(cost, [W])
    # construct data set
    # y = x
    num_point = 10
    x_data = np.array(range(num_point)).reshape((num_point, 1))
    y_data = x_data + np.random.uniform(-0.1, 0.1, (num_point, 1))
    x_data = np.concatenate([x_data, np.ones((num_point, 1))], axis=1)
    # initialize the parameters
    w_val = np.array([[0.5], [0.1]])
    executor = ad.Executor([cost, grad_cost_w])
    # train
    n_epoch = 1000
    lr = 0.001
    cost_list = []
    print "training..."
    for i in range(n_epoch):
        # evaluate the graph
        cost_val, grad_cost_w_val = executor.run(feed_dict={
            x: x_data,
            W: w_val,
            y: y_data
        })
        # update the parameters using GD
        print "cost: ", cost_val
        print "grad: ", grad_cost_w_val
        w_val = w_val - lr * grad_cost_w_val
        print "weight: ", w_val
        cost_list.append(cost_val)
Example #7
def get_model_params(x_train, y_train, class_1, class_2):
    '''returns the weights after performing gradient descent'''
    learning_rate = 0.01
    batch_size = 8

    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    logistic_regression = 1 / (1 + ad.exp_op(0 - ad.matmul_op(w, x)))
    cross_entropy = -1 * y * ad.log_op(logistic_regression) - (1 - y) * ad.log_op(1 - logistic_regression)

    gradients = ad.gradients(cross_entropy, [w])[0]
    executor = ad.Executor([cross_entropy, gradients])
    weights = np.random.rand(1, np.shape(x_train)[1]) / 1000.0

    #batch = 0
    #previous_loss = 0
    for i in range(5000):
        grad = np.zeros((1, np.shape(x_train)[1]))
        loss = 0

        # go randomly over examples in each batch
        for _ in range(batch_size):
            t = random.choice(range(np.shape(x_train)[0]))
            x_flat = x_train[t].reshape((np.shape(x_train)[1], 1))
            y_label = 0 if y_train[t] == class_1 else 1

            loss_delta, grad_delta = executor.run(feed_dict={x : x_flat, w : weights, y : y_label})
            grad += grad_delta
            loss += loss_delta
        weights = weights - (learning_rate * grad / batch_size)
        if i % 1000 == 0:
            print("loss = {:.3f} loss_delta = {:.3f}".format(loss[0][0], loss_delta[0][0]))

    return weights
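A minimal usage sketch for the function above, assuming numpy is imported as np (as in the snippet) and using a made-up two-class dataset; the data, label values, and shapes below are illustrative assumptions, not part of the original:

# Hypothetical toy data: 40 two-dimensional points in two Gaussian clusters.
x_train = np.vstack([np.random.normal(0.0, 0.5, (20, 2)),
                     np.random.normal(3.0, 0.5, (20, 2))])
y_train = np.array([0] * 20 + [1] * 20)

# class_1 / class_2 are the label values mapped to targets 0 and 1.
weights = get_model_params(x_train, y_train, class_1=0, class_2=1)
print(weights.shape)  # (1, 2): one weight per input feature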
Example #8
def test_msr():
    x = ad.Variable(name="x")
    y = ad.Variable(name="y")
    z = x * y
    l = ad.reduce_sum_op((x - z) * (x - z), axis=0)
    # c = 2*x
    c = ad.matmul_op(x - z, x - z, True, False)

    x_val = np.ones((10, 1))
    y_val = np.ones((10, 1)) * 2
    grad_x1, grad_y1 = ad.gradients(l, [x, y])
    grad_x2, grad_y2 = ad.gradients(c, [x, y])
    executor = ad.Executor([l, c, grad_x1, grad_y1, grad_x2, grad_y2])
    # executor = ad.Executor([l, grad_x1, grad_y1, d])
    loss, cost, grad_x1_val, grad_y1_val, grad_x2_val, grad_y2_val = executor.run(
        feed_dict={
            x: x_val,
            y: y_val
        })
    # loss, grad_x1_val, grad_y1_val, d_val = executor.run(feed_dict={x: x_val, y: y_val, z: z_val})
    print(loss)
    print(cost)
    print("gx1: %s, gy1: %s" % (str(grad_x1_val), str(grad_y1_val)))
    print("gx2: %s, gy2: %s" % (str(grad_x2_val), str(grad_y2_val)))
Example #9
x = ad.Variable(name="x")
w = ad.Variable(name="w")
b = ad.Variable(name="b")
labels = ad.Variable(name="labels")

# Computation graph


def cross_entropy(output, labels):
    loss = -1.0 * ad.reduce_sum_op(labels * ad.log_op(output) +
                                   (1.0 - labels) * ad.log_op(1.0 - output),
                                   axis=1)
    return loss


# Output of the hypothesis of logistic regression
p = 1.0 / (1.0 + ad.exp_op((-1.0 * ad.matmul_op(w, x))))
# Loss node
loss = cross_entropy(p, labels)
# Gradient node of the loss with respect to w
grad_y_w, = ad.gradients(loss, [w])

num_features = 2
num_points = 200
num_iterations = 1000
learning_rate = 0.01

# The dummy dataset consists of two classes.
# The classes are modelled as random normal variables with different means.

class_1 = np.random.normal(2, 0.1, (int(num_points / 2), num_features))
class_2 = np.random.normal(4, 0.1, (int(num_points / 2), num_features))
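The excerpt is cut off here; a minimal sketch of how the two clusters might be combined into a single training set (an illustrative assumption, not part of the original snippet):

# Stack the two clusters and build 0/1 targets matching the cross-entropy above.
x_data = np.concatenate([class_1, class_2], axis=0)           # shape (200, 2)
labels_data = np.concatenate([np.zeros(num_points // 2),
                              np.ones(num_points // 2)])      # shape (200,)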
Example #10
# const
data_size = 50
batch_size = 8
INPUT_NODE = 2
LAYER1_NODE = 10
OUTPUT_NODE = 1

# inputs
x = ad.Variable(name='x')
y_ = ad.Variable(name='y_')
# parameters
w1 = ad.Variable(name='w1')
b1 = ad.Variable(name='b1')
# forward
y = ad.matmul_op(x, w1) + b1

# loss
MSE = ad.pow_op((y - y_), 2)
# backward
grad_w1, grad_b1 = ad.gradients(MSE, [w1, b1])
executor = ad.Executor([MSE, grad_w1, grad_b1])

# fake data
rmd = np.random.RandomState(1)
X = np.linspace(0, 5, data_size)
X = np.array([[i] for i in X])
Y = X * 2
plt.scatter(X, Y)
# plt.show()
Example #11
data_size = 100
batch_size = 8
INPUT_NODE = 2
LAYER1_NODE = 10
OUTPUT_NODE = 1

# inputs
x = ad.Variable(name='x')
y_ = ad.Variable(name='y_')
# parameters
w1 = ad.Variable(name='w1')
w2 = ad.Variable(name='w2')
b1 = ad.Variable(name='b1')
b2 = ad.Variable(name='b2')
# forward
a = ad.matmul_op(x, w1) + b1
a = ad.relu_op(a)
y = ad.matmul_op(a, w2) + b2
y = ad.relu_op(y)
# loss (squared error)
loss = ad.pow_op((y - y_), 2)
# backward
grad_w1, grad_w2, grad_b1, grad_b2 = \
    ad.gradients(loss, [w1, w2, b1, b2])
executor = ad.Executor([loss, grad_w1, grad_w2, grad_b1, grad_b2])

# fake data
rmd = np.random.RandomState(1)
X = rmd.rand(data_size, 2)
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]
for i in range(data_size):
Example #12
    ix = range(N * j, N * (j + 1))
    r = np.linspace(0.0, 1, N)
    t = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2  # theta
    X_data[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
    y_data[ix] = j

y_one_hot = np.zeros((N * K, K))
y_one_hot[range(N * K), y_data] = 1

######################################
x = ad.Variable(name="x")
y = ad.Variable(name='y')
w = ad.Variable(name="w")
b = ad.Variable(name="b")

z = ad.matmul_op(x, w)
logits = z + ad.broadcastto_op(b, z)

loss = ad.softmax_crossentropy_op(logits, y)
grad_w, grad_b = ad.gradients(loss, [w, b])
executor = ad.Executor([loss, grad_w, grad_b])

w_val = np.zeros((D, K))
b_val = np.zeros(K)

n_epoch = 1000
lr = 0.01

for i in range(n_epoch):
    loss_val, grad_w_val, grad_b_val = executor.run(feed_dict={x: X_data, w: w_val, y: y_one_hot, b: b_val})
    if i % 10 == 0:
Example #13
import numpy as np
import autodiff as ad

x = ad.Variable(name='x')
y = ad.Variable(name='y')
W = ad.Variable(name='W')
b = ad.Variable(name='b')
z = ad.matmul_op(x, W)
output = z + ad.broadcastto_op(b, z)

num_point = 1000

cost = ad.reduce_sum_op((y - output) * (y - output)) / (2.0 * num_point)
grad_cost_w, grad_b = ad.gradients(cost, [W, b])

x_data = np.linspace(0, 10, num_point).reshape((num_point, 1))
y_data = (2.0 * x_data + np.random.uniform(-0.2, 0.2, (num_point, 1))
          + 5.0 * np.ones((num_point, 1)))

w_val = np.zeros((1, 1))
b_val = np.zeros(1)
executor = ad.Executor([cost, grad_cost_w, grad_b])
# train
n_epoch = 2000
lr = 0.01

print("training...")

for i in range(n_epoch):
    # evaluate the graph
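    # The loop body is truncated in the source. A minimal sketch of how it
    # might continue, following the same gradient-descent update used in
    # linear_regression() above; this update rule is an assumption, not part
    # of the original snippet.
    cost_val, grad_w_val, grad_b_val = executor.run(feed_dict={
        x: x_data, y: y_data, W: w_val, b: b_val})
    w_val = w_val - lr * grad_w_val
    b_val = b_val - lr * grad_b_val
    if i % 100 == 0:
        print("epoch {}: cost {}".format(i, cost_val))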
Example #14
def get_logistic_model(x, weight, bias):
    y = 1 / (1 + ad.exp_op(-1 *
                           (ad.matmul_op(x, weight, trans_B=True) + bias)))
    #y = 1 / (1+ad.exp_op(-1 * (ad.mul_op(x, weight)+bias)))
    return y
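A short usage sketch for the helper above; the variable names and shapes are assumptions for illustration only:

x = ad.Variable(name="x")       # input batch, assumed shape (m, n)
weight = ad.Variable(name="w")  # weights, assumed shape (1, n); multiplied with trans_B=True
bias = ad.Variable(name="b")    # scalar bias

y = get_logistic_model(x, weight, bias)  # sigmoid(x . w^T + b) as a graph node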
Example #15
LR = 0.0001
EPOCH = 500

x_val = np.linspace(-5, 5, 50)
y_val = x_val * x_val + np.random.rand(50) * 0.5

x = ad.Variable(name='x')
y_ = ad.Variable(name='y_')
w1 = ad.Variable(name='w1', init_val=np.random.rand(1, 10))
b1 = ad.Variable(name='b1', init_val=np.random.rand(1, 10))
w2 = ad.Variable(name='w2', init_val=np.random.rand(10, 10))
b2 = ad.Variable(name='b2', init_val=np.random.rand(1, 10))
w3 = ad.Variable(name='w3', init_val=np.random.rand(10, 1))
b3 = ad.Variable(name='b3', init_val=np.random.rand(1, 1))

fc1 = ad.relu(ad.matmul_op(x, w1) + b1)
fc2 = ad.relu(ad.matmul_op(fc1, w2) + b2)
y = ad.matmul_op(fc2, w3) + b3

loss = (y_ - y) * (y_ - y)

w1_grad, b1_grad, w2_grad, b2_grad, w3_grad, b3_grad = ad.gradients(
    loss, [w1, b1, w2, b2, w3, b3])

executor = ad.Executor(
    [loss, w1_grad, b1_grad, w2_grad, b2_grad, w3_grad, b3_grad])

for epoch in range(EPOCH):
    for i in range(x_val.shape[0]):
        loss_val, w1_grad_val, b1_grad_val, w2_grad_val, b2_grad_val, w3_grad_val, b3_grad_val = executor.run(
            feed_dict={