Example #1
def test_lr_grad():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    wshape = (500, 250)
    weights = random.rand(*wshape) - 0.5

    xshape = (256, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1

    gradient_checker.quick_grad_check(training_loss, inputs)
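    # Hedged sketch of a manual check (an assumption, not part of the original test):
    # compare MinPy's autograd gradient with a central finite difference along a
    # single coordinate. Assumes `grad` is importable from minpy.core and that MinPy
    # ops accept plain NumPy operands (they are normally converted transparently).
    from minpy.core import grad
    eps = 1e-4
    g = grad(training_loss)(inputs).asnumpy()
    x0 = inputs.asnumpy()
    x_plus, x_minus = x0.copy(), x0.copy()
    x_plus[0, 0] += eps
    x_minus[0, 0] -= eps
    numeric = (training_loss(x_plus) - training_loss(x_minus)) / (2 * eps)
    print('autograd: {}  finite difference: {}'.format(g[0, 0], numeric))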
Example #2
def test_lr_grad():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    wshape = (500, 250)
    weights = random.rand(*wshape) - 0.5

    xshape = (256, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1

    gradient_checker.quick_grad_check(training_loss, inputs)
Example #3
def main():
    x = random.rand(3, 3)
    y = random.rand(3, 3)
    print('x: {}'.format(x.asnumpy()))
    print('y: {}'.format(y.asnumpy()))
    g = core.grad(f, argnum=[0, 1])
    gr = g(x, y)
    print('grad_x: {}'.format(gr[0].asnumpy()))
    print('grad_y: {}'.format(gr[1].asnumpy()))
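`f` is not defined in this snippet. A minimal sketch of an assumed module-level definition it could differentiate (the body is hypothetical; only the scalar-valued, two-array signature matters for `core.grad(f, argnum=[0, 1])`):

import minpy.numpy as np
import minpy.numpy.random as random
from minpy import core

def f(x, y):
    # Hypothetical scalar-valued function of two arrays.
    return np.sum(np.dot(x, y))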
Example #4
def test_context():
    set_context(gpu(1)) # set the global context as gpu(1)
    
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))
    
    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0
    
    with gpu(0):
        xshape = (256, 500)
        wshape = (500, 250)
        tshape = (256, 250)
        inputs = random.rand(*xshape) - 0.5
        targets = np.zeros(tshape)
        truth = random.randint(0, 250, 256)
        targets[np.arange(256), truth] = 1
        weights = random.rand(*wshape) - 0.5
    
        training_gradient_fun = grad(training_loss)
    
        for i in range(20):
            print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs)))
            gr = training_gradient_fun(weights, inputs)
            weights -= gr * 0.01
        print("\nff and bp on {0}".format(weights.context))
    
    print("\nexecute on cpu")
    with cpu():
        x_cpu = random.rand(32, 64) - 0.5
        y_cpu = random.rand(64, 32) - 0.5
        z_cpu = np.dot(x_cpu, y_cpu)
        print('z_cpu.context = {0}'.format(z_cpu.context))
    
    print("\nexecute on gpu(0)")
    with gpu(0):
        x_gpu0 = random.rand(32, 64) - 0.5
        y_gpu0 = random.rand(64, 32) - 0.5
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
        z_gpu0.asnumpy()
        print('z_gpu0.context = {0}'.format(z_gpu0.context))
    
    print("\n[use global context] execute on gpu(1)")
    x_gpu1 = random.rand(32, 64) - 0.5
    y_gpu1 = random.rand(64, 32) - 0.5
    z_gpu1 = np.dot(x_gpu1, y_gpu1)
    z_gpu1.asnumpy()
    print('z_gpu1.context = {0}'.format(z_gpu1.context))
Example #5
def test_context():
    #set_context(gpu(1)) # set the global context as gpu(1)

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    """
    with gpu(0):
        xshape = (256, 500)
        wshape = (500, 250)
        tshape = (256, 250)
        inputs = random.rand(*xshape) - 0.5
        targets = np.zeros(tshape)
        truth = random.randint(0, 250, 256)
        targets[np.arange(256), truth] = 1
        weights = random.rand(*wshape) - 0.5
    
        training_gradient_fun = grad(training_loss)
    
        for i in range(20):
            print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs)))
            gr = training_gradient_fun(weights, inputs)
            weights -= gr * 0.01
        print("\nff and bp on {0}".format(weights.context))
    """

    print("\nexecute on cpu")
    with cpu():
        x_cpu = random.rand(32, 64) - 0.5
        y_cpu = random.rand(64, 32) - 0.5
        z_cpu = np.dot(x_cpu, y_cpu)
        print('z_cpu.context = {0}'.format(z_cpu.context))
    """
def dropout_forward(x, dropout_param):
  """
  Performs the forward pass for (inverted) dropout.

  Inputs:
  - x: Input data, of any shape
  - dropout_param: A dictionary with the following keys:
    - p: Dropout parameter. We drop each neuron output with probability p.
    - mode: 'test' or 'train'. If the mode is train, then perform dropout;
      if the mode is test, then just return the input.
    - seed: Seed for the random number generator. Passing seed makes this
      function deterministic, which is needed for gradient checking but not in
      real networks.

  Outputs:
  - out: Array of the same shape as x.
  """
  p, mode = dropout_param['p'], dropout_param['mode']
  if 'seed' in dropout_param:
    random.seed(dropout_param['seed'])

  mask = None
  out = None

  if mode == 'train':
    #TODO: check implementation of compare operator in mxnet?
    mask = random.rand(*x.shape) > p  # keep each unit with probability 1 - p
    out = x * mask / (1 - p)  # drop, then scale so test time needs no rescaling
  else:
    out = x

  return out
Example #7
def dropout_forward(x, dropout_param):
    """
  Performs the forward pass for (inverted) dropout.

  Inputs:
  - x: Input data, of any shape
  - dropout_param: A dictionary with the following keys:
    - p: Dropout parameter. We drop each neuron output with probability p.
    - mode: 'test' or 'train'. If the mode is train, then perform dropout;
      if the mode is test, then just return the input.
    - seed: Seed for the random number generator. Passing seed makes this
      function deterministic, which is needed for gradient checking but not in
      real networks.

  Outputs:
  - out: Array of the same shape as x.
  """
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        random.seed(dropout_param['seed'])

    mask = None
    out = None

    if mode == 'train':
        #TODO: check implementation of compare operator in mxnet?
        mask = random.rand(*x.shape) > p  # keep each unit with probability 1 - p
        out = x * mask / (1 - p)  # drop, then scale so test time needs no rescaling
    else:
        out = x

    return out
Example #8
def dropout(x, prob, mode='train', seed=None):
    """
    Performs the forward pass for dropout (activations are rescaled by 1 - prob at test time).

    Inputs:
    - x: Input data, of any shape
    - prob: Dropout parameter. We drop each neuron output with probability prob.
    - mode: 'test' or 'train'. If the mode is train, then perform dropout;
      if the mode is test, then just return the input.
    - seed: Seed for the random number generator. Passing seed makes this
      function deterministic, which is needed for gradient checking but not in
      real networks.

    Outputs:
    - out: Array of the same shape as x.
    """
    if seed is not None:
        random.seed(seed)
    mask = None
    out = None
    if mode == 'train':
        #TODO: check implementation of compare operator in mxnet?
        mask = random.rand(*x.shape) > prob
        out = x * mask  #drop!
    else:
        out = x * (1 - prob)
    return out
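A quick usage sketch of the `dropout` above (shapes and the drop probability are arbitrary; assumes the usual `minpy.numpy` / `minpy.numpy.random` imports as `np` / `random`):

x = random.rand(4, 8)
out_train = dropout(x, 0.5, mode='train', seed=0)  # roughly half the entries zeroed
out_test = dropout(x, 0.5, mode='test')            # deterministic: x scaled by (1 - prob)
print(out_train.shape, out_test.shape)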
Example #10
def test_op_statistics():

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    
    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))
    
    
    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    
    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0
    
    np.record_op_stat()
    
    xshape = (256, 500)
    wshape = (500, 250)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5
    
    training_gradient_fun = grad(training_loss)
    
    for i in range(30):
        print('Trained accuracy #{}: {}%'.format(i, training_accuracy(weights,
                                                                      inputs)))
        gr = training_gradient_fun(weights, inputs)
        weights -= gr * 0.01
    
    # Print Op Statistics Info
    np.show_op_stat()
Example #11
def test_logistic():

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    
    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))
    
    
    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    
    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    xshape = (256, 500)
    wshape = (500, 250)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5
    
    training_gradient_fun = grad(training_loss)
    
    for i in range(200):
        print('Trained accuracy #{}: {}%'.format(i, training_accuracy(weights,
                                                                      inputs)))
        gr = training_gradient_fun(weights, inputs)
        weights -= gr * 0.01
Example #12
def test_logistic():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    xshape = (256, 500)
    wshape = (500, 250)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5

    training_gradient_fun = grad(training_loss)

    for i in range(200):
        print('Trained accuracy #{}: {}%'.format(
            i, training_accuracy(weights, inputs)))
        gr = training_gradient_fun(weights, inputs)
        weights -= gr * 0.01

    # The accuracy should be 100 in bug-free MinPy
    assert training_accuracy(weights, inputs) >= 95
Example #13
def test_slice():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        # Test Slice
        sliced_weights = weights[:, ::2]
        y = sigmoid(np.dot(inputs, sliced_weights))
        return y

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    xshape = (256, 500)
    # wshape = (500, 250)
    wshape = (500, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5

    training_gradient_fun = grad(training_loss)

    for i in range(20):
        print('Trained loss accuracy #{}: {}%'.format(
            i, training_accuracy(weights, inputs)))
        gr = training_gradient_fun(weights, inputs)
        print('Gradient Size', gr.shape)
        print('Gradient example', gr[0, :10].asnumpy())
        weights -= gr * 0.01
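The gradient flowing through the slice can also be checked on a tiny case. A minimal sketch, assuming `grad` is importable from `minpy.core` and the usual `np` / `random` imports:

from minpy.core import grad

def sliced_loss(w):
    s = w[:, ::2]              # use only every other column
    return np.sum(s * s)

w_small = random.rand(4, 6)
g_small = grad(sliced_loss)(w_small)
print(g_small.asnumpy())       # non-zero in the even columns, zero elsewhere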
Example #14
def test_slice():

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)
    
    def predict(weights, inputs):
        # Test Slice
        sliced_weights = weights[:, ::2]
        y = sigmoid(np.dot(inputs, sliced_weights))
        return y
    
    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l
    
    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0
    
    xshape = (256, 500)
    # wshape = (500, 250)
    wshape = (500, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5
    
    training_gradient_fun = grad(training_loss)
    
    for i in range(20):
        print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs)))
        gr = training_gradient_fun(weights, inputs)
        print('Gradient Size', gr.shape)
        print('Gradient example', gr[0,:10].asnumpy())
        weights -= gr * 0.01
Example #15
def test_mxnet_logistic():
    def sigmoid(x):
        return np.multiply(0.5, np.add(np.tanh(x), 1))

    xshape = (256, 500)
    # reversed relative to the other examples: MXNet's FullyConnected stores its weight as (num_hidden, input_dim)
    wshape = (250, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = np.random.rand(*wshape) - 0.5

    x = mx.sym.Variable(name='x')
    fc = mx.sym.FullyConnected(name='fc', data=x, num_hidden=250)
    act = mx.sym.Activation(data=fc, act_type='sigmoid')

    f = core.Function(act, {'x': xshape})

    def predict(weights, inputs):
        #return f( data=[('x', inputs)], weight=[('fc_weight', weights)], ctx=mx.cpu())
        return f(x=inputs, fc_weight=weights)

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    training_gradient_fun = core.grad(training_loss)

    print('Initial loss: {}'.format(training_loss(weights, inputs)))
    for i in range(100):
        gr = training_gradient_fun(weights, inputs)
        #print('Training gradient: {}'.format(gr))
        weights -= gr * 0.1
        if i % 10 == 0:
            print('Trained loss: {}'.format(training_loss(weights, inputs)))

    # The training loss should be around 300 in a bug-free MinPy
    assert training_loss(weights, inputs)[0] <= 600
Example #16
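This snippet uses `sigmoid` and `predict` without defining them; a minimal sketch of the assumed definitions, consistent with the other examples here (the bias term in `predict` is an assumption inferred from the call sites below):

import minpy.numpy as np
import minpy.numpy.random as random
from minpy.core import grad

def sigmoid(x):
    return 0.5 * (np.tanh(x / 2) + 1)

def predict(weights, bias, inputs):
    # Hypothetical: affine transform plus the broadcast bias, then sigmoid.
    return sigmoid(np.dot(inputs, weights) + bias)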
def training_loss(weights, bias, inputs):
    preds = predict(weights, bias, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l

def training_accuracy(weights, bias, inputs):
    preds = predict(weights, bias, inputs)
    error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0

xshape = (256, 500)
wshape = (500, 250)
bshape = (250)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5
#bias = random.rand(bshape) - 0.5
#print bias.shape
bias = np.zeros(bshape)
print(bias.shape)

training_gradient_fun = grad(training_loss)

for i in range(20):
    print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, bias, inputs)))
    gr = training_gradient_fun(weights, bias, inputs)
    weights -= gr * 0.01
Example #17
from minpy import core
import minpy.numpy as np
import minpy.numpy.random as random
import mxnet as mx


def sigmoid(x):
    return np.multiply(0.5, np.add(np.tanh(x), 1))

#xshape = (256, 500)
xshape = (256, 1, 30, 30)
# needs to be reversed because of MXNet's setting
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1

fc_wshape = (250, 845)
fc_weight = random.rand(*fc_wshape) - 0.5

fc_bshape = (250, )
#fc_bias = np.zeros(*fc_bshape)
fc_bias = random.rand(*fc_bshape) * 0

conv_wshape = (5, 1, 5, 5)
conv_weight = random.rand(*conv_wshape) - 0.5

conv_bshape = (5, )
#conv_bias = np.zeros(*conv_bshape)
conv_bias = random.rand(*conv_bshape) * 0
Example #18
from minpy import core
import minpy.numpy as np
import minpy.numpy.random as random
import mxnet as mx


def sigmoid(x):
    return np.multiply(0.5, np.add(np.tanh(x), 1))


xshape = (256, 500)
# reversed: MXNet's FullyConnected stores its weight as (num_hidden, input_dim)
wshape = (250, 500)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = np.random.rand(*wshape) - 0.5

x = mx.sym.Variable(name='x')
fc = mx.sym.FullyConnected(name='fc', data=x, num_hidden=250)
act = mx.sym.Activation(data=fc, act_type='sigmoid')

f = core.Function(act, {'x': xshape})


def predict(weights, inputs):
    #return f( data=[('x', inputs)], weight=[('fc_weight', weights)], ctx=mx.cpu())
    return f(x=inputs, fc_weight=weights)
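The snippet ends here; a hedged sketch of the loss and training loop that would plausibly follow, mirroring the complete version in Example #15:

def training_loss(weights, inputs):
    preds = predict(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

training_gradient_fun = core.grad(training_loss)

print('Initial loss: {}'.format(training_loss(weights, inputs)))
for i in range(100):
    gr = training_gradient_fun(weights, inputs)
    weights -= gr * 0.1
    if i % 10 == 0:
        print('Trained loss: {}'.format(training_loss(weights, inputs)))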
Example #19
def training_loss(weights, inputs):
    preds = predict(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l

def training_accuracy(weights, inputs):
    preds = predict(weights, inputs)
    error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0

xshape = (256, 500)
# wshape = (500, 250)
wshape = (500, 500)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5

training_gradient_fun = grad(training_loss)

def NumpyVarToMinpy(var):
  return minpy.array.Value.wrap(var)

def MinpyVarToNumpy(var):
  return minpy.array.Value.wrap(var).get_data(ArrayType.NUMPY)

for i in range(20):
    print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs)))
Example #20
import minpy.numpy as np
import minpy.numpy.random as random
from minpy.context import set_context, gpu
import win_unicode_console
win_unicode_console.enable()
set_context(gpu(0))

a = random.rand(3, 1)
b = random.rand(5, 1)
print(type(a))
print(type(a.asnumpy()))
c = np.row_stack((a.asnumpy(), b.asnumpy()))
# c = np.concat((a, b), axis=0)
print(c)

# values = np.zeros((1, 10))
# print(values)
# n_values = np.max(values) + 1
# print(n_values)
# d = np.eye(int(n_values[0]))[values]
# print(d)

# nb_classes = 6
# targets = np.array([3, 2]).reshape(-1)
# d = np.eye(nb_classes)[targets]
# print(d.T)

# nb_classes = 6
# targets = np.array([3]).reshape(-1)
# d = np.eye(nb_classes)[targets]
# print(d.T)
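The commented-out blocks above experiment with one-hot encoding; the index-assignment pattern used by the other examples also works. A minimal sketch (class count and labels are arbitrary):

nb_classes = 6
labels = np.array([3, 2])                 # integer class ids
onehot = np.zeros((2, nb_classes))
onehot[np.arange(2), labels] = 1          # same trick as targets[np.arange(256), truth] = 1
print(onehot)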
Example #21
import minpy.numpy as np
import minpy.numpy.random as random
from minpy.context import cpu, gpu
import time

n = 1000

with cpu():
    x_cpu = random.rand(1024, 1024) - 0.5
    y_cpu = random.rand(1024, 1024) - 0.5

    # dry run
    for i in range(10):
        z_cpu = np.dot(x_cpu, y_cpu)
    z_cpu.asnumpy()

    # real run
    t0 = time.time()
    for i in range(n):
        z_cpu = np.dot(x_cpu, y_cpu)
    z_cpu.asnumpy()
    t1 = time.time()

with gpu(0):
    x_gpu0 = random.rand(1024, 1024) - 0.5
    y_gpu0 = random.rand(1024, 1024) - 0.5

    # dry run
    for i in range(10):
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
    z_gpu0.asnumpy()
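    # Sketch (an assumption; the example is cut off here): time the GPU matmuls
    # the same way as the CPU ones above and report both. t2/t3 are new names.
    t2 = time.time()
    for i in range(n):
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
    z_gpu0.asnumpy()  # wait for the asynchronous GPU work to finish
    t3 = time.time()

print('cpu: {0:.4f} s, gpu(0): {1:.4f} s for {2} matmul iterations'.format(
    t1 - t0, t3 - t2, n))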
Example #22
def training_loss(weights, inputs):
    preds = predict(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l


def training_accuracy(weights, inputs):
    preds = predict(weights, inputs)
    error = np.count_nonzero(
        np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0


with gpu(0):
    xshape = (256, 500)
    wshape = (500, 250)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5

    training_gradient_fun = grad(training_loss)

    for i in range(20):
        print('Trained loss accuracy #{}: {}%'.format(
            i, training_accuracy(weights, inputs)))
        gr = training_gradient_fun(weights, inputs)
        weights -= gr * 0.01
    print("\nff and bp on {0}".format(weights.context))

print("\nexecute on cpu")