def test_lr_grad():
    """Gradient-check a logistic-regression loss with respect to its inputs.

    Builds a random problem with 256 samples, 500 features and 250 one-hot
    classes, then hands the loss (as a function of ``inputs`` only) to
    ``gradient_checker.quick_grad_check``.
    """

    def sigmoid(x):
        # Logistic function written in terms of tanh.
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(inputs):
        # ``weights`` and ``targets`` are captured from the enclosing scope,
        # so ``inputs`` is the only argument the checker perturbs.
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    wshape = (500, 250)
    weights = random.rand(*wshape) - 0.5
    xshape = (256, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5

    # One-hot targets: exactly one randomly chosen class per sample.
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1

    gradient_checker.quick_grad_check(training_loss, inputs)
def test_lr_grad():
    """Gradient-check a logistic-regression loss with respect to its inputs.

    A random problem (256 samples, 500 features, 250 one-hot classes) is
    generated and the loss, viewed as a function of ``inputs`` alone, is
    passed to ``gradient_checker.quick_grad_check``.
    """

    def sigmoid(x):
        # Logistic function written in terms of tanh.
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(inputs):
        # Only ``inputs`` is a parameter; ``weights`` and ``targets`` come
        # from the enclosing scope and stay fixed during the check.
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    wshape = (500, 250)
    weights = random.rand(*wshape) - 0.5
    xshape = (256, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5

    # One-hot targets: exactly one randomly chosen class per sample.
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1

    gradient_checker.quick_grad_check(training_loss, inputs)
def main():
    """Print two random 3x3 arrays and the gradients of ``f`` for each.

    ``f`` is not defined in this block; it is looked up from the enclosing
    module. ``core.grad`` is asked for the gradients with respect to
    positional arguments 0 and 1.
    """
    x = random.rand(3, 3)
    y = random.rand(3, 3)
    print('x: {}'.format(x.asnumpy()))
    print('y: {}'.format(y.asnumpy()))

    gradient_fn = core.grad(f, argnum=[0, 1])
    gradients = gradient_fn(x, y)
    grad_x = gradients[0]
    grad_y = gradients[1]
    print('grad_x: {}'.format(grad_x.asnumpy()))
    print('grad_y: {}'.format(grad_y.asnumpy()))
def test_context():
    """Exercise explicit and global device contexts.

    Steps, in order:
      1. make gpu(1) the global context;
      2. train a logistic regression for 20 steps inside ``with gpu(0)``;
      3. run a matrix product inside ``with cpu()``;
      4. run a matrix product inside ``with gpu(0)``;
      5. run a matrix product with no ``with`` block, i.e. under the global
         context selected in step 1.
    The context of each result is printed.
    """
    set_context(gpu(1))  # set the global context as gpu(1)

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        # ``targets`` is bound later, inside the ``with gpu(0)`` block.
        probs = predict(weights, inputs)
        label_probabilities = probs * targets + (1 - probs) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    def training_accuracy(weights, inputs):
        predicted = np.argmax(predict(weights, inputs), axis=1)
        expected = np.argmax(targets, axis=1)
        mismatches = np.count_nonzero(predicted - expected)
        return (256 - mismatches) * 100 / 256.0

    with gpu(0):
        inputs = random.rand(256, 500) - 0.5
        targets = np.zeros((256, 250))
        truth = random.randint(0, 250, 256)
        targets[np.arange(256), truth] = 1
        weights = random.rand(500, 250) - 0.5

        training_gradient_fun = grad(training_loss)
        for step in range(20):
            accuracy = training_accuracy(weights, inputs)
            print('Trained loss accuracy #{}: {}%'.format(step, accuracy))
            weights -= training_gradient_fun(weights, inputs) * 0.01
        print("\nff and bp on {0}".format(weights.context))

    print("\nexecute on cpu")
    with cpu():
        x_cpu = random.rand(32, 64) - 0.5
        y_cpu = random.rand(64, 32) - 0.5
        z_cpu = np.dot(x_cpu, y_cpu)
        print('z_cpu.context = {0}'.format(z_cpu.context))

    print("\nexecute on gpu(0)")
    with gpu(0):
        x_gpu0 = random.rand(32, 64) - 0.5
        y_gpu0 = random.rand(64, 32) - 0.5
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
        z_gpu0.asnumpy()
        print('z_gpu0.context = {0}'.format(z_gpu0.context))

    print("\n[use global context] execute on gpu(1)")
    x_gpu1 = random.rand(32, 64) - 0.5
    y_gpu1 = random.rand(64, 32) - 0.5
    z_gpu1 = np.dot(x_gpu1, y_gpu1)
    z_gpu1.asnumpy()
    print('z_gpu1.context = {0}'.format(z_gpu1.context))
def test_context(): #set_context(gpu(1)) # set the global context as gpu(1) def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 """ with gpu(0): xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(20): print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 print("\nff and bp on {0}".format(weights.context)) """ print("\nexecute on cpu") with cpu(): x_cpu = random.rand(32, 64) - 0.5 y_cpu = random.rand(64, 32) - 0.5 z_cpu = np.dot(x_cpu, y_cpu) print('z_cpu.context = {0}'.format(z_cpu.context)) """
def dropout_forward(x, dropout_param):
    """
    Performs the forward pass for (inverted) dropout.

    In train mode each element of x is zeroed with probability p and the
    surviving elements are scaled by 1 / (1 - p), so the expected value of
    the output equals x. Because of that scaling, test mode can return the
    input untouched.

    Inputs:
    - x: Input data, of any shape
    - dropout_param: A dictionary with the following keys:
      - p: Dropout parameter. We drop each neuron output with probability p.
      - mode: 'test' or 'train'. If the mode is train, then perform dropout;
        any other value returns the input unchanged.
      - seed: (optional) Seed for the random number generator. Passing seed
        makes this function deterministic, which is needed for gradient
        checking but not in real networks.

    Outputs:
    - out: Array of the same shape as x. Only this array is returned; no
      cache/mask is handed back to the caller.
    """
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        random.seed(dropout_param['seed'])

    if mode != 'train':
        return x

    keep_prob = 1.0 - p
    # With p >= 1 the mask below is all False, so the scale is irrelevant;
    # fall back to 1.0 instead of dividing by zero.
    scale = 1.0 / keep_prob if keep_prob > 0 else 1.0

    #TODO: check implementation of compare operator in mxnet?
    mask = random.rand(*x.shape) > p
    # Drop, then rescale the survivors (the "inverted" part of dropout).
    return x * mask * scale
def dropout(x, prob, mode='train', seed=None):
    """
    Performs the forward pass for dropout with test-time scaling.

    Inputs:
    - x: Input data, of any shape
    - prob: Dropout parameter. In train mode an element is kept only when a
      uniform random draw exceeds prob, i.e. it is dropped with probability
      prob.
    - mode: If 'train', apply the random mask without rescaling. For any
      other value, no mask is drawn and the input is multiplied by
      (1 - prob).
    - seed: Seed for the random number generator; applied (when not None)
      before anything else, whatever the mode.

    Outputs:
    - out: Array of the same shape as x.
    """
    if seed is not None:
        random.seed(seed)

    if mode != 'train':
        # Test-time path: scale by the keep probability.
        return x * (1 - prob)

    #TODO: check implementation of compare operator in mxnet?
    keep_mask = random.rand(*x.shape) > prob
    return x * keep_mask  # drop!
def test_op_statistics():
    """Record operator statistics over 30 logistic-regression training steps.

    ``np.record_op_stat()`` is called before any data is created and
    ``np.show_op_stat()`` after the training loop finishes.
    """

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        # ``targets`` is captured from the enclosing scope.
        probs = predict(weights, inputs)
        label_probabilities = probs * targets + (1 - probs) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    def training_accuracy(weights, inputs):
        predicted = np.argmax(predict(weights, inputs), axis=1)
        expected = np.argmax(targets, axis=1)
        mismatches = np.count_nonzero(predicted - expected)
        return (256 - mismatches) * 100 / 256.0

    np.record_op_stat()

    inputs = random.rand(256, 500) - 0.5
    targets = np.zeros((256, 250))
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(500, 250) - 0.5

    training_gradient_fun = grad(training_loss)
    for step in range(30):
        accuracy = training_accuracy(weights, inputs)
        print('Trained accuracy #{}: {}%'.format(step, accuracy))
        weights -= training_gradient_fun(weights, inputs) * 0.01

    # Print Op Statistics Info
    np.show_op_stat()
def test_logistic():
    """Run 200 gradient-descent steps of logistic regression on random data.

    The training accuracy is printed before every weight update; this
    version performs no final check on the result.
    """

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        # ``targets`` is captured from the enclosing scope.
        probs = predict(weights, inputs)
        label_probabilities = probs * targets + (1 - probs) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    def training_accuracy(weights, inputs):
        predicted = np.argmax(predict(weights, inputs), axis=1)
        expected = np.argmax(targets, axis=1)
        mismatches = np.count_nonzero(predicted - expected)
        return (256 - mismatches) * 100 / 256.0

    inputs = random.rand(256, 500) - 0.5
    targets = np.zeros((256, 250))
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(500, 250) - 0.5

    training_gradient_fun = grad(training_loss)
    for step in range(200):
        accuracy = training_accuracy(weights, inputs)
        print('Trained accuracy #{}: {}%'.format(step, accuracy))
        weights -= training_gradient_fun(weights, inputs) * 0.01
def test_logistic():
    """Train logistic regression on random data and require >= 95% accuracy.

    Runs 200 gradient-descent steps (step size 0.01) on 256 random samples
    with 500 features and 250 one-hot classes, printing the training
    accuracy before every update. Fails with a descriptive message when the
    final training accuracy is below 95%.
    """

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        # ``targets`` is captured from the enclosing scope.
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    xshape = (256, 500)
    wshape = (500, 250)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = random.rand(*wshape) - 0.5

    training_gradient_fun = grad(training_loss)
    for i in range(200):
        print('Trained accuracy #{}: {}%'.format(
            i, training_accuracy(weights, inputs)))
        gr = training_gradient_fun(weights, inputs)
        weights -= gr * 0.01

    # The accuracy should be 100 in bug-free MinPy
    final_accuracy = training_accuracy(weights, inputs)
    assert final_accuracy >= 95, (
        'final training accuracy {}% is below the 95% threshold'.format(
            final_accuracy))
def test_slice():
    """Train logistic regression through a strided slice of the weights.

    ``predict`` uses only every second column of a (500, 500) weight
    matrix. For 20 steps the accuracy, the gradient's shape and the first
    ten entries of its first row are printed before the weights are updated.
    """

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        # Test Slice
        every_other_column = weights[:, ::2]
        return sigmoid(np.dot(inputs, every_other_column))

    def training_loss(weights, inputs):
        # ``targets`` is captured from the enclosing scope.
        probs = predict(weights, inputs)
        label_probabilities = probs * targets + (1 - probs) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    def training_accuracy(weights, inputs):
        predicted = np.argmax(predict(weights, inputs), axis=1)
        expected = np.argmax(targets, axis=1)
        mismatches = np.count_nonzero(predicted - expected)
        return (256 - mismatches) * 100 / 256.0

    inputs = random.rand(256, 500) - 0.5
    targets = np.zeros((256, 250))
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    # wshape = (500, 250)
    weights = random.rand(500, 500) - 0.5

    training_gradient_fun = grad(training_loss)
    for step in range(20):
        accuracy = training_accuracy(weights, inputs)
        print('Trained loss accuracy #{}: {}%'.format(step, accuracy))
        gradient = training_gradient_fun(weights, inputs)
        print('Gradient Size', gradient.shape)
        print('Gradient example', gradient[0, :10].asnumpy())
        weights -= gradient * 0.01
def test_slice():
    """Train logistic regression through a strided slice of the weights.

    Only every second column of the (500, 500) weight matrix takes part in
    the prediction. Each of the 20 steps prints the accuracy, the shape of
    the gradient and the first ten entries of its first row, then applies
    the update.
    """

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        # Test Slice
        every_other_column = weights[:, ::2]
        return sigmoid(np.dot(inputs, every_other_column))

    def training_loss(weights, inputs):
        # ``targets`` is captured from the enclosing scope.
        probs = predict(weights, inputs)
        label_probabilities = probs * targets + (1 - probs) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    def training_accuracy(weights, inputs):
        predicted = np.argmax(predict(weights, inputs), axis=1)
        expected = np.argmax(targets, axis=1)
        mismatches = np.count_nonzero(predicted - expected)
        return (256 - mismatches) * 100 / 256.0

    inputs = random.rand(256, 500) - 0.5
    targets = np.zeros((256, 250))
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    # wshape = (500, 250)
    weights = random.rand(500, 500) - 0.5

    training_gradient_fun = grad(training_loss)
    for step in range(20):
        accuracy = training_accuracy(weights, inputs)
        print('Trained loss accuracy #{}: {}%'.format(step, accuracy))
        gradient = training_gradient_fun(weights, inputs)
        print('Gradient Size', gradient.shape)
        print('Gradient example', gradient[0, :10].asnumpy())
        weights -= gradient * 0.01
def test_mxnet_logistic():
    """Train a logistic regression whose forward pass is an MXNet symbol.

    A FullyConnected layer (250 hidden units) followed by a sigmoid
    Activation is wrapped with ``core.Function`` and trained for 100
    gradient-descent steps (step size 0.1) on random data. The loss is
    printed before training and every 10th step. The test fails when the
    final loss exceeds 600.
    """
    xshape = (256, 500)
    #needs to reverse. because of mxnet's setting
    wshape = (250, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = np.random.rand(*wshape) - 0.5

    x = mx.sym.Variable(name='x')
    fc = mx.sym.FullyConnected(name='fc', data=x, num_hidden=250)
    act = mx.sym.Activation(data=fc, act_type='sigmoid')
    f = core.Function(act, {'x': xshape})

    def predict(weights, inputs):
        #return f( data=[('x', inputs)], weight=[('fc_weight', weights)], ctx=mx.cpu())
        return f(x=inputs, fc_weight=weights)

    def training_loss(weights, inputs):
        # ``targets`` is captured from the enclosing scope.
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    training_gradient_fun = core.grad(training_loss)
    print('Initial loss: {}'.format(training_loss(weights, inputs)))
    for i in range(100):
        gr = training_gradient_fun(weights, inputs)
        #print('Training gradient: {}'.format(gr))
        weights -= gr * 0.1
        if i % 10 == 0:
            print('Trained loss: {}'.format(training_loss(weights, inputs)))

    # The training loss should be around 300 in a bug-free Minpy
    final_loss = training_loss(weights, inputs)[0]
    # Written as ``<=`` so that a NaN loss also fails instead of slipping by.
    assert final_loss <= 600, (
        'final training loss {} exceeds the 600 threshold'.format(final_loss))
def training_loss(weights, bias, inputs):
    """Negative log-likelihood of ``targets`` under ``predict``'s output.

    ``predict`` and ``targets`` are not parameters; they are looked up from
    the surrounding scope.
    """
    preds = predict(weights, bias, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l


def training_accuracy(weights, bias, inputs):
    """Percentage of the 256 samples whose arg-max prediction matches."""
    preds = predict(weights, bias, inputs)
    error = np.count_nonzero(
        np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0


xshape = (256, 500)
wshape = (500, 250)
# Trailing comma added: ``(250)`` is the int 250, not a one-element shape.
bshape = (250,)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5
#bias = random.rand(*bshape) - 0.5
#print(bias.shape)
bias = np.zeros(bshape)
# Was the Python 2 statement ``print bias.shape``; the call form works on
# both Python 2 and 3 and matches the other prints here.
print(bias.shape)

training_gradient_fun = grad(training_loss)
for i in range(20):
    print('Trained loss accuracy #{}: {}%'.format(
        i, training_accuracy(weights, bias, inputs)))
    gr = training_gradient_fun(weights, bias, inputs)
    # NOTE(review): only ``weights`` is updated; ``bias`` stays at zero.
    # Presumably intentional for this test -- confirm.
    weights -= gr * 0.01
from minpy import core
import minpy.numpy as np
import minpy.numpy.random as random
import mxnet as mx


def sigmoid(x):
    """Return ``0.5 * (tanh(x) + 1)`` built from ``np.multiply``/``np.add``.

    NOTE(review): mathematically this is the logistic function evaluated at
    ``2 * x`` (there is no ``x / 2`` inside the tanh) -- confirm this is
    intended.
    """
    return np.multiply(0.5, np.add(np.tanh(x), 1))


#xshape = (256, 500)
# 4-D input; presumably (batch, channels, height, width) -- TODO confirm.
xshape = (256, 1, 30, 30)
#needs to reverse. because of mxnet's setting
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5

# One-hot targets: one randomly chosen class (0..249) per sample.
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1

# Fully-connected layer parameters.
fc_wshape = (250, 845)
fc_weight = random.rand(*fc_wshape) - 0.5
fc_bshape = (250, )
#fc_bias = np.zeros(*fc_bshape)
# Zero bias obtained by multiplying a random draw by 0.
fc_bias = random.rand(*fc_bshape) * 0

# Convolution layer parameters.
conv_wshape = (5, 1, 5, 5)
conv_weight = random.rand(*conv_wshape) - 0.5
conv_bshape = (5, )
#conv_bias = np.zeros(*conv_bshape)
# Zero bias obtained by multiplying a random draw by 0.
conv_bias = random.rand(*conv_bshape) * 0
from minpy import core
import minpy.numpy as np
import minpy.numpy.random as random
import mxnet as mx


def sigmoid(x):
    """Return ``0.5 * (tanh(x) + 1)`` built from ``np.multiply``/``np.add``.

    NOTE(review): mathematically this is the logistic function evaluated at
    ``2 * x`` (there is no ``x / 2`` inside the tanh) -- confirm this is
    intended.
    """
    return np.multiply(0.5, np.add(np.tanh(x), 1))


xshape = (256, 500)
#needs to reverse. because of mxnet's setting
wshape = (250, 500)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5

# One-hot targets: one randomly chosen class (0..249) per sample.
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = np.random.rand(*wshape) - 0.5

# Symbolic graph: variable 'x' -> FullyConnected 'fc' (250 units) -> sigmoid.
x = mx.sym.Variable(name='x')
fc = mx.sym.FullyConnected(name='fc', data=x, num_hidden=250)
act = mx.sym.Activation(data=fc, act_type='sigmoid')
# Wrap the symbol so it can be called with keyword arrays; the dict supplies
# the shape of input 'x'.
f = core.Function(act, {'x': xshape})


def predict(weights, inputs):
    """Evaluate ``f`` with ``inputs`` bound to 'x' and ``weights`` to 'fc_weight'."""
    #return f( data=[('x', inputs)], weight=[('fc_weight', weights)], ctx=mx.cpu())
    return f(x=inputs, fc_weight=weights)
def training_loss(weights, inputs):
    """Negative log-likelihood of ``targets`` under ``predict``'s output.

    ``predict`` and ``targets`` are looked up from the surrounding scope.
    """
    probs = predict(weights, inputs)
    label_probabilities = probs * targets + (1 - probs) * (1 - targets)
    return -np.sum(np.log(label_probabilities))


def training_accuracy(weights, inputs):
    """Percentage of the 256 samples whose arg-max prediction matches."""
    predicted = np.argmax(predict(weights, inputs), axis=1)
    expected = np.argmax(targets, axis=1)
    mismatches = np.count_nonzero(predicted - expected)
    return (256 - mismatches) * 100 / 256.0


xshape = (256, 500)
# wshape = (500, 250)
wshape = (500, 500)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5

# One-hot targets: one randomly chosen class (0..249) per sample.
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5

training_gradient_fun = grad(training_loss)


def NumpyVarToMinpy(var):
    """Wrap ``var`` with ``minpy.array.Value.wrap``."""
    return minpy.array.Value.wrap(var)


def MinpyVarToNumpy(var):
    """Wrap ``var`` and fetch its data as ``ArrayType.NUMPY``."""
    wrapped = minpy.array.Value.wrap(var)
    return wrapped.get_data(ArrayType.NUMPY)


for i in range(20):
    print('Trained loss accuracy #{}: {}%'.format(
        i, training_accuracy(weights, inputs)))
# Scratch script: create two random column vectors under gpu(0), print their
# types, and stack their asnumpy() conversions row-wise.
import minpy.numpy as np
import minpy.numpy.random as random
from minpy.context import set_context, gpu
import win_unicode_console

# Enabled before anything is printed.
win_unicode_console.enable()
# Select gpu(0) through set_context before any array is created.
set_context(gpu(0))

a = random.rand(3, 1)
b = random.rand(5, 1)
# Show the array type before and after asnumpy().
print(type(a))
print(type(a.asnumpy()))
# Both operands are converted with asnumpy() before being stacked.
c = np.row_stack((a.asnumpy(), b.asnumpy()))
# c = np.concat((a, b), axis=0)
print(c)

# Disabled experiments with np.eye-based one-hot indexing:
# values = np.zeros((1, 10))
# print(values)
# n_values = np.max(values) + 1
# print(n_values)
# d = np.eye(int(n_values[0]))[values]
# print(d)

# nb_classes = 6
# targets = np.array([3, 2]).reshape(-1)
# d = np.eye(nb_classes)[targets]
# print(d.T)

# nb_classes = 6
# targets = np.array([3]).reshape(-1)
# d = np.eye(nb_classes)[targets]
# print(d.T)
# Benchmark script: times repeated 1024x1024 matrix products under cpu() and
# starts the same procedure under gpu(0).
import minpy.numpy as np
import minpy.numpy.random as random
from minpy.context import cpu, gpu
import time

# Number of timed np.dot calls.
n = 1000

with cpu():
    x_cpu = random.rand(1024, 1024) - 0.5
    y_cpu = random.rand(1024, 1024) - 0.5
    # dry run
    for i in range(10):
        z_cpu = np.dot(x_cpu, y_cpu)
    # presumably asnumpy() forces pending work to finish before timing
    # starts -- TODO confirm
    z_cpu.asnumpy()
    # real run
    t0 = time.time()
    for i in range(n):
        z_cpu = np.dot(x_cpu, y_cpu)
    # asnumpy() is called before t1 is taken, so it is part of the timed span.
    z_cpu.asnumpy()
    t1 = time.time()

with gpu(0):
    x_gpu0 = random.rand(1024, 1024) - 0.5
    y_gpu0 = random.rand(1024, 1024) - 0.5
    # dry run
    for i in range(10):
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
    z_gpu0.asnumpy()
l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 with gpu(0): xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(20): print('Trained loss accuracy #{}: {}%'.format( i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 print("\nff and bp on {0}".format(weights.context)) print("\nexecute on cpu")