def test_cpu_gpu(n, s):
    """Compare the wall-clock cost of ``np.dot`` on the CPU and on GPU 0.

    Two ``s`` x ``s`` operands are created with ``_randn`` inside each
    device scope.  The product is computed 10 times as a warm-up and then
    ``n`` more times under a timer.  Per-iteration times for both devices
    and the CPU/GPU time ratio are printed; nothing is returned.

    The original inline comment suggested ``n = 10`` and ``s = 512`` as
    example arguments.

    Args:
        n: number of timed matrix products per device.
        s: side length of the square operands.
    """
    warmup_rounds = 10

    def _timed_products(device):
        # Run the warm-up and the timed loop inside ``device``; return the
        # elapsed seconds of the timed part only.
        with device:
            lhs = _randn(s, s)
            rhs = _randn(s, s)

            for _ in range(warmup_rounds):
                product = np.dot(lhs, rhs)
            # Presumably blocks until the result is materialised, so the
            # timer below does not include leftover warm-up work.
            product.asnumpy()

            started = time.time()
            for _ in range(n):
                product = np.dot(lhs, rhs)
            product.asnumpy()
            finished = time.time()
        return finished - started

    all_cpu_time = _timed_products(cpu())
    all_gpu_time = _timed_products(gpu(0))

    print("run on cpu:%.6f s/iter" % (all_cpu_time / n))
    print("run on gpu:%.6f s/iter" % (all_gpu_time / n))
    print("%s cpu_time/gpu_time:%.6f " % (s, all_cpu_time / all_gpu_time))
def test_context(): set_context(gpu(1)) # set the global context as gpu(1) def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 with gpu(0): xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(20): print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 print("\nff and bp on {0}".format(weights.context)) print("\nexecute on cpu") with cpu(): x_cpu = random.rand(32, 64) - 0.5 y_cpu = random.rand(64, 32) - 0.5 z_cpu = np.dot(x_cpu, y_cpu) print('z_cpu.context = {0}'.format(z_cpu.context)) print("\nexecute on gpu(0)") with gpu(0): x_gpu0 = random.rand(32, 64) - 0.5 y_gpu0 = random.rand(64, 32) - 0.5 z_gpu0 = np.dot(x_gpu0, y_gpu0) z_gpu0.asnumpy() print('z_gpu0.context = {0}'.format(z_gpu0.context)) print("\n[use global context] execute on gpu(1)") x_gpu1 = random.rand(32, 64) - 0.5 y_gpu1 = random.rand(64, 32) - 0.5 z_gpu1 = np.dot(x_gpu1, y_gpu1) z_gpu1.asnumpy() print('z_gpu1.context = {0}'.format(z_gpu1.context))
def test_policy_2():
    """Print the active policies inside and after an ``OnlyNumPyPolicy`` scope.

    The policies of ``np`` and ``np.random`` are printed while the
    ``minpy.OnlyNumPyPolicy()`` context manager is active.  The policy is
    then set to ``minpy.PreferMXNetPolicy()``, the context is set to
    ``cpu()``, and both policies are printed again.
    """

    def _report_policies():
        # Same two look-ups, in the same order, at every call site.
        print(np.policy)
        print(np.random.policy)

    with minpy.OnlyNumPyPolicy():
        _report_policies()

    np.set_policy(minpy.PreferMXNetPolicy())
    set_context(cpu())
    _report_policies()
def test_context(): #set_context(gpu(1)) # set the global context as gpu(1) def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 """ with gpu(0): xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(20): print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 print("\nff and bp on {0}".format(weights.context)) """ print("\nexecute on cpu") with cpu(): x_cpu = random.rand(32, 64) - 0.5 y_cpu = random.rand(64, 32) - 0.5 z_cpu = np.dot(x_cpu, y_cpu) print('z_cpu.context = {0}'.format(z_cpu.context)) """
# Script fragment: loads CIFAR-10 through a project helper and assembles a
# multi-layer perceptron with minpy's model builder.
import minpy.numpy as np
import minpy.nn.model_builder as builder
from minpy.context import set_context, cpu, gpu

# Alternative device selection, left disabled:
# set_context(gpu(0))
set_context(cpu())

import sys
# The three star imports below are resolved through this extra search path,
# so the append has to stay ahead of them.
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
# Makes the ``utilities`` package importable.
sys.path.append('../')
from utilities.data_utility import load_cifar10

data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
# First 16 entries of the first element returned by the loader.
# NOTE(review): presumably these are training images - confirm against
# load_cifar10.
X = data[0][:16]

hidden_layers = 4
# Four widths of 1024 followed by a final width of 10.
shapes = (1024, ) * hidden_layers + (10, )
activation = builder.ReLU
# Handed to every Export layer below.
# NOTE(review): presumably Export stores the preceding layer's output in
# this dict under the given name - confirm.
storage = {}
mlp = builder.Sequential()
# For every width except the last: Affine -> Export -> activation.
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
# Final Affine layer uses the last width and has no activation after it.
mlp.append(builder.Affine(shapes[-1]))
# NOTE(review): (3072, ) is presumably the flattened input shape
# (3 * 32 * 32) - confirm.
model = builder.Model(mlp, 'softmax', (3072, ))
# Script fragment: runs repeated 1024 x 1024 matrix products on the CPU
# (warm-up plus a timed loop) and starts the same procedure on GPU 0.
# Only the GPU warm-up is visible in this fragment.
import minpy.numpy as np
import minpy.numpy.random as random
from minpy.context import cpu, gpu
import time

# Number of products in the timed loop.
n = 1000

with cpu():
    x_cpu = random.rand(1024, 1024) - 0.5
    y_cpu = random.rand(1024, 1024) - 0.5

    # dry run
    for i in range(10):
        z_cpu = np.dot(x_cpu, y_cpu)
    # NOTE(review): presumably forces the pending result to be computed
    # before the timer starts - confirm.
    z_cpu.asnumpy()

    # real run
    t0 = time.time()
    for i in range(n):
        z_cpu = np.dot(x_cpu, y_cpu)
    z_cpu.asnumpy()
    # t0 and t1 are recorded here but not read anywhere in the visible
    # fragment.
    t1 = time.time()

with gpu(0):
    x_gpu0 = random.rand(1024, 1024) - 0.5
    y_gpu0 = random.rand(1024, 1024) - 0.5

    # dry run
    for i in range(10):
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
    z_gpu0.asnumpy()
def garchSim(ret2, p):
    """Build the series ``h`` from squared returns with a GARCH(1,1)-style recurrence.

    ``h[0]`` is the mean of ``ret2``; for every later index ``t``::

        h[t] = p[0] + p[1] * ret2[t - 1] + p[2] * h[t - 1]

    Args:
        ret2: one-dimensional array-like exposing ``.shape``.
        p: indexable holding at least three coefficients.

    Returns:
        A float32 array with the same length as ``ret2``.
    """
    n_obs = ret2.shape[0]
    h = np.zeros(n_obs, dtype='float32')
    h[0] = np.mean(ret2)
    for t in range(1, n_obs):
        prev = t - 1
        h[t] = p[0] + p[1] * ret2[prev] + p[2] * h[prev]
    return h


def garchLLH(y, par):
    """Evaluate the log-likelihood expression for ``y`` under parameters ``par``.

    With ``h = garchSim(y ** 2, par)`` and ``T = len(y)`` the value is::

        -0.5 * (T - 1) * log(2 * pi) - 0.5 * sum(log(h) + (y / sqrt(h)) ** 2)

    Returns:
        Element 0 of the computed value (the expression is indexed, so it
        is expected to be array-like rather than a plain float).
    """
    h = garchSim(np.square(y), par)
    T = y.shape[0]
    constant_term = -0.5 * (T - 1) * math.log(2 * math.pi)
    data_term = 0.5 * np.sum(np.log(h) + (y / np.sqrt(h))**2)
    llh = constant_term - data_term
    return llh[0]


def llh_time():
    """Run ``benchmark`` on ``garchLLH`` using the data from ``garch_data()``.

    The returns and the starting parameters are converted with ``np.array``
    before being handed to the timed callable.

    Returns:
        Whatever ``benchmark`` returns.
    """
    raw_returns, raw_params, val_llh = garch_data()
    returns = np.array(raw_returns)
    params = np.array(raw_params)
    # NOTE(review): presumably benchmark calls the lambda args.n times and
    # checks the result against val_llh - confirm against its definition.
    return benchmark(lambda: garchLLH(returns, params), args.n, val_llh)


# Time the likelihood on the device selected by ``args.mode`` and emit the
# collected results as JSON.
with cpu() if args.mode == 'cpu' else gpu(0):
    out['minpy-' + args.mode] = llh_time()
print(json.dumps(out))
# The string literal below holds an older, disabled way of building the
# data iterators (via NDArrayIter); it has no effect at run time.
''' from examples.utils.data_utils import get_CIFAR10_data data = get_CIFAR10_data(args.data_dir) from minpy.nn.io import NDArrayIter batch_size = 128 train_data_iter = NDArrayIter(data=data['X_train'], label=data['y_train'], batch_size=batch_size, shuffle=True) val_data_iter = NDArrayIter(data=data['X_test'], label=data['y_test'], batch_size=batch_size, shuffle=False) '''

# ``args`` is defined outside the visible fragment.
from load_cifar10_data_iter import *
train_data_iter, val_data_iter = load_cifar10_data_iter(batch_size=128, path=args.data_dir)

from minpy.context import set_context, cpu, gpu
# A negative gpu_index selects the CPU; otherwise it is the index of the GPU
# to use.
if args.gpu_index < 0:
    set_context(cpu())
else:
    set_context(gpu(args.gpu_index))

# ``ResNet`` and ``Updater`` are not imported by name in the visible
# fragment.
# NOTE(review): the meaning of the ResNet arguments cannot be determined
# from here - confirm against its definition.
model = ResNet(3, (16, 32, 64))
# NOTE(review): ``momentem`` looks like a misspelling of ``momentum``.  If
# Updater accepts arbitrary keyword arguments the value 0.9 may be silently
# ignored - confirm against Updater and the 'sgd' update rule.
updater = Updater(model, update_rule='sgd', learning_rate=0.1, momentem=0.9)

# Loop bookkeeping.  Nothing in the visible fragment sets ``terminated`` to
# True; the rest of the loop body lies outside this view.
epoch_number = 0
iteration_number = 0
terminated = False

while not terminated:
    # training
    epoch_number += 1
''' from examples.utils.data_utils import get_CIFAR10_data data = get_CIFAR10_data(args.data_dir) from minpy.nn.io import NDArrayIter batch_size = 128 train_data_iter = NDArrayIter(data=data['X_train'], label=data['y_train'], batch_size=batch_size, shuffle=True) val_data_iter = NDArrayIter(data=data['X_test'], label=data['y_test'], batch_size=batch_size, shuffle=False) ''' from load_cifar10_data_iter import * train_data_iter, val_data_iter = load_cifar10_data_iter(batch_size=128, path=args.data_dir) from minpy.context import set_context, cpu, gpu if args.gpu_index < 0: set_context(cpu()) else: set_context(gpu(args.gpu_index)) model = ResNet(3, (16, 32, 64)) updater = Updater(model, update_rule='sgd', learning_rate=0.1, momentem=0.9) epoch_number = 0 iteration_number = 0 terminated = False while not terminated: # training epoch_number += 1 train_data_iter.reset() for iteration, batch in enumerate(train_data_iter):