def test_context():
    """Exercise MinPy context selection: global, scoped GPU and scoped CPU.

    Sets gpu(1) as the global context, runs 20 gradient-descent steps of a
    sigmoid classifier inside ``with gpu(0)``, then performs small matrix
    products under ``cpu()``, ``gpu(0)`` and the global context, printing the
    context reported by each result.
    """
    set_context(gpu(1))  # set the global context as gpu(1)

    def sigmoid(x):
        # Logistic function expressed through tanh.
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        # `targets` is resolved from the enclosing function at call time.
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        mismatches = np.argmax(preds, axis=1) - np.argmax(targets, axis=1)
        error = np.count_nonzero(mismatches)
        return (256 - error) * 100 / 256.0

    with gpu(0):
        xshape = (256, 500)
        wshape = (500, 250)
        tshape = (256, 250)
        inputs = random.rand(*xshape) - 0.5
        targets = np.zeros(tshape)
        truth = random.randint(0, 250, 256)
        # One-hot encode the randomly drawn labels.
        targets[np.arange(256), truth] = 1
        weights = random.rand(*wshape) - 0.5

        training_gradient_fun = grad(training_loss)

        for step in range(20):
            accuracy = training_accuracy(weights, inputs)
            print('Trained loss accuracy #{}: {}%'.format(step, accuracy))
            gradient = training_gradient_fun(weights, inputs)
            weights -= gradient * 0.01
        print("\nff and bp on {0}".format(weights.context))

    print("\nexecute on cpu")
    with cpu():
        lhs_cpu = random.rand(32, 64) - 0.5
        rhs_cpu = random.rand(64, 32) - 0.5
        z_cpu = np.dot(lhs_cpu, rhs_cpu)
        print('z_cpu.context = {0}'.format(z_cpu.context))

    print("\nexecute on gpu(0)")
    with gpu(0):
        lhs_gpu0 = random.rand(32, 64) - 0.5
        rhs_gpu0 = random.rand(64, 32) - 0.5
        z_gpu0 = np.dot(lhs_gpu0, rhs_gpu0)
        z_gpu0.asnumpy()
        print('z_gpu0.context = {0}'.format(z_gpu0.context))

    # No `with` block here: the global context set at the top applies.
    print("\n[use global context] execute on gpu(1)")
    lhs_gpu1 = random.rand(32, 64) - 0.5
    rhs_gpu1 = random.rand(64, 32) - 0.5
    z_gpu1 = np.dot(lhs_gpu1, rhs_gpu1)
    z_gpu1.asnumpy()
    print('z_gpu1.context = {0}'.format(z_gpu1.context))
def main(args):
    """Train a policy-gradient agent on the "Pong-v0" gym environment.

    Args:
        args: Parsed options. This function reads ``gpu``, ``seed``,
            ``learning_rate``, ``decay_rate``, ``render``, ``save_dir``,
            ``save_every``, ``resume_from``, ``num_episodes``, ``verbose``
            and ``print_every``.
    """
    if args.gpu:
        from minpy.context import set_context, gpu
        set_context(gpu(0))  # set the global context as gpu(0)

    env = gym.make("Pong-v0")
    env.seed(args.seed)
    numpy.random.seed(args.seed)

    model = PolicyNetwork(PongPreprocessor())

    def scaled_gaussian(shape):
        # Gaussian weights divided by sqrt of the second dimension.
        return np.random.randn(shape[0], shape[1]) / numpy.sqrt(shape[1])

    optim_config = {
        'learning_rate': args.learning_rate,
        'decay_rate': args.decay_rate
    }
    init_config = {'function': scaled_gaussian}

    solver = RLPolicyGradientSolver(
        model,
        env,
        update_rule='rmsprop',
        optim_config=optim_config,
        init_rule='custom',
        init_config=init_config,
        render=args.render,
        save_dir=args.save_dir,
        save_every=args.save_every,
        resume_from=args.resume_from,
        num_episodes=args.num_episodes,
        verbose=args.verbose,
        print_every=args.print_every)
    solver.init()
    solver.train()
def main(args):
    """Build and run an RMSProp policy-gradient solver for "Pong-v0".

    Args:
        args: Parsed options; supplies the GPU flag, random seed, optimizer
            hyper-parameters and the solver's rendering, checkpointing and
            logging settings.
    """
    if args.gpu:
        from minpy.context import set_context, gpu
        set_context(gpu(0))  # set the global context as gpu(0)

    # Seed both the environment and NumPy before anything is sampled.
    env = gym.make("Pong-v0")
    env.seed(args.seed)
    numpy.random.seed(args.seed)

    model = PolicyNetwork(PongPreprocessor())

    solver_kwargs = dict(
        update_rule='rmsprop',
        optim_config={
            'learning_rate': args.learning_rate,
            'decay_rate': args.decay_rate
        },
        init_rule='custom',
        init_config={
            'function': lambda shape: np.random.randn(shape[0], shape[1]) /
            numpy.sqrt(shape[1])
        },
        render=args.render,
        save_dir=args.save_dir,
        save_every=args.save_every,
        resume_from=args.resume_from,
        num_episodes=args.num_episodes,
        verbose=args.verbose,
        print_every=args.print_every,
    )
    solver = RLPolicyGradientSolver(model, env, **solver_kwargs)
    solver.init()
    solver.train()
def test_cpu_gpu(n, s):
    """Time ``n`` square matrix products of size ``s`` on CPU and on gpu(0).

    Each device first runs 10 untimed warm-up products. The per-iteration
    times and the CPU/GPU time ratio are printed.

    Args:
        n: Number of timed ``np.dot`` calls per device.
        s: Side length of the square operands.
    """

    def _timed_dots(context):
        # Operands are created inside the context so they live on that device.
        with context:
            lhs = _randn(s, s)
            rhs = _randn(s, s)
            # Warm-up pass, excluded from the measurement.
            for _ in range(10):
                product = np.dot(lhs, rhs)
            product.asnumpy()
            started = time.time()
            for _ in range(n):
                product = np.dot(lhs, rhs)
            product.asnumpy()
            finished = time.time()
        return finished - started

    all_cpu_time = _timed_dots(cpu())
    all_gpu_time = _timed_dots(gpu(0))

    print("run on cpu:%.6f s/iter" % (all_cpu_time / n))
    print("run on gpu:%.6f s/iter" % (all_gpu_time / n))
    print("%s cpu_time/gpu_time:%.6f " % (s, all_cpu_time / all_gpu_time))
def fixed_2daxis_slice(arr, resultarr, length, axis=0, customitr=None):
    """Copy one element from each selected row of ``arr`` into ``resultarr``.

    For the k-th index ``i`` produced by the iterable, stores
    ``arr[i][axis]`` in ``resultarr[k]``. The work runs inside
    ``with gpu(0)``.

    Args:
        arr: Indexable container of indexable rows.
        resultarr: Output container, written in place; nothing is returned.
        length: Number of leading rows to visit when ``customitr`` is None.
        axis: Position read within each row (default 0).
        customitr: Optional iterable of row indices; defaults to
            ``range(0, length)``.
    """
    if customitr is None:
        customitr = range(0, length)
    with gpu(0):
        # enumerate supplies the output position; iterating the object
        # directly also accepts sequences that only define __getitem__.
        for index, i in enumerate(customitr):
            resultarr[index] = arr[i][axis]
def nodeRange(depth, height):
    """Create ``height`` fresh nodes and store them as layer ``depth``.

    Each node is an ``AnyNode`` with no parent, its row index as ``height``,
    random ``weight``, ``bias`` and ``metab`` values, an empty ``parents``
    list and ``output`` 0.0. The list is written to ``Nodes[depth]``.
    """
    with gpu():
        layer = [
            AnyNode(parent=None,
                    height=row,
                    depth=depth,
                    weight=np.random.rand(),
                    bias=np.random.rand(),
                    parents=[],
                    output=0.0,
                    metab=np.random.rand())
            for row in range(0, height)
        ]
        Nodes[depth] = layer
def calculate(fixedNum, itr):
    """Fill a table with ``fixedNum % item`` for every item and time the loop.

    Row ``k`` of the table is first set entirely to ``fixedNum % item``;
    then column 0 is overwritten with ``k`` and column 1 with ``item``.

    Args:
        fixedNum: Left operand of the modulo.
        itr: Sized iterable of right operands (``len(itr)`` sets the number
            of table rows).

    Returns:
        Tuple ``(mem, count, tstart, tend)``: the table, the number of items
        processed, and the timer readings taken before and after the loop.
    """
    # time.clock() was removed in Python 3.8. Keep using it where it exists
    # so readings are unchanged there; otherwise fall back to perf_counter.
    timer = getattr(time, "clock", None) or time.perf_counter

    mem = np.zeros((len(itr), 3))
    count = 0
    tstart = timer()
    with gpu():
        for item in itr:
            mem[count] = (fixedNum % item)
            mem[count][1] = item
            mem[count][0] = count
            count += 1
    tend = timer()
    mem.asnumpy()
    return (mem, count, tstart, tend)
def test_context(): set_context(gpu(1)) # set the global context as gpu(1) def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 """ with gpu(0): xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(20): print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 print("\nff and bp on {0}".format(weights.context)) """ print("\nexecute on cpu") with cpu(): x_cpu = random.rand(32, 64) - 0.5 y_cpu = random.rand(64, 32) - 0.5 z_cpu = np.dot(x_cpu, y_cpu) print('z_cpu.context = {0}'.format(z_cpu.context)) """
x_cpu = random.rand(1024, 1024) - 0.5 y_cpu = random.rand(1024, 1024) - 0.5 # dry run for i in range(10): z_cpu = np.dot(x_cpu, y_cpu) z_cpu.asnumpy() # real run t0 = time.time() for i in range(n): z_cpu = np.dot(x_cpu, y_cpu) z_cpu.asnumpy() t1 = time.time() with gpu(0): x_gpu0 = random.rand(1024, 1024) - 0.5 y_gpu0 = random.rand(1024, 1024) - 0.5 # dry run for i in range(10): z_gpu0 = np.dot(x_gpu0, y_gpu0) z_gpu0.asnumpy() # real run t2 = time.time() for i in range(n): z_gpu0 = np.dot(x_gpu0, y_gpu0) z_gpu0.asnumpy() t3 = time.time()
def garchSim(ret2, p):
    """Return the recursively computed series ``h`` for squared returns.

    ``h[0]`` is the mean of ``ret2``; afterwards
    ``h[t] = p[0] + p[1] * ret2[t - 1] + p[2] * h[t - 1]``.
    """
    h = np.zeros(ret2.shape[0], dtype='float32')
    h[0] = np.mean(ret2)
    # Each step depends on the previous one, so this stays a sequential loop.
    for t in range(1, ret2.shape[0]):
        previous = t - 1
        h[t] = p[0] + p[1] * ret2[previous] + p[2] * h[previous]
    return h


def garchLLH(y, par):
    """Return the log-likelihood of ``y`` under parameters ``par``."""
    h = garchSim(np.square(y), par)
    T = y.shape[0]
    standardized_sq = (y / np.sqrt(h))**2
    llh = -0.5 * (T - 1) * math.log(
        2 * math.pi) - 0.5 * np.sum(np.log(h) + standardized_sq)
    return llh[0]


def llh_time():
    """Benchmark ``garchLLH`` on the data from ``garch_data()``."""
    ret, x0, val_llh = garch_data()
    ret = np.array(ret)
    x0 = np.array(x0)
    return benchmark(lambda: garchLLH(ret, x0), args.n, val_llh)


# Run on the CPU when requested, otherwise on the first GPU.
with cpu() if args.mode == 'cpu' else gpu(0):
    out['minpy-' + args.mode] = llh_time()

print(json.dumps(out))
"""Simple multi-layer perception neural network on MNIST.""" import argparse import os.path import struct import time import numpy as real_numpy import minpy.numpy as np from minpy.nn import io from minpy.nn import layers import minpy.nn.model import minpy.nn.solver # Please uncomment following if you have GPU-enabled MXNet installed. from minpy.context import set_context, gpu set_context(gpu(0)) # set the global context as gpu(0) #import logging #logging.getLogger('minpy.array').setLevel(logging.DEBUG) #logging.getLogger('minpy.core').setLevel(logging.DEBUG) #logging.getLogger('minpy.primitive').setLevel(logging.DEBUG) num_loops = 100 class TwoLayerNet(minpy.nn.model.ModelBase): def __init__(self, args): super(TwoLayerNet, self).__init__() self.add_param(name='wi', shape=(784, args.hidden_size)) \ .add_param(name='bi', shape=(args.hidden_size,)) for i in range(args.num_hidden - 1): self.add_param(name='w%d' % i, shape=(args.hidden_size, args.hidden_size)) \ .add_param(name='b%d' % i, shape=(args.hidden_size,))
resnet = ResNet(3) updater = Updater(resnet, update_rule='sgd', learning_rate=0.1, momentem=0.9) from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument('--gpu_index', type=int, required=True) parser.add_argument('--data_path', type=str, required=False) args = parser.parse_args() from load_cifar10_data_iter import * train_data_iter, val_data_iter = load_cifar10_data_iter(batch_size=64, path=args.data_path) from minpy.context import set_context, gpu set_context(gpu(args.gpu_index)) resnet.training() unpack_batch = lambda batch : (batch.data[0].asnumpy(), batch.label[0].asnumpy()) for epoch in range(125): # anneal learning rate if epoch in (75, 100): updater.learning_rate = updater.learning_rate * 0.1 print 'epoch %d learning rate annealed to %f' % (epoch, updater.learning_rate) t0 = time.time() forward_time, backward_time, updating_time = 0, 0, 0 # training
"""Simple multi-layer perception neural network on MNIST.""" import argparse import os.path import struct import numpy as real_numpy import mxnet as mx import minpy import minpy.numpy as np from minpy.nn import io from minpy.nn import layers import minpy.nn.model import minpy.nn.solver from minpy import context context.set_context(context.gpu(0)) # import logging # logging.getLogger('minpy.array').setLevel(logging.DEBUG) # logging.getLogger('minpy.core').setLevel(logging.DEBUG) # logging.getLogger('minpy.primitive').setLevel(logging.DEBUG) batch_size = 128 flattened_input_size = 784 hidden_size = 256 num_classes = 10 class TwoLayerNet(minpy.nn.model.ModelBase): def __init__(self): super(TwoLayerNet, self).__init__() # Use MXNet symbol to define the whole network.
from minpy.core import grad
import minpy.numpy as np
import minpy.numpy.random as random
import minpy.dispatch.policy as policy
from minpy.context import Context, cpu, gpu, set_context
#np.set_policy(policy.OnlyNumpyPolicy())

set_context(gpu(0))  # set the global context as gpu(0)


def sigmoid(x):
    """Logistic function of ``x``, written in terms of tanh."""
    return 0.5 * (np.tanh(x / 2) + 1)


def predict(weights, inputs):
    """Return the sigmoid of ``inputs`` multiplied by ``weights``."""
    logits = np.dot(inputs, weights)
    return sigmoid(logits)


def training_loss(weights, inputs):
    """Negative summed log-probability of the module-level ``targets``."""
    preds = predict(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))


def training_accuracy(weights, inputs):
    """Percentage of 256 rows whose argmax prediction matches ``targets``."""
    preds = predict(weights, inputs)
    predicted = np.argmax(preds, axis=1)
    expected = np.argmax(targets, axis=1)
    # A non-zero difference marks a misclassified row.
    error = np.count_nonzero(predicted - expected)
    return (256 - error) * 100 / 256.0
import cPickle as pickle
import minpy.nn.model_builder as builder
from facility import *
from noisy_loss import *
from solver_primitives import *
from utilities.data_utility import load_cifar10
from GPU_utility import GPU_availability
from minpy.context import set_context, gpu
# Use the first entry returned by GPU_availability() as the device index.
set_context(gpu(GPU_availability()[0]))
# Network configuration: three 1024-unit layers followed by a 10-unit output.
ACTIVATION = 'ReLU'
SHAPE = (1024, ) * 3 + (10, )
BATCH_SIZE = 64
X_SHAPE = (3072, )
# Resolve the activation class from the builder module by name.
activation = getattr(builder, ACTIVATION)
mlp = builder.Sequential()
# Every layer except the last is an Affine followed by the activation.
for shape in SHAPE[:-1]:
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
# The final Affine has no activation; 'softmax' is passed to the Model below.
mlp.append(builder.Affine(SHAPE[-1]))
model = builder.Model(mlp, 'softmax', X_SHAPE)
# initialize, Updater and Batches are not defined here; presumably they come
# from the star imports above -- verify.
initialize(model)
updater = Updater(model, 'sgd', {'learning_rate': 0.01})
training_X, training_Y, validation_X, validation_Y, test_X, test_Y, = \
    load_cifar10(path='../../cifar10/utilities/cifar/', center=True, rescale=True)
X_batches = Batches(training_X, BATCH_SIZE)
Y_batches = Batches(training_Y, BATCH_SIZE)
import minpy.numpy as np import minpy.nn.model_builder as builder from minpy.core import grad_and_loss as _gradient_loss from minpy.context import set_context, gpu, cpu set_context(gpu(0)) import numpy as np0 from scipy.stats import multivariate_normal as gaussian from scipy.stats import uniform import sys sys.path.append('../../nn/') from facility import * from solver_primitives import * def generate_data(N, D, mean=0, std=1): mean = np0.full(D, mean) covariance_matrix = np0.eye(D) * std data = np0.random.multivariate_normal(mean, covariance_matrix, N) p = gaussian.pdf(data, mean, covariance_matrix) return data, p def gan_gradient_loss(dmodel, gmodel, X, delta=0.1): N, D = X.shape noise = np.random.uniform(np_min(X), np_max(X), X.shape) lower, upper = delta, 1 - delta def gan_loss(*args): p_X = dmodel.forward(X, 'train') random_X = gmodel.forward(noise, 'train')
import minpy.numpy as np
import minpy.nn.model_builder as builder
from minpy.context import set_context, cpu, gpu
# Run on GPU index 3; the commented line below is the CPU alternative.
set_context(gpu(3))
# set_context(cpu())
import cPickle as pickle
import sys
# Make the shared nn helpers importable before the star imports below.
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
activation = builder.ReLU
# Four 3072-unit layers followed by a 10-unit output layer.
shapes = (3072, ) * 4 + (10, )
storage = {}
mlp = builder.Sequential()
# Every layer except the last is an Affine followed by the activation.
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072, ))
batch_size = 100
# Number of whole batches in data[0]; any remainder is not counted.
batches = len(data[0]) // batch_size
node.output = calculate(node.parents, node) def mutation(expected): l = len(Nodes) for i in range(0, l): for j in range(0, len(Nodes[l - i - 1])): node = Nodes[i][j] mod = 0.0 if (node.output > expected[j]): mod = -1.0 elif (node.output < expected[j]): mod = 1.0 delta = np.multiply(node.metab, mod) node.weight = np.add(node.weight, delta) node.bias = np.multiply(metab, weight) nodeRange(0, 4) nodeRange(1, 4) nodeRange(2, 4) fullLink(0, 1) fullLink(1, 2) fullLink(2, 0) printOutput() for i in range(0, 10): with (gpu()): propNoArgs() printOutput() mutation([0.0, 0.0, 0.0, 0.0])
args = parser.parse_args() ''' from examples.utils.data_utils import get_CIFAR10_data data = get_CIFAR10_data(args.data_dir) train_data_iter = NDArrayIter(data=image_data['X_train'], label=data['y_train'], batch_size=batch_size, shuffle=True) test_data_iter = NDArrayIter(data=data['X_test'], label=data['y_test'], batch_size=batch_size, shuffle=False) ''' from load_cifar10_data_iter import * train_data_iter, val_data_iter = load_cifar10_data_iter(batch_size=128, path=args.data_dir) unpack_batch = lambda batch : (batch.data[0].asnumpy(),batch.label[0].asnumpy()) from minpy.context import set_context, cpu,gpu set_context(gpu(args.gpu_index)) model = VGG(5,(2,2,3,3,3),(64,128,256,512,512)) updater = Updater(model,update_rule = 'sgd',learning_rate = 0.1,momentem = 0.9) epoch_number = 0 iteration_number = 0 terminated = False while not terminated: epoch_number +=1 #training train_data_iter.reset() for iteration,batch in enumerate(train_data_iter): iteration_number +=1
"""Simple multi-layer perception neural network on MNIST.""" import argparse import os.path import struct import numpy as real_numpy import minpy.numpy as np from minpy.nn import io from minpy.nn import layers import minpy.nn.model import minpy.nn.solver from minpy import context context.set_context(context.gpu(0)) #import logging #logging.getLogger('minpy.array').setLevel(logging.DEBUG) #logging.getLogger('minpy.core').setLevel(logging.DEBUG) #logging.getLogger('minpy.primitive').setLevel(logging.DEBUG) batch_size = 256 flattened_input_size = 784 hidden_size = 256 num_classes = 10 class TwoLayerNet(minpy.nn.model.ModelBase): def __init__(self): super(TwoLayerNet, self).__init__() self.add_param(name='w1', shape=(flattened_input_size, hidden_size)) \ .add_param(name='b1', shape=(hidden_size,)) \
import numpy.random as RNG
import numpy as NP
import minpy.numpy as np
from minpy.nn.model_builder import *
from minpy.nn.modules import *
from minpy.context import set_context, gpu
import h5py
import os
import time

set_context(gpu(1))  # set the global context with gpu


def softmax_crossentropy(x, y):
    """Mean softmax cross-entropy over all (batch, step) positions.

    Args:
        x: Scores with three dimensions, unpacked as
            ``(batch_size, seq_len, prob_dim)``.
        y: Two-dimensional array of class indices, flattened to one entry
            per row of the flattened ``x``.

    Returns:
        Negative log-softmax of the target class, summed over all rows and
        divided by the number of rows.
    """
    EPSI = 1e-6
    batch_size, seq_len, prob_dim = x.shape
    x = x.reshape((x.shape[0] * x.shape[1], x.shape[2]))
    y = y.reshape((y.shape[0] * y.shape[1], ))
    # x is now (rows, prob_dim) and y is (rows, ).
    # Subtract the row maximum before exponentiating.
    x_dev = x - np.max(x, axis=1, keepdims=True)  # minpy doesn't support x.max()
    sm = x_dev - np.log(EPSI + np.sum(np.exp(x_dev), axis=1, keepdims=True))
    # Flat position of (row, class) in the flattened log-softmax. Each row
    # holds prob_dim entries, so the row stride is prob_dim, not seq_len.
    ids = np.arange(0, y.shape[0]) * prob_dim + y
    ce = -np.sum(sm.reshape((sm.shape[0] * sm.shape[1], ))[ids]) / (
        1.0 * y.shape[0])  # minpy doesn't support -1 in shape inference
    return ce
# Make the shared nn helpers importable before the star imports below.
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
'''
print sys.argv
raise Exception()
'''
# Command line: argv[1] = activation name looked up on `builder`,
# argv[2] = GPU index, argv[3:] = layer sizes (the last is the output size).
ACTIVATION = sys.argv[1]
activation = getattr(builder, ACTIVATION)
DEVICE = int(sys.argv[2])
set_context(gpu(DEVICE))
shapes = [int(shape) for shape in sys.argv[3:]]
storage = {}
mlp = builder.Sequential()
# Every layer except the last is an Affine followed by the activation.
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072, ))
batch_size = 100
# Number of whole batches in data[0]; any remainder is not counted.
batches = len(data[0]) // batch_size
batch_index = 0
iterations = 25000
def fullL2RLink(depth1, depth2, gpun=0):
    """Append every node of layer ``depth2`` to the parents of layer ``depth1``.

    Args:
        depth1: Key into ``Nodes`` for the layer whose ``parents`` lists grow.
        depth2: Key into ``Nodes`` for the layer supplying the parent nodes.
        gpun: GPU index passed to ``gpu`` for the enclosing context.
    """
    with gpu(gpun):
        for parent_node in Nodes[depth2]:
            for child_node in Nodes[depth1]:
                child_node.parents.append(parent_node)
import cPickle as pickle
import sys
# Make the shared nn helpers importable before the star imports below.
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
from minpy.context import set_context, cpu, gpu
print sys.argv
# Command line: argv[1] = setting passed to BlobNormalization,
# argv[2] = GPU index.
device = int(sys.argv[2])
set_context(gpu(device))
blob_setting = sys.argv[1]
# Four 1024-unit layers followed by a 10-unit output layer.
shapes = (1024, ) * 4 + (10, )
# ReLU is not imported by name here; presumably it comes from one of the
# star imports above -- verify.
activation = ReLU
storage = {}
mlp = builder.Sequential()
# Every layer except the last is Affine -> BlobNormalization -> activation.
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(BlobNormalization(blob_setting))
    # mlp.append(builder.Export('bn%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072, ))