def main(args):
    if args.gpu:
        from minpy.context import set_context, gpu
        set_context(gpu(0))  # set the global context as gpu(0)
    env = gym.make("Pong-v0")
    env.seed(args.seed)
    numpy.random.seed(args.seed)
    model = PolicyNetwork(PongPreprocessor())
    solver = RLPolicyGradientSolver(
        model,
        env,
        update_rule='rmsprop',
        optim_config={
            'learning_rate': args.learning_rate,
            'decay_rate': args.decay_rate
        },
        init_rule='custom',
        init_config={
            'function': lambda shape: np.random.randn(shape[0], shape[1]) / numpy.sqrt(shape[1])
        },
        render=args.render,
        save_dir=args.save_dir,
        save_every=args.save_every,
        resume_from=args.resume_from,
        num_episodes=args.num_episodes,
        verbose=args.verbose,
        print_every=args.print_every)
    solver.init()
    solver.train()
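# A minimal sketch of the command-line interface main(args) appears to expect;
# the flag names are inferred from the attributes used above, and the defaults
# are illustrative assumptions, not the original script's values.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Pong policy-gradient training.')
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--learning-rate', type=float, default=1e-3)
    parser.add_argument('--decay-rate', type=float, default=0.99)
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--save-dir', type=str, default='./checkpoints')
    parser.add_argument('--save-every', type=int, default=10)
    parser.add_argument('--resume-from', type=str, default=None)
    parser.add_argument('--num-episodes', type=int, default=10000)
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--print-every', type=int, default=1)
    main(parser.parse_args())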
def test_context():
    set_context(gpu(1))  # set the global context as gpu(1)

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    with gpu(0):
        xshape = (256, 500)
        wshape = (500, 250)
        tshape = (256, 250)
        inputs = random.rand(*xshape) - 0.5
        targets = np.zeros(tshape)
        truth = random.randint(0, 250, 256)
        targets[np.arange(256), truth] = 1
        weights = random.rand(*wshape) - 0.5

        training_gradient_fun = grad(training_loss)

        for i in range(20):
            print('Trained loss accuracy #{}: {}%'.format(
                i, training_accuracy(weights, inputs)))
            gr = training_gradient_fun(weights, inputs)
            weights -= gr * 0.01

        print("\nff and bp on {0}".format(weights.context))

    print("\nexecute on cpu")
    with cpu():
        x_cpu = random.rand(32, 64) - 0.5
        y_cpu = random.rand(64, 32) - 0.5
        z_cpu = np.dot(x_cpu, y_cpu)
        print('z_cpu.context = {0}'.format(z_cpu.context))

    print("\nexecute on gpu(0)")
    with gpu(0):
        x_gpu0 = random.rand(32, 64) - 0.5
        y_gpu0 = random.rand(64, 32) - 0.5
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
        z_gpu0.asnumpy()
        print('z_gpu0.context = {0}'.format(z_gpu0.context))

    print("\n[use global context] execute on gpu(1)")
    x_gpu1 = random.rand(32, 64) - 0.5
    y_gpu1 = random.rand(64, 32) - 0.5
    z_gpu1 = np.dot(x_gpu1, y_gpu1)
    z_gpu1.asnumpy()
    print('z_gpu1.context = {0}'.format(z_gpu1.context))
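# A hedged sketch of the header this test presupposes: the excerpt uses np,
# random, grad, set_context, cpu and gpu without showing their imports. The
# module paths below follow minpy's public layout but are reconstructed, not
# copied from the original file.
import minpy.numpy as np
import minpy.numpy.random as random
from minpy.core import grad
from minpy.context import set_context, cpu, gpu

if __name__ == '__main__':
    test_context()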
def test_policy_2():
    with minpy.OnlyNumPyPolicy():
        print(np.policy)
        print(np.random.policy)
    np.set_policy(minpy.PreferMXNetPolicy())
    set_context(cpu())
    print(np.policy)
    print(np.random.policy)
"""Simple multi-layer perception neural network on MNIST.""" import argparse import os.path import struct import numpy as real_numpy import mxnet as mx import minpy import minpy.numpy as np from minpy.nn import io from minpy.nn import layers import minpy.nn.model import minpy.nn.solver from minpy import context context.set_context(context.gpu(0)) # import logging # logging.getLogger('minpy.array').setLevel(logging.DEBUG) # logging.getLogger('minpy.core').setLevel(logging.DEBUG) # logging.getLogger('minpy.primitive').setLevel(logging.DEBUG) batch_size = 128 flattened_input_size = 784 hidden_size = 256 num_classes = 10 class TwoLayerNet(minpy.nn.model.ModelBase): def __init__(self): super(TwoLayerNet, self).__init__() # Use MXNet symbol to define the whole network.
import cPickle as pickle

import minpy.nn.model_builder as builder

from facility import *
from noisy_loss import *
from solver_primitives import *
from utilities.data_utility import load_cifar10
from GPU_utility import GPU_availability

from minpy.context import set_context, gpu
set_context(gpu(GPU_availability()[0]))

ACTIVATION = 'ReLU'
SHAPE = (1024,) * 3 + (10,)
BATCH_SIZE = 64
X_SHAPE = (3072,)

activation = getattr(builder, ACTIVATION)

mlp = builder.Sequential()
for shape in SHAPE[:-1]:
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
mlp.append(builder.Affine(SHAPE[-1]))
model = builder.Model(mlp, 'softmax', X_SHAPE)
initialize(model)
updater = Updater(model, 'sgd', {'learning_rate': 0.01})

training_X, training_Y, validation_X, validation_Y, test_X, test_Y = \
    load_cifar10(path='../../cifar10/utilities/cifar/', center=True, rescale=True)
X_batches = Batches(training_X, BATCH_SIZE)
Y_batches = Batches(training_Y, BATCH_SIZE)
resnet = ResNet(3)
updater = Updater(resnet, update_rule='sgd', learning_rate=0.1, momentum=0.9)

from argparse import ArgumentParser
parser = ArgumentParser()
parser.add_argument('--gpu_index', type=int, required=True)
parser.add_argument('--data_path', type=str, required=False)
args = parser.parse_args()

from load_cifar10_data_iter import *
train_data_iter, val_data_iter = load_cifar10_data_iter(
    batch_size=64, path=args.data_path)

from minpy.context import set_context, gpu
set_context(gpu(args.gpu_index))

resnet.training()

unpack_batch = lambda batch: (batch.data[0].asnumpy(), batch.label[0].asnumpy())

for epoch in range(125):
    # anneal the learning rate
    if epoch in (75, 100):
        updater.learning_rate = updater.learning_rate * 0.1
        print 'epoch %d learning rate annealed to %f' % (
            epoch, updater.learning_rate)

    t0 = time.time()
    forward_time, backward_time, updating_time = 0, 0, 0

    # training
args = parser.parse_args()

'''
from examples.utils.data_utils import get_CIFAR10_data
data = get_CIFAR10_data(args.data_dir)
train_data_iter = NDArrayIter(data=data['X_train'],
                              label=data['y_train'],
                              batch_size=batch_size,
                              shuffle=True)
test_data_iter = NDArrayIter(data=data['X_test'],
                             label=data['y_test'],
                             batch_size=batch_size,
                             shuffle=False)
'''

from load_cifar10_data_iter import *
train_data_iter, val_data_iter = load_cifar10_data_iter(
    batch_size=128, path=args.data_dir)

unpack_batch = lambda batch: (batch.data[0].asnumpy(), batch.label[0].asnumpy())

from minpy.context import set_context, cpu, gpu
set_context(gpu(args.gpu_index))

model = VGG(5, (2, 2, 3, 3, 3), (64, 128, 256, 512, 512))
updater = Updater(model, update_rule='sgd', learning_rate=0.1, momentum=0.9)

epoch_number = 0
iteration_number = 0
terminated = False

while not terminated:
    epoch_number += 1

    # training
    train_data_iter.reset()
    for iteration, batch in enumerate(train_data_iter):
        iteration_number += 1
import minpy.numpy as np
import minpy.nn.model_builder as builder
from minpy.core import grad_and_loss as _gradient_loss
from minpy.context import set_context, gpu, cpu
set_context(gpu(0))

import numpy as np0
from scipy.stats import multivariate_normal as gaussian
from scipy.stats import uniform

import sys
sys.path.append('../../nn/')
from facility import *
from solver_primitives import *


def generate_data(N, D, mean=0, std=1):
    mean = np0.full(D, mean)
    covariance_matrix = np0.eye(D) * std
    data = np0.random.multivariate_normal(mean, covariance_matrix, N)
    p = gaussian.pdf(data, mean, covariance_matrix)
    return data, p


def gan_gradient_loss(dmodel, gmodel, X, delta=0.1):
    N, D = X.shape
    noise = np.random.uniform(np_min(X), np_max(X), X.shape)
    lower, upper = delta, 1 - delta

    def gan_loss(*args):
        p_X = dmodel.forward(X, 'train')
        random_X = gmodel.forward(noise, 'train')
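        # -- hedged completion (assumption): the function is truncated here.
        # A plausible remainder scores the generated samples, clips the
        # discriminator outputs to [delta, 1 - delta] for numerical
        # stability, and returns a standard minimax GAN loss; this is a
        # sketch, not the original code.
        p_noise = dmodel.forward(random_X, 'train')
        p_X = np.minimum(np.maximum(p_X, lower), upper)
        p_noise = np.minimum(np.maximum(p_noise, lower), upper)
        # discriminator term: -[log D(x) + log(1 - D(G(z)))], averaged over N
        d_loss = -(np.sum(np.log(p_X)) + np.sum(np.log(1 - p_noise))) / N
        # generator term: -log D(G(z)), averaged over N
        g_loss = -np.sum(np.log(p_noise)) / N
        return d_loss + g_loss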
'''
from examples.utils.data_utils import get_CIFAR10_data
data = get_CIFAR10_data(args.data_dir)
from minpy.nn.io import NDArrayIter
batch_size = 128
train_data_iter = NDArrayIter(data=data['X_train'],
                              label=data['y_train'],
                              batch_size=batch_size,
                              shuffle=True)
val_data_iter = NDArrayIter(data=data['X_test'],
                            label=data['y_test'],
                            batch_size=batch_size,
                            shuffle=False)
'''

from load_cifar10_data_iter import *
train_data_iter, val_data_iter = load_cifar10_data_iter(
    batch_size=128, path=args.data_dir)

from minpy.context import set_context, cpu, gpu
if args.gpu_index < 0:
    set_context(cpu())
else:
    set_context(gpu(args.gpu_index))

model = ResNet(3, (16, 32, 64))
updater = Updater(model, update_rule='sgd', learning_rate=0.1, momentum=0.9)

epoch_number = 0
iteration_number = 0
terminated = False

while not terminated:
    # training
    epoch_number += 1
    train_data_iter.reset()
    for iteration, batch in enumerate(train_data_iter):
"""Simple multi-layer perception neural network on MNIST.""" import argparse import os.path import struct import numpy as real_numpy import minpy.numpy as np from minpy.nn import io from minpy.nn import layers import minpy.nn.model import minpy.nn.solver from minpy import context context.set_context(context.gpu(0)) #import logging #logging.getLogger('minpy.array').setLevel(logging.DEBUG) #logging.getLogger('minpy.core').setLevel(logging.DEBUG) #logging.getLogger('minpy.primitive').setLevel(logging.DEBUG) batch_size = 256 flattened_input_size = 784 hidden_size = 256 num_classes = 10 class TwoLayerNet(minpy.nn.model.ModelBase): def __init__(self): super(TwoLayerNet, self).__init__() self.add_param(name='w1', shape=(flattened_input_size, hidden_size)) \ .add_param(name='b1', shape=(hidden_size,)) \
"""Simple multi-layer perception neural network on MNIST.""" import argparse import os.path import struct import time import numpy as real_numpy import minpy.numpy as np from minpy.nn import io from minpy.nn import layers import minpy.nn.model import minpy.nn.solver # Please uncomment following if you have GPU-enabled MXNet installed. from minpy.context import set_context, gpu set_context(gpu(0)) # set the global context as gpu(0) #import logging #logging.getLogger('minpy.array').setLevel(logging.DEBUG) #logging.getLogger('minpy.core').setLevel(logging.DEBUG) #logging.getLogger('minpy.primitive').setLevel(logging.DEBUG) num_loops = 100 class TwoLayerNet(minpy.nn.model.ModelBase): def __init__(self, args): super(TwoLayerNet, self).__init__() self.add_param(name='wi', shape=(784, args.hidden_size)) \ .add_param(name='bi', shape=(args.hidden_size,)) for i in range(args.num_hidden - 1): self.add_param(name='w%d' % i, shape=(args.hidden_size, args.hidden_size)) \ .add_param(name='b%d' % i, shape=(args.hidden_size,))
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)

ACTIVATION = sys.argv[1]
activation = getattr(builder, ACTIVATION)
DEVICE = int(sys.argv[2])
set_context(gpu(DEVICE))
shapes = [int(shape) for shape in sys.argv[3:]]

storage = {}
mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072,))

batch_size = 100
batches = len(data[0]) // batch_size
batch_index = 0
iterations = 25000
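# -- hedged sketch (assumption): the script is truncated after the counters
# above. A plausible loop cycles batch_index through the training set; the
# backward pass and parameter update go through the solver_primitives
# helpers, whose API is not shown in this excerpt, so only the slicing and
# the forward pass are concrete.
initialize(model)
for i in range(iterations):
    X = data[0][batch_index * batch_size:(batch_index + 1) * batch_size]
    Y = data[1][batch_index * batch_size:(batch_index + 1) * batch_size]
    batch_index = (batch_index + 1) % batches
    scores = model.forward(X, 'train')
    # ... gradient computation and updater step omitted (not recoverable)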
import minpy.numpy as np
import minpy.nn.model_builder as builder
from minpy.context import set_context, cpu, gpu
set_context(gpu(3))
# set_context(cpu())

import cPickle as pickle
import sys
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)

activation = builder.ReLU
shapes = (3072,) * 4 + (10,)

storage = {}
mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072,))

batch_size = 100
batches = len(data[0]) // batch_size
import cPickle as pickle
import sys
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)

from minpy.context import set_context, cpu, gpu
print sys.argv
device = int(sys.argv[2])
set_context(gpu(device))

blob_setting = sys.argv[1]
shapes = (1024,) * 4 + (10,)
activation = ReLU

storage = {}
mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(BlobNormalization(blob_setting))
    # mlp.append(builder.Export('bn%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072,))
import minpy.numpy as np
import minpy.nn.model_builder as builder
from minpy.context import set_context, cpu, gpu
# set_context(gpu(0))
set_context(cpu())

import sys
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
X = data[0][:16]

hidden_layers = 4
shapes = (1024,) * hidden_layers + (10,)
activation = builder.ReLU

storage = {}
mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072,))
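# -- hedged sketch (assumption): the script is truncated above. With the
# Export layers registered, a single forward pass fills `storage` with each
# affine output, so printing the captured shapes is a natural way to inspect
# the activations; initialize() follows its usage in the other snippets.
initialize(model)
output = model.forward(X, 'train')
for name, value in storage.items():
    print name, value.shape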
import numpy.random as RNG
import numpy as NP

import minpy.numpy as np
from minpy.nn.model_builder import *
from minpy.nn.modules import *
from minpy.context import set_context, gpu

import h5py
import os
import time

set_context(gpu(1))  # set the global context to gpu(1)


def softmax_crossentropy(x, y):
    EPSI = 1e-6
    batch_size, seq_len, prob_dim = x.shape
    # flatten so that x is (batch * seq_len, prob) and y is (batch * seq_len,)
    x = x.reshape((x.shape[0] * x.shape[1], x.shape[2]))
    y = y.reshape((y.shape[0] * y.shape[1],))
    x_dev = x - np.max(x, axis=1, keepdims=True)  # minpy doesn't support x.max()
    sm = x_dev - np.log(EPSI + np.sum(np.exp(x_dev), axis=1, keepdims=True))
    # index the flattened log-softmax at (row, label); the row stride is the
    # class dimension prob_dim (the original used seq_len, which is only
    # correct when seq_len == prob_dim)
    ids = np.arange(0, y.shape[0]) * prob_dim + y
    ce = -np.sum(sm.reshape((sm.shape[0] * sm.shape[1],))[ids]) / (
        1.0 * y.shape[0])  # minpy doesn't support -1 in shape inference
    return ce
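# -- hedged usage sketch (assumption): a quick smoke test of the loss above
# with illustrative shapes; not part of the original file.
if __name__ == '__main__':
    batch, seq_len, vocab = 2, 5, 7
    logits = np.array(RNG.randn(batch, seq_len, vocab))
    labels = np.array(RNG.randint(0, vocab, (batch, seq_len)))
    print(softmax_crossentropy(logits, labels))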