def test_mxnet_affine():
    xshape = (10, 40)
    fake_y = np.zeros([10, 20])
    weights = rng.randn(20, 40) - 0.5
    inputs = mx.sym.Variable(name='x')
    fc = mx.sym.FullyConnected(name='fc', data=inputs, num_hidden=20)
    f = core.function(fc, [('x', xshape)])

    def check_fn(x):
        return layers.l2_loss(f(x=x, fc_weight=weights), fake_y)

    x = rng.randn(*xshape) - 0.5
    gradient_checker.quick_grad_check(check_fn, x, rs=rng)
def test_caffe_concat():
    xshape_0 = (10, 40)
    xshape_1 = (10, 30)
    fake_y = np.zeros([10, 70])
    fake_y[:, 0] = 1
    x_1 = rng.randn(*xshape_1) - 0.5
    inputs_0 = mx.sym.Variable(name='x_0')
    inputs_1 = mx.sym.Variable(name='x_1')
    concat = mx.symbol.CaffeOp(data_0=inputs_0, data_1=inputs_1, num_data=2,
                               prototxt='layer {type:"Concat"}')
    f = core.function(concat, {'x_0': xshape_0, 'x_1': xshape_1})

    def check_fn(x_0):
        return layers.l2_loss(f(x_0=x_0, x_1=x_1), fake_y)

    x_0 = rng.randn(*xshape_0) - 0.5
    gradient_checker.quick_grad_check(check_fn, x_0, rs=rng)
def set_mxnet_symbol(self, X):
    data = mx.sym.Variable(name='x')
    conv1 = mx.symbol.Convolution(name='conv1', data=data,
                                  kernel=(self.filter_size, self.filter_size),
                                  num_filter=self.num_filters)
    relu1 = mx.symbol.Activation(data=conv1, act_type='relu')
    pool1 = mx.symbol.Pooling(data=relu1, pool_type='max',
                              kernel=(2, 2), stride=(2, 2))
    flatten = mx.symbol.Flatten(data=pool1)
    fc1 = mx.sym.FullyConnected(name='fc1', data=flatten, num_hidden=self.hidden_dim)
    act1 = mx.sym.Activation(data=fc1, act_type='relu')
    fc2 = mx.sym.FullyConnected(name='fc2', data=act1, num_hidden=self.num_classes)

    batch_num, x_c, x_h, x_w = X.shape
    c, h, w = self.input_dim
    if not (c == x_c and h == x_h and w == x_w):
        raise ModelInputDimInconsistencyError(
            'Expected Dim: {}, Input Dim: {}'.format(self.input_dim, X.shape))

    scores = mx.sym.SoftmaxOutput(data=fc2, name='softmax')
    label_shape = (batch_num,)
    self.symbol_func = core.function(
        scores, [('x', X.shape), ('softmax_label', label_shape)])
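# Usage sketch (not part of the original snippet): once set_mxnet_symbol has been
# called, self.symbol_func behaves like an ordinary function whose keyword arguments
# are the free inputs of the symbol. The parameter names follow mxnet's
# '<layer>_weight' / '<layer>_bias' naming convention; 'params' is a hypothetical
# dict holding this model's weights, and 'labels' a batch of class indices.
def forward(self, X, labels, params):
    return self.symbol_func(x=X, softmax_label=labels,
                            conv1_weight=params['conv1_weight'],
                            conv1_bias=params['conv1_bias'],
                            fc1_weight=params['fc1_weight'],
                            fc1_bias=params['fc1_bias'],
                            fc2_weight=params['fc2_weight'],
                            fc2_bias=params['fc2_bias'])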
def __init__(self, input_size=3 * 32 * 32, hidden_size=512, num_classes=10):
    super(TwoLayerNet, self).__init__()
    # ATTENTION: mxnet's weight dimension arrangement is different; it is [out_size, in_size].
    self.param_configs['w1'] = {'shape': [hidden_size, input_size]}
    self.param_configs['b1'] = {'shape': [hidden_size]}
    self.param_configs['w2'] = {'shape': [num_classes, hidden_size]}
    self.param_configs['b2'] = {'shape': [num_classes]}
    # Define the symbols.
    data = mx.sym.Variable(name='X')
    fc1 = mx.sym.FullyConnected(name='fc1', data=data, num_hidden=hidden_size)
    act = mx.sym.Activation(data=fc1, act_type='relu')
    fc2 = mx.sym.FullyConnected(name='fc2', data=act, num_hidden=num_classes)
    # ATTENTION: when using mxnet symbols, the input shape (including batch size) must be fixed.
    self.fwd_fn = core.function(fc2, {'X': (128, input_size)})
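# Sketch of the companion methods (not in the original snippet): this assumes
# minpy's ModelBase/solver conventions as shown in its tutorials, i.e. that
# self.params is populated from param_configs and that the solver calls
# forward(X, mode) and loss(predict, y). It also assumes `from minpy.nn import
# layers`, which other snippets in this section already use.
def forward(self, X, mode):
    return self.fwd_fn(X=X,
                       fc1_weight=self.params['w1'], fc1_bias=self.params['b1'],
                       fc2_weight=self.params['w2'], fc2_bias=self.params['b2'])


def loss(self, predict, y):
    return layers.softmax_loss(predict, y)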
def __init__(self, input_size=3 * 32 * 32, hidden_size=512, num_classes=10):
    super(TwoLayerCaffeNet, self).__init__()
    # ATTENTION: mxnet's weight dimension arrangement is different; it is [out_size, in_size].
    self.param_configs['w1'] = {'shape': [hidden_size, input_size]}
    self.param_configs['b1'] = {'shape': [hidden_size]}
    self.param_configs['w2'] = {'shape': [num_classes, hidden_size]}
    self.param_configs['b2'] = {'shape': [num_classes]}
    # Define the symbols.
    data = mx.sym.Variable(name='X')
    fc1 = mx.sym.CaffeOp(
        name='fc1', data_0=data, num_weight=2,
        prototxt='layer {type:"InnerProduct" inner_product_param{num_output: %d} }' % hidden_size)
    act = mx.sym.CaffeOp(data_0=fc1, prototxt='layer {type:"ReLU"}')
    fc2 = mx.sym.CaffeOp(
        name='fc2', data_0=act, num_weight=2,
        prototxt='layer {type:"InnerProduct" inner_product_param{num_output: %d} }' % num_classes)
    # ATTENTION: when using mxnet symbols, the input shape (including batch size) must be fixed.
    self.fwd_fn = core.function(fc2, {'X': (100, input_size)})
from minpy import core
import minpy.numpy as np
import mxnet as mx


def sigmoid(x):
    return np.multiply(0.5, np.add(np.tanh(x), 1))

xshape = (256, 500)
wshape = (250, 500)  # mxnet FullyConnected weights are [out_size, in_size]
tshape = (256, 250)
inputs = np.random.rand(*xshape) - 0.5
targets = np.random.randint(0, 2, size=tshape)
weights = np.random.rand(*wshape) - 0.5

x = mx.sym.Variable(name='x')
fc = mx.sym.FullyConnected(name='fc', data=x, num_hidden=250)
act = mx.sym.Activation(data=fc, act_type='sigmoid')
f = core.function(act, [('x', xshape)])


def predict(weights, inputs):
    return f(x=inputs, fc_weight=weights)


def training_loss(weights, inputs):
    preds = predict(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

training_gradient_fun = core.grad(training_loss)
print('Initial loss: {}'.format(training_loss(weights, inputs)))
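# Minimal continuation sketch (not in the snippet above): plain gradient descent using
# the function returned by core.grad, which differentiates training_loss with respect
# to its first argument (weights). The learning rate and iteration count are illustrative.
for i in range(100):
    gr = training_gradient_fun(weights, inputs)
    weights = weights - gr * 0.01
print('Trained loss: {}'.format(training_loss(weights, inputs)))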
import logging

import minpy.numpy as np
import minpy.dispatch.policy as ply
from minpy.core import grad_and_loss, function
import util
import mxnet as mx

logging.getLogger('minpy.array').setLevel(logging.WARN)

x = mx.symbol.Variable('x')
sm = mx.symbol.SoftmaxOutput(data=x, name='softmax', grad_scale=1 / 10000.0)
softmax = function(sm, [('x', (10000, 5)), ('softmax_label', (10000,))])

x, t = util.get_data()
#w = np.random.randn(500, 5)
w = util.get_weight()
softmax_label = np.argmax(t, axis=1)


def predict(w, x):
    '''
    a = np.exp(np.dot(x, w))
    a_sum = np.sum(a, axis=1, keepdims=True)
    prob = a / a_sum
    '''
    y = np.dot(x, w)
    prob = softmax(x=y, softmax_label=softmax_label)
    return prob

#util.plot_data(x, t)
#util.plot_data(x, predict(w, x))
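# Usage sketch (not from the original snippet): the probabilities returned by predict
# can be turned into hard class predictions and compared against the labels derived above.
prob = predict(w, x)
pred_label = np.argmax(prob, axis=1)
print('Training accuracy: {}'.format(np.mean(pred_label == softmax_label)))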
data = mx.symbol.Variable(name='x')
conv1 = mx.symbol.Convolution(name='conv', data=data, kernel=(5, 5), num_filter=5)
tanh1 = mx.symbol.Activation(data=conv1, act_type='tanh')
pool1 = mx.symbol.Pooling(data=tanh1, pool_type='max', kernel=(2, 2), stride=(2, 2))
flatten = mx.symbol.Flatten(data=pool1)
fc = mx.sym.FullyConnected(name='fc', data=flatten, num_hidden=250)
act = mx.sym.Activation(data=fc, act_type='sigmoid')
f = core.function(act, {'x': xshape})


def predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias):
    return f(x=inputs, fc_weight=fc_weight, fc_bias=fc_bias,
             conv_weight=conv_weight, conv_bias=conv_bias)


def training_loss(inputs, targets, fc_weight, fc_bias, conv_weight, conv_bias):
    preds = predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))
data = mx.sym.Variable(name='x')
conv1 = mx.symbol.Convolution(name='conv', data=data, kernel=(5, 5), num_filter=5)
tanh1 = mx.symbol.Activation(data=conv1, act_type='tanh')
pool1 = mx.symbol.Pooling(data=tanh1, pool_type='max', kernel=(2, 2), stride=(2, 2))
flatten = mx.symbol.Flatten(data=pool1)
fc = mx.sym.FullyConnected(name='fc', data=flatten, num_hidden=250)
act = mx.sym.Activation(data=fc, act_type='sigmoid')
f = core.function(act, [('x', xshape)])


def predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias):
    return f(x=inputs, fc_weight=fc_weight, fc_bias=fc_bias,
             conv_weight=conv_weight, conv_bias=conv_bias)


def training_loss(inputs, targets, fc_weight, fc_bias, conv_weight, conv_bias):
    preds = predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))
import logging

import minpy.numpy as np
import minpy.dispatch.policy as ply
from minpy.core import grad_and_loss, function
import util
import mxnet as mx

logging.getLogger('minpy.array').setLevel(logging.DEBUG)
logging.getLogger('minpy.core').setLevel(logging.DEBUG)

x = mx.symbol.Variable('x')
sm = mx.symbol.SoftmaxOutput(data=x, name='softmax', grad_scale=1 / 10000.0)
softmax = function(sm, {'x': (10000, 5), 'softmax_label': (10000,)})

x, t = util.get_data()
#w = np.random.randn(500, 5)
w = util.get_weight()
softmax_label = np.argmax(t, axis=1)


def predict(w, x):
    '''
    a = np.exp(np.dot(x, w))
    a_sum = np.sum(a, axis=1, keepdims=True)
    prob = a / a_sum
    '''
    y = np.dot(x, w)
    prob = softmax(x=y, softmax_label=softmax_label)
    return prob

#util.plot_data(x, t)
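# Training sketch (not in the snippet above): grad_and_loss, imported earlier, wraps a
# scalar loss so that each call returns (gradients, loss). The loss expression and the
# step size below are illustrative assumptions, not taken from the original file.
def training_loss(w, x):
    prob = predict(w, x)
    return -np.sum(np.log(prob) * t) / 10000


grad_fn = grad_and_loss(training_loss)
for i in range(100):
    dw, loss = grad_fn(w, x)
    w = w - 0.1 * dw
    print('Iteration {}, loss: {}'.format(i, loss))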
conv_bshape = (5,)
#conv_bias = np.zeros(*conv_bshape)
conv_bias = random.rand(*conv_bshape) * 0

data = mx.sym.Variable(name='x')
conv1 = mx.symbol.Convolution(name='conv', data=data, kernel=(5, 5), num_filter=5)
tanh1 = mx.symbol.Activation(data=conv1, act_type='tanh')
pool1 = mx.symbol.Pooling(data=tanh1, pool_type='max', kernel=(2, 2), stride=(2, 2))
flatten = mx.symbol.Flatten(data=pool1)
fc = mx.sym.FullyConnected(name='fc', data=flatten, num_hidden=250)
act = mx.sym.Activation(data=fc, act_type='sigmoid')
f = core.function(act, [('x', xshape)])


def predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias):
    return f(x=inputs, fc_weight=fc_weight, fc_bias=fc_bias,
             conv_weight=conv_weight, conv_bias=conv_bias)


def training_loss(inputs, targets, fc_weight, fc_bias, conv_weight, conv_bias):
    preds = predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

training_gradient_fun = core.grad_and_loss(training_loss, range(2, 6))

lr = 1e-5
for i in range(100):
    grads, loss = training_gradient_fun(inputs, targets, fc_weight, fc_bias,
                                        conv_weight, conv_bias)
conv_bshape = (5,)
#conv_bias = np.zeros(*conv_bshape)
conv_bias = random.rand(*conv_bshape) * 0

data = mx.symbol.Variable(name='x')
conv1 = mx.symbol.Convolution(name='conv', data=data, kernel=(5, 5), num_filter=5)
tanh1 = mx.symbol.Activation(data=conv1, act_type='tanh')
pool1 = mx.symbol.Pooling(data=tanh1, pool_type='max', kernel=(2, 2), stride=(2, 2))
flatten = mx.symbol.Flatten(data=pool1)
fc = mx.sym.FullyConnected(name='fc', data=flatten, num_hidden=250)
act = mx.sym.Activation(data=fc, act_type='sigmoid')
f = core.function(act, {'x': xshape})


def predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias):
    return f(x=inputs, fc_weight=fc_weight, fc_bias=fc_bias,
             conv_weight=conv_weight, conv_bias=conv_bias)


def training_loss(inputs, targets, fc_weight, fc_bias, conv_weight, conv_bias):
    preds = predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

training_gradient_fun = core.grad_and_loss(training_loss, range(2, 6))

lr = 1e-5
for i in range(100):
    grads, loss = training_gradient_fun(inputs, targets, fc_weight, fc_bias,
                                        conv_weight, conv_bias)
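    # Sketch of the update step the loop presumably continues with (not in the original
    # snippet): plain SGD, with gradients ordered as the argnums range(2, 6) passed to
    # core.grad_and_loss, i.e. (fc_weight, fc_bias, conv_weight, conv_bias).
    fc_weight = fc_weight - lr * grads[0]
    fc_bias = fc_bias - lr * grads[1]
    conv_weight = conv_weight - lr * grads[2]
    conv_bias = conv_bias - lr * grads[3]
    print('Iteration {}, loss: {}'.format(i, loss))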