def discount_rewards(self, rs):
    """Return discounted, standardized returns for a reward sequence.

    Walks ``rs`` from the last step to the first, accumulating
    ``running = running * self.gamma + rs[t]``. The accumulator is cleared
    whenever a non-zero reward is seen, so returns do not leak across the
    boundary that reward marks.

    :param rs: Indexable reward sequence; ``np.zeros_like(rs)`` must expose
        ``asnumpy()`` (presumably a MinPy array -- confirm against caller).
    :return: Array of the same length holding the returns shifted to zero
        mean and, when their spread is non-zero, scaled to unit standard
        deviation.
    """
    # Assumes asnumpy() hands back a plain NumPy ndarray that supports item
    # assignment and the .mean()/.std() methods used below -- TODO confirm.
    drs = np.zeros_like(rs).asnumpy()
    running = 0
    for t in reversed(range(len(rs))):
        # Reset the running sum at a game boundary.
        if rs[t] != 0:
            running = 0
        running = running * self.gamma + rs[t]
        drs[t] = running
    drs -= drs.mean()
    spread = drs.std()
    # With identical returns (e.g. an all-zero reward sequence) the spread is
    # zero; dividing would turn every entry into NaN, so leave them centered.
    if spread > 0:
        drs /= spread
    return drs
def discount_rewards(self, rs):
    """Compute standardized discounted returns from per-step rewards.

    The sequence is scanned backwards. At each step the running return is
    multiplied by ``self.gamma`` and the step's reward is added; a non-zero
    reward first clears the running return so that each game is discounted
    independently.

    :param rs: Indexable reward sequence; ``np.zeros_like(rs)`` must expose
        ``asnumpy()`` (presumably a MinPy array -- confirm against caller).
    :return: Array with one return per step, mean-centered and divided by
        its standard deviation when that deviation is non-zero.
    """
    # Assumes asnumpy() yields a NumPy ndarray (item assignment, .mean(),
    # .std()) -- TODO confirm.
    drs = np.zeros_like(rs).asnumpy()
    acc = 0
    # range() instead of xrange() so the loop also runs on Python 3.
    for t in reversed(range(len(rs))):
        # Reset the running sum at a game boundary.
        if rs[t] != 0:
            acc = 0
        acc = acc * self.gamma + rs[t]
        drs[t] = acc
    drs -= drs.mean()
    deviation = drs.std()
    # Skip the division when all returns are equal; it would produce NaN.
    if deviation > 0:
        drs /= deviation
    return drs
def rescale(container, inputs, parameters):
    """Rescale the weights of a container's Affine/Convolution modules in place.

    Recovers the original distribution at the final layer of the container:
    every Affine/Convolution module except the last one has its weights
    multiplied by ``std_to / std_from``, where ``std_from`` is the current
    standard deviation of the weights and
    ``std_to = 1 / sqrt(mean(inputs ** 2) * n)``. The last such module has
    its weights divided by the product of all factors applied before it.

    :param container: Object exposing ``_modules``, a sequence of modules
        providing ``parameter_shape``, ``output_shape`` and ``forward``.
    :param inputs: Batch fed to the first module; its leading axis is
        dropped to obtain the per-sample input shape.
    :param parameters: Mapping from parameter name to array. Entries whose
        name contains ``'weight'`` are modified in place.
    :return: ``(outputs, factors)`` -- the result of forwarding ``inputs``
        through every module with the updated parameters, and the list of
        rescaling factors applied to the non-final layers.
    """
    modules = container._modules
    input_shape = inputs.shape[1:]

    # Find the final Affine/Convolution module; it absorbs the inverse of
    # every factor applied to the layers before it.
    ending = None
    for index in range(len(modules) - 1, -1, -1):
        if isinstance(modules[index], (Affine, Convolution)):
            ending = index
            break

    factors = []
    for module_index, module in enumerate(modules):
        shapes = module.parameter_shape(input_shape)
        input_shape = module.output_shape(input_shape)
        if isinstance(module, (Affine, Convolution)):
            for key, value in shapes.items():
                if 'weight' not in key:
                    continue
                # Second moment of whatever currently enters this module.
                E_X_2 = np.mean(inputs**2)
                # n is taken from the declared weight shape: its first entry
                # for Affine, the product of entries 1..3 for Convolution
                # (presumably the fan-in -- confirm against the modules).
                if isinstance(module, Affine):
                    n = value[0]
                else:
                    C, W, H = value[1:]
                    n = C * W * H
                std_from = np.std(parameters[key])
                std_to = 1 / (E_X_2 * n)**0.5
                rescaling_factor = std_to / std_from
                if module_index == ending:
                    parameters[key] /= np.prod(np.array(factors))
                else:
                    factors.append(rescaling_factor)
                    parameters[key] *= rescaling_factor
        # Forward with the (possibly rescaled) parameters so the next module
        # sees the updated activations.
        inputs = module.forward(inputs, parameters)
    return inputs, factors
def preprocess_data(train, test, validation=None):
    """
    Applies zero-centering and normalization of input data for better model performance.

    All statistics are computed on the training set only and then applied to
    every data set. The arrays passed in are modified in place.

    :param train: A numpy matrix containing the data the network is trained on.
    :param test: A numpy matrix containing the data used to test the network.
    :param validation: An optional numpy matrix containing the data used to apply hyperparameter validation on the
    network.
    :return: A tuple ``(train, validation, test, mean, std)``: the preprocessed matrices (``validation`` is ``None``
    when it was not supplied) followed by the per-feature mean and the per-feature divisor that were applied. The
    divisor is the training-set standard deviation, with zeros replaced by 1.
    """
    # Zero-centering (subtracting the mean)
    mean = np.mean(train, axis=0)  # Using statistic of training set
    train -= mean
    test -= mean
    if validation is not None:
        validation -= mean

    # Normalization of data dimension to be of equal scale (division by standard deviation)
    std = np.std(train, axis=0)
    # A feature that is constant over the training set has zero deviation; dividing by it would fill the column
    # with NaN/inf, so such features are left centered but unscaled.
    std = np.where(std == 0, 1.0, std)
    train /= std
    test /= std
    if validation is not None:
        validation /= std

    return train, validation, test, mean, std
# Build an MLP: one Affine -> Export -> activation group per hidden shape,
# followed by a final Affine layer for the last shape.
mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))

model = builder.Model(mlp, 'softmax', (3072, ))

# Disabled experiment (was kept in a string literal): override the weight
# initializer with a Gaussian rule.
# for key, value in model.param_configs.items():
#     if 'weight' in key:
#         value['init_rule'] = 'gaussian'
#         value['init_config'] = {'stdvar' : 1}

initialize(model)

# Standard deviation of every weight before rescaling.
# print(...) with a single argument prints identically on Python 2 and 3.
for key, value in model.params.items():
    if 'weight' in key:
        print(np.std(value))

result = model.forward(X, 'train')
print('result %s' % (np.std(result), ))

# rescale() modifies model.params in place and returns the forwarded output.
result, factors = rescale(mlp, X, model.params)
print('result %s' % (np.std(result), ))

# Standard deviation of every weight after rescaling.
for key, value in model.params.items():
    if 'weight' in key:
        print(np.std(value))
# Per-parameter histories of gradient statistics, one entry per iteration.
# `mean`, `std` and `loss_history` are expected to exist already; they are
# not created in this part of the script.
L_2 = {key: [] for key in model.params}
minimum = {key: [] for key in model.params}
maximum = {key: [] for key in model.params}

for i in range(iterations):
    # Take the current mini-batch, then advance the index cyclically.
    X_batch = data[0][batch_index * batch_size:(batch_index + 1) * batch_size]
    Y_batch = data[1][batch_index * batch_size:(batch_index + 1) * batch_size]
    batch_index = (batch_index + 1) % batches

    gradients, loss = gradient_loss(model, X_batch, Y_batch)
    loss = loss.asnumpy()[0]
    loss_history.append(loss)

    # Gradients are paired with parameter names by position.
    for key, value in zip(model.params.keys(), gradients):
        mean[key].append(np.mean(value).asnumpy())
        std[key].append(np.std(value).asnumpy())
        L_2[key].append(np.mean(value**2).asnumpy())
        minimum[key].append(np.min(value).asnumpy())
        maximum[key].append(np.max(value).asnumpy())

    updater.update(gradients)

    if (i + 1) % rescaling_interval == 0:
        rescale(mlp, data[2], model.params)  # validation data
        print('rescaled')

    if (i + 1) % interval == 0:
        print('iteration %d loss %f' % (i + 1, loss))

# NOTE(review): the source was collapsed onto one line; the dump is assumed to
# run once after the loop rather than on every iteration -- confirm.
# The with-block closes the file even if pickling fails.
with open('dr-g-norm-%d' % rescaling_interval, 'wb') as history_file:
    pickle.dump((loss_history, mean, std, L_2, minimum, maximum), history_file)
def test_fromnumeric():
    """Smoke-test the ``fromnumeric`` family of functions exposed by ``np``.

    Each function is called with small example arguments. The results are
    discarded, so the test fails only if one of the calls raises.
    """
    # Functions
    # 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax',
    # 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip',
    # 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean',
    # 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put',
    # 'rank', 'ravel', 'repeat', 'reshape', 'resize', 'round_',
    # 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze',
    # 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var',

    # take
    a = [4, 3, 5, 7, 6, 8]
    indices = [0, 1, 4]
    np.take(a, indices)
    a = np.array(a)
    # a[indices]
    np.take(a, [[0, 1], [2, 3]])

    # reshape
    a = np.zeros((10, 2))
    b = a.T
    a = np.arange(6).reshape((3, 2))
    np.reshape(a, (2, 3))  # C-like index ordering
    np.reshape(np.ravel(a), (2, 3))  # equivalent to C ravel then C reshape
    np.reshape(a, (2, 3), order='F')  # Fortran-like index ordering
    np.reshape(np.ravel(a, order='F'), (2, 3), order='F')
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.reshape(a, 6)
    np.reshape(a, 6, order='F')
    np.reshape(a, (3, -1))  # the unspecified value is inferred to be 2

    # choose
    choices = [[0, 1, 2, 3], [10, 11, 12, 13],
               [20, 21, 22, 23], [30, 31, 32, 33]]
    np.choose([2, 3, 1, 0], choices)
    np.choose([2, 4, 1, 0], choices, mode='clip')  # 4 goes to 3 (4-1)
    np.choose([2, 4, 1, 0], choices, mode='wrap')  # 4 goes to (4 mod 4)
    a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
    choices = [-10, 10]
    np.choose(a, choices)
    a = np.array([0, 1]).reshape((2, 1, 1))
    c1 = np.array([1, 2, 3]).reshape((1, 3, 1))
    c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5))
    np.choose(a, (c1, c2))  # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2

    # repeat
    np.repeat(3, 4)
    x = np.array([[1, 2], [3, 4]])
    np.repeat(x, 2)
    np.repeat(x, 3, axis=1)
    np.repeat(x, [1, 2], axis=0)

    # put
    a = np.arange(5)
    np.put(a, [0, 2], [-44, -55])
    a = np.arange(5)
    np.put(a, 22, -5, mode='clip')

    # swapaxes / transpose
    x = np.array([[1, 2, 3]])
    np.swapaxes(x, 0, 1)
    x = np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]])
    np.swapaxes(x, 0, 2)
    x = np.arange(4).reshape((2, 2))
    np.transpose(x)
    x = np.ones((1, 2, 3))
    np.transpose(x, (1, 0, 2)).shape

    # partition / argpartition
    a = np.array([3, 4, 2, 1])
    np.partition(a, 3)
    np.partition(a, (1, 3))
    x = np.array([3, 4, 2, 1])
    x[np.argpartition(x, 3)]
    x[np.argpartition(x, (1, 3))]
    x = [3, 4, 2, 1]
    np.array(x)[np.argpartition(x, 3)]

    # sort
    a = np.array([[1, 4], [3, 1]])
    np.sort(a)  # sort along the last axis
    np.sort(a, axis=None)  # sort the flattened array
    np.sort(a, axis=0)  # sort along the first axis
    dtype = [('name', 'S10'), ('height', float), ('age', int)]
    values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
              ('Galahad', 1.7, 38)]
    a = np.array(values, dtype=dtype)  # create a structured array
    np.sort(a, order='height')  # doctest: +SKIP
    np.sort(a, order=['age', 'height'])  # doctest: +SKIP

    # argsort
    x = np.array([3, 1, 2])
    np.argsort(x)
    x = np.array([[0, 3], [2, 2]])
    np.argsort(x, axis=0)
    np.argsort(x, axis=1)
    x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')])
    np.argsort(x, order=('x', 'y'))
    np.argsort(x, order=('y', 'x'))

    # argmax / argmin
    a = np.arange(6).reshape(2, 3)
    np.argmax(a)
    np.argmax(a, axis=0)
    np.argmax(a, axis=1)
    b = np.arange(6)
    b[1] = 5
    np.argmax(b)  # Only the first occurrence is returned.
    a = np.arange(6).reshape(2, 3)
    np.argmin(a)
    np.argmin(a, axis=0)
    np.argmin(a, axis=1)
    b = np.arange(6)
    b[4] = 0
    np.argmin(b)  # Only the first occurrence is returned.

    # searchsorted
    np.searchsorted([1, 2, 3, 4, 5], 3)
    np.searchsorted([1, 2, 3, 4, 5], 3, side='right')
    np.searchsorted([1, 2, 3, 4, 5], [-10, 10, 2, 3])

    # resize
    a = np.array([[0, 1], [2, 3]])
    np.resize(a, (2, 3))
    np.resize(a, (1, 4))
    np.resize(a, (2, 4))

    # squeeze
    x = np.array([[[0], [1], [2]]])
    x.shape
    np.squeeze(x).shape
    np.squeeze(x, axis=(2, )).shape

    # trace
    a = np.arange(4).reshape(2, 2)
    a = np.arange(8).reshape(2, 2, 2)
    a
    a[:, :, 0]  # main diagonal is [0 6]
    a[:, :, 1]  # main diagonal is [1 7]
    np.trace(np.eye(3))
    a = np.arange(8).reshape((2, 2, 2))
    np.trace(a)
    a = np.arange(24).reshape((2, 2, 2, 3))
    np.trace(a).shape

    # ravel
    x = np.array([[1, 2, 3], [4, 5, 6]])
    np.ravel(x)
    x.reshape(-1)
    np.ravel(x, order='F')
    np.ravel(x.T)
    np.ravel(x.T, order='A')
    a = np.arange(3)[::-1]
    a
    # a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a

    # nonzero
    x = np.eye(3)
    np.nonzero(x)
    x[np.nonzero(x)]
    np.transpose(np.nonzero(x))
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    a > 3
    np.nonzero(a > 3)

    # shape
    np.shape(np.eye(3))
    np.shape([[1, 2]])
    np.shape([0])
    np.shape(0)
    a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
    np.shape(a)
    a.shape

    # compress
    a = np.array([[1, 2], [3, 4], [5, 6]])
    np.compress([0, 1], a, axis=0)
    np.compress([False, True, True], a, axis=0)
    np.compress([False, True], a, axis=1)
    np.compress([False, True], a)

    # clip
    a = np.arange(10)
    np.clip(a, 1, 8)
    np.clip(a, 3, 6, out=a)
    a = np.arange(10)
    np.clip(a, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)

    # sum
    np.sum([])
    np.sum([0.5, 1.5])
    np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
    np.sum([[0, 1], [0, 5]])
    np.sum([[0, 1], [0, 5]], axis=0)
    np.sum([[0, 1], [0, 5]], axis=1)
    # np.ones(128, dtype=np.int8).sum(dtype=np.int8)
    # np.any([[True, False], [True, True]])
    # np.any([[True, False], [False, False]], axis=0)
    # np.any([-1, 0, 5])
    # np.any(np.nan)
    # np.all([[True,False],[True,True]])
    # np.all([[True,False],[True,True]], axis=0)
    # np.all([-1, 4, 5])
    # np.all([1.0, np.nan])

    # cumsum
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumsum(a)
    np.cumsum(a, dtype=float)  # specifies type of output value(s)
    np.cumsum(a, axis=0)  # sum over rows for each of the 3 columns
    np.cumsum(a, axis=1)  # sum over columns for each of the 2 rows

    # ptp
    x = np.arange(4).reshape((2, 2))
    np.ptp(x, axis=0)
    np.ptp(x, axis=1)

    # amax / amin
    a = np.arange(4).reshape((2, 2))
    np.amax(a)  # Maximum of the flattened array
    np.amax(a, axis=0)  # Maxima along the first axis
    np.amax(a, axis=1)  # Maxima along the second axis
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; the builtin selects the same dtype.
    b = np.arange(5, dtype=float)
    # b[2] = np.NaN
    np.amax(b)
    np.nanmax(b)
    a = np.arange(4).reshape((2, 2))
    np.amin(a)  # Minimum of the flattened array
    np.amin(a, axis=0)  # Minima along the first axis
    np.amin(a, axis=1)  # Minima along the second axis
    b = np.arange(5, dtype=float)
    # b[2] = np.NaN
    np.amin(b)
    np.nanmin(b)

    # alen
    a = np.zeros((7, 4, 5))
    a.shape[0]
    # np.alen no longer exists in recent NumPy releases; exercise it where
    # available and fall back to len(), which reports the same length here.
    getattr(np, 'alen', len)(a)

    # prod
    x = np.array([536870910, 536870910, 536870910, 536870910])
    np.prod(x)  # random
    np.prod([])
    np.prod([1., 2.])
    np.prod([[1., 2.], [3., 4.]])
    np.prod([[1., 2.], [3., 4.]], axis=1)
    x = np.array([1, 2, 3], dtype=np.uint8)
    # np.prod(x).dtype == np.uint
    x = np.array([1, 2, 3], dtype=np.int8)
    # np.prod(x).dtype == np.int

    # cumprod
    a = np.array([1, 2, 3])
    np.cumprod(a)  # intermediate results 1, 1*2
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumprod(a, dtype=float)  # specify type of output
    np.cumprod(a, axis=0)
    np.cumprod(a, axis=1)

    # ndim / size
    np.ndim([[1, 2, 3], [4, 5, 6]])
    np.ndim(np.array([[1, 2, 3], [4, 5, 6]]))
    np.ndim(1)
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.size(a)
    np.size(a, 1)
    np.size(a, 0)

    # around
    np.around([0.37, 1.64])
    np.around([0.37, 1.64], decimals=1)
    np.around([.5, 1.5, 2.5, 3.5, 4.5])  # rounds to nearest even value
    np.around([1, 2, 3, 11], decimals=1)  # ndarray of ints is returned
    np.around([1, 2, 3, 11], decimals=-1)

    # mean
    a = np.array([[1, 2], [3, 4]])
    np.mean(a)
    np.mean(a, axis=0)
    np.mean(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.mean(a)
    np.mean(a, dtype=np.float64)

    # std
    a = np.array([[1, 2], [3, 4]])
    np.std(a)
    np.std(a, axis=0)
    np.std(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.std(a)
    np.std(a, dtype=np.float64)

    # var
    a = np.array([[1, 2], [3, 4]])
    np.var(a)
    np.var(a, axis=0)
    np.var(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.var(a)
    np.var(a, dtype=np.float64)
def test_fromnumeric():
    """Smoke-test the ``fromnumeric`` family of functions exposed by ``np``.

    Every call uses small example arguments and its result is thrown away,
    so the only way this test can fail is by one of the calls raising.
    """
    # Functions
    # 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax',
    # 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip',
    # 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean',
    # 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put',
    # 'rank', 'ravel', 'repeat', 'reshape', 'resize', 'round_',
    # 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze',
    # 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var',

    # take
    a = [4, 3, 5, 7, 6, 8]
    indices = [0, 1, 4]
    np.take(a, indices)
    a = np.array(a)
    # a[indices]
    np.take(a, [[0, 1], [2, 3]])

    # reshape
    a = np.zeros((10, 2))
    b = a.T
    a = np.arange(6).reshape((3, 2))
    np.reshape(a, (2, 3))  # C-like index ordering
    np.reshape(np.ravel(a), (2, 3))  # equivalent to C ravel then C reshape
    np.reshape(a, (2, 3), order='F')  # Fortran-like index ordering
    np.reshape(np.ravel(a, order='F'), (2, 3), order='F')
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.reshape(a, 6)
    np.reshape(a, 6, order='F')
    np.reshape(a, (3, -1))  # the unspecified value is inferred to be 2

    # choose
    choices = [[0, 1, 2, 3], [10, 11, 12, 13],
               [20, 21, 22, 23], [30, 31, 32, 33]]
    np.choose([2, 3, 1, 0], choices)
    np.choose([2, 4, 1, 0], choices, mode='clip')  # 4 goes to 3 (4-1)
    np.choose([2, 4, 1, 0], choices, mode='wrap')  # 4 goes to (4 mod 4)
    a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
    choices = [-10, 10]
    np.choose(a, choices)
    a = np.array([0, 1]).reshape((2, 1, 1))
    c1 = np.array([1, 2, 3]).reshape((1, 3, 1))
    c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5))
    np.choose(a, (c1, c2))  # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2

    # repeat
    np.repeat(3, 4)
    x = np.array([[1, 2], [3, 4]])
    np.repeat(x, 2)
    np.repeat(x, 3, axis=1)
    np.repeat(x, [1, 2], axis=0)

    # put
    a = np.arange(5)
    np.put(a, [0, 2], [-44, -55])
    a = np.arange(5)
    np.put(a, 22, -5, mode='clip')

    # swapaxes / transpose
    x = np.array([[1, 2, 3]])
    np.swapaxes(x, 0, 1)
    x = np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]])
    np.swapaxes(x, 0, 2)
    x = np.arange(4).reshape((2, 2))
    np.transpose(x)
    x = np.ones((1, 2, 3))
    np.transpose(x, (1, 0, 2)).shape

    # partition / argpartition
    a = np.array([3, 4, 2, 1])
    np.partition(a, 3)
    np.partition(a, (1, 3))
    x = np.array([3, 4, 2, 1])
    x[np.argpartition(x, 3)]
    x[np.argpartition(x, (1, 3))]
    x = [3, 4, 2, 1]
    np.array(x)[np.argpartition(x, 3)]

    # sort
    a = np.array([[1, 4], [3, 1]])
    np.sort(a)  # sort along the last axis
    np.sort(a, axis=None)  # sort the flattened array
    np.sort(a, axis=0)  # sort along the first axis
    dtype = [('name', 'S10'), ('height', float), ('age', int)]
    values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
              ('Galahad', 1.7, 38)]
    a = np.array(values, dtype=dtype)  # create a structured array
    np.sort(a, order='height')  # doctest: +SKIP
    np.sort(a, order=['age', 'height'])  # doctest: +SKIP

    # argsort
    x = np.array([3, 1, 2])
    np.argsort(x)
    x = np.array([[0, 3], [2, 2]])
    np.argsort(x, axis=0)
    np.argsort(x, axis=1)
    x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')])
    np.argsort(x, order=('x', 'y'))
    np.argsort(x, order=('y', 'x'))

    # argmax / argmin
    a = np.arange(6).reshape(2, 3)
    np.argmax(a)
    np.argmax(a, axis=0)
    np.argmax(a, axis=1)
    b = np.arange(6)
    b[1] = 5
    np.argmax(b)  # Only the first occurrence is returned.
    a = np.arange(6).reshape(2, 3)
    np.argmin(a)
    np.argmin(a, axis=0)
    np.argmin(a, axis=1)
    b = np.arange(6)
    b[4] = 0
    np.argmin(b)  # Only the first occurrence is returned.

    # searchsorted
    np.searchsorted([1, 2, 3, 4, 5], 3)
    np.searchsorted([1, 2, 3, 4, 5], 3, side='right')
    np.searchsorted([1, 2, 3, 4, 5], [-10, 10, 2, 3])

    # resize
    a = np.array([[0, 1], [2, 3]])
    np.resize(a, (2, 3))
    np.resize(a, (1, 4))
    np.resize(a, (2, 4))

    # squeeze
    x = np.array([[[0], [1], [2]]])
    x.shape
    np.squeeze(x).shape
    np.squeeze(x, axis=(2, )).shape

    # trace
    a = np.arange(4).reshape(2, 2)
    a = np.arange(8).reshape(2, 2, 2)
    a
    a[:, :, 0]  # main diagonal is [0 6]
    a[:, :, 1]  # main diagonal is [1 7]
    np.trace(np.eye(3))
    a = np.arange(8).reshape((2, 2, 2))
    np.trace(a)
    a = np.arange(24).reshape((2, 2, 2, 3))
    np.trace(a).shape

    # ravel
    x = np.array([[1, 2, 3], [4, 5, 6]])
    np.ravel(x)
    x.reshape(-1)
    np.ravel(x, order='F')
    np.ravel(x.T)
    np.ravel(x.T, order='A')
    a = np.arange(3)[::-1]
    a
    # a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a

    # nonzero
    x = np.eye(3)
    np.nonzero(x)
    x[np.nonzero(x)]
    np.transpose(np.nonzero(x))
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    a > 3
    np.nonzero(a > 3)

    # shape
    np.shape(np.eye(3))
    np.shape([[1, 2]])
    np.shape([0])
    np.shape(0)
    a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
    np.shape(a)
    a.shape

    # compress
    a = np.array([[1, 2], [3, 4], [5, 6]])
    np.compress([0, 1], a, axis=0)
    np.compress([False, True, True], a, axis=0)
    np.compress([False, True], a, axis=1)
    np.compress([False, True], a)

    # clip
    a = np.arange(10)
    np.clip(a, 1, 8)
    np.clip(a, 3, 6, out=a)
    a = np.arange(10)
    np.clip(a, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)

    # sum
    np.sum([])
    np.sum([0.5, 1.5])
    np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
    np.sum([[0, 1], [0, 5]])
    np.sum([[0, 1], [0, 5]], axis=0)
    np.sum([[0, 1], [0, 5]], axis=1)
    # np.ones(128, dtype=np.int8).sum(dtype=np.int8)
    # np.any([[True, False], [True, True]])
    # np.any([[True, False], [False, False]], axis=0)
    # np.any([-1, 0, 5])
    # np.any(np.nan)
    # np.all([[True,False],[True,True]])
    # np.all([[True,False],[True,True]], axis=0)
    # np.all([-1, 4, 5])
    # np.all([1.0, np.nan])

    # cumsum
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumsum(a)
    np.cumsum(a, dtype=float)  # specifies type of output value(s)
    np.cumsum(a, axis=0)  # sum over rows for each of the 3 columns
    np.cumsum(a, axis=1)  # sum over columns for each of the 2 rows

    # ptp
    x = np.arange(4).reshape((2, 2))
    np.ptp(x, axis=0)
    np.ptp(x, axis=1)

    # amax / amin
    a = np.arange(4).reshape((2, 2))
    np.amax(a)  # Maximum of the flattened array
    np.amax(a, axis=0)  # Maxima along the first axis
    np.amax(a, axis=1)  # Maxima along the second axis
    # The np.float alias of the builtin float is gone from current NumPy;
    # passing the builtin requests the same dtype.
    b = np.arange(5, dtype=float)
    # b[2] = np.NaN
    np.amax(b)
    np.nanmax(b)
    a = np.arange(4).reshape((2, 2))
    np.amin(a)  # Minimum of the flattened array
    np.amin(a, axis=0)  # Minima along the first axis
    np.amin(a, axis=1)  # Minima along the second axis
    b = np.arange(5, dtype=float)
    # b[2] = np.NaN
    np.amin(b)
    np.nanmin(b)

    # alen
    a = np.zeros((7, 4, 5))
    a.shape[0]
    # np.alen was removed from NumPy; call it when present, otherwise use
    # len(), which gives the same answer for this array.
    getattr(np, 'alen', len)(a)

    # prod
    x = np.array([536870910, 536870910, 536870910, 536870910])
    np.prod(x)  # random
    np.prod([])
    np.prod([1., 2.])
    np.prod([[1., 2.], [3., 4.]])
    np.prod([[1., 2.], [3., 4.]], axis=1)
    x = np.array([1, 2, 3], dtype=np.uint8)
    # np.prod(x).dtype == np.uint
    x = np.array([1, 2, 3], dtype=np.int8)
    # np.prod(x).dtype == np.int

    # cumprod
    a = np.array([1, 2, 3])
    np.cumprod(a)  # intermediate results 1, 1*2
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumprod(a, dtype=float)  # specify type of output
    np.cumprod(a, axis=0)
    np.cumprod(a, axis=1)

    # ndim / size
    np.ndim([[1, 2, 3], [4, 5, 6]])
    np.ndim(np.array([[1, 2, 3], [4, 5, 6]]))
    np.ndim(1)
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.size(a)
    np.size(a, 1)
    np.size(a, 0)

    # around
    np.around([0.37, 1.64])
    np.around([0.37, 1.64], decimals=1)
    np.around([.5, 1.5, 2.5, 3.5, 4.5])  # rounds to nearest even value
    np.around([1, 2, 3, 11], decimals=1)  # ndarray of ints is returned
    np.around([1, 2, 3, 11], decimals=-1)

    # mean
    a = np.array([[1, 2], [3, 4]])
    np.mean(a)
    np.mean(a, axis=0)
    np.mean(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.mean(a)
    np.mean(a, dtype=np.float64)

    # std
    a = np.array([[1, 2], [3, 4]])
    np.std(a)
    np.std(a, axis=0)
    np.std(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.std(a)
    np.std(a, dtype=np.float64)

    # var
    a = np.array([[1, 2], [3, 4]])
    np.var(a)
    np.var(a, axis=0)
    np.var(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.var(a)
    np.var(a, dtype=np.float64)
from facility import *
from solver_primitives import *

# Network layout: HIDDEN_LAYERS hidden layers of width 1024 and a
# 10-way output layer.
HIDDEN_LAYERS = 4
shapes = (1024, ) * HIDDEN_LAYERS + (10, )
# Passed to every Export module below; presumably filled with each affine
# layer's output during forward() -- confirm against builder.Export.
storage = {}
activation = builder.ReLU

mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
mlp.append(builder.Export('affine%d' % (len(shapes) - 1), storage))

model = builder.Model(mlp, 'softmax', (3072, ))
initialize(model)

# One batch of 64 standard-normal samples with 3072 features each.
X = np.random.normal(0, 1, (64, 3072))

output = model.forward(X, 'train')
# The prints below format a single string so the output is identical under
# the Python 2 print statement and the Python 3 print function.
print('origin')
for key, value in storage.items():
    print('%s %s' % (key, np.std(value)))

# Rescale the weights, then forward the same batch again.
rescale(mlp, X, model.params)
rescaled_output = model.forward(X, 'train')
print('rescaled')
for key, value in storage.items():
    print('%s %s' % (key, np.std(value)))

# Mean absolute difference between the outputs before and after rescaling.
print(np.mean(np.abs(output - rescaled_output)))