def step(self, batch_size):
    momentum = self.momentum
    lr = self.lr
    if momentum != 0:
        # momentum update: d <- momentum * d - (lr / batch_size) * grad,
        # accumulated in place via the out= argument to avoid fresh buffers
        self.dW *= momentum
        nd.elemwise_add(-lr / batch_size * self.model.W.grad, self.dW, out=self.dW)
        self.dv *= momentum
        nd.elemwise_add(-lr / batch_size * self.model.v_bias.grad, self.dv, out=self.dv)
        self.dh *= momentum
        nd.elemwise_add(-lr / batch_size * self.model.h_bias.grad, self.dh, out=self.dh)
    else:
        # plain gradient step without a velocity term
        self.dW = -lr / batch_size * self.model.W.grad
        self.dv = -lr / batch_size * self.model.v_bias.grad
        self.dh = -lr / batch_size * self.model.h_bias.grad
    self.model.W += self.dW
    self.model.v_bias += self.dv
    self.model.h_bias += self.dh
    self.model.syn_weight()
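# A standalone sketch of the same momentum update on plain NDArrays
# (hypothetical shapes and hyperparameters; grad stands in for W.grad):
from mxnet import nd

lr, momentum, batch_size = 0.1, 0.9, 4
W = nd.random.normal(shape=(3, 3))
grad = nd.ones((3, 3))       # placeholder for a real gradient
dW = nd.zeros_like(W)        # velocity buffer

dW *= momentum                                        # decay old velocity
nd.elemwise_add(-lr / batch_size * grad, dW, out=dW)  # add scaled gradient in place
W += dW                                               # apply the step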
def _update_impl(self, index, weight, grad, state):
    assert isinstance(weight, nd.NDArray)
    assert isinstance(grad, nd.NDArray)
    self._update_count(index)
    lr = self._get_lr(index) * self.rescale_grad
    if state is not None:
        # momentum branch: state <- momentum * state - lr * grad, then
        # weight <- weight + state, all written in place via out=
        state *= self.momentum
        nd.elemwise_add(-lr * grad, state, out=state)
        nd.elemwise_add(weight, state, out=weight)
    else:
        # vanilla SGD step
        nd.elemwise_add(weight, -lr * grad, out=weight)
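# For readability, the same update written out of place (this allocates
# temporaries on every call, which is exactly what the out= version above
# avoids; a sketch for comparison, not the library's implementation):
def sgd_momentum_reference(weight, grad, state, lr, momentum):
    if state is not None:
        state[:] = momentum * state - lr * grad  # velocity update
        weight[:] = weight + state               # apply velocity
    else:
        weight[:] = weight - lr * grad           # plain SGD step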
import mxnet as mx
from mxnet import nd
import numpy as np

x = nd.arange(12).reshape((3, 4))
y = nd.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
z = nd.arange(9).reshape((3, 3))
init_z = id(z)
verif = nd.dot(x, y.T) + z
print(x)
print(y)
print(z)
nd.elemwise_add(z, nd.dot(x, y.T), out=z)
late_z = id(z)
print(verif == z)
print("z didn't move?", init_z == late_z)
x

# ------------------------------ memory overhead ------------------------------
# y = y + x allocates a new buffer for the result, then rebinds y to it
before = id(y)
y = y + x
id(y) == before

# slice assignment pins the target address: a temporary holds x + y,
# then the result is copied into z's existing buffer
z = y.zeros_like()
before = id(z)
z[:] = x + y
id(z) == before

# specifying out= writes the result directly into z
nd.elemwise_add(x, y, out=z)
id(z) == before

# in-place operators reuse x's buffer, reducing memory overhead
before = id(x)
x += y
id(x) == before

# ------------------------ NDArray <-> NumPy conversion ------------------------
# nd.array() converts NumPy -> NDArray
import numpy as np
p = np.ones((2, 3))
d = nd.array(p)
d
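# The reverse direction uses asnumpy(); as far as I can tell this copies
# the data out rather than sharing the buffer:
p2 = d.asnumpy()
print(type(p2))   # <class 'numpy.ndarray'>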
x * y
nd.exp(y)
nd.dot(x, y.T)

print('id(y):', id(y))
y = y + x
print('id(y):', id(y))

print('id(y):', id(y))
y[:] = x + y
print('id(y):', id(y))

nd.elemwise_add(x, y, out=y)

print('id(x):', id(x))
x += y
x
print('id(x):', id(x))

x[1:3]
x[1, 2] = 9.0
x
x[1:2, 1:3]
x[1:2, 1:3] = 5.0
x
# 1st, there is the slice operator [:]
print(id(x))
x[:] = x + y
print(id(x))
# notice how the location in memory did not change.. cool!
# however.. we're still creating a temporary memory buffer for the
# result of (x + y) before it's stored back in the same location as x

# to get around that, we can use some of the built-in functions from
# mxnet.nd and specify the "out" parameter to be the variable we're
# looking to update in place
print(id(x))
print(x)
nd.elemwise_add(lhs=x, rhs=y, out=x)
print(id(x))
print(x)
# using the function straight from mxnet.nd is the most efficient (from
# what i've read so far...)

# BROADCASTING ----------------------------------
# how does elementwise arithmetic work when our operands have different
# dimensions?
x = nd.ones((3, 3))
y = nd.arange(3)
print(x)
print(y)
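# Following on from the question above: the shape-(3,) vector is broadcast
# across the rows of x, and reshaping it to a column broadcasts it across
# the columns instead (the same shapes are exercised later in these notes):
print(x + y)                    # y added to every row -> shape (3, 3)
print(x + y.reshape((3, 1)))    # y added to every column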
B = nd.arange(2).reshape((1, 2))
broad_add = A + B

# 4. indexing
sub_array = X[1:3]
X[1, 2] = 9
X[1:2, :] = 12

# 5. memory
before = id(Y)
Y = Y + X
id(Y) == before

Z = Y.zeros_like()
before = id(Z)
Z[:] = X + Y
id(Z) == before

nd.elemwise_add(X, Y, out=Z)
id(Z) == before

before = id(X)
X[:] = X + Y  # or X += Y
id(X) == before

# 6. NDArray & NumPy
import numpy as np
P = np.ones((2, 3))
D = nd.array(P)
P = D.asnumpy()
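# Note on the broadcast above: nd.elemwise_add expects both inputs to have
# the same shape, so differently-shaped operands need either infix + or the
# explicit broadcasting op. A is never defined in this excerpt; a shape-(3, 1)
# stand-in is assumed here:
A = nd.arange(3).reshape((3, 1))
print(A + B)                    # broadcasts to shape (3, 2)
print(nd.broadcast_add(A, B))   # same result, spelled explicitly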
print(X)
X[1:2, :] = 12
print(X)

# the result lives at a different physical address after the operation
before = id(Y)
Y = Y + X
print(id(Y) == before)

Z = Y.zeros_like()
before = id(Z)
Z[:] = Y + X
print(id(Z) == before)

nd.elemwise_add(X, Y, out=Z)  # does not allocate a temporary buffer
print(id(Z) == before)

# converting between NumPy and NDArray
p = np.ones((2, 3))
print(p)
d = nd.array(p)
print(d)
print(d.asnumpy())

# automatic differentiation
x1 = nd.arange(4).reshape((4, 1))
print(x1)
x1.attach_grad()  # allocate memory for the gradient, initialized to zero
with autograd.record():  # record the computation for differentiation
    y = 2 * nd.dot(x1.T, x1)
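# The snippet stops right after recording; the natural continuation of the
# standard autograd flow calls backward and inspects the gradient. For
# y = 2 * x1^T x1 the gradient is 4 * x1, which makes a handy sanity check:
y.backward()
print(x1.grad)
print(x1.grad == 4 * x1)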
def forward(self, x):
    f = self.net(x)
    s = self.net2(x)
    return nd.elemwise_add(f, s)
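# A minimal sketch of a gluon block built around this forward (the layer
# choices here are assumptions; the excerpt only shows forward). Both
# branches must produce the same output shape, since elemwise_add does
# not broadcast:
from mxnet import nd
from mxnet.gluon import nn

class TwoBranch(nn.Block):
    def __init__(self, **kwargs):
        super(TwoBranch, self).__init__(**kwargs)
        self.net = nn.Dense(16)
        self.net2 = nn.Dense(16)

    def forward(self, x):
        return nd.elemwise_add(self.net(x), self.net2(x))

# usage: net = TwoBranch(); net.initialize(); net(nd.ones((2, 8)))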
def main():
    mx.random.seed(1)

    x = nd.empty((3, 4))
    print(x)
    x = nd.zeros((3, 5))
    print(x)
    x = nd.ones((3, 4))
    print(x)

    y = nd.random.normal(0, 1, (3, 4))
    print(y)
    print(y.shape)
    print(y.size)

    print(x + y)
    print(x * y)
    print(nd.exp(y))
    print(nd.dot(x, y.T))

    print('id(y):', id(y))
    y = x + y
    print('id(y):', id(y))

    print('id(y):', id(y))
    y[:] = x + y
    print('id(y):', id(y))

    nd.elemwise_add(x, y, out=y)

    print('id(x):', id(x))
    x += y
    print('id(x):', id(x))

    print(x[1:3])
    print(x[1:2, 1:3])
    x[1:2, 1:3] = 5.0
    print(x)

    x = nd.ones(shape=(3, 3))
    y = nd.arange(3)
    print('x = ', x)
    print('y = ', y)
    print('x + y = ', x + y)
    y = y.reshape(shape=(3, 1))
    print('y = ', y)
    print('x + y = ', x + y)

    a = x.asnumpy()
    print(type(a))
    y = nd.array(a)
    print(y)

    z = nd.ones(shape=(3, 3), ctx=mx.gpu(0))
    print(z)
    x_gpu = x.copyto(mx.gpu(0))
    print(x_gpu + z)
    print(x_gpu.context)
    print(z.context)

    z = nd.ones(shape=(3, 3))
    print('id(z) = ', id(z))
    z2 = z.copyto(mx.gpu(0))
    print('id(z2) = ', id(z2))
    z3 = z.as_in_context(mx.gpu(0))
    print('id(z3) = ', id(z3))
    print(z)
    print(z3)
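# The GPU section above assumes mx.gpu(0) exists and will raise otherwise;
# a guarded variant (my addition, not part of the original) picks the
# context at runtime:
import mxnet as mx
from mxnet import nd

ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()
z = nd.ones(shape=(3, 3), ctx=ctx)
print(z.context)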
from mxnet import nd
import numpy as np

"""
# broadcasting
a = nd.arange(12).reshape((12, 1))
b = nd.arange(6).reshape((1, 6))
c = a + b
print(c)
"""

# indexing: no different from numpy
a = nd.arange(24).reshape(4, 6)
print(a)
print(a[0:2, :])

b = nd.arange(24).reshape(4, 6)

# this increases memory overhead (a new buffer is allocated for the result)
before = id(b)
b = b + a
print(id(b) == before)

# reducing memory overhead, option 1: in-place operator
before = id(b)
b += a
print(id(b) == before)

# reducing memory overhead, option 2: explicit out= argument
before = id(b)
b = nd.elemwise_add(a, b, out=b)
print(id(b) == before)

# converting between numpy and NDArray
p = np.ones((2, 3))
d = nd.array(p)
print(d)
e = d.asnumpy()
print(e)
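# Both conversions copy the data, as far as I can tell, so mutating the
# NumPy array afterwards leaves the NDArray untouched:
e[0, 0] = 100
print(d)   # d still all ones; e was a copy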