def test_elementwise():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    b = nd.ones(shape=(LARGE_X, SMALL_Y))
    res = a + b
    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
    res = a + 1
    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
    res = nd.sqrt(a + 3)
    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
def test_broadcast():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    b = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
    res = nd.broadcast_to(b, shape=(b.shape[0], SMALL_Y))
    # the last row holds LARGE_X - 1; for very large LARGE_X (e.g. 1e8) that
    # value is not exactly representable in float32 and rounds to LARGE_X,
    # which is what this assertion relies on
    assert np.sum(res[-1].asnumpy() == LARGE_X) == res.shape[1]
    res = mx.nd.broadcast_like(b, a)
    assert np.sum(res[-1].asnumpy() == LARGE_X) == a.shape[1]
def test_where():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    b = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
    b = nd.broadcast_to(b, shape=(b.shape[0], SMALL_Y))
    res = nd.where(b > 100, a, b)
    assert np.sum(res[-1].asnumpy() == 1) == b.shape[1]
    csr_cond = nd.sparse.cast_storage(b < 10, 'csr')
    res = nd.sparse.where(csr_cond, a, b)
    assert np.sum(res[0].asnumpy() == 1) == b.shape[1]
def _flat_lrp(self, R):
    '''
    Distribute relevance for each output evenly to the output neurons'
    receptive fields.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    for i in range(Hout):
        for j in range(Wout):
            Z = nd.ones((N, hf, wf, df, NF), ctx=self.ctx, dtype=self.dtype)
            Zs = Z.sum(axis=(1, 2, 3), keepdims=True)
            Rx[:, i * hstride:i * hstride + hf, j * wstride:j * wstride + wf, :] += \
                ((Z / Zs) * nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)).sum(axis=4)
    return Rx
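# A quick numeric illustration of the "flat" rule above (a hypothetical window,
# not part of the original class): with a 2x2x1 receptive field and an output
# relevance of 8, every input position receives an equal share 8 / 4 = 2.
from mxnet import nd

Z_demo = nd.ones((2, 2, 1))
Zs_demo = Z_demo.sum().asscalar()  # 4 contributing inputs
print((Z_demo / Zs_demo) * 8)      # every entry is 2.0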
def test_FullyConnected():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    b = nd.ones(shape=(SMALL_Y, SMALL_Y))
    res = nd.FullyConnected(a, b, num_hidden=b.shape[1], no_bias=True)
    assert np.sum(res[-1].asnumpy() == SMALL_Y) == b.shape[1]
from mxnet import nd
from mxnet.gluon import nn
import matplotlib.pyplot as plt

def corr2d(X, K):
    """2D cross-correlation of input X with kernel K."""
    n, m = K.shape
    Y = nd.zeros((X.shape[0] - n + 1, X.shape[1] - m + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i + n, j:j + m] * K).sum()
    return Y

X = nd.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = nd.array([[0, 1], [2, 3]])
# print(corr2d(X, K))

X = nd.ones((6, 8))
X[:, 2:6] = 0
print(X)
plt.imshow(X.asnumpy(), cmap='gray')

K = nd.array([[1, -1]])
# print(K)
Y = corr2d(X, K)
print(Y)

class Conv2D(nn.Block):
    def __init__(self, kernel_size, **kwargs):
        super(Conv2D, self).__init__(**kwargs)
        self.weight = self.params.get('weight', shape=kernel_size)
        self.bias = self.params.get('bias', shape=(1,))
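# The Conv2D class above is cut off before its forward pass. A minimal
# completion consistent with corr2d (a sketch mirroring the usual d2l-style
# definition, not necessarily the author's exact code), to be placed inside
# the class body:

    def forward(self, x):
        return corr2d(x, self.weight.data()) + self.bias.data()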
def test_broadcast_div():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    b = nd.ones(shape=(LARGE_X, 1)) * 2
    res = a / b
    assert np.sum(res[-1].asnumpy() == 0.5) == a.shape[1]
def test_expand_dims():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    res = nd.expand_dims(a, axis=1)
    assert res.shape == (a.shape[0], 1, a.shape[1])
def test_slice():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    res = nd.slice(a, begin=(LARGE_X - 1000, 1), end=(LARGE_X, SMALL_Y))
    assert np.sum(res[-1].asnumpy() == 1) == res.shape[1]
def _get_batch_axis_test_data(in_size=32):
    data = nd.ones((100, in_size))
    label = nd.zeros((1, in_size))
    data_arr = TestAxisArrayDataset(data, label)
    return mx.gluon.data.DataLoader(data_arr, batch_size=8)
def test_ndarray_ones():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    assert a[-1][0] == 1
    assert nd.sum(a).asnumpy() == LARGE_SIZE
def test_copy():
    a = nd.ones((SMALL_Y, LARGE_X))
    b = a.copy()
    nd.waitall()
    assert b.shape == a.shape
    assert b.size == LARGE_SIZE
def test_neg():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    c = a
    c = c.__neg__()
    assert c[0][-1] == -1
    assert c.shape == a.shape
def test_dot():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    b = nd.ones(shape=(SMALL_Y, SMALL_Y))
    res = nd.dot(a, b)
    assert np.sum(res[-1].asnumpy() == SMALL_Y) == b.shape[1]
def test_reduce():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    assert nd.sum(a).asnumpy() == a.shape[0] * a.shape[1]
def grad_global_norm(parameters, max_norm):
    """Calculate the 2-norm of gradients of parameters, and how much they should be scaled down
    such that their 2-norm does not exceed `max_norm`.

    If gradients exist for more than one context for a parameter, the user needs to explicitly
    call ``trainer.allreduce_grads`` so that the gradients are summed first before calculating
    the 2-norm.

    .. note::

        This function is only for use when `update_on_kvstore` is set to False in trainer.

    Example::

        trainer = Trainer(net.collect_params(), update_on_kvstore=False, ...)
        for x, y in mx.gluon.utils.split_and_load(X, [mx.gpu(0), mx.gpu(1)]):
            with mx.autograd.record():
                y = net(x)
                loss = loss_fn(y, label)
            loss.backward()
        trainer.allreduce_grads()
        norm, ratio = grad_global_norm(net.collect_params().values(), max_norm)
        trainer.update(batch_size * ratio)
        ...

    Parameters
    ----------
    parameters : list of Parameters
    max_norm : float
        The maximum allowed 2-norm of the gradients.

    Returns
    -------
    NDArray
        Total norm. Shape is (1,)
    NDArray
        Ratio for rescaling gradients based on max_norm s.t. grad = grad / ratio.
        If total norm is NaN, ratio will be NaN, too. Shape is (1,)
    NDArray
        Whether the total norm is finite. Shape is (1,)
    """
    # collect gradient arrays
    arrays = []
    idx = 0
    for p in parameters:
        if p.grad_req != 'null':
            p_grads = p.list_grad()
            arrays.append(p_grads[idx % len(p_grads)])
            idx += 1
    assert len(arrays) > 0, 'No parameter found available for gradient norm.'

    # compute gradient norms
    def _norm(array):
        # TODO(haibin) norm operator does not support fp16 safe reduction.
        # Issue is tracked at: https://github.com/apache/incubator-mxnet/issues/14126
        x = array.reshape((-1,)).astype('float32', copy=False)
        return nd.dot(x, x)

    norm_arrays = [_norm(arr) for arr in arrays]

    # group norm arrays by ctx
    def group_by_ctx(arr_list):
        groups = collections.defaultdict(list)
        for arr in arr_list:
            ctx = arr.context
            groups[ctx].append(arr)
        return groups

    norm_groups = group_by_ctx(norm_arrays)

    # reduce
    ctx, dtype = arrays[0].context, 'float32'
    norms = [nd.add_n(*g).as_in_context(ctx) for g in norm_groups.values()]
    total_norm = nd.add_n(*norms).sqrt()
    scale = total_norm / max_norm
    # is_finite = 0 if NaN or Inf, 1 otherwise.
    is_finite = nd.contrib.isfinite(scale)
    # If scale is finite, nd.maximum selects the max between scale and 1. That is,
    # 1 is returned if total_norm does not exceed max_norm.
    # If scale is NaN or Inf, the result of nd.maximum is undefined, so we instead
    # use choices.take(is_finite) to propagate the NaN or Inf.
    scale_or_one = nd.maximum(nd.ones((1,), dtype=dtype, ctx=ctx), scale)
    choices = nd.concat(scale, scale_or_one, dim=0)
    chosen_scale = choices.take(is_finite)
    return total_norm, chosen_scale, is_finite
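# The take-based selection at the end is easy to miss: choices.take(is_finite)
# picks index 0 (the raw, possibly non-finite scale) when the norm overflowed
# and index 1 (scale_or_one) otherwise. A tiny standalone sketch of the trick,
# assuming only that mxnet is installed:
from mxnet import nd

scale = nd.array([float('nan')])
scale_or_one = nd.maximum(nd.ones((1,)), scale)  # undefined when scale is NaN
choices = nd.concat(scale, scale_or_one, dim=0)
is_finite = nd.contrib.isfinite(scale)           # 0.0 because scale is NaN
print(choices.take(is_finite))                   # NaN is propagated, not masked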
# -*- coding:utf-8 -*-
import mxnet as mx
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior  # MultiBoxPrior generates anchor (prior) boxes

n = 40
# input shape: batch x channel x height x width
x = nd.random_uniform(shape=(1, 3, n, n))  # the image

# n preset sizes and m preset aspect ratios yield n + m - 1 boxes per pixel
y = MultiBoxPrior(x, sizes=[.5, .25, .1], ratios=[1, 2, .5])

# take the first anchor box centered on the pixel at (20, 20);
# each box is in (x_min, y_min, x_max, y_max) format, as fractions of the image size
boxes = y.reshape((n, n, -1, 4))
print('The first anchor box at row 21, column 21:', boxes[20, 20, 0, :])

import matplotlib.pyplot as plt

def box_to_rect(box, color, linewidth=3):
    """Convert an anchor box to a matplotlib rectangle."""
    box = box.asnumpy()
    return plt.Rectangle(
        (box[0], box[1]), (box[2] - box[0]), (box[3] - box[1]),
        fill=False, edgecolor=color, linewidth=linewidth)

colors = ['blue', 'green', 'red', 'black', 'magenta']  # 3 + 3 - 1 = 5 anchors
plt.imshow(nd.ones((n, n, 3)).asnumpy())
anchors = boxes[20, 20, :, :]
for i in range(anchors.shape[0]):
    plt.gca().add_patch(box_to_rect(anchors[i, :] * n, colors[i]))
plt.show()
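# A quick shape check makes the n + m - 1 rule concrete: with 3 sizes and
# 3 ratios there are 5 anchors per pixel, so for n = 40 the prior tensor
# should hold 40 * 40 * 5 = 8000 boxes (a hedged expectation based on how
# MultiBoxPrior flattens its output):
print(y.shape)      # expected: (1, 8000, 4)
print(boxes.shape)  # expected: (40, 40, 5, 4) after the reshape above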
def _get_test_data(in_size=32):
    data = nd.ones((in_size, 100))
    label = nd.zeros((in_size, 1))
    data_arr = mx.gluon.data.dataset.ArrayDataset(data, label)
    return mx.gluon.data.DataLoader(data_arr, batch_size=8)
def main():
    mx.random.seed(1)
    x = nd.empty((3, 4))
    print(x)
    x = nd.zeros((3, 5))
    print(x)
    x = nd.ones((3, 4))
    print(x)
    y = nd.random.normal(0, 1, (3, 4))
    print(y)
    print(y.shape)
    print(y.size)
    print(x + y)
    print(x * y)
    print(nd.exp(y))
    print(nd.dot(x, y.T))

    # `y = x + y` allocates new memory, so id(y) changes
    print('id(y):', id(y))
    y = x + y
    print('id(y):', id(y))

    # `y[:] = x + y` writes in place, so id(y) stays the same
    print('id(y):', id(y))
    y[:] = x + y
    print('id(y):', id(y))

    # elemwise_add with out=y and `x += y` also update in place
    nd.elemwise_add(x, y, out=y)
    print('id(x):', id(x))
    x += y
    print('id(x):', id(x))

    print(x[1:3])
    print(x[1:2, 1:3])
    x[1:2, 1:3] = 5.0
    print(x)

    # broadcasting
    x = nd.ones(shape=(3, 3))
    y = nd.arange(3)
    print('x = ', x)
    print('y = ', y)
    print('x + y = ', x + y)
    y = y.reshape(shape=(3, 1))
    print('y = ', y)
    print('x + y = ', x + y)

    # NDArray <-> NumPy conversion
    a = x.asnumpy()
    print(type(a))
    y = nd.array(a)
    print(y)

    # GPU contexts
    z = nd.ones(shape=(3, 3), ctx=mx.gpu(0))
    print(z)
    x_gpu = x.copyto(mx.gpu(0))
    print(x_gpu + z)
    print(x_gpu.context)
    print(z.context)

    # copyto always copies; as_in_context copies only across devices
    z = nd.ones(shape=(3, 3))
    print('id(z) = ', id(z))
    z2 = z.copyto(mx.gpu(0))
    print('id(z2) = ', id(z2))
    z3 = z.as_in_context(mx.gpu(0))
    print('id(z3) = ', id(z3))
    print(z)
    print(z3)
import dgl
from mxnet import nd

# `graph_data` is not defined in this snippet; the dictionary below is an
# assumed example, chosen to be consistent with the node/edge types and the
# feature shapes used further down (3 drug nodes, 2 'treats' edges).
graph_data = {
    ('drug', 'interacts', 'drug'): ([0, 1], [1, 2]),
    ('drug', 'treats', 'disease'): ([0, 1], [1, 2])
}

g = dgl.heterograph(graph_data)
# print(g.ntypes, g.etypes, g.canonical_etypes)
print(g.nodes('disease'))
print(g)

# homogeneous graph
g1 = dgl.heterograph({
    ('node_type', 'edge_type', 'node_type'): ([1, 2], [3, 4])
})
print(g1)

# bipartite graph
g2 = dgl.heterograph({
    ('source_type', 'edge_type', 'destination_type'): ([1, 2], [3, 4])
})
print(g2)

print(g.metagraph().edges())

g.nodes['drug'].data['hv'] = nd.ones((3, 1))
print(g.nodes['drug'].data['hv'])
g.edges['treats'].data['he'] = nd.zeros((2, 1))
print(g.edges['treats'].data['he'])

g3 = dgl.heterograph({
    ('drug', 'interacts', 'drug'): ([0, 1], [1, 2]),
    ('drug', 'is similar', 'drug'): ([0, 1], [1, 3])
})
g3.ndata['hv'] = nd.ones((4, 1))
print(g)

# edge-type subgraph
eg = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'),
                                ('drug', 'treats', 'disease')])
print(eg, eg.nodes['drug'].data['hv'])
from mxnet import nd

x = nd.arange(12)
# [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]
# <NDArray 12 @cpu(0)>
x.shape  # (12,)
x.size   # 12
X = x.reshape((3, 4))
# [[ 0.  1.  2.  3.]
#  [ 4.  5.  6.  7.]
#  [ 8.  9. 10. 11.]]
# <NDArray 3x4 @cpu(0)>
nd.zeros((2, 3, 4))  # tensor of zeros with shape (2, 3, 4)
nd.ones((3, 4))      # tensor of ones
Y = nd.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
# create an NDArray whose element values are given by a Python list
nd.random.normal(0, 1, shape=(3, 4))
# each element sampled from a normal distribution with mean 0 and std 1
X + Y
X * Y
X / Y
Y.exp()
nd.dot(X, Y.T)
nd.concat(X, Y, dim=0), nd.concat(X, Y, dim=1)
# concatenate NDArrays: dim=0 joins along rows (axis 0, the leftmost shape
# entry), dim=1 along columns (axis 1, the second shape entry from the left)
X == Y
X.sum()  # sums all elements into a 1-element NDArray (note: not a Python scalar)
X.norm().asscalar()  # asscalar converts the result to a Python scalar
# Y.exp(), X.sum(), X.norm() etc. can also be written as nd.exp(Y), nd.sum(X), nd.norm(X).
# Data can be converted between NDArray and NumPy with the array and asnumpy functions.
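# Following the last note above, a short round trip between NumPy and NDArray:
import numpy as np
from mxnet import nd

P = np.ones((2, 3))
D = nd.array(P)     # NumPy -> NDArray
print(D.asnumpy())  # NDArray -> NumPy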
def test_take():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    idx = nd.arange(LARGE_X - 1000, LARGE_X)
    res = nd.take(a, idx)
    assert np.sum(res[-1].asnumpy() == 1) == res.shape[1]
def test_slice_assign():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    a[LARGE_X - 1:LARGE_X] = 1000
    assert np.sum(a[-1].asnumpy() == 1000) == a.shape[1]
def test_squeeze():
    a = nd.ones(shape=(LARGE_X, SMALL_Y))
    data = nd.expand_dims(a, axis=1)
    res = nd.squeeze(data)
    assert res.shape == a.shape
from mxnet import nd

x = nd.arange(12)
print(x)
print(x.shape)
print(x.size)
X = x.reshape((3, 4))
print(X)
print(nd.zeros((2, 3, 4)))
print(nd.ones((2, 3, 4)))
Y = nd.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
print(Y)
print(nd.random.normal(0, 1, shape=(3, 4)))
print(X + Y)
print(X / Y)
print(X * Y)
print(X.exp())
print(nd.dot(X, Y.T))
print(nd.concat(X, Y, dim=0), nd.concat(X, Y, dim=1))
print(X == Y)
print(X.sum())
print(X.sum().asscalar())

A = nd.arange(3).reshape((3, 1))
B = nd.arange(2).reshape((1, 2))
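# A and B are defined but never used above; presumably they were set up to
# demonstrate broadcasting, where the (3, 1) and (1, 2) operands are expanded
# to a common (3, 2) shape:
print(A + B)
# [[0. 1.]
#  [1. 2.]
#  [2. 3.]]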
def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
    """Run decoding

    Parameters
    ----------
    word_inputs : mxnet.ndarray.NDArray
        word indices of seq_len x batch_size
    tag_inputs : mxnet.ndarray.NDArray
        tag indices of seq_len x batch_size
    arc_targets : mxnet.ndarray.NDArray
        gold arc indices of seq_len x batch_size
    rel_targets : mxnet.ndarray.NDArray
        gold rel indices of seq_len x batch_size

    Returns
    -------
    tuple
        (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training;
        else, if given a gold target, (arc_accuracy, rel_accuracy, overall_accuracy, outputs);
        otherwise outputs, where outputs is a list of (arcs, rels).
    """
    is_train = autograd.is_training()

    def flatten_numpy(ndarray):
        """Flatten nd-array to 1-d column vector

        Parameters
        ----------
        ndarray : numpy.ndarray
            input tensor

        Returns
        -------
        numpy.ndarray
            A column vector
        """
        return np.reshape(ndarray, (-1,), 'F')

    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]
    mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
    num_tokens = int(np.sum(mask))  # non padding, non root token number

    if is_train or arc_targets is not None:
        mask_1D = flatten_numpy(mask)
        mask_1D_tensor = nd.array(mask_1D)

    unked_words = np.where(word_inputs < self._vocab.words_in_train,
                           word_inputs, self._vocab.UNK)
    word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
    if self.pret_word_embs:
        word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
    tag_embs = self.tag_embs(nd.array(tag_inputs))

    # Dropout
    emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

    top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                       dropout_x=self.dropout_lstm_input if is_train else 0)
    top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

    W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
    W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
    dep = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep)
    head = leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
    dep = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp)
    head = nd.Dropout(data=head, axes=[0], p=self.dropout_mlp)
    dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
    dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
    head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

    W_arc = self.arc_W.data()
    arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size,
                          num_outputs=1, bias_x=True, bias_y=False)
    # (#head x #dep) x batch_size
    flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
    # (#head) x (#dep x batch_size)
    arc_preds = arc_logits.argmax(0)  # seq_len x batch_size

    if is_train or arc_targets is not None:
        correct = np.equal(arc_preds.asnumpy(), arc_targets)
        arc_correct = correct.astype(np.float32) * mask
        arc_accuracy = np.sum(arc_correct) / num_tokens
        targets_1D = flatten_numpy(arc_targets)
        losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
        arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        arc_probs = np.transpose(
            np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(),
                       (seq_len, seq_len, batch_size), 'F'))  # batch_size x #dep x #head

    W_rel = self.rel_W.data()
    rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                          num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
    # (#head x rel_size x #dep) x batch_size
    flat_rel_logits = reshape_fortran(rel_logits,
                                      (seq_len, self._vocab.rel_size, seq_len * batch_size))
    # (#head x rel_size) x (#dep x batch_size)

    _target_vec = nd.array(targets_1D if is_train
                           else flatten_numpy(arc_preds.asnumpy())).reshape(seq_len * batch_size, 1)
    _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))
    partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
    # (rel_size) x (#dep x batch_size)

    if is_train or arc_targets is not None:
        rel_preds = partial_rel_logits.argmax(0)
        targets_1D = flatten_numpy(rel_targets)
        rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
        rel_accuracy = np.sum(rel_correct) / num_tokens
        losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
        rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        rel_probs = np.transpose(
            np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                       (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

    if is_train or arc_targets is not None:
        loss = arc_loss + rel_loss
        correct = rel_correct * flatten_numpy(arc_correct)
        overall_accuracy = np.sum(correct) / num_tokens

    if is_train:
        return arc_accuracy, rel_accuracy, overall_accuracy, loss

    outputs = []
    # parse sentences one by one
    for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
        msk[0] = 1.
        sent_len = int(np.sum(msk))
        arc_pred = arc_argmax(arc_prob, sent_len, msk)
        rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
        rel_pred = rel_argmax(rel_prob, sent_len)
        outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

    if arc_targets is not None:
        return arc_accuracy, rel_accuracy, overall_accuracy, outputs
    return outputs
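# One detail worth calling out in the code above: flatten_numpy uses Fortran
# (column-major) order, so a (seq_len, batch_size) matrix is flattened column
# by column, consistent with the reshape_fortran layouts. A tiny standalone
# check:
import numpy as np

m = np.array([[1, 2],
              [3, 4]])
print(np.reshape(m, (-1,), 'F'))  # [1 3 2 4]: columns are read first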
# define loss function
loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

# initialization
g_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
d_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
g_trainer = gluon.Trainer(
    g_net.collect_params(), 'Adam',
    {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
d_trainer = gluon.Trainer(
    d_net.collect_params(), 'Adam',
    {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
g_net.collect_params().zero_grad()
d_net.collect_params().zero_grad()

# define evaluation metric
metric = mx.metric.CustomMetric(facc)

# initialize labels
real_label = nd.ones(BATCH_SIZE, ctx=CTX)
fake_label = nd.zeros(BATCH_SIZE, ctx=CTX)

for epoch in range(NUM_EPOCHS):
    for i, (d, _) in enumerate(train_data):
        # update D
        data = d.as_in_context(CTX)
        noise = nd.normal(loc=0, scale=1, shape=(BATCH_SIZE, Z_DIM, 1, 1), ctx=CTX)
        with autograd.record():
            # train with real image
            output = d_net(data).reshape((-1, 1))
            errD_real = loss(output, real_label)
            metric.update([real_label, ], [output, ])
            # train with fake image
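# The snippet uses `facc` without defining it. A plausible definition, hedged
# as an assumption (it matches the binary "fake accuracy" helper commonly used
# in MXNet's GAN examples):
def facc(label, pred):
    # binary accuracy: count a prediction as "real" when the sigmoid output > 0.5
    pred = pred.ravel()
    label = label.ravel()
    return ((pred > 0.5) == label).mean()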
import d2lzh as d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import data as gdata, loss as gloss, nn

"""
High-dimensional linear regression experiment
"""
n_train, n_test, num_inputs = 20, 100, 200
true_w, true_b = nd.ones((num_inputs, 1)) * 0.01, 0.05

features = nd.random.normal(shape=(n_train + n_test, num_inputs))
labels = nd.dot(features, true_w) + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]

"""
Initialize model parameters
"""
def init_params():
    w = nd.random.normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return [w, b]

"""
Define the L2-norm penalty term
"""
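# The snippet ends right after announcing the penalty. A minimal sketch of the
# standard definition (halving the squared norm, so the gradient is simply w;
# treat this as an assumed completion rather than the author's exact code):
def l2_penalty(w):
    return (w ** 2).sum() / 2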