def verify_l2normalization_rewrite_tile(shape, eps, mode):
    assert len(shape) == 4  # NCHW
    data_np = np.random.uniform(size=shape)
    x = nd.array(data_np)
    # original op
    y = nd.L2Normalization(x, eps=eps, mode=mode)
    # rewritten op: x / tile(sqrt(sum(x * x) + eps))
    z = nd.broadcast_mul(x, x)
    if mode == "channel":
        axis = [1]
    elif mode == "instance":
        axis = [1, 2, 3]
    elif mode == "spatial":
        axis = [2, 3]
    else:
        raise ValueError("not a valid `mode` type: %s" % mode)
    reps = tuple(shp if i in axis else 1 for i, shp in enumerate(shape))
    z = nd.sum(z, axis=axis, keepdims=True)
    eps_tensor = nd.array([eps])
    z = nd.broadcast_add(z, eps_tensor)  # add eps before the sqrt, matching L2Normalization
    z = nd.sqrt(z)
    z = nd.tile(z, reps=reps)
    z = nd.broadcast_div(x, z)
    # compare
    assert z.shape == y.shape
    zn, zp = get_norm(z)
    yn, yp = get_norm(y)
    rn = np.linalg.norm(zp - yp)
    print(zn, yn, rn)
def verify_l2normalization_rewrite(shape, eps, mode):
    assert len(shape) == 4  # NCHW
    data_np = np.random.uniform(size=shape)
    x = nd.array(data_np)
    # original op
    y = nd.L2Normalization(x, eps=eps, mode=mode)
    # rewritten op: x / repeat(sqrt(sum(x * x) + eps))
    z = nd.broadcast_mul(x, x)
    if mode == "channel":
        axis = [1]
    elif mode == "instance":
        axis = [1, 2, 3]
    elif mode == "spatial":
        axis = [2, 3]
    else:
        raise ValueError("not a valid `mode` type: %s" % mode)
    z = nd.sum(z, axis=axis)
    eps_tensor = nd.array([eps])
    z = nd.broadcast_add(z, eps_tensor)
    z = nd.sqrt(z)
    # restore the reduced axes one by one (axis list is ascending)
    for i in axis:
        z = nd.expand_dims(z, axis=i)
        z = nd.repeat(z, repeats=shape[i], axis=i)
    z = nd.broadcast_div(x, z)
    # compare
    assert z.shape == y.shape
    zn, zp = get_norm(z)
    yn, yp = get_norm(y)
    rn = np.linalg.norm(zp - yp)
    print(zn, yn, rn)
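# Usage sketch for the two verify helpers above. `get_norm` is defined
# elsewhere in this repo; the stand-in below is an assumption (it returns the
# array's L2 norm together with its numpy copy), kept only so the snippet
# runs on its own.
import numpy as np
from mxnet import nd

def get_norm(arr):  # hypothetical stand-in, not the repo's implementation
    arr_np = arr.asnumpy()
    return np.linalg.norm(arr_np), arr_np

if __name__ == "__main__":
    for m in ("channel", "instance", "spatial"):
        verify_l2normalization_rewrite((2, 3, 4, 5), 1e-5, m)
        verify_l2normalization_rewrite_tile((2, 3, 4, 5), 1e-5, m)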
def forward(self, x, mask):
    """
    Parameters
    ----------
    x: Shape (batch_size, num_node, input_dim)
    mask: Shape (batch_size, num_node, num_set, 1)

    Returns
    -------
    layer_out: Shape (batch_size, num_node, units)
    """
    layer_in_l = [x]
    layer_out = None
    for i in range(self._layer_num):
        if len(layer_in_l) == 1:
            layer_in = layer_in_l[0]
        else:
            # dense connectivity: feed all previous outputs to the next layer
            layer_in = nd.concat(*layer_in_l, dim=-1)
        ### TODO assume batch_size=1
        x_mW = nd.reshape(self.layers[i](layer_in),
                          shape=(0, 0, self._num_set, self._units))
        # the mask selects the per-set transform for each node, then sum over sets
        layer_out = self._act(nd.sum(nd.broadcast_mul(x_mW, mask), axis=-2))
        layer_in_l.append(layer_out)
    return layer_out
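# Minimal sketch of the mask-and-sum trick used in forward() above: a one-hot
# mask over `num_set` picks one of the per-set transforms for each node.
# Shapes follow the docstring; the tensors here are made up for illustration.
from mxnet import nd

batch_size, num_node, num_set, units = 1, 4, 3, 8
x_mW = nd.random.uniform(shape=(batch_size, num_node, num_set, units))
set_id = nd.array([0, 2, 1, 0])                      # one set per node
mask = nd.one_hot(set_id, num_set).reshape((batch_size, num_node, num_set, 1))
out = nd.sum(nd.broadcast_mul(x_mW, mask), axis=-2)  # (batch_size, num_node, units)
assert out.shape == (batch_size, num_node, units)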
def hybrid_forward(self, F, x):
    x_se = self.avg_pool(x)
    x_se = self.conv_reduce(x_se)
    x_se = self.act1(x_se)
    x_se = self.conv_expand(x_se)
    x = F.broadcast_mul(self.gate_fn(x_se), x)
    return x
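# A self-contained sketch of the squeeze-and-excitation block the
# hybrid_forward above belongs to. Layer names mirror the method's attributes,
# but the sizes and definitions here are assumptions, not the repo's actual
# module.
import mxnet as mx
from mxnet.gluon import nn

class SEBlockSketch(nn.HybridBlock):
    def __init__(self, channels, reduction=4, **kwargs):
        super().__init__(**kwargs)
        self.avg_pool = nn.GlobalAvgPool2D()
        self.conv_reduce = nn.Conv2D(channels // reduction, kernel_size=1)
        self.act1 = nn.Activation('relu')
        self.conv_expand = nn.Conv2D(channels, kernel_size=1)
        self.gate_fn = nn.Activation('sigmoid')

    def hybrid_forward(self, F, x):
        x_se = self.conv_expand(self.act1(self.conv_reduce(self.avg_pool(x))))
        return F.broadcast_mul(self.gate_fn(x_se), x)

blk = SEBlockSketch(16)
blk.initialize()
y = blk(mx.nd.random.uniform(shape=(2, 16, 8, 8)))  # output keeps input shape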
def _apply_weighting(F, loss, weight=None, sample_weight=None):
    if sample_weight is not None:
        loss = F.broadcast_mul(loss, sample_weight)
    if weight is not None:
        assert isinstance(weight, numeric_types), "weight must be a number"
        loss = loss * weight
    return loss
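# Hedged example of _apply_weighting: per-sample weights are commonly used to
# mask out whole samples before reducing a loss. `numeric_types` comes from
# mxnet.base, as in Gluon's loss module; the data below is illustrative.
from mxnet import nd
from mxnet.base import numeric_types

loss = nd.array([[0.5, 1.0, 0.2], [0.3, 0.7, 0.9]])
sample_weight = nd.array([[1.0], [0.0]])        # drop the second sample
weighted = _apply_weighting(nd, loss, weight=2.0, sample_weight=sample_weight)
print(weighted)  # second row zeroed, remaining values doubled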
def forward(self, x: nd.NDArray):
    if not autograd.is_training() or not self.dropout_rate:
        return x
    keep_rate = 1. - self.dropout_rate
    # Bernoulli(keep_rate) mask, shared across the batch axis and rescaled so
    # the expected activation is unchanged (inverted dropout)
    m = nd.random.uniform(0, 1, (1, x.shape[1], x.shape[2])) < keep_rate
    mask = m / keep_rate
    return nd.broadcast_mul(x, mask)
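# Quick sanity check for the inverted-dropout scaling above: a
# Bernoulli(keep_rate) mask divided by keep_rate keeps the masked activations'
# mean roughly equal to the input's. Shapes and rates are illustrative.
from mxnet import nd

x = nd.ones((32, 10, 64))
keep_rate = 0.8
mask = (nd.random.uniform(0, 1, (1, 10, 64)) < keep_rate) / keep_rate
y = nd.broadcast_mul(x, mask)
print(x.mean().asscalar(), y.mean().asscalar())  # both close to 1.0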
def hybrid_forward(self, F, x, block_channel_mask, *args, **kwargs):
    # keep the first `channel_number` entries of the mask, then reshape it so
    # it broadcasts over NCHW feature maps
    block_channel_mask = F.slice(block_channel_mask, begin=(None, None),
                                 end=(None, self.channel_number))
    block_channel_mask = F.reshape(block_channel_mask,
                                   shape=(1, self.channel_number, 1, 1))
    x = F.broadcast_mul(x, block_channel_mask)
    return x
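# Illustration of the channel-masking step above: a 0/1 vector reshaped to
# (1, C, 1, 1) zeroes out whole feature maps when broadcast-multiplied with an
# NCHW tensor. Sizes are made up.
from mxnet import nd

x = nd.random.uniform(shape=(2, 4, 3, 3))
mask = nd.array([1, 0, 1, 0]).reshape((1, 4, 1, 1))
y = nd.broadcast_mul(x, mask)
print(y[:, 1].sum().asscalar())  # 0.0: channel 1 is switched off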
def forward(self, data, neighbor_data, neighbor_indices, neighbor_indptr,
            node_type_mask=None, neighbor_type_mask=None, edge_type_mask=None,
            seg_indices=None):
    """Map the input features to hidden states + apply pooling + apply FC

    Parameters
    ----------
    data : Symbol or NDArray
        Shape (batch_size, node_num, feat_dim)
    neighbor_data : Symbol or NDArray
        Shape (batch_size, neighbor_node_num, feat_dim)
    neighbor_indices : Symbol or NDArray
        Shape (nnz, )
    neighbor_indptr : Symbol or NDArray
        Shape (node_num + 1, )
    node_type_mask : Symbol or NDArray
        Shape (batch_size, node_num, num_set, 1)
    neighbor_type_mask : Symbol or NDArray
        Shape (batch_size, neighbor_node_num, num_set, 1)
    edge_type_mask : Symbol or NDArray or None
        Shape (batch_size, nnz, num_edge_set, 1)
    seg_indices : Symbol or NDArray
        Shape (nnz, )

    Returns
    -------
    pool_data : Symbol or NDArray
        Shape (batch_size, node_num, mid_units * num_edge_set)
    """
    ## TODO does not consider node type
    if self._num_node_set is not None:
        data = self.data_map(data, node_type_mask)
        neighbor_data = self.neighbor_mid_map(neighbor_data, neighbor_type_mask)
    if self._num_edge_set is not None:
        neighbor_data = self.relation_W(neighbor_data)  # (batch_size, neighbor_node_num, mid_units * num_edge_set)
    neighbor_data = nd.take(neighbor_data, indices=neighbor_indices, axis=-2)  # (batch_size, nnz, mid_units * num_edge_set)
    if self._num_edge_set is not None:
        neighbor_data = nd.reshape(neighbor_data,
                                   shape=(0, 0, self._num_edge_set, self._mid_units))  # (batch_size, nnz, num_edge_set, mid_units)
        # select the relation-specific messages with the edge-type mask
        neighbor_data = nd.reshape(nd.broadcast_mul(neighbor_data, edge_type_mask),
                                   shape=(0, 0, -1))  # (batch_size, nnz, num_edge_set * mid_units)
    pool_data = nd.contrib.seg_pool(data=neighbor_data,
                                    indices=seg_indices,
                                    indptr=neighbor_indptr,
                                    pool_type=self._pool_type)  # (batch_size, node_num, mid_units * num_edge_set)
    if self._num_edge_set is not None:
        if self._accum_type == "stack":
            pool_data = self._out_act(pool_data)
        elif self._accum_type == "sum":
            pool_data = self._out_act(nd.sum(
                nd.reshape(pool_data, shape=(0, 0, self._num_edge_set, self._mid_units)),
                axis=2))
    return pool_data
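# Small sketch of the edge-type gating used above: relation-specific messages
# live in a (batch, nnz, num_edge_set, mid_units) tensor, and a 0/1 mask over
# edge types keeps one relation per edge. (nd.contrib.seg_pool comes from the
# custom MXNet build this repo targets, so it is not demonstrated here.)
from mxnet import nd

batch, nnz, num_edge_set, mid_units = 1, 5, 3, 4
msgs = nd.random.uniform(shape=(batch, nnz, num_edge_set, mid_units))
edge_type = nd.array([0, 1, 2, 1, 0])
edge_type_mask = nd.one_hot(edge_type, num_edge_set).reshape((batch, nnz, num_edge_set, 1))
gated = nd.reshape(nd.broadcast_mul(msgs, edge_type_mask), shape=(0, 0, -1))
print(gated.shape)  # (1, 5, 12)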
def forward(self, is_train, req, in_data, out_data, aux):
    # F is assumed to be mxnet.ndarray, imported at module level
    x = in_data[0]
    if is_train:
        # keep a whole channel with probability 1 - p: compare one uniform
        # draw per channel against p, then rescale (inverted dropout)
        self._spatial_dropout_mask = F.broadcast_greater(
            F.random_uniform(low=0, high=1, shape=(1, self._num_filters, 1, 1),
                             ctx=self._ctx),
            F.ones(shape=(1, self._num_filters, 1, 1), ctx=self._ctx) * self._p,
        )
        y = F.broadcast_mul(x, self._spatial_dropout_mask) / (1 - self._p)
        self.assign(out_data[0], req[0], y)
    else:
        self.assign(out_data[0], req[0], x)
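# Equivalent of the spatial-dropout mask above with plain nd ops, for
# reference: each of the C channels is dropped as a whole with probability p.
# Context handling is omitted; shapes are illustrative.
from mxnet import nd

p, num_filters = 0.3, 8
x = nd.random.uniform(shape=(2, num_filters, 4, 4))
mask = nd.broadcast_greater(
    nd.random.uniform(low=0, high=1, shape=(1, num_filters, 1, 1)),
    nd.ones((1, num_filters, 1, 1)) * p)
y = nd.broadcast_mul(x, mask) / (1 - p)
print(mask.reshape((num_filters,)))  # 0/1 per channel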
def var(array, W=_W, B=None, square=0, sqrt=0, V=False, order='NCHW', sizz=0):
    # weighted variance-style pooling over the last two (window) axes
    arrs = array.shape
    ashp = W.shape
    xi = (-2, -1)        # window axes
    x2 = (-2, -1, -3)    # window axes plus channel axis
    sb = (ashp[1], 1, 1)
    WV = ashp[-2:]
    # constant window size, tiled per channel
    mnc = mnd.tile(mnd.reshape(mnd.array([WV[0] * WV[1]]), shape=(1, 1, 1)), sb)
    if V:
        print(sb)
        print(mnc)
        print(W.asnumpy())
        print(arrs, ashp)
    mul = mnd.broadcast_mul(array, W)
    if V:
        print('Wsamp', W[-1, -1])
        print('array*w', mul[0, -1])
    size = mnd.sum(W, axis=xi, keepdims=True)  # shape (outputs, channel, 1, 1)
    if V:
        print("sizesamp", size.shape, size)
    if B is None:
        B = mnd.zeros(W.shape[0:2], dtype=np.float32)  # per-channel bias
    B = mnd.reshape(B, (*B.shape, *[1 for _ in range(len(ashp) - len(B.shape))]))
    if sizz == 1:
        mean = mnd.sum(mul, axis=xi, keepdims=True) / size
    else:
        mean = mnd.sum(mul, axis=xi, keepdims=True) / mnc
    if V:
        print("meansamp", mean[0, -1])
    if square:
        i = mnd.square(mnd.broadcast_add(mnd.broadcast_minus(mul, mean), B))
    else:
        i = mnd.broadcast_add(mnd.broadcast_minus(mul, mean), B)
    di = i / size
    if V == 2:
        print("i", i, "i")
        print("di", di, "di")
    if V:
        print('isamp', i.shape, i[-1, -1, ])
    out = mnd.sum(mnd.broadcast_add(i, B), axis=x2)
    if sqrt:
        out = mnd.sqrt(out)
    out = mnd.swapaxes(out, 3, 1)
    assert out.shape == (arrs[0], ashp[0], arrs[1], arrs[2])
    return out
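# Cross-check of the masked mean/variance idea in var() on a tiny case,
# against numpy. This is a simplified illustration, not the full function:
# one channel, one window, uniform weights.
import numpy as np
from mxnet import nd as mnd

win = mnd.array(np.arange(6, dtype=np.float32).reshape(1, 1, 2, 3))
w = mnd.ones((1, 1, 2, 3))
mean = mnd.sum(mnd.broadcast_mul(win, w), axis=(-2, -1), keepdims=True) / 6.0
var_ = mnd.sum(mnd.square(mnd.broadcast_minus(win, mean)), axis=(-2, -1)) / 6.0
assert np.isclose(var_.asscalar(), np.arange(6).var())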
def get(self, pred, label):
    embedding = nd.L2Normalization(pred, mode='instance')
    self.acc = 0
    nc = self.nc
    ns = self.ns
    nq = self.nq
    margin = self.margin
    # split the episode into support and query embeddings
    s_embedding = embedding.slice_axis(axis=0, begin=0, end=nc * ns)
    q_embedding = embedding.slice_axis(axis=0, begin=nc * ns, end=None)
    s_cls_data = nd.reshape(s_embedding, (nc, ns, -1))
    q_cls_data = nd.reshape(q_embedding, (nc, nq, -1))
    # class centers = normalized mean of the support embeddings
    s_cls_center = nd.mean(s_cls_data, axis=1)
    s_cls_center = nd.L2Normalization(s_cls_center, mode='instance')
    # cosine similarity between every query and every class center
    temp = q_embedding.expand_dims(axis=1) * s_cls_center.expand_dims(axis=0)
    data_center_dis = nd.sum(temp, axis=2)
    cur_label = nd.argmax(data_center_dis, axis=1)

    # margin loss on the similarity to the true class center
    loss = 0
    for i in range(nc):
        temp = data_center_dis[i * nq:(i + 1) * nq, i]
        loss += nd.sum(nd.LeakyReLU(margin - temp, act_type='leaky', slope=0.1))
    for i in range(nc):
        self.acc += nd.sum(cur_label[nq * i:nq * (i + 1)] == i).asscalar()
    self.acc /= (nc * nq)

    # auxiliary margin loss against broadcast class centers (currently unused)
    s_center_broadcast = s_cls_center.expand_dims(axis=1)
    s_center_dis = nd.sum(nd.broadcast_mul(q_cls_data, s_center_broadcast), axis=2)
    loss1 = nd.sum(nd.LeakyReLU(margin - s_center_dis, act_type='leaky', slope=0.1))
    return (self.acc, cur_label, loss)
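# Standalone sketch of the prototype classification step used in get():
# cosine similarity of queries to normalized class centers, then argmax.
# Episode sizes are made up; `support`/`query` stand in for the embedding
# network's outputs.
from mxnet import nd

nc, ns, nq, dim = 3, 4, 5, 8
support = nd.L2Normalization(nd.random.normal(shape=(nc * ns, dim)), mode='instance')
query = nd.L2Normalization(nd.random.normal(shape=(nc * nq, dim)), mode='instance')
centers = nd.L2Normalization(nd.mean(support.reshape((nc, ns, dim)), axis=1),
                             mode='instance')
sim = nd.sum(query.expand_dims(axis=1) * centers.expand_dims(axis=0), axis=2)
pred_label = nd.argmax(sim, axis=1)  # (nc * nq,)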
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    dy = out_grad[0]
    # gradient of y = x * mask / (1 - p): reuse the mask drawn in forward()
    dx = F.broadcast_mul(self._spatial_dropout_mask, dy) / (1 - self._p)
    self.assign(in_grad[0], req[0], dx)
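# Gradient sanity check for the forward/backward pair above, done with
# autograd on the equivalent ndarray expression:
# d/dx [x * mask / (1 - p)] = mask / (1 - p).
from mxnet import nd, autograd

p = 0.4
x = nd.ones((1, 3, 2, 2))
x.attach_grad()
mask = nd.array([1, 0, 1]).reshape((1, 3, 1, 1))
with autograd.record():
    y = nd.broadcast_mul(x, mask) / (1 - p)
y.backward()
print(x.grad[0, :, 0, 0])  # [1/(1-p), 0, 1/(1-p)]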
from mxnet.ndarray.linalg import gemm2

def ISSM(z, b, F, a, g, sigma, m_prior, S_prior):
    '''
    Kalman filtering for an innovation state space model (ISSM).
    The documentation for this code can be found in:
    https://gluon.mxnet.io/chapter12_time-series/issm-scratch.html
    '''
    H = F.shape[0]  # dim of latent state
    T = z.shape[0]  # num of observations
    eye_h = nd.array(np.eye(H))

    mu_seq = []
    S_seq = []
    log_p_seq = []

    for t in range(T):
        if t == 0:
            # at the first time step, use the prior
            mu_h = m_prior
            S_hh = S_prior
        else:
            # otherwise compute using the update equations
            F_t = F[:, :, t]
            g_t = g[:, t].reshape((H, 1))
            mu_h = gemm2(F_t, mu_t)
            S_hh = gemm2(F_t, gemm2(S_t, F_t, transpose_b=1)) + \
                   gemm2(g_t, g_t, transpose_b=1)

        a_t = a[:, t].reshape((H, 1))
        mu_v = gemm2(mu_h, a_t, transpose_a=1)

        # compute the Kalman gain (vector)
        S_hh_x_a_t = gemm2(S_hh, a_t)
        sigma_t = sigma[t]
        S_vv = gemm2(a_t, S_hh_x_a_t, transpose_a=1) + nd.square(sigma_t)
        kalman_gain = nd.broadcast_div(S_hh_x_a_t, S_vv)

        # compute the error (delta)
        delta = z[t] - b[t] - mu_v

        # filtered estimates
        mu_t = mu_h + gemm2(kalman_gain, delta)

        # Joseph's symmetrized update for the covariance
        ImKa = nd.broadcast_sub(eye_h, gemm2(kalman_gain, a_t, transpose_b=1))
        S_t = gemm2(gemm2(ImKa, S_hh), ImKa, transpose_b=1) + \
              nd.broadcast_mul(gemm2(kalman_gain, kalman_gain, transpose_b=1),
                               nd.square(sigma_t))

        # likelihood term
        log_p = -0.5 * (delta * delta / S_vv
                        + np.log(2.0 * np.pi)
                        + nd.log(S_vv))

        mu_seq.append(mu_t)
        S_seq.append(S_t)
        log_p_seq.append(log_p)

    return log_p_seq
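# Minimal usage sketch for ISSM() above: a local-level model (H = 1) with a
# constant transition matrix, run on a short synthetic series. The parameter
# shapes follow the gluon.mxnet.io tutorial this function comes from; the
# values are made up.
import numpy as np
from mxnet import nd

T, H = 10, 1
z = nd.random.normal(shape=(T, 1))               # observations
b = nd.zeros((T, 1))                             # deterministic offset
F_mat = nd.ones((H, H, T))                       # transition matrices
a = nd.ones((H, T))                              # emission vectors
g = 0.1 * nd.ones((H, T))                        # innovation strengths
sigma = 0.5 * nd.ones((T,))                      # observation noise
log_p_seq = ISSM(z, b, F_mat, a, g, sigma,
                 m_prior=nd.zeros((H, 1)), S_prior=nd.eye(H))
print(sum(p.asscalar() for p in log_p_seq))      # total log-likelihood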
def hybrid_forward(self, F, x):
    out = self.channel_attention(x)
    return F.broadcast_mul(x, out.expand_dims(axis=2).expand_dims(axis=2))
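# Shape walk-through for the channel-attention gating above: attention weights
# of shape (N, C) are unsqueezed twice so they broadcast over H and W. The
# tensors here are illustrative.
from mxnet import nd

x = nd.random.uniform(shape=(2, 4, 8, 8))
att = nd.random.uniform(shape=(2, 4))                  # per-channel weights
y = nd.broadcast_mul(x, att.expand_dims(axis=2).expand_dims(axis=2))
assert y.shape == x.shape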
def _broadcast_like(x, w):
    # nw = w * 0 + 1 is an all-ones tensor with w's shape; multiplying x by it
    # broadcasts x up to w's shape without touching w's values
    zero = nd.zeros(shape=(1,))
    one = nd.ones(shape=(1,))
    nw = nd.broadcast_add(nd.broadcast_mul(w, zero), one)
    z = nd.broadcast_mul(x, nw)
    return z
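# Quick demonstration of _broadcast_like: expand a (1, 3) row against a
# (2, 3) reference. Newer MXNet also ships nd.broadcast_like, which does the
# same thing directly.
from mxnet import nd

x = nd.array([[1, 2, 3]])          # (1, 3)
w = nd.zeros((2, 3))               # reference shape
print(_broadcast_like(x, w))       # both rows equal [1, 2, 3]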