def update(self, lrate):
    """Take one gradient step on the filter weights ``self.W`` and biases ``self.B``.

    The weight gradient is accumulated from the cached layer input ``self.X``
    and the cached output gradient ``self.DY``; both parameters are then
    updated in place.

    Parameters
    ----------
    lrate : scalar
        learning rate multiplied onto the normalised gradients.
    """
    _, Hy, Wy, _ = self.DY.shape
    hf, wf, df, _ = self.W.shape
    hstride, wstride = self.stride

    DW = nd.zeros_like(self.W, ctx=self.ctx, dtype=self.dtype)

    if hf == wf and self.stride == (1, 1):
        # Square filter with unit stride: fill DW one filter tap at a time.
        dy = nd.expand_dims(self.DY, axis=3)
        for i in range(hf):
            for j in range(wf):
                taps = self.X[:, i:i + Hy:hstride, j:j + Wy:wstride, :]
                DW[i, j, :, :] = nd.sum(nd.expand_dims(taps, axis=4) * dy,
                                        axis=(0, 1, 2))
    else:
        # General case: add up the contribution of every output position.
        for i in range(Hy):
            top = i * hstride
            for j in range(Wy):
                left = j * wstride
                patch = nd.expand_dims(
                    self.X[:, top:top + hf, left:left + wf, :], axis=4)
                grad = nd.expand_dims(self.DY[:, i:i + 1, j:j + 1, :], axis=3)
                DW += (patch * grad).sum(axis=0)

    DB = self.DY.sum(axis=(0, 1, 2))

    # Gradients are scaled by the square root of the number of summed terms.
    self.W -= lrate * DW / (hf * wf * df * Hy * Wy) ** .5
    self.B -= lrate * DB / (Hy * Wy) ** .5
def Route(self, x):
    """Run ``self.route_num`` routing iterations and return the squashed output.

    Coupling coefficients are a softmax (axis 2) over routing logits that
    start at zero. Only the last iteration uses the differentiable votes;
    earlier iterations, and every logit update, use a gradient-stopped copy.
    """
    batch = x.shape[0]
    logits = nd.zeros((batch, 1, self.num_cap, self.num_locations),
                      ctx=x.context)

    # Tile the input along two new axes so it lines up with the weights.
    tiled = nd.expand_dims(x, 2)
    tiled = nd.repeat(tiled, repeats=self.num_cap, axis=2)
    tiled = nd.expand_dims(tiled, axis=2)
    tiled = nd.repeat(tiled, repeats=self.units, axis=2)

    weights = nd.expand_dims(self.w_ij.data(), axis=0)
    u = nd.sum(weights * tiled, axis=1)
    u_detached = nd.stop_gradient(u)

    final_step = self.route_num - 1
    for step in range(self.route_num):
        coupling = nd.softmax(logits, axis=2)
        is_final = step == final_step
        votes = u if is_final else u_detached
        v = squash(nd.sum(votes * coupling, axis=-1), 1)
        if not is_final:
            # Agreement between votes and current output refines the logits.
            agreement = nd.sum(u_detached * nd.expand_dims(v, axis=-1),
                               axis=1, keepdims=True)
            logits = logits + agreement
    return v
def _epsilon_lrp(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

    The incoming relevance R is divided once by the epsilon-stabilised layer
    output self.Y and then redistributed over each receptive field in
    proportion to the forward messages Z. When self.lrp_aware is set, the
    messages cached in self.Z are reused instead of being recomputed.
    '''
    _, Hout, Wout, _ = R.shape
    hf, wf, _, _ = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)

    # Sign-matched stabiliser: +epsilon where Y >= 0, -epsilon elsewhere.
    stabiliser = epsilon * ((self.Y >= 0) * 2 - 1.)
    R_norm = R / (self.Y + stabiliser)

    for i in range(Hout):
        top = i * hstride
        for j in range(Wout):
            left = j * wstride
            if self.lrp_aware:
                Z = self.Z[:, i, j, ...]
            else:
                window = self.X[:, top:top + hf, left:left + wf, :]
                Z = nd.expand_dims(self.W, axis=0) * nd.expand_dims(window, axis=4)
            message = nd.expand_dims(R_norm[:, i:i + 1, j:j + 1, :], axis=3)
            Rx[:, top:top + hf, left:left + wf, :] += (Z * message).sum(axis=4)
    return Rx
def update_alphas(data, alphas):
    """Advance the forward variables (alphas) by one time step.

    ``self`` is not a parameter; it is read from the enclosing scope.

    Args:
        data (NDArray): emission scores for the current step. The transpose
            below yields (tagset_size, batch_size, 1), so this is presumably
            (batch_size, tagset_size) -- TODO confirm against the caller.
        alphas (NDArray): NDArray shape: (batch_size, self.tagset_size)

    Returns:
        tuple: ``(data, alphas)`` with ``data`` passed through unchanged and
        ``alphas`` updated, shape (batch_size, self.tagset_size).
    """
    # (self.tagset_size, batch_size, self.tagset_size)
    tiled_alphas = nd.broadcast_axis(
        nd.expand_dims(alphas, axis=0), axis=0, size=self.tagset_size)
    # (self.tagset_size, batch_size, 1)
    emissions = nd.transpose(nd.expand_dims(data, axis=0), axes=(2, 1, 0))
    # (self.tagset_size, 1, self.tagset_size)
    transitions = nd.expand_dims(self.transitions.data(), axis=1)

    # (self.tagset_size, batch_size, self.tagset_size)
    scores = tiled_alphas + emissions + transitions

    # log_sum_exp gives (self.tagset_size, batch_size); put batch first again.
    updated = nd.transpose(log_sum_exp(scores), axes=(1, 0))
    return data, updated
def query(self, image_text_pairs):
    """Pass a batch through the history pool of (image, text feature) pairs.

    While the pool is not full every incoming pair is stored and returned
    unchanged. Once it is full, each incoming pair is, with probability 0.5,
    swapped with a uniformly chosen stored pair (the stored pair is returned
    and the new one takes its place); otherwise it is returned as is.

    Args:
        image_text_pairs: two-item sequence ``(images, text_feats)`` that are
            indexed along axis 0 in lock-step.

    Returns:
        The input object itself when ``self.pool_size == 0``; otherwise the
        list ``[images, text_feats]`` concatenated along axis 0.
    """
    if self.pool_size == 0:
        return image_text_pairs

    images, text_feats = image_text_pairs
    ret_images = []
    ret_text_feats = []
    for i in range(images.shape[0]):
        image = nd.expand_dims(images[i], axis=0)
        text_feat = nd.expand_dims(text_feats[i], axis=0)

        if self.num_imgs < self.pool_size:
            # Pool still filling up: store and echo the new pair.
            self.num_imgs = self.num_imgs + 1
            self.images.append(image)
            self.text_feats.append(text_feat)
            ret_images.append(image)
            ret_text_feats.append(text_feat)
            continue

        # Fair coin. A standard-normal draw compared with 0.5 would swap
        # about 69% of the time, so the draw is uniform on [0, 1).
        if nd.random_uniform(0, 1, shape=(1,)).asscalar() < 0.5:
            # Uniform slot in [0, pool_size). A Python int avoids the uint8
            # wrap-around for pools over 256 entries; the clamp guards the
            # upper edge of the float draw.
            draw = nd.random_uniform(0, self.pool_size, shape=(1,)).asscalar()
            random_index = min(int(draw), self.pool_size - 1)
            old_image = self.images[random_index].copy()
            old_text_feat = self.text_feats[random_index].copy()
            self.images[random_index] = image
            self.text_feats[random_index] = text_feat
            ret_images.append(old_image)
            ret_text_feats.append(old_text_feat)
        else:
            ret_images.append(image)
            ret_text_feats.append(text_feat)

    ret_images = nd.concat(*ret_images, dim=0)
    ret_text_feats = nd.concat(*ret_text_feats, dim=0)
    return [ret_images, ret_text_feats]
def forward(self, query, values, head=False):
    """
    Compute the attention weights and the attended output vector.

    :param query: the query, i.e. the decoder input at the current step
    :param values: the values, i.e. the encoder vector of every time step
    :param head: when exactly ``True`` the weighted values are summed over
        axis 2, otherwise over axis 1
    :return: (attention output vector, attention weights)
    """
    # Insert an axis at position 1 so the query broadcasts against the values.
    query_with_time_axis = nd.expand_dims(query, 1)

    energies = nd.tanh(self.W1(values) + self.W2(query_with_time_axis))
    score = self.V(energies)
    attention_weights = nd.softmax(score, axis=1)

    weighted_values = attention_weights * values
    reduce_axis = 2 if head is True else 1
    context_vector = nd.sum(weighted_values, axis=reduce_axis)

    # The result carries an extra leading axis of size 1.
    context_vector = nd.expand_dims(context_vector, axis=0)
    return context_vector, attention_weights
def train(self, true_image, latent_z, generator):
    """Run one discriminator update and return the mean discriminator loss.

    Fakes are generated outside the recorded scope, so only the
    discriminator receives gradients. Each score is compared with the batch
    mean of the opposite kind: real scores are pushed towards
    ``mean(fake) + 1`` and fake scores towards ``mean(real) - 1`` with a
    squared error.
    """
    fake_inputs = gluon.utils.split_and_load(latent_z, self.ctx)
    real_inputs = gluon.utils.split_and_load(true_image, self.ctx)
    generated = [generator.generator(z) for z in fake_inputs]

    def tiled_batch_mean(scores):
        # Mean over axis 0, repeated back to the batch size of `scores`.
        mean = nd.expand_dims(nd.mean(scores, axis=0), axis=0)
        return nd.repeat(mean, repeats=scores.shape[0], axis=0)

    with autograd.record():
        disc_fake = [self.discriminator(g) for g in generated]
        disc_real = [self.discriminator(r) for r in real_inputs]

        d_loss_total = []
        for real, fake in zip(disc_real, disc_fake):
            real_term = (real - tiled_batch_mean(fake) - 1) ** 2
            fake_term = (fake - tiled_batch_mean(real) + 1) ** 2
            d_loss_total.append(nd.mean(fake_term + real_term) * 0.5)

    for device_loss in d_loss_total:
        device_loss.backward()

    # NOTE(review): latent_z[0].shape[0] is the length of the second axis of
    # latent_z, not the batch size -- confirm this is the intended step size.
    self.trainer.step(latent_z[0].shape[0])

    curr_dloss = nd.mean(sum(d_loss_total) / len(self.ctx)).asscalar()
    return curr_dloss
def Route(self, x):
    """Run ``self.route_num`` routing iterations with sigmoid coupling.

    Same scheme as softmax routing, except that the coupling coefficients
    are an element-wise sigmoid of the routing logits. Only the last
    iteration uses the differentiable votes; earlier iterations and every
    logit update use a gradient-stopped copy.
    """
    batch = x.shape[0]
    ctx = x.context
    logits = nd.zeros((batch, 1, self.num_cap, self.num_locations), ctx=ctx)

    # Add two singleton axes to the input; repeat the weights per example.
    x_expand = nd.expand_dims(nd.expand_dims(x, axis=2), 2)
    weights = nd.expand_dims(self.w_ij.data(ctx), axis=0)
    w_expand = nd.repeat(weights, repeats=batch, axis=0)

    u = nd.sum(w_expand * x_expand, axis=1)
    u_detached = nd.stop_gradient(u)

    final_step = self.route_num - 1
    for step in range(self.route_num):
        coupling = nd.sigmoid(logits)
        if step == final_step:
            v = squash(nd.sum(u * coupling, axis=-1), 1)
        else:
            v = squash(nd.sum(u_detached * coupling, axis=-1), 1)
            agreement = nd.sum(u_detached * nd.expand_dims(v, axis=-1),
                               axis=1, keepdims=True)
            logits = logits + agreement
    return v
def forward(self, current, previous, doc_encode):
    """Combine content, salience and novelty scores into a probability.

    Shapes are taken from the original annotations and are not checked here.

    Args:
        current: h_j (batch_size, sentence_hidden_size * 2)
        previous: s_j (batch_size, sentence_hidden_size * 2)
        doc_encode: d (batch_size, ndoc_dims)

    Returns:
        P (batch_size, 1): sigmoid(content + salience - novelty)
    """
    # content: (batch_size, 1)
    content = self.content_encoder(current)

    # salience: project the document, multiply with the current sentence
    # state and sum over the last axis, keeping it as a singleton.
    projected_doc = self.salience_encoder(doc_encode)
    salience = nd.expand_dims(nd.sum_axis(current * projected_doc, -1), -1)

    # novelty: same reduction, against the tanh-squashed running summary.
    projected_summary = self.novelty_encoder(nd.tanh(previous))
    novelty = nd.expand_dims(nd.sum_axis(current * projected_summary, -1), -1)

    return nd.sigmoid(content + salience - novelty)
def _clip_px_gradients(self, batch_grads, px_clipping_factors):
    """Scale every example's gradient by its own clipping factor.

    Args:
        batch_grads: per-example gradients whose first axis is the batch
            axis; any number of trailing axes is accepted.
        px_clipping_factors: one factor per example, shape ``(b,)``.

    Returns:
        ``batch_grads`` multiplied example-wise by the factors.
    """
    factors = px_clipping_factors
    # Give the (b,) factors one singleton axis per non-batch gradient axis so
    # they broadcast over (b, x), (b, x, y), (b, x, y, z), ... alike. For 2-D
    # and 3-D gradients this builds the same (b, 1) / (b, 1, 1) shape as the
    # previous hard-coded branches.
    for _ in range(len(batch_grads.shape) - 1):
        factors = nd.expand_dims(factors, 1)
    return nd.multiply(batch_grads, factors)
def predict_transform(prediction, input_dim, anchors):
    '''
    Convert raw network outputs into box values on the input-image scale.

    For each of the three grids the cell offsets are added to the first two
    channels, which are then scaled by input_dim / grid size; channels 2:4
    are exponentiated and multiplied by that grid's anchor sizes.

    Input:
        prediction: raw x, y, w, h, pc, c1, c2 values for all anchors of all
            three scales, per the original note shaped
            [batch, 13x13x3 + 26x26x3 + 52x52x3, 7]. MODIFIED IN PLACE.
        input_dim: network input size (416 in the original note)
        anchors: the nine anchor size pairs (array-like or NDArray)
    Output:
        prediction: the same array, now holding the transformed values
    '''
    ctx = prediction.context
    if not isinstance(anchors, nd.NDArray):
        anchors = nd.array(anchors, ctx=ctx)

    batch_size = prediction.shape[0]
    anchors_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    strides = [13, 26, 52]  # grid side length of each scale
    # Row ranges of each scale inside `prediction` (grid * grid * 3 rows).
    step = [(0, 507), (507, 2535), (2535, 10647)]

    for i in range(3):
        stride = strides[i]
        begin, end = step[i]

        grid = np.arange(stride)
        a, b = np.meshgrid(grid, grid)
        x_offset = nd.array(a.reshape((-1, 1)), ctx=ctx)
        y_offset = nd.array(b.reshape((-1, 1)), ctx=ctx)

        # One (x, y) offset row per anchor of every cell, tiled over the batch.
        cell_offsets = nd.repeat(nd.concat(x_offset, y_offset, dim=1),
                                 repeats=3, axis=0).reshape((-1, 2))
        x_y_offset = nd.repeat(nd.expand_dims(cell_offsets, 0),
                               repeats=batch_size, axis=0)

        # The three anchors of this scale, repeated per cell and per example.
        scale_anchors = nd.repeat(nd.expand_dims(anchors[anchors_masks[i]], 0),
                                  repeats=stride * stride,
                                  axis=0).reshape((-1, 2))
        tmp_anchors = nd.repeat(nd.expand_dims(scale_anchors, 0),
                                repeats=batch_size, axis=0)

        prediction[:, begin:end, :2] += x_y_offset
        prediction[:, begin:end, :2] *= (float(input_dim) * 1.0 / stride)
        prediction[:, begin:end, 2:4] = \
            nd.exp(prediction[:, begin:end, 2:4]) * tmp_anchors

    return prediction
def make_values_L(range_min, range_max, L, batch_size):
    """Build ``L // 2`` geometrically spaced values, tiled over the batch.

    The values run from ``1 / range_min`` down to ``1 / range_max``, evenly
    spaced in log space.

    Args:
        range_min: smallest range; its reciprocal is the first value.
        range_max: largest range; its reciprocal is the last value.
        L: total feature length; half as many values are produced.
        batch_size: number of copies along axis 0.

    Returns:
        NDArray of shape ``(batch_size, L // 2, 1)``.
    """
    # np.linspace needs an integer count; true division would give a float
    # on Python 3.
    num_values = int(L // 2)
    logs_L = np.linspace(0, np.log(range_max * 1.0 / range_min), num=num_values)
    values_L = nd.array(1.0 / range_min * np.exp(-logs_L))
    values_L = nd.expand_dims(nd.expand_dims(values_L, axis=0), axis=2)
    return nd.broadcast_axis(values_L, axis=0, size=batch_size)
def log_prob(self, x: nd.NDArray) -> nd.NDArray:
    """Return the element-wise Gaussian log-density of ``x``.

    Reads the ``mean`` and ``variance`` parameters; when ``x`` has more axes
    than the mean, both get a leading singleton axis. The residual
    ``x - mean`` is stored in ``self._saved_for_backward``.
    """
    mu = self.get_param_maybe_repeated('mean')
    var = self.get_param_maybe_repeated('variance')

    if x.ndim > mu.ndim:
        mu = nd.expand_dims(mu, 0)
        var = nd.expand_dims(var, 0)

    residual = x - mu
    self._saved_for_backward = [residual]

    log_normaliser = -0.5 * nd.log(2. * np.pi * var)
    return log_normaliser - nd.square(residual) / 2. / var
def generate_transpose_conv_kernel(channels):
    """Build a (channels, channels, 1, 1) 0/1 permutation kernel.

    Row ``r`` has its single 1 in column ``r // 2`` when ``r`` is even and in
    column ``channels // 2 + r // 2`` when ``r`` is odd.

    Raises:
        ValueError: if ``channels`` is odd.
    """
    if channels % 2 != 0:
        raise ValueError('Channel number should be even.')

    half = channels // 2
    target_column = np.zeros(channels)
    target_column[0::2] = np.arange(half)            # even rows -> first half
    target_column[1::2] = np.arange(half, channels)  # odd rows -> second half

    weights = np.zeros((channels, channels))
    weights[np.arange(channels), target_column.astype(int)] = 1.0

    kernel = nd.array(weights)
    kernel = nd.expand_dims(kernel, axis=2)
    return nd.expand_dims(kernel, axis=3)
def _score_sentence(self, feats, tags):
    """Score the given tag sequence: per-tag scores plus transition scores.

    ``tags`` is multiplied element-wise with ``feats``, so it presumably
    holds one-hot labels with the same layout -- TODO confirm.
    """
    # Per-tag score: sum over axis 2, then over axis 1.
    emission_score = nd.sum(nd.sum(feats * tags, 2), 1, keepdims=True)
    if feats.shape[1] == 1:
        # A sequence of length 1 has no transitions.
        return emission_score

    # Two label tensors shifted by one step; their product selects the
    # target transition entries from the transition matrix.
    current_labels = nd.expand_dims(tags[:, :-1], 3)
    next_labels = nd.expand_dims(tags[:, 1:], 2)
    selector = current_labels * next_labels

    trans = nd.expand_dims(nd.expand_dims(self.transitions.data(), 0), 0)
    trans_score = nd.sum(nd.sum(trans * selector, [2, 3]), 1, keepdims=True)

    # Total score is the sum of the two parts.
    return emission_score + trans_score
def _forward_alg(self, feats, sequence_length):
    """Run the forward recursion over the sequence and return the final state.

    Starts from ``feats[:, 0]``; each later step adds the transition scores,
    reduces with ``log_sum_exp`` over axis 1 and adds that step's features.
    With ``sequence_length == 1`` the initial state is returned as is.
    """
    alpha = feats[:, 0]  # initial state
    for t in range(1, sequence_length):
        previous = nd.expand_dims(alpha, 2)                  # (batch_size, tagset_size, 1)
        trans = nd.expand_dims(self.transitions.data(), 0)   # (1, tagset_size, tagset_size)
        alpha = log_sum_exp(previous + trans, 1) + feats[:, t]
    return alpha
def _get_position_encoding(length, min_timescale=1.0, max_timescale=1.0e4):
    """Return the sinusoidal position signal for ``length`` positions.

    ``ghp.model_dim // 2`` geometrically spaced timescales between
    ``min_timescale`` and ``max_timescale`` are used; the sine and cosine
    halves are concatenated along axis 1.

    Args:
        length: number of positions.
        min_timescale: smallest timescale.
        max_timescale: largest timescale.

    Returns:
        NDArray of shape ``(length, 2 * (ghp.model_dim // 2))`` on ``ghp.ctx``.
    """
    position = nd.arange(length, ctx=ghp.ctx)
    num_timescales = ghp.model_dim // 2

    # With a single timescale there is no spacing to compute; clamping the
    # divisor to 1 avoids a ZeroDivisionError and leaves every other case
    # unchanged.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        max(float(num_timescales) - 1, 1.0))

    inv_timescales = min_timescale * nd.exp(
        nd.arange(num_timescales, ctx=ghp.ctx) * -log_timescale_increment)

    # Outer product: (length, 1) * (1, num_timescales).
    scaled_time = nd.expand_dims(position, 1) * nd.expand_dims(inv_timescales, 0)
    signal = nd.concat(nd.sin(scaled_time), nd.cos(scaled_time), dim=1)
    return signal
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    """Train an Gluon RNN model and predict the next item in the sequence.

    The model is re-initialised, hybridized, trained with SGD on randomly
    sampled mini-batches using a CTC loss, and finally exported under the
    prefix "gluon". Every ``pred_period`` epochs the running loss statistic
    and a sample continuation for each prefix are printed.
    """
    # CTC loss over (batch, time, class) outputs and (batch, time) labels.
    loss = gloss.CTCLoss(layout='NTC', label_layout='NT')
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})
    # Hybridize once; repeating it every epoch only discards the cached graph.
    model.hybridize()

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_random(corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)

        for X, Y in data_iter:
            # NDArray.detach() returns a new array, so the result must be
            # kept for the state to actually leave the previous graph.
            state = [s.detach() for s in state]
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                # Insert a length-1 axis at position 1 for the CTC layouts.
                output = nd.expand_dims(output, axis=1)
                y = nd.expand_dims(y, axis=1)
                l = loss(output, y).mean()
            l.backward()

            params = [p.data() for p in model.collect_params().values()]
            grad_clipping(params, clipping_theta, ctx)
            # The loss is already a mean, so no further batch rescaling.
            trainer.step(1)

            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            # NOTE(review): the value is labelled "perplexity" but is
            # exp(mean CTC loss) -- confirm that is what should be reported.
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx))

    model.export("gluon")
def forward(self, X, lrp_aware=False):
    '''
    Realizes the forward pass of an input through the convolution layer.

    Parameters
    ----------
    X : mxnet.ndarray.ndarray.NDArray
        a network input, shaped (N,H,W,D), with
        N = batch size
        H, W, D = input size in height, width, depth

    lrp_aware : bool
        when True, the per-position forward messages are additionally stored
        in self.Z so that later LRP calls for the same X can reuse them.
        This makes the forward pass slower but saves time when several LRP
        calls follow, e.g. with different parameters or target classes.

    Returns
    -------
    Y : mxnet.ndarray.ndarray.NDArray
        the layer outputs.
    '''
    self.lrp_aware = lrp_aware
    self.X = X

    N, H, W, D = X.shape
    hf, wf, df, nf = self.W.shape
    hstride, wstride = self.stride
    numfilters = self.n

    # Output size without padding; filter size and stride are assumed to
    # have been chosen to fit the input.
    Hout = (H - hf) // hstride + 1
    Wout = (W - wf) // wstride + 1

    self.Y = nd.zeros((N, Hout, Wout, numfilters), ctx=self.ctx, dtype=self.dtype)

    if self.lrp_aware:
        # Container for the precomputed forward messages.
        self.Z = nd.zeros((N, Hout, Wout, hf, wf, df, nf),
                          ctx=self.ctx, dtype=self.dtype)
        for i in range(Hout):
            top = i * hstride
            for j in range(Wout):
                left = j * wstride
                window = self.X[:, top:top + hf, left:left + wf, :]
                # N, hf, wf, df, nf
                self.Z[:, i, j, ...] = (nd.expand_dims(self.W, axis=0)
                                        * nd.expand_dims(window, axis=4))
                self.Y[:, i, j, :] = self.Z[:, i, j, ...].sum(axis=(1, 2, 3)) + self.B
    else:
        for i in range(Hout):
            top = i * hstride
            for j in range(Wout):
                left = j * wstride
                # Batch axis moved last so the window lines up with W.
                window = X[:, top:top + hf, left:left + wf, :].transpose((1, 2, 3, 0))
                products = nd.expand_dims(window, 4) * nd.expand_dims(self.W, 3)
                self.Y[:, i, j, :] = nd.sum(products, axis=(0, 1, 2)) + self.B

    return self.Y
def __get_one_x_by_date(self, date):
    """Assemble one sample (text input, numeric input, label) for ``date``.

    The text comes from ``self.data_text_dict`` and the numeric row from
    ``self.data_digital``. Every row value except the last is used as a
    numeric feature; the label is the row's ``'y_value'`` entry. All three
    outputs get a leading axis of size 1.
    """
    x_text = self.data_text_dict[date]
    print(u'text lenght : {}'.format(len(x_text)))

    row = self.data_digital.loc[date]
    # Drop the last value -- presumably the label column; verify the layout.
    x_digital = nd.array(row.tolist()[:-1])
    y = row['y_value']

    x_text, y = preprocess_imdb(x_text, y)

    # Expand dimensions: add the batch axis.
    return (nd.expand_dims(x_text, axis=0),
            nd.expand_dims(x_digital, axis=0),
            nd.expand_dims(y, axis=0))
def __init__(self, d_model, dropout, max_len=5000):
    """Precompute the sinusoidal positional-encoding table.

    Args:
        d_model: width of the encoding. NOTE(review): an odd value makes the
            cosine assignment below mismatch in width -- presumably callers
            always pass an even size.
        dropout: rate passed to ``nn.Dropout``.
        max_len: number of positions to precompute.

    The table is stored as ``self.pe`` with shape ``(1, max_len, d_model)``
    on ``cfg.ctx``.
    """
    super(PositionalEncoding, self).__init__()
    self.dropout = nn.Dropout(dropout)

    # Compute the positional encodings once in log space. Every operand is
    # created on cfg.ctx so the slice assignments below never mix an array on
    # the default context with the table on cfg.ctx.
    pe = nd.zeros((max_len, d_model), ctx=cfg.ctx)
    position = nd.expand_dims(nd.arange(0, max_len, ctx=cfg.ctx), 1)
    div_term = nd.exp(
        nd.arange(0, d_model, 2, ctx=cfg.ctx) * -(math.log(10000.0) / d_model))

    pe[:, 0::2] = nd.sin(position * div_term)  # even columns
    pe[:, 1::2] = nd.cos(position * div_term)  # odd columns

    # Leading axis of size 1 so the table broadcasts over a batch.
    pe = nd.expand_dims(pe, 0)
    self.pe = pe
def _cross_element_wise_mp(p, h):
    """Return the element-wise product of every position of ``p`` with every position of ``h``.

    Args:
        p: first sequence, indexed as (batch, p_len, dim).
        h: second sequence, indexed as (batch, h_len, dim).

    Returns:
        NDArray of shape (batch, p_len, h_len, dim); dropout is applied when
        the module-level ``interact_dropout`` differs from 1.
    """
    plen = p.shape[1]
    hlen = h.shape[1]

    # p gets a new axis 2 tiled to h's length, h a new axis 1 tiled to p's
    # length, so both end up as (batch, p_len, h_len, dim).
    p_expand = nd.tile(nd.expand_dims(p, 2), [1, 1, hlen, 1])
    h_expand = nd.tile(nd.expand_dims(h, 1), [1, plen, 1, 1])
    out = p_expand * h_expand

    if interact_dropout != 1:
        # NOTE(review): nn.Dropout takes the drop probability; confirm that
        # `keep_rate` really holds that value and not the keep probability.
        out = nn.Dropout(keep_rate)(out)
    return out
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):
    """Apply regression deltas to boxes and optionally clip them to the image.

    Args:
        bbox: boxes, split along axis 1 into xmin, ymin, xmax, ymax.
        bbox_delta: regression output, reshaped to (0, -1, 4) and split along
            the last axis into dx, dy, dw, dh.
        im_info: optional [[height, width, scale]] with shape (1, 3); when
            given, the result is clipped to [0, width - 1] x [0, height - 1].
        means, stds: optional de-normalisation constants for the four deltas;
            applied only when both are given.

    Returns:
        Refined boxes with xmin, ymin, xmax, ymax concatenated along axis 1.
    """
    xmin, ymin, xmax, ymax = nd.split(data=bbox, num_outputs=4, axis=1)

    # Box geometry in the "+1" pixel convention.
    bbox_width = xmax - xmin + 1.
    bbox_height = ymax - ymin + 1.
    center_x = 0.5 * (xmin + xmax)
    center_y = 0.5 * (ymin + ymax)

    deltas = nd.split(data=nd.Reshape(data=bbox_delta, shape=(0, -1, 4)),
                      num_outputs=4, axis=2, squeeze_axis=1)
    if (means is not None) and (stds is not None):
        deltas = [deltas[k] * stds[k] + means[k] for k in range(4)]
    dx, dy, dw, dh = deltas

    # Shift the centre by a fraction of the box size, scale the size by exp().
    refine_center_x = nd.broadcast_add(
        lhs=center_x, rhs=nd.broadcast_mul(lhs=bbox_width, rhs=dx))
    refine_center_y = nd.broadcast_add(
        lhs=center_y, rhs=nd.broadcast_mul(lhs=bbox_height, rhs=dy))
    refined_width = nd.broadcast_mul(lhs=bbox_width, rhs=nd.exp(dw))
    refined_height = nd.broadcast_mul(lhs=bbox_height, rhs=nd.exp(dh))

    w_offset = 0.5 * (refined_width - 1.)
    h_offset = 0.5 * (refined_height - 1.)

    corners = (refine_center_x - w_offset,
               refine_center_y - h_offset,
               refine_center_x + w_offset,
               refine_center_y + h_offset)
    refined_bbox = nd.concat(*[nd.expand_dims(c, axis=1) for c in corners], dim=1)

    if im_info is not None:
        # assume im_info [[height, width, scale]] with shape (1,3)
        im_hw = nd.slice_axis(im_info, axis=1, begin=0, end=2)
        im_wh = nd.reverse(im_hw, axis=1) - 1.
        # (w-1, h-1) repeated twice and shaped to broadcast over the corners.
        upper = nd.Reshape(nd.tile(data=im_wh, reps=(1, 2)), shape=(1, 4, 1))
        refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=upper)
        refined_bbox = nd.broadcast_maximum(lhs=refined_bbox,
                                            rhs=nd.zeros_like(refined_bbox))
    return refined_bbox
def Route(self, x):
    """Run ``self.route_num`` routing iterations and return the squashed output.

    Coupling coefficients are a softmax (axis 2) over routing logits that
    start at zero; after every iteration, including the last, the logits are
    increased by the agreement between the votes and the current output.
    """
    batch = x.shape[0]
    ctx = x.context
    logits = nd.zeros((batch, 1, self.num_cap, self.num_locations), ctx=ctx)

    # Add two singleton axes to the input; repeat the weights per example.
    x_expand = nd.expand_dims(nd.expand_dims(x, axis=2), 2)
    weights = nd.expand_dims(self.w_ij.data(ctx), axis=0)
    w_expand = nd.repeat(weights, repeats=batch, axis=0)

    u = nd.sum(w_expand * x_expand, axis=1)

    for _ in range(self.route_num):
        coupling = nd.softmax(logits, axis=2)
        v = squash(nd.sum(u * coupling, axis=-1), 1)
        agreement = nd.sum(u * nd.expand_dims(v, axis=-1), axis=1, keepdims=True)
        logits = logits + agreement
    return v
def make_dynamic_dec(T, values_L):
    """Build sine/cosine features for time steps 1..T.

    Each time step is multiplied with every entry of ``values_L`` via a
    batched dot product; the sine and cosine of the products are
    concatenated along axis 2.

    Args:
        T: number of time steps.
        values_L: NDArray whose axis 0 is the batch axis; the result is
            placed on its context.
    """
    steps = nd.array(np.linspace(1, T, num=T), ctx=values_L.context)
    # (T,) -> (1, T, 1) -> (batch, T, 1)
    steps = nd.expand_dims(steps, axis=0)
    steps = nd.expand_dims(steps, axis=2)
    steps = nd.broadcast_axis(steps, axis=0, size=values_L.shape[0])

    phases = nd.batch_dot(steps, values_L, transpose_b=True)
    return nd.concat(nd.sin(phases), nd.cos(phases), dim=2)
def _stack_samples(arrays):
    """Stack equally shaped arrays along a new leading batch axis."""
    if len(arrays) == 1:
        return nd.expand_dims(arrays[0], 0)
    return nd.stack(*arrays)


def data_iter(batch_size, dir_name, im_dir, gt_dir, ctx):
    """Yield shuffled mini-batches of (image, density map) pairs.

    Args:
        batch_size: samples per batch; the final batch may be smaller.
        dir_name: path of the index file. Each line reads
            "<image name> <ground-truth name>".
        im_dir: prefix prepended to the image name.
        gt_dir: prefix prepended to the ground-truth ``.npy`` name.
        ctx: unused; kept for interface compatibility.

    Yields:
        ``(images, labels)``: float32 NDArrays with a leading batch axis.
        Images are channel-first; labels carry one leading channel axis.
    """
    # Read the index up front so the file is closed before iteration starts.
    with open(dir_name) as dir_file:
        lines = dir_file.readlines()

    indexs = list(range(len(lines)))
    random.shuffle(indexs)

    for start in range(0, len(indexs), batch_size):
        xs, ys = [], []
        for index in indexs[start:start + batch_size]:
            # Resolve the two file names of this sample.
            im_name, gt_name = lines[index].split(' ')
            gt_name = gt_name.split('\n')[0]

            # Training data (image), moved to channel-first order.
            image = mx.image.imread(im_dir + im_name).astype('float32')
            xs.append(image.transpose((2, 0, 1)))

            # Training label (density map) with a leading channel axis.
            density = nd.array(np.load(gt_dir + gt_name)).astype('float32')
            ys.append(density.reshape([-1, density.shape[0], density.shape[1]]))

        # Works for any batch length, including a final batch of one sample.
        yield _stack_samples(xs), _stack_samples(ys)
def predict_transform(prediction, input_dim, anchors):
    """Convert raw network outputs into box values on the input-image scale.

    For each of the three grids the cell offsets are added to the first two
    channels, which are then scaled by ``input_dim / grid size``; channels
    2:4 are exponentiated and multiplied by that grid's anchor sizes.
    ``prediction`` is modified in place and also returned.
    """
    ctx = prediction.context
    if not isinstance(anchors, nd.NDArray):
        anchors = nd.array(anchors, ctx=ctx)
    batch_size = prediction.shape[0]

    anchors_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    strides = [13, 26, 52]  # grid side length of each scale
    step = [(0, 507), (507, 2535), (2535, 10647)]  # row range of each scale

    for mask, stride, (begin, end) in zip(anchors_masks, strides, step):
        grid = np.arange(stride)
        a, b = np.meshgrid(grid, grid)
        x_offset = nd.array(a.reshape((-1, 1)), ctx=ctx)
        y_offset = nd.array(b.reshape((-1, 1)), ctx=ctx)

        # One (x, y) offset row per anchor of every cell, tiled over the batch.
        per_anchor = nd.repeat(nd.concat(x_offset, y_offset, dim=1),
                               repeats=3, axis=0).reshape((-1, 2))
        x_y_offset = nd.repeat(nd.expand_dims(per_anchor, 0),
                               repeats=batch_size, axis=0)

        # The three anchors of this scale, repeated per cell and per example.
        per_cell = nd.repeat(nd.expand_dims(anchors[mask], 0),
                             repeats=stride * stride, axis=0).reshape((-1, 2))
        tmp_anchors = nd.repeat(nd.expand_dims(per_cell, 0),
                                repeats=batch_size, axis=0)

        prediction[:, begin:end, :2] += x_y_offset
        prediction[:, begin:end, :2] *= (float(input_dim) / stride)
        prediction[:, begin:end, 2:4] = \
            nd.exp(prediction[:, begin:end, 2:4]) * tmp_anchors

    return prediction
def batch_loss(encoder, sent_rnn, X, Y, vocab, loss, ctx):
    """Return the summed per-sentence loss of one batch divided by the batch size.

    Sentences are scored one position at a time. The running summary starts
    as the first sentence's hidden state and, after each position, is
    increased by that position's hidden state weighted by its gold label.
    ``vocab`` is accepted but not used here.
    """
    batch_size = X.shape[1]
    sentence_hidden, doc_encode = encoder(X)
    # Swap the first two axes so iteration yields one sentence position at a time.
    sentence_hidden = nd.transpose(sentence_hidden, axes=(1, 0, 2))

    total = nd.array([0], ctx=ctx)
    # Summary of all previous steps.
    previous = sentence_hidden[0]

    # sent_hidden: (batch_size, hidden)
    for sent_hidden, y in zip(sentence_hidden, Y.T):
        y_h = nd.squeeze(sent_rnn(sent_hidden, previous, doc_encode))
        total = total + loss(y_h, y).sum()
        # Equation 7; teacher forcing: the gold labels weight the update.
        previous = previous + sent_hidden * nd.expand_dims(y, -1)

    return total / batch_size
def getSelfMask(q_seq):
    """Return a lower-triangular self-attention mask for every sequence in the batch.

    Args:
        q_seq: 2-D array of shape (batch_size, seq_len); only its shape is used.

    Returns:
        NDArray of shape (batch_size, seq_len, seq_len) on ``ghp.ctx`` holding
        ones on and below the diagonal and zeros above it.
    """
    batch_size, seq_len = q_seq.shape
    # np.float was only an alias of the builtin float (float64) and has been
    # removed from NumPy; np.float64 is the same dtype.
    mask_matrix = np.ones(shape=(seq_len, seq_len), dtype=np.float64)
    mask = np.tril(mask_matrix, k=0)
    mask = nd.expand_dims(nd.array(mask, ctx=ghp.ctx), axis=0)
    mask = nd.broadcast_axes(mask, axis=0, size=batch_size)
    return mask
def decode(self, targets, encoder_outputs, attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    embedded = self.embedding_softmax_layer(targets)

    # Shift the targets right by one step: pad one zero vector in front of
    # the time axis and drop the last step. A leading axis is added for the
    # pad call (presumably because nd.pad needs a 4-D input) and removed
    # again afterwards.
    padded = nd.pad(data=nd.expand_dims(embedded, axis=0),
                    mode="constant", constant_value=0,
                    pad_width=(0, 0, 0, 0, 1, 0, 0, 0))
    decoder_inputs = nd.reshape(data=padded, shape=padded.shape[1:])[:, :-1, :]

    length = decoder_inputs.shape[1]
    decoder_inputs = decoder_inputs + model_utils.get_position_encoding(
        length, self.param.hidden_size, targets.context)
    if self.train:
        decoder_inputs = self.dropout_output(decoder_inputs)

    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        length, targets.context)
    outputs = self.decoder_stack(decoder_inputs, encoder_outputs,
                                 decoder_self_attention_bias, attention_bias)
    return self.embedding_softmax_layer.linear(outputs)
def log_prob(self, x: nd.NDArray) -> nd.NDArray:
    """Return the element-wise Poisson log-probability of ``x``.

    Reads the ``mean`` parameter (given a leading singleton axis when ``x``
    has more axes). Fails with an AssertionError from ``np.testing`` when
    ``x`` contains values that are not (almost) integers.
    """
    rate = self.get_param_maybe_repeated('mean')
    if x.ndim > rate.ndim:
        rate = nd.expand_dims(rate, 0)

    # Integer check: an int32 round trip must leave the values unchanged.
    observed = x.asnumpy()
    round_tripped = observed.astype(np.int32).astype(np.float32)
    np.testing.assert_almost_equal(observed, round_tripped)

    return x * nd.log(rate) - rate - nd.gammaln(x + 1.)
def _prepare_data_pattern(self):
    """Yield one (input patches, flattened output) pair per logged sample.

    Raises:
        RuntimeError: on first iteration, if no logged forward pass has
            stored ``self._in`` yet.
    """
    if self._in is None:
        raise RuntimeError('Block has not yet executed forward_logged!')

    for sample_in, sample_out in zip(self._in[0], self._out):
        kernel = self._kwargs['kernel']
        # -> patch_size x number_of_patches -> transposed
        patches = im2col_indices(nd.expand_dims(sample_in, 0),
                                 kernel[0], kernel[1],
                                 self._kwargs['pad'][0],
                                 self._kwargs['stride'][0]).T
        # -> outsize x number_of_patches -> transposed
        flat_out = sample_out.flatten().T
        yield patches, flat_out
def _epsilon_lrp_slow(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140
    This function shows all necessary operations to perform LRP in one place
    and is therefore not optimized
    '''
    _, Hout, Wout, _ = R.shape
    hf, wf, _, _ = self.W.shape
    hstride, wstride = self.stride

    def to_5d(vector):
        # Prepend four singleton axes so the bias lines up with Z's sums.
        lifted = vector
        for _ in range(4):
            lifted = nd.expand_dims(lifted, 0)
        return lifted

    Rx = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)

            # Forward messages of this receptive field.
            Z = nd.expand_dims(self.W, axis=0) * nd.expand_dims(self.X[:, rows, cols, :], 4)
            # Pre-activation per filter, pushed away from zero by epsilon
            # with matching sign.
            Zs = Z.sum(axis=(1, 2, 3), keepdims=True) + to_5d(self.B)
            Zs += epsilon * ((Zs >= 0) * 2 - 1)

            relevance = nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)
            Rx[:, rows, cols, :] += ((Z / Zs) * relevance).sum(axis=4)
    return Rx
def _epsilon_lrp(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

    R is normalised once by the epsilon-stabilised output self.Y; each
    receptive field then receives the forward messages Z weighted by that
    normalised relevance. Cached messages (self.Z) are used when
    self.lrp_aware is set.
    '''
    _, Hout, Wout, _ = R.shape
    hf, wf, _, _ = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)
    # +epsilon where the output is non-negative, -epsilon elsewhere.
    R_norm = R / (self.Y + epsilon * ((self.Y >= 0) * 2 - 1.))

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)
            if self.lrp_aware:
                Z = self.Z[:, i, j, ...]
            else:
                Z = (nd.expand_dims(self.W, axis=0)
                     * nd.expand_dims(self.X[:, rows, cols, :], axis=4))
            weighted = Z * nd.expand_dims(R_norm[:, i:i + 1, j:j + 1, :], axis=3)
            Rx[:, rows, cols, :] += weighted.sum(axis=4)
    return Rx
def update(self, lrate):
    '''
    Performs one in-place gradient step on self.W and self.B, using the
    cached input self.X and the cached output gradient self.DY.

    Parameters
    ----------
    lrate : scalar
        learning rate applied to the normalised gradients.
    '''
    _, Hy, Wy, _ = self.DY.shape
    hf, wf, df, _ = self.W.shape
    hstride, wstride = self.stride

    DW = nd.zeros_like(self.W, ctx=self.ctx, dtype=self.dtype)
    fast_path = hf == wf and self.stride == (1, 1)

    if not fast_path:
        # Sum the weight gradient over all output positions.
        for i in range(Hy):
            rows = slice(i * hstride, i * hstride + hf)
            for j in range(Wy):
                cols = slice(j * wstride, j * wstride + wf)
                x_patch = nd.expand_dims(self.X[:, rows, cols, :], axis=4)
                dy_ij = nd.expand_dims(self.DY[:, i:i + 1, j:j + 1, :], axis=3)
                DW += (x_patch * dy_ij).sum(axis=0)
    else:
        # Square filter, unit stride: one filter tap at a time.
        for i in range(hf):
            for j in range(wf):
                x_taps = nd.expand_dims(
                    self.X[:, i:i + Hy:hstride, j:j + Wy:wstride, :], axis=4)
                DW[i, j, :, :] = nd.sum(
                    x_taps * nd.expand_dims(self.DY, axis=3), axis=(0, 1, 2))

    DB = self.DY.sum(axis=(0, 1, 2))

    # Normalise by the square root of the number of accumulated terms.
    self.W -= lrate * DW / (hf * wf * df * Hy * Wy) ** .5
    self.B -= lrate * DB / (Hy * Wy) ** .5
def _alphabeta_lrp(self, R, alpha):
    '''
    LRP according to Eq(60) in DOI: 10.1371/journal.pone.0130140

    beta is fixed to 1 - alpha. Positive and negative forward messages are
    normalised separately (each including the matching part of the bias)
    and mixed with weights alpha and beta. Cached messages (self.Z) are
    used when self.lrp_aware is set.
    '''
    beta = 1 - alpha
    _, Hout, Wout, _ = R.shape
    hf, wf, _, _ = self.W.shape
    hstride, wstride = self.stride

    def to_5d(vector):
        # Prepend four singleton axes so the bias lines up with Z's sums.
        lifted = vector
        for _ in range(4):
            lifted = nd.expand_dims(lifted, axis=0)
        return lifted

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)

            if self.lrp_aware:
                Z = self.Z[:, i, j, ...]
            else:
                Z = (nd.expand_dims(self.W, axis=0)
                     * nd.expand_dims(self.X[:, rows, cols, :], axis=4))

            Zplus = Z > 0  # index mask of positive forward predictions
            R_ij = nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)

            if alpha * beta != 0:
                # The general case: both parameters are not 0.
                Zp = Z * Zplus
                Zsp = (Zp.sum(axis=(1, 2, 3), keepdims=True)
                       + to_5d(self.B * (self.B > 0)) + 1e-16)
                Zn = Z - Zp
                # Negative normaliser taken as output minus positive part.
                Zsn = (nd.expand_dims(self.Y[:, i:i + 1, j:j + 1, :], axis=3)
                       - Zsp - 1e-16)
                contribution = ((alpha * (Zp / Zsp) + beta * (Zn / Zsn))
                                * R_ij).sum(axis=4)
            elif alpha:
                # Only alpha is not 0 -> alpha = 1, beta = 0.
                Zp = Z * Zplus
                Zsp = (Zp.sum(axis=(1, 2, 3), keepdims=True)
                       + to_5d(self.B * (self.B > 0)) + 1e-16)
                contribution = (Zp * (R_ij / Zsp)).sum(axis=4)
            elif beta:
                # Only beta is not 0 -> alpha = 0, beta = 1.
                Zn = Z * (Z < 0)
                Zsn = (Zn.sum(axis=(1, 2, 3), keepdims=True)
                       + to_5d(self.B * (self.B < 0)) + 1e-16)
                contribution = (Zn * (R_ij / Zsn)).sum(axis=4)
            else:
                raise Exception('This case should never occur: alpha={}, beta={}.'.format(alpha, beta))

            Rx[:, rows, cols, :] += contribution
    return Rx
def _ww_lrp(self, R):
    '''
    LRP according to Eq(12) in https://arxiv.org/pdf/1512.02479v1.pdf

    Relevance is distributed over each receptive field in proportion to the
    squared filter weights, independent of the layer input.

    Parameters
    ----------
    R : mxnet.ndarray.ndarray.NDArray
        relevance of the layer outputs, shaped (N,Hout,Wout,NF)

    Returns
    -------
    Rx : mxnet.ndarray.ndarray.NDArray
        relevance of the layer inputs, shaped like self.X
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    # The squared weights and their per-filter sum do not depend on the
    # output position, so the normalised shares are computed once.
    Z = nd.expand_dims(self.W, 0) ** 2
    shares = Z / Z.sum(axis=(1, 2, 3), keepdims=True)

    for i in range(Hout):
        for j in range(Wout):
            Rx[:, i * hstride:i * hstride + hf, j * wstride:j * wstride + wf, :] += (
                shares * nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)).sum(axis=4)
    return Rx
def backward(self, DY):
    '''
    Backward-passes an input error gradient DY towards the input neurons of
    this layer. DY is also cached as self.DY.

    Parameters
    ----------
    DY : mxnet.ndarray.ndarray.NDArray
        an error gradient shaped same as the output array of forward, i.e.
        (N,Hy,Wy,Dy) with
        N = number of samples in the batch
        Hy = height of the output
        Wy = width of the output
        Dy = output depth = number of filters

    Returns
    -------
    DX : mxnet.ndarray.ndarray.NDArray
        the error gradient propagated towards the input
    '''
    self.DY = DY
    _, Hy, Wy, _ = DY.shape
    hf, wf, _, _ = self.W.shape
    hstride, wstride = self.stride

    DX = nd.zeros_like(self.X, ctx=self.ctx, dtype=self.dtype)

    if hf == wf and self.stride == (1, 1):
        # Square filter with unit stride: one filter tap at a time.
        for i in range(hf):
            for j in range(wf):
                DX[:, i:i + Hy:hstride, j:j + Wy:wstride, :] += nd.dot(
                    DY, self.W[i, j, :, :].T)
    else:
        for i in range(Hy):
            rows = slice(i * hstride, i * hstride + hf)
            for j in range(Wy):
                cols = slice(j * wstride, j * wstride + wf)
                grad = nd.expand_dims(DY[:, i:i + 1, j:j + 1, :], axis=3)
                # Sum over all the filters.
                DX[:, rows, cols, :] += (nd.expand_dims(self.W, axis=0) * grad).sum(axis=4)

    return DX
def test_expand_dims():
    """expand_dims(axis=1) inserts a singleton axis between the two existing ones."""
    source = nd.ones(shape=(LARGE_X, SMALL_Y))
    expanded = nd.expand_dims(source, axis=1)
    rows, cols = source.shape[0], source.shape[1]
    assert expanded.shape == (rows, 1, cols)
def _alphabeta_lrp_slow(self, R, alpha):
    '''
    LRP according to Eq(60) in DOI: 10.1371/journal.pone.0130140
    This function shows all necessary operations to perform LRP in one place
    and is therefore not optimized
    '''
    beta = 1 - alpha
    _, Hout, Wout, _ = R.shape
    hf, wf, _, _ = self.W.shape
    hstride, wstride = self.stride

    def to_5d(vector):
        # Prepend four singleton axes so the bias lines up with Z's sums.
        lifted = vector
        for _ in range(4):
            lifted = nd.expand_dims(lifted, axis=0)
        return lifted

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    for i in range(Hout):
        rows = slice(i * hstride, i * hstride + hf)
        for j in range(Wout):
            cols = slice(j * wstride, j * wstride + wf)
            Z = (nd.expand_dims(self.W, axis=0)
                 * nd.expand_dims(self.X[:, rows, cols, :], axis=4))

            # Positive part, weighted by alpha.
            Ralpha = 0
            if alpha != 0:
                Zp = Z * (Z > 0)
                Bp = to_5d(self.B * (self.B > 0))
                Zsp = Zp.sum(axis=(1, 2, 3), keepdims=True) + Bp
                Ralpha = alpha * ((Zp / Zsp) * nd.expand_dims(
                    R[:, i:i + 1, j:j + 1, :], axis=3)).sum(axis=4)

            # Negative part, weighted by beta.
            Rbeta = 0
            if beta != 0:
                Zn = Z * (Z < 0)
                Bn = to_5d(self.B * (self.B < 0))
                Zsn = Zn.sum(axis=(1, 2, 3), keepdims=True) + Bn
                Rbeta = beta * ((Zn / Zsn) * nd.expand_dims(
                    R[:, i:i + 1, j:j + 1, :], axis=3)).sum(axis=4)

            Rx[:, rows, cols, :] += Ralpha + Rbeta
    return Rx
def _flat_lrp(self, R):
    '''
    distribute relevance for each output evenly to the output neurons'
    receptive fields.

    Parameters
    ----------
    R : mxnet.ndarray.ndarray.NDArray
        relevance of the layer outputs, shaped (N,Hout,Wout,NF)

    Returns
    -------
    Rx : mxnet.ndarray.ndarray.NDArray
        relevance of the layer inputs, shaped like self.X
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    Rx = nd.zeros_like(self.X, ctx=self.ctx)

    # Uniform shares are the same for every output position, so the ones
    # tensor and its normaliser are built once instead of per position.
    Z = nd.ones((N, hf, wf, df, NF), ctx=self.ctx, dtype=self.dtype)
    shares = Z / Z.sum(axis=(1, 2, 3), keepdims=True)

    for i in range(Hout):
        for j in range(Wout):
            Rx[:, i * hstride:i * hstride + hf, j * wstride:j * wstride + wf, :] += (
                shares * nd.expand_dims(R[:, i:i + 1, j:j + 1, :], axis=3)).sum(axis=4)
    return Rx
def test_squeeze():
    """squeeze undoes an expand_dims: the singleton axis is removed again."""
    source = nd.ones(shape=(LARGE_X, SMALL_Y))
    with_singleton = nd.expand_dims(source, axis=1)
    squeezed = nd.squeeze(with_singleton)
    assert squeezed.shape == source.shape