def train(self, model):
    """Fit ``model`` on buffered frames with a summed squared-error objective.

    Each frame is reshaped to a ``(1, 3, size, size)`` batch, its target is
    produced by ``locate_agents`` and one Adam step is taken per frame.
    Predictions are plotted every 50 optimisation steps and the buffer is
    refreshed through ``self.update_buffer`` after every epoch.

    Args:
        model: Gluon block exposing ``collect_params`` and callable on a
            ``(1, 3, size, size)`` NDArray.
    """
    cfg = self.args
    buffer = self.get_initial_buffer(cfg.vision_init_rounds)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': cfg.vision_lr})
    step = 0
    for e in range(cfg.vision_epochs):
        print(f"epoch {e}/{cfg.vision_epochs}")
        # Accumulated for inspection only; the value is never reported.
        epoch_loss = 0
        for im in buffer:
            side = cfg.size
            tensor = nd.array(nd.reshape(im, (1, 3, side, side)))
            target = locate_agents(im, cfg)
            with autograd.record():
                out = model(tensor)
                loss = nd.sum(nd.square(out - target))
            loss.backward()
            trainer.step(1)  # batch size = 1
            epoch_loss += loss
            step += 1
            if step % 50 != 0:
                continue
            # NOTE(review): this is a plain reshape to (size, size, 3), not a
            # channel transpose -- confirm it matches the layout of `im`.
            image = nd.array(nd.reshape(im, (side, side, 3)))
            self.plot_predictions(image, out)
        buffer = self.update_buffer(buffer)
        gc.collect()
def forward(self, x, padding=None):
    """Run the two dense layers, optionally skipping padded positions.

    Args:
        x: NDArray indexed as ``[batch_size, length, hidden_size]``.
        padding: optional NDArray that flattens to ``batch_size * length``
            values; entries below ``1e-9`` mark positions that are kept.

    Returns:
        With ``padding``: NDArray of shape
        ``[batch_size, length, hidden_size]`` where the kept positions hold
        the layer output. Without it: the output of the second dense layer.
    """
    ctx = x.context
    batch_size, length = x.shape[0], x.shape[1]
    has_padding = padding is not None

    if has_padding:
        # Flatten the mask to [batch_size * length] and locate real tokens.
        flat_mask = nd.reshape(padding, -1)
        keep_ids = nd.array(np.where(flat_mask.asnumpy() < 1e-9), ctx=ctx)
        # Drop padded rows from the [batch_size * length, hidden_size] view,
        # then restore a leading axis so the dense layers see 3 dimensions.
        tokens = nd.reshape(x, (-1, self.hidden_size))
        tokens = nd.gather_nd(tokens, indices=keep_ids)
        x = nd.expand_dims(tokens, axis=0)

    hidden = self.filter_dense_layer(x)
    if self.train:
        hidden = self.dropout(hidden)
    output = self.output_dense_layer(hidden)

    if not has_padding:
        return output

    # Put the computed rows back at their original flat positions.
    output = nd.squeeze(output, axis=0)
    output = nd.scatter_nd(data=output, indices=keep_ids,
                           shape=(batch_size * length, self.hidden_size))
    return nd.reshape(output, shape=(batch_size, length, self.hidden_size))
def evaluate_accuracy(data_iter, net, ctx=None, ispred=True):
    """Evaluate accuracy of a model on the given data set.

    Args:
        data_iter: iterable of batches accepted by ``_get_batch``.
        net: model exposing ``features`` and ``output_new`` sub-networks.
        ctx: a single ``mx.Context`` or a list of contexts; defaults to
            ``[mx.cpu()]``.
        ispred: when true, print the accuracy and return the predicted class
            ids instead of the accuracy.

    Returns:
        A flat NumPy array of predicted class ids when ``ispred`` is true,
        otherwise the accuracy (correct predictions / number of labels).
    """
    # A fresh list per call avoids sharing a mutable default argument.
    if ctx is None:
        ctx = [mx.cpu()]
    elif isinstance(ctx, mx.Context):
        ctx = [ctx]

    pred_list = []
    acc_sum, n = nd.array([0]), 0
    for batch in data_iter:
        features, labels, _ = _get_batch(batch, ctx)
        for X, y in zip(features, labels):
            n += y.size
            y = y.astype('float32')
            output_features = net.features(X.as_in_context(ctx[0]))
            output = nd.softmax(net.output_new(output_features))
            pred = nd.reshape(output.argmax(axis=1), -1)
            true = nd.reshape(y, -1)
            acc_sum += (pred == true).sum().copyto(mx.cpu())
            if ispred:
                # `np.int` was only an alias of the builtin `int` and has been
                # removed from NumPy, so use the builtin directly.
                pred_list += pred.asnumpy().astype(int).tolist()
        acc_sum.wait_to_read()

    accuracy = acc_sum.asscalar() / n
    if ispred:
        print('test set acc: %.3f' % accuracy)
        return np.reshape(pred_list, -1)
    return accuracy
def ssd_calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """Compute the classification and box-regression losses on selected anchors.

    Classification uses softmax cross-entropy on anchors whose label is
    strictly positive; regression uses Huber loss on anchors whose mask row
    sums to a positive value.

    Args:
        cls_preds: 3-D NDArray; the last axis holds the class scores.
        cls_labels: NDArray with one label per anchor.
        bbox_preds, bbox_labels, bbox_masks: NDArrays reshaped to rows of 4.

    Returns:
        Tuple ``(total, cls_mean, bbox_mean)`` where ``total`` is
        ``(cls_mean + bbox_mean) * batch_size``.
    """
    def selected_rows(flags, ctx):
        # Row indices (as an NDArray on `ctx`) where `flags` is positive.
        return nd.array(np.where(flags.asnumpy() > 0)[0], ctx=ctx)

    class_criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    box_criterion = gluon.loss.HuberLoss()
    batch_size, _, _ = cls_preds.shape

    flat_cls_preds = nd.reshape(cls_preds, (-1, cls_preds.shape[-1]))
    flat_cls_labels = nd.reshape(cls_labels, (-1, 1))
    # Labels <= 0 are excluded (label 0 is presumably background -- confirm).
    positive = (flat_cls_labels[:, 0] > 0).reshape(flat_cls_labels.shape)
    cls_rows = selected_rows(positive, cls_preds.context)
    cls = class_criterion(nd.take(flat_cls_preds, cls_rows),
                          nd.take(flat_cls_labels, cls_rows))

    flat_box_labels = nd.reshape(bbox_labels, (-1, 4))
    box_weight = nd.sum(nd.reshape(bbox_masks, (-1, 4)), axis=-1)
    flat_box_preds = nd.reshape(bbox_preds, (-1, 4))
    box_rows = selected_rows(box_weight, flat_box_preds.context)
    box_targets = nd.take(flat_box_labels, box_rows)
    box_outputs = nd.take(flat_box_preds, box_rows)
    bbox = box_criterion(box_outputs, box_targets)

    cls_mean = cls.mean()
    bbox_mean = bbox.mean()
    return (cls_mean + bbox_mean) * batch_size, cls_mean, bbox_mean
def forward(self, data, label):
    """Predict future flows with the residual network and dense head.

    Args:
        data: NDArray with shape [b, t, row, col, d].
        label: NDArray with shape [b, t, row, col, d].

    Returns:
        loss: summed squared error between prediction and label.
        A dict with keys ``'flow_pred'`` and ``'flow_label'``; both values
        are laid out as [b, row, col, t, d], the label being truncated to
        its first ``FLOW_OUTPUT_DIM`` channels.
    """
    batch = data.shape[0]

    # [b, t, row, col, d] -> [b, t, d, row, col] -> [b, t * d, row, col]
    features = nd.transpose(data, axes=(0, 1, 4, 2, 3))
    features = nd.reshape(features, shape=(batch, -1, ROWS, COLUMES))

    # Residual units, then move the grid axes in front: [row, col, b, d].
    features = self.res_units(features)
    features = nd.transpose(features, axes=(2, 3, 0, 1))
    features = nd.reshape(features, shape=(ROWS * COLUMES, batch, -1))

    # Dense layers run per grid cell; unfold back to [b, row, col, t, d].
    pred = self.denses(features)
    pred = nd.reshape(pred,
                      shape=(ROWS, COLUMES, batch, FLOW_OUTPUT_LEN, -1))
    pred = nd.transpose(pred, axes=(2, 0, 1, 3, 4))

    target = nd.transpose(label, axes=(0, 2, 3, 1, 4))  # [b, row, col, t, d]
    target = target[:, :, :, :, :FLOW_OUTPUT_DIM]

    loss = nd.sum((pred - target) ** 2)
    return loss, {'flow_pred': pred, 'flow_label': target}
def link_loss(self, input, target, neighbors=8):
    """Compute the mean positive-link and negative-link losses.

    Args:
        input: link logits with ``2 * neighbors`` channels on axis 1
            (two channels per neighbour).
        target: link labels with one channel per neighbour; ``1`` marks a
            positive link and ``0`` a negative one.
        neighbors: number of neighbour links per pixel.

    Returns:
        Tuple ``(mean positive-link loss, mean negative-link loss)``.

    Side effects:
        Stores ``pos_link_weight``, ``neg_link_weight`` and
        ``link_cross_entropy`` on ``self``.
    """
    batch_size = input.shape[0]
    # Checked once, and tied to `neighbors` instead of a hard-coded 16.
    assert input.shape[1] == 2 * neighbors

    # Per-pixel weight repeated along a new neighbour axis.
    pixel_weight = nd.broadcast_axes(
        nd.expand_dims(self.pixel_weight, axis=1), axis=1, size=neighbors)
    self.pos_link_weight = (target == 1).astype('float32') * pixel_weight
    self.neg_link_weight = (target == 0).astype('float32') * pixel_weight

    per_neighbor = [
        self.link_cross_entropy_layer(
            input[:, [2 * i, 2 * i + 1]], target[:, i])[1]
        for i in range(neighbors)
    ]
    self.link_cross_entropy = nd.concat(*per_neighbor, dim=1)

    def normalized_loss(weight):
        # Per-sample sum of weight * cross-entropy divided by the sample's
        # total weight; a sample with zero total weight contributes zero.
        totals = nd.sum(nd.reshape(weight, (batch_size, -1)), axis=1)
        # Allocate on the data's own device so the concat below never mixes
        # contexts.
        ctx = weight.context
        terms = []
        for i in range(batch_size):
            if totals[i].asscalar() == 0:
                term = nd.zeros(weight[0].shape, ctx, 'float32')
            else:
                term = weight[i] * self.link_cross_entropy[i] / totals[i]
            terms.append(nd.expand_dims(term, axis=0))
        stacked = nd.concat(*terms, dim=0)
        return nd.sum(nd.reshape(stacked, (batch_size, -1)), axis=1)

    loss_link_pos = normalized_loss(self.pos_link_weight)
    loss_link_neg = normalized_loss(self.neg_link_weight)
    return nd.mean(loss_link_pos), nd.mean(loss_link_neg)
def PreImg(self, img):
    """Classify one image with the two-input network and return its label.

    ``self.Timg`` produces two views of the image, which are reshaped to
    ``(1, 3, 224, 224)`` and ``(1, 3, 299, 299)`` batches. The entry of
    ``self.idx`` at the highest softmax score is returned.
    """
    views = self.Timg(img, None)
    small = nd.reshape(views[0], (1, 3, 224, 224)).as_in_context(self.ctx)
    large = nd.reshape(views[1], (1, 3, 299, 299)).as_in_context(self.ctx)
    probs = nd.softmax(self.net(small, large)).asnumpy()
    # Column index of the first position holding the maximum score.
    best_columns = np.where(probs == probs.max())[1]
    return self.idx[best_columns[0]]
def __call__(self, x, y, norm, train_mode):
    """Compute the normalised loss and optionally back-propagate it.

    Args:
        x: input passed to ``self.generator``.
        y: targets, flattened to one dimension.
        norm: divisor applied to the summed criterion.
        train_mode: when true, ``backward()`` is called on the normalised
            loss.

    Returns:
        The first element of the loss multiplied back by ``norm``.
    """
    scores = self.generator(x)
    flat_scores = nd.reshape(scores, (-1, scores.shape[-1]))
    flat_targets = nd.reshape(y, (-1,))
    loss = self.criterion(flat_scores, flat_targets).sum() / norm
    if train_mode:
        loss.backward()
    return loss[0] * norm
def forward(self, feature, label, begin_states, is_training):
    '''
    Unroll the decoder cells step by step into a temporal sequence.

    Parameters
    ----------
    feature: a NDArray with shape [n, d].
    label: a NDArray with shape [n, b, t, d]; channels from
        ``self.output_dim`` onwards are auxiliary inputs.
    begin_states: a list of hidden states (list of hidden units with shape
        [n, b, d]) of RNNs. The list is updated in place while unrolling.
    is_training: bool; sampling between ground truth and the previous
        prediction only happens when this and ``self.use_sampling`` are set.

    Returns
    -------
    outputs: the prediction, which is a NDArray with shape [n, b, t, d]
    '''
    ctx = label.context
    num_nodes, batch_size, seq_len, _ = label.shape

    aux = label[:, :, :, self.output_dim:]    # [n, b, t, d]
    label = label[:, :, :, :self.output_dim]  # [n, b, t, d]

    # All-zero input that starts the sequence.
    go = nd.zeros(shape=(num_nodes, batch_size, self.input_dim), ctx=ctx)

    # Node feature repeated over the batch axis: [n, d] -> [n, b, d].
    node_feature = nd.expand_dims(feature, axis=1)
    node_feature = nd.broadcast_to(node_feature, shape=(0, batch_size, 0))

    output, states = [], begin_states
    for step in range(seq_len):
        # Choose the input of this step.
        if step == 0:
            data = go
        else:
            prev_aux = aux[:, :, step - 1]
            prev = nd.concat(output[step - 1], prev_aux, dim=-1)
            truth = nd.concat(label[:, :, step - 1], prev_aux, dim=-1)
            use_truth = is_training and self.use_sampling
            value = self.sampling() if use_truth else 0
            data = value * truth + (1 - value) * prev

        # Unroll one step through every layer.
        for depth, cell in enumerate(self.cells):
            data, states[depth] = cell.forward_single(
                feature, data, states[depth])
            graphs = self.graphs[depth]
            if graphs is not None:
                mixed = data
                for g in graphs:
                    mixed = mixed + g(data, feature)
                data = mixed

        # Append the node feature, then project to the prediction.
        data = nd.concat(data, node_feature, dim=-1)
        data = nd.reshape(data, shape=(num_nodes * batch_size, -1))
        data = self.proj(data)
        data = nd.reshape(data, shape=(num_nodes, batch_size, -1))
        output.append(data)

    return nd.stack(*output, axis=2)
def vectorize_matrices_in_vector(vec):
    """Flatten every even-indexed entry of ``vec`` in place and return ``vec``.

    Entry 0 is reshaped to ``num_inputs * num_hidden`` elements, entry
    ``2 * num_layers`` to ``num_hidden * num_outputs`` and the even entries
    in between to ``num_hidden * num_hidden``. Odd-indexed entries are left
    untouched.
    """
    last = num_layers * 2
    for idx in range(0, (num_layers + 1) * 2, 2):
        if idx == 0:
            flat_size = num_inputs * num_hidden
        elif idx == last:
            flat_size = num_hidden * num_outputs
        else:
            flat_size = num_hidden * num_hidden
        vec[idx] = nd.reshape(vec[idx], flat_size)
    return vec
def compute_attention(features, fconv, fatt):
    """Return the conv branch output and a per-map softmax of the attention.

    The attention output is flattened to
    ``(dim0 * dim1, dim2 * dim3)``, softmax is applied over the last axis of
    that view, and the result is reshaped back to the four original
    dimensions.

    Args:
        features: input passed to both branches.
        fconv: callable producing the convolutional output.
        fatt: callable producing the attention logits (indexed with four
            dimensions here).

    Returns:
        Tuple ``(output_conv, spatial_softmax)``.
    """
    output_conv = fconv(features)
    output_att = fatt(features)
    dims = output_att.shape
    flat = nd.reshape(output_att, (dims[0] * dims[1], dims[2] * dims[3]))
    normalized = nd.softmax(flat)
    spatial_softmax = nd.reshape(normalized,
                                 (dims[0], dims[1], dims[2], dims[3]))
    return output_conv, spatial_softmax
def test(self, model, rounds):
    """Run ``model`` on up to ``rounds`` buffered frames and plot the output.

    Args:
        model: callable taking a ``(1, 3, size, size)`` NDArray.
        rounds: passed to ``get_single_buffer`` and used as the cap on the
            number of frames evaluated.
    """
    for im in self.get_single_buffer(rounds)[:rounds]:
        side = self.args.size
        batch = nd.array(nd.reshape(im, (1, 3, side, side)))
        out = model(batch)
        # Same data viewed as (size, size, 3) for plotting.
        image = nd.array(nd.reshape(im, (side, side, 3)))
        self.plot_predictions(image, out)
def linear(self, x):
    """Project hidden states to vocabulary logits with the embedding weights.

    Args:
        x: NDArray indexed as ``[batch_size, length, hidden_size]``.

    Returns:
        NDArray of shape ``[batch_size, length, vocab_size]``.
    """
    ctx = x.context
    with self.name_scope():
        batch_size, length = x.shape[0], x.shape[1]
        flat = nd.reshape(x, (-1, self.hidden_size))
        # The embedding matrix is reused (transposed) as the output layer.
        shared_weight = self.embedding.weight.data(ctx=ctx)
        logits = nd.dot(flat, shared_weight.T)
        return nd.reshape(logits, (batch_size, length, self.vocab_size))
def forward(self, x, size=(1, 2)):
    """Rearrange channel blocks into spatial positions.

    Args:
        x: NDArray in ``NCHW`` layout.
        size: ``(rh, rw)`` upscaling factors for height and width.

    Returns:
        NDArray of shape ``(n, c // (rh * rw), h * rh, w * rw)``.
    """
    n, c, h, w = x.shape
    rh, rw = size
    oc = c // (rh * rw)
    # Split channels into (oc, rh, rw), then interleave the factors with the
    # spatial axes: (n, oc, h, rh, w, rw).
    blocks = nd.reshape(data=x, shape=(n, oc, rh, rw, h, w))
    interleaved = nd.transpose(data=blocks, axes=(0, 1, 4, 2, 5, 3))
    return nd.reshape(data=interleaved, shape=(n, oc, h * rh, w * rw))
def get_pred_result(hm_pred, offset_pred, wh_pred, k=100):
    """Decode the top-``k`` detections per image from the prediction maps.

    Args:
        hm_pred: 4-D heat-map NDArray; axis 0 is the batch, axis 1 the class.
        offset_pred: NDArray gathered at channels 0 and 1 for the x and y
            offsets.
        wh_pred: NDArray gathered at channels 0 and 1 for the box size.
        k: number of detections kept per image.

    Returns:
        NDArray of shape ``(batch_size, k, 6)`` holding
        ``xmin, ymin, xmax, ymax, cls_id, score`` on the last axis.
    """
    ctx = hm_pred.context
    batch_size, _, _, _ = hm_pred.shape
    num_dets = batch_size * k

    def as_row(arr):
        # Flat (1, batch_size * k) view used to build gather indices.
        return nd.reshape(arr, (1, num_dets))

    def as_column(arr):
        # (batch_size, k, 1) view used to assemble the final result.
        return nd.reshape(arr, (batch_size, k)).expand_dims(axis=-1)

    topk_cat_x_idx, topk_cat_y_idx, cls_id = topk(hm_pred, k=k)

    # One batch index per detection. Each image contributes k detections, so
    # the index must be repeated k times (not num_classes times).
    batch_indices = nd.repeat(nd.arange(batch_size), repeats=k)
    batch_indices = as_row(batch_indices).as_in_context(ctx)

    cls_id = as_row(cls_id)
    topk_cat_y_idx = as_row(topk_cat_y_idx)
    topk_cat_x_idx = as_row(topk_cat_x_idx)

    score_indices = nd.concat(batch_indices, cls_id, topk_cat_y_idx,
                              topk_cat_x_idx, dim=0)
    scores = nd.gather_nd(hm_pred, score_indices)

    # Channel selectors 0 and 1 for the two-channel regression maps.
    channel_0 = nd.zeros((1, num_dets), ctx=ctx)
    channel_1 = nd.ones((1, num_dets), ctx=ctx)
    indices_0 = nd.concat(batch_indices, channel_0, topk_cat_y_idx,
                          topk_cat_x_idx, dim=0)
    indices_1 = nd.concat(batch_indices, channel_1, topk_cat_y_idx,
                          topk_cat_x_idx, dim=0)

    x_offset = nd.gather_nd(offset_pred, indices_0)
    y_offset = nd.gather_nd(offset_pred, indices_1)
    # NOTE(review): channel 0 is read as height and channel 1 as width --
    # confirm this matches how `wh_pred` is trained.
    h = nd.gather_nd(wh_pred, indices_0)
    w = nd.gather_nd(wh_pred, indices_1)

    # NOTE(review): the offset is applied multiplicatively
    # (x + x * offset) -- confirm this is the intended parameterisation.
    center_x = nd.broadcast_add(topk_cat_x_idx,
                                nd.broadcast_mul(topk_cat_x_idx, x_offset))
    center_y = nd.broadcast_add(topk_cat_y_idx,
                                nd.broadcast_mul(topk_cat_y_idx, y_offset))

    xmin = as_column(center_x - w / 2)
    ymin = as_column(center_y - h / 2)
    xmax = as_column(center_x + w / 2)
    ymax = as_column(center_y + h / 2)
    cls_id = as_column(cls_id)
    scores = as_column(scores)

    results = nd.concat(xmin, ymin, xmax, ymax, cls_id, scores, dim=-1)
    return results
def __init__(self, ctx, config, lr_mult=None):
    """Build the network, trainer and losses described by ``config``.

    Args:
        ctx: context(s) used to load the pretrained backbone and initialise
            parameters.
        config: nested configuration; the ``'optimizer'`` and ``'network'``
            sections are read directly, further values through ``get_param``.
        lr_mult: NOTE(review): currently ignored -- the value is always
            replaced by ``config['optimizer']['lr_mult']``.
    """
    opt_cfg = config['optimizer']
    lr_mult = opt_cfg['lr_mult']
    lr_schedule = [(s, lr * lr_mult) for s, lr in opt_cfg['learning_rate']]

    optimizer_type = opt_cfg.get('type', 'sgd')
    optimizer_params = {'learning_rate': 1e-3, 'wd': 2e-4}
    if optimizer_type == 'sgd':
        optimizer_params['momentum'] = 0.9
    optimizer_params.update(opt_cfg.get('params', dict()))

    # NOTE(review): the model root is a hard-coded network share.
    model = vision.resnet50_v1(
        root=r'\\msralab\ProjectData\ScratchSSD\Users\v-dinliu\.mxnet\models',
        pretrained=True,
        ctx=ctx)

    # HybridNetCoarse
    # NOTE(review): eval() executes whatever string the config contains;
    # only safe while configs are trusted.
    Network = eval(config['network']['class'])
    scale = get_param(config, 'network.scale', 20)
    network = Network(model.features, config)
    network.hybridize()

    # Only parameters owned by the new network are initialised here.
    for name, param in network.collect_params().items():
        if name.startswith(network.prefix):
            param.initialize(ctx=ctx)

    # Optional per-layer learning-rate multipliers.
    for attr, factor in opt_cfg.get('lr_mult_layer', dict()).items():
        for _, param in getattr(network, attr).collect_params().items():
            param.lr_mult = param.lr_mult * factor

    trainer = gluon.trainer.Trainer(network.collect_params(),
                                    optimizer_type, optimizer_params)
    super().__init__(network, trainer, lr_schedule, ctx)

    self.epeloss = EpeLoss()
    self.epeloss.hybridize()

    # Per-channel colour statistics, shaped to broadcast over NCHW batches.
    self.color_mean = nd.reshape(nd.array([0.485, 0.456, 0.406]),
                                 [1, 3, 1, 1])
    self.color_std = nd.reshape(nd.array([0.229, 0.224, 0.225]),
                                [1, 3, 1, 1])

    self.upsampler = Upsample(2, 32)
    self.upsampler.collect_params().initialize(ctx=ctx)
    self.scale = scale

    loss_scales = get_param(config, 'loss.scales', [32])
    loss_weights = get_param(config, 'loss.weights',
                             [1 for _ in loss_scales])
    loss_match = get_param(config, 'loss.match', 'downsampling')
    self.msloss = MultiscaleEpe(loss_scales, loss_weights, match=loss_match)
    self.msloss.hybridize()
    self.msloss.collect_params().initialize(ctx=self.ctx)
def forward(self, small_embed_ids, dense_input, wide_input):
    """Combine the wide (linear) and deep (MLP) branches.

    Args:
        small_embed_ids: ids looked up in ``self.embedding`` and ``self.fc``.
        dense_input: values reshaped to
            ``(-1, large_field_num, embedding_dim)`` and appended to the
            looked-up embeddings.
        wide_input: values summed over axis 1 for the wide branch.

    Returns:
        Sum of the wide and deep branch outputs.
    """
    looked_up = self.embedding(small_embed_ids)
    dense_fields = nd.reshape(
        dense_input, shape=(-1, self.large_field_num, self.embedding_dim))
    all_fields = nd.concat(looked_up, dense_fields, dim=1)
    deep_inputs = nd.reshape(all_fields, (-1, self.embed_output_dim))

    wide_sum = self.fc(small_embed_ids).sum(1) + wide_input.sum(1)
    wide_out = self.linear_layer(wide_sum)
    deep_out = self.mlp(deep_inputs)
    return wide_out + deep_out
def forward(self, feature, data):
    """
    Apply a dense layer whose weights are generated from ``feature``.

    Args:
        feature: a NDArray with shape [n, d]
        data: a NDArray with shape [n, b, pre_d]

    Returns:
        output: a NDArray with shape [n, b, d]
    """
    # Generated weights: [n, pre_hidden_size * hidden_size] -> one matrix
    # of shape [pre_hidden_size, hidden_size] per row of `feature`.
    generated = self.w_mlp(feature)
    weight = nd.reshape(generated,
                        (-1, self.pre_hidden_size, self.hidden_size))
    # Generated bias reshaped to [n, 1, 1] so it broadcasts over the result.
    bias = nd.reshape(self.b_mlp(feature), shape=(-1, 1, 1))
    projected = nd.batch_dot(data, weight)
    return projected + bias
def forward(self, query, key, value, mask=None):
    """Multi-head attention over ``query``, ``key`` and ``value``.

    Args:
        query, key, value: NDArrays whose first axis is the batch; each is
            projected and split into ``self.h`` heads of width ``self.d_k``.
        mask: optional NDArray with the batch on axis 0. When its second
            axis has size 1 it is tiled to the query length.

    Returns:
        Output of the final linear layer applied to the concatenated heads,
        shaped ``(batch, length, h * d_k)`` before that layer.
    """
    bs = query.shape[0]

    if mask is not None:
        if mask.shape[1] == 1:
            # Encoder-style mask: expand to one row per query position.
            mask = nd.tile(mask, reps=(1, query.shape[1], 1))
        # Heads are flattened below as index = batch * h + head, so every
        # sample's mask must be repeated h times consecutively. Tiling the
        # whole batch would pair samples with the wrong mask.
        mask = nd.repeat(mask, repeats=self.h, axis=0)

    def split_heads(projected):
        # (bs, len, h * d_k) -> (bs, h, len, d_k) -> (bs * h, len, d_k)
        heads = nd.reshape(projected, (bs, -1, self.h, self.d_k))
        heads = nd.transpose(heads, (0, 2, 1, 3))
        return nd.reshape(heads, (bs * self.h, -1, self.d_k))

    query = split_heads(self.linears_0(query))
    key = split_heads(self.linears_1(key))
    value = split_heads(self.linears_2(value))

    x, _ = attention(query, key, value, mask=mask, dropout=self.dropout)

    # Undo the head split: (bs * h, len, d_k) -> (bs, len, h * d_k).
    x = nd.reshape(x, (bs, self.h, -1, self.d_k))
    x = nd.reshape(nd.transpose(x, (0, 2, 1, 3)),
                   (bs, -1, self.h * self.d_k))
    return self.linears_3(x)
def _topk(scores, K=40):
    """Select the ``K`` best-scoring locations over all classes per image.

    Args:
        scores: NDArray of shape ``(batch, cat, height, width)``.
        K: number of candidates kept per class and, afterwards, per image.

    Returns:
        Tuple ``(topk_score, topk_inds, topk_clses, topk_ys, topk_xs)``;
        every element has shape ``(batch, K)``.
    """
    batch, cat, height, width = scores.shape

    # Stage 1: best K positions inside each class map.
    per_class_scores, per_class_inds = nd.topk(
        nd.reshape(scores, (batch, cat, -1)), ret_typ='both', k=K)
    per_class_inds = per_class_inds % (height * width)
    per_class_ys = (per_class_inds / width).astype('int32').astype('float32')
    per_class_xs = (per_class_inds % width).astype('int32').astype('float32')

    # Stage 2: best K among the cat * K candidates of each image.
    topk_score, topk_ind = nd.topk(
        nd.reshape(per_class_scores, (batch, -1)), ret_typ='both', k=K)
    # Candidates are laid out class-major, so the class is index // K.
    topk_clses = (topk_ind / K).astype('int32')

    def pick(candidates):
        # Gather the stage-2 winners from a stage-1 candidate table.
        gathered = _gather_feat(nd.reshape(candidates, (batch, -1, 1)),
                                topk_ind)
        return nd.reshape(gathered, (batch, K))

    topk_inds = pick(per_class_inds)
    topk_ys = pick(per_class_ys)
    topk_xs = pick(per_class_xs)
    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def forward(self, pred, label):
    """Return one minus the smoothed Dice coefficient of the whole batch.

    Args:
        pred: predictions; softmax over ``self.axis`` is applied unless
            ``self.from_logists`` is set.
        label: class ids when ``self.sparse_label`` is set (expanded to a
            one-hot tensor shaped like ``pred``), otherwise a tensor already
            shaped like ``pred``.

    Returns:
        ``1 - (2 * intersection + smooth) / (smooth + union)``, computed over
        all elements of the batch.
    """
    num = pred.shape[0]
    if not self.from_logists:
        pred = nd.softmax(pred, self.axis)

    if self.sparse_label:
        # Build the one-hot target outside of the recorded graph.
        with autograd.pause():
            one_hot = nd.zeros_like(pred)
            for cls in range(one_hot.shape[1]):
                one_hot[:, cls, :] = (label == cls) * 1.0
        label = one_hot

    flat_pred = nd.reshape(pred, (num, -1))
    flat_label = nd.reshape(label, (num, -1))
    union = flat_pred.sum() + flat_label.sum()
    inter = (flat_pred * flat_label).sum()
    return 1 - (2 * inter + self.smooth) / (self.smooth + union)
def test_stack_neightbor(in_data, factor=2):
    """Fold ``factor`` x ``factor`` spatial neighbourhoods into channels.

    The shape comments below follow an example input of
    ``(1, 3, 416, 416)`` with ``factor=2``, which ends as
    ``(1, 12, 208, 208)``.
    """
    def fold_into_channels(arr):
        # Merge axes 1 and 2 into a single channel axis, keep the rest.
        return nd.reshape(arr, shape=(0, -3, -1, -2))

    # Split the height axis: the two values after -4 say that h becomes
    # (h / factor, factor).                         -> 1, 3, 208, 2, 416
    rows_split = nd.reshape(in_data, shape=(0, 0, -4, -1, factor, -2))
    # Move the factor axis next to the channels.    -> 1, 3, 2, 208, 416
    rows_split = nd.transpose(rows_split, axes=(0, 1, 3, 2, 4))
    rows_folded = fold_into_channels(rows_split)  # -> 1, 6, 208, 416

    # Split the width axis the same way.            -> 1, 6, 208, 208, 2
    cols_split = nd.reshape(rows_folded, shape=(0, 0, 0, -4, -1, factor))
    # Move the factor axis next to the channels.    -> 1, 6, 2, 208, 208
    cols_split = nd.transpose(cols_split, axes=(0, 1, 4, 2, 3))
    # output: 1, 12, 208, 208
    return fold_into_channels(cols_split)
def forward(self, user_id, text, topics):
    """Score a text for a user from its distribution over topic words.

    Args:
        user_id: ids looked up in ``self.emb_uw``.
        text: word ids looked up in ``self.emb_word``.
        topics: topic word ids looked up in the same word embedding.

    Returns:
        Output of ``self.mlp`` applied after ``self.mlp_w1`` and
        ``self.mlp_w2`` to the concatenated user and topic features.
    """
    user_word = self.emb_uw(user_id)
    word_emb = self.emb_word(text)

    # Topic embeddings arranged as (word_dim, topics_num, 1) so the dot
    # product yields one similarity per word and topic.
    topic_vectors = self.emb_word(topics)
    topic_vectors = nd.transpose(topic_vectors, axes=(1, 0))
    topic_vectors = nd.reshape(topic_vectors,
                               (self.word_dim, self.topics_num, 1))
    similarity = nd.dot(word_emb, topic_vectors)
    similarity = nd.reshape(
        similarity,
        (self.batch_size, self.sentence_length, self.topics_num))

    # Normalise over topics, then summarise with the topic MLP.
    topic_dist = nd.softmax(similarity, axis=2)
    topic_features = self.mlp_topic(topic_dist)

    xw = nd.concat(user_word, topic_features, dim=1)
    hidden = self.mlp_w2(self.mlp_w1(xw))
    return self.mlp(hidden)
def deal_output(y: nd.NDArray, s, b, c):
    """
    Split a flat network output into class, score and location parts.

    :param y: 2-D NDArray; axis 1 holds ``s*s*c`` class values, then
        ``s*s*b`` scores, then the location values
    :param s: grid size (the grid has ``s * s`` cells)
    :param b: number of boxes per cell
    :param c: number of classes per cell
    :return: ``(label, preds, location)`` where ``label`` has shape
        ``(-1, s*s, c)``, ``preds`` is the untouched 2-D slice and
        ``location`` has shape ``(-1, s*s, b, 4)``
    """
    cells = s * s
    label_end = cells * c
    preds_end = label_end + cells * b

    label = nd.reshape(y[:, 0:label_end], shape=(-1, cells, c))
    preds = y[:, label_end:preds_end]
    location = nd.reshape(y[:, preds_end:], shape=(-1, cells, b, 4))
    return label, preds, location
def evaluate_accuracy(self, data_iter, net, ctx=None):
    """Evaluate accuracy of a model on the given data set.

    Args:
        data_iter: iterable of batches accepted by ``self._get_batch``.
        net: model called on each feature shard; class scores are expected
            on axis 1.
        ctx: a single ``mx.Context`` or a list of contexts; defaults to
            ``[mx.cpu()]``.

    Returns:
        Number of correct predictions divided by the number of labels.
    """
    # A fresh list per call avoids sharing a mutable default argument.
    if ctx is None:
        ctx = [mx.cpu()]
    elif isinstance(ctx, mx.Context):
        ctx = [ctx]

    acc_sum, n = nd.array([0]), 0
    for batch in data_iter:
        features, labels, _ = self._get_batch(batch, ctx)
        for X, y in zip(features, labels):
            y = y.astype('float32')
            predicted = nd.reshape(net(X).argmax(axis=1), -1)
            expected = nd.reshape(y, -1)
            # Accumulate on the CPU so shards from any device can be summed.
            acc_sum += (predicted == expected).sum().copyto(mx.cpu())
            n += y.size
        acc_sum.wait_to_read()
    return acc_sum.asscalar() / n
def kron(matrix1, matrix2):
    """Kronecker product of two 2-D arrays.

    Both operands are given singleton axes so that their element-wise
    product has shape ``(s1, s3, s2, s4)``, which is then collapsed to
    ``(s1 * s3, s2 * s4)``.
    """
    rows1, cols1 = matrix1.shape
    rows2, cols2 = matrix2.shape
    left = matrix1.reshape((rows1, 1, cols1, 1))
    right = matrix2.reshape((1, rows2, 1, cols2))
    return nd.reshape(left * right, (rows1 * rows2, cols1 * cols2))
def decode(self, targets, encoder_outputs, attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    ctx = targets.context
    embedded = self.embedding_softmax_layer(targets)

    # Shift the targets right by one step: a zero vector is padded in front
    # of the length axis and the last step is dropped. The temporary leading
    # axis is there because the input is padded as a 4-D array.
    padded = nd.pad(data=nd.expand_dims(embedded, axis=0),
                    mode="constant",
                    constant_value=0,
                    pad_width=(0, 0, 0, 0, 1, 0, 0, 0))
    decoder_inputs = nd.reshape(data=padded, shape=padded.shape[1:])
    decoder_inputs = decoder_inputs[:, :-1, :]

    length = decoder_inputs.shape[1]
    position_encoding = model_utils.get_position_encoding(
        length, self.param.hidden_size, ctx)
    decoder_inputs = decoder_inputs + position_encoding
    if self.train:
        decoder_inputs = self.dropout_output(decoder_inputs)

    self_attention_bias = model_utils.get_decoder_self_attention_bias(
        length, ctx)
    outputs = self.decoder_stack(decoder_inputs, encoder_outputs,
                                 self_attention_bias, attention_bias)
    return self.embedding_softmax_layer.linear(outputs)
def emb_tree(self, root):
    """
    Convert a syntactic parse (sub)tree into its vector form.

    :param root: TNode root of the sentence element; ``root.next`` is either
        a word id (leaf) or a list of child nodes
    :return: embedding vector of the input element
    :raises TypeError: if ``root.next`` is neither an ``int`` nor a ``list``
    """
    if isinstance(root.next, int):
        # Leaf node: a single word.
        # next shape = 1 * word_size
        next_emb = self.word_embedding(nd.array([root.next], ctx=self.ctx))
    elif isinstance(root.next, list):
        # Inner node: merge the embeddings of all children with the
        # recurrent layer and keep its last returned state.
        begin_state = self.word_set.begin_state(batch_size=1, ctx=self.ctx)
        children = [self.emb_tree(child) for child in root.next]
        child_seq = nd.stack(*children, axis=1)
        _, states = self.word_set(child_seq, states=begin_state)
        last_state = states[-1]
        next_emb = nd.reshape(last_state, [1, last_state.shape[-1]])  # 1 * C
    else:
        # Invalid node. The message previously read the non-existent
        # attribute `root.nexts`, which raised AttributeError instead.
        raise TypeError('Error with Parse Tree Node.next type'
                        + str(type(root.next)))

    # Combine the tag with the embedding vector.
    tag_id = self.vocab_tag.word2id(root.val)
    tag_emb = self.tag_embedding(nd.array([tag_id], ctx=self.ctx))
    emb = nd.concat(tag_emb, next_emb)
    emb = (self.word_ass(emb) + next_emb) / 2  # residual connection
    # Record the element; it is used later for attention over elements.
    self.element.append(emb)
    return emb
def forward(self, feature, data):
    """
    Apply a dense layer whose parameters are produced from ``feature``.

    Parameters
    ----------
    feature: NDArray with shape [n, d]
    data: NDArray with shape [n, b, input_hidden_size]

    Returns
    -------
    output: NDArray with shape [n, b, output_hidden_size]
    """
    # [n, input_hidden_size * output_hidden_size] unfolded into one
    # [input_hidden_size, output_hidden_size] matrix per row of `feature`.
    flat_weight = self.w_mlp(feature)
    weight = nd.reshape(
        flat_weight, (-1, self.input_hidden_size, self.output_hidden_size))
    # Bias reshaped to [n, 1, 1] so it broadcasts over the batched product.
    bias = nd.reshape(self.b_mlp(feature), shape=(-1, 1, 1))
    product = nd.batch_dot(data, weight)
    return product + bias
def sample_neighbours(self, data, query_network):
    """Fetch the ``self.k`` stored samples closest to each query.

    Queries are embedded with ``query_network``, compared to every row of
    ``self.key_memory`` by Euclidean distance on the CPU, and the values and
    labels of the nearest keys are returned.

    Args:
        data: sequence of inputs unpacked into ``query_network``; the first
            axis of ``data[0]`` is the batch size.
        query_network: callable producing one query vector per sample.

    Returns:
        ``[values, labels]`` where ``values`` holds one gathered array per
        entry of ``self.value_memory`` and ``labels`` one per entry of
        ``self.label_memory``.
    """
    num_stored_samples = self.key_memory.shape[0]
    batch_size = data[0].shape[0]
    query = query_network(*data).as_in_context(mx.cpu())

    # Pair every query with every stored key: both become
    # (batch_size * num_stored_samples, dim), query-major.
    queries = nd.repeat(query, repeats=num_stored_samples, axis=0)
    keys = nd.tile(self.key_memory, reps=(batch_size, 1))
    squared = nd.square(nd.subtract(queries, keys))
    # exclude=1 with axis=0 reduces over every axis except the first.
    dist = nd.sqrt(nd.sum(squared, exclude=1, axis=0))
    dist = nd.reshape(dist, shape=(batch_size, num_stored_samples))

    # Nearest neighbours are the SMALLEST distances; topk returns the
    # largest values unless is_ascend is set.
    sample_ind = nd.topk(dist, k=self.k, axis=1, ret_typ="indices",
                         is_ascend=True)

    sample_labels = [labels[sample_ind] for labels in self.label_memory]
    sample_values = [values[sample_ind] for values in self.value_memory]
    return [sample_values, sample_labels]
def _spectral_norm(self):
    """Return the weight divided by an estimate of its spectral norm.

    The weight is viewed as a ``(out, -1)`` matrix and its largest singular
    value is estimated with ``POWER_ITERATION`` rounds of power iteration
    started from ``self.u``. A zero estimate is replaced by ``EPSILON``.
    """
    weight = self.params.get('weight').data(self.ctx)
    weight_2d = nd.reshape(weight, [weight.shape[0], -1])

    u_vec = self.u.data(self.ctx)
    v_vec = None
    for _ in range(POWER_ITERATION):
        v_vec = nd.L2Normalization(nd.dot(u_vec, weight_2d))
        u_vec = nd.L2Normalization(nd.dot(v_vec, weight_2d.T))

    sigma = nd.sum(nd.dot(u_vec, weight_2d) * v_vec)
    if sigma == 0.:
        sigma = EPSILON

    # NOTE(review): ParameterDict.setattr assigns an attribute named 'u' on
    # every parameter; it does not write into the `u` parameter's data.
    # Confirm the updated vector is persisted as intended.
    self.params.setattr('u', u_vec)
    return weight / sigma
def predict_sentiment(net, vocab, sentence):
    """Predict the sentiment of a given sentence.

    Args:
        net: model called on a ``(1, num_tokens)`` batch of token ids.
        vocab: vocabulary exposing a ``token_to_idx`` mapping.
        sentence: iterable of tokens.

    Returns:
        ``'positive'`` when the arg-max class is 1, otherwise ``'negative'``.
    """
    token_ids = [vocab.token_to_idx[token] for token in sentence]
    encoded = nd.array(token_ids, ctx=try_gpu())
    scores = net(nd.reshape(encoded, shape=(1, -1)))
    label = nd.argmax(scores, axis=1)
    if label.asscalar() == 1:
        return 'positive'
    return 'negative'