def validate(val_data, val_dataset, net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    val_metric.reset()

    from tqdm import tqdm
    for batch in tqdm(val_data):
        data, scale, center, score, imgid = val_batch_fn(batch, ctx)

        outputs = [net(X) for X in data]
        if opt.flip_test:
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [flip_heatmap(o, val_dataset.joint_pairs, shift=True)
                                for o in outputs_flip]
            outputs = [(o + o_flip) / 2 for o, o_flip in zip(outputs, outputs_flipback)]

        if len(outputs) > 1:
            outputs_stack = nd.concat(*[o.as_in_context(mx.cpu()) for o in outputs],
                                      dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())

        preds, maxvals = get_final_preds(outputs_stack, center.asnumpy(),
                                         scale.asnumpy())
        val_metric.update(preds, maxvals, score, imgid)

    res = val_metric.get()
    return res
def get_final_preds(batch_heatmaps, center, scale):
    coords, maxvals = get_max_pred(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing: shift each peak a quarter pixel towards its higher neighbour
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                diff = nd.concat(hm[py][px + 1] - hm[py][px - 1],
                                 hm[py + 1][px] - hm[py - 1][px],
                                 dim=0)
                coords[n][p] += nd.sign(diff) * .25

    preds = nd.zeros_like(coords)

    # Transform back to the original image coordinates
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
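# `get_final_preds` relies on a `get_max_pred` helper that is not shown here.
# A minimal sketch of it, assuming the standard simple-baselines argmax
# decoding (take the per-joint heatmap peak and convert the flat index back
# to x/y coordinates); the actual implementation may differ:
def get_max_pred(batch_heatmaps):
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = nd.argmax(heatmaps_reshaped, 2)
    maxvals = nd.max(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = nd.tile(idx, (1, 1, 2)).astype('float32')
    preds[:, :, 0] = preds[:, :, 0] % width            # x = flat index mod width
    preds[:, :, 1] = nd.floor(preds[:, :, 1] / width)  # y = flat index div width

    # zero out coordinates whose peak value is not positive
    pred_mask = nd.tile(nd.greater(maxvals, 0.0), (1, 1, 2))
    preds = preds * pred_mask
    return preds, maxvals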
def forward(self, x):
    path_1 = self.path_1_conv_1(x)
    path_2 = self.path_2_conv_3(self.path_2_conv_1(x))
    path_3 = self.path_3_conv_5(self.path_3_conv_1(x))
    path_4 = self.path_4_conv_1(self.path_4_pool_3(x))
    return nd.concat(path_1, path_2, path_3, path_4, dim=1)
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          corpus_indices, vocab, ctx, is_random_iter,
                          num_epochs, num_steps, lr, clipping_theta,
                          batch_size, prefixes):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter_fn = data_iter_random
    else:
        data_iter_fn = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()
    start = time.time()
    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            # If adjacent sampling is used, the hidden state is initialized
            # at the beginning of the epoch
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n = 0.0, 0
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:
                # If random sampling is used, the hidden state is initialized
                # before each mini-batch update
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                # Otherwise, the detach function needs to be used to separate
                # the hidden state from the computational graph to avoid
                # backpropagation beyond the current sample
                for s in state:
                    s.detach()
            with autograd.record():
                inputs = to_onehot(X, len(vocab))
                # outputs is num_steps terms of shape (batch_size, len(vocab))
                (outputs, state) = rnn(inputs, state, params)
                # After stitching it is (num_steps * batch_size, len(vocab))
                outputs = nd.concat(*outputs, dim=0)
                # The shape of Y is (batch_size, num_steps), and then becomes
                # a vector with a length of batch * num_steps after
                # transposition. This gives it a one-to-one correspondence
                # with output rows
                y = Y.T.reshape((-1,))
                # Average classification error via cross entropy loss
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)  # Clip the gradient
            # Since the error is the mean, no need to average gradients here
            sgd(params, lr, 1)
            l_sum += l.asscalar() * y.size
            n += y.size
        if epoch % (num_epochs // 4) == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch, math.exp(l_sum / n), time.time() - start))
            start = time.time()
        if epoch % (num_epochs // 2) == 0:
            for prefix in prefixes:
                print(' -', predict_rnn(prefix, 50, rnn, params,
                                        init_rnn_state, num_hiddens, vocab,
                                        ctx))
def _load_embedding_serialized(self, pretrained_file_path):
    """Load embedding vectors from a pre-trained token embedding file.

    For every unknown token, if its representation `self.unknown_token` is
    encountered in the pre-trained token embedding file, index 0 of
    `self.idx_to_vec` maps to the pre-trained token embedding vector loaded
    from the file; otherwise, index 0 of `self.idx_to_vec` maps to the text
    embedding vector initialized by `self._init_unknown_vec`.

    ValueError is raised if a token occurs multiple times.
    """
    deserialized_embedding = TokenEmbedding.deserialize(pretrained_file_path)
    if deserialized_embedding.unknown_token:
        # Some .npz files on S3 may contain an unknown token and its
        # respective embedding. As a workaround, we assume that C.UNK_IDX
        # is the same now as it was when the .npz was generated. Under this
        # assumption we can safely overwrite the respective token and
        # vector from the npz.
        if self.unknown_token:
            idx_to_token = deserialized_embedding.idx_to_token
            idx_to_vec = deserialized_embedding.idx_to_vec
            idx_to_token[C.UNK_IDX] = self.unknown_token
            if self._init_unknown_vec:
                vec_len = idx_to_vec.shape[1]
                idx_to_vec[C.UNK_IDX] = self._init_unknown_vec(shape=vec_len)
        else:
            # If the TokenEmbedding shall not have an unknown token, we
            # just delete the one in the npz.
            assert C.UNK_IDX == 0
            idx_to_token = deserialized_embedding.idx_to_token[C.UNK_IDX + 1:]
            idx_to_vec = deserialized_embedding.idx_to_vec[C.UNK_IDX + 1:]
    else:
        idx_to_token = deserialized_embedding.idx_to_token
        idx_to_vec = deserialized_embedding.idx_to_vec

    if not len(set(idx_to_token)) == len(idx_to_token):
        raise ValueError('Serialized embedding invalid. '
                         'It contains duplicate tokens.')

    if self.unknown_token:
        try:
            unknown_token_idx = deserialized_embedding.idx_to_token.index(
                self.unknown_token)
            idx_to_token[C.UNK_IDX], idx_to_token[unknown_token_idx] = \
                idx_to_token[unknown_token_idx], idx_to_token[C.UNK_IDX]
            idxs = [C.UNK_IDX, unknown_token_idx]
            idx_to_vec[idxs] = idx_to_vec[idxs[::-1]]
        except ValueError:
            vec_len = idx_to_vec.shape[1]
            idx_to_token.insert(0, self.unknown_token)
            idx_to_vec = nd.concat(
                self._init_unknown_vec(shape=vec_len).reshape((1, -1)),
                idx_to_vec, dim=0)

    self._idx_to_token = idx_to_token
    self._idx_to_vec = idx_to_vec
    self._token_to_idx.update(
        (token, idx) for idx, token in enumerate(self._idx_to_token))
def _slice(self, x, num_anchors, num_offsets):
    """Since some stages won't see partial anchors, we have to slice the
    correct targets."""
    # x with shape (B, N, A, 1 or 2)
    anchors = [0] + num_anchors.tolist()
    offsets = [0] + num_offsets.tolist()
    ret = []
    for i in range(len(num_anchors)):
        y = x[:, offsets[i]:offsets[i + 1], anchors[i]:anchors[i + 1], :]
        ret.append(y.reshape((0, -3, -1)))
    return nd.concat(*ret, dim=1)
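# A quick standalone check of the slicing logic above, using the 416x416
# YOLOv3 numbers (the batch size and trailing dim are illustrative): each
# stage's (cells, anchors) block is cut out, flattened, and re-concatenated
# on dim 1.
import numpy as np
from mxnet import nd

def slice_targets(x, num_anchors, num_offsets):
    anchors = [0] + num_anchors.tolist()
    offsets = [0] + num_offsets.tolist()
    ret = []
    for i in range(len(num_anchors)):
        y = x[:, offsets[i]:offsets[i + 1], anchors[i]:anchors[i + 1], :]
        ret.append(y.reshape((0, -3, -1)))  # keep B, merge cells*anchors, infer rest
    return nd.concat(*ret, dim=1)

x = nd.zeros((2, 3549, 9, 2))  # (B, all cells, all anchors, 2)
out = slice_targets(x, np.cumsum([3, 3, 3]), np.cumsum([169, 676, 2704]))
print(out.shape)  # (2, 10647, 2): 169*3 + 676*3 + 2704*3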
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                y = Y.T.reshape((-1,))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f" %
                  (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(
                    rnn, prefix, pred_len, params, num_hiddens, vocab_size,
                    ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
def ten_crop(src, size):
    """Crop 10 regions from an array.
    This is performed the same as:
    http://chainercv.readthedocs.io/en/stable/reference/transforms.html#ten-crop

    This method crops 10 regions. All regions will be in shape
    :obj:`size`. These regions consist of 1 center crop and 4 corner
    crops and horizontal flips of them.
    The crops are ordered in this order.

    * center crop
    * top-left crop
    * bottom-left crop
    * top-right crop
    * bottom-right crop
    * center crop (flipped horizontally)
    * top-left crop (flipped horizontally)
    * bottom-left crop (flipped horizontally)
    * top-right crop (flipped horizontally)
    * bottom-right crop (flipped horizontally)

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image.
    size : tuple
        Tuple of length 2, as (width, height) of the cropped areas.

    Returns
    -------
    mxnet.nd.NDArray
        The cropped images with shape (10, size[1], size[0], C)
    """
    h, w, _ = src.shape
    ow, oh = size

    if h < oh or w < ow:
        raise ValueError(
            "Cannot crop area {} from image with size ({}, {})".format(
                str(size), h, w))

    center = src[(h - oh) // 2:(h + oh) // 2, (w - ow) // 2:(w + ow) // 2, :]
    tl = src[0:oh, 0:ow, :]
    bl = src[h - oh:h, 0:ow, :]
    tr = src[0:oh, w - ow:w, :]
    br = src[h - oh:h, w - ow:w, :]
    crops = nd.stack(*[center, tl, bl, tr, br], axis=0)
    crops = nd.concat(*[crops, nd.flip(crops, axis=2)], dim=0)
    return crops
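# Usage sketch for ten_crop (the file name is illustrative; any HWC image
# NDArray at least as large as the crop works):
from mxnet import image

img = image.imread('example.jpg')   # (H, W, 3)
crops = ten_crop(img, (224, 224))   # (10, 224, 224, 3)
print(crops.shape)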
def forward(self, x, sampled_values, label):
    """Forward computation."""
    sampled_candidates, _, _ = sampled_values
    # (batch_size,)
    label = label.reshape(shape=(-1,))
    # (num_sampled + batch_size,)
    ids = nd.concat(sampled_candidates, label, dim=0)
    # lookup weights and biases
    weight = self.weight.row_sparse_data(ids)
    bias = self.bias.data(ids.context)
    # (num_sampled + batch_size, dim)
    w_all = nd.Embedding(data=ids, weight=weight, **self._kwargs)
    # (num_sampled + batch_size,)
    b_all = nd.take(bias, indices=ids)
    out, new_targets = self._dense(x, sampled_values, label, w_all, b_all)
    return out, new_targets
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, is_random_iter, num_epochs, num_steps,
                          lr, clipping_theta, batch_size, pred_period,
                          pred_len, prefixes):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter_fn = data_iter_random
    else:
        data_iter_fn = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(num_epochs):
        if not is_random_iter:
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                for s in state:
                    s.detach()
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                (outputs, state) = rnn(inputs, state, params)
                outputs = nd.concat(*outputs, dim=0)
                y = Y.T.reshape((-1,))
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            l_sum += l.asscalar() * y.size
            n += y.size
        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn(
                    prefix, pred_len, rnn, params, init_rnn_state,
                    num_hiddens, vocab_size, ctx, idx_to_char, char_to_idx))
def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
    # pylint: disable=arguments-differ
    """Run decoding

    Parameters
    ----------
    word_inputs : mxnet.ndarray.NDArray
        word indices of seq_len x batch_size
    tag_inputs : mxnet.ndarray.NDArray
        tag indices of seq_len x batch_size
    arc_targets : mxnet.ndarray.NDArray
        gold arc indices of seq_len x batch_size
    rel_targets : mxnet.ndarray.NDArray
        gold rel indices of seq_len x batch_size

    Returns
    -------
    tuple
        (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training,
        else if given gold target
        then return arc_accuracy, rel_accuracy, overall_accuracy, outputs,
        otherwise return outputs, where outputs is a list of (arcs, rels).
    """

    def flatten_numpy(arr):
        """Flatten nd-array to 1-d column vector

        Parameters
        ----------
        arr : numpy.ndarray
            input tensor

        Returns
        -------
        numpy.ndarray
            A column vector
        """
        return np.reshape(arr, (-1,), 'F')

    is_train = autograd.is_training()
    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]
    mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
    num_tokens = int(np.sum(mask))  # non padding, non root token number

    if is_train or arc_targets is not None:
        mask_1D = flatten_numpy(mask)
        mask_1D_tensor = nd.array(mask_1D)

    unked_words = np.where(word_inputs < self._vocab.words_in_train,
                           word_inputs, self._vocab.UNK)
    word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
    if self.pret_word_embs:
        word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
    tag_embs = self.tag_embs(nd.array(tag_inputs))

    # Dropout
    emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

    top_recur = utils.biLSTM(self.f_lstm, self.b_lstm, emb_inputs,
                             dropout_x=self.dropout_lstm_input)
    top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

    W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
    W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
    dep = nd.Dropout(
        data=utils.leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep),
        axes=[0], p=self.dropout_mlp)
    head = nd.Dropout(
        data=utils.leaky_relu(nd.dot(top_recur, W_head.T) + b_head),
        axes=[0], p=self.dropout_mlp)
    dep, head = nd.transpose(dep, axes=[2, 0, 1]), \
                nd.transpose(head, axes=[2, 0, 1])
    dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
    head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

    W_arc = self.arc_W.data()
    arc_logits = utils.bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size,
                                seq_len, batch_size, num_outputs=1,
                                bias_x=True, bias_y=False)
    # (#head x #dep) x batch_size

    flat_arc_logits = utils.reshape_fortran(
        arc_logits, (seq_len, seq_len * batch_size))
    # (#head) x (#dep x batch_size)

    arc_preds = arc_logits.argmax(0)  # seq_len x batch_size

    if is_train or arc_targets is not None:
        correct = np.equal(arc_preds.asnumpy(), arc_targets)
        arc_correct = correct.astype(np.float32) * mask
        arc_accuracy = np.sum(arc_correct) / num_tokens
        targets_1D = flatten_numpy(arc_targets)
        losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
        arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        arc_probs = np.transpose(
            np.reshape(
                nd.softmax(flat_arc_logits, axis=0).asnumpy(),
                (seq_len, seq_len, batch_size), 'F'))
    # #batch_size x #dep x #head

    W_rel = self.rel_W.data()
    rel_logits = utils.bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size,
                                seq_len, batch_size,
                                num_outputs=self._vocab.rel_size,
                                bias_x=True, bias_y=True)
    # (#head x rel_size x #dep) x batch_size

    flat_rel_logits = utils.reshape_fortran(
        rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
    # (#head x rel_size) x (#dep x batch_size)

    if is_train:  # pylint: disable=using-constant-test
        _target_vec = targets_1D
    else:
        _target_vec = flatten_numpy(arc_preds.asnumpy())
    _target_vec = nd.array(_target_vec).reshape(seq_len * batch_size, 1)
    _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

    partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
    # (rel_size) x (#dep x batch_size)

    if is_train or arc_targets is not None:
        rel_preds = partial_rel_logits.argmax(0)
        targets_1D = flatten_numpy(rel_targets)
        rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(
            np.float32) * mask_1D
        rel_accuracy = np.sum(rel_correct) / num_tokens
        losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
        rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        rel_probs = np.transpose(
            np.reshape(
                nd.softmax(flat_rel_logits.transpose([1, 0, 2]),
                           axis=0).asnumpy(),
                (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
    # batch_size x #dep x #head x #nclasses

    if is_train or arc_targets is not None:
        l = arc_loss + rel_loss
        correct = rel_correct * flatten_numpy(arc_correct)
        overall_accuracy = np.sum(correct) / num_tokens

    if is_train:  # pylint: disable=using-constant-test
        return arc_accuracy, rel_accuracy, overall_accuracy, l

    outputs = []

    for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs,
                                       rel_probs):
        # parse sentences one by one
        msk[0] = 1.
        sent_len = int(np.sum(msk))
        arc_pred = utils.arc_argmax(arc_prob, sent_len, msk)
        rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
        rel_pred = utils.rel_argmax(rel_prob, sent_len)
        outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

    if arc_targets is not None:
        return arc_accuracy, rel_accuracy, overall_accuracy, outputs
    return outputs
def forward(self, x):
    for layer in self.net:
        out = layer(x)
        x = nd.concat(x, out, dim=1)
    return x
def concat_predictions(preds):
    return nd.concat(*preds, dim=1)
def _load_embedding_serialized(self, pretrained_file_path, init_unknown_vec):
    """Load embedding vectors from a pre-trained token embedding file.

    For every unknown token, if its representation `self.unknown_token` is
    encountered in the pre-trained token embedding file, index 0 of
    `self.idx_to_vec` maps to the pre-trained token embedding vector loaded
    from the file; otherwise, index 0 of `self.idx_to_vec` maps to the text
    embedding vector initialized by `init_unknown_vec`.

    ValueError is raised if a token occurs multiple times.
    """
    deserialized_embedding = TokenEmbedding.deserialize(pretrained_file_path)

    if deserialized_embedding.unknown_token:
        # Some .npz files on S3 may contain an unknown token and its
        # respective embedding. As a workaround, we assume that C.UNK_IDX
        # is the same now as it was when the .npz was generated. Under this
        # assumption we can safely overwrite the respective token and
        # vector from the npz.
        if deserialized_embedding.unknown_token == self.unknown_token:
            # If the unknown_token is the same, we will find it below and a
            # new unknown token won't be inserted.
            idx_to_token = deserialized_embedding.idx_to_token
            idx_to_vec = deserialized_embedding.idx_to_vec
        elif self.unknown_token:
            # If they are different, we need to manually replace it so that
            # it is found below and no new unknown token would be inserted.
            idx_to_token = deserialized_embedding.idx_to_token
            idx_to_vec = deserialized_embedding.idx_to_vec
            idx_to_token[C.UNK_IDX] = self.unknown_token
            vec_len = idx_to_vec.shape[1]
            idx_to_vec[C.UNK_IDX] = init_unknown_vec(shape=vec_len)
        else:
            # If the TokenEmbedding shall not have an unknown token, we
            # just delete the one in the npz.
            idx_to_token = (
                deserialized_embedding.idx_to_token[:C.UNK_IDX] +
                deserialized_embedding.idx_to_token[C.UNK_IDX + 1:])
            idx_to_vec = nd.concat(
                deserialized_embedding.idx_to_vec[:C.UNK_IDX],
                deserialized_embedding.idx_to_vec[C.UNK_IDX + 1:], dim=0)
    else:
        idx_to_token = deserialized_embedding.idx_to_token
        idx_to_vec = deserialized_embedding.idx_to_vec

    if not np.all(np.unique(idx_to_token, return_counts=True)[1] == 1):
        raise ValueError('Serialized embedding invalid. '
                         'It contains duplicate tokens.')

    if self.unknown_token:
        try:
            unknown_token_idx = deserialized_embedding.idx_to_token.index(
                self.unknown_token)
            idx_to_token[C.UNK_IDX], idx_to_token[unknown_token_idx] = \
                idx_to_token[unknown_token_idx], idx_to_token[C.UNK_IDX]
            idxs = [C.UNK_IDX, unknown_token_idx]
            idx_to_vec[idxs] = idx_to_vec[idxs[::-1]]
        except ValueError:
            vec_len = idx_to_vec.shape[1]
            idx_to_token.insert(0, self.unknown_token)
            idx_to_vec = nd.concat(
                init_unknown_vec(shape=vec_len).reshape((1, -1)),
                idx_to_vec, dim=0)

    self._idx_to_token = idx_to_token
    self._idx_to_vec = idx_to_vec
    self._token_to_idx.update(
        (token, idx) for idx, token in enumerate(self._idx_to_token))
def forward(self, img, xs, anchors, offsets, gt_boxes,
            gt_ids, gt_mixratio=None):
    """Generating training targets that do not require network predictions.

    Parameters
    ----------
    img : mxnet.nd.NDArray
        Original image tensor.
    xs : list of mxnet.nd.NDArray
        List of feature maps.
    anchors : mxnet.nd.NDArray
        YOLO3 anchors.
    offsets : mxnet.nd.NDArray
        Pre-generated x and y offsets for YOLO3.
    gt_boxes : mxnet.nd.NDArray
        Ground-truth boxes.
    gt_ids : mxnet.nd.NDArray
        Ground-truth IDs.
    gt_mixratio : mxnet.nd.NDArray, optional
        Mixup ratio from 0 to 1.

    Returns
    -------
    (tuple of) mxnet.nd.NDArray
        objectness: 0 for negative, 1 for positive, -1 for ignore.
        center_targets: regression target for center x and y.
        scale_targets: regression target for scale x and y.
        weights: element-wise gradient weights for center_targets and
          scale_targets.
        class_targets: a one-hot vector for classification.
    """
    assert isinstance(anchors, (list, tuple))
    # `anchors` is one big list holding three per-stage anchor lists.
    # With a 416x416 input, all_anchors has shape (9, 2).
    all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
    assert isinstance(offsets, (list, tuple))
    # With a 416x416 input, all_offsets has shape (3549, 2), where
    # 3549 = 169 (13*13) + 676 (26*26) + 2704 (52*52).
    all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
    # With a 416x416 input, num_anchors is [3, 6, 9]
    num_anchors = np.cumsum([a.size // 2 for a in anchors])
    # With a 416x416 input, num_offsets is [169, 845, 3549]
    num_offsets = np.cumsum([o.size // 2 for o in offsets])
    _offsets = [0] + num_offsets.tolist()
    assert isinstance(xs, (list, tuple))
    assert len(xs) == len(anchors) == len(offsets)

    # orig image size: the size of the training image
    orig_height = img.shape[2]
    orig_width = img.shape[3]
    with autograd.pause():
        # outputs
        # shape_like: (N, 3549, 9, 2), the shape shared by most targets
        shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
            (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0], axis=0)

        # buffers for the converted ground-truth values to be returned:
        # center_targets: cx, cy, shape (N, 3549, 9, 2)
        center_targets = nd.zeros_like(shape_like)
        # scale_targets: w, h, shape (N, 3549, 9, 2)
        scale_targets = nd.zeros_like(center_targets)
        # weights: per-element loss weights, shape (N, 3549, 9, 2)
        weights = nd.zeros_like(center_targets)
        # objectness: confidence, shape (N, 3549, 9, 1)
        objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
        # class_targets: labels as one-hot vectors of shape
        # (N, 3549, 9, num_class), prefilled with -1, meaning "ignore"
        class_targets = nd.one_hot(objectness.squeeze(axis=-1),
                                   depth=self._num_class)
        class_targets[:] = -1  # prefill -1 for ignores

        # for each ground-truth, find the best matching anchor within the
        # particular grid; for instance, if the center of object 1 resides in
        # grid (3, 4) of a (16, 16) feature map, then only the anchor in
        # (3, 4) is going to be matched.
        # YOLO matches by size only, so convert the boxes from corner to
        # center format first
        gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
        # boxes of the same size as the gt boxes but centered at (0, 0),
        # converted back to corner format
        shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw,
                                   0.5 * gth, dim=-1)
        # prepend (0, 0) to the 9 preset anchors, e.g. (0, 0, 116, 90):
        # center-format boxes of the preset anchor sizes
        anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero center anchors
        # convert the anchor boxes to corner format to match the gt format
        shift_anchor_boxes = self.bbox2corner(anchor_boxes)
        # IOU between the anchors and the gt boxes
        ious = nd.contrib.box_iou(shift_anchor_boxes,
                                  shift_gt_boxes).transpose((1, 0, 2))
        # real value is required to process, convert to Numpy
        # for each gt box, the index of the best-matching (highest-IOU) anchor
        matches = ious.argmax(axis=1).asnumpy()  # (B, M)
        # valid_gts is a mask marking the valid boxes; boxes padded with -1
        # by the dataloader (to equalize batch lengths) get mask value 0
        valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
        np_gtx, np_gty, np_gtw, np_gth = [
            x.asnumpy() for x in [gtx, gty, gtw, gth]
        ]
        np_anchors = all_anchors.asnumpy()
        np_gt_ids = gt_ids.asnumpy()
        np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
        # TODO(zhreshold): the number of valid gt is not a big number, therefore for loop
        # should not be a problem right now. Switch to better solution is needed.
        # outer loop: over the batch; inner loop: over the boxes in one image
        for b in range(matches.shape[0]):
            for m in range(matches.shape[1]):
                # padding grows downwards, so the first 0 means the rest of
                # this image's boxes are padding; skip to the next image
                if valid_gts[b, m] < 1:
                    break
                # index of the best anchor matched by box m of image b
                match = int(matches[b, m])
                # find out exactly which stage (layer) that anchor belongs to
                nlayer = np.nonzero(num_anchors > match)[0][0]
                # xs holds the feature maps; the matched one provides the
                # feature map height and width
                height = xs[nlayer].shape[2]
                width = xs[nlayer].shape[3]
                # the real (cx, cy, w, h) of this box in original-image coordinates
                gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                      np_gtw[b, m, 0], np_gth[b, m, 0])
                # compute the location of the gt centers:
                # map cx, cy onto the matched stage's feature map
                loc_x = int(gtx / orig_width * width)
                loc_y = int(gty / orig_height * height)
                # write back to targets:
                # index of the cell this box falls into
                index = _offsets[nlayer] + loc_y * width + loc_x
                # targets are laid out as B x Cell x Anchor x x, where the
                # trailing dim depends on the target kind (e.g. 2 for the
                # center coordinates); the cx, cy targets lie in [0, 1]
                center_targets[b, index, match, 0] = gtx / orig_width * width - loc_x   # tx
                center_targets[b, index, match, 1] = gty / orig_height * height - loc_y  # ty
                # the w, h targets
                scale_targets[b, index, match, 0] = np.log(
                    max(gtw, 1) / np_anchors[match, 0])
                scale_targets[b, index, match, 1] = np.log(
                    max(gth, 1) / np_anchors[match, 1])
                # reduce the influence of box size on the loss; YOLOv1
                # predicted sqrt(w) for this, here a weight is used instead
                weights[b, index, match, :] = \
                    2.0 - gtw * gth / orig_width / orig_height
                # the objectness target is normally set to 1; without mixup,
                # this target generator splits anchors into two classes: the
                # best-IOU match gets 1, everything else gets 0
                objectness[b, index, match, 0] = (
                    np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                class_targets[b, index, match, :] = 0
                class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
        # since some stages won't see partial anchors, we have to slice the
        # correct targets: one final split of all targets into the
        # B x (Cell*Anchor) x x layout
        objectness = self._slice(objectness, num_anchors, num_offsets)
        center_targets = self._slice(center_targets, num_anchors, num_offsets)
        scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
        weights = self._slice(weights, num_anchors, num_offsets)
        class_targets = self._slice(class_targets, num_anchors, num_offsets)
    return objectness, center_targets, scale_targets, weights, class_targets
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                          learning_rate, clipping_norm, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1):
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                     ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim),
                                       ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                label = label.T.reshape((-1,))
                # Concatenate outputs:
                # shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()
            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)
            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size
        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f" %
                  (e, exp(train_loss / num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params,
                                         hidden_dim, ctx, idx_to_char,
                                         char_to_idx, get_inputs, is_lstm))
            print()
def forward(self, input_data):
    ep1 = input_data[:, 0].astype(int).asnumpy().tolist()
    ep2 = input_data[:, 1].astype(int).asnumpy().tolist()
    input_data = input_data[:, 2:]
    x_sen = input_data[:, :DIMENSION * FIXED_WORD_LENGTH].reshape(
        (input_data.shape[0], FIXED_WORD_LENGTH, DIMENSION))
    e1_start = DIMENSION * FIXED_WORD_LENGTH
    e1_infobox = input_data[:, e1_start:e1_start + INFOBOX_LENGTH *
                            INFOBOX_VALUE_LENGTH * WORD_DIMENSION].reshape(
        (input_data.shape[0], INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH,
         WORD_DIMENSION))  # (batch_size, 20, 10, 100)
    e2_start = e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * WORD_DIMENSION
    e2_infobox = input_data[:, e2_start:e2_start + INFOBOX_LENGTH *
                            INFOBOX_VALUE_LENGTH * WORD_DIMENSION].reshape(
        (input_data.shape[0], INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH,
         WORD_DIMENSION))  # (batch_size, 20, 10, 100)

    conv_result = self.conv(x_sen.expand_dims(axis=1))  # (128, 230, 62, 1) NCHW
    be1_mask = nd.zeros(conv_result.shape, ctx=CTX)
    aes_mask = nd.zeros(conv_result.shape, ctx=CTX)
    be2_mask = nd.zeros(conv_result.shape, ctx=CTX)
    be1_pad = nd.ones(conv_result.shape, ctx=CTX) * (-100)
    aes_pad = nd.ones(conv_result.shape, ctx=CTX) * (-100)
    be2_pad = nd.ones(conv_result.shape, ctx=CTX) * (-100)
    for i in range(x_sen.shape[0]):
        if ep1[i] == 0:
            ep1[i] += 1
            ep2[i] += 1
        be1_mask[i, :, :ep1[i], :] = 1
        be1_pad[i, :, :ep1[i], :] = 0
        aes_mask[i, :, ep1[i]:ep2[i], :] = 1
        aes_pad[i, :, ep1[i]:ep2[i], :] = 0
        be2_mask[i, :, ep2[i]:, :] = 1
        be2_pad[i, :, ep2[i]:, :] = 0
    be1 = conv_result * be1_mask
    aes = conv_result * aes_mask
    be2 = conv_result * be2_mask
    be1 = be1 + be1_pad
    aes = aes + aes_pad
    be2 = be2 + be2_pad
    o1 = self.pmp(be1)
    o2 = self.pmp(aes)
    o3 = self.pmp(be2)
    out = nd.concat(o1, o2, o3, dim=2)  # (128, 230, 3, 1)
    h_sen = self.conv_out(out)  # (128, 690)

    e1_infobox_list_all = nd.ones(
        (e1_infobox.shape[0], e1_infobox.shape[1], 42, 1),
        ctx=CTX)  # (batch_size, INFOBOX_LENGTH, 42, 1)
    e2_infobox_list_all = nd.ones(
        (e1_infobox.shape[0], e2_infobox.shape[1], 42, 1),
        ctx=CTX)  # (batch_size, INFOBOX_LENGTH, 42, 1)
    for i in range(e1_infobox.shape[0]):
        e1 = self.conv_info(
            x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
            e1_infobox[i].expand_dims(axis=1))
        e1_p = self.pool_info(e1)  # (1, 20, 42, 11)
        e1_infobox_list_all[i] = e1_p.reshape(
            (e1_p.shape[1], e1_p.shape[2], e1_p.shape[3]))
        e2 = self.conv_info(
            x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
            e2_infobox[i].expand_dims(axis=1))
        e2_p = self.pool_info(e2)
        e2_infobox_list_all[i] = e2_p.reshape(
            (e2_p.shape[1], e2_p.shape[2], e2_p.shape[3]))

    g1 = nd.softmax(self.att_info(e1_infobox_list_all),
                    axis=2)  # (batch_size, INFOBOX_LENGTH, 42, 1)
    g2 = nd.softmax(self.att_info(e2_infobox_list_all),
                    axis=2)  # (batch_size, INFOBOX_LENGTH, 42, 1)
    g1_att = self.dense_info(g1 * e1_infobox_list_all)
    g2_att = self.dense_info(g2 * e2_infobox_list_all)  # (batch_size, 128)
    e_infobox_list_all_att = nd.concat(g1_att, g2_att, dim=1)  # (batch_size, 256)
    h_sen_infobox = nd.concat(h_sen, e_infobox_list_all_att, dim=1)
    y = self.output(h_sen_infobox)
    return y
def test_concat():
    a = nd.array(np.ones((SMALL_Y, LARGE_X)))
    b = nd.array(np.zeros((SMALL_Y, LARGE_X)))
    c = nd.concat(a, b, dim=0)
    assert c.shape == (b.shape[0] * 2, LARGE_X)
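# dim semantics in a nutshell: dim=0 grows the first axis (rows), dim=1 the
# second (columns); all other axes must match. A small illustration with
# toy shapes instead of SMALL_Y/LARGE_X:
a = nd.ones((2, 3))
b = nd.zeros((2, 3))
print(nd.concat(a, b, dim=0).shape)  # (4, 3)
print(nd.concat(a, b, dim=1).shape)  # (2, 6)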
def concat_preds(preds):
    return nd.concat(*[flatten_pred(p) for p in preds], dim=1)
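# concat_preds depends on a flatten_pred helper. A sketch of it as defined in
# the d2l SSD chapter this snippet appears to come from (moving channels last
# so per-anchor predictions stay contiguous before flattening):
def flatten_pred(pred):
    return pred.transpose((0, 2, 3, 1)).flatten()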
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        num_iters = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(X, vocab_size),
                                                    state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(X, vocab_size), state_h,
                                           *params)
                y = Y.T.reshape((-1,))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y).sum() / (batch_size * num_steps)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l
            num_iters += 1
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f" %
                  (epoch, (train_l_sum / num_iters).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(rnn, prefix, pred_len, params,
                                         num_hiddens, vocab_size, ctx,
                                         idx_to_char, char_to_idx, get_inputs,
                                         is_lstm))
import numpy as np
import pandas as pd
import sys

from mxnet import nd

sys.path.append("./modules/Model")
sys.path.append("./modules/preprocessing")
sys.path.append("./models")

from Model import Model
from load_data import train_X, train_Y, test_X

# print(train_X.shape)
# print(test_X.shape)

# Change to your own model
from demo import demo_model
from zhan_yuan import ZY_model

# demo_model.train(train_features=train_X, train_labels=train_Y, print_iter=True)
# demo_model.export_predict(test_X)

# Augment the training set with a Gaussian-noised copy of itself
noised_train_X = train_X + nd.random.normal(0, 0.01, shape=train_X.shape)
noised_train_Y = train_Y + nd.random.normal(0, 0.01, shape=train_Y.shape)
aug_train_X = nd.concat(train_X, noised_train_X, dim=0)
aug_train_Y = nd.concat(train_Y, noised_train_Y, dim=0)

# ZY_model.train_k_fold_cv(train_features=aug_train_X,
#                          train_labels=aug_train_Y,
#                          force_reinit=False)
ZY_model.train(aug_train_X, aug_train_Y)
ZY_model.export_predict(test_X, path_="./submission_PY.csv")
def __getitem__(self, index):
    """The index is the video index in clip_lst; read several frames from
    that video."""
    filename, label = self.clip_lst[index]
    if not os.path.exists(filename):
        print("the file does not exist", filename)
        return None
    cthw_data = None
    nd_image_list = []
    while len(nd_image_list) == 0:
        v = cv2.VideoCapture(filename)
        width = v.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = v.get(cv2.CAP_PROP_FRAME_HEIGHT)
        length = v.get(cv2.CAP_PROP_FRAME_COUNT)
        assert self.crop_size <= width and self.crop_size <= height, \
            'crop size %d exceeds frame size (%d, %d)' % (
                self.crop_size, width, height)
        length = int(length)
        if length < self.n_frame:
            logger.info("%s length %d < %d" % (filename, length, self.n_frame))
            # in that case the last frame will be tiled below
        # set the sample begin frame id
        if not self.is_train:
            frame_st = 0 if length <= self.n_frame else int(
                (length - self.n_frame) // 2)
        else:
            frame_st = 0 if length <= self.n_frame else random.randrange(
                length - self.n_frame + 1)
        # set random crop position in single frame
        if self.is_train:
            row_st = random.randrange(self.scale_h - self.crop_size + 1)
            col_st = random.randrange(self.scale_w - self.crop_size + 1)
        else:
            row_st = int((self.scale_h - self.crop_size) / 2)
            col_st = int((self.scale_w - self.crop_size) / 2)
        # jump to the sampled start position
        v.set(cv2.CAP_PROP_POS_FRAMES, frame_st)
        # start to read the following frames from the current start position
        for frame_p in range(min(self.n_frame, length)):
            _, f = v.read()
            if f is not None:
                f = cv2.resize(f, (self.scale_w, self.scale_h))  # in dim of hwc
                f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
                f = f[row_st:row_st + self.crop_size,
                      col_st:col_st + self.crop_size, :]
                if self._transform:
                    # the transform returns the frame in CHW dim
                    nd_image_list.append(self._transform(nd.array(f)))
            else:
                nd_image_list.clear()  # clear the image_list and retry
                break
    # replicate the last frame if length < self.n_frame
    current_length = len(nd_image_list)
    cthw_data = nd.stack(*nd_image_list, axis=1)  # from CHW to CTHW
    # tmp = nd.zeros(shape=(self.n_frame, self.crop_size, self.crop_size, 3),
    #                dtype='float32')
    if current_length < self.n_frame:
        # construct the missing frames from the last one and concat
        extra_data = nd.tile(nd_image_list[-1],
                             reps=(self.n_frame - current_length, 1, 1, 1))
        extra_data = extra_data.transpose((1, 0, 2, 3))
        cthw_data = nd.concat(cthw_data, extra_data, dim=1)
    return cthw_data, label
def forward(self, X):
    for block in self.net:
        Y = block(X)
        # Concatenate the input and output along the channel dimension
        X = nd.concat(X, Y, dim=1)
    return X
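# Channel growth in a nutshell: each block's output is concatenated onto its
# input, so with num_convs conv blocks of num_channels outputs each, the
# result has in_channels + num_convs * num_channels channels. A shape check,
# assuming a DenseBlock(num_convs, num_channels) constructor as in d2l:
blk = DenseBlock(2, 10)
blk.initialize()
X = nd.random.uniform(shape=(4, 3, 8, 8))
print(blk(X).shape)  # (4, 3 + 2*10, 8, 8) = (4, 23, 8, 8)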
def hybrid_forward(self, F, lagX, x2):
    # use F (nd or sym) instead of nd directly so the block stays hybridizable
    out = F.relu(self.bn1(self.fc1(x2)))
    out = self.bn2(self.fc2(out))
    return F.relu(F.concat(lagX, out, dim=2))
def forward(self, inputs, target, next_word_history, cache_history,
            begin_state=None):  # pylint: disable=arguments-differ
    """Defines the forward computation for cache cell. Arguments can be
    either :py:class:`NDArray` or :py:class:`Symbol`.

    Parameters
    ----------
    inputs : NDArray
        The input data
    target : NDArray
        The label
    next_word_history : NDArray
        The next word in memory
    cache_history : NDArray
        The hidden state in cache history

    Returns
    -------
    out : NDArray
        The linear interpolation of the cache language model
        with the regular word-level language model
    next_word_history : NDArray
        The next words to be kept in the memory for look up
        (size is equal to the window size)
    cache_history : NDArray
        The hidden states to be kept in the memory for look up
        (size is equal to the window size)
    """
    output, hidden, encoder_hs, _ = \
        super(self.lm_model.__class__, self.lm_model).\
        forward(inputs, begin_state)
    encoder_h = encoder_hs[-1].reshape(-3, -2)
    output = output.reshape(-1, self._vocab_size)

    start_idx = len(next_word_history) \
        if next_word_history is not None else 0
    next_word_history = nd.concat(
        *[nd.one_hot(t[0], self._vocab_size, on_value=1, off_value=0)
          for t in target], dim=0) if next_word_history is None \
        else nd.concat(
            next_word_history,
            nd.concat(*[nd.one_hot(t[0], self._vocab_size, on_value=1,
                                   off_value=0) for t in target], dim=0),
            dim=0)
    cache_history = encoder_h if cache_history is None \
        else nd.concat(cache_history, encoder_h, dim=0)

    out = None
    softmax_output = nd.softmax(output)
    for idx, vocab_L in enumerate(softmax_output):
        joint_p = vocab_L
        if start_idx + idx > self._window:
            valid_next_word = next_word_history[
                start_idx + idx - self._window:start_idx + idx]
            valid_cache_history = cache_history[
                start_idx + idx - self._window:start_idx + idx]
            logits = nd.dot(valid_cache_history, encoder_h[idx])
            cache_attn = nd.softmax(self._theta * logits).reshape(-1, 1)
            cache_dist = (cache_attn.broadcast_to(valid_next_word.shape)
                          * valid_next_word).sum(axis=0)
            joint_p = self._lambdas * cache_dist \
                + (1 - self._lambdas) * vocab_L
        out = joint_p[target[idx]] if out is None \
            else nd.concat(out, joint_p[target[idx]], dim=0)
    next_word_history = next_word_history[-self._window:]
    cache_history = cache_history[-self._window:]
    return out, next_word_history, cache_history, hidden
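# The interpolation above implements the continuous-cache LM of Grave et al.
# (2016): with cache window W, flattening parameter theta and mixing weight
# lambda, the probability of the next word w given history h_t is
#
#   p(w | h_t) = (1 - lambda) * p_vocab(w | h_t)
#                + lambda * sum_{i=t-W}^{t-1} softmax(theta * h_i . h_t)_i * 1[w = x_{i+1}]
#
# where h_i . h_t is the dot product between a cached hidden state and the
# current one (the `logits` line), and the one-hot rows of next_word_history
# play the role of the indicator 1[w = x_{i+1}].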
from mxnet import nd
import random
import zipfile

X, W_xh = nd.random.uniform(shape=(3, 1)), nd.random.uniform(shape=(1, 4))
H, W_hh = nd.random.uniform(shape=(3, 4)), nd.random.uniform(shape=(4, 4))
print(nd.dot(X, W_xh) + nd.dot(H, W_hh))
# Equivalent: concatenating the inputs and the weights gives the same result
print(nd.dot(nd.concat(X, H, dim=1), nd.concat(W_xh, W_hh, dim=0)))

# Read the lyrics file (~60k characters)
with zipfile.ZipFile('jaychou_lyrics.txt.zip') as zin:
    # open the archive jaychou_lyrics.txt.zip
    with zin.open('jaychou_lyrics.txt') as fd:
        # get the file jaychou_lyrics.txt inside it
        corpus_chars = fd.read().decode('utf-8')  # decode as UTF-8
# print(corpus_chars[:40])  # show the first 40 characters

# Use the first 10k characters to train the model
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:10000]
# print(corpus_chars)

# Build the character index: index -> character
# set() keeps the unique characters; there are 1027 distinct ones here
idx_to_char = list(set(corpus_chars))
# Build the dict (char <-> idx): character -> index
char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
print(len(char_to_idx))  # 1027
# next: the indices corresponding to the lyrics
def fit(self, num_steps=1):
    """
    Fit the models

    Returns:
        dict of per-step losses (LossPi, LossQ1, LossQ2, LossV)
    """
    logger_data = {k: [] for k in ["LossPi", "LossQ1", "LossQ2", "LossV"]}
    for step in range(num_steps):
        # sample a batch from memory
        minibatch = self.memory.sample(self.batch_size)
        obs = nd.array(minibatch["obs"], self.ctx)
        acts = nd.array(minibatch["act"], self.ctx)
        rewards = nd.array(minibatch["rew"], self.ctx)
        next_obs = nd.array(minibatch["next_obs"], self.ctx)
        nonterm = nd.array(minibatch["nt"], self.ctx)
        lr = self.lr(self.steps) * self.lrmult

        # update the policy function
        with autograd.record():
            _mu, _pi, _logp_pi = self.policy(obs)
            _obspi = nd.concat(obs, _pi, dim=-1)
            _q1_pi = self.qfn1(_obspi)
            pi_loss = nd.mean(self.alpha * _logp_pi - _q1_pi)
        pi_loss.backward()
        self.mu.update(lr)
        self.logstd.update(lr)
        self.policy_base.update(lr)

        # update the value functions
        logp_pi = nd.stop_gradient(_logp_pi)
        obspi = nd.stop_gradient(_obspi)
        obsact = nd.concat(obs, acts, dim=-1)
        q1_pi = self.qfn1(obspi)
        q2_pi = self.qfn2(obspi)
        min_q_pi = nd.minimum(q1_pi, q2_pi)
        v_targ = self.vfn_targ(next_obs)
        q_backup = nd.stop_gradient(rewards + self.gamma * nonterm * v_targ)
        v_backup = nd.stop_gradient(min_q_pi - self.alpha * logp_pi)
        with autograd.record():
            _q1 = self.qfn1(obsact)
            _q2 = self.qfn2(obsact)
            _v = self.vfn(obs)
            q1_loss = 0.5 * nd.mean(nd.square(q_backup - _q1))
            q2_loss = 0.5 * nd.mean(nd.square(q_backup - _q2))
            v_loss = 0.5 * nd.mean(nd.square(v_backup - _v))
            total_loss = q1_loss + q2_loss + v_loss
        total_loss.backward()
        self.qfn1.update(lr)
        self.qfn2.update(lr)
        self.vfn.update(lr)

        # update the target network (Polyak averaging)
        for i in range(len(self.vfn.weights)):
            self.vfn_targ.weights[i][:] = \
                self.polyak * self.vfn_targ.weights[i][:] + \
                (1 - self.polyak) * self.vfn.weights[i][:]

        logger_data["LossPi"].append(pi_loss.asnumpy()[0])
        logger_data["LossQ1"].append(q1_loss.asnumpy()[0])
        logger_data["LossQ2"].append(q2_loss.asnumpy()[0])
        logger_data["LossV"].append(v_loss.asnumpy()[0])
    return logger_data
def forward(self, x):
    p1 = self.p1_conv_1(x)
    p2 = self.p2_conv_3(self.p2_conv_1(x))
    p3 = self.p3_conv_5(self.p3_conv_1(x))
    p4 = self.p4_conv_1(self.p4_pool_3(x))
    return nd.concat(p1, p2, p3, p4, dim=1)
from mxnet import nd

n_train, n_test = 100, 100  # assumed split; the notes below describe 200 samples total

features = nd.random.normal(shape=(n_train + n_test, 1))  # features x
# nd.power(features, 2) is x**2, nd.power(features, 3) is x**3.
# Concatenating the three 200x1 columns gives a <NDArray 200x3 @cpu(0)>:
# the generated [x, x**2, x**3]
poly_features = nd.concat(features, nd.power(features, 2),
                          nd.power(features, 3))
# The labels (the generated Y) follow y = 1.2x - 3.4x**2 + 5.6x**3 + 5 + eps,
# with broadcasting:
# true_w[0] = 1.2  pairs with poly_features[:, 0]  (<NDArray 200 @cpu(0)>)
# true_w[1] = -3.4 pairs with poly_features[:, 1]  (<NDArray 200 @cpu(0)>)
# true_w[2] = 5.6  pairs with poly_features[:, 2]  (<NDArray 200 @cpu(0)>)
# true_b = 5
# [200] * [200] + [200] * [200] + [200] * [200] + 5 (broadcast) == [200]
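# In the d2l chapter this snippet follows, the labels described by the
# comments above are then generated as (a sketch; noise scale as in d2l):
true_w, true_b = [1.2, -3.4, 5.6], 5
labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]
          + true_w[2] * poly_features[:, 2] + true_b)
labels += nd.random.normal(scale=0.1, shape=labels.shape)  # the eps term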
    return features  # tail of the get_features definition, truncated in the source


batch_size = 64
data_iter_224 = gluon.data.DataLoader(gluon.data.ArrayDataset(X_224),
                                      batch_size=batch_size)
data_iter_299 = gluon.data.DataLoader(gluon.data.ArrayDataset(X_299),
                                      batch_size=batch_size)

model_names = ['inceptionv3', 'resnet152_v1']

features = []
import pickle as pkl
for model_name in model_names:
    if model_name == 'inceptionv3':
        features.append(get_features(model_name, data_iter_299))
        print("Done inceptionv3")
        data111 = pkl.dumps(features)
    # else:
    #     features.append(get_features(model_name, data_iter_224))
    #     print("Done resnet152_v1")
    #     data222 = pkl.dumps(features)

features = nd.concat(*features, dim=1)
data333 = pkl.dumps(features)
pkl.dump(data333, open('tmp.pickle', 'wb'))
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        # With consecutive sampling, the hidden state only needs to be
        # initialized at the start of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the hidden state must be initialized
            # before each random mini-batch is read.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            # With consecutive sampling, detach the hidden state from the
            # computation graph.
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(X, vocab_size),
                                                    state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(X, vocab_size), state_h,
                                           *params)
                # Let t_ib_j be element j of the batch at time step i:
                # y has shape (batch_size * num_steps,)
                # y = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ...]
                y = Y.T.reshape((-1,))
                # Concatenate outputs into shape
                # (batch_size * num_steps, vocab_size)
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            # Clip the gradient.
            grad_clipping(params, state_h, Y, clipping_theta, ctx)
            gb.sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f" %
                  (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(rnn, prefix, pred_len, params,
                                         num_hiddens, vocab_size, ctx,
                                         idx_to_char, char_to_idx, get_inputs,
                                         is_lstm))
def store_samples(self, data, y, query_network, store_prob, context):
    if not (self.memory_replacement_strategy == "no_replacement"
            and self.max_stored_samples != -1
            and self.key_memory.shape[0] >= self.max_stored_samples):
        num_pus = len(data)
        sub_batch_sizes = [data[i][0][0].shape[0] for i in range(num_pus)]
        num_inputs = len(data[0][0])
        num_outputs = len(y)
        mx_context = context[0]

        if len(self.key_memory) == 0:
            self.key_memory = nd.empty(0, ctx=mx.cpu())
            self.value_memory = []
            self.label_memory = []  # nd.empty((num_outputs, 0), ctx=mx.cpu())

        ind = [nd.sample_multinomial(
                   store_prob, sub_batch_sizes[i]).as_in_context(mx_context)
               for i in range(num_pus)]

        max_inds = [nd.max(ind[i]) for i in range(num_pus)]
        if any(max_inds):
            to_store_values = []
            for i in range(num_inputs):
                tmp_values = []
                for j in range(0, num_pus):
                    if max_inds[j]:
                        if isinstance(tmp_values, list):
                            tmp_values = nd.contrib.boolean_mask(
                                data[j][0][i].as_in_context(mx_context),
                                ind[j])
                        else:
                            tmp_values = nd.concat(
                                tmp_values,
                                nd.contrib.boolean_mask(
                                    data[j][0][i].as_in_context(mx_context),
                                    ind[j]), dim=0)
                to_store_values.append(tmp_values)

            to_store_labels = []
            for i in range(num_outputs):
                tmp_labels = []
                for j in range(0, num_pus):
                    if max_inds[j]:
                        if isinstance(tmp_labels, list):
                            tmp_labels = nd.contrib.boolean_mask(
                                y[i][j].as_in_context(mx_context), ind[j])
                        else:
                            tmp_labels = nd.concat(
                                tmp_labels,
                                nd.contrib.boolean_mask(
                                    y[i][j].as_in_context(mx_context),
                                    ind[j]), dim=0)
                to_store_labels.append(tmp_labels)

            to_store_keys = query_network(
                *to_store_values[0:self.query_net_num_inputs])

            if self.key_memory.shape[0] == 0:
                self.key_memory = to_store_keys.as_in_context(mx.cpu())
                for i in range(num_inputs):
                    self.value_memory.append(
                        to_store_values[i].as_in_context(mx.cpu()))
                for i in range(num_outputs):
                    self.label_memory.append(
                        to_store_labels[i].as_in_context(mx.cpu()))
            elif (self.memory_replacement_strategy == "replace_oldest"
                  and self.max_stored_samples != -1
                  and self.key_memory.shape[0] >= self.max_stored_samples):
                num_to_store = to_store_keys.shape[0]
                self.key_memory = nd.concat(
                    self.key_memory[num_to_store:],
                    to_store_keys.as_in_context(mx.cpu()), dim=0)
                for i in range(num_inputs):
                    self.value_memory[i] = nd.concat(
                        self.value_memory[i][num_to_store:],
                        to_store_values[i].as_in_context(mx.cpu()), dim=0)
                for i in range(num_outputs):
                    self.label_memory[i] = nd.concat(
                        self.label_memory[i][num_to_store:],
                        to_store_labels[i].as_in_context(mx.cpu()), dim=0)
            else:
                self.key_memory = nd.concat(
                    self.key_memory,
                    to_store_keys.as_in_context(mx.cpu()), dim=0)
                for i in range(num_inputs):
                    self.value_memory[i] = nd.concat(
                        self.value_memory[i],
                        to_store_values[i].as_in_context(mx.cpu()), dim=0)
                for i in range(num_outputs):
                    self.label_memory[i] = nd.concat(
                        self.label_memory[i],
                        to_store_labels[i].as_in_context(mx.cpu()), dim=0)
def concatenate(tensors, axis):
    return nd.concat(*tensors, dim=axis)
def forward(self, x):
    p1 = self.p1_1(x)
    p2 = self.p2_2(self.p2_1(x))
    p3 = self.p3_2(self.p3_1(x))
    p4 = self.p4_2(self.p4_1(x))
    # concatenate the outputs along the channel dimension
    return nd.concat(p1, p2, p3, p4, dim=1)
def forward(self, inpt):
    fwd = self._lstm_fwd(inpt)
    bwd_inpt = nd.flip(inpt, 0)
    bwd = self._lstm_bwd(bwd_inpt)
    bwd = nd.flip(bwd, 0)
    return nd.concat(fwd, bwd, dim=2)
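# A minimal shape check of the bidirectional concat, assuming the two cells
# are gluon rnn.LSTM layers with hidden size 5 (names are illustrative):
from mxnet import nd
from mxnet.gluon import rnn

lstm_fwd, lstm_bwd = rnn.LSTM(5), rnn.LSTM(5)
lstm_fwd.initialize()
lstm_bwd.initialize()
x = nd.random.uniform(shape=(7, 2, 4))  # (seq_len, batch, in_dim), TNC layout
out = nd.concat(lstm_fwd(x), nd.flip(lstm_bwd(nd.flip(x, 0)), 0), dim=2)
print(out.shape)  # (7, 2, 10): forward and backward states side by side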
def main():
    ctx = mx.gpu()
    batch_size = 100
    num_inputs = 784
    num_outputs = 10

    # Get MNIST Data
    def transform(data, label):
        return data.astype(np.float32) / 255, label.astype(np.float32)

    train_data1 = mx.gluon.data.DataLoader(
        mx.gluon.data.vision.MNIST(train=True, transform=transform),
        batch_size, shuffle=True)
    test_data1 = mx.gluon.data.DataLoader(
        mx.gluon.data.vision.MNIST(train=False, transform=transform),
        batch_size, shuffle=False)
    train_data2 = mx.gluon.data.DataLoader(
        mx.gluon.data.vision.MNIST(train=True, transform=transform),
        batch_size, shuffle=True)
    test_data2 = mx.gluon.data.DataLoader(
        mx.gluon.data.vision.MNIST(train=False, transform=transform),
        batch_size, shuffle=False)

    net_siamese = gluon.nn.Sequential()
    with net_siamese.name_scope():
        net_siamese.add(gluon.nn.Dense(256, activation='relu'))
        net_siamese.add(gluon.nn.Dense(128, activation='relu'))

    net_out = gluon.nn.Sequential()
    with net_out.name_scope():
        net_out.add(gluon.nn.Dense(128, activation='relu'))
        net_out.add(gluon.nn.Dense(64, activation='relu'))
        net_out.add(gluon.nn.Dense(num_outputs))

    net_siamese.collect_params().initialize(mx.init.Uniform(scale=0.1),
                                            ctx=ctx)
    net_out.collect_params().initialize(mx.init.Uniform(scale=0.1), ctx=ctx)

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    trainer_siamese = gluon.Trainer(net_siamese.collect_params(), 'sgd',
                                    {'learning_rate': 0.05})
    trainer_out = gluon.Trainer(net_out.collect_params(), 'sgd',
                                {'learning_rate': 0.05})

    def evaluate_accuracy(data_iterator1, data_iterator2, net):
        acc = mx.metric.Accuracy()
        for i, ((data1, label1), (data2, label2)) in enumerate(
                zip(data_iterator1, data_iterator2)):
            data1 = data1.as_in_context(ctx).reshape((-1, 784))
            data2 = data2.as_in_context(ctx).reshape((-1, 784))
            label1 = label1.as_in_context(ctx)
            inter1 = net_siamese(data1)
            inter2 = net_siamese(data2)
            output = net_out(nd.concat(inter1, inter2))
            acc.update([label1], [output])
        return acc.get()

    epochs = 4
    moving_loss = 0.
    smoothing_constant = .01
    metric = mx.metric.Accuracy()

    print("\n#### Shared+Module1 Training ####")
    for e in range(epochs):
        metric.reset()
        # Train Branch with mod1 on dataset 1
        for i, ((data1, label1), (data2, label2)) in enumerate(
                zip(train_data1, train_data2)):
            data1 = data1.as_in_context(ctx).reshape((-1, 784))
            data2 = data2.as_in_context(ctx).reshape((-1, 784))
            label1 = label1.as_in_context(ctx)
            with autograd.record():
                inter1 = net_siamese(data1)
                inter2 = net_siamese(data2)
                output = net_out(nd.concat(inter1, inter2))
                loss = softmax_cross_entropy(output, label1)
            loss.backward()
            trainer_siamese.step(batch_size)
            trainer_out.step(batch_size)
            metric.update([label1], [output])

            curr_loss = nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)

            if i % 100 == 0 and i > 0:
                name, acc = metric.get()
                print('[Epoch %d Batch %d] Loss: %s Training: %s=%f' %
                      (e, i, moving_loss, name, acc))

        _, train_accuracy = metric.get()
        _, test_accuracy = evaluate_accuracy(
            test_data1, test_data2,
            lambda x, y: net_out(nd.concat(net_siamese(x), net_siamese(y))))
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\n" %
              (e, moving_loss, train_accuracy, test_accuracy))
def forward(self, input_data):
    x_sen = input_data[:, :DIMENSION * FIXED_WORD_LENGTH].reshape(
        (input_data.shape[0], FIXED_WORD_LENGTH, DIMENSION))
    e1_kernel_num = input_data[:, DIMENSION * FIXED_WORD_LENGTH]
    e2_kernel_num = input_data[:, DIMENSION * FIXED_WORD_LENGTH + 1]
    e1_size = input_data[:, DIMENSION * FIXED_WORD_LENGTH + 2:
                         DIMENSION * FIXED_WORD_LENGTH + 2 + INFOBOX_LENGTH]
    e2_size = input_data[:, DIMENSION * FIXED_WORD_LENGTH + 2 + INFOBOX_LENGTH:
                         DIMENSION * FIXED_WORD_LENGTH + 2 + INFOBOX_LENGTH * 2]
    e1_start = DIMENSION * FIXED_WORD_LENGTH + 2 + INFOBOX_LENGTH * 2
    e1_infobox = input_data[:, e1_start:e1_start + INFOBOX_LENGTH *
                            INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
        (input_data.shape[0], -1, DIMENSION))  # (batch_size, word_num, 100)
    e2_start = e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION
    e2_infobox = input_data[:, e2_start:e2_start + INFOBOX_LENGTH *
                            INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
        (input_data.shape[0], -1, DIMENSION))  # (batch_size, word_num, 100)

    h_sen = self.lstm(x_sen)  # (batch_size, 60, 128)

    e1_infobox_list_all = nd.zeros(
        (e1_infobox.shape[0], e1_infobox.shape[1], 60, 1),
        ctx=CTX)  # (batch_size, INFOBOX_LENGTH, 60, 1)
    e2_infobox_list_all = nd.zeros(
        (e2_infobox.shape[0], e2_infobox.shape[1], 60, 1),
        ctx=CTX)  # (batch_size, INFOBOX_LENGTH, 60, 1)
    for i in range(e1_infobox.shape[0]):
        base = 0
        for j in range(int(e1_kernel_num.asnumpy().item(0))):
            w = int(e1_size[i, j].asnumpy().item(0))
            if w == 0:
                continue
            kernel = e1_infobox[i, base:base + w, :].reshape(
                (1, 1, w, DIMENSION))
            base += w
            e1 = self.conv1(x_sen[i].reshape(
                (1, 1, FIXED_WORD_LENGTH, DIMENSION)), kernel)  # (1, 1, 59, 1)
            e1_infobox_list_all[
                i, :e1.shape[1], :e1.shape[2], :e1.shape[3]] = e1.reshape(
                    (e1.shape[1], e1.shape[2], e1.shape[3]))
    for i in range(e2_infobox.shape[0]):
        base = 0
        for j in range(int(e2_kernel_num.asnumpy().item(0))):
            w = int(e2_size[i, j].asnumpy().item(0))
            if w == 0:
                continue
            kernel = e2_infobox[i, base:base + w, :].reshape(
                (1, 1, w, DIMENSION))
            base += w
            e2 = self.conv2(x_sen[i].reshape(
                (1, 1, FIXED_WORD_LENGTH, DIMENSION)), kernel)  # (1, 1, 59, 1)
            e2_infobox_list_all[
                i, :e2.shape[1], :e2.shape[2], :e2.shape[3]] = e2.reshape(
                    (e2.shape[1], e2.shape[2], e2.shape[3]))

    # e1 = self.conv1(x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
    #                 e1_infobox[i].expand_dims(axis=1))
    # # e1_p = self.pool(e1)
    # e1_infobox_list_all[i] = e1.reshape((e1.shape[1], e1.shape[2], e1.shape[3]))
    # e2 = self.conv2(x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
    #                 e2_infobox[i].expand_dims(axis=1))
    # # e2_p = self.pool(e2)
    # e2_infobox_list_all[i] = e2.reshape((e2.shape[1], e2.shape[2], e2.shape[3]))

    e1_infobox_list_all = e1_infobox_list_all.reshape(
        (e1_infobox.shape[0], e1_infobox.shape[1], -1))
    # (batch_size, INFOBOX_LENGTH, 51)
    e2_infobox_list_all = e2_infobox_list_all.reshape(
        (e2_infobox.shape[0], e2_infobox.shape[1], -1))
    # (batch_size, INFOBOX_LENGTH, 51)
    e1_infobox_list_all_new = self.dense1(e1_infobox_list_all)
    e2_infobox_list_all_new = self.dense2(e2_infobox_list_all)

    # g1 = nd.softmax(self.att(e1_infobox_list_all), axis=2)  # (batch_size, INFOBOX_LENGTH, 1)
    # g2 = nd.softmax(self.att(e2_infobox_list_all), axis=2)  # (batch_size, INFOBOX_LENGTH, 1)
    # g1_att = nd.batch_dot(nd.transpose(g1, axes=(0, 2, 1)), e1_infobox_list_all)  # (batch_size, 1, 64)
    # g2_att = nd.batch_dot(nd.transpose(g2, axes=(0, 2, 1)), e2_infobox_list_all)  # (batch_size, 1, 64)
    # g1_att = g1_att.reshape((g1_att.shape[0], -1))  # (batch_size, 64)
    # g2_att = g2_att.reshape((g2_att.shape[0], -1))  # (batch_size, 64)

    # (batch_size, 128)
    e_infobox_list_all_att = nd.concat(e1_infobox_list_all_new,
                                       e2_infobox_list_all_new, dim=1)
    h_sen_new = self.lstm_out(h_sen.expand_dims(1))
    h_sen_new = h_sen_new.reshape(
        (h_sen_new.shape[0], -1))  # (batch_size, 128)
    # (batch_size, 256)
    h_sen_infobox = nd.concat(h_sen_new, e_infobox_list_all_att, dim=1)
    y = self.output(h_sen_infobox)
    return y
def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
    """Run decoding

    Parameters
    ----------
    word_inputs : mxnet.ndarray.NDArray
        word indices of seq_len x batch_size
    tag_inputs : mxnet.ndarray.NDArray
        tag indices of seq_len x batch_size
    arc_targets : mxnet.ndarray.NDArray
        gold arc indices of seq_len x batch_size
    rel_targets : mxnet.ndarray.NDArray
        gold rel indices of seq_len x batch_size

    Returns
    -------
    tuple
        (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training;
        (arc_accuracy, rel_accuracy, overall_accuracy, outputs) when gold
        targets are given at test time; otherwise outputs, where outputs is
        a list of (arcs, rels).
    """
    is_train = autograd.is_training()

    def flatten_numpy(ndarray):
        """Flatten nd-array to 1-d column vector

        Parameters
        ----------
        ndarray : numpy.ndarray
            input tensor

        Returns
        -------
        numpy.ndarray
            A column vector
        """
        return np.reshape(ndarray, (-1,), 'F')

    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]
    mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
    num_tokens = int(np.sum(mask))  # non padding, non root token number

    if is_train or arc_targets is not None:
        mask_1D = flatten_numpy(mask)
        mask_1D_tensor = nd.array(mask_1D)

    unked_words = np.where(word_inputs < self._vocab.words_in_train,
                           word_inputs, self._vocab.UNK)
    word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
    if self.pret_word_embs:
        word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
    tag_embs = self.tag_embs(nd.array(tag_inputs))

    # Dropout
    emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

    top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                       dropout_x=self.dropout_lstm_input if is_train else 0)
    top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

    W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
    W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
    dep = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep)
    head = leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
    dep = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp)
    head = nd.Dropout(data=head, axes=[0], p=self.dropout_mlp)
    dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
    dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
    head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

    W_arc = self.arc_W.data()
    arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size,
                          seq_len, batch_size,
                          num_outputs=1, bias_x=True, bias_y=False)
    # (#head x #dep) x batch_size
    flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
    # (#head) x (#dep x batch_size)
    arc_preds = arc_logits.argmax(0)  # seq_len x batch_size

    if is_train or arc_targets is not None:
        correct = np.equal(arc_preds.asnumpy(), arc_targets)
        arc_correct = correct.astype(np.float32) * mask
        arc_accuracy = np.sum(arc_correct) / num_tokens
        targets_1D = flatten_numpy(arc_targets)
        losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
        arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        arc_probs = np.transpose(
            np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(),
                       (seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head

    W_rel = self.rel_W.data()
    rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size,
                          seq_len, batch_size,
                          num_outputs=self._vocab.rel_size,
                          bias_x=True, bias_y=True)
    # (#head x rel_size x #dep) x batch_size
    flat_rel_logits = reshape_fortran(
        rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
    # (#head x rel_size) x (#dep x batch_size)

    _target_vec = nd.array(targets_1D if is_train
                           else flatten_numpy(arc_preds.asnumpy())) \
        .reshape(seq_len * batch_size, 1)
    _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))
    partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
    # (rel_size) x (#dep x batch_size)

    if is_train or arc_targets is not None:
        rel_preds = partial_rel_logits.argmax(0)
        targets_1D = flatten_numpy(rel_targets)
        rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
        rel_accuracy = np.sum(rel_correct) / num_tokens
        losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
        rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    if not is_train:
        rel_probs = np.transpose(
            np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                       (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

    if is_train or arc_targets is not None:
        loss = arc_loss + rel_loss
        correct = rel_correct * flatten_numpy(arc_correct)
        overall_accuracy = np.sum(correct) / num_tokens

    if is_train:
        return arc_accuracy, rel_accuracy, overall_accuracy, loss

    outputs = []
    for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
        # parse sentences one by one
        msk[0] = 1.
        sent_len = int(np.sum(msk))
        arc_pred = arc_argmax(arc_prob, sent_len, msk)
        rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
        rel_pred = rel_argmax(rel_prob, sent_len)
        outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

    if arc_targets is not None:
        return arc_accuracy, rel_accuracy, overall_accuracy, outputs
    return outputs
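# Why Fortran order? A minimal numpy sketch (not part of the parser above):
# np.reshape(..., 'F') walks the first axis fastest, so flattening a
# seq_len x batch_size matrix keeps each sentence's tokens contiguous and
# aligned with the flattened logits produced by reshape_fortran.
import numpy as np

m = np.array([[0, 3],
              [1, 4],
              [2, 5]])              # seq_len=3, batch_size=2
print(np.reshape(m, (-1,), 'F'))   # [0 1 2 3 4 5]: sentence 0's tokens, then sentence 1's
print(np.reshape(m, (-1,), 'C'))   # [0 3 1 4 2 5]: interleaved, would misalign the mask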
def forward(self, is_train, req, in_data, out_data, aux):
    # im_info.shape: (batch_size, 3)
    rpn_cls_score = in_data[0]
    gt_boxes = in_data[1]
    im_info = in_data[2]
    base_anchors = in_data[3]
    feat_stride = in_data[4]
    allowed_border = in_data[5]
    ctx = rpn_cls_score.context
    batch_size = rpn_cls_score.shape[0]
    feat_height, feat_width = rpn_cls_score.shape[-2:]
    A = base_anchors.shape[0]
    K = feat_height * feat_width
    N = K * A

    # generate anchor shifts
    shift_x = (nd.arange(0, feat_width, ctx=ctx) * feat_stride). \
        broadcast_to((feat_height, feat_width)).reshape(K)
    shift_y = (nd.arange(0, feat_height, ctx=ctx) * feat_stride). \
        reshape(feat_height, 1).broadcast_to((feat_height, feat_width)).reshape(K)

    # add A anchors (1, A, 4) to cell K shifts (K, 1, 4) to get shift anchors
    # (K, A, 4), then reshape and broadcast to (batch_size, K*A, 4) shifted anchors
    shifts = nd.stack(shift_x, shift_y, shift_x, shift_y, axis=-1).reshape(K, 1, 4)
    all_anchors = (base_anchors.reshape((1, A, 4)) + shifts).reshape(1, N, 4) \
        .broadcast_to((batch_size, N, 4))

    # keep only inside anchors, set outside anchor coordinates to (-1, -1, -1, -1)
    inside_bool_mask = (all_anchors[:, :, 0] >= -allowed_border) * \
                       (all_anchors[:, :, 1] >= -allowed_border) * \
                       (all_anchors[:, :, 2] < (im_info[:, 1] + allowed_border).reshape(0, 1)) * \
                       (all_anchors[:, :, 3] < (im_info[:, 0] + allowed_border).reshape(0, 1))
    all_anchors[:] = inside_bool_mask.reshape(batch_size, -1, 1) * (all_anchors + 1) - 1

    overlaps = bbox_overlaps(gt_boxes, all_anchors)
    # get max iou anchor for each gt_box
    gt_max_overlaps = overlaps.max(axis=2)
    gt_argmax_overlaps = overlaps.argmax(axis=2)
    # get max iou for each anchor
    max_overlaps = overlaps.max(axis=1)
    argmax_overlaps = overlaps.argmax(axis=1)

    # set positive anchor label=1, other=0
    labels = max_overlaps >= self._positive_iou_th
    # set neither positive nor negative anchor label = -1
    labels[:] = labels - ((max_overlaps > self._negative_iou_th) *
                          (max_overlaps < self._positive_iou_th))
    # set max iou anchor for each gt_box to label >= 1 (<= 3) and ignore padded gt_boxes
    batch_idx = nd.arange(batch_size, ctx=ctx).reshape(-1, 1)
    labels[batch_idx, gt_argmax_overlaps] = labels[
        batch_idx, gt_argmax_overlaps] + 2 * (gt_max_overlaps > 0)
    # set outside anchor label <= -1; the remaining label=0 anchors are negative samples
    labels[:] = labels - 4 * (1 - inside_bool_mask)
    # clip label values to -1, 0, 1
    labels[:] = nd.clip(labels, -1, 1)

    # randomly choose labels
    labels_with_idx = nd.concat(labels.transpose(),
                                nd.arange(N, ctx=ctx).reshape(-1, 1), dim=1)
    # columns 0:batch_size are labels, column batch_size is the labels' original index
    rand_labels_with_idx = nd.random.shuffle(labels_with_idx)
    # may include some bg_label if the number of labels==1 is less than num_fg
    fg_rand_labels_idx = rand_labels_with_idx[:, :batch_size].argsort(
        axis=0, is_ascend=0)[:self._rpn_fg_num]
    # use abs() to invert all label=-1, so that label=0 comes first after an ascending sort
    abs_rand_labels = nd.abs(rand_labels_with_idx[:, :batch_size])
    # set fg_label=-1 so it comes first after the ascending sort
    abs_rand_labels[fg_rand_labels_idx, batch_idx.transpose()] = -1
    # select rand label indices that will be excluded
    exclude_rand_labels_idx = abs_rand_labels.argsort(
        axis=0, is_ascend=1)[self._rpn_batch_size:]
    # get original label index
    exclude_labels_idx = rand_labels_with_idx[exclude_rand_labels_idx, batch_size]
    # set excluded labels to -1
    labels[batch_idx, exclude_labels_idx.transpose()] = -1

    # assign gt_boxes to anchors; an anchor's box_target is its max iou gt_box
    bbox_targets = nd.empty((batch_size, N, 4), ctx=ctx)
    bbox_targets[:] = bbox_transform(
        all_anchors, gt_boxes[batch_idx, argmax_overlaps, :4])

    labels = labels.reshape(
        (batch_size, feat_height, feat_width, A)).transpose(axes=(0, 3, 1, 2))
    labels = labels.reshape((batch_size, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (batch_size, feat_height, feat_width, A, 4)).transpose(axes=(0, 4, 3, 1, 2))
    bbox_targets = bbox_targets.reshape(
        (batch_size, 4, A * feat_height * feat_width))

    out_data[0][:] = labels
    out_data[1][:] = bbox_targets
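# A standalone sketch of the shift-grid construction used above, with toy sizes
# (2x3 feature map, stride 16, a single hypothetical base anchor): broadcasting
# (1, A, 4) base anchors against (K, 1, 4) per-cell shifts replicates every
# anchor once per feature-map cell.
from mxnet import nd

feat_height, feat_width, feat_stride = 2, 3, 16
K = feat_height * feat_width
shift_x = (nd.arange(0, feat_width) * feat_stride) \
    .broadcast_to((feat_height, feat_width)).reshape(K)
shift_y = (nd.arange(0, feat_height) * feat_stride) \
    .reshape(feat_height, 1).broadcast_to((feat_height, feat_width)).reshape(K)
shifts = nd.stack(shift_x, shift_y, shift_x, shift_y, axis=-1).reshape(K, 1, 4)
base_anchors = nd.array([[-8, -8, 8, 8]])               # A = 1 toy anchor
all_anchors = base_anchors.reshape((1, 1, 4)) + shifts  # (K, A, 4)
print(all_anchors.reshape(-1, 4))                       # one anchor per cell, 16 px apart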
def forward(self, X):
    return nd.concat(*X, dim=self._concat_dim)
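# A minimal usage sketch for the Concat block above; the __init__ shown here is
# an assumption (the original provides only forward), storing the concat axis
# in self._concat_dim:
from mxnet import nd
from mxnet.gluon import nn

class Concat(nn.Block):
    def __init__(self, dim=1, **kwargs):
        super(Concat, self).__init__(**kwargs)
        self._concat_dim = dim

    def forward(self, X):
        return nd.concat(*X, dim=self._concat_dim)

blk = Concat(dim=1)
print(blk([nd.ones((2, 3)), nd.zeros((2, 5))]).shape)  # (2, 8)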
def forward(self, enc1, enc2):
    x = nd.concat(enc1, enc2)
    x = self.dense(x)
    x = nd.log_softmax(x)
    return x
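# Note that the forward above relies on defaults: nd.concat joins along dim=1
# and nd.log_softmax normalizes over the last axis. A quick check:
from mxnet import nd

x = nd.concat(nd.ones((2, 3)), nd.zeros((2, 4)))  # default dim=1 -> shape (2, 7)
print(x.shape)
print(nd.log_softmax(x).exp().sum(axis=-1))       # each row of probabilities sums to 1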
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                          learning_rate, clipping_norm, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1):
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape: (batch_size * num_steps,)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ...]
                label = label.T.reshape((-1,))
                # Concatenate outputs; shape: (batch_size * num_steps, vocab_size)
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()
            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)
            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size
        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f"
                  % (e, exp(train_loss / num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params, hidden_dim,
                                         ctx, idx_to_char, char_to_idx,
                                         get_inputs, is_lstm))
            print()
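# The grad_clipping helper called above is not shown in this snippet. A common
# sketch, matching the classic Gluon tutorials (an assumption here), clips by
# the global L2 norm of all parameter gradients:
from mxnet import nd

def grad_clipping(params, theta, ctx):
    norm = nd.array([0.0], ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm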
def forward(self, X):
    for blk in self.net:
        Y = blk(X)
        # concatenate the input and output of each block on the channel dimension
        X = nd.concat(X, Y, dim=1)
    return X  # return the accumulated concatenation, not just the last block's output
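# A quick usage sketch for the dense block above. The DenseBlock constructor is
# hypothetical here (the original shows only forward); it assumes self.net holds
# num_convs conv blocks that each emit num_channels channels, as in the standard
# DenseNet tutorial:
from mxnet import nd

blk = DenseBlock(num_convs=2, num_channels=10)
blk.initialize()
X = nd.random.uniform(shape=(4, 3, 8, 8))
print(blk(X).shape)  # (4, 23, 8, 8): 3 input channels + 2 * 10 concatenated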
def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
    """Generate training targets that do not require network predictions.

    Parameters
    ----------
    img : mxnet.nd.NDArray
        Original image tensor.
    xs : list of mxnet.nd.NDArray
        List of feature maps.
    anchors : mxnet.nd.NDArray
        YOLO3 anchors.
    offsets : mxnet.nd.NDArray
        Pre-generated x and y offsets for YOLO3.
    gt_boxes : mxnet.nd.NDArray
        Ground-truth boxes.
    gt_ids : mxnet.nd.NDArray
        Ground-truth IDs.
    gt_mixratio : mxnet.nd.NDArray, optional
        Mixup ratio from 0 to 1.

    Returns
    -------
    (tuple of) mxnet.nd.NDArray
        objectness: 0 for negative, 1 for positive, -1 for ignore.
        center_targets: regression target for center x and y.
        scale_targets: regression target for scale x and y.
        weights: element-wise gradient weights for center_targets and scale_targets.
        class_targets: a one-hot vector for classification.
    """
    assert isinstance(anchors, (list, tuple))
    all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
    assert isinstance(offsets, (list, tuple))
    all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
    num_anchors = np.cumsum([a.size // 2 for a in anchors])
    num_offsets = np.cumsum([o.size // 2 for o in offsets])
    _offsets = [0] + num_offsets.tolist()
    assert isinstance(xs, (list, tuple))
    assert len(xs) == len(anchors) == len(offsets)

    # orig image size
    orig_height = img.shape[2]
    orig_width = img.shape[3]
    with autograd.pause():
        # outputs
        shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
            (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0], axis=0)
        center_targets = nd.zeros_like(shape_like)
        scale_targets = nd.zeros_like(center_targets)
        weights = nd.zeros_like(center_targets)
        objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
        class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
        class_targets[:] = -1  # prefill -1 for ignores

        # for each ground-truth, find the best matching anchor within the particular
        # grid cell: for instance, if the center of object 1 resides in grid (3, 4)
        # of a (16, 16) feature map, only the anchors in (3, 4) are matched
        gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
        shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw, 0.5 * gth, dim=-1)
        anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero-center anchors
        shift_anchor_boxes = self.bbox2corner(anchor_boxes)
        ious = nd.contrib.box_iou(shift_anchor_boxes, shift_gt_boxes).transpose((1, 0, 2))
        # real values are required for processing, so convert to numpy
        matches = ious.argmax(axis=1).asnumpy()  # (B, M)
        valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
        np_gtx, np_gty, np_gtw, np_gth = [x.asnumpy() for x in [gtx, gty, gtw, gth]]
        np_anchors = all_anchors.asnumpy()
        np_gt_ids = gt_ids.asnumpy()
        np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
        # TODO(zhreshold): the number of valid gt is not a big number, therefore the
        # for loop should not be a problem right now. A better solution is needed later.
        for b in range(matches.shape[0]):
            for m in range(matches.shape[1]):
                if valid_gts[b, m] < 1:
                    break
                match = int(matches[b, m])
                nlayer = np.nonzero(num_anchors > match)[0][0]
                height = xs[nlayer].shape[2]
                width = xs[nlayer].shape[3]
                gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                      np_gtw[b, m, 0], np_gth[b, m, 0])
                # compute the location of the gt centers
                loc_x = int(gtx / orig_width * width)
                loc_y = int(gty / orig_height * height)
                # write back to targets
                index = _offsets[nlayer] + loc_y * width + loc_x
                center_targets[b, index, match, 0] = gtx / orig_width * width - loc_x  # tx
                center_targets[b, index, match, 1] = gty / orig_height * height - loc_y  # ty
                scale_targets[b, index, match, 0] = np.log(max(gtw, 1) / np_anchors[match, 0])
                scale_targets[b, index, match, 1] = np.log(max(gth, 1) / np_anchors[match, 1])
                weights[b, index, match, :] = 2.0 - gtw * gth / orig_width / orig_height
                objectness[b, index, match, 0] = (
                    np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                class_targets[b, index, match, :] = 0
                class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
        # since each stage sees only a subset of the anchors, slice out the targets
        # that belong to each stage
        objectness = self._slice(objectness, num_anchors, num_offsets)
        center_targets = self._slice(center_targets, num_anchors, num_offsets)
        scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
        weights = self._slice(weights, num_anchors, num_offsets)
        class_targets = self._slice(class_targets, num_anchors, num_offsets)
    return objectness, center_targets, scale_targets, weights, class_targets
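# A toy check (hypothetical sizes for a 416 x 416 input) of the flattened-grid
# indexing used above: a cell (loc_x, loc_y) in stage nlayer lands at
# _offsets[nlayer] + loc_y * width + loc_x.
import numpy as np

cells_per_stage = [13 * 13, 26 * 26, 52 * 52]
_offsets = [0] + np.cumsum(cells_per_stage).tolist()
width = 26                      # feature width of stage 1
loc_x, loc_y, nlayer = 4, 7, 1
print(_offsets[nlayer] + loc_y * width + loc_x)  # 169 + 7*26 + 4 = 355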