Example #1
def validate(val_data, val_dataset, net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    val_metric.reset()

    from tqdm import tqdm
    for batch in tqdm(val_data):
        data, scale, center, score, imgid = val_batch_fn(batch, ctx)

        outputs = [net(X) for X in data]
        if opt.flip_test:
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [flip_heatmap(o, val_dataset.joint_pairs, shift=True) for o in outputs_flip]
            outputs = [(o + o_flip)/2 for o, o_flip in zip(outputs, outputs_flipback)]

        if len(outputs) > 1:
            outputs_stack = nd.concat(*[o.as_in_context(mx.cpu()) for o in outputs], dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())

        preds, maxvals = get_final_preds(outputs_stack, center.asnumpy(), scale.asnumpy())
        val_metric.update(preds, maxvals, score, imgid)

    res = val_metric.get()
    return res
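The flip test above averages the heatmaps predicted for the image and for its horizontal mirror, with flip_heatmap flipping the mirrored prediction back into alignment first. A standalone toy sketch of that flip-back-then-average step (illustrative values, not from the source):

from mxnet import nd
# mirror the input along the width axis, then flip the result back
x = nd.arange(12).reshape((1, 1, 3, 4))
x_flip = nd.flip(x, axis=3)
avg = (x + nd.flip(x_flip, axis=3)) / 2  # flipping back restores alignment
assert (avg == x).asnumpy().all()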
Example #2
def get_final_preds(batch_heatmaps, center, scale):
    coords, maxvals = get_max_pred(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(nd.floor(coords[n][p][0] + 0.5).asscalar())
            py = int(nd.floor(coords[n][p][1] + 0.5).asscalar())
            if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
                diff = nd.concat(hm[py][px+1] - hm[py][px-1],
                                 hm[py+1][px] - hm[py-1][px],
                                 dim=0)
                coords[n][p] += nd.sign(diff) * .25

    preds = nd.zeros_like(coords)

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
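The post-processing loop above nudges each argmax location a quarter of a bin toward the larger neighbouring activation. A toy one-dimensional check of the idea (illustrative values):

from mxnet import nd
# the peak is at x=2 and the right neighbour outweighs the left one,
# so the prediction is shifted by +0.25 of a bin
hm = nd.array([0., 1., 4., 3., 0.])
px = 2
diff = hm[px + 1] - hm[px - 1]        # 3 - 1 = 2 > 0
refined = px + nd.sign(diff) * 0.25   # 2.25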
Example #3
    def forward(self, x):
        path_1 = self.path_1_conv_1(x)
        path_2 = self.path_2_conv_3(self.path_2_conv_1(x))
        path_3 = self.path_3_conv_5(self.path_3_conv_1(x))
        path_4 = self.path_4_conv_1(self.path_4_pool_3(x))

        return nd.concat(path_1, path_2, path_3, path_4, dim=1)
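All four paths preserve the spatial size, so their outputs can be concatenated along the channel axis (dim=1). A minimal shape check (hypothetical channel counts):

from mxnet import nd
a = nd.zeros((1, 64, 28, 28))
b = nd.zeros((1, 128, 28, 28))
out = nd.concat(a, b, dim=1)  # channels add up: 64 + 128
assert out.shape == (1, 192, 28, 28)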
Example #4
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          corpus_indices, vocab, ctx, is_random_iter,
                          num_epochs, num_steps, lr, clipping_theta,
                          batch_size, prefixes):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter_fn = data_iter_random
    else:
        data_iter_fn = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()
    start = time.time()
    for epoch in range(1, num_epochs+1):
        if not is_random_iter:
            # If consecutive sampling is used, the hidden state is initialized
            # only at the beginning of each epoch
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n = 0.0, 0
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:
                # If random sampling is used, the hidden state is initialized
                # before each mini-batch update
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                # Otherwise, the detach function needs to be used to separate
                # the hidden state from the computational graph to avoid
                # backpropagation beyond the current sample
                for s in state:
                    s.detach()
            with autograd.record():
                inputs = to_onehot(X, len(vocab))
                # outputs is num_steps terms of shape (batch_size, len(vocab))
                (outputs, state) = rnn(inputs, state, params)
                # After stitching it is (num_steps * batch_size, len(vocab))
                outputs = nd.concat(*outputs, dim=0)
                # The shape of Y is (batch_size, num_steps), and then becomes
                # a vector with a length of batch * num_steps after
                # transposition. This gives it a one-to-one correspondence
                # with output rows
                y = Y.T.reshape((-1,))
                # Average classification error via cross entropy loss
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)  # Clip the gradient
            sgd(params, lr, 1)
            # Since the error is the mean, no need to average gradients here
            l_sum += l.asscalar() * y.size
            n += y.size
        if epoch % (num_epochs // 4) == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch, math.exp(l_sum / n), time.time() - start))
            start = time.time()
        if epoch % (num_epochs // 2) == 0:
            for prefix in prefixes:
                print(' -',  predict_rnn(prefix, 50, rnn, params,
                                         init_rnn_state, num_hiddens,
                                         vocab, ctx))
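The nd.concat call above stitches the num_steps per-step outputs into one matrix, and transposing Y before flattening makes the labels line up row-for-row with it. A toy shape check (illustrative sizes):

from mxnet import nd
# three steps of (batch=2, vocab=5) stitch into (6, 5)
outputs = [nd.zeros((2, 5)) for _ in range(3)]
stacked = nd.concat(*outputs, dim=0)
assert stacked.shape == (6, 5)
# Y is (batch_size, num_steps); Y.T.reshape((-1,)) orders the labels as
# all of step 0, then step 1, ... matching the stitched rows
Y = nd.array([[0, 1, 2], [3, 4, 5]])
y = Y.T.reshape((-1,))  # [0. 3. 1. 4. 2. 5.]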
Example #5
    def _load_embedding_serialized(self, pretrained_file_path):
        """Load embedding vectors from a pre-trained token embedding file.

        For every unknown token, if its representation `self.unknown_token` is encountered in the
        pre-trained token embedding file, index 0 of `self.idx_to_vec` maps to the pre-trained token
        embedding vector loaded from the file; otherwise, index 0 of `self.idx_to_vec` maps to the
        text embedding vector initialized by `self._init_unknown_vec`.

        ValueError is raised if a token occurs multiple times.
        """

        deserialized_embedding = TokenEmbedding.deserialize(pretrained_file_path)
        if deserialized_embedding.unknown_token:
            # Some .npz files on S3 may contain an unknown token and its
            # respective embedding. As a workaround, we assume that C.UNK_IDX
            # is the same now as it was when the .npz was generated. Under this
            # assumption we can safely overwrite the respective token and
            # vector from the npz.
            if self.unknown_token:
                idx_to_token = deserialized_embedding.idx_to_token
                idx_to_vec = deserialized_embedding.idx_to_vec
                idx_to_token[C.UNK_IDX] = self.unknown_token
                if self._init_unknown_vec:
                    vec_len = idx_to_vec.shape[1]
                    idx_to_vec[C.UNK_IDX] = self._init_unknown_vec(shape=vec_len)
            else:
                # If the TokenEmbedding shall not have an unknown token, we
                # just delete the one in the npz.
                assert C.UNK_IDX == 0
                idx_to_token = deserialized_embedding.idx_to_token[C.UNK_IDX + 1:]
                idx_to_vec = deserialized_embedding.idx_to_vec[C.UNK_IDX + 1:]
        else:
            idx_to_token = deserialized_embedding.idx_to_token
            idx_to_vec = deserialized_embedding.idx_to_vec

        if not len(set(idx_to_token)) == len(idx_to_token):
            raise ValueError('Serialized embedding invalid. '
                             'It contains duplicate tokens.')

        if self.unknown_token:
            try:
                unknown_token_idx = deserialized_embedding.idx_to_token.index(
                    self.unknown_token)
                idx_to_token[C.UNK_IDX], idx_to_token[
                    unknown_token_idx] = idx_to_token[
                        unknown_token_idx], idx_to_token[C.UNK_IDX]
                idxs = [C.UNK_IDX, unknown_token_idx]
                idx_to_vec[idxs] = idx_to_vec[idxs[::-1]]
            except ValueError:
                vec_len = idx_to_vec.shape[1]
                idx_to_token.insert(0, self.unknown_token)
                idx_to_vec = nd.concat(
                    self._init_unknown_vec(shape=vec_len).reshape((1, -1)),
                    idx_to_vec, dim=0)

        self._idx_to_token = idx_to_token
        self._idx_to_vec = idx_to_vec
        self._token_to_idx.update((token, idx) for idx, token in enumerate(self._idx_to_token))
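The try branch above swaps two rows of idx_to_vec with a single advanced-indexing assignment. A minimal sketch of that idiom in isolation (toy values):

from mxnet import nd
v = nd.arange(6).reshape((3, 2))
idxs = [0, 2]
v[idxs] = v[idxs[::-1]]  # swap rows 0 and 2 in one assignment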
Example #6
    def _slice(self, x, num_anchors, num_offsets):
        """Some stages only see a subset of the anchors, so slice out the matching targets."""
        # x with shape (B, N, A, 1 or 2)
        anchors = [0] + num_anchors.tolist()
        offsets = [0] + num_offsets.tolist()
        ret = []
        for i in range(len(num_anchors)):
            y = x[:, offsets[i]:offsets[i+1], anchors[i]:anchors[i+1], :]
            ret.append(y.reshape((0, -3, -1)))
        return nd.concat(*ret, dim=1)
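The reshape((0, -3, -1)) call relies on MXNet's special reshape codes: 0 keeps a dimension as-is, -3 merges the next two dimensions, and -1 is inferred. A quick check with toy shapes:

from mxnet import nd
x = nd.zeros((2, 5, 3, 2))     # (B, N, A, 2)
y = x.reshape((0, -3, -1))     # keep B, merge N*A, infer the rest
assert y.shape == (2, 15, 2)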
Example #7
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()       
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params) 
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                y = Y.T.reshape((-1,))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f"
                  % (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(
                    rnn, prefix, pred_len, params, num_hiddens, vocab_size,
                    ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
Example #8
def ten_crop(src, size):
    """Crop 10 regions from an array.
    This is performed same as:
    http://chainercv.readthedocs.io/en/stable/reference/transforms.html#ten-crop

    This method crops 10 regions. All regions will be in shape
    :obj`size`. These regions consist of 1 center crop and 4 corner
    crops and horizontal flips of them.
    The crops are ordered in this order.
    * center crop
    * top-left crop
    * bottom-left crop
    * top-right crop
    * bottom-right crop
    * center crop (flipped horizontally)
    * top-left crop (flipped horizontally)
    * bottom-left crop (flipped horizontally)
    * top-right crop (flipped horizontally)
    * bottom-right crop (flipped horizontally)

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image.
    size : tuple
        Tuple of length 2, as (width, height) of the cropped areas.

    Returns
    -------
    mxnet.nd.NDArray
        The cropped images with shape (10, size[1], size[0], C)

    """
    h, w, _ = src.shape
    ow, oh = size

    if h < oh or w < ow:
        raise ValueError(
            "Cannot crop area {} from image with size ({}, {})".format(str(size), h, w))

    center = src[(h - oh) // 2:(h + oh) // 2, (w - ow) // 2:(w + ow) // 2, :]
    tl = src[0:oh, 0:ow, :]
    bl = src[h - oh:h, 0:ow, :]
    tr = src[0:oh, w - ow:w, :]
    br = src[h - oh:h, w - ow:w, :]
    crops = nd.stack(*[center, tl, bl, tr, br], axis=0)
    crops = nd.concat(*[crops, nd.flip(crops, axis=2)], dim=0)
    return crops
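A quick usage check for the function above (toy input; assumes nd is imported as in the example):

from mxnet import nd
img = nd.random.uniform(shape=(256, 256, 3))  # HWC input
crops = ten_crop(img, (224, 224))             # size is (width, height)
assert crops.shape == (10, 224, 224, 3)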
Example #9
    def forward(self, x, sampled_values, label):
        """Forward computation."""
        sampled_candidates, _, _ = sampled_values
        # (batch_size,)
        label = label.reshape(shape=(-1,))
        # (num_sampled+batch_size,)
        ids = nd.concat(sampled_candidates, label, dim=0)
        # lookup weights and biases
        weight = self.weight.row_sparse_data(ids)
        bias = self.bias.data(ids.context)
        # (num_sampled+batch_size, dim)
        w_all = nd.Embedding(data=ids, weight=weight, **self._kwargs)
        # (num_sampled+batch_size,)
        b_all = nd.take(bias, indices=ids)
        out, new_targets = self._dense(x, sampled_values, label, w_all, b_all)
        return out, new_targets
Example #10
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, is_random_iter, num_epochs, num_steps,
                          lr, clipping_theta, batch_size, pred_period,
                          pred_len, prefixes):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter_fn = data_iter_random
    else:
        data_iter_fn = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(num_epochs):
        if not is_random_iter:
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                for s in state:
                    s.detach()
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                (outputs, state) = rnn(inputs, state, params)
                outputs = nd.concat(*outputs, dim=0)
                y = Y.T.reshape((-1,))
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn(
                    prefix, pred_len, rnn, params, init_rnn_state,
                    num_hiddens, vocab_size, ctx, idx_to_char, char_to_idx))
Example #11
    def forward(self,
                word_inputs,
                tag_inputs,
                arc_targets=None,
                rel_targets=None):
        # pylint: disable=arguments-differ
        """Run decoding

        Parameters
        ----------
        word_inputs : mxnet.ndarray.NDArray
            word indices of seq_len x batch_size
        tag_inputs : mxnet.ndarray.NDArray
            tag indices of seq_len x batch_size
        arc_targets : mxnet.ndarray.NDArray
            gold arc indices of seq_len x batch_size
        rel_targets : mxnet.ndarray.NDArray
            gold rel indices of seq_len x batch_size
        Returns
        -------
        tuple
            (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training;
            if gold targets are given at evaluation time, returns
            (arc_accuracy, rel_accuracy, overall_accuracy, outputs);
            otherwise returns outputs, where outputs is a list of (arcs, rels).
        """
        def flatten_numpy(arr):
            """Flatten nd-array to 1-d column vector

            Parameters
            ----------
            arr : numpy.ndarray
                input tensor

            Returns
            -------
            numpy.ndarray
                A column vector

            """
            return np.reshape(arr, (-1, ), 'F')

        is_train = autograd.is_training()
        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
        num_tokens = int(np.sum(mask))  # non padding, non root token number

        if is_train or arc_targets is not None:
            mask_1D = flatten_numpy(mask)
            mask_1D_tensor = nd.array(mask_1D)

        unked_words = np.where(word_inputs < self._vocab.words_in_train,
                               word_inputs, self._vocab.UNK)
        word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
        if self.pret_word_embs:
            word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
        tag_embs = self.tag_embs(nd.array(tag_inputs))

        emb_inputs = nd.concat(word_embs, tag_embs,
                               dim=2)  # seq_len x batch_size x (word_dim + tag_dim)

        top_recur = utils.biLSTM(self.f_lstm,
                                 self.b_lstm,
                                 emb_inputs,
                                 dropout_x=self.dropout_lstm_input)
        top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

        W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
        W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
        dep = nd.Dropout(
            data=utils.leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep),
            axes=[0],
            p=self.dropout_mlp)
        head = nd.Dropout(
            data=utils.leaky_relu(nd.dot(top_recur, W_head.T) + b_head),
            axes=[0],
            p=self.dropout_mlp)
        dep, head = nd.transpose(dep, axes=[2, 0,
                                            1]), nd.transpose(head,
                                                              axes=[2, 0, 1])
        dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
        head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

        W_arc = self.arc_W.data()
        arc_logits = utils.bilinear(dep_arc,
                                    W_arc,
                                    head_arc,
                                    self.mlp_arc_size,
                                    seq_len,
                                    batch_size,
                                    num_outputs=1,
                                    bias_x=True,
                                    bias_y=False)
        # (#head x #dep) x batch_size

        flat_arc_logits = utils.reshape_fortran(
            arc_logits, (seq_len, seq_len * batch_size))
        # (#head ) x (#dep x batch_size)

        arc_preds = arc_logits.argmax(0)
        # seq_len x batch_size

        if is_train or arc_targets is not None:
            correct = np.equal(arc_preds.asnumpy(), arc_targets)
            arc_correct = correct.astype(np.float32) * mask
            arc_accuracy = np.sum(arc_correct) / num_tokens
            targets_1D = flatten_numpy(arc_targets)
            losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
            arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            arc_probs = np.transpose(
                np.reshape(
                    nd.softmax(flat_arc_logits, axis=0).asnumpy(),
                    (seq_len, seq_len, batch_size), 'F'))
        # #batch_size x #dep x #head

        W_rel = self.rel_W.data()
        rel_logits = utils.bilinear(dep_rel,
                                    W_rel,
                                    head_rel,
                                    self.mlp_rel_size,
                                    seq_len,
                                    batch_size,
                                    num_outputs=self._vocab.rel_size,
                                    bias_x=True,
                                    bias_y=True)
        # (#head x rel_size x #dep) x batch_size

        flat_rel_logits = utils.reshape_fortran(
            rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
        # (#head x rel_size) x (#dep x batch_size)

        if is_train:  # pylint: disable=using-constant-test
            _target_vec = targets_1D
        else:
            _target_vec = flatten_numpy(arc_preds.asnumpy())
        _target_vec = nd.array(_target_vec).reshape(seq_len * batch_size, 1)
        _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

        partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
        # (rel_size) x (#dep x batch_size)

        if is_train or arc_targets is not None:
            rel_preds = partial_rel_logits.argmax(0)
            targets_1D = flatten_numpy(rel_targets)
            rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(
                np.float32) * mask_1D
            rel_accuracy = np.sum(rel_correct) / num_tokens
            losses = self.softmax_loss(partial_rel_logits,
                                       nd.array(targets_1D))
            rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            rel_probs = np.transpose(
                np.reshape(
                    nd.softmax(flat_rel_logits.transpose([1, 0, 2]),
                               axis=0).asnumpy(),
                    (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

        if is_train or arc_targets is not None:
            l = arc_loss + rel_loss
            correct = rel_correct * flatten_numpy(arc_correct)
            overall_accuracy = np.sum(correct) / num_tokens

        if is_train:  # pylint: disable=using-constant-test
            return arc_accuracy, rel_accuracy, overall_accuracy, l

        outputs = []

        for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs,
                                           rel_probs):
            # parse sentences one by one
            msk[0] = 1.
            sent_len = int(np.sum(msk))
            arc_pred = utils.arc_argmax(arc_prob, sent_len, msk)
            rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
            rel_pred = utils.rel_argmax(rel_prob, sent_len)
            outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

        if arc_targets is not None:
            return arc_accuracy, rel_accuracy, overall_accuracy, outputs
        return outputs
Example #12
    def forward(self, x):
        for layer in self.net:
            out = layer(x)
            x = nd.concat(x, out, dim=1)

        return x
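Because each iteration stacks the layer output onto its input along dim=1, the channel count grows at every step, which is the dense-block pattern. A toy shape check (hypothetical sizes):

from mxnet import nd
x = nd.zeros((1, 3, 8, 8))
out = nd.zeros((1, 10, 8, 8))  # a layer emitting 10 channels
x = nd.concat(x, out, dim=1)   # channels accumulate: 3 + 10
assert x.shape == (1, 13, 8, 8)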
Example #13
def concat_predictions(preds):
    return nd.concat(*preds, dim=1)
Example #14
    def _load_embedding_serialized(self, pretrained_file_path,
                                   init_unknown_vec):
        """Load embedding vectors from a pre-trained token embedding file.

        For every unknown token, if its representation `self.unknown_token` is encountered in the
        pre-trained token embedding file, index 0 of `self.idx_to_vec` maps to the pre-trained token
        embedding vector loaded from the file; otherwise, index 0 of `self.idx_to_vec` maps to the
        text embedding vector initialized by `init_unknown_vec`.

        ValueError is raised if a token occurs multiple times.
        """

        deserialized_embedding = TokenEmbedding.deserialize(
            pretrained_file_path)
        if deserialized_embedding.unknown_token:
            # Some .npz files on S3 may contain an unknown token and its
            # respective embedding. As a workaround, we assume that C.UNK_IDX
            # is the same now as it was when the .npz was generated. Under this
            # assumption we can safely overwrite the respective token and
            # vector from the npz.
            if deserialized_embedding.unknown_token == self.unknown_token:
                # If the unknown_token is the same, we will find it below and a
                # new unknown token won't be inserted.
                idx_to_token = deserialized_embedding.idx_to_token
                idx_to_vec = deserialized_embedding.idx_to_vec
            elif self.unknown_token:
                # If they are different, we need to manually replace it so that
                # it is found below and no new unknown token would be inserted.
                idx_to_token = deserialized_embedding.idx_to_token
                idx_to_vec = deserialized_embedding.idx_to_vec
                idx_to_token[C.UNK_IDX] = self.unknown_token
                vec_len = idx_to_vec.shape[1]
                idx_to_vec[C.UNK_IDX] = init_unknown_vec(shape=vec_len)
            else:
                # If the TokenEmbedding shall not have an unknown token, we
                # just delete the one in the npz.
                idx_to_token = (
                    deserialized_embedding.idx_to_token[:C.UNK_IDX] +
                    deserialized_embedding.idx_to_token[C.UNK_IDX + 1:])
                idx_to_vec = nd.concat(
                    deserialized_embedding.idx_to_vec[:C.UNK_IDX],
                    deserialized_embedding.idx_to_vec[C.UNK_IDX + 1:], dim=0)
        else:
            idx_to_token = deserialized_embedding.idx_to_token
            idx_to_vec = deserialized_embedding.idx_to_vec

        if not np.all(np.unique(idx_to_token, return_counts=True)[1] == 1):
            raise ValueError('Serialized embedding invalid. '
                             'It contains duplicate tokens.')

        if self.unknown_token:
            try:
                unknown_token_idx = deserialized_embedding.idx_to_token.index(
                    self.unknown_token)
                idx_to_token[
                    C.UNK_IDX], idx_to_token[unknown_token_idx] = idx_to_token[
                        unknown_token_idx], idx_to_token[C.UNK_IDX]
                idxs = [C.UNK_IDX, unknown_token_idx]
                idx_to_vec[idxs] = idx_to_vec[idxs[::-1]]
            except ValueError:
                vec_len = idx_to_vec.shape[1]
                idx_to_token.insert(0, self.unknown_token)
                idx_to_vec = nd.concat(init_unknown_vec(shape=vec_len).reshape(
                    (1, -1)),
                                       idx_to_vec,
                                       dim=0)

        self._idx_to_token = idx_to_token
        self._idx_to_vec = idx_to_vec
        self._token_to_idx.update(
            (token, idx) for idx, token in enumerate(self._idx_to_token))
Example #15
    def forward(self,
                img,
                xs,
                anchors,
                offsets,
                gt_boxes,
                gt_ids,
                gt_mixratio=None):
        """Generating training targets that do not require network predictions.

        Parameters
        ----------
        img : mxnet.nd.NDArray
            Original image tensor.
        xs : list of mxnet.nd.NDArray
            List of feature maps.
        anchors : mxnet.nd.NDArray
            YOLO3 anchors.
        offsets : mxnet.nd.NDArray
            Pre-generated x and y offsets for YOLO3.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes.
        gt_ids : mxnet.nd.NDArray
            Ground-truth IDs.
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio from 0 to 1.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification.

        """
        assert isinstance(anchors, (list, tuple))
        # anchors is one outer list holding three per-stage lists
        # e.g. for a 416x416 input, all_anchors has shape (9, 2)
        all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
        assert isinstance(offsets, (list, tuple))
        # flatten the pre-generated grid offsets for every stage
        # e.g. for 416x416, all_offsets is (3549, 2); 3549 = 169 (13*13) + 676 (26*26) + 2704 (52*52)
        all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
        # e.g. for 416x416, num_anchors is [3, 6, 9] (cumulative counts)
        num_anchors = np.cumsum([a.size // 2 for a in anchors])
        # e.g. for 416x416, num_offsets is [169, 845, 3549] (cumulative counts)
        num_offsets = np.cumsum([o.size // 2 for o in offsets])
        _offsets = [0] + num_offsets.tolist()
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(offsets)

        # original (training) image size
        orig_height = img.shape[2]
        orig_width = img.shape[3]
        with autograd.pause():
            # outputs
            # shape_like: (N, 3549, 9, 2), the shape shared by most targets
            shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
                (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0],
                                                  axis=0)
            # buffers for the converted ground-truth targets returned below
            # center_targets: cx, cy, shape (N, 3549, 9, 2)
            center_targets = nd.zeros_like(shape_like)
            # scale_targets: w, h, shape (N, 3549, 9, 2)
            scale_targets = nd.zeros_like(center_targets)
            # weights: element-wise loss weights, shape (N, 3549, 9, 2)
            weights = nd.zeros_like(center_targets)
            # objectness: confidence, shape (N, 3549, 9, 1)
            objectness = nd.zeros_like(
                weights.split(axis=-1, num_outputs=2)[0])
            # class_targets: one-hot labels, shape (N, 3549, 9, num_class);
            # prefilled with -1, meaning ignore
            class_targets = nd.one_hot(objectness.squeeze(axis=-1),
                                       depth=self._num_class)
            class_targets[:] = -1  # prefill -1 for ignores

            # for each ground-truth, find the best matching anchor within the particular grid
            # for instance, center of object 1 reside in grid (3, 4) in (16, 16) feature map
            # then only the anchor in (3, 4) is going to be matched
            # find the best-matching anchor for each ground truth
            # YOLO matches anchors by size only, so convert the boxes from
            # corner format to center format first
            gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
            # build boxes of the same size as the ground truth but centered at
            # (0, 0), converted back to corner format
            shift_gt_boxes = nd.concat(-0.5 * gtw,
                                       -0.5 * gth,
                                       0.5 * gtw,
                                       0.5 * gth,
                                       dim=-1)
            # prepend (0, 0) to each of the 9 anchors, e.g. (0, 0, 116, 90): center-format boxes of anchor size
            anchor_boxes = nd.concat(0 * all_anchors, all_anchors,
                                     dim=-1)  # zero center anchors
            # convert the anchor boxes to corner format to align with the ground truth
            shift_anchor_boxes = self.bbox2corner(anchor_boxes)
            # compute the IoU between each anchor and each ground-truth box
            ious = nd.contrib.box_iou(shift_anchor_boxes,
                                      shift_gt_boxes).transpose((1, 0, 2))
            # real value is required to process, convert to Numpy
            # for each gt box, find the best-matching (highest-IoU) anchor
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            # valid_gts masks out real boxes: boxes padded to -1 by the dataloader (for batching) get mask 0
            valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
            np_gtx, np_gty, np_gtw, np_gth = [
                x.asnumpy() for x in [gtx, gty, gtw, gth]
            ]
            np_anchors = all_anchors.asnumpy()
            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy(
            ) if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gt is not a big number, therefore a for loop
            # should not be a problem right now. Switching to a better solution is needed.
            # outer loop over the batch, inner loop over the boxes in each image
            for b in range(matches.shape[0]):
                for m in range(matches.shape[1]):
                    # padding is appended at the end, so the first invalid box means the rest of this image can be skipped
                    if valid_gts[b, m] < 1:
                        break
                    # index of the best-matching anchor for box m of image b
                    match = int(matches[b, m])
                    # determine which output layer the matched anchor belongs to
                    nlayer = np.nonzero(num_anchors > match)[0][0]
                    # xs holds the feature maps; use the matched layer's height and width
                    height = xs[nlayer].shape[2]
                    width = xs[nlayer].shape[3]
                    # the box's true (cx, cy, w, h) in original-image coordinates
                    gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                          np_gtw[b, m, 0], np_gth[b, m, 0])
                    # compute the location of the gt centers
                    # map cx, cy onto the matched layer's feature-map grid
                    loc_x = int(gtx / orig_width * width)
                    loc_y = int(gty / orig_height * height)
                    # write back to targets
                    # index of the grid cell the box falls into
                    index = _offsets[nlayer] + loc_y * width + loc_x
                    # targets are laid out as (B, cell, anchor, x), where x depends
                    # on the target type (2 for the center coordinates)
                    # cx, cy targets are offsets within the cell, in [0, 1]
                    center_targets[b, index, match,
                                   0] = gtx / orig_width * width - loc_x  # tx
                    center_targets[
                        b, index, match,
                        1] = gty / orig_height * height - loc_y  # ty
                    # w, h targets: log ratios against the matched anchor
                    scale_targets[b, index, match, 0] = np.log(
                        max(gtw, 1) / np_anchors[match, 0])
                    scale_targets[b, index, match, 1] = np.log(
                        max(gth, 1) / np_anchors[match, 1])
                    # down-weight large boxes; YOLOv1 predicted sqrt(w) instead, here a weight term is used
                    weights[
                        b, index,
                        match, :] = 2.0 - gtw * gth / orig_width / orig_height
                    # the objectness target is normally 1; without mixup, anchors
                    # fall into two classes: the best IoU match gets 1, all others 0
                    objectness[b, index, match,
                               0] = (np_gt_mixratios[b, m, 0]
                                     if np_gt_mixratios is not None else 1)
                    class_targets[b, index, match, :] = 0
                    class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
            # some stages only see a subset of the anchors, so slice out the matching targets
            # finally reshape every target to (B, cell*anchor, x)
            # (TODO) the necessity of the _slice step here is not yet fully clear
            objectness = self._slice(objectness, num_anchors, num_offsets)
            center_targets = self._slice(center_targets, num_anchors,
                                         num_offsets)
            scale_targets = self._slice(scale_targets, num_anchors,
                                        num_offsets)
            weights = self._slice(weights, num_anchors, num_offsets)
            class_targets = self._slice(class_targets, num_anchors,
                                        num_offsets)
        return objectness, center_targets, scale_targets, weights, class_targets
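The matching step relies on nd.contrib.box_iou over corner-format boxes. A minimal standalone check (toy boxes):

from mxnet import nd
a = nd.array([[0., 0., 2., 2.]])
b = nd.array([[1., 1., 3., 3.]])
# intersection area 1, union area 4 + 4 - 1 = 7
iou = nd.contrib.box_iou(a, b, format='corner')
print(iou)  # ~0.1429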
Example #16
def train_and_predict_rnn(rnn,
                          is_random_iter,
                          epochs,
                          num_steps,
                          hidden_dim,
                          learning_rate,
                          clipping_norm,
                          batch_size,
                          pred_period,
                          pred_len,
                          seqs,
                          get_params,
                          get_inputs,
                          ctx,
                          corpus_indices,
                          idx_to_char,
                          char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1):
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                     ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                label = label.T.reshape((-1, ))
                # Concatenate outputs:
                # shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()

            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)

            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size

        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f" %
                  (e, exp(train_loss / num_examples)))
            for seq in seqs:
                print(
                    ' - ',
                    predict_rnn(rnn, seq, pred_len, params, hidden_dim, ctx,
                                idx_to_char, char_to_idx, get_inputs, is_lstm))
            print()
Example #17
    def forward(self, input_data):
        ep1 = input_data[:, 0].astype(int).asnumpy().tolist()
        ep2 = input_data[:, 1].astype(int).asnumpy().tolist()
        input_data = input_data[:, 2:]

        x_sen = input_data[:, :DIMENSION * FIXED_WORD_LENGTH].reshape(
            (input_data.shape[0], FIXED_WORD_LENGTH, DIMENSION))

        e1_start = DIMENSION * FIXED_WORD_LENGTH
        e1_infobox = input_data[:, e1_start:e1_start + INFOBOX_LENGTH *
                                INFOBOX_VALUE_LENGTH * WORD_DIMENSION].reshape(
                                    (input_data.shape[0], INFOBOX_LENGTH,
                                     INFOBOX_VALUE_LENGTH,
                                     WORD_DIMENSION))  # (batch_size,20,10,100)
        e2_start = e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * WORD_DIMENSION
        e2_infobox = input_data[:, e2_start:e2_start + INFOBOX_LENGTH *
                                INFOBOX_VALUE_LENGTH * WORD_DIMENSION].reshape(
                                    (input_data.shape[0], INFOBOX_LENGTH,
                                     INFOBOX_VALUE_LENGTH,
                                     WORD_DIMENSION))  # (batch_size,20,10,100)

        conv_result = self.conv(
            x_sen.expand_dims(axis=1))  # (128, 230, 62, 1) NCHW
        be1_mask = nd.zeros(conv_result.shape, ctx=CTX)
        aes_mask = nd.zeros(conv_result.shape, ctx=CTX)
        be2_mask = nd.zeros(conv_result.shape, ctx=CTX)
        be1_pad = nd.ones(conv_result.shape, ctx=CTX) * (-100)
        aes_pad = nd.ones(conv_result.shape, ctx=CTX) * (-100)
        be2_pad = nd.ones(conv_result.shape, ctx=CTX) * (-100)
        for i in range(x_sen.shape[0]):
            if ep1[i] == 0:
                ep1[i] += 1
                ep2[i] += 1
            be1_mask[i, :, :ep1[i], :] = 1
            be1_pad[i, :, :ep1[i], :] = 0
            aes_mask[i, :, ep1[i]:ep2[i], :] = 1
            aes_pad[i, :, ep1[i]:ep2[i], :] = 0
            be2_mask[i, :, ep2[i]:, :] = 1
            be2_pad[i, :, ep2[i]:, :] = 0
        be1 = conv_result * be1_mask
        aes = conv_result * aes_mask
        be2 = conv_result * be2_mask
        be1 = be1 + be1_pad
        aes = aes + aes_pad
        be2 = be2 + be2_pad
        o1 = self.pmp(be1)
        o2 = self.pmp(aes)
        o3 = self.pmp(be2)
        out = nd.concat(o1, o2, o3, dim=2)  # (128, 230, 3, 1)
        h_sen = self.conv_out(out)  # (128, 690)

        e1_infobox_list_all = nd.ones(
            (e1_infobox.shape[0], e1_infobox.shape[1], 42, 1),
            ctx=CTX)  # (batch_size,INFOBOX_LENGTH,42,1)
        e2_infobox_list_all = nd.ones(
            (e1_infobox.shape[0], e2_infobox.shape[1], 42, 1),
            ctx=CTX)  # (batch_size,INFOBOX_LENGTH,42,1)

        for i in range(e1_infobox.shape[0]):
            e1 = self.conv_info(
                x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
                e1_infobox[i].expand_dims(axis=1))
            e1_p = self.pool_info(e1)  # (1, 20, 42, 11)
            e1_infobox_list_all[i] = e1_p.reshape(
                (e1_p.shape[1], e1_p.shape[2], e1_p.shape[3]))
            e2 = self.conv_info(
                x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
                e2_infobox[i].expand_dims(axis=1))
            e2_p = self.pool_info(e2)
            e2_infobox_list_all[i] = e2_p.reshape(
                (e2_p.shape[1], e2_p.shape[2], e2_p.shape[3]))

        g1 = nd.softmax(self.att_info(e1_infobox_list_all),
                        axis=2)  # (batch_size,INFOBOX_LENGTH,42,1)
        g2 = nd.softmax(self.att_info(e2_infobox_list_all),
                        axis=2)  # (batch_size,INFOBOX_LENGTH,42,1)

        g1_att = self.dense_info(g1 * e1_infobox_list_all)
        g2_att = self.dense_info(g2 * e2_infobox_list_all)

        # (batch_size,128)
        e_infobox_list_all_att = nd.concat(g1_att, g2_att, dim=1)

        # (batch_size,256)
        h_sen_infobox = nd.concat(h_sen, e_infobox_list_all_att, dim=1)

        y = self.output(h_sen_infobox)
        return y
Example #18
def test_concat():
    a = nd.array(np.ones((SMALL_Y, LARGE_X)))
    b = nd.array(np.zeros((SMALL_Y, LARGE_X)))
    c = nd.concat(a, b, dim=0)
    assert c.shape == (b.shape[0]*2, LARGE_X)
Example #19
def concat_preds(preds):
    return nd.concat(*[flatten_pred(p) for p in preds], dim=1)
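flatten_pred is not shown in this snippet; a common definition (for instance the one used alongside this helper in the d2l SSD chapter) moves channels last and flattens, so predictions from feature maps of different sizes become (batch, -1) rows that can be concatenated along dim=1:

from mxnet import nd

def flatten_pred(pred):
    # NCHW -> NHWC, then flatten to (N, H*W*C)
    return pred.transpose((0, 2, 3, 1)).flatten()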
Example #20
def train_and_predict_rnn(rnn,
                          is_random_iter,
                          num_epochs,
                          num_steps,
                          num_hiddens,
                          lr,
                          clipping_theta,
                          batch_size,
                          vocab_size,
                          pred_period,
                          pred_len,
                          prefixes,
                          get_params,
                          get_inputs,
                          ctx,
                          corpus_indices,
                          idx_to_char,
                          char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        num_iters = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(X, vocab_size),
                                                    state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(X, vocab_size), state_h,
                                           *params)
                y = Y.T.reshape((-1, ))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y).sum() / (batch_size * num_steps)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l
            num_iters += 1
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f" %
                  (epoch, (train_l_sum / num_iters).exp().asscalar()))
            for prefix in prefixes:
                print(
                    ' - ',
                    predict_rnn(rnn, prefix, pred_len, params, num_hiddens,
                                vocab_size, ctx, idx_to_char, char_to_idx,
                                get_inputs, is_lstm))
Example #21
import numpy as np
import pandas as pd
from mxnet import nd
import sys
sys.path.append("./modules/Model")
sys.path.append("./modules/preprocessing")
sys.path.append("./models")
from Model import Model
from load_data import train_X, train_Y, test_X

# print(train_X.shape)
# print(test_X.shape)

# Change to your own model
from demo import demo_model
from zhan_yuan import ZY_model

# demo_model.train(train_features=train_X, train_labels=train_Y, print_iter=True)
# demo_model.export_predict(test_X)

noised_train_X = train_X + nd.random.normal(0, 0.01, shape=train_X.shape)
noised_train_Y = train_Y + nd.random.normal(0, 0.01, shape=train_Y.shape)

aug_train_X = nd.concat(train_X, noised_train_X, dim=0)
aug_train_Y = nd.concat(train_Y, noised_train_Y, dim=0)

# ZY_model.train_k_fold_cv(train_features = aug_train_X,
#                         train_labels = aug_train_Y,
#                         force_reinit=False)

ZY_model.train(aug_train_X, aug_train_Y)
ZY_model.export_predict(test_X, path_="./submission_PY.csv")
Example #22
    def __getitem__(self, index):
        """the index is the video index in clip_list,read several frame from the index"""
        filename, label = self.clip_lst[index]
        if not os.path.exists(filename):
            print("the file not exist", filename)
            return None
        cthw_data = None
        nd_image_list = []
        while len(nd_image_list) == 0:
            v = cv2.VideoCapture(filename)
            width = v.get(cv2.CAP_PROP_FRAME_WIDTH)
            height = v.get(cv2.CAP_PROP_FRAME_HEIGHT)
            length = v.get(cv2.CAP_PROP_FRAME_COUNT)

            assert self.crop_size <= width and self.crop_size <= height, \
                'crop size %d exceeds frame size (%d, %d)' % (
                    self.crop_size, width, height)
            length = int(length)
            if length < self.n_frame:
                logger.info("%s length %d <%d" %
                            (filename, length, self.n_frame))
                # in that case the last frame will be tiled at the end

            # set the sample begin frame id
            if not self.is_train:
                frame_st = 0 if length <= self.n_frame else int(
                    (length - self.n_frame) // 2)
            else:
                frame_st = 0 if length <= self.n_frame else random.randrange(
                    length - self.n_frame + 1)

            # set random crop position in single frame
            if self.is_train:
                row_st = random.randrange(self.scale_h - self.crop_size + 1)
                col_st = random.randrange(self.scale_w - self.crop_size + 1)
            else:
                row_st = int((self.scale_h - self.crop_size) / 2)
                col_st = int((self.scale_w - self.crop_size) / 2)

            # allocate the capacity to store image and jump to the position

            v.set(cv2.CAP_PROP_POS_FRAMES, frame_st)
            # start reading frames from the chosen start position
            for frame_p in range(min(self.n_frame, length)):
                _, f = v.read()
                if f is not None:
                    f = cv2.resize(
                        f, (self.scale_w, self.scale_h))  #in dim of hwc
                    f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
                    f = f[row_st:row_st + self.crop_size,
                          col_st:col_st + self.crop_size, :]
                    if self._transform:
                        nd_image_list.append(self._transform(
                            nd.array(f)))  # the frame_p transform

                else:
                    nd_image_list.clear()  #clear the image_list
                    break
        # after the transform each frame is CHW
        # replicate the last frame if the clip length < self.n_frame
        current_length = len(nd_image_list)
        cthw_data = nd.stack(*nd_image_list, axis=1)  #from CHW, to CTHW
        #tmp = nd.zeros(shape=(self.n_frame, self.crop_size, self.crop_size, 3), dtype='float32')
        if current_length < self.n_frame:
            #construct the last frame and concat
            extra_data = nd.tile(nd_image_list[-1],
                                 reps=(self.n_frame - current_length, 1, 1, 1))
            extra_data = extra_data.transpose((1, 0, 2, 3))
            cthw_data = nd.concat(cthw_data, extra_data, dim=1)
        return cthw_data, label
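When a clip has fewer than n_frame frames, the code above tiles the last frame along the time axis and transposes the result into CTHW before concatenating. A toy shape check (illustrative sizes):

from mxnet import nd
last = nd.zeros((3, 32, 32))              # one CHW frame
extra = nd.tile(last, reps=(4, 1, 1, 1))  # (4, 3, 32, 32), time axis first
extra = extra.transpose((1, 0, 2, 3))     # (3, 4, 32, 32), i.e. CTHW
assert extra.shape == (3, 4, 32, 32)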
Example #23
    def forward(self, X):
        for block in self.net:
            Y = block(X)
            # concatenate the input and output along the channel dimension
            X = nd.concat(X, Y, dim=1)
        return X
Example #24
    def hybrid_forward(self, F, lagX, x2):
        out = F.relu(self.bn1(self.fc1(x2)))
        out = self.bn2(self.fc2(out))
        return F.relu(F.concat(lagX, out, dim=2))
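In a HybridBlock, hybrid_forward receives F, which is mx.nd when the block runs imperatively and mx.sym after hybridize(); that is why the version above routes every operator through F rather than calling nd directly. A minimal self-contained sketch (hypothetical block):

from mxnet import nd
from mxnet.gluon import nn

class ConcatBlock(nn.HybridBlock):
    def hybrid_forward(self, F, a, b):
        # F resolves to mx.nd or mx.sym depending on the execution mode
        return F.relu(F.concat(a, b, dim=1))

blk = ConcatBlock()
blk.hybridize()
out = blk(nd.ones((2, 3)), nd.zeros((2, 2)))  # shape (2, 5)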
Example #25
    def forward(self, inputs, target, next_word_history, cache_history, begin_state=None): # pylint: disable=arguments-differ
        """Defines the forward computation for cache cell. Arguments can be either
        :py:class:`NDArray` or :py:class:`Symbol`.

        Parameters
        ----------
        inputs: NDArray
            The input data
        target: NDArray
            The label
        next_word_history: NDArray
            The next word in memory
        cache_history: NDArray
            The hidden state in cache history


        Returns
        -------
        out: NDArray
            The linear interpolation of the cache language model
            with the regular word-level language model
        next_word_history: NDArray
            The next words to be kept in the memory for look up
            (size is equal to the window size)
        cache_history: NDArray
            The hidden states to be kept in the memory for look up
            (size is equal to the window size)
        """
        output, hidden, encoder_hs, _ = \
            super(self.lm_model.__class__, self.lm_model).\
                forward(inputs, begin_state)
        encoder_h = encoder_hs[-1].reshape(-3, -2)
        output = output.reshape(-1, self._vocab_size)

        start_idx = len(next_word_history) \
            if next_word_history is not None else 0
        next_word_history = nd.concat(*[nd.one_hot(t[0], self._vocab_size, on_value=1, off_value=0)
                                        for t in target], dim=0) if next_word_history is None \
            else nd.concat(next_word_history,
                           nd.concat(*[nd.one_hot(t[0], self._vocab_size, on_value=1, off_value=0)
                                       for t in target], dim=0), dim=0)
        cache_history = encoder_h if cache_history is None \
            else nd.concat(cache_history, encoder_h, dim=0)

        out = None
        softmax_output = nd.softmax(output)
        for idx, vocab_L in enumerate(softmax_output):
            joint_p = vocab_L
            if start_idx + idx > self._window:
                valid_next_word = next_word_history[start_idx + idx - self._window:start_idx + idx]
                valid_cache_history = cache_history[start_idx + idx - self._window:start_idx + idx]
                logits = nd.dot(valid_cache_history, encoder_h[idx])
                cache_attn = nd.softmax(self._theta * logits).reshape(-1, 1)
                cache_dist = (cache_attn.broadcast_to(valid_next_word.shape)
                              * valid_next_word).sum(axis=0)
                joint_p = self._lambdas * cache_dist + (1 - self._lambdas) * vocab_L

            out = joint_p[target[idx]] if out is None \
                else nd.concat(out, joint_p[target[idx]], dim=0)
        next_word_history = next_word_history[-self._window:]
        cache_history = cache_history[-self._window:]
        return out, next_word_history, cache_history, hidden
Example #26
from mxnet import nd
import random
import zipfile


X, W_xh = nd.random.uniform(shape=(3, 1)), nd.random.uniform(shape=(1, 4))
H, W_hh = nd.random.uniform(shape=(3, 4)), nd.random.uniform(shape=(4, 4))

print(nd.dot(X, W_xh) + nd.dot(H, W_hh))
print(nd.dot(nd.concat(X, H, dim=1), nd.concat(W_xh, W_hh, dim=0)))
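
# A toy numeric check (not part of the original script): the two printed
# expressions are the same computation
import numpy as np
assert np.allclose((nd.dot(X, W_xh) + nd.dot(H, W_hh)).asnumpy(),
                   nd.dot(nd.concat(X, H, dim=1),
                          nd.concat(W_xh, W_hh, dim=0)).asnumpy())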


# read the lyrics file (~60k characters)
with zipfile.ZipFile('jaychou_lyrics.txt.zip') as zin:  # open the archive jaychou_lyrics.txt.zip
    with zin.open('jaychou_lyrics.txt') as fd:          # grab the jaychou_lyrics.txt file inside
        corpus_chars = fd.read().decode('utf-8')        # decode as UTF-8
# print(corpus_chars[:40])    # show the first 40 characters

# train the model on the first 10,000 characters only
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:10000]       # print(corpus_chars)



# build the character index (index -> char)
idx_to_char = list(set(corpus_chars))   # set() removes duplicates; 1027 distinct characters here
# build the dictionary (char -> index)
char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])  # note the (char, i) pair order
print(len(char_to_idx)) # 1027

# indices corresponding to the lyrics
Example #27
    def fit(self, num_steps=1):
        """Fit the models.

        Returns:
            dict of per-step losses (LossPi, LossQ1, LossQ2, LossV)
        """
        logger_data = {k: [] for k in ["LossPi", "LossQ1", "LossQ2", "LossV"]}
        for step in range(num_steps):
            # sample a batch from memory
            minibatch = self.memory.sample(self.batch_size)
            obs = nd.array(minibatch["obs"], self.ctx)
            acts = nd.array(minibatch["act"], self.ctx)
            rewards = nd.array(minibatch["rew"], self.ctx)
            next_obs = nd.array(minibatch["next_obs"], self.ctx)
            nonterm = nd.array(minibatch["nt"], self.ctx)

            lr = self.lr(self.steps) * self.lrmult

            # update the policy function
            with autograd.record():
                _mu, _pi, _logp_pi = self.policy(obs)
                _obspi = nd.concat(obs, _pi, dim=-1)
                _q1_pi = self.qfn1(_obspi)
                pi_loss = nd.mean(self.alpha * _logp_pi - _q1_pi)
                pi_loss.backward()
            self.mu.update(lr)
            self.logstd.update(lr)
            self.policy_base.update(lr)

            # update the value functions
            logp_pi = nd.stop_gradient(_logp_pi)
            obspi = nd.stop_gradient(_obspi)
            obsact = nd.concat(obs, acts, dim=-1)
            q1_pi = self.qfn1(obspi)
            q2_pi = self.qfn2(obspi)
            min_q_pi = nd.minimum(q1_pi, q2_pi)
            v_targ = self.vfn_targ(next_obs)
            q_backup = nd.stop_gradient(rewards +
                                        self.gamma * nonterm * v_targ)
            v_backup = nd.stop_gradient(min_q_pi - self.alpha * logp_pi)
            with autograd.record():
                _q1 = self.qfn1(obsact)
                _q2 = self.qfn2(obsact)
                _v = self.vfn(obs)

                q1_loss = 0.5 * nd.mean(nd.square(q_backup - _q1))
                q2_loss = 0.5 * nd.mean(nd.square(q_backup - _q2))
                v_loss = 0.5 * nd.mean(nd.square(v_backup - _v))
                total_loss = q1_loss + q2_loss + v_loss
                total_loss.backward()
            self.qfn1.update(lr)
            self.qfn2.update(lr)
            self.vfn.update(lr)

            # update the target network
            for i in range(len(self.vfn.weights)):
                self.vfn_targ.weights[i][:] = \
                    self.polyak * self.vfn_targ.weights[i][:] + \
                    (1 - self.polyak) * self.vfn.weights[i][:]

            logger_data["LossPi"].append(pi_loss.asnumpy()[0])
            logger_data["LossQ1"].append(q1_loss.asnumpy()[0])
            logger_data["LossQ2"].append(q2_loss.asnumpy()[0])
            logger_data["LossV"].append(v_loss.asnumpy()[0])
        return logger_data
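# Toy sketch (added) of the Polyak averaging step above, with made-up numbers:
# the target parameters drift slowly toward the online ones, which keeps the
# value-function targets stable. polyak = 0.995 is illustrative.
from mxnet import nd
w_online = nd.array([1.0, 2.0])
w_target = nd.zeros(2)
polyak = 0.995
w_target[:] = polyak * w_target + (1 - polyak) * w_online
print(w_target)  # -> [0.005 0.01]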
Esempio n. 28
0
 def forward(self, x):
     p1 = self.p1_conv_1(x)
     p2 = self.p2_conv_3(self.p2_conv_1(x))
     p3 = self.p3_conv_5(self.p3_conv_1(x))
     p4 = self.p4_conv_1(self.p4_pool_3(x))
     return nd.concat(p1, p2, p3, p4, dim=1)
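# Shape sketch (added): dim=1 is the channel axis in NCHW layout, so the four
# paths above must agree on batch size, height and width, while their channel
# counts simply add up. A toy check with 2- and 3-channel maps:
from mxnet import nd
a = nd.zeros((1, 2, 4, 4))
b = nd.zeros((1, 3, 4, 4))
print(nd.concat(a, b, dim=1).shape)  # (1, 5, 4, 4)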
Esempio n. 29
0
# Reconstructed setup (the original definitions were lost in extraction):
# 200 samples in total, per the shape comments below.
n_train, n_test = 100, 100
true_w, true_b = [1.2, -3.4, 5.6], 5

features = nd.random.normal(shape=(n_train + n_test, 1))

# features                  x
# nd.power(features, 2)     x**2    (element-wise square)
# nd.power(features, 3)     x**3    (element-wise cube)
# Concatenated column-wise, the three (200, 1) columns form a
# 200x3 design matrix [x, x**2, x**3] (<NDArray 200x3 @cpu(0)>).
poly_features = nd.concat(features, nd.power(features, 2),
                          nd.power(features, 3))

# y = 1.2x - 3.4x**2 + 5.6x**3 + 5 + eps  (broadcast over the 200 samples)
# true_w[0] =  1.2   scales poly_features[:, 0]  (<NDArray 200 @cpu(0)>)
# true_w[1] = -3.4   scales poly_features[:, 1]  (<NDArray 200 @cpu(0)>)
# true_w[2] =  5.6   scales poly_features[:, 2]  (<NDArray 200 @cpu(0)>)
# true_b    =  5
# labels: the generated y. The next two lines are reconstructed from the
# comments above; the original lines were lost in extraction.
labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]
          + true_w[2] * poly_features[:, 2] + true_b)
labels += nd.random.normal(scale=0.1, shape=labels.shape)
Esempio n. 30
0
from mxnet import gluon, nd
from mxnet.gluon.model_zoo import vision as models


def get_features(model_name, data_iter):
    # Reconstructed sketch: only `return features` of this function survived
    # extraction. It assumes a pretrained Gluon model whose `.features`
    # block yields one embedding batch per mini-batch of images.
    net = models.get_model(model_name, pretrained=True)
    batches = [net.features(X) for X in data_iter]
    features = nd.concat(*batches, dim=0)
    return features


batch_size = 64

data_iter_224 = gluon.data.DataLoader(gluon.data.ArrayDataset(X_224),
                                      batch_size=batch_size)
data_iter_299 = gluon.data.DataLoader(gluon.data.ArrayDataset(X_299),
                                      batch_size=batch_size)

model_names = ['inceptionv3', 'resnet152_v1']

features = []
import pickle as pkl

for model_name in model_names:
    if model_name == 'inceptionv3':
        features.append(get_features(model_name, data_iter_299))
        print("Done inceptionv3")
        data111 = pkl.dumps(features)

#    else:
#        features.append(get_features(model_name, data_iter_224))
#        print("Done resnet152_v1")
#        data222 = pkl.dumps(features)

features = nd.concat(*features, dim=1)
pkl.dump(features, open('tmp.pickle', 'wb'))
Esempio n. 31
0
def train_and_predict_rnn(rnn,
                          is_random_iter,
                          num_epochs,
                          num_steps,
                          num_hiddens,
                          lr,
                          clipping_theta,
                          batch_size,
                          vocab_size,
                          pred_period,
                          pred_len,
                          prefixes,
                          get_params,
                          get_inputs,
                          ctx,
                          corpus_indices,
                          idx_to_char,
                          char_to_idx,
                          is_lstm=False):
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        # With consecutive sampling, the hidden state only needs to be
        # initialized at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the hidden state must be re-initialized
            # before each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            # With consecutive sampling, detach the hidden state from the
            # computation graph.
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(X, vocab_size),
                                                    state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(X, vocab_size), state_h,
                                           *params)
                # Let t_ib_j be element j of the mini-batch at time step i:
                # y shape: (batch_size * num_steps,)
                # y = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ...].
                y = Y.T.reshape((-1, ))
                # Concatenate outputs; shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            # Clip the gradients.
            grad_clipping(params, state_h, Y, clipping_theta, ctx)
            gb.sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f" %
                  (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(
                    ' - ',
                    predict_rnn(rnn, prefix, pred_len, params, num_hiddens,
                                vocab_size, ctx, idx_to_char, char_to_idx,
                                get_inputs, is_lstm))
Esempio n. 32
0
    def store_samples(self, data, y, query_network, store_prob, context):
        if not (self.memory_replacement_strategy == "no_replacement"
                and self.max_stored_samples != -1
                and self.key_memory.shape[0] >= self.max_stored_samples):
            num_pus = len(data)
            sub_batch_sizes = [data[i][0][0].shape[0] for i in range(num_pus)]
            num_inputs = len(data[0][0])
            num_outputs = len(y)
            mx_context = context[0]

            if len(self.key_memory) == 0:
                self.key_memory = nd.empty(0, ctx=mx.cpu())
                self.value_memory = []
                self.label_memory = []  # nd.empty((num_outputs, 0), ctx=mx.cpu())

            ind = [
                nd.sample_multinomial(
                    store_prob, sub_batch_sizes[i]).as_in_context(mx_context)
                for i in range(num_pus)
            ]

            max_inds = [nd.max(ind[i]) for i in range(num_pus)]
            if any(max_inds):
                to_store_values = []
                for i in range(num_inputs):
                    tmp_values = []
                    for j in range(0, num_pus):
                        if max_inds[j]:
                            if isinstance(tmp_values, list):
                                tmp_values = nd.contrib.boolean_mask(
                                    data[j][0][i].as_in_context(mx_context),
                                    ind[j])
                            else:
                                tmp_values = nd.concat(
                                    tmp_values,
                                    nd.contrib.boolean_mask(
                                        data[j][0][i].as_in_context(
                                            mx_context), ind[j]),
                                    dim=0)
                    to_store_values.append(tmp_values)

                to_store_labels = []
                for i in range(num_outputs):
                    tmp_labels = []
                    for j in range(0, num_pus):
                        if max_inds[j]:
                            if isinstance(tmp_labels, list):
                                tmp_labels = nd.contrib.boolean_mask(
                                    y[i][j].as_in_context(mx_context), ind[j])
                            else:
                                tmp_labels = nd.concat(
                                    tmp_labels,
                                    nd.contrib.boolean_mask(
                                        y[i][j].as_in_context(mx_context),
                                        ind[j]),
                                    dim=0)
                    to_store_labels.append(tmp_labels)

                to_store_keys = query_network(
                    *to_store_values[0:self.query_net_num_inputs])

                if self.key_memory.shape[0] == 0:
                    self.key_memory = to_store_keys.as_in_context(mx.cpu())
                    for i in range(num_inputs):
                        self.value_memory.append(
                            to_store_values[i].as_in_context(mx.cpu()))
                    for i in range(num_outputs):
                        self.label_memory.append(
                            to_store_labels[i].as_in_context(mx.cpu()))
                elif (self.memory_replacement_strategy == "replace_oldest"
                      and self.max_stored_samples != -1
                      and self.key_memory.shape[0] >= self.max_stored_samples):
                    num_to_store = to_store_keys.shape[0]
                    self.key_memory = nd.concat(self.key_memory[num_to_store:],
                                                to_store_keys.as_in_context(
                                                    mx.cpu()),
                                                dim=0)
                    for i in range(num_inputs):
                        self.value_memory[i] = nd.concat(
                            self.value_memory[i][num_to_store:],
                            to_store_values[i].as_in_context(mx.cpu()),
                            dim=0)
                    for i in range(num_outputs):
                        self.label_memory[i] = nd.concat(
                            self.label_memory[i][num_to_store:],
                            to_store_labels[i].as_in_context(mx.cpu()),
                            dim=0)
                else:
                    self.key_memory = nd.concat(self.key_memory,
                                                to_store_keys.as_in_context(
                                                    mx.cpu()),
                                                dim=0)
                    for i in range(num_inputs):
                        self.value_memory[i] = nd.concat(
                            self.value_memory[i],
                            to_store_values[i].as_in_context(mx.cpu()),
                            dim=0)
                    for i in range(num_outputs):
                        self.label_memory[i] = nd.concat(
                            self.label_memory[i],
                            to_store_labels[i].as_in_context(mx.cpu()),
                            dim=0)
Esempio n. 33
0
def concatenate(tensors, axis):
    return nd.concat(*tensors, dim=axis)
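# Usage sketch (added): the shim maps NumPy-style `axis` onto nd.concat's
# `dim`, so backend code can keep the familiar keyword.
from mxnet import nd
x, y = nd.ones((2, 3)), nd.zeros((2, 3))
print(concatenate([x, y], axis=0).shape)  # (4, 3)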
Esempio n. 34
0
 def forward(self, x):
     p1 = self.p1_1(x)
     p2 = self.p2_2(self.p2_1(x))
     p3 = self.p3_2(self.p3_1(x))
     p4 = self.p4_2(self.p4_1(x))
     return nd.concat(p1, p2, p3, p4, dim=1)  # concatenate the outputs along the channel dimension
Esempio n. 35
0
 def forward(self, x):
     for layer in self.net:
         out = layer(x)
         x = nd.concat(x, out, dim=1)
     return x
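# Channel-growth sketch (added): because every layer's output is concatenated
# back onto its input, channels grow by the layer's output width at each step.
# A hypothetical block of three 2-channel convolutions on a 4-channel input:
from mxnet import nd
from mxnet.gluon import nn
blocks = nn.Sequential()
for _ in range(3):
    blocks.add(nn.Conv2D(channels=2, kernel_size=3, padding=1))
blocks.initialize()
x = nd.random.uniform(shape=(1, 4, 8, 8))
for layer in blocks:
    x = nd.concat(x, layer(x), dim=1)
print(x.shape)  # (1, 10, 8, 8): 4 + 3 * 2 channels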
Esempio n. 36
0
 def forward(self, inpt):
     fwd = self._lstm_fwd(inpt)
     bwd_inpt = nd.flip(inpt, 0)
     bwd = self._lstm_bwd(bwd_inpt)
     bwd = nd.flip(bwd, 0)
     return nd.concat(fwd, bwd, dim=2)
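# Alignment sketch (added): the backward LSTM consumes the sequence reversed,
# so its output must be flipped back before the concat pairs forward and
# backward states of the same time step. nd.flip just reverses an axis:
from mxnet import nd
seq = nd.array([[1], [2], [3]])        # (seq_len, features)
print(nd.flip(seq, 0))                 # [[3.] [2.] [1.]]
print(nd.flip(nd.flip(seq, 0), 0))     # back to [[1.] [2.] [3.]]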
Esempio n. 37
0
def main():
    ctx = mx.gpu()

    batch_size = 100
    num_inputs = 784
    num_outputs = 10

    # Get MNIST Data
    def transform(data, label):
        return data.astype(np.float32) / 255, label.astype(np.float32)

    train_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(
        train=True, transform=transform),
                                           batch_size,
                                           shuffle=True)
    test_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(
        train=False, transform=transform),
                                          batch_size,
                                          shuffle=False)
    train_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(
        train=True, transform=transform),
                                           batch_size,
                                           shuffle=True)
    test_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(
        train=False, transform=transform),
                                          batch_size,
                                          shuffle=False)

    net_siamese = gluon.nn.Sequential()
    with net_siamese.name_scope():
        net_siamese.add(gluon.nn.Dense(256, activation='relu'))
        net_siamese.add(gluon.nn.Dense(128, activation='relu'))

    net_out = gluon.nn.Sequential()
    with net_out.name_scope():
        net_out.add(gluon.nn.Dense(128, activation='relu'))
        net_out.add(gluon.nn.Dense(64, activation='relu'))
        net_out.add(gluon.nn.Dense(num_outputs))

    net_siamese.collect_params().initialize(mx.init.Uniform(scale=0.1),
                                            ctx=ctx)
    net_out.collect_params().initialize(mx.init.Uniform(scale=0.1), ctx=ctx)

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    trainer_siamese = gluon.Trainer(net_siamese.collect_params(), 'sgd',
                                    {'learning_rate': 0.05})
    trainer_out = gluon.Trainer(net_out.collect_params(), 'sgd',
                                {'learning_rate': 0.05})

    def evaluate_accuracy(data_iterator1, data_iterator2, net):
        acc = mx.metric.Accuracy()
        for i, ((data1, label1),
                (data2,
                 label2)) in enumerate(zip(data_iterator1, data_iterator2)):
            data1 = data1.as_in_context(ctx).reshape((-1, 784))
            data2 = data2.as_in_context(ctx).reshape((-1, 784))
            label1 = label1.as_in_context(ctx)
            inter1 = net_siamese(data1)
            inter2 = net_siamese(data2)
            output = net_out(nd.concat(inter1, inter2))
            acc.update([label1], [output])
        return acc.get()

    epochs = 4
    moving_loss = 0.
    smoothing_constant = .01
    metric = mx.metric.Accuracy()

    print("\n#### Shared+Module1 Training ####")
    for e in range(epochs):
        metric.reset()
        # Train Branch with mod1 on dataset 1
        for i, ((data1, label1),
                (data2, label2)) in enumerate(zip(train_data1, train_data2)):
            data1 = data1.as_in_context(ctx).reshape((-1, 784))
            data2 = data2.as_in_context(ctx).reshape((-1, 784))
            label1 = label1.as_in_context(ctx)
            with autograd.record():
                inter1 = net_siamese(data1)
                inter2 = net_siamese(data2)
                output = net_out(nd.concat(inter1, inter2))
                loss = softmax_cross_entropy(output, label1)
                loss.backward()
            trainer_siamese.step(batch_size)
            trainer_out.step(batch_size)

            metric.update([label1], [output])

            curr_loss = nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           (smoothing_constant) * curr_loss)

            if i % 100 == 0 and i > 0:
                name, acc = metric.get()
                print('[Epoch %d Batch %d] Loss: %s Training: %s=%f' %
                      (e, i, moving_loss, name, acc))

        _, train_accuracy = metric.get()
        _, test_accuracy = evaluate_accuracy(
            test_data1, test_data2,
            lambda x, y: net_out(nd.concat(net_siamese(x), net_siamese(y))))
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\n" %
              (e, moving_loss, train_accuracy, test_accuracy))
Esempio n. 38
0
    def forward(self, input_data):
        x_sen = input_data[:, :DIMENSION * FIXED_WORD_LENGTH].reshape(
            (input_data.shape[0], FIXED_WORD_LENGTH, DIMENSION))
        e1_kernel_num = input_data[:, DIMENSION * FIXED_WORD_LENGTH]
        e2_kernel_num = input_data[:, DIMENSION * FIXED_WORD_LENGTH + 1]
        e1_size = input_data[:, DIMENSION * FIXED_WORD_LENGTH +
                             2:DIMENSION * FIXED_WORD_LENGTH + 2 +
                             INFOBOX_LENGTH]
        e2_size = input_data[:, DIMENSION * FIXED_WORD_LENGTH + 2 +
                             INFOBOX_LENGTH:DIMENSION * FIXED_WORD_LENGTH + 2 +
                             INFOBOX_LENGTH * 2]

        e1_start = DIMENSION * FIXED_WORD_LENGTH + 2 + INFOBOX_LENGTH * 2
        e1_infobox = input_data[:, e1_start:e1_start + INFOBOX_LENGTH *
                                INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
                                    (input_data.shape[0], -1,
                                     DIMENSION))  # (batch_size, word_num, 100)
        e2_start = e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION
        e2_infobox = input_data[:, e2_start:e2_start + INFOBOX_LENGTH *
                                INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
                                    (input_data.shape[0], -1,
                                     DIMENSION))  # (batch_size, word_num, 100)
        h_sen = self.lstm(x_sen)  # (batch_size,60,128)

        e1_infobox_list_all = nd.zeros(
            (e1_infobox.shape[0], e1_infobox.shape[1], 60, 1),
            ctx=CTX)  # (batch_size,INFOBOX_LENGTH,60,1)
        e2_infobox_list_all = nd.zeros(
            (e2_infobox.shape[0], e2_infobox.shape[1], 60, 1),
            ctx=CTX)  # (batch_size,INFOBOX_LENGTH,60,1)

        for i in range(e1_infobox.shape[0]):
            base = 0
            for j in range(int(e1_kernel_num.asnumpy().item(0))):
                w = int(e1_size[i, j].asnumpy().item(0))
                if w == 0:
                    continue
                kernel = e1_infobox[i, base:base + w, :].reshape(
                    (1, 1, w, DIMENSION))
                base += w
                e1 = self.conv1(x_sen[i].reshape(
                    (1, 1, FIXED_WORD_LENGTH, DIMENSION)),
                                kernel)  # (1, 1, 59, 1)
                e1_infobox_list_all[
                    i, :e1.shape[1], :e1.shape[2], :e1.shape[3]] = e1.reshape(
                        (e1.shape[1], e1.shape[2], e1.shape[3]))

        for i in range(e2_infobox.shape[0]):
            base = 0
            for j in range(int(e2_kernel_num.asnumpy().item(0))):
                w = int(e2_size[i, j].asnumpy().item(0))
                if w == 0:
                    continue
                kernel = e2_infobox[i, base:base + w, :].reshape(
                    (1, 1, w, DIMENSION))
                base += w
                e2 = self.conv2(x_sen[i].reshape(
                    (1, 1, FIXED_WORD_LENGTH, DIMENSION)),
                                kernel)  # (1, 1, 59, 1)
                e2_infobox_list_all[
                    i, :e2.shape[1], :e2.shape[2], :e2.shape[3]] = e2.reshape(
                        (e2.shape[1], e2.shape[2], e2.shape[3]))

            # e1 = self.conv1(x_sen[i].expand_dims(axis=0).expand_dims(axis=1), e1_infobox[i].expand_dims(axis=1))
            # #             e1_p = self.pool(e1)
            # e1_infobox_list_all[i] = e1.reshape((e1.shape[1], e1.shape[2], e1.shape[3]))
            # e2 = self.conv2(x_sen[i].expand_dims(axis=0).expand_dims(axis=1), e2_infobox[i].expand_dims(axis=1))
            # #             e2_p = self.pool(e2)
            # e2_infobox_list_all[i] = e2.reshape((e2.shape[1], e2.shape[2], e2.shape[3]))

        e1_infobox_list_all = e1_infobox_list_all.reshape(
            (e1_infobox.shape[0], e1_infobox.shape[1],
             -1))  # (batch_size,INFOBOX_LENGTH,51)
        e2_infobox_list_all = e2_infobox_list_all.reshape(
            (e2_infobox.shape[0], e2_infobox.shape[1],
             -1))  # (batch_size,INFOBOX_LENGTH,51)

        e1_infobox_list_all_new = self.dense1(e1_infobox_list_all)
        e2_infobox_list_all_new = self.dense2(e2_infobox_list_all)

        #         g1 = nd.softmax(self.att(e1_infobox_list_all),axis=2) # (batch_size,INFOBOX_LENGTH,1)
        #         g2 = nd.softmax(self.att(e2_infobox_list_all),axis=2) # (batch_size,INFOBOX_LENGTH,1)
        #         g1_att = nd.batch_dot(nd.transpose(g1,axes = (0,2,1)),e1_infobox_list_all) # (batch_size,1,64)
        #         g2_att = nd.batch_dot(nd.transpose(g2,axes = (0,2,1)),e2_infobox_list_all) # (batch_size,1,64)
        #         g1_att = g1_att.reshape((g1_att.shape[0],-1)) # (batch_size,64)
        #         g2_att = g2_att.reshape((g2_att.shape[0],-1)) # (batch_size,64)

        # (batch_size,128)
        e_infobox_list_all_att = nd.concat(e1_infobox_list_all_new,
                                           e2_infobox_list_all_new,
                                           dim=1)
        h_sen_new = self.lstm_out(h_sen.expand_dims(1))
        h_sen_new = h_sen_new.reshape(
            (h_sen_new.shape[0], -1))  # (batch_size,128)
        # (batch_size,256)
        h_sen_infobox = nd.concat(h_sen_new, e_infobox_list_all_att, dim=1)
        y = self.output(h_sen_infobox)
        return y
Esempio n. 39
0
    def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
        """Run decoding

        Parameters
        ----------
        word_inputs : mxnet.ndarray.NDArray
            word indices of seq_len x batch_size
        tag_inputs : mxnet.ndarray.NDArray
            tag indices of seq_len x batch_size
        arc_targets : mxnet.ndarray.NDArray
            gold arc indices of seq_len x batch_size
        rel_targets : mxnet.ndarray.NDArray
            gold rel indices of seq_len x batch_size
        Returns
        -------
        tuple
            (arc_accuracy, rel_accuracy, overall_accuracy, loss) when training;
            (arc_accuracy, rel_accuracy, overall_accuracy, outputs) when gold
            targets are given at evaluation time; otherwise outputs alone,
            where outputs is a list of (arcs, rels) per sentence.
        """
        is_train = autograd.is_training()

        def flatten_numpy(ndarray):
            """Flatten nd-array to 1-d column vector

            Parameters
            ----------
            ndarray : numpy.ndarray
                input tensor

            Returns
            -------
            numpy.ndarray
                A column vector

            """
            return np.reshape(ndarray, (-1,), 'F')

        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
        num_tokens = int(np.sum(mask))  # non padding, non root token number

        if is_train or arc_targets is not None:
            mask_1D = flatten_numpy(mask)
            mask_1D_tensor = nd.array(mask_1D)

        unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
        word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
        if self.pret_word_embs:
            word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
        tag_embs = self.tag_embs(nd.array(tag_inputs))

        # Dropout
        emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size x (word_dim + tag_dim)

        top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                           dropout_x=self.dropout_lstm_input if is_train else 0)
        top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

        W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
        W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
        dep = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep)
        head = leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
        dep = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp)
        head = nd.Dropout(data=head, axes=[0], p=self.dropout_mlp)
        dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
        dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
        head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

        W_arc = self.arc_W.data()
        arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size, num_outputs=1,
                              bias_x=True, bias_y=False)
        # (#head x #dep) x batch_size

        flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
        # (#head ) x (#dep x batch_size)

        arc_preds = arc_logits.argmax(0)
        # seq_len x batch_size

        if is_train or arc_targets is not None:
            correct = np.equal(arc_preds.asnumpy(), arc_targets)
            arc_correct = correct.astype(np.float32) * mask
            arc_accuracy = np.sum(arc_correct) / num_tokens
            targets_1D = flatten_numpy(arc_targets)
            losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
            arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            arc_probs = np.transpose(
                np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(), (seq_len, seq_len, batch_size), 'F'))
        # #batch_size x #dep x #head

        W_rel = self.rel_W.data()
        rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                              num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
        # (#head x rel_size x #dep) x batch_size

        flat_rel_logits = reshape_fortran(rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
        # (#head x rel_size) x (#dep x batch_size)

        _target_vec = nd.array(targets_1D if is_train else flatten_numpy(arc_preds.asnumpy())).reshape(
            seq_len * batch_size, 1)
        _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

        partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
        # (rel_size) x (#dep x batch_size)

        if is_train or arc_targets is not None:
            rel_preds = partial_rel_logits.argmax(0)
            targets_1D = flatten_numpy(rel_targets)
            rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
            rel_accuracy = np.sum(rel_correct) / num_tokens
            losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
            rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            rel_probs = np.transpose(np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                                                (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

        if is_train or arc_targets is not None:
            loss = arc_loss + rel_loss
            correct = rel_correct * flatten_numpy(arc_correct)
            overall_accuracy = np.sum(correct) / num_tokens

        if is_train:
            return arc_accuracy, rel_accuracy, overall_accuracy, loss

        outputs = []

        for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
            # parse sentences one by one
            msk[0] = 1.
            sent_len = int(np.sum(msk))
            arc_pred = arc_argmax(arc_prob, sent_len, msk)
            rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
            rel_pred = rel_argmax(rel_prob, sent_len)
            outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

        if arc_targets is not None:
            return arc_accuracy, rel_accuracy, overall_accuracy, outputs
        return outputs
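# nd.pick sketch (added): partial_rel_logits above keeps, for every dependent,
# only the rel scores of its chosen head. nd.pick gathers one entry per
# position along `axis`; a toy example:
from mxnet import nd
m = nd.array([[1, 2, 3],
              [4, 5, 6]])
print(nd.pick(m, nd.array([0, 1, 0]), axis=0))  # [1. 5. 3.]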
Esempio n. 40
0
 def forward(self, inpt):
     fwd = self._lstm_fwd(inpt)
     bwd_inpt = nd.flip(inpt, 0)
     bwd = self._lstm_bwd(bwd_inpt)
     bwd = nd.flip(bwd, 0)
     return nd.concat(fwd, bwd, dim=2)
Esempio n. 41
0
def concat_predictions(preds):
    return nd.concat(*preds, dim=1)
Esempio n. 42
0
    def forward(self, is_train, req, in_data, out_data, aux):
        # im_info.shape(batch_size, 3)
        rpn_cls_score = in_data[0]
        gt_boxes = in_data[1]
        im_info = in_data[2]
        base_anchors = in_data[3]
        feat_stride = in_data[4]
        allowed_border = in_data[5]

        ctx = rpn_cls_score.context
        batch_size = rpn_cls_score.shape[0]
        feat_height, feat_width = rpn_cls_score.shape[-2:]
        A = base_anchors.shape[0]
        K = feat_height * feat_width
        N = K * A
        # generate anchors shifts
        shift_x = (nd.arange(0, feat_width, ctx=ctx) * feat_stride). \
            broadcast_to((feat_height, feat_width)).reshape(K)
        shift_y = (nd.arange(0, feat_height, ctx=ctx) * feat_stride). \
            reshape(feat_height, 1).broadcast_to((feat_height, feat_width)).reshape(K)
        # add A anchors (1, A, 4) to cell K shifts (K, 1, 4) to get shift anchors (K, A, 4)
        # then reshape and broadcast to (batch_size, K*A, 4) shifted anchors
        shifts = nd.stack(shift_x, shift_y, shift_x, shift_y,
                          axis=-1).reshape(K, 1, 4)
        all_anchors = (base_anchors.reshape((1, A, 4)) + shifts).reshape(1, N, 4) \
            .broadcast_to((batch_size, N, 4))

        # keep only inside anchors, set outside anchors coordinate = (-1, -1, -1, -1)
        inside_bool_mask = (all_anchors[:, :, 0] >= -allowed_border) * \
                           (all_anchors[:, :, 1] >= -allowed_border) * \
                           (all_anchors[:, :, 2] < (im_info[:, 1] + allowed_border).reshape(0, 1)) * \
                           (all_anchors[:, :, 3] < (im_info[:, 0] + allowed_border).reshape(0, 1))
        all_anchors[:] = inside_bool_mask.reshape(batch_size, -1,
                                                  1) * (all_anchors + 1) - 1

        overlaps = bbox_overlaps(gt_boxes, all_anchors)
        # get max iou anchor for each gt_boxes
        gt_max_overlaps = overlaps.max(axis=2)
        gt_argmax_overlaps = overlaps.argmax(axis=2)
        # get max iou for each anchors
        max_overlaps = overlaps.max(axis=1)
        argmax_overlaps = overlaps.argmax(axis=1)
        # set positive anchor label=1, other=0
        labels = max_overlaps >= self._positive_iou_th
        # set neither positive nor negative anchor label = -1
        labels[:] = labels - ((max_overlaps > self._negative_iou_th) *
                              (max_overlaps < self._positive_iou_th))
        # set max iou anchor for each gt_boxes label >=  1 (<=3) and ignore padded gt_box
        batch_idx = nd.arange(batch_size, ctx=ctx).reshape(-1, 1)
        labels[batch_idx, gt_argmax_overlaps] = labels[
            batch_idx, gt_argmax_overlaps] + 2 * (gt_max_overlaps > 0)
        # set outside anchor label <= -1
        # then remain label=0 is negative samples
        labels[:] = labels - 4 * (1 - inside_bool_mask)
        # clip label values to -1, 0, 1
        labels[:] = nd.clip(labels, -1, 1)

        # random choice labels
        labels_with_idx = nd.concat(labels.transpose(),
                                    nd.arange(N, ctx=ctx).reshape(-1, 1),
                                    dim=1)
        # column 0:batch_size is label, column batch_size is labels original index
        rand_labels_with_idx = nd.random.shuffle(labels_with_idx)
        # may include some bg_label if labels==1 num < num_fg
        fg_rand_labels_idx = rand_labels_with_idx[:, :batch_size].argsort(
            axis=0, is_ascend=0)[:self._rpn_fg_num]
        # take abs() so label=-1 becomes 1 and the label=0 rows sort to the top in ascending order
        abs_rand_labels = nd.abs(rand_labels_with_idx[:, :batch_size])
        # set fg_label=-1 to let it at top after ascend sort
        abs_rand_labels[fg_rand_labels_idx, batch_idx.transpose()] = -1
        # select rand labels idx that will be excluded
        exclude_rand_labels_idx = abs_rand_labels.argsort(
            axis=0, is_ascend=1)[self._rpn_batch_size:]
        # get original label index
        exclude_labels_idx = rand_labels_with_idx[exclude_rand_labels_idx,
                                                  batch_size]
        # set exclude label = -1

        labels[batch_idx, exclude_labels_idx.transpose()] = -1

        # assign gt_boxes to anchor, anchor box_target is its max iou gt_box
        bbox_targets = nd.empty((batch_size, N, 4), ctx=ctx)
        bbox_targets[:] = bbox_transform(
            all_anchors, gt_boxes[batch_idx, argmax_overlaps, :4])

        labels = labels.reshape((batch_size, feat_height, feat_width,
                                 A)).transpose(axes=(0, 3, 1, 2))
        labels = labels.reshape((batch_size, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (batch_size, feat_height, feat_width, A,
             4)).transpose(axes=(0, 4, 3, 1, 2))
        bbox_targets = bbox_targets.reshape(
            (batch_size, 4, A * feat_height * feat_width))

        out_data[0][:] = labels
        out_data[1][:] = bbox_targets
Esempio n. 43
0
 def forward(self, X):
     return nd.concat(*X, dim=self._concat_dim)
Esempio n. 44
0
 def forward(self, enc1, enc2):
     x = nd.concat(enc1, enc2)
     x = self.dense(x)
     x = nd.log_softmax(x)
     return x
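# Note sketch (added): nd.concat's `dim` defaults to 1, so the call above
# joins the two encodings feature-wise for every example in the batch:
from mxnet import nd
e1, e2 = nd.ones((2, 3)), nd.zeros((2, 4))
print(nd.concat(e1, e2).shape)  # (2, 7)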
Esempio n. 45
0
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim, 
                          learning_rate, clipping_norm, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1): 
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps, 
                                     ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params) 
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                label = label.T.reshape((-1,))
                # Concatenate outputs:
                # shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()

            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)

            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size

        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f" % (e, 
                                               exp(train_loss/num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params,
                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs,
                      is_lstm))
            print()
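# Alignment sketch (added): why the labels are transposed before reshaping.
# nd.concat(*outputs, dim=0) stacks the per-time-step (batch, vocab) arrays,
# giving row order [t0b0, t0b1, ..., t1b0, ...]; transposing the
# (batch, time) label matrix first produces the same order:
from mxnet import nd
Y = nd.array([[0, 1, 2],    # batch element 0 over 3 time steps
              [3, 4, 5]])   # batch element 1
print(Y.T.reshape((-1,)))   # [0. 3. 1. 4. 2. 5.]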
Esempio n. 46
0
 def forward(self, X):
     for blk in self.net:
         Y = blk(X)
         X = nd.concat(X, Y, dim=1)
     # Return the running concatenation X; returning Y alone would discard
     # the dense connections built up above (compare Esempio n. 35).
     return X
Esempio n. 47
0
    def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
        """Generating training targets that do not require network predictions.

        Parameters
        ----------
        img : mxnet.nd.NDArray
            Original image tensor.
        xs : list of mxnet.nd.NDArray
            List of feature maps.
        anchors : mxnet.nd.NDArray
            YOLO3 anchors.
        offsets : mxnet.nd.NDArray
            Pre-generated x and y offsets for YOLO3.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes.
        gt_ids : mxnet.nd.NDArray
            Ground-truth IDs.
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio from 0 to 1.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification.

        """
        assert isinstance(anchors, (list, tuple))
        all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
        assert isinstance(offsets, (list, tuple))
        all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
        num_anchors = np.cumsum([a.size // 2 for a in anchors])
        num_offsets = np.cumsum([o.size // 2 for o in offsets])
        _offsets = [0] + num_offsets.tolist()
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(offsets)

        # orig image size
        orig_height = img.shape[2]
        orig_width = img.shape[3]
        with autograd.pause():
            # outputs
            shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
                (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0], axis=0)
            center_targets = nd.zeros_like(shape_like)
            scale_targets = nd.zeros_like(center_targets)

            weights = nd.zeros_like(center_targets)
            objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
            class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
            class_targets[:] = -1  # prefill -1 for ignores

            # for each ground-truth, find the best matching anchor within the particular grid
            # for instance, if the center of object 1 resides in cell (3, 4) of a (16, 16) feature map,
            # then only the anchor in (3, 4) is going to be matched
            gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
            shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw, 0.5 * gth, dim=-1)
            anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero center anchors
            shift_anchor_boxes = self.bbox2corner(anchor_boxes)
            ious = nd.contrib.box_iou(shift_anchor_boxes, shift_gt_boxes).transpose((1, 0, 2))
            # real value is required to process, convert to Numpy
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
            np_gtx, np_gty, np_gtw, np_gth = [x.asnumpy() for x in [gtx, gty, gtw, gth]]
            np_anchors = all_anchors.asnumpy()

            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gt boxes is small, so the for loop
            # is not a problem right now; switching to a better solution is still needed.
            for b in range(matches.shape[0]):
                for m in range(matches.shape[1]):
                    if valid_gts[b, m] < 1:
                        break
                    match = int(matches[b, m])
                    nlayer = np.nonzero(num_anchors > match)[0][0]
                    height = xs[nlayer].shape[2]
                    width = xs[nlayer].shape[3]
                    gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                          np_gtw[b, m, 0], np_gth[b, m, 0])
                    # compute the location of the gt centers
                    loc_x = int(gtx / orig_width * width)
                    loc_y = int(gty / orig_height * height)
                    # write back to targets
                    index = _offsets[nlayer] + loc_y * width + loc_x
                    center_targets[b, index, match, 0] = gtx / orig_width * width - loc_x  # tx
                    center_targets[b, index, match, 1] = gty / orig_height * height - loc_y  # ty
                    scale_targets[b, index, match, 0] = np.log(max(gtw, 1) / np_anchors[match, 0])
                    scale_targets[b, index, match, 1] = np.log(max(gth, 1) / np_anchors[match, 1])

                    weights[b, index, match, :] = 2.0 - gtw * gth / orig_width / orig_height
                    objectness[b, index, match, 0] = (
                        np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                    class_targets[b, index, match, :] = 0
                    class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
            # some stages never see part of the anchors, so slice out the targets each stage needs
            objectness = self._slice(objectness, num_anchors, num_offsets)
            center_targets = self._slice(center_targets, num_anchors, num_offsets)
            scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
            weights = self._slice(weights, num_anchors, num_offsets)
            class_targets = self._slice(class_targets, num_anchors, num_offsets)
        return objectness, center_targets, scale_targets, weights, class_targets
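# Geometry sketch (added): the shift_*_boxes above turn (w, h) sizes into
# zero-centred corner boxes (xmin, ymin, xmax, ymax) by concatenating along
# the last axis, so IoU can be computed independent of box position:
from mxnet import nd
w, h = nd.array([[4.0]]), nd.array([[2.0]])
print(nd.concat(-0.5 * w, -0.5 * h, 0.5 * w, 0.5 * h, dim=-1))
# [[-2. -1.  2.  1.]]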
Esempio n. 48
0
    def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
        """Generating training targets that do not require network predictions.

        Parameters
        ----------
        img : mxnet.nd.NDArray
            Original image tensor.
        xs : list of mxnet.nd.NDArray
            List of feature maps.
        anchors : mxnet.nd.NDArray
            YOLO3 anchors.
        offsets : mxnet.nd.NDArray
            Pre-generated x and y offsets for YOLO3.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes.
        gt_ids : mxnet.nd.NDArray
            Ground-truth IDs.
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio from 0 to 1.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification.

        """
        assert isinstance(anchors, (list, tuple))
        all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
        assert isinstance(offsets, (list, tuple))
        all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
        num_anchors = np.cumsum([a.size // 2 for a in anchors])
        num_offsets = np.cumsum([o.size // 2 for o in offsets])
        _offsets = [0] + num_offsets.tolist()
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(offsets)

        # orig image size
        orig_height = img.shape[2]
        orig_width = img.shape[3]
        with autograd.pause():
            # outputs
            shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
                (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0], axis=0)
            center_targets = nd.zeros_like(shape_like)
            scale_targets = nd.zeros_like(center_targets)
            weights = nd.zeros_like(center_targets)
            objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
            class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
            class_targets[:] = -1  # prefill -1 for ignores

            # for each ground-truth, find the best matching anchor within the particular grid
            # for instance, if the center of object 1 resides in cell (3, 4) of a (16, 16) feature map,
            # then only the anchor in (3, 4) is going to be matched
            gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
            shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw, 0.5 * gth, dim=-1)
            anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero center anchors
            shift_anchor_boxes = self.bbox2corner(anchor_boxes)
            ious = nd.contrib.box_iou(shift_anchor_boxes, shift_gt_boxes).transpose((1, 0, 2))
            # real value is required to process, convert to Numpy
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
            np_gtx, np_gty, np_gtw, np_gth = [x.asnumpy() for x in [gtx, gty, gtw, gth]]
            np_anchors = all_anchors.asnumpy()
            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gt boxes is small, so the for loop
            # is not a problem right now; switching to a better solution is still needed.
            for b in range(matches.shape[0]):
                for m in range(matches.shape[1]):
                    if valid_gts[b, m] < 1:
                        break
                    match = int(matches[b, m])
                    nlayer = np.nonzero(num_anchors > match)[0][0]
                    height = xs[nlayer].shape[2]
                    width = xs[nlayer].shape[3]
                    gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                          np_gtw[b, m, 0], np_gth[b, m, 0])
                    # compute the location of the gt centers
                    loc_x = int(gtx / orig_width * width)
                    loc_y = int(gty / orig_height * height)
                    # write back to targets
                    index = _offsets[nlayer] + loc_y * width + loc_x
                    center_targets[b, index, match, 0] = gtx / orig_width * width - loc_x  # tx
                    center_targets[b, index, match, 1] = gty / orig_height * height - loc_y  # ty
                    scale_targets[b, index, match, 0] = np.log(gtw / np_anchors[match, 0])
                    scale_targets[b, index, match, 1] = np.log(gth / np_anchors[match, 1])
                    weights[b, index, match, :] = 2.0 - gtw * gth / orig_width / orig_height
                    objectness[b, index, match, 0] = (
                        np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                    class_targets[b, index, match, :] = 0
                    class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
            # some stages never see part of the anchors, so slice out the targets each stage needs
            objectness = self._slice(objectness, num_anchors, num_offsets)
            center_targets = self._slice(center_targets, num_anchors, num_offsets)
            scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
            weights = self._slice(weights, num_anchors, num_offsets)
            class_targets = self._slice(class_targets, num_anchors, num_offsets)
        return objectness, center_targets, scale_targets, weights, class_targets