def forward(self, x):
    """Run the convolutional VAE on a batch and return the per-sample loss.

    The encoder output is halved along axis 1 into the latent mean and the
    latent log-variance, a latent sample is drawn with the
    reparametrization trick, and the decoder reconstructs the input from it.
    No flattening is done before the encoder.

    :param x: input batch; per the original notes its shape is
        (batch_size, n_channels, width, height)
    :return: KL divergence plus ``self.pbp_weight`` times the pixel-by-pixel
        log loss, reduced over axis 1 (one value per sample)
    """
    n_samples = x.shape[0]

    # First half of the encoder output is the mean, second half the log-var
    mu, log_var = nd.split(self.encoder(x), axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * noise
    noise = nd.random_normal(0, 1, shape=(n_samples, self.n_latent), ctx=CTX)
    sigma = nd.exp(0.5 * log_var)
    z = mu + sigma * noise

    # KL divergence between the latent distribution and a standard normal
    kl = -0.5 * nd.sum(1 + log_var - mu * mu - nd.exp(log_var), axis=1)

    # The decoder is fed the latent sample as an (n, n_latent, 1, 1) array
    recon = self.decoder(z.reshape((n_samples, self.n_latent, 1, 1)))

    # Pixel-by-pixel log loss on the flattened images; 1e-10 avoids log(0)
    target_flat = x.reshape((n_samples, -1))
    recon_flat = recon.reshape((recon.shape[0], -1))
    positive_term = target_flat * nd.log(recon_flat + 1e-10)
    negative_term = (1 - target_flat) * nd.log(1 - recon_flat + 1e-10)
    pixel_loss = -nd.sum(positive_term + negative_term, axis=1)

    return kl + pixel_loss * self.pbp_weight
Example #2
0
def coordinate_distance(target, label):
    """Return the offset between two box encodings.

    Both inputs are halved along their last axis into an ``xy`` part and a
    ``wh`` part.  The ``xy`` parts are subtracted, the ``wh`` parts are
    compared as a log-ratio, and the two results are concatenated back along
    the last axis.
    """
    xy_t, wh_t = nd.split(target, 2, -1)
    xy_l, wh_l = nd.split(label, 2, -1)
    return nd.concat(xy_t - xy_l, nd.log(wh_t / wh_l), dim=-1)
    def generate(self, x):
        """Encode ``x``, sample a latent vector and return the decoded image.

        This mirrors the forward pass up to the decoder but returns the
        generated batch instead of a loss.  ``x`` is passed to the encoder
        as-is (no flattening here).

        :param x: input batch; the original notes describe it as
            4-dimensional image arrays
        :return: decoder output reshaped to
            (-1, n_out_channels, out_width, out_height)
        """
        # First half of the encoder output is the mean, second half the log-var
        mu, log_var = nd.split(self.encoder(x), axis=1, num_outputs=2)

        # Reparametrization trick keeps the sample differentiable
        noise = nd.random_normal(loc=0,
                                 scale=1,
                                 shape=(x.shape[0], self.n_latent),
                                 ctx=CTX)
        z = mu + nd.exp(0.5 * log_var) * noise

        # Decode and shape the result as a batch of images
        decoded = self.decoder(z)
        return decoded.reshape(-1, self.n_out_channels, self.out_width,
                               self.out_height)
    def generate(self, x):
        """Encode ``x``, sample a latent vector and return the generated image.

        Repeats the forward pass up to the decoder output and returns it
        reshaped to the input's shape.  No loss term is computed here: the
        KL divergence that used to be evaluated in this method was never
        used, so it has been dropped.

        :param x: 4-dimensional input batch
            (batch_size, n_channels, width, height)
        :return: decoder output reshaped to ``x.shape``
        """
        batch_size, n_channels_in, input_width, input_height = x.shape

        # The encoder works on flattened images
        x_flattened = x.reshape(batch_size, -1)

        # Split once: first half is the latent mean, second half the log-var
        latent_mean, latent_logvar = nd.split(self.encoder(x_flattened),
                                              axis=1,
                                              num_outputs=2)

        # Reparametrization trick
        eps = nd.random_normal(loc=0,
                               scale=1,
                               shape=(batch_size, self.n_latent),
                               ctx=CTX)
        latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

        # The decoder returns a flattened image; inflate it back to 4-D
        x_hat_flattened = self.decoder(latent_z)
        return x_hat_flattened.reshape(batch_size, n_channels_in, input_width,
                                       input_height)
    def forward(self, x):
        """Run the VAE on a batch and return the loss given by ``self.loss_net``.

        Side effects: stores the batch size on ``self.loss_net.batch_size``
        and the decoder output on ``self.x_hat``.

        :param x: input batch whose first axis is the batch axis (the
            original notes give (n_batch, n_channels, width, height))
        :return: ``self.loss_net(x_flat, x_hat)`` where ``x_flat`` is the
            flattened input
        """
        n_samples = x.shape[0]
        flat_input = x.reshape(n_samples, -1)
        self.loss_net.batch_size = n_samples

        # First half of the encoder output is the mean, second half the log-var
        mu, log_var = nd.split(self.encoder(flat_input),
                               axis=1,
                               num_outputs=2)

        # Reparametrization trick keeps the sample differentiable
        noise = nd.random_normal(loc=0,
                                 scale=1,
                                 shape=(n_samples, self.n_latent),
                                 ctx=CTX)
        z = mu + nd.exp(0.5 * log_var) * noise

        # Keep the reconstruction around for later inspection
        self.x_hat = self.decoder(z)

        # The loss network (a VGG loss net per the original comment) scores
        # the reconstruction against the flattened input
        return self.loss_net(flat_input, self.x_hat)
Example #6
0
 def matmul(self, x, y, transpose_a=False,transpose_b=False):
     """Batch-multiply ``x`` and ``y`` separately for every slice of axis 2.

     Both inputs are cut into ``self.embedding_size`` pieces along axis 2,
     matching pieces are multiplied with ``nd.batch_dot`` and the products
     are stacked on a new leading axis.

     The products are stacked on-device with ``nd.stack`` rather than being
     copied to the host (``asnumpy().tolist()``) and rebuilt with
     ``nd.array``: that round trip blocked on every slice, detached the
     result from autograd and forced the default dtype.

     :param x: left operand, split along axis 2
     :param y: right operand, split along axis 2
     :param transpose_a: forwarded to ``nd.batch_dot``
     :param transpose_b: forwarded to ``nd.batch_dot``
     :return: NDArray on ``self.ctx`` with one product per slice along
         axis 0
     """
     x_slices = nd.split(x, self.embedding_size, 2)
     y_slices = nd.split(y, self.embedding_size, 2)
     products = [
         nd.batch_dot(x_slices[idx], y_slices[idx], transpose_a,
                      transpose_b=transpose_b)
         for idx in range(self.embedding_size)
     ]
     # as_in_context keeps the original contract of returning on self.ctx
     return nd.stack(*products, axis=0).as_in_context(self.ctx)
Example #7
0
    def hybrid_forward(self, F, score_gt, kernel_gt, score_pred,
                       training_masks, *args, **kwargs):
        """Dice loss for the text map (with OHEM masks) and the six kernels.

        Fixes relative to the previous version:

        * the text-map union used ``selected_mask`` (the mask of the last
          sample left over from the loop) instead of the concatenated
          ``selected_masks``;
        * a dead ``kernel_dice`` assignment was removed;
        * the per-kernel losses are averaged with ``F.add_n`` instead of
          being rebuilt through ``F.array``, which created a fresh array
          detached from the autograd graph;
        * the kernel mask does not depend on the kernel, so it is built once.

        Side effects: sets ``self.pixel_acc``, ``self.kernel_loss`` and
        ``self.C_loss``.

        :param F: operator namespace supplied by Gluon
        :param score_gt: ground-truth text map
        :param kernel_gt: ground-truth kernels, six of them along axis 3
        :param score_pred: predictions, seven maps along axis 1 (six
            kernels followed by the text map)
        :param training_masks: mask applied to the kernel terms
        :return: ``lam * C_dice_loss + (1 - lam) * kernel_dice_loss``
        """
        # One OHEM mask per sample, concatenated back along the batch axis
        # (what _ohem_single selects is defined outside this block)
        selected_masks = [
            self._ohem_single(score_gt[i:i + 1], score_pred[i:i + 1],
                              training_masks[i:i + 1])
            for i in range(score_gt.shape[0])
        ]
        selected_masks = F.concat(*selected_masks, dim=0)

        s1, s2, s3, s4, s5, s6 = F.split(kernel_gt,
                                         num_outputs=6,
                                         axis=3,
                                         squeeze_axis=True)
        s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred, C_pred = F.split(
            score_pred, num_outputs=7, axis=1, squeeze_axis=True)

        self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)

        # Dice loss for the text map, restricted to the OHEM-selected pixels
        eps = 1e-5
        intersection = F.sum(score_gt * C_pred * selected_masks, axis=1)
        union = F.sum(score_gt * selected_masks, axis=1) + F.sum(
            C_pred * selected_masks, axis=1) + eps
        C_dice_loss = 1. - F.mean((2 * intersection / union))

        # Kernel losses only count pixels that are predicted text (> 0.5) or
        # ground-truth text.  Every s_pred has the same shape as C_pred
        # (they come from the same split), so the mask is loop-invariant.
        kernel_mask = F.where(C_pred > 0.5, F.ones_like(C_pred),
                              F.zeros_like(C_pred))
        kernel_mask = F.cast(kernel_mask, dtype='float32')
        kernel_mask = F.cast(F.logical_or(kernel_mask, score_gt),
                             dtype='float32')

        kernel_dices = []
        for s, s_pred in zip(
            [s1, s2, s3, s4, s5, s6],
            [s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred]):
            s = F.cast(s, dtype='float32')
            kernel_intersection = F.sum(s * s_pred * training_masks *
                                        kernel_mask,
                                        axis=1)
            kernel_union = F.sum(
                training_masks * s * kernel_mask, axis=1) + F.sum(
                    training_masks * s_pred * kernel_mask, axis=1) + eps
            kernel_dices.append(1. - F.mean(
                (2. * kernel_intersection / kernel_union)))

        # Average on-graph so the kernel term contributes gradients
        kernel_dice_loss = F.add_n(*kernel_dices) / len(kernel_dices)

        self.kernel_loss = kernel_dice_loss
        self.C_loss = C_dice_loss

        loss = self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss

        return loss
    def forward(self, x, first_cycle=False):
        """Run the VAE and return KL divergence plus a content loss.

        The content loss is the negated sigmoid of the discriminator's
        output on the generated image, unless ``first_cycle`` is set, in
        which case the pixel-wise log loss is used instead.  The
        discriminator is evaluated in both cases.

        :param x: 4-dimensional input batch
            (batch_size, n_channels, width, height)
        :param first_cycle: use the log loss as content loss (the original
            notes say this is for the first training cycle, when the
            discriminator is still untrained)
        :return: per-sample ``KL_div_loss + content_loss``
        """
        batch_size, n_channels_in, input_width, input_height = x.shape

        # The encoder works on flattened images
        x_flattened = x.reshape(batch_size, -1)

        # First half of the encoder output is the mean, second half the log-var
        mu, log_var = nd.split(self.encoder(x_flattened),
                               axis=1,
                               num_outputs=2)

        # Reparametrization trick
        noise = nd.random_normal(loc=0,
                                 scale=1,
                                 shape=(batch_size, self.n_latent),
                                 ctx=CTX)
        z = mu + nd.exp(0.5 * log_var) * noise

        # KL divergence between the latent distribution and Gaussian(0, 1)
        kl = -0.5 * nd.sum(1 + log_var - mu * mu - nd.exp(log_var), axis=1)

        # Decode, then inflate the flat output to image shape for the
        # discriminator
        x_hat_flattened = self.decoder(z)
        x_hat = x_hat_flattened.reshape(batch_size, n_channels_in,
                                        input_width, input_height)

        # A higher discriminator output means "more real", so the negated
        # sigmoid is what gets minimised
        adversarial = -nd.sigmoid(self.discriminator(x_hat)).reshape(-1)

        if first_cycle:
            # Pixel-wise log loss; 1e-10 avoids log(0)
            positive_term = x_flattened * nd.log(x_hat_flattened + 1e-10)
            negative_term = (1 - x_flattened) * nd.log(1 - x_hat_flattened +
                                                       1e-10)
            content = -nd.sum(positive_term + negative_term, axis=1)
        else:
            content = adversarial

        return kl + content
Example #9
0
def _split_batch(arg, batch_axis, arg_size):
    """Split ``arg`` into ``arg_size`` pieces along ``batch_axis``.

    A single NDArray is split directly (or wrapped in a 1-tuple when
    ``arg_size`` is 1).  Anything else is flattened with ``_flatten``, each
    flattened item is split the same way, and the pieces are regrouped with
    ``_regroup`` so that one structure per piece is returned.

    :return: the result of ``nd.split`` or a 1-tuple for an NDArray input,
        otherwise a list with one regrouped structure per piece
    """
    needs_split = arg_size > 1

    if isinstance(arg, nd.NDArray):
        if needs_split:
            return nd.split(arg, arg_size, axis=batch_axis)
        return (arg, )

    flat_items, fmt = _flatten(arg)
    if needs_split:
        pieces = (nd.split(item, arg_size, axis=batch_axis)
                  for item in flat_items)
    else:
        pieces = ((item, ) for item in flat_items)

    # zip(*pieces) gathers the k-th piece of every flattened item
    return [_regroup(group, fmt)[0] for group in zip(*pieces)]
Example #10
0
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):
    """Apply regression deltas to corner-format boxes.

    ``bbox`` is split along axis 1 into xmin/ymin/xmax/ymax and
    ``bbox_delta`` is reshaped to ``(0, -1, 4)`` and split into dx/dy/dw/dh.
    Centres are shifted by ``size * d{x,y}`` and sizes scaled by
    ``exp(d{w,h})``; the result is converted back to corners (using the
    "+1 / -1" pixel convention) and concatenated along axis 1.

    :param bbox: boxes with the four corner coordinates along axis 1
    :param bbox_delta: regression deltas, four values per box
    :param im_info: optional; the original note assumes
        ``[[height, width, scale]]`` with shape (1, 3).  When given, boxes
        are clipped to ``[0, width - 1]`` / ``[0, height - 1]``
    :param means: optional de-normalisation means (used only with ``stds``)
    :param stds: optional de-normalisation stds (used only with ``means``)
    :return: refined boxes
    """
    x_lo, y_lo, x_hi, y_hi = nd.split(data=bbox, num_outputs=4, axis=1)
    box_w = x_hi - x_lo + 1.
    box_h = y_hi - y_lo + 1.
    ctr_x = 0.5 * (x_lo + x_hi)
    ctr_y = 0.5 * (y_lo + y_hi)

    deltas = nd.Reshape(data=bbox_delta, shape=(0, -1, 4))
    dx, dy, dw, dh = nd.split(data=deltas,
                              num_outputs=4,
                              axis=2,
                              squeeze_axis=1)
    if means is not None and stds is not None:
        # Undo the normalisation applied to the regression targets
        dx, dy, dw, dh = (delta * stds[k] + means[k]
                          for k, delta in enumerate((dx, dy, dw, dh)))

    new_ctr_x = nd.broadcast_add(lhs=ctr_x,
                                 rhs=nd.broadcast_mul(lhs=box_w, rhs=dx))
    new_ctr_y = nd.broadcast_add(lhs=ctr_y,
                                 rhs=nd.broadcast_mul(lhs=box_h, rhs=dy))
    new_w = nd.broadcast_mul(lhs=box_w, rhs=nd.exp(dw))
    new_h = nd.broadcast_mul(lhs=box_h, rhs=nd.exp(dh))
    half_w = 0.5 * (new_w - 1.)
    half_h = 0.5 * (new_h - 1.)

    corners = [
        nd.expand_dims(edge, axis=1)
        for edge in (new_ctr_x - half_w, new_ctr_y - half_h,
                     new_ctr_x + half_w, new_ctr_y + half_h)
    ]
    refined_bbox = nd.concat(*corners, dim=1)

    if im_info is None:
        return refined_bbox

    # Build (w-1, h-1, w-1, h-1) shaped (1, 4, 1) and clip to the image
    im_hw = nd.slice_axis(im_info, axis=1, begin=0, end=2)
    im_wh = nd.reverse(im_hw, axis=1) - 1.
    im_wh = nd.tile(data=im_wh, reps=(1, 2))
    im_wh = nd.Reshape(im_wh, shape=(1, 4, 1))
    refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=im_wh)
    return nd.broadcast_maximum(lhs=refined_bbox,
                                rhs=nd.zeros_like(refined_bbox))
Example #11
0
 def matmul(self, x, y, transpose_a=False, transpose_b=False):
     """Batch-multiply ``x`` and ``y`` separately for every slice of axis 2.

     Both inputs are cut into ``self.embedding_size`` pieces along axis 2,
     matching pieces are multiplied with ``nd.batch_dot``, each product is
     reshaped to ``(1, -1, m, h_k)`` and the products are concatenated
     along axis 0.

     The products are concatenated in a single call.  The previous version
     grew a zero-seeded accumulator by one ``nd.concat`` per slice and then
     sliced the seed off, which copied the accumulated data every iteration.

     :param x: left operand; ``m`` is taken from ``x.shape[1]``
     :param y: right operand; ``h_k`` is taken from ``y.shape[1]``
     :param transpose_a: forwarded to ``nd.batch_dot``
     :param transpose_b: forwarded to ``nd.batch_dot``
     :return: NDArray of shape (embedding_size, batch, m, h_k)
     """
     m = x.shape[1]  # field
     h_k = y.shape[1]
     x_slices = nd.split(x, self.embedding_size, 2)
     y_slices = nd.split(y, self.embedding_size, 2)
     products = [
         nd.batch_dot(x_slices[idx],
                      y_slices[idx],
                      transpose_a,
                      transpose_b=transpose_b).reshape((1, -1, m, h_k))
         for idx in range(self.embedding_size)
     ]
     return nd.concat(*products, dim=0)
Example #12
0
    def backward(self, grad_output):
        """Return the gradients with respect to the saved ``X`` and ``W``.

        ``X_out`` is recomputed as the concatenation (axis 1) of ``X`` and
        ``A @ X`` for every entry of ``self.A_list`` (zeros where the entry
        is ``None``).  ``grad_W`` is ``X_out.T @ grad_output``; the gradient
        with respect to ``X_out`` is split back into one block per term and
        each block is propagated through its ``A``.

        NOTE(review): the blocks are multiplied by ``A`` rather than its
        transpose, which presumably relies on ``A`` being symmetric -
        confirm.

        :param grad_output: gradient flowing in from the next layer
        :return: ``(grad_X, grad_W)``
        """
        X, W = self.saved_tensors

        # Recompute X_out instead of keeping it alive between passes
        propagated = [
            nd.sparse.dot(A, X) if A is not None else nd.zeros_like(X)
            for A in self.A_list
        ]
        X_out = nd.concat(X, *propagated, dim=1)

        grad_W = nd.dot(X_out.T, grad_output)

        # One gradient block per term of X_out: [X, A_0 X, A_1 X, ...]
        grad_blocks = nd.split(nd.dot(grad_output, W.T),
                               num_outputs=len(self.A_list) + 1)

        contributions = [grad_blocks[0]]
        for A, block in zip(self.A_list, grad_blocks[1:]):
            if A is None:
                contributions.append(nd.zeros_like(block))
            else:
                contributions.append(nd.sparse.dot(A, block))

        grad_X = sum(contributions)

        return grad_X, grad_W
Example #13
0
def extract_multi_position_matrix_nd(bbox):
    """Build pairwise log-space position features between boxes.

    ``bbox`` is transposed with axes (1, 0, 2) and split along the last axis
    into xmin/ymin/xmax/ymax.  For every pair of boxes four features are
    computed and stacked on a new trailing axis:

    * ``log(max(|dx| / width, 1e-3))``
    * ``log(max(|dy| / height, 1e-3))``
    * ``log(width_i / width_j)``
    * ``log(height_i / height_j)``

    Per the original notes the intermediate arrays are
    [num_fg_classes, num_boxes, 1] before and
    [num_fg_classes, num_boxes, num_boxes] after the pairwise broadcast.

    :param bbox: corner-format boxes with the four coordinates on axis 2
    :return: position matrix with the four features on axis 3
    """
    boxes = nd.transpose(bbox, axes=(1, 0, 2))
    xmin, ymin, xmax, ymax = nd.split(data=boxes, num_outputs=4, axis=2)

    widths = xmax - xmin + 1.
    heights = ymax - ymin + 1.
    ctr_x = 0.5 * (xmin + xmax)
    ctr_y = 0.5 * (ymin + ymax)

    def _swap_last_two(arr):
        # Turns a column of per-box values into a row for broadcasting
        return nd.transpose(arr, axes=(0, 2, 1))

    def _log_center_offset(center, extent):
        # Pairwise centre difference, normalised by the box extent
        diff = nd.broadcast_minus(lhs=center, rhs=_swap_last_two(center))
        diff = nd.broadcast_div(diff, extent)
        return nd.log(nd.maximum(nd.abs(diff), 1e-3))

    def _log_size_ratio(extent):
        # Pairwise ratio of box extents
        ratio = nd.broadcast_div(lhs=extent, rhs=_swap_last_two(extent))
        return nd.log(ratio)

    components = [
        _log_center_offset(ctr_x, widths),
        _log_center_offset(ctr_y, heights),
        _log_size_ratio(widths),
        _log_size_ratio(heights),
    ]
    expanded = [nd.expand_dims(part, axis=3) for part in components]
    return nd.concat(*expanded, dim=3)
Example #14
0
    def forward(self, inputs, batch_size):
        """Strip padding from ``inputs`` and decode the best tag sequence.

        The sequence is cut at the first row whose first element is <= 0.
        If nothing is left, a diagnostic is printed and ``None`` is returned.

        :param inputs: per the original notes, shape
            (sequence_length, batch_size)
        :param batch_size: forwarded to ``self._get_lstm_features`` and used
            as the number of pieces when re-arranging the features
        :return: ``(tag_seq, score)`` from ``self._viterbi_decode``, or
            ``None`` when the unpadded sequence is empty
        """
        total_steps = len(inputs)

        # Remove padding: keep everything before the first non-positive entry
        sequence_length = total_steps
        for step in range(total_steps):
            if inputs[step, 0].asscalar() <= 0:
                sequence_length = step
                break

        if sequence_length == 0:
            print("sequence_length=0")
            print(inputs)
            return

        inputs = inputs[0:sequence_length, :]

        # Emission scores from the BiLSTM
        lstm_feats = self._get_lstm_features(inputs, batch_size)
        # The string below is the original author's note; it says the goal is
        # to convert [sequence_length, batch_size, tagset_size] into
        # [batch_size, sequence_length, tagset_size].
        '''
        目的:将[sequence_length, batch_size, tagset_size]维度转换为[batch_size, sequence_length, tagset_size]
        '''
        # Split into batch_size pieces along axis 0, then join them again and
        # reshape to (sequence_length, tagset_size)
        pieces = nd.split(lstm_feats, num_outputs=batch_size, axis=0)
        lstm_feats = nd.concat(*pieces, dim=0).reshape(sequence_length,
                                                       self.tagset_size)

        # Best path through the tag lattice given the features
        tag_seq, score = self._viterbi_decode(lstm_feats)
        return tag_seq, score
Example #15
0
def evaluate(data_iter_valid, model, state, loss, word_vocab, label_vocab,
             max_seq_len, only_ne_cate_dic):
    """Run ``model`` over ``data_iter_valid`` and report P/R/F1 and mean loss.

    Every batch is scored with ``loss`` (labels are split into
    ``max_seq_len`` pieces along axis 1); predictions, labels and inputs are
    collected as int32 numpy arrays and handed to ``cal_prf1``.

    :return: ``(prf_dic, valid_loss)`` - a DataFrame with columns P, R, F1
        indexed by the keys returned from ``convert_signal_to_ne_name``, and
        the loss averaged over batches
    """

    def _as_int32(array):
        return array.asnumpy().astype(np.int32, copy=False)

    loss_total = 0.
    pred_batches, true_batches, input_batches = [], [], []

    for n_batch, (batch_x, batch_nature,
                  batch_y) in enumerate(data_iter_valid):
        batch_score, batch_pred, feats, _ = model(batch_x, batch_nature, state)
        l = loss(feats, nd.split(batch_y, max_seq_len, axis=1))

        pred_batches.append(_as_int32(batch_pred))
        true_batches.append(_as_int32(batch_y))
        input_batches.append(_as_int32(batch_x))

        loss_total += l.mean().asscalar()

    y_pred = np.vstack(pred_batches)
    y_true = np.vstack(true_batches)
    sentences_input = np.vstack(input_batches)
    valid_loss = loss_total / (n_batch + 1)

    # Compute P / R / F1 on the evaluated set
    raw_prf_dic = cal_prf1(y_pred.tolist(), y_true.tolist(),
                           sentences_input.tolist(), label_vocab, word_vocab,
                           max_seq_len, only_ne_cate_dic)

    named_prf = convert_signal_to_ne_name(only_ne_cate_dic, raw_prf_dic)
    prf_dic = pd.DataFrame(list(named_prf.values()),
                           index=list(named_prf.keys()),
                           columns=['P', 'R', 'F1'])

    return prf_dic, valid_loss
Example #16
0
    def predict_LP(self, LP_batch_out):
        """Pick, for every sample, the highest-scoring 7-value prediction.

        The raw output is regrouped by ``self.merge_and_slice`` into score,
        xy, z and r parts.  The score goes through a sigmoid, the parts are
        concatenated on the last axis, and for each sample the row with the
        highest score is selected.  Elements 1..6 of that row are passed
        through ``self.LP_pose_activation``.

        :param LP_batch_out: raw network output for a batch
        :return: numpy array with one selected row per sample
        """
        parts = self.merge_and_slice(LP_batch_out, self.LP_slice_point)

        LP_score = nd.sigmoid(parts[0])
        merged = nd.concat(LP_score, parts[1], parts[2], parts[3], dim=-1)

        # One array per sample
        per_sample = nd.split(merged, axis=0, num_outputs=len(merged))

        selected = []
        for i, sample in enumerate(per_sample):
            best_index = LP_score[i].reshape(-1).argmax(axis=0)
            candidates = sample.reshape((-1, 7))

            pred = candidates[best_index][0]  # best candidate
            pred[1:7] = self.LP_pose_activation(pred[1:7])
            selected.append(nd.expand_dims(pred, axis=0))

        return nd.concat(*selected, dim=0).asnumpy()
Example #17
0
 def _split_box(x, num_outputs, axis, squeeze_axis=False):
     """``nd.split`` that always returns a sequence.

     If ``nd.split`` hands back something that is not a list or tuple, it is
     wrapped in a one-element list so callers can always index or iterate
     the result.
     """
     pieces = nd.split(x,
                       axis=axis,
                       num_outputs=num_outputs,
                       squeeze_axis=squeeze_axis)
     return pieces if isinstance(pieces, (list, tuple)) else [pieces]
    def forward(self, x):
        """Run the VAE on a batch and return KL divergence plus log loss.

        Side effects: the two loss terms are stored on ``self.KL_div_loss``
        and ``self.logloss``.

        :param x: input batch whose first axis is the batch axis (the
            original notes give (n_batch, n_channels, width, height)); it is
            flattened before encoding
        :return: per-sample ``KL_div_loss + logloss``
        """
        n_samples = x.shape[0]
        x = x.reshape(n_samples, -1)

        # First half of the encoder output is the mean, second half the log-var
        mu, log_var = nd.split(self.encoder(x), axis=1, num_outputs=2)

        # Reparametrization trick keeps the sample differentiable
        noise = nd.random_normal(loc=0,
                                 scale=1,
                                 shape=(n_samples, self.n_latent),
                                 ctx=CTX)
        z = mu + nd.exp(0.5 * log_var) * noise

        x_hat = self.decoder(z)

        # KL divergence between the latent distribution and a standard normal
        self.KL_div_loss = -0.5 * nd.sum(
            1 + log_var - mu * mu - nd.exp(log_var), axis=1)

        # Cross entropy between input and reconstruction; 1e-10 avoids log(0)
        self.logloss = -nd.sum(x * nd.log(x_hat + 1e-10) +
                               (1 - x) * nd.log(1 - x_hat + 1e-10),
                               axis=1)

        return self.KL_div_loss + self.logloss
Example #19
0
    def hybrid_forward(self, F_geo_true, F_geo_pred):
        """Geometry loss: ``-log`` IoU term plus a weighted angle term.

        Both inputs are split along axis 3 into top / right / bottom / left
        distances and an angle.  The box term is
        ``-log((intersection + 1) / (union + 1))``, the angle term is
        ``1 - cos(theta_pred - theta_true)`` scaled by ``self.lambda_value``.

        NOTE(review): the signature has no operator-namespace argument ``F``;
        confirm how this method is invoked.
        NOTE(review): the summed loss is weighted by ``F_geo_true`` itself -
        confirm this is the intended mask.

        :return: ``sum((L_AABB + lambda * L_theta) * F_geo_true)``
        """
        top_t, right_t, bottom_t, left_t, theta_t = nd.split(F_geo_true,
                                                             axis=3,
                                                             num_outputs=5)
        top_p, right_p, bottom_p, left_p, theta_p = nd.split(F_geo_pred,
                                                             axis=3,
                                                             num_outputs=5)

        area_true = (top_t + bottom_t) * (right_t + left_t)
        area_pred = (top_p + bottom_p) * (right_p + left_p)

        # Width and height of the overlap between the two boxes
        overlap_w = mx.nd.minimum(right_t, right_p) + mx.nd.minimum(
            left_t, left_p)
        overlap_h = mx.nd.minimum(top_t, top_p) + mx.nd.minimum(
            bottom_t, bottom_p)
        area_intersect = overlap_w * overlap_h
        area_union = area_true + area_pred - area_intersect

        box_term = -nd.log((area_intersect + 1.0) / (area_union + 1.0))
        angle_term = 1 - nd.cos(theta_p - theta_t)
        per_pixel = box_term + self.lambda_value * angle_term

        return mx.nd.sum(per_pixel * F_geo_true)
 def generate(self, x):
     """Encode ``x``, sample a latent vector and return the decoded image.

     :param x: input batch; per the original notes its shape is
         (batch_size, n_channels, width, height)
     :return: decoder output for the sampled latent vector
     """
     n_samples = x.shape[0]

     # First half of the encoder output is the mean, second half the log-var
     mu, log_var = nd.split(self.encoder(x), axis=1, num_outputs=2)

     # Reparametrization trick
     noise = nd.random_normal(0, 1, shape=(n_samples, self.n_latent), ctx=CTX)
     z = mu + nd.exp(0.5 * log_var) * noise

     # The decoder is fed the latent sample as an (n, n_latent, 1, 1) array
     return self.decoder(z.reshape((n_samples, self.n_latent, 1, 1)))
Example #21
0
def BBoxCornerToCenter(x, axis=-1, split=False):
    """Convert (xmin, ymin, xmax, ymax) boxes to (cx, cy, width, height).

    :param x: boxes with the four corner coordinates along ``axis``
    :param axis: axis holding the coordinates
    :param split: return the four components separately instead of
        concatenating them along ``axis``
    :return: a single array, or the tuple ``(cx, cy, width, height)`` when
        ``split`` is true
    """
    left, top, right, bottom = nd.split(x, axis=axis, num_outputs=4)
    box_w = right - left
    box_h = bottom - top
    center_x = left + box_w / 2
    center_y = top + box_h / 2
    if split:
        return center_x, center_y, box_w, box_h
    return nd.concat(center_x, center_y, box_w, box_h, dim=axis)
Example #22
0
    def hybrid_forward(self, F, x):
        """Compute local (part) and global outputs of the three branches.

        ``x`` goes through ``self.base`` and then three branches.  The
        second branch's output is cut into 2 parts and the third's into 3
        parts along axis 2; each part goes through its ``self.feat`` layer
        and, when ``self.pretrained`` is set, its ``self.classify`` layer.
        The three branch outputs also go through ``self.g_feat`` (collected
        in ``features``) and, when ``self.pretrained`` is set,
        ``self.g_classify``.

        The splits use ``F.split`` rather than ``nd.split`` so the block
        keeps working when it is hybridized and ``F`` is the symbol API.

        :param F: operator namespace supplied by Gluon
        :param x: input batch
        :return: ``(outputs, features)`` - ``outputs`` holds the 5 part
            outputs followed by the 3 global outputs, ``features`` the 3
            global features
        """
        x = self.base(x)

        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)

        outputs = []
        features = []

        # local: 2 parts from branch 2, then 3 parts from branch 3
        parts_2 = F.split(x2, axis=2, num_outputs=2)
        for i in range(2):
            part = self.feat[i](parts_2[i])
            if self.pretrained:
                part = self.classify[i](part)
            outputs.append(part)

        parts_3 = F.split(x3, axis=2, num_outputs=3)
        for i in range(3):
            # feat/classify layers 2..4 belong to the branch-3 parts
            part = self.feat[i + 2](parts_3[i])
            if self.pretrained:
                part = self.classify[i + 2](part)
            outputs.append(part)

        # global
        g_part_1 = self.g_feat[0](x1)
        g_part_2 = self.g_feat[1](x2)
        g_part_3 = self.g_feat[2](x3)
        features.append(g_part_1)
        features.append(g_part_2)
        features.append(g_part_3)
        if self.pretrained:
            g_part_1 = self.g_classify[0](g_part_1)
            g_part_2 = self.g_classify[1](g_part_2)
            g_part_3 = self.g_classify[2](g_part_3)
        outputs.append(g_part_1)
        outputs.append(g_part_2)
        outputs.append(g_part_3)

        return outputs, features
Example #23
0
def BBoxCenterToCorner(x, axis=-1, split=False):
    """Convert (cx, cy, width, height) boxes to (xmin, ymin, xmax, ymax).

    :param x: boxes with the four centre-format values along ``axis``
    :param axis: axis holding the values
    :param split: return the four components separately instead of
        concatenating them along ``axis``
    :return: a single array, or the tuple ``(xmin, ymin, xmax, ymax)`` when
        ``split`` is true
    """
    center_x, center_y, box_w, box_h = nd.split(x, axis=axis, num_outputs=4)
    half_w = box_w / 2
    half_h = box_h / 2
    left = center_x - half_w
    top = center_y - half_h
    right = center_x + half_w
    bottom = center_y + half_h
    if split:
        return left, top, right, bottom
    return nd.concat(left, top, right, bottom, dim=axis)
Example #24
0
def train(data_iter_train, data_iter_valid, model, loss, trainer, CTX,
          num_epochs, word_vocab, label_vocab, max_seq_len, ne_cate_dic):
    """Train ``model`` for ``num_epochs`` epochs and report metrics per epoch.

    Every 100 batches the current loss and up to three sample predictions
    are printed.  After each epoch ``evaluate`` is run on both the training
    and the validation iterator and the results are printed.

    Changes relative to the previous version: the sample printout is capped
    at the batch size (it used to index three samples unconditionally and
    raised IndexError on smaller batches), and an unused ``start`` timestamp
    was removed.

    :param data_iter_train: iterable of (batch_x, batch_nature, batch_y)
    :param data_iter_valid: iterable with the same layout, for validation
    :param model: callable returning (score, pred, feats, _)
    :param loss: loss taking ``feats`` and the split labels
    :param trainer: optimiser wrapper; ``step`` is called with the batch size
    :param CTX: only printed here
    :param num_epochs: number of passes over ``data_iter_train``
    :param word_vocab: forwarded to ``evaluate``
    :param label_vocab: used to turn label indices into tokens
    :param max_seq_len: number of pieces the labels are split into
    :param ne_cate_dic: category dict; a copy without the '不是实体' key
        (the key reads "not an entity") is passed to ``evaluate``
    """
    print('Train on ', CTX)
    only_ne_cate_dic = ne_cate_dic.copy()
    only_ne_cate_dic.pop('不是实体')
    print(only_ne_cate_dic)
    print(ne_cate_dic)
    for epoch in range(1, num_epochs + 1):
        states = None
        for n_batch, (batch_x, batch_nature,
                      batch_y) in enumerate(data_iter_train):
            with autograd.record():
                batch_score, batch_pred, feats, _ = model(
                    batch_x, batch_nature, states)
                l = loss(feats, nd.split(batch_y, max_seq_len, axis=1))
            l.backward()
            trainer.step(batch_x.shape[0])

            # Every 100 batches, print the loss and a few samples
            if (n_batch + 1) % 100 == 0:
                print("Epoch {0}, n_batch {1}, loss {2}".format(
                    epoch, n_batch + 1,
                    l.mean().asscalar()))
                batch_y = batch_y.asnumpy().astype(np.int32, copy=False)
                batch_pred = batch_pred.asnumpy().astype(np.int32, copy=False)
                # Never index past the end of a short batch
                for example in range(min(3, len(batch_y))):
                    true_idx = batch_y[example].tolist()
                    pred_idx = batch_pred[example].tolist()

                    true_label = label_vocab.to_tokens(true_idx)
                    pred_label = label_vocab.to_tokens(pred_idx)

                    print("    Sample {0}: ".format(example))
                    print("    True Label {0}: ".format(true_label))
                    print("    Pred Label {0}: ".format(pred_label))
        # Evaluate on the training set and on the validation set
        print('Evaluating...')

        prf_dic_train, train_loss = evaluate(data_iter_train, model, states,
                                             loss, word_vocab, label_vocab,
                                             max_seq_len, only_ne_cate_dic)
        prf_dic_valid, valid_loss = evaluate(data_iter_valid, model, states,
                                             loss, word_vocab, label_vocab,
                                             max_seq_len, only_ne_cate_dic)

        print("===========================================")
        print("Epoch {0}, epoch_loss_train {1}, epoch_loss_valid {2}".format(
            epoch, train_loss, valid_loss))
        print(prf_dic_train)
        print(prf_dic_valid)
        print("===========================================")
        print()
Example #25
0
 def get_indices(self, boxes):
     """Map every box to the 1-based index of the patch holding its centre.

     The image is tiled with patches of ``patch_width`` x ``patch_height``;
     patches are numbered row by row starting at 1.  A leading 0 is
     prepended to the indices and the result is returned as a single row.

     The leading zero is created on the same device and with the same dtype
     as the computed indices.  It used to be created on ``mx.gpu()``
     unconditionally, which failed on CPU-only machines and whenever the
     boxes lived on a different device.

     :param boxes: corner-format boxes with the four coordinates on axis 1
     :return: NDArray of shape (1, num_boxes + 1)
     """
     w, h = self.patch_width, self.patch_height
     patches_per_row = math.floor(self.image_width / w)

     x_min, y_min, x_max, y_max = F.split(data=boxes, num_outputs=4, axis=1)
     cx = 0.5 * (x_min + x_max)
     cy = 0.5 * (y_min + y_max)

     indices = 1 + mx.nd.floor(
         cx / w) + patches_per_row * mx.nd.floor(cy / h)
     leading_zero = mx.nd.zeros(shape=(1, 1),
                                ctx=indices.context,
                                dtype=indices.dtype)
     indices = mx.nd.concat(leading_zero, indices, dim=0)
     return indices.reshape(1, -1)
Example #26
0
    def forward(self, feature, data, begin_state):
        """Unroll ``self.forward_single`` over axis 2 of ``data``.

        ``data`` is cut into one slice per step along axis 2 (the axis is
        squeezed away), every slice is fed to ``forward_single`` together
        with the running state, and the per-step outputs are stacked back on
        axis 2.

        A sequence of length 1 is handled explicitly: ``nd.split`` returns a
        bare NDArray in that case, and iterating it would walk its first
        axis instead of yielding the single step.

        :param feature: passed unchanged to every ``forward_single`` call
        :param data: 4-dimensional array
            (num_nodes, batch_size, length, features)
        :param begin_state: state fed to the first step
        :return: ``(outputs, state)`` - stacked outputs and the final state
        """
        num_nodes, batch_size, length, _ = data.shape

        steps = nd.split(data, axis=2, num_outputs=length, squeeze_axis=1)
        if isinstance(steps, nd.NDArray):
            steps = [steps]

        outputs, state = [], begin_state
        for step_input in steps:
            output, state = self.forward_single(feature, step_input, state)
            outputs.append(output)

        outputs = nd.stack(*outputs, axis=2)
        return outputs, state
Example #27
0
    def hybrid_forward(self, F, score_gt, kernel_gt, score_pred,
                       training_masks, *args, **kwargs):
        """Dice loss for the text map and the six kernels.

        Changes relative to the previous version: the per-kernel losses stay
        on the graph and are averaged with ``F.add_n``.  They used to be
        pulled to Python floats with ``asscalar()`` and rebuilt through
        ``F.array``, which blocked once per kernel and left the kernel term
        without gradients.  The kernel mask does not depend on the kernel,
        so it is built once.

        Side effects: sets ``self.pixel_acc``, ``self.C_loss`` and
        ``self.kernel_loss``.

        :param F: operator namespace supplied by Gluon
        :param score_gt: ground-truth text map
        :param kernel_gt: ground-truth kernels, six of them along axis 3
        :param score_pred: predictions, seven maps along axis 1 (six
            kernels followed by the text map)
        :param training_masks: mask applied to the text-map terms
        :return: ``lam * C_dice_loss + (1 - lam) * kernel_dice_loss``
        """
        s1, s2, s3, s4, s5, s6 = F.split(kernel_gt,
                                         num_outputs=6,
                                         axis=3,
                                         squeeze_axis=True)
        s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred, C_pred = F.split(
            score_pred, num_outputs=7, axis=1, squeeze_axis=True)

        self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)

        # Dice loss for the text map
        eps = 1e-5
        intersection = F.sum(score_gt * C_pred * training_masks, axis=1)
        union = F.sum(training_masks * score_gt, axis=1) + F.sum(
            training_masks * C_pred, axis=1) + eps
        C_dice_loss = 1. - F.mean((2 * intersection / union))

        # Kernel losses only count pixels that are predicted text (> 0.5,
        # inside the training mask) or ground-truth text
        kernel_mask = F.where((C_pred * training_masks > 0.5),
                              F.ones_like(C_pred), F.zeros_like(C_pred))
        kernel_mask = F.cast(F.logical_or(kernel_mask, score_gt),
                             dtype='float32')

        kernel_dices = []
        for s, s_pred in zip(
            [s1, s2, s3, s4, s5, s6],
            [s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred]):
            s = F.cast(s, dtype='float32')
            kernel_intersection = F.sum(s * s_pred * kernel_mask, axis=1)
            kernel_union = F.sum(s * kernel_mask, axis=1) + F.sum(
                s_pred * kernel_mask, axis=1) + eps
            kernel_dices.append(1. - F.mean(
                (2. * kernel_intersection / kernel_union)))

        # Average on-graph so the kernel term contributes gradients
        kernel_dice_loss = F.add_n(*kernel_dices) / len(kernel_dices)

        self.C_loss = C_dice_loss
        self.kernel_loss = kernel_dice_loss
        loss = self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss

        return loss
Example #28
0
 def forward(self, x):
     """Run the capsule network on ``x``.

     The output of ``self.conv1`` is cut into 32 groups along axis 1.  Group
     ``i`` goes through the layer stored as attribute ``caps<i>`` and is
     multiplied by ``self.w[i]``.  The results are concatenated, passed
     through ``self.digitcap`` and summed over axes 1 and 2.  Everything
     runs inside ``x``'s context.

     :param x: input batch
     :return: ``self.digitcap`` output summed over axes 1 and 2
     """
     with x.context:
         channel_groups = nd.split(self.conv1(x), num_outputs=32, axis=1)
         capsule_outputs = []
         for i in range(32):
             capsule_layer = getattr(self, "caps{}".format(i))
             primary = capsule_layer(channel_groups[i])
             capsule_outputs.append(nd.dot(primary, self.w[i].data()))
         merged = nd.concat(*capsule_outputs)
         x = nd.sum(self.digitcap(merged), axis=[1, 2])
     return x
Example #29
0
def bbox_iou(lhs, rhs, x1y1x2y2=True):
    """Element-wise IoU between two sets of boxes.

    :param lhs: first boxes, four values along the last axis
    :param rhs: second boxes, four values along the last axis
    :param x1y1x2y2: when true the values are (xmin, ymin, xmax, ymax);
        otherwise they are (cx, cy, w, h) and converted to corners first
    :return: intersection over union; the intersection sides are clipped to
        [0, 1e5] and 1e-16 is added to the union to avoid division by zero
    """

    def _corners(boxes):
        # Always hand back (xmin, ymin, xmax, ymax)
        if x1y1x2y2:
            return nd.split(boxes, axis=-1, num_outputs=4)
        cx, cy, w, h = nd.split(boxes, axis=-1, num_outputs=4)
        return cx - w / 2., cy - h / 2., cx + w / 2., cy + h / 2.

    b1_xmin, b1_ymin, b1_xmax, b1_ymax = _corners(lhs)
    b2_xmin, b2_ymin, b2_xmax, b2_ymax = _corners(rhs)

    # Intersection area; negative overlaps are clipped to zero
    upper = 1e5
    overlap_w = nd.clip(
        nd.minimum(b1_xmax, b2_xmax) - nd.maximum(b1_xmin, b2_xmin), 0, upper)
    overlap_h = nd.clip(
        nd.minimum(b1_ymax, b2_ymax) - nd.maximum(b1_ymin, b2_ymin), 0, upper)
    inter = overlap_w * overlap_h

    # Union area
    area_1 = (b1_xmax - b1_xmin) * (b1_ymax - b1_ymin)
    area_2 = (b2_xmax - b2_xmin) * (b2_ymax - b2_ymin)
    union = (area_1 + 1e-16) + area_2 - inter

    return inter / union
Example #30
0
def split(x):
    """Split an array into two halves along the channel axis (axis 1).

    With an even channel count ``nd.split`` is used.  With an odd count the
    array is sliced by hand: the first half gets ``ch // 2`` channels and
    the second half gets the remaining ``ch // 2 + 1``.

    :param x: array with at least two axes
    :return: ``(first_half, second_half)``
    """
    n_channels = x.shape[1]

    if n_channels % 2 == 0:
        first_half, second_half = nd.split(x, axis=1, num_outputs=2)
        return first_half, second_half

    # nd.split needs equal parts, so an odd channel count is sliced manually
    cut = n_channels // 2
    return x[:, :cut, ...], x[:, cut:, ...]