Example #1
    def __getitem__(self, index):
        if self.is_train:
            imagelist = []
            batch, labels = self.sampled_batch_data()
            for file in batch:
                file_path = os.path.join(self.root, file)
                image = imread(file_path, to_rgb=True, flag=1)
                if image.shape[2] == 1:
                    print("has gray file", file)
                    image = nd.tile(image, (1, 1, 3))
                box = self.boxes.get(file, [0, 0, 256, 256])
                image = image[box[1]:box[3],
                              box[0]:box[2]]  # crop image in width and height
                image = self._transform(image)
                imagelist.append(image)
            return nd.stack(*imagelist, axis=0), nd.array(labels)
        else:
            path, class_id = self.test_images2id[index]
            box = self.boxes.get(path,
                                 [0, 0, 256, 256])  # fetch path,id and box
            file_path = os.path.join(self.root, path)
            image = imread(file_path, to_rgb=True, flag=1)
            if image.shape[2] == 1:
                image = nd.tile(image, (1, 1, 3))

            image = image[box[1]:box[3], box[0]:box[2]]  # crop test image
            image = self._transform(image)
            return image, class_id
Example #2
def get_im2col_indices(x_shape,
                       field_height,
                       field_width,
                       padding=1,
                       stride=1,
                       ctx=None):
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_width) % stride == 0
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    i0 = nd.repeat(nd.arange(field_height, ctx=ctx), field_width)
    i0 = nd.tile(i0, C)
    i1 = stride * nd.repeat(nd.arange(out_height, ctx=ctx), out_width)
    j0 = nd.tile(nd.arange(field_width, ctx=ctx), field_height * C)
    j1 = stride * nd.tile(nd.arange(out_width, ctx=ctx), out_height)
    i = i0.reshape((-1, 1)) + i1.reshape((1, -1))
    j = j0.reshape((-1, 1)) + j1.reshape((1, -1))

    k = nd.repeat(nd.arange(C, ctx=ctx), field_height * field_width).reshape(
        (-1, 1))

    return (k.astype('int32'), i.astype('int32'), j.astype('int32'))
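A quick shape check makes the index layout concrete; this is a minimal sketch with hypothetical sizes, assuming `from mxnet import nd` as in the snippet above.

# For a (1, 3, 4, 4) input and a 3x3 field with padding=1, stride=1, the output
# grid is 4x4, so each index array spans C * 3 * 3 = 27 rows and 16 columns.
k, i, j = get_im2col_indices((1, 3, 4, 4), field_height=3, field_width=3)
assert k.shape == (27, 1)
assert i.shape == (27, 16) and j.shape == (27, 16)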
Example #3
def test_radial_basis_function_kernel(
    x1, x2, amplitude, length_scale, exact
) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    rbf = RBFKernel(amplitude, length_scale)

    exact = amplitude * nd.exp(-0.5 * exact / length_scale ** 2)

    res = rbf.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
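Note: `exact` is expected to carry the pairwise squared distances here, so the reference value matches the RBF closed form k(x, x') = amplitude * exp(-||x - x'||^2 / (2 * length_scale^2)).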
Example #4
    def forward(self, query, key, value, mask=None):
        if mask is not None:
            if mask.shape[1] == 1:  # encoder mask (batch, 1, len); decoder masks already span all query positions
                mask = nd.tile(mask, reps=(1, query.shape[1], 1))
        bs = query.shape[0]

        # 1) apply the linear projections (d_model -> d_model)
        # 2) reshape and transpose to split the input into h heads
        query = nd.transpose(
            nd.reshape(self.linears_0(query), (bs, -1, self.h, self.d_k)),
            (0, 2, 1, 3))
        key = nd.transpose(
            nd.reshape(self.linears_1(key), (bs, -1, self.h, self.d_k)),
            (0, 2, 1, 3))
        value = nd.transpose(
            nd.reshape(self.linears_2(value), (bs, -1, self.h, self.d_k)),
            (0, 2, 1, 3))

        # merge batch and head axes so a single attention call covers all heads
        # (equivalent to looping over the heads and attending per slice)
        query = nd.reshape(query, (bs * self.h, -1, self.d_k))
        key = nd.reshape(key, (bs * self.h, -1, self.d_k))
        value = nd.reshape(value, (bs * self.h, -1, self.d_k))
        if mask is not None:
            mask = nd.tile(mask, reps=(self.h, 1, 1))
        x, _ = attention(query, key, value, mask=mask, dropout=self.dropout)
        x = nd.reshape(x, (bs, self.h, -1, self.d_k))
        x = nd.reshape(nd.transpose(x, (0, 2, 1, 3)),
                       (bs, -1, self.h * self.d_k))
        return self.linears_3(x)
Example #5
    def _generate_coordinates(self, img):
        h, w, _ = img.shape
        fh = int(np.ceil(np.ceil(np.ceil(h / 2) / 2) / 2))
        fw = int(np.ceil(np.ceil(np.ceil(w / 2) / 2) / 2))
        stride = self._base_stride

        fm_list = []
        for i in range(self._retina_stages):
            fm_list.append((fh, fw))
            fh = int(np.ceil(fh / 2))
            fw = int(np.ceil(fw / 2))
        fm_list = fm_list[::-1]

        cor_targets = []
        for i in range(self._retina_stages):
            fh, fw = fm_list[i]
            cx = nd.arange(0, fw).reshape((1, -1))
            cy = nd.arange(0, fh).reshape((-1, 1))
            sx = nd.tile(cx, reps=(fh, 1))
            sy = nd.tile(cy, reps=(1, fw))
            syx = nd.stack(sy.reshape(-1), sx.reshape(-1)).transpose()
            by = syx[:, 0] * stride
            bx = syx[:, 1] * stride
            cor_targets.append(nd.stack(bx, by, axis=1))
            stride = int(stride / 2)
        cor_targets = nd.concat(*cor_targets, dim=0)
        return cor_targets
Example #6
def test_periodic_kernel(x1, x2, amplitude, length_scale, exact) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    frequency = 1 / 24 * nd.ones_like(length_scale)
    periodic = PeriodicKernel(amplitude, length_scale, frequency)

    exact = amplitude * nd.exp(
        -2 * nd.sin(frequency * math.pi * nd.sqrt(exact))**2 / length_scale**2)

    res = periodic.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
Example #7
    def _calculate_trilinear_similarity(self, context, query, context_max_len,
                                        query_max_len, w4mlu, bias):
        """Implement the computation of trilinear similarity function.

            refer https://github.com/NLPLearn/QANet/blob/master/layers.py#L505

            The similarity function is:
                    f(w, q) = W[w, q, w * q]
            where w and q represent the word in context and query respectively,
            and the * operator denotes the Hadamard product.

        Parameters
        -----------
        context : NDArray
            input tensor with shape `(batch_size, context_sequence_length, hidden_size)`
        query : NDArray
            input tensor with shape `(batch_size, query_sequence_length, hidden_size)`
        context_max_len : int
        query_max_len : int

        Returns
        --------
        similarity_mat : NDArray
            output tensor with shape `(batch_size, context_sequence_length, query_sequence_length)`
        """

        subres0 = nd.tile(self.w4c(context), [1, 1, query_max_len])
        subres1 = nd.tile(nd.transpose(self.w4q(query), axes=(0, 2, 1)),
                          [1, context_max_len, 1])
        subres2 = nd.batch_dot(w4mlu * context,
                               nd.transpose(query, axes=(0, 2, 1)))
        similarity_mat = subres0 + subres1 + subres2 + bias
        return similarity_mat
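The three `subres` terms decompose the trilinear form W[w, q, w * q] into three cheaper products. A shape sketch with hypothetical sizes (batch 2, context length 5, query length 3), assuming `self.w4c` and `self.w4q` are Dense(1) projections:

# subres0 = tile(w4c(context), [1, 1, 3])       -> (2, 5, 3), constant along the query axis
# subres1 = tile(w4q(query)^T, [1, 5, 1])       -> (2, 5, 3), constant along the context axis
# subres2 = batch_dot(w4mlu * context, query^T) -> (2, 5, 3), the Hadamard term
# similarity_mat = subres0 + subres1 + subres2 + bias   # (2, 5, 3)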
Example #8
    def offset(self, kernel_size):
        kernel_h, kernel_w = kernel_size
        dilation_h, dilation_w = self.dilation
        offset_h = (nd.arange(kernel_h)) * dilation_h  # - row
        offset_h = nd.tile(offset_h, (kernel_w, 1)).T.reshape(-1)
        offset_w = (nd.arange(kernel_w)) * dilation_w  # - col
        offset_w = nd.tile(offset_w, (kernel_h, 1)).reshape(-1)
        return offset_h, offset_w
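A worked example, assuming `self.dilation == (1, 1)`: for `kernel_size=(2, 3)` the two vectors enumerate the kernel grid in row-major order.

# offset_h: arange(2) tiled to (3, 2), transposed, flattened -> [0. 0. 0. 1. 1. 1.]
# offset_w: arange(3) tiled to (2, 3), flattened             -> [0. 1. 2. 0. 1. 2.]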
Example #9
def transform_center(xy):
    """Given x, y prediction after sigmoid(), convert to relative coordinates (0, 1) on image."""
    b, h, w, n, s = xy.shape
    offset_y = nd.tile(nd.arange(0, h, repeat=(w * n * 1), ctx=xy.context).reshape((1, h, w, n, 1)), (b, 1, 1, 1, 1))
    # print(offset_y[0].asnumpy()[:, :, 0, 0])
    offset_x = nd.tile(nd.arange(0, w, repeat=(n * 1), ctx=xy.context).reshape((1, 1, w, n, 1)), (b, h, 1, 1, 1))
    # print(offset_x[0].asnumpy()[:, :, 0, 0])
    x, y = xy.split(num_outputs=2, axis=-1)
    x = (x + offset_x) / w
    y = (y + offset_y) / h
    return x, y
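A minimal smoke test (hypothetical input, assuming `from mxnet import nd`): with a 2x2 grid, one anchor, and every sigmoid output at 0.5, each predicted center lands mid-cell in (0, 1) coordinates.

xy = nd.full((1, 2, 2, 1, 2), 0.5)  # (b, h, w, n, s)
x, y = transform_center(xy)
print(x[0, 0, 1, 0, 0])  # (0.5 + 1) / 2 = 0.75
print(y[0, 1, 0, 0, 0])  # (0.5 + 1) / 2 = 0.75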
Example #10
    def _cross_element_wise_mp(p, h):
        plen = p.shape[1]
        hlen = h.shape[1]
        # order is important
        p_expand = nd.tile(
            nd.expand_dims(p, 2),
            [1, 1, hlen, 1])  # (batch_size, p_seq_len, h_seq_len, embed_dim)
        h_expand = nd.tile(nd.expand_dims(h, 1),
                           [1, plen, 1, 1])  # (32, 40, 40, 300)
        out = p_expand * h_expand
        if interact_dropout != 1:
            # nn.Dropout takes a drop rate, so convert from the keep rate
            out = nn.Dropout(1 - interact_dropout)(out)
        return out
Example #11
def verify_loaded_model(net, ctx):
    def transform(data, label):
        return data.astype(np.float32) / 255, label.astype(np.float32)

    # Load ten random images from the test dataset.
    sample_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(
        train=False, transform=transform),
                                           10,
                                           shuffle=True)

    for data, label in sample_data:
        # Display the images.
        img = nd.transpose(data, (1, 0, 2, 3))
        img = nd.reshape(img, (28, 10 * 28, 1))
        imtiles = nd.tile(img, (1, 1, 3))
        plt.imshow(imtiles.asnumpy())
        plt.show()

        # Display the predictions.
        data = nd.transpose(data, (0, 3, 1, 2))
        out = net(data.as_in_context(ctx))
        predictions = nd.argmax(out, axis=1)
        print('Model predictions:', predictions.asnumpy())
        print('Ground truth:     ', label.asnumpy())

        break
Example #12
def transform(data, target_wd, target_ht, is_train, box):
    """Crop and normnalize an image nd array."""
    if box is not None:
        x, y, w, h = box
        data = data[y:min(y + h, data.shape[0]), x:min(x + w, data.shape[1])]

    # Resize to target_wd * target_ht.
    data = mx.image.imresize(data, target_wd, target_ht)

    # Normalize in the same way as the pre-trained model.
    data = data.astype(np.float32) / 255.0
    data = (data - mx.nd.array([0.485, 0.456, 0.406])) / mx.nd.array(
        [0.229, 0.224, 0.225])

    if is_train:
        if random.random() < 0.5:
            data = nd.flip(data, axis=1)
        data, _ = mx.image.random_crop(data, (224, 224))
    else:
        data, _ = mx.image.center_crop(data, (224, 224))

    # Transpose from (target_wd, target_ht, 3)
    # to (3, target_wd, target_ht).
    data = nd.transpose(data, (2, 0, 1))

    # If image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1, ) + data.shape)
Example #13
def transform(data):
    data = mx.image.imresize(data, 64, 64)
    data = nd.transpose(data, (2,0,1))
    data = data.astype(np.float32)/127.5 - 1
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1,) + data.shape)
Example #14
def transform(data, target_wd, target_ht, is_train, box):
    """Crop and normnalize an image nd array."""
    if box is not None:
        x, y, w, h = box
        data = data[y:min(y+h, data.shape[0]), x:min(x+w, data.shape[1])]

    # Resize to target_wd * target_ht.
    data = mx.image.imresize(data, target_wd, target_ht)

    # Normalize in the same way as the pre-trained model.
    data = data.astype(np.float32) / 255.0
    data = (data - mx.nd.array([0.485, 0.456, 0.406])) / mx.nd.array([0.229, 0.224, 0.225])

    if is_train:
        if random.random() < 0.5:
            data = nd.flip(data, axis=1)
        data, _ = mx.image.random_crop(data, (224, 224))
    else:
        data, _ = mx.image.center_crop(data, (224, 224))

    # Transpose from (target_wd, target_ht, 3)
    # to (3, target_wd, target_ht).
    data = nd.transpose(data, (2, 0, 1))

    # If image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1,) + data.shape)
Example #15
    def __call__(self, img):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W).
        Returns:
            Tensor: Image with n_holes of dimension length x length cut out of it.
        """
        assert (
            img.shape[0] == 3
        ), "Input to before cutout should be C x H x W., given: {}".format(
            img.shape)
        h = img.shape[1]
        w = img.shape[2]

        mask = np.ones((h, w), np.uint8)  # np.float32

        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1:y2, x1:x2] = 0

        mask = nd.tile(nd.array(mask),
                       (3, 1, 1))  # tile the single-channel mask to all 3 channels
        # mask is float32 here because nd.array converts numpy input to float32 by default
        return img.astype('float32') * mask
Example #16
    def forward(self, x, x_mask=None):
        bs, sl, vec = tuple(x.shape)  # batch size, sequence length, vector size
        direct_mask = get_direct_mask(bs, sl, self.direction)
        #x_mask_tile = x_mask.expand_dims(1)
        #mask = np.logical_and(direct_mask, x_mask_tile).astype(float)
        mask = direct_mask.astype('float32')
        x_map = self.linear1(x)  # bs, sl, vec
        #x_map_tile = x_map.expand_dims(1) #
        x_map_tile = nd.tile(x_map.expand_dims(1),
                             (1, sl, 1, 1))  # bs, sl, sl, vec
        x_map_drop = self.dropout(x_map)

        dependent = self.linear2(x_map_drop)
        dependent_etd = dependent.expand_dims(1)
        head = self.linear3(x_map_drop)
        head_etd = head.expand_dims(2)
        logits = scaled_tanh(dependent_etd + head_etd + self.f_bias, 5.0)

        logits_masked = exp_mask_for_tensor(logits, mask)
        attn_score = nd.softmax(logits_masked, 2)
        attn_score = mask_for_tensor(attn_score, mask)

        attn_result = (attn_score * x_map_tile).nansum(2)
        fusion_gate = nd.sigmoid(
            self.linear4(x_map) + self.linear5(attn_result) + self.o_bias)
        output = fusion_gate * x_map + (1 - fusion_gate) * attn_result
        return output
Example #17
    def sample_neighbours(self, data, query_network):
        num_stored_samples = self.key_memory.shape[0]
        batch_size = data[0].shape[0]

        query = query_network(*data).as_in_context(mx.cpu())

        vec1 = nd.repeat(query, repeats=num_stored_samples, axis=0)
        vec2 = nd.tile(self.key_memory, reps=(batch_size, 1))
        diff = nd.subtract(vec1, vec2)
        sq = nd.square(diff)
        batch_sum = nd.sum(sq, exclude=1, axis=0)
        sqrt = nd.sqrt(batch_sum)

        dist = nd.reshape(sqrt, shape=(batch_size, num_stored_samples))

        sample_ind = nd.topk(dist, k=self.k, axis=1, ret_typ="indices")
        num_outputs = len(self.label_memory)

        sample_labels = [
            self.label_memory[i][sample_ind] for i in range(num_outputs)
        ]
        sample_batches = [[
            self.value_memory[j][sample_ind]
            for j in range(len(self.value_memory))
        ], sample_labels]

        return sample_batches
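The repeat/tile pair lays every query against every stored key; `nd.sum(sq, exclude=1, axis=0)` sums over all axes except the first, which for 2-D data is the same as `axis=1`. A hypothetical sketch of the resulting distance matrix:

q = nd.array([[0., 0.], [1., 1.]])            # 2 queries
m = nd.array([[0., 0.], [3., 4.], [1., 1.]])  # 3 stored keys
v1 = nd.repeat(q, repeats=3, axis=0)          # q0 q0 q0 q1 q1 q1
v2 = nd.tile(m, reps=(2, 1))                  # k0 k1 k2 k0 k1 k2
d = nd.sqrt(nd.sum(nd.square(v1 - v2), axis=1)).reshape((2, 3))
print(d)  # [[0.   5.   1.41], [1.41 3.61 0.  ]]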
Example #18
    def forward(self, *input):
        if self.mode == 'loss' or self.mode == 'likelihood':
            X, A, iw_ids, last_append_mask, \
            NX, NX_rep, action_0, actions, log_p, \
            batch_size, iw_size, \
            graph_to_rnn, rnn_to_graph, NX_cum, \
            c, ids = input

            init = nd.tile(fn.unsqueeze(self._policy_0(c), axis=1),
                           [1, iw_size, 1])
            append, connect, end = self._policy(X, A, NX, NX_rep,
                                                last_append_mask, graph_to_rnn,
                                                rnn_to_graph, NX_cum, c, ids)
            l = self._likelihood(init, append, connect, end, action_0, actions,
                                 iw_ids, log_p, batch_size, iw_size)
            if self.mode == 'likelihood':
                return l
            else:
                return -l.mean()
        elif self.mode == 'decode_0':
            return self._policy_0(*input)
        elif self.mode == 'decode_step':
            X, A, NX, NX_rep, last_append_mask, NX_cum, h, c, ids = input
            return self._decode_step(X, A, NX, NX_rep, last_append_mask,
                                     NX_cum, h, c, ids)
        else:
            raise ValueError
Example #19
    def __getitem__(self, index):
        """
        Return a sampled training batch (grouped by batch_k) for training, or a single (image, label) pair for testing.
        """
        if self.is_train:
            image_names, labels = self.sample_train_batch()
            # get sampled order image_file names and corresponding label
            image_list, label_list = [], []
            for img, label in zip(image_names, labels):
                image = imread(img, flag=1, to_rgb=True)
                x, y, w, h = self.boxes[img]
                image = image[y:min(y + h, image.shape[0]),
                              x:min(x + w, image.shape[1])]
                if image.shape[2] == 1:
                    print("has gray file", img)
                    image = nd.tile(image, (1, 1, 3))
                image = self._transform(image)  # for rgb same value
                image_list.append(image)
                label_list.append(label)
            batch_data = nd.stack(*image_list, axis=0)
            batch_label = nd.array(label_list)
            return batch_data, batch_label
        else:
            img = self.test_images_files[index]  # get the file name full path
            image = imread(img, flag=1, to_rgb=True)
            x, y, w, h = self.boxes[img]
            image = image[y:min(y + h, image.shape[0]),
                          x:min(x + w, image.shape[1])]
            image = self._transform(image)

            return image, self.test_labels[index]
Example #20
def transform_center(xy):
    b, h, w, n, s = xy.shape
    # tile: repeats the whole array multiple times
    offset_y = nd.tile(
        nd.arange(0, h, repeat=(w * n * 1), ctx=xy.context).reshape(
            (1, h, w, n, 1)),
        (b, 1, 1, 1, 1))  # repeat b times along the batch axis
    offset_x = nd.tile(
        nd.arange(0, w, repeat=(n * 1), ctx=xy.context).reshape(
            (1, 1, w, n, 1)), (b, h, 1, 1, 1)
    )  # repeat b times  along the batch channel, and n times along axis=1

    # split: num_outputs is the number of splits
    x, y = xy.split(num_outputs=2, axis=-1)
    x = (x + offset_x) / w
    y = (y + offset_y) / h
    return x, y
Example #21
def transform_size(wh, anchors):
    """Given w, h prediction after exp() and anchor sizes, convert to relative width/height (0, 1) on image"""
    b, h, w, n, s = wh.shape
    aw, ah = nd.tile(nd.array(anchors, ctx=wh.context).reshape((1, 1, 1, -1, 2)), (b, h, w, 1, 1)).split(num_outputs=2, axis=-1)
    w_pred, h_pred = nd.exp(wh).split(num_outputs=2, axis=-1)
    w_out = w_pred * aw / w
    h_out = h_pred * ah / h
    return w_out, h_out
Example #22
def transform(img, dims):
    data = mx.image.imread(img)
    data = mx.image.imresize(data, dims, dims)
    data = nd.transpose(data, (2, 0, 1))
    # normalize to [-1, 1]
    data = data.astype(np.float32) / 127.5 - 1
    # if image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1, ) + data.shape)
Example #23
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1, ctx=None):
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_width) % stride == 0
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    i0 = nd.repeat(nd.arange(field_height, ctx=ctx), field_width)
    i0 = nd.tile(i0, C)
    i1 = stride * nd.repeat(nd.arange(out_height, ctx=ctx), out_width)
    j0 = nd.tile(nd.arange(field_width, ctx=ctx), field_height * C)
    j1 = stride * nd.tile(nd.arange(out_width, ctx=ctx), out_height)
    i = i0.reshape((-1, 1)) + i1.reshape((1, -1))
    j = j0.reshape((-1, 1)) + j1.reshape((1, -1))

    k = nd.repeat(nd.arange(C, ctx=ctx), field_height * field_width).reshape((-1, 1))

    return (k.astype('int32'), i.astype('int32'), j.astype('int32'))
Example #24
def transform(data):
    data = mx.image.imresize(data, 64, 64)  # state size: (64, 64, 3)
    data = nd.transpose(data, (2, 0, 1))
    data = data.astype(np.float32) / 127.5 - 1  # normalize to [-1, 1]
    if data.shape[0] == 1:
        # if image is greyscale, repeat 3 times to get RGB image
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1, ) + data.shape)
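A quick check (hypothetical grayscale input, assuming `mx.image.imresize` accepts single-channel arrays): the lone channel is tiled three times, so the output is always a 1x3x64x64 RGB batch.

gray = nd.zeros((100, 80, 1), dtype=np.uint8)
out = transform(gray)
assert out.shape == (1, 3, 64, 64)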
Example #25
def getDefaultBoxes(fmap, s=None, r=None, 
                    offset=None, norm=None, clip=False, 
                    srmode='few', omode='flatten'):
    assert omode in ('flatten', 'stack')
    assert srmode in ('few', 'many')
    n, c, fh, fw = fmap.shape
    
    if s is None:
        scales = nd.array([1.])
    else:
        scales = nd.array(s)

    if r is None:
        ratios = nd.array([1.])
    else:
        ratios = nd.array(r)
        
    width, height = getwh(scales, ratios, fw, fh, srmode)
    
    nbox_per_pixel = width.size
    xcenter = nd.repeat(nd.arange(fw).reshape((1,-1)), fh, axis=0)
    ycenter = nd.repeat(nd.arange(fh).reshape((-1,1)), fw, axis=1)
    xycenters = nd.stack(xcenter, ycenter, axis=2)
    xycenters = nd.tile(xycenters, [1, 1, nbox_per_pixel*2])
    

    lu_rd_offset = nd.stack(width*-0.5, height*-0.5, width*0.5, height*0.5, axis=1)

    lu_rd_offset = lu_rd_offset.reshape((-1,))
    
    lu_rd_points = (xycenters + lu_rd_offset).reshape((fh, fw, nbox_per_pixel, 2, 2))
    
    if offset is None:
        offset = nd.array([0.5, 0.5])
    else:
        offset = nd.array(offset)
    assert offset.size <= 2
    
    if norm is None:
        norm = nd.array([fw, fh])
    else:
        norm = nd.array(norm)
    assert norm.size <= 2
    
    lu_rd_points = (lu_rd_points + offset) / norm
    
    if clip:
        nd.clip(lu_rd_points, a_min=0., a_max=1., out=lu_rd_points)
    
    if omode == 'flatten':
        lu_rd_points = lu_rd_points.reshape((1, -1, 4))
    else:
        lu_rd_points = lu_rd_points.reshape((1, fh, fw, nbox_per_pixel, 4))
    
    return lu_rd_points
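A shape sketch with hypothetical sizes, assuming `getwh` in 'few' mode yields len(s) + len(r) - 1 boxes per pixel (the usual SSD convention):

# fmap: (1, 256, 8, 8), s=[0.2, 0.4], r=[1., 2.]  ->  nbox_per_pixel = 3
# omode='flatten': lu_rd_points.shape == (1, 8 * 8 * 3, 4)
# omode='stack':   lu_rd_points.shape == (1, 8, 8, 3, 4)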
Example #26
def transform_size(wh, anchors):
    b, h, w, n, s = wh.shape
    aw, ah = nd.tile(
        nd.array(anchors, ctx=wh.context).reshape((1, 1, 1, -1, 2)),
        (b, h, w, 1, 1)).split(num_outputs=2, axis=-1)

    w_pred, h_pred = nd.exp(wh).split(num_outputs=2, axis=-1)
    w_out = w_pred * aw / w
    h_out = h_pred * ah / h

    return w_out, h_out
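A smoke test (hypothetical anchor, assuming `from mxnet import nd`): on a 1x1 grid with a single (2, 3) anchor, a zero prediction returns exactly the anchor size relative to the grid.

wh = nd.zeros((1, 1, 1, 1, 2))  # (b, h, w, n, s)
w_out, h_out = transform_size(wh, [[2, 3]])
print(w_out.reshape(-1))  # [2.] = exp(0) * 2 / 1
print(h_out.reshape(-1))  # [3.] = exp(0) * 3 / 1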
Example #27
def random_expand(src, max_ratio=4, fill=0, keep_ratio=True):
    """Random expand original image with borders, this is identical to placing
    the original image on a larger canvas.

    Modified for video from gluoncv default image transform

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with KHWC format.
    max_ratio : int or float
        Maximum ratio of the output image in both directions (vertical and horizontal)
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is a numerical type, RGB channels
        will be padded with a single value. Otherwise `fill` must have the same
        length as the image channels, which results in padding with per-channel values.
    keep_ratio : bool
        If `True`, will keep output image the same aspect ratio as input.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, new_width, new_height)

    """
    if max_ratio <= 1:
        return src, (0, 0, src.shape[1], src.shape[0])

    k, h, w, c = src.shape

    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
        ratio_y = ratio_x
    else:
        ratio_y = random.uniform(1, max_ratio)

    oh, ow = int(h * ratio_y), int(w * ratio_x)
    off_y = random.randint(0, oh - h)
    off_x = random.randint(0, ow - w)

    # make canvas
    if isinstance(fill, numeric_types):
        dst = nd.full(shape=(k, oh, ow, c), val=fill, dtype=src.dtype)
    else:
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        dst = nd.tile(fill.reshape((1, c)), reps=(k * oh * ow, 1)).reshape((k, oh, ow, c))

    dst[:, off_y:off_y+h, off_x:off_x+w, :] = src

    return dst, (off_x, off_y, ow, oh)
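A quick property check (hypothetical clip): after expansion the source frames sit unchanged inside the canvas at the reported offset.

clip = nd.ones((2, 10, 10, 3), dtype=np.uint8)
out, (ox, oy, ow, oh) = random_expand(clip, max_ratio=2, fill=0)
assert out.shape == (2, oh, ow, 3)
assert (out[:, oy:oy + 10, ox:ox + 10, :] == clip).asnumpy().all()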
Example #28
def curvature_based_sample(nn_pts, k):
    curvature = compute_curvature(nn_pts)
    point_indices = nd.topk(curvature, axis=-1, k=k, ret_typ='indices')

    pts_shape = nn_pts.shape
    batch_size = pts_shape[0]
    batch_indices = nd.tile(nd.reshape(nd.arange(batch_size), (-1, 1, 1)),
                            (1, k, 1))
    indices = nd.concat(batch_indices,
                        nd.expand_dims(point_indices, axis=2),
                        dim=2)
    return indices
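An index-layout sketch (hypothetical values): for batch_size=2 and k=3, `batch_indices` repeats each batch id down the k axis, so the concatenated result pairs batch ids with selected point ids:

# indices[b] == [[b, p_b0], [b, p_b1], [b, p_b2]]  for b in {0, 1}
# i.e. shape (2, 3, 2), ready for a gather-style lookup.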
Example #29
def transform(data, target_wd=64, target_ht=64):
    # resize to target_wd * target_ht
    data = mx.image.imresize(data, target_wd, target_ht)
    # transpose from (target_wd, target_ht, 3)
    # to (3, target_wd, target_ht)
    data = nd.transpose(data, (2, 0, 1))
    # normalize to [-1, 1]
    data = data.astype(np.float32) / 127.5 - 1
    # if image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data
Example #30
def get_max_pred(batch_heatmaps):
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = nd.argmax(heatmaps_reshaped, 2)
    maxvals = nd.max(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = nd.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = nd.floor((preds[:, :, 1]) / width)

    pred_mask = nd.tile(nd.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals
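A minimal check (hypothetical heatmap): one sample, one joint, a 2x3 heatmap whose peak sits at column 2, row 1.

hm = nd.array([[[[0.0, 0.1, 0.2], [0.3, 0.4, 0.9]]]])  # (1, 1, 2, 3)
preds, maxvals = get_max_pred(hm)
print(preds)    # [[[2. 1.]]] -> (x, y) of the arg-max
print(maxvals)  # [[[0.9]]]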
Example #32
    def crop(self, bboxes, h, w, masks):
        scale = 4
        b = bboxes.shape[0]
        ctx = bboxes.context
        with autograd.pause():
            _h = nd.arange(h, ctx=ctx)
            _w = nd.arange(w, ctx=ctx)
            _h = nd.tile(_h, reps=(b, 1))
            _w = nd.tile(_w, reps=(b, 1))
            x1, y1 = nd.round(bboxes[:, 0] / scale), nd.round(bboxes[:, 1] / scale)
            x2, y2 = nd.round(bboxes[:, 2] / scale), nd.round(bboxes[:, 3] / scale)
            _w = (_w >= x1.expand_dims(axis=-1)) * (_w <= x2.expand_dims(axis=-1))
            _h = (_h >= y1.expand_dims(axis=-1)) * (_h <= y2.expand_dims(axis=-1))
            _mask = nd.batch_dot(_h.expand_dims(axis=-1),
                                 _w.expand_dims(axis=-1),
                                 transpose_b=True)
        masks = _mask * masks
        return masks
Example #33
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):

    xmin, ymin, xmax, ymax = nd.split(data=bbox, num_outputs=4, axis=1)
    bbox_width = xmax - xmin + 1.
    bbox_height = ymax - ymin + 1.
    center_x = 0.5 * (xmin + xmax)
    center_y = 0.5 * (ymin + ymax)

    bbox_delta_reshape = nd.Reshape(data=bbox_delta, shape=(0, -1, 4))
    dx, dy, dw, dh = nd.split(data=bbox_delta_reshape,
                              num_outputs=4,
                              axis=2,
                              squeeze_axis=1)
    if (means is not None) and (stds is not None):
        dx = dx * stds[0] + means[0]
        dy = dy * stds[1] + means[1]
        dw = dw * stds[2] + means[2]
        dh = dh * stds[3] + means[3]

    refine_center_x = nd.broadcast_add(lhs=center_x,
                                       rhs=nd.broadcast_mul(lhs=bbox_width,
                                                            rhs=dx))
    refine_center_y = nd.broadcast_add(lhs=center_y,
                                       rhs=nd.broadcast_mul(lhs=bbox_height,
                                                            rhs=dy))
    refined_width = nd.broadcast_mul(lhs=bbox_width, rhs=nd.exp(dw))
    refined_height = nd.broadcast_mul(lhs=bbox_height, rhs=nd.exp(dh))
    w_offset = 0.5 * (refined_width - 1.)
    h_offset = 0.5 * (refined_height - 1.)
    refined_xmin = nd.expand_dims(refine_center_x - w_offset, axis=1)
    refined_ymin = nd.expand_dims(refine_center_y - h_offset, axis=1)
    refined_xmax = nd.expand_dims(refine_center_x + w_offset, axis=1)
    refined_ymax = nd.expand_dims(refine_center_y + h_offset, axis=1)

    refined_bbox = nd.concat(refined_xmin,
                             refined_ymin,
                             refined_xmax,
                             refined_ymax,
                             dim=1)
    if im_info is not None:
        # assume im_info [[height, width, scale]] with shape (1,3)
        im_hw = nd.slice_axis(im_info, axis=1, begin=0, end=2)
        im_wh = nd.reverse(im_hw, axis=1)
        im_wh = im_wh - 1.
        im_wh = nd.tile(data=im_wh, reps=(1, 2))
        im_wh = nd.Reshape(im_wh, shape=(1, 4, 1))
        refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=im_wh)
        refined_bbox = nd.broadcast_maximum(lhs=refined_bbox,
                                            rhs=nd.zeros_like(refined_bbox))
    # print refined_bbox.debug_str()
    return refined_bbox
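A worked example (hypothetical box and deltas): a box (5, 5, 14, 14) has width 10 and center x 9.5; with (dx, dy, dw, dh) = (0, 0, ln 2, 0) the refined width is 10 * exp(ln 2) = 20, so w_offset = 9.5 and the refined xmin/xmax become 0 and 19.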
Example #34
def random_expand(src, max_ratio=4, fill=0, keep_ratio=True):
    """Random expand original image with borders, this is identical to placing
    the original image on a larger canvas.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    max_ratio : int or float
        Maximum ratio of the output image in both directions (vertical and horizontal)
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is a numerical type, RGB channels
        will be padded with a single value. Otherwise `fill` must have the same
        length as the image channels, which results in padding with per-channel values.
    keep_ratio : bool
        If `True`, will keep output image the same aspect ratio as input.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, new_width, new_height)

    """
    if max_ratio <= 1:
        return src, (0, 0, src.shape[1], src.shape[0])

    h, w, c = src.shape
    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
        ratio_y = ratio_x
    else:
        ratio_y = random.uniform(1, max_ratio)

    oh, ow = int(h * ratio_y), int(w * ratio_x)
    off_y = random.randint(0, oh - h)
    off_x = random.randint(0, ow - w)

    # make canvas
    if isinstance(fill, numeric_types):
        dst = nd.full(shape=(oh, ow, c), val=fill, dtype=src.dtype)
    else:
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        dst = nd.tile(fill.reshape((1, c)), reps=(oh * ow, 1)).reshape((oh, ow, c))

    dst[off_y:off_y+h, off_x:off_x+w, :] = src
    return dst, (off_x, off_y, ow, oh)