def __getitem__(self, index):
    if self.is_train:
        imagelist = []
        batch, labels = self.sampled_batch_data()
        for file in batch:
            file_path = os.path.join(self.root, file)
            image = imread(file_path, to_rgb=True, flag=1)
            if image.shape[2] == 1:
                print("has gray file", file)
                image = nd.tile(image, (1, 1, 3))
            box = self.boxes.get(file, [0, 0, 256, 256])
            image = image[box[1]:box[3], box[0]:box[2]]  # crop image in width and height
            image = self._transform(image)
            imagelist.append(image)
        return nd.stack(*imagelist, axis=0), nd.array(labels)
    else:
        path, class_id = self.test_images2id[index]
        box = self.boxes.get(path, [0, 0, 256, 256])  # fetch path, id and box
        file_path = os.path.join(self.root, path)
        image = imread(file_path, to_rgb=True, flag=1)
        if image.shape[2] == 1:
            image = nd.tile(image, (1, 1, 3))
        image = image[box[1]:box[3], box[0]:box[2]]  # crop test image
        image = self._transform(image)
        return image, class_id
def get_im2col_indices(x_shape, field_height, field_width, padding=1,
                       stride=1, ctx=None):
    # First figure out what the size of the output should be.
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_width) % stride == 0
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    i0 = nd.repeat(nd.arange(field_height, ctx=ctx), field_width)
    i0 = nd.tile(i0, C)
    i1 = stride * nd.repeat(nd.arange(out_height, ctx=ctx), out_width)
    j0 = nd.tile(nd.arange(field_width, ctx=ctx), field_height * C)
    j1 = stride * nd.tile(nd.arange(out_width, ctx=ctx), out_height)
    i = i0.reshape((-1, 1)) + i1.reshape((1, -1))
    j = j0.reshape((-1, 1)) + j1.reshape((1, -1))
    k = nd.repeat(nd.arange(C, ctx=ctx), field_height * field_width).reshape((-1, 1))

    return (k.astype('int32'), i.astype('int32'), j.astype('int32'))
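# A quick sanity check of get_im2col_indices (my own example, not from the
# source): for a 1x3x4x4 input with a 3x3 field, padding=1 and stride=1, the
# index arrays address 3*3*3 = 27 patch rows and 4*4 = 16 output positions.
from mxnet import nd

k, i, j = get_im2col_indices((1, 3, 4, 4), field_height=3, field_width=3,
                             padding=1, stride=1)
print(k.shape, i.shape, j.shape)  # (27, 1) (27, 16) (27, 16)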
def test_radial_basis_function_kernel(
    x1, x2, amplitude, length_scale, exact
) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)

    rbf = RBFKernel(amplitude, length_scale)
    exact = amplitude * nd.exp(-0.5 * exact / length_scale ** 2)
    res = rbf.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
def forward(self, query, key, value, mask=None):
    if mask is not None:
        # Encoder masks carry a singleton time axis; broadcast them over the
        # query length. Decoder masks already have the full shape.
        if mask.shape[1] == 1:
            mask = nd.tile(mask, reps=(1, query.shape[1], 1))
    bs = query.shape[0]

    # 1) Run the linear transforms from d_model to d_model.
    # 2) Reshape and transpose to split the input into h heads.
    query = nd.transpose(
        nd.reshape(self.linears_0(query), (bs, -1, self.h, self.d_k)),
        (0, 2, 1, 3))
    key = nd.transpose(
        nd.reshape(self.linears_1(key), (bs, -1, self.h, self.d_k)),
        (0, 2, 1, 3))
    value = nd.transpose(
        nd.reshape(self.linears_2(value), (bs, -1, self.h, self.d_k)),
        (0, 2, 1, 3))

    # Fold the head axis into the batch axis so attention runs over all heads
    # at once, and tile the mask to match.
    query = nd.reshape(query, (bs * self.h, -1, self.d_k))
    key = nd.reshape(key, (bs * self.h, -1, self.d_k))
    value = nd.reshape(value, (bs * self.h, -1, self.d_k))
    if mask is not None:
        mask = nd.tile(mask, reps=(self.h, 1, 1))
    x, _ = attention(query, key, value, mask=mask, dropout=self.dropout)

    # Un-fold the heads and concatenate them back to d_model.
    x = nd.reshape(x, (bs, self.h, -1, self.d_k))
    x = nd.reshape(nd.transpose(x, (0, 2, 1, 3)), (bs, -1, self.h * self.d_k))
    return self.linears_3(x)
def _generate_coordinates(self, img):
    h, w, _ = img.shape
    fh = int(np.ceil(np.ceil(np.ceil(h / 2) / 2) / 2))
    fw = int(np.ceil(np.ceil(np.ceil(w / 2) / 2) / 2))
    stride = self._base_stride

    # Collect the feature-map sizes of every retina stage, coarsest first.
    fm_list = []
    for i in range(self._retina_stages):
        fm_list.append((fh, fw))
        fh = int(np.ceil(fh / 2))
        fw = int(np.ceil(fw / 2))
    fm_list = fm_list[::-1]

    # Build the pixel coordinates of every feature-map location.
    cor_targets = []
    for i in range(self._retina_stages):
        fh, fw = fm_list[i]
        cx = nd.arange(0, fw).reshape((1, -1))
        cy = nd.arange(0, fh).reshape((-1, 1))
        sx = nd.tile(cx, reps=(fh, 1))
        sy = nd.tile(cy, reps=(1, fw))
        syx = nd.stack(sy.reshape(-1), sx.reshape(-1)).transpose()
        by = syx[:, 0] * stride
        bx = syx[:, 1] * stride
        cor_targets.append(nd.stack(bx, by, axis=1))
        stride = int(stride / 2)
    cor_targets = nd.concat(*cor_targets, dim=0)
    return cor_targets
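# Standalone sketch of the grid trick used in _generate_coordinates (shapes
# invented for illustration): tile a row of x indices down fh rows and a
# column of y indices across fw columns, giving one (y, x) pair per cell.
from mxnet import nd

fh, fw = 2, 3
sx = nd.tile(nd.arange(0, fw).reshape((1, -1)), reps=(fh, 1))
sy = nd.tile(nd.arange(0, fh).reshape((-1, 1)), reps=(1, fw))
print(nd.stack(sy.reshape(-1), sx.reshape(-1)).transpose().asnumpy())
# [[0. 0.] [0. 1.] [0. 2.] [1. 0.] [1. 1.] [1. 2.]]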
def test_periodic_kernel(x1, x2, amplitude, length_scale, exact) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    frequency = 1 / 24 * nd.ones_like(length_scale)

    periodic = PeriodicKernel(amplitude, length_scale, frequency)
    exact = amplitude * nd.exp(
        -2 * nd.sin(frequency * math.pi * nd.sqrt(exact)) ** 2
        / length_scale ** 2)
    res = periodic.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
def _calculate_trilinear_similarity(self, context, query, context_max_len,
                                    query_max_len, w4mlu, bias):
    """Implement the computation of the trilinear similarity function.

    Refer to https://github.com/NLPLearn/QANet/blob/master/layers.py#L505

    The similarity function is:
        f(w, q) = W[w, q, w * q]
    where w and q represent a word in the context and the query respectively,
    and the * operator is the Hadamard (element-wise) product.

    Parameters
    ----------
    context : NDArray
        input tensor with shape `(batch_size, context_sequence_length, hidden_size)`
    query : NDArray
        input tensor with shape `(batch_size, query_sequence_length, hidden_size)`
    context_max_len : int
    query_max_len : int

    Returns
    -------
    similarity_mat : NDArray
        output tensor with shape
        `(batch_size, context_sequence_length, query_sequence_length)`
    """
    subres0 = nd.tile(self.w4c(context), [1, 1, query_max_len])
    subres1 = nd.tile(nd.transpose(self.w4q(query), axes=(0, 2, 1)),
                      [1, context_max_len, 1])
    subres2 = nd.batch_dot(w4mlu * context,
                           nd.transpose(query, axes=(0, 2, 1)))
    similarity_mat = subres0 + subres1 + subres2 + bias
    return similarity_mat
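# Hedged shape check for the trilinear similarity (the Dense(1) stand-ins for
# self.w4c / self.w4q are invented for this sketch): every term broadcasts to
# (batch_size, context_len, query_len).
from mxnet import nd
from mxnet.gluon import nn

b, c_len, q_len, hid = 2, 5, 4, 8
w4c = nn.Dense(1, flatten=False)
w4q = nn.Dense(1, flatten=False)
w4c.initialize()
w4q.initialize()
context, query = nd.ones((b, c_len, hid)), nd.ones((b, q_len, hid))
w4mlu = nd.ones((1, 1, hid))
subres0 = nd.tile(w4c(context), [1, 1, q_len])
subres1 = nd.tile(nd.transpose(w4q(query), axes=(0, 2, 1)), [1, c_len, 1])
subres2 = nd.batch_dot(w4mlu * context, nd.transpose(query, axes=(0, 2, 1)))
print(subres0.shape, subres1.shape, subres2.shape)  # all (2, 5, 4)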
def offset(self, kernel_size):
    kernel_h, kernel_w = kernel_size
    dilation_h, dilation_w = self.dilation
    # row offsets
    offset_h = nd.arange(kernel_h) * dilation_h
    offset_h = nd.tile(offset_h, (kernel_w, 1)).T.reshape(-1)
    # column offsets
    offset_w = nd.arange(kernel_w) * dilation_w
    offset_w = nd.tile(offset_w, (kernel_h, 1)).reshape(-1)
    return offset_h, offset_w
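# Illustration of the offsets above for a 3x3 kernel with dilation (2, 2)
# (values chosen by me): kernel tap (i, j) maps to (2*i, 2*j), row-major.
from mxnet import nd

kernel_h, kernel_w, dilation_h, dilation_w = 3, 3, 2, 2
offset_h = nd.tile(nd.arange(kernel_h) * dilation_h, (kernel_w, 1)).T.reshape(-1)
offset_w = nd.tile(nd.arange(kernel_w) * dilation_w, (kernel_h, 1)).reshape(-1)
print(offset_h.asnumpy())  # [0. 0. 0. 2. 2. 2. 4. 4. 4.]
print(offset_w.asnumpy())  # [0. 2. 4. 0. 2. 4. 0. 2. 4.]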
def transform_center(xy):
    """Given x, y prediction after sigmoid(), convert to relative coordinates
    (0, 1) on the image."""
    b, h, w, n, s = xy.shape
    offset_y = nd.tile(
        nd.arange(0, h, repeat=(w * n * 1), ctx=xy.context).reshape(
            (1, h, w, n, 1)),
        (b, 1, 1, 1, 1))
    offset_x = nd.tile(
        nd.arange(0, w, repeat=(n * 1), ctx=xy.context).reshape(
            (1, 1, w, n, 1)),
        (b, h, 1, 1, 1))
    x, y = xy.split(num_outputs=2, axis=-1)
    x = (x + offset_x) / w
    y = (y + offset_y) / h
    return x, y
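# Quick check of transform_center (my own toy shapes): on a 1x2x2 grid with
# one anchor, a sigmoid output of 0.5 lands at each cell centre.
from mxnet import nd

xy = nd.full((1, 2, 2, 1, 2), 0.5)  # (batch, h, w, num_anchors, x/y)
x, y = transform_center(xy)
print(x.reshape((2, 2)).asnumpy())  # [[0.25 0.75] [0.25 0.75]]
print(y.reshape((2, 2)).asnumpy())  # [[0.25 0.25] [0.75 0.75]]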
def _cross_element_wise_mp(p, h):
    plen = p.shape[1]
    hlen = h.shape[1]
    # (batch_size, p_len, h_len, embed_dim)
    p_expand = nd.tile(nd.expand_dims(p, 2), [1, 1, hlen, 1])
    # e.g. (32, 40, 40, 300)
    h_expand = nd.tile(nd.expand_dims(h, 1), [1, plen, 1, 1])
    out = p_expand * h_expand
    # interact_dropout and keep_rate are assumed to be module-level settings.
    if interact_dropout != 1:
        out = nn.Dropout(keep_rate)(out)
    return out
def verify_loaded_model(net, ctx):
    def transform(data, label):
        return data.astype(np.float32) / 255, label.astype(np.float32)

    # Load ten random images from the test dataset.
    sample_data = mx.gluon.data.DataLoader(
        mx.gluon.data.vision.MNIST(train=False, transform=transform),
        10, shuffle=True)

    for data, label in sample_data:
        # Display the images.
        img = nd.transpose(data, (1, 0, 2, 3))
        img = nd.reshape(img, (28, 10 * 28, 1))
        imtiles = nd.tile(img, (1, 1, 3))
        plt.imshow(imtiles.asnumpy())
        plt.show()

        # Display the predictions.
        data = nd.transpose(data, (0, 3, 1, 2))
        out = net(data.as_in_context(ctx))
        predictions = nd.argmax(out, axis=1)
        print('Model predictions:', predictions.asnumpy())
        print('Ground truth:     ', label.asnumpy())
        break
def transform(data, target_wd, target_ht, is_train, box):
    """Crop and normalize an image nd array."""
    if box is not None:
        x, y, w, h = box
        data = data[y:min(y + h, data.shape[0]), x:min(x + w, data.shape[1])]

    # Resize to target_wd * target_ht.
    data = mx.image.imresize(data, target_wd, target_ht)

    # Normalize in the same way as the pre-trained model.
    data = data.astype(np.float32) / 255.0
    data = (data - mx.nd.array([0.485, 0.456, 0.406])) / mx.nd.array(
        [0.229, 0.224, 0.225])

    if is_train:
        if random.random() < 0.5:
            data = nd.flip(data, axis=1)
        data, _ = mx.image.random_crop(data, (224, 224))
    else:
        data, _ = mx.image.center_crop(data, (224, 224))

    # Transpose from (target_wd, target_ht, 3)
    # to (3, target_wd, target_ht).
    data = nd.transpose(data, (2, 0, 1))

    # If image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1,) + data.shape)
def __call__(self, img):
    """
    Args:
        img (Tensor): Tensor image of size (C, H, W).
    Returns:
        Tensor: Image with n_holes of dimension length x length cut out of it.
    """
    assert img.shape[0] == 3, \
        "Input to cutout should be C x H x W, given: {}".format(img.shape)
    h = img.shape[1]
    w = img.shape[2]
    mask = np.ones((h, w), np.uint8)

    for n in range(self.n_holes):
        y = np.random.randint(h)
        x = np.random.randint(w)

        y1 = np.clip(y - self.length // 2, 0, h)
        y2 = np.clip(y + self.length // 2, 0, h)
        x1 = np.clip(x - self.length // 2, 0, w)
        x2 = np.clip(x + self.length // 2, 0, w)

        mask[y1:y2, x1:x2] = 0

    # Tile the single-channel mask over all three channels; nd.array()
    # converts the uint8 mask to float32 by default, matching the cast below.
    mask = nd.tile(nd.array(mask), (3, 1, 1))
    return img.astype('float32') * mask
def forward(self, x, x_mask=None):
    bs, sl, vec = tuple(x.shape)  # batch size, sequence length, vector dim
    direct_mask = get_direct_mask(bs, sl, self.direction)
    # x_mask_tile = x_mask.expand_dims(1)
    # mask = np.logical_and(direct_mask, x_mask_tile).astype(float)
    mask = direct_mask.astype('float32')

    x_map = self.linear1(x)  # bs, sl, vec
    x_map_tile = nd.tile(x_map.expand_dims(1), (1, sl, 1, 1))  # bs, sl, sl, vec
    x_map_drop = self.dropout(x_map)

    dependent = self.linear2(x_map_drop)
    dependent_etd = dependent.expand_dims(1)
    head = self.linear3(x_map_drop)
    head_etd = head.expand_dims(2)
    logits = scaled_tanh(dependent_etd + head_etd + self.f_bias, 5.0)

    logits_masked = exp_mask_for_tensor(logits, mask)
    attn_score = nd.softmax(logits_masked, 2)
    attn_score = mask_for_tensor(attn_score, mask)

    attn_result = (attn_score * x_map_tile).nansum(2)
    fusion_gate = nd.sigmoid(
        self.linear4(x_map) + self.linear5(attn_result) + self.o_bias)
    output = fusion_gate * x_map + (1 - fusion_gate) * attn_result
    return output
def sample_neighbours(self, data, query_network):
    num_stored_samples = self.key_memory.shape[0]
    batch_size = data[0].shape[0]

    query = query_network(*data).as_in_context(mx.cpu())

    # Pair every query with every stored key: repeat each query row
    # num_stored_samples times, tile the whole key memory batch_size times.
    vec1 = nd.repeat(query, repeats=num_stored_samples, axis=0)
    vec2 = nd.tile(self.key_memory, reps=(batch_size, 1))
    diff = nd.subtract(vec1, vec2)
    sq = nd.square(diff)
    batch_sum = nd.sum(sq, exclude=1, axis=0)
    sqrt = nd.sqrt(batch_sum)
    dist = nd.reshape(sqrt, shape=(batch_size, num_stored_samples))

    sample_ind = nd.topk(dist, k=self.k, axis=1, ret_typ="indices")
    num_outputs = len(self.label_memory)

    sample_labels = [self.label_memory[i][sample_ind]
                     for i in range(num_outputs)]
    sample_batches = [[self.value_memory[j][sample_ind]
                       for j in range(len(self.value_memory))],
                      sample_labels]

    return sample_batches
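# The repeat/tile pairing above is the usual trick for all-pairs distances; a
# self-contained sketch with toy values (names invented for illustration):
from mxnet import nd

queries = nd.array([[0, 0], [3, 4]])       # 2 query vectors
keys = nd.array([[0, 0], [0, 4], [3, 0]])  # 3 stored keys
v1 = nd.repeat(queries, repeats=3, axis=0)  # q0, q0, q0, q1, q1, q1
v2 = nd.tile(keys, reps=(2, 1))             # k0, k1, k2, k0, k1, k2
dist = nd.sqrt(nd.sum(nd.square(v1 - v2), axis=1)).reshape((2, 3))
print(dist.asnumpy())  # [[0. 4. 3.] [5. 3. 4.]]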
def forward(self, *input):
    if self.mode == 'loss' or self.mode == 'likelihood':
        X, A, iw_ids, last_append_mask, \
        NX, NX_rep, action_0, actions, log_p, \
        batch_size, iw_size, \
        graph_to_rnn, rnn_to_graph, NX_cum, \
        c, ids = input

        init = nd.tile(fn.unsqueeze(self._policy_0(c), axis=1),
                       [1, iw_size, 1])
        append, connect, end = self._policy(X, A, NX, NX_rep,
                                            last_append_mask,
                                            graph_to_rnn, rnn_to_graph,
                                            NX_cum, c, ids)
        l = self._likelihood(init, append, connect, end,
                             action_0, actions, iw_ids, log_p,
                             batch_size, iw_size)
        if self.mode == 'likelihood':
            return l
        else:
            return -l.mean()
    elif self.mode == 'decode_0':
        return self._policy_0(*input)
    elif self.mode == 'decode_step':
        X, A, NX, NX_rep, last_append_mask, NX_cum, h, c, ids = input
        return self._decode_step(X, A, NX, NX_rep, last_append_mask,
                                 NX_cum, h, c, ids)
    else:
        raise ValueError
def __getitem__(self, index):
    """Get a batch of batch_k classes for train, a single image for test."""
    if self.is_train:
        # Sampled image file names and their corresponding labels.
        image_names, labels = self.sample_train_batch()
        image_list, label_list = [], []
        for img, label in zip(image_names, labels):
            image = imread(img, flag=1, to_rgb=True)
            x, y, w, h = self.boxes[img]
            image = image[y:min(y + h, image.shape[0]),
                          x:min(x + w, image.shape[1])]
            if image.shape[2] == 1:
                print("has gray file", img)
                # repeat the grey channel so R, G and B carry the same value
                image = nd.tile(image, (1, 1, 3))
            image = self._transform(image)
            image_list.append(image)
            label_list.append(label)
        batch_data = nd.stack(*image_list, axis=0)
        batch_label = nd.array(label_list)
        return batch_data, batch_label
    else:
        img = self.test_images_files[index]  # full path of the file
        image = imread(img, flag=1, to_rgb=1)
        x, y, w, h = self.boxes[img]
        image = image[y:min(y + h, image.shape[0]),
                      x:min(x + w, image.shape[1])]
        image = self._transform(image)
        return image, self.test_labels[index]
def transform_center(xy):
    b, h, w, n, s = xy.shape
    # tile: repeat the whole array multiple times
    offset_y = nd.tile(
        nd.arange(0, h, repeat=(w * n * 1), ctx=xy.context).reshape(
            (1, h, w, n, 1)),
        (b, 1, 1, 1, 1))  # repeat b times along the batch axis
    offset_x = nd.tile(
        nd.arange(0, w, repeat=(n * 1), ctx=xy.context).reshape(
            (1, 1, w, n, 1)),
        (b, h, 1, 1, 1))  # repeat b times along the batch axis, h times along axis=1
    # split: num_outputs is the number of splits
    x, y = xy.split(num_outputs=2, axis=-1)
    x = (x + offset_x) / w
    y = (y + offset_y) / h
    return x, y
def transform_size(wh, anchors):
    """Given w, h prediction after exp() and anchor sizes, convert to
    relative width/height (0, 1) on the image."""
    b, h, w, n, s = wh.shape
    aw, ah = nd.tile(
        nd.array(anchors, ctx=wh.context).reshape((1, 1, 1, -1, 2)),
        (b, h, w, 1, 1)).split(num_outputs=2, axis=-1)
    w_pred, h_pred = nd.exp(wh).split(num_outputs=2, axis=-1)
    w_out = w_pred * aw / w
    h_out = h_pred * ah / h
    return w_out, h_out
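# Hedged usage sketch for transform_size (anchor sizes invented): with all-zero
# predictions, exp(0) = 1, so each output is just anchor size / grid size.
from mxnet import nd

wh = nd.zeros((1, 2, 2, 1, 2))  # (batch, h, w, num_anchors, w/h)
w_out, h_out = transform_size(wh, anchors=[2, 2])
print(w_out.reshape(-1).asnumpy())  # [1. 1. 1. 1.]  (anchor 2 / grid 2)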
def transform(img, dims):
    data = mx.image.imread(img)
    data = mx.image.imresize(data, dims, dims)
    data = nd.transpose(data, (2, 0, 1))
    # normalize to [-1, 1]
    data = data.astype(np.float32) / 127.5 - 1
    # if image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1,) + data.shape)
def transform(data):
    data = mx.image.imresize(data, 64, 64)  # state size: (64, 64, 3)
    data = nd.transpose(data, (2, 0, 1))
    data = data.astype(np.float32) / 127.5 - 1  # normalize to [-1, 1]
    if data.shape[0] == 1:
        # if image is greyscale, repeat 3 times to get an RGB image
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1,) + data.shape)
def getDefaultBoxes(fmap, s=None, r=None, offset=None, norm=None,
                    clip=False, srmode='few', omode='flatten'):
    assert omode in ('flatten', 'stack')
    assert srmode in ('few', 'many')
    n, c, fh, fw = fmap.shape

    if s is None:
        scales = nd.array([1.])
    else:
        scales = nd.array(s)
    if r is None:
        ratios = nd.array([1.])
    else:
        ratios = nd.array(r)

    width, height = getwh(scales, ratios, fw, fh, srmode)
    nbox_per_pixel = width.size

    # One (x, y) centre per feature-map cell, tiled once per default box.
    xcenter = nd.repeat(nd.arange(fw).reshape((1, -1)), fh, axis=0)
    ycenter = nd.repeat(nd.arange(fh).reshape((-1, 1)), fw, axis=1)
    xycenters = nd.stack(xcenter, ycenter, axis=2)
    xycenters = nd.tile(xycenters, [1, 1, nbox_per_pixel * 2])

    # Offsets from the centre to the left-upper and right-down corners.
    lu_rd_offset = nd.stack(width * -0.5, height * -0.5,
                            width * 0.5, height * 0.5, axis=1)
    lu_rd_offset = lu_rd_offset.reshape((-1,))
    lu_rd_points = (xycenters + lu_rd_offset).reshape(
        (fh, fw, nbox_per_pixel, 2, 2))

    if offset is None:
        offset = nd.array([0.5, 0.5])
    else:
        offset = nd.array(offset)
    assert offset.size <= 2
    if norm is None:
        norm = nd.array([fw, fh])
    else:
        norm = nd.array(norm)
    assert norm.size <= 2

    lu_rd_points = (lu_rd_points + offset) / norm

    if clip:
        nd.clip(lu_rd_points, a_min=0., a_max=1., out=lu_rd_points)

    if omode == 'flatten':
        lu_rd_points = lu_rd_points.reshape((1, -1, 4))
    else:
        lu_rd_points = lu_rd_points.reshape((1, fh, fw, nbox_per_pixel, 4))

    return lu_rd_points
def random_expand(src, max_ratio=4, fill=0, keep_ratio=True):
    """Random expand original image with borders, this is identical to placing
    the original image on a larger canvas. Modified for video from the gluoncv
    default image transform.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with KHWC format.
    max_ratio : int or float
        Maximum ratio of the output image in both directions (vertical and horizontal)
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB channels
        will be padded with a single value. Otherwise `fill` must have the same
        length as the image channels, which results in padding with per-channel values.
    keep_ratio : bool
        If `True`, will keep the output image the same aspect ratio as the input.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, new_width, new_height)
    """
    if max_ratio <= 1:
        return src, (0, 0, src.shape[1], src.shape[0])

    k, h, w, c = src.shape
    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
        ratio_y = ratio_x
    else:
        ratio_y = random.uniform(1, max_ratio)

    oh, ow = int(h * ratio_y), int(w * ratio_x)
    off_y = random.randint(0, oh - h)
    off_x = random.randint(0, ow - w)

    # make canvas
    if isinstance(fill, numeric_types):
        dst = nd.full(shape=(k, oh, ow, c), val=fill, dtype=src.dtype)
    else:
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        dst = nd.tile(fill.reshape((1, c)),
                      reps=(k * oh * ow, 1)).reshape((k, oh, ow, c))

    dst[:, off_y:off_y + h, off_x:off_x + w, :] = src
    return dst, (off_x, off_y, ow, oh)
def curvature_based_sample(nn_pts, k):
    curvature = compute_curvature(nn_pts)
    point_indices = nd.topk(curvature, axis=-1, k=k, ret_typ='indices')

    pts_shape = nn_pts.shape
    batch_size = pts_shape[0]
    batch_indices = nd.tile(nd.reshape(nd.arange(batch_size), (-1, 1, 1)),
                            (1, k, 1))
    indices = nd.concat(batch_indices,
                        nd.expand_dims(point_indices, axis=2), dim=2)
    return indices
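# Hedged check of the index construction (compute_curvature is external, so a
# hand-made (batch=2, num_points=4) curvature tensor stands in): each row of
# the result is a (batch_index, point_index) pair usable with nd.gather_nd.
from mxnet import nd

curvature = nd.array([[0.1, 0.9, 0.3, 0.7],
                      [0.8, 0.2, 0.6, 0.4]])
k = 2
point_indices = nd.topk(curvature, axis=-1, k=k, ret_typ='indices')
batch_indices = nd.tile(nd.reshape(nd.arange(2), (-1, 1, 1)), (1, k, 1))
indices = nd.concat(batch_indices, nd.expand_dims(point_indices, axis=2), dim=2)
print(indices.asnumpy())  # [[[0. 1.] [0. 3.]] [[1. 0.] [1. 2.]]]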
def transform(data, target_wd=64, target_ht=64):
    # resize to target_wd * target_ht
    data = mx.image.imresize(data, target_wd, target_ht)
    # transpose from (target_wd, target_ht, 3)
    # to (3, target_wd, target_ht)
    data = nd.transpose(data, (2, 0, 1))
    # normalize to [-1, 1]
    data = data.astype(np.float32) / 127.5 - 1
    # if image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data
def get_max_pred(batch_heatmaps):
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = nd.argmax(heatmaps_reshaped, 2)
    maxvals = nd.max(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = nd.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = nd.floor((preds[:, :, 1]) / width)

    pred_mask = nd.tile(nd.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals
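# Small check of get_max_pred (my own example): one 2x2 heatmap whose maximum
# sits at column 1, row 1 should decode to preds [[1, 1]] with maxval 0.9.
from mxnet import nd

heatmaps = nd.array([[[[0.1, 0.2], [0.3, 0.9]]]])  # (batch, joints, h, w)
preds, maxvals = get_max_pred(heatmaps)
print(preds.asnumpy())    # [[[1. 1.]]]
print(maxvals.asnumpy())  # [[[0.9]]]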
def crop(self, bboxes, h, w, masks):
    scale = 4
    b = bboxes.shape[0]
    ctx = bboxes.context
    with autograd.pause():
        _h = nd.arange(h, ctx=ctx)
        _w = nd.arange(w, ctx=ctx)
        _h = nd.tile(_h, reps=(b, 1))
        _w = nd.tile(_w, reps=(b, 1))
        x1, y1 = nd.round(bboxes[:, 0] / scale), nd.round(bboxes[:, 1] / scale)
        x2, y2 = nd.round(bboxes[:, 2] / scale), nd.round(bboxes[:, 3] / scale)
        _w = (_w >= x1.expand_dims(axis=-1)) * (_w <= x2.expand_dims(axis=-1))
        _h = (_h >= y1.expand_dims(axis=-1)) * (_h <= y2.expand_dims(axis=-1))
        _mask = nd.batch_dot(_h.expand_dims(axis=-1),
                             _w.expand_dims(axis=-1),
                             transpose_b=True)
        masks = _mask * masks
    return masks
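# Standalone sketch of the box-mask trick in crop() (scale handling skipped,
# toy 4x4 grid of my own): comparing tiled coordinates against box corners
# gives row/column indicators whose outer product is the rectangle mask.
from mxnet import nd

h, w = 4, 4
x1, y1, x2, y2 = nd.array([1]), nd.array([1]), nd.array([2]), nd.array([2])
_w = nd.tile(nd.arange(w), reps=(1, 1))
_h = nd.tile(nd.arange(h), reps=(1, 1))
_w = (_w >= x1.expand_dims(axis=-1)) * (_w <= x2.expand_dims(axis=-1))
_h = (_h >= y1.expand_dims(axis=-1)) * (_h <= y2.expand_dims(axis=-1))
mask = nd.batch_dot(_h.expand_dims(axis=-1), _w.expand_dims(axis=-1),
                    transpose_b=True)
print(mask[0].asnumpy())  # 1s in rows/cols 1..2, 0s elsewhere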
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):
    xmin, ymin, xmax, ymax = nd.split(data=bbox, num_outputs=4, axis=1)
    bbox_width = xmax - xmin + 1.
    bbox_height = ymax - ymin + 1.
    center_x = 0.5 * (xmin + xmax)
    center_y = 0.5 * (ymin + ymax)
    bbox_delta_reshape = nd.Reshape(data=bbox_delta, shape=(0, -1, 4))
    dx, dy, dw, dh = nd.split(data=bbox_delta_reshape,
                              num_outputs=4, axis=2, squeeze_axis=1)
    if (means is not None) and (stds is not None):
        dx = dx * stds[0] + means[0]
        dy = dy * stds[1] + means[1]
        dw = dw * stds[2] + means[2]
        dh = dh * stds[3] + means[3]

    refine_center_x = nd.broadcast_add(
        lhs=center_x, rhs=nd.broadcast_mul(lhs=bbox_width, rhs=dx))
    refine_center_y = nd.broadcast_add(
        lhs=center_y, rhs=nd.broadcast_mul(lhs=bbox_height, rhs=dy))
    refined_width = nd.broadcast_mul(lhs=bbox_width, rhs=nd.exp(dw))
    refined_height = nd.broadcast_mul(lhs=bbox_height, rhs=nd.exp(dh))
    w_offset = 0.5 * (refined_width - 1.)
    h_offset = 0.5 * (refined_height - 1.)
    refined_xmin = nd.expand_dims(refine_center_x - w_offset, axis=1)
    refined_ymin = nd.expand_dims(refine_center_y - h_offset, axis=1)
    refined_xmax = nd.expand_dims(refine_center_x + w_offset, axis=1)
    refined_ymax = nd.expand_dims(refine_center_y + h_offset, axis=1)

    refined_bbox = nd.concat(refined_xmin, refined_ymin,
                             refined_xmax, refined_ymax, dim=1)
    if im_info is not None:
        # assume im_info [[height, width, scale]] with shape (1, 3)
        im_hw = nd.slice_axis(im_info, axis=1, begin=0, end=2)
        im_wh = nd.reverse(im_hw, axis=1)
        im_wh = im_wh - 1.
        im_wh = nd.tile(data=im_wh, reps=(1, 2))
        im_wh = nd.Reshape(im_wh, shape=(1, 4, 1))
        refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=im_wh)
        refined_bbox = nd.broadcast_maximum(lhs=refined_bbox,
                                            rhs=nd.zeros_like(refined_bbox))
    return refined_bbox
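# Hedged example (box and delta values invented): a zero delta must return the
# input box unchanged; note the output carries a trailing per-class axis.
from mxnet import nd

bbox = nd.array([[10., 20., 50., 80.]])
bbox_delta = nd.array([[0., 0., 0., 0.]])
out = refine_bbox_nd(bbox, bbox_delta)
print(out.reshape((1, 4)).asnumpy())  # [[10. 20. 50. 80.]]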
def random_expand(src, max_ratio=4, fill=0, keep_ratio=True):
    """Random expand original image with borders, this is identical to placing
    the original image on a larger canvas.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    max_ratio : int or float
        Maximum ratio of the output image in both directions (vertical and horizontal)
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB channels
        will be padded with a single value. Otherwise `fill` must have the same
        length as the image channels, which results in padding with per-channel values.
    keep_ratio : bool
        If `True`, will keep the output image the same aspect ratio as the input.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, new_width, new_height)
    """
    if max_ratio <= 1:
        return src, (0, 0, src.shape[1], src.shape[0])

    h, w, c = src.shape
    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
        ratio_y = ratio_x
    else:
        ratio_y = random.uniform(1, max_ratio)

    oh, ow = int(h * ratio_y), int(w * ratio_x)
    off_y = random.randint(0, oh - h)
    off_x = random.randint(0, ow - w)

    # make canvas
    if isinstance(fill, numeric_types):
        dst = nd.full(shape=(oh, ow, c), val=fill, dtype=src.dtype)
    else:
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        dst = nd.tile(fill.reshape((1, c)), reps=(oh * ow, 1)).reshape((oh, ow, c))

    dst[off_y:off_y + h, off_x:off_x + w, :] = src
    return dst, (off_x, off_y, ow, oh)
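# Usage sketch for random_expand (toy sizes, seeded so the draw is
# reproducible): a 2x2 image lands somewhere on an enlarged zero canvas.
import random
from mxnet import nd

random.seed(0)
img = nd.ones((2, 2, 3))
out, (off_x, off_y, new_w, new_h) = random_expand(img, max_ratio=2, fill=0)
print(out.shape, (off_x, off_y, new_w, new_h))  # e.g. a (3, 3, 3) canvas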