def special_im2col(self, temp_img):  # , idx_out
    """Collect strided, offset-shifted windows of the padded input per channel.

    For every channel, one window is sliced out of the constant-padded input
    for each kernel position, starting at the row/column offsets stored in
    ``self.kernel_hw``.  Windows are stacked per channel, the channels are
    stacked together, and the two leading axes are merged into one.

    Parameters
    ----------
    temp_img : NDArray
        4-D input whose shape is unpacked as ``(N, C, height, width)``.

    Returns
    -------
    NDArray
        The stacked windows with the channel axis and the kernel-position
        axis flattened into a single leading axis.
    """
    _, channels, rows, cols = temp_img.shape

    # Replicate the per-kernel-position offsets for every channel and turn
    # them into integer NumPy arrays so they can be used as slice bounds.
    row_offsets, col_offsets = self.kernel_hw
    row_shape, col_shape = row_offsets.shape, col_offsets.shape
    row_offsets = row_offsets.broadcast_to((channels,) + row_shape).asnumpy().astype('int')
    col_offsets = col_offsets.broadcast_to((channels,) + col_shape).asnumpy().astype('int')

    # Every channel visits the same kernel positions 0 .. k*k - 1.
    kernel_positions = np.arange(self.kernel_size[1] ** 2)

    assert isinstance(self.padding, int), 'padding should be a number'
    pad_width = (0,) * 4 + (self.padding,) * 4

    stride_h, stride_w = map(int, self.strides)
    # Trim the window extent down to a whole multiple of the stride.
    rows -= rows % stride_h
    cols -= cols % stride_w

    padded = nd.pad(temp_img, mode="constant", pad_width=pad_width)
    # Move the channel axis to the front and the batch axis to the back.
    padded = padded.transpose((1, 2, 3, 0))

    per_channel = []
    for ch in range(channels):
        windows = []
        for pos in kernel_positions:
            top, left = row_offsets[ch, pos], col_offsets[ch, pos]
            bottom, right = top + rows, left + cols
            windows.append(padded[ch, top:bottom:stride_h, left:right:stride_w, :])
        per_channel.append(nd.stack(*windows))

    window_shape = per_channel[0].shape
    return nd.stack(*per_channel).reshape((-1,) + window_shape[1:])
def im2col(indut_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold the sliding filter windows of a 4-D input into a 2-D matrix.

    Parameters
    ----------
    indut_data : input data, a 4-D array of (batch size, channels, height, width)
    filter_h : filter height
    filter_w : filter width
    stride : stride
    pad : padding

    Returns
    -------
    col : 2-D array
    """
    batch, channels, in_h, in_w = indut_data.shape
    out_h = (in_h + 2 * pad - filter_h) // stride + 1
    out_w = (in_w + 2 * pad - filter_w) // stride + 1

    # Pad the two trailing axes, then do the gather on the host with NumPy.
    padded = nd.pad(indut_data, mode='constant',
                    pad_width=(0, 0, 0, 0, pad, pad, pad, pad)).asnumpy()
    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))

    # For each position inside the filter, copy the strided view of the
    # padded image that this position sees across all output locations.
    for dy in range(filter_h):
        row_sel = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            col_sel = slice(dx, dx + stride * out_w, stride)
            patches[:, :, dy, dx, :, :] = padded[:, :, row_sel, col_sel]

    # NOTE: `ctx` is not defined in this function; it is read from the
    # enclosing scope.
    col = nd.array(patches, ctx=ctx)
    # One row per (sample, out_y, out_x); columns hold (channel, fy, fx).
    return col.transpose(axes=(0, 4, 5, 1, 2, 3)).reshape(batch * out_h * out_w, -1)
def decode(self, targets, encoder_outputs, attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    embedded = self.embedding_softmax_layer(targets)

    # Shift the embedded targets one step along the length axis: add a
    # leading axis, pad one zero step in front, drop the leading axis again
    # and cut off the last step.
    shifted = nd.pad(data=nd.expand_dims(embedded, axis=0),
                     mode="constant",
                     constant_value=0,
                     pad_width=(0, 0, 0, 0, 1, 0, 0, 0))
    shifted = nd.reshape(data=shifted, shape=shifted.shape[1:])[:, :-1, :]

    length = shifted.shape[1]
    position_encoding = model_utils.get_position_encoding(
        length, self.param.hidden_size, targets.context)
    decoder_inputs = shifted + position_encoding

    if self.train:
        decoder_inputs = self.dropout_output(decoder_inputs)

    self_attention_bias = model_utils.get_decoder_self_attention_bias(
        length, targets.context)
    decoded = self.decoder_stack(decoder_inputs, encoder_outputs,
                                 self_attention_bias, attention_bias)
    return self.embedding_softmax_layer.linear(decoded)
def validate_batch(self, img1, img2, flow):
    """Run the network on one image pair and return the metric as NumPy.

    Both images are zero-padded at the trailing end of axes 2 and 3 so that
    those sizes become multiples of 64, then preprocessed and passed to
    ``self.network``.  The prediction is scored against ``flow`` with
    ``self.metrics``.

    Returns:
        The result of ``self.metrics(pred, flow)`` converted with ``asnumpy()``.
    """
    dims = img1.shape
    # Amount needed to reach the next multiple of 64 (0 if already aligned).
    pad_h = -dims[2] % 64
    pad_w = -dims[3] % 64

    if pad_h or pad_w:
        pad_width = (0, 0, 0, 0, 0, pad_h, 0, pad_w)
        img1 = nd.pad(img1, mode='constant', constant_value=0, pad_width=pad_width)
        img2 = nd.pad(img2, mode='constant', constant_value=0, pad_width=pad_width)

    first = self.preprocess(img1)
    second = self.preprocess(img2)
    pred = self.network(first, second)
    return self.metrics(pred, flow).asnumpy()
def transform(self, input_data):
    """Convolve the input with ``self.forward_basis`` and split the result
    into magnitude and phase.

    Side effect: stores ``input_data.shape[1]`` in ``self.num_samples``.

    Returns:
        A ``(magnitude, phase)`` tuple.  ``phase`` is computed on the host
        with NumPy and wrapped back into an NDArray.
    """
    num_batches, num_samples = input_data.shape[0], input_data.shape[1]
    self.num_samples = num_samples

    # similar to librosa, reflect-pad the input
    half_filter = self.filter_length // 2
    signal = input_data.reshape(num_batches, 1, num_samples).expand_dims(1)
    signal = nd.pad(signal, 'reflect',
                    (0, 0, 0, 0, 0, 0, half_filter, half_filter))
    signal = signal.squeeze(axis=1)

    basis = self.forward_basis
    spectrum = nd.Convolution(
        signal,
        basis,
        no_bias=True,
        kernel=basis.shape[2],
        num_filter=basis.shape[0],
        stride=self.hop_length,
        pad=0)

    # The first `cutoff` output channels are treated as the real part and
    # the remaining ones as the imaginary part.
    cutoff = int((self.filter_length / 2) + 1)
    real_part = spectrum[:, :cutoff, :]
    imag_part = spectrum[:, cutoff:, :]

    magnitude = nd.sqrt(real_part**2 + imag_part**2)
    phase = nd.array(np.arctan2(imag_part.asnumpy(), real_part.asnumpy()))
    return magnitude, phase
def test_pad():
    """Check `nd.pad` in "edge" mode on a large 4-D tensor.

    The input plane is two rows and two columns smaller than
    ``(SMALL_Y, LARGE_X // 2)``; padding by one on each side of the two
    trailing axes must restore that size and replicate the border values.
    """
    rows = SMALL_Y - 2
    cols = LARGE_X // 2 - 2
    x = create_2d_tensor(rows=rows, columns=cols, dtype=np.float32).reshape(1, 1, rows, cols)

    y = nd.pad(x, mode="edge", pad_width=(0, 0, 0, 0, 1, 1, 1, 1))
    plane = y[0][0]

    # First data row: both padded end columns are expected to hold 0.
    assert plane[1][0] == 0
    assert plane[1][-1] == 0
    # Last (padded) row: expected to hold the value SMALL_Y - 3.
    assert plane[-1][0] == SMALL_Y - 3
    assert plane[-1][-1] == SMALL_Y - 3
    assert y.shape == (1, 1, SMALL_Y, LARGE_X // 2)
def predict_batch_mx(self, img1, img2, flow):
    """Predict a batch of samples range [0,1] with network preprocessing and padding.

    Both images are zero-padded at the trailing end of axes 2 and 3 up to a
    multiple of 64, ``self.rgb_mean`` is subtracted, and the pair is fed to
    ``self.network``.  ``flow`` is accepted but not used here.

    Returns:
        The raw output of ``self.network``.
    """
    dims = img1.shape
    # Amount needed to reach the next multiple of 64 (0 if already aligned).
    pad_h = -dims[2] % 64
    pad_w = -dims[3] % 64

    if pad_h or pad_w:
        pad_width = (0, 0, 0, 0, 0, pad_h, 0, pad_w)
        img1 = nd.pad(img1, mode='constant', constant_value=0, pad_width=pad_width)
        img2 = nd.pad(img2, mode='constant', constant_value=0, pad_width=pad_width)

    # Bring the mean onto the same context as the images before subtracting.
    mean = self.rgb_mean.as_in_context(img1.context)
    return self.network(img1 - mean, img2 - mean)
def forward(self, x_in):
    """Pad the input with ones, convolve it and apply a learned exponential scale.

    Returns:
        ``self.conv(padded) * exp(log_s * self.log_scale_factor)``.
    """
    # Pad along height and width and learn the identity function
    padded = nd.pad(x_in,
                    mode='constant',
                    pad_width=(0, 0, 0, 0, 1, 1, 1, 1),
                    constant_value=1)
    convolved = self.conv(padded)

    # Not in paper, but in the glow code
    log_s = self.log_s.data(x_in.context)
    scale = nd.exp(log_s * self.log_scale_factor)
    return convolved * scale
def im2col_indices(x, field_height, field_width, padding, stride):
    """An implementation of im2col based on some fancy indexing.

    The input is constant-padded on its two trailing axes, the index arrays
    from ``get_im2col_indices`` are used to gather the patches, and the
    result is reshaped so that its first axis has length
    ``field_height * field_width * C`` (``C = x.shape[1]``).
    """
    ctx = x.context

    # Zero-pad the input
    pad_width = (0, 0, 0, 0) + (padding,) * 4
    x_padded = nd.pad(x, pad_width=pad_width, mode='constant')

    k, i, j = get_im2col_indices(x.shape, field_height, field_width,
                                 padding, stride, ctx=ctx)
    gathered = x_padded[:, k, i, j]

    channels = x.shape[1]
    n_rows = field_height * field_width * channels
    return gathered.transpose((1, 2, 0)).reshape((n_rows, -1))
def augment(data, auglist):
    """Pad a batch by 2 on each side of axes 2 and 3 and augment every sample.

    The batch is moved to channels-last layout, each sample is passed
    through every augmenter in ``auglist`` in order, and the results are
    stacked and moved back to the original axis order.
    """
    padded = nd.pad(data,
                    pad_width=(0, 0, 0, 0, 2, 2, 2, 2),
                    mode='constant',
                    constant_value=0)
    channels_last = nd.transpose(padded, (0, 2, 3, 1))

    def _run_all(sample):
        # Chain the augmenters: each one receives the previous one's output.
        for aug in auglist:
            sample = aug(sample)
        return sample

    augmented = [_run_all(sample) for sample in channels_last]
    stacked = nd.stack(*augmented)
    return nd.transpose(stacked, (0, 3, 1, 2))
def _pad_tensors_to_same_length(x, y):
    """Pad x and y so that the result have the same length (second dimension).

    Zeros are appended at the end of axis 1 of whichever input is shorter.
    Temporary leading axes are added around each `nd.pad` call and removed
    again afterwards: one for ``x``, two for ``y``.
    """
    x_length, y_length = x.shape[1], y.shape[1]
    max_length = max(x_length, y_length)

    # x: one extra leading axis, so its axis 1 becomes axis 2 for nd.pad.
    x_wrapped = nd.expand_dims(x, axis=0)
    x_wrapped = nd.pad(x_wrapped,
                       mode="constant",
                       constant_value=0,
                       pad_width=(0, 0, 0, 0, 0, max_length - x_length, 0, 0))
    x = nd.squeeze(x_wrapped, axis=0)

    # y: two extra leading axes, so its axis 1 becomes axis 3 for nd.pad.
    y_wrapped = nd.expand_dims(nd.expand_dims(y, axis=0), axis=0)
    y_wrapped = nd.pad(y_wrapped,
                       mode="constant",
                       constant_value=0,
                       pad_width=(0, 0, 0, 0, 0, 0, 0, max_length - y_length))
    y = nd.squeeze(nd.squeeze(y_wrapped, axis=0), axis=0)

    return x, y
def augment(data):
    """Pad a batch by 2 on each side of axes 2 and 3 and apply random
    crop / mirror augmenters to every sample.

    The augmenters come from ``image.CreateAugmenter`` with a
    ``(3, 32, 32)`` data shape.  Samples are augmented in channels-last
    layout and the stacked result is moved back to the original axis order.
    """
    augmenters = image.CreateAugmenter(data_shape=(3, 32, 32),
                                       rand_crop=True,
                                       rand_mirror=True)

    padded = nd.pad(data,
                    pad_width=(0, 0, 0, 0, 2, 2, 2, 2),
                    mode='constant',
                    constant_value=0)
    channels_last = nd.transpose(padded, (0, 2, 3, 1))

    def _run_all(sample):
        # Chain the augmenters: each one receives the previous one's output.
        for aug in augmenters:
            sample = aug(sample)
        return sample

    augmented = [_run_all(sample) for sample in channels_last]
    stacked = nd.stack(*augmented)
    return nd.transpose(stacked, (0, 3, 1, 2))
def hybrid_forward(self, F, x1, *args, **kwargs):
    """Upsample ``x1``, pad it to the size of the skip input and fuse both.

    The skip input is taken from ``args[0]``.  After ``self.up``, ``x1`` is
    zero-padded on axes 2 and 3 so that it matches the skip input there,
    both are concatenated along axis 1, the resulting shape is logged, and
    ``self.conv`` is applied.
    """
    skip = args[0]
    upsampled = self.up(x1)

    # The paper crops the skip input instead:
    #   x2 = x2[:, :, :x1.shape[2], :x1.shape[3]]
    # Here x1 is filled up to the shape of x2.
    diff_y = skip.shape[2] - upsampled.shape[2]
    diff_x = skip.shape[3] - upsampled.shape[3]
    # Split each difference between the two sides; the trailing side takes
    # the remainder.
    before_y, before_x = diff_y // 2, diff_x // 2
    pad_width = (0, 0, 0, 0,
                 before_y, diff_y - before_y,
                 before_x, diff_x - before_x)
    upsampled = nd.pad(upsampled,
                       mode='constant',
                       constant_value=0,
                       pad_width=pad_width)

    x = nd.concat(upsampled, skip, dim=1)
    logging.info(x.shape)
    return self.conv(x)
def im2col_indices(x, field_height, field_width, padding, stride):
    """An implementation of im2col based on some fancy indexing.

    Pads the two trailing axes of ``x`` by ``padding`` on every side,
    gathers the patches with the index arrays returned by
    ``get_im2col_indices`` and returns them as a 2-D array whose first axis
    has length ``field_height * field_width * x.shape[1]``.
    """
    # Zero-pad the input
    ctx = x.context
    border = (padding, padding, padding, padding)
    x_padded = nd.pad(x, pad_width=(0, 0, 0, 0) + border, mode='constant')

    index_k, index_i, index_j = get_im2col_indices(
        x.shape, field_height, field_width, padding, stride, ctx=ctx)
    patches = x_padded[:, index_k, index_i, index_j]

    num_channels = x.shape[1]
    patches = patches.transpose((1, 2, 0))
    return patches.reshape((field_height * field_width * num_channels, -1))
def transform(data, label):
    """Rescale a 28x28 sample into the ``bbox`` range and cast its label.

    ``bbox`` and ``pad`` are not parameters; they are read from the
    enclosing scope.  Values are divided by 255 and mapped linearly onto
    ``[bbox[0], bbox[1]]``.  When ``pad`` is truthy the sample is also padded
    by 2 on every side of its two trailing axes with ``bbox[0]``.

    Returns:
        ``(data, label)`` with ``label`` cast to ``int32``.
    """
    low, high = bbox[0], bbox[1]
    unit = data.astype('float32') / 255.
    data = (unit * (high - low) + low).reshape((1, 28, 28))

    if pad:
        # A leading axis is added for nd.pad and removed again by the [0].
        batched = data.reshape(1, 1, 28, 28)
        data = nd.pad(batched, 'constant', constant_value=low,
                      pad_width=[0, 0, 0, 0, 2, 2, 2, 2])[0]

    label = label.astype('int32')
    return (data, label)
from mxnet import nd


# NOTE(review): `plt`, `mx`, `np` and `X` are used below but are not imported
# or defined in the visible part of this file -- confirm they are provided
# elsewhere.


def view_single(data, id):
    """Display one sample of a batch and return it as a NumPy array.

    Args:
        data: batch of NDArray samples indexed by ``id``.
        id: index of the sample to show.

    Returns:
        The displayed sample after ``asnumpy()``.  When ``data.shape[1]`` is
        3 the sample is transposed with ``(1, 2, 0)`` first.
    """
    if data.shape[1] == 3:
        img = data[id].transpose((1, 2, 0)).asnumpy()
    else:
        img = data[id].asnumpy()
    plt.imshow(img)
    plt.show()
    return img


def imshow(x):
    """Show a single image, converting it for display first.

    A leading axis of size 3 is moved to the end, an MXNet NDArray is
    converted to NumPy, and ``float32`` data is cast to ``uint8``.  If
    drawing fails, the array shape is printed instead of raising.
    """
    if x.shape[0] == 3:
        x = x.transpose((1, 2, 0))
    if isinstance(x, mx.ndarray.ndarray.NDArray):
        x = x.asnumpy()
    if x.dtype == np.float32:
        x = np.uint8(x)
    try:
        plt.imshow(x)
    except Exception:
        # Best-effort debugging aid: report the offending shape.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        print(x.shape)
    plt.show()


if __name__ == "__main__":
    # Reorder X with (0, 3, 1, 2), cast to float32 and pad the two trailing
    # axes by 4 on every side.
    X_padded = nd.pad(X.transpose((0, 3, 1, 2)).astype(np.float32),
                      mode='constant',
                      pad_width=(0, 0, 0, 0, 4, 4, 4, 4))