def test_max_pool_non_zero_pads():
    """MaxPool over a 4x4 input with a 1-pixel zero pad on every side.

    With a 2x2 kernel and stride 1 the padded 6x6 plane yields a 5x5
    output; both the pooled values and the flattened-input indices
    (second output of MaxPool) are checked.
    """
    runtime = get_runtime()
    # Input holds the values 0.5 .. 15.5 as a single 1x1x4x4 feature map.
    data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4))

    strides = [1, 1]
    dilations = [1, 1]
    pads_begin = [1, 1]
    pads_end = [1, 1]
    kernel_shape = [2, 2]
    rounding_type = "floor"
    auto_pad = None
    index_et = "i32"

    data_node = ng.parameter(data.shape, name="A", dtype=np.float32)
    maxpool_node = ng.max_pool(
        data_node,
        strides,
        dilations,
        pads_begin,
        pads_end,
        kernel_shape,
        rounding_type,
        auto_pad,
        index_et,
    )
    computation = runtime.computation(maxpool_node, data_node)
    result = computation(data)

    # The padded border never contributes a maximum, so the last column and
    # the last row of the input are simply repeated along the output edges.
    expected = np.array(
        [[[
            [0.5, 1.5, 2.5, 3.5, 3.5],
            [4.5, 5.5, 6.5, 7.5, 7.5],
            [8.5, 9.5, 10.5, 11.5, 11.5],
            [12.5, 13.5, 14.5, 15.5, 15.5],
            [12.5, 13.5, 14.5, 15.5, 15.5],
        ]]],
        dtype=np.float32,
    )
    expected_idx = np.array(
        [[[
            [0, 1, 2, 3, 3],
            [4, 5, 6, 7, 7],
            [8, 9, 10, 11, 11],
            [12, 13, 14, 15, 15],
            [12, 13, 14, 15, 15],
        ]]],
        dtype=np.int32,
    )
    assert np.allclose(result[0], expected)
    assert np.allclose(result[1], expected_idx)
def test_max_pool_same_lower_auto_pads():
    """MaxPool with auto_pad="same_lower" keeps the 4x4 spatial size.

    With a 2x2 kernel and stride 1, "same_lower" adds the padding at the
    beginning of each spatial axis, so every window's maximum is the input
    element itself — the output equals the input.
    """
    rt = get_runtime()
    # Input holds the values 0.5 .. 15.5 as a single 1x1x4x4 feature map.
    data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4))
    strides = [1, 1]
    pads_begin = [0, 0]  # explicit pads are superseded by auto_pad below
    pads_end = [0, 0]
    kernel_shape = [2, 2]
    auto_pad = "same_lower"

    data_node = ng.parameter(data.shape, name="A", dtype=np.float32)
    # Fix: the node was previously bound to a variable misleadingly named
    # `avgpool_node` although the op under test is max_pool.
    maxpool_node = ng.max_pool(data_node, strides, pads_begin, pads_end, kernel_shape, auto_pad=auto_pad)
    comp = rt.computation(maxpool_node, data_node)
    result = comp(data)
    expected = np.array(
        [[[[0.5, 1.5, 2.5, 3.5],
           [4.5, 5.5, 6.5, 7.5],
           [8.5, 9.5, 10.5, 11.5],
           [12.5, 13.5, 14.5, 15.5]]]],
        dtype=np.float32,
    )
    assert np.allclose(result, expected)
def test_max_pool_kernel_shape3x3():
    """MaxPool with a 3x3 kernel, stride 1 and no padding: 4x4 in, 2x2 out."""
    runtime = get_runtime()
    # Input holds the values 0.5 .. 15.5 as a single 1x1x4x4 feature map.
    data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4))

    data_node = ng.parameter(data.shape, name="A", dtype=np.float32)
    # Positional arguments: strides, dilations, pads_begin, pads_end,
    # kernel shape, rounding type, auto_pad, index element type.
    maxpool_node = ng.max_pool(
        data_node,
        [1, 1],
        [1, 1],
        [0, 0],
        [0, 0],
        [3, 3],
        "floor",
        None,
        "i32",
    )
    computation = runtime.computation(maxpool_node, data_node)
    result = computation(data)

    # On a monotonically increasing ramp, each 3x3 window's maximum is its
    # bottom-right element.
    expected = np.array([[[[10.5, 11.5], [14.5, 15.5]]]], dtype=np.float32)
    assert np.allclose(result[0], expected)
def __init__(self, *args, upsample_ratio=1, **kwargs):
    """Initialize the OpenPose wrapper and patch the network with keypoint NMS.

    :param upsample_ratio: feature-map upsampling ratio; used by the
        heuristic below to size the NMS max-pool kernel.
    All other positional/keyword arguments are forwarded to the base class.
    """
    super().__init__(*args, **kwargs)
    # Canonical names for the three output blobs this class consumes later.
    self.pooled_heatmaps_blob_name = 'pooled_heatmaps'
    self.heatmaps_blob_name = 'heatmaps'
    self.pafs_blob_name = 'pafs'

    function = ng.function_from_cnn(self.net)
    # NOTE(review): output 0 is taken to be the PAFs and output 1 the
    # heatmaps — this ordering is assumed, not validated here.
    # For each output, step back through the Result node to its producer so
    # it can be given a stable friendly name.
    paf = function.get_output_op(0)
    paf = paf.inputs()[0].get_source_output().get_node()
    paf.set_friendly_name(self.pafs_blob_name)
    heatmap = function.get_output_op(1)
    heatmap = heatmap.inputs()[0].get_source_output().get_node()
    heatmap.set_friendly_name(self.heatmaps_blob_name)

    # Add keypoints NMS to the network.
    # Heuristic NMS kernel size adjustment depending on the feature maps upsampling ratio.
    p = int(np.round(6 / 7 * upsample_ratio))
    k = 2 * p + 1  # kernel k with symmetric pad p keeps the spatial size (stride 1)
    pooled_heatmap = ng.max_pool(heatmap,
                                 kernel_shape=(k, k),
                                 pads_begin=(p, p),
                                 pads_end=(p, p),
                                 strides=(1, 1),
                                 name=self.pooled_heatmaps_blob_name)
    # Rebuild the function so it exposes heatmaps, pooled heatmaps and PAFs.
    f = ng.impl.Function([
        ng.result(heatmap, name=self.heatmaps_blob_name),
        ng.result(pooled_heatmap, name=self.pooled_heatmaps_blob_name),
        ng.result(paf, name=self.pafs_blob_name)
    ], function.get_parameters(), 'hpe')
    self.image_blob_name = self._get_inputs(self.net)
    # Replace the wrapped network with the patched one and (re)load it.
    self.net = IENetwork(ng.impl.Function.to_capsule(f))
    self.exec_net = self.ie.load_network(
        network=self.net,
        device_name=self.device,
        num_requests=self.max_num_requests)
    self.requests = self.exec_net.requests
    self.empty_requests = deque(self.requests)
    self.num_joints = self.net.outputs[self.heatmaps_blob_name].shape[
        1] - 1  # The last channel is for background.
    target_size = self.net.input_info[
        self.image_blob_name].input_data.shape[-2]
    # Ratio of the network input height to the heatmap height.
    self.output_scale = target_size / self.net.outputs[
        self.heatmaps_blob_name].shape[-2]
    if self.target_size is None:
        self.target_size = target_size
    self.decoder = OpenPoseDecoder(num_joints=self.num_joints)
def test_max_pool_kernel_shape1x1():
    """A 1x1 max-pool with stride 1 and no padding is the identity map."""
    runtime = get_runtime()
    # Input holds the values 0.5 .. 15.5 as a single 1x1x4x4 feature map.
    data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4))

    data_node = ng.parameter(data.shape, name="A", dtype=np.float32)
    maxpool_node = ng.max_pool(data_node, [1, 1], [0, 0], [0, 0], [1, 1])
    result = runtime.computation(maxpool_node, data_node)(data)

    # Every window contains exactly one element, so the output is the input.
    assert np.allclose(result, data)
def __init__(self, ie, model_path, target_size, aspect_ratio, prob_threshold, size_divisor=8, upsample_ratio=1):
    """Load the OpenPose network, append a heatmap-NMS max-pool and reshape it.

    :param ie: inference-engine core instance forwarded to the base class
    :param model_path: path to the model forwarded to the base class
    :param target_size: desired input height; when None the network's
        default input height is used
    :param aspect_ratio: width/height ratio used to derive the input width
    :param prob_threshold: keypoint score threshold passed to the decoder
    :param size_divisor: input H/W are rounded up to a multiple of this
    :param upsample_ratio: feature-map upsampling ratio; sizes the NMS
        pooling kernel (see heuristic below)
    """
    super().__init__(ie, model_path)
    self.image_blob_name = self._get_inputs(self.net)
    # Canonical names for the three output blobs this class consumes later.
    self.pooled_heatmaps_blob_name = 'pooled_heatmaps'
    self.heatmaps_blob_name = 'heatmaps'
    self.pafs_blob_name = 'pafs'

    function = ng.function_from_cnn(self.net)
    # NOTE(review): output 0 is taken to be the PAFs and output 1 the
    # heatmaps — this ordering is assumed, not validated here.
    # Step back through each Result node to its producer to rename it.
    paf = function.get_output_op(0)
    paf = paf.inputs()[0].get_source_output().get_node()
    paf.set_friendly_name(self.pafs_blob_name)
    heatmap = function.get_output_op(1)
    heatmap = heatmap.inputs()[0].get_source_output().get_node()
    heatmap.set_friendly_name(self.heatmaps_blob_name)
    # Add keypoints NMS to the network.
    # Heuristic NMS kernel size adjustment depending on the feature maps upsampling ratio.
    p = int(np.round(6 / 7 * upsample_ratio))
    k = 2 * p + 1  # kernel k with symmetric pad p keeps the spatial size (stride 1)
    pooled_heatmap = ng.max_pool(heatmap,
                                 kernel_shape=(k, k),
                                 pads_begin=(p, p),
                                 pads_end=(p, p),
                                 strides=(1, 1),
                                 name=self.pooled_heatmaps_blob_name)
    # Rebuild the function so it exposes heatmaps, pooled heatmaps and PAFs.
    f = ng.impl.Function(
        [ng.result(heatmap, name=self.heatmaps_blob_name),
         ng.result(pooled_heatmap, name=self.pooled_heatmaps_blob_name),
         ng.result(paf, name=self.pafs_blob_name)],
        function.get_parameters(), 'hpe')
    self.net = IENetwork(ng.impl.Function.to_capsule(f))

    # Ratio of the network input height to the heatmap height (computed
    # before the reshape below).
    self.output_scale = self.net.input_info[self.image_blob_name].input_data.shape[-2] / self.net.outputs[self.heatmaps_blob_name].shape[-2]

    if target_size is None:
        target_size = self.net.input_info[self.image_blob_name].input_data.shape[-2]
    # Round H and W up to the nearest multiple of size_divisor.
    self.h = (target_size + size_divisor - 1) // size_divisor * size_divisor
    input_width = round(target_size * aspect_ratio)
    self.w = (input_width + size_divisor - 1) // size_divisor * size_divisor
    default_input_shape = self.net.input_info[self.image_blob_name].input_data.shape
    input_shape = {self.image_blob_name: (default_input_shape[:-2] + [self.h, self.w])}
    self.logger.info('Reshape net to {}'.format(input_shape))
    self.net.reshape(input_shape)

    num_joints = self.net.outputs[self.heatmaps_blob_name].shape[1] - 1  # The last channel is for background
    self.decoder = OpenPoseDecoder(num_joints, score_threshold=prob_threshold)
    self.size_divisor = size_divisor
def make_pooling_op(onnx_node, ng_inputs, kernel_shape=None):
    # type: (NodeWrapper, List[NgraphNode], List[int]) -> NgraphNode
    """
    Create an ngraph pooling Op based on an ONNX node.

    :param onnx_node: wrapped ONNX node for a pooling op
    :param ng_inputs: ngraph TensorOp input tensors
    :param kernel_shape: kernel shape for this op
    :return: ngraph pooling op
    :raises NotImplementedError: if the node's pooling type is neither avg nor max
    """
    x = ng_inputs[0]
    op_type = get_op_type(onnx_node)

    # We assume data are in [D1,...,DN] format thus we subtract [N,C] dimensions.
    spatial_dims = len(x.shape) - 2  # get spatial dimensions

    if kernel_shape is None:
        kernel_shape = get_kernel_shape(onnx_node)
    kernel_shape = reduce_extra_dims(spatial_dims, kernel_shape, onnx_node)

    strides = get_strides(onnx_node, kernel_shape)
    padding_below, padding_above = get_pads(onnx_node, kernel_shape)

    # Drop attribute entries for dimensions beyond the spatial ones.
    strides = reduce_extra_dims(spatial_dims, strides, onnx_node)
    padding_above = reduce_extra_dims(spatial_dims, padding_above, onnx_node)
    padding_below = reduce_extra_dims(spatial_dims, padding_below, onnx_node)

    include_pad = onnx_node.get_attribute_value('count_include_pad', 0) != 0

    if op_type == 'avg':
        ng_op = ng.avg_pool(x, kernel_shape, strides, padding_below, padding_above,
                            include_padding=include_pad)
    elif op_type == 'max':
        ng_op = ng.max_pool(x, kernel_shape, strides, padding_below, padding_above)
    else:
        # Bug fix: the message arguments were previously passed as extra
        # positional args to the exception constructor (logging-style), so
        # the '%s' placeholders were never interpolated. Format explicitly.
        raise NotImplementedError('%s node (%s): Unsupported pooling type.'
                                  % (onnx_node.op_type, onnx_node.name))

    return ng_op
def create_ngraph_function(args) -> Function:
    """Build a LeNet-style classifier graph, loading weights from a raw file.

    Weights are read as one flat float32 blob from ``args.model``;
    ``weights_offset`` tracks the read position as each layer's parameters
    are sliced off in network order (conv1, add1, conv2, add2, reshape,
    fc1, add3, fc2, add4).

    :param args: parsed arguments; only ``args.model`` (weights path) is used
    :return: an nGraph Function named 'lenet' with a fixed 64x1x28x28 input
    """
    weights = np.fromfile(args.model, dtype=np.float32)
    weights_offset = 0
    padding_begin = [0, 0]
    padding_end = [0, 0]

    # input
    input_shape = [64, 1, 28, 28]  # fixed batch of 64 single-channel 28x28 images
    param_node = ngraph.parameter(input_shape, np.float32, 'Parameter')

    # convolution 1
    conv_1_kernel_shape, conv_1_kernel_length = shape_and_length([20, 1, 5, 5])
    conv_1_kernel = ngraph.constant(
        weights[0:conv_1_kernel_length].reshape(conv_1_kernel_shape))
    weights_offset += conv_1_kernel_length
    conv_1_node = ngraph.convolution(param_node, conv_1_kernel, [1, 1],
                                     padding_begin, padding_end, [1, 1])

    # add 1 (per-channel bias)
    add_1_kernel_shape, add_1_kernel_length = shape_and_length([1, 20, 1, 1])
    add_1_kernel = ngraph.constant(
        weights[weights_offset:weights_offset +
                add_1_kernel_length].reshape(add_1_kernel_shape))
    weights_offset += add_1_kernel_length
    add_1_node = ngraph.add(conv_1_node, add_1_kernel)

    # maxpool 1 (2x2 window, stride 2, ceil rounding)
    maxpool_1_node = ngraph.max_pool(add_1_node, [2, 2], padding_begin,
                                     padding_end, [2, 2], 'ceil', None)

    # convolution 2
    conv_2_kernel_shape, conv_2_kernel_length = shape_and_length(
        [50, 20, 5, 5])
    conv_2_kernel = ngraph.constant(
        weights[weights_offset:weights_offset +
                conv_2_kernel_length].reshape(conv_2_kernel_shape))
    weights_offset += conv_2_kernel_length
    conv_2_node = ngraph.convolution(maxpool_1_node, conv_2_kernel, [1, 1],
                                     padding_begin, padding_end, [1, 1])

    # add 2 (per-channel bias)
    add_2_kernel_shape, add_2_kernel_length = shape_and_length([1, 50, 1, 1])
    add_2_kernel = ngraph.constant(
        weights[weights_offset:weights_offset +
                add_2_kernel_length].reshape(add_2_kernel_shape))
    weights_offset += add_2_kernel_length
    add_2_node = ngraph.add(conv_2_node, add_2_kernel)

    # maxpool 2
    maxpool_2_node = ngraph.max_pool(add_2_node, [2, 2], padding_begin,
                                     padding_end, [2, 2], 'ceil', None)

    # reshape 1
    reshape_1_dims, reshape_1_length = shape_and_length([2])
    # workaround to get int64 weights from float32 ndarray w/o unnecessary copying
    # (each int64 value occupies two float32 slots, hence the factor of 2)
    dtype_weights = np.frombuffer(
        weights[weights_offset:weights_offset + 2 * reshape_1_length],
        dtype=np.int64)
    reshape_1_kernel = ngraph.constant(dtype_weights)
    weights_offset += 2 * reshape_1_length
    reshape_1_node = ngraph.reshape(maxpool_2_node, reshape_1_kernel, True)

    # matmul 1 (fully connected, weights transposed via transpose_b=True)
    matmul_1_kernel_shape, matmul_1_kernel_length = shape_and_length(
        [500, 800])
    matmul_1_kernel = ngraph.constant(
        weights[weights_offset:weights_offset +
                matmul_1_kernel_length].reshape(matmul_1_kernel_shape))
    weights_offset += matmul_1_kernel_length
    matmul_1_node = ngraph.matmul(reshape_1_node, matmul_1_kernel, False, True)

    # add 3 (bias)
    add_3_kernel_shape, add_3_kernel_length = shape_and_length([1, 500])
    add_3_kernel = ngraph.constant(
        weights[weights_offset:weights_offset +
                add_3_kernel_length].reshape(add_3_kernel_shape))
    weights_offset += add_3_kernel_length
    add_3_node = ngraph.add(matmul_1_node, add_3_kernel)

    # ReLU
    relu_node = ngraph.relu(add_3_node)

    # reshape 2 (reuses the same target dims constant as reshape 1)
    reshape_2_kernel = ngraph.constant(dtype_weights)
    reshape_2_node = ngraph.reshape(relu_node, reshape_2_kernel, True)

    # matmul 2 (fully connected)
    matmul_2_kernel_shape, matmul_2_kernel_length = shape_and_length([10, 500])
    matmul_2_kernel = ngraph.constant(
        weights[weights_offset:weights_offset +
                matmul_2_kernel_length].reshape(matmul_2_kernel_shape))
    weights_offset += matmul_2_kernel_length
    matmul_2_node = ngraph.matmul(reshape_2_node, matmul_2_kernel, False, True)

    # add 4 (bias)
    add_4_kernel_shape, add_4_kernel_length = shape_and_length([1, 10])
    add_4_kernel = ngraph.constant(
        weights[weights_offset:weights_offset +
                add_4_kernel_length].reshape(add_4_kernel_shape))
    weights_offset += add_4_kernel_length
    add_4_node = ngraph.add(matmul_2_node, add_4_kernel)

    # softmax over the class dimension
    softmax_axis = 1
    softmax_node = ngraph.softmax(add_4_node, softmax_axis)

    # result
    result_node = ngraph.result(softmax_node)

    # nGraph function
    function = Function(result_node, [param_node], 'lenet')
    return function
def test_max_pool():
    """Exercise ng.max_pool on 1-D and 2-D inputs, with unit and non-unit strides."""
    runtime = get_runtime()
    fp32 = Type.f32

    # --- 1-D, stride 1: window of 3 over 0..9 -> maxima are 2..9 ---
    param_1d = Parameter(fp32, Shape([1, 1, 10]))
    data_1d = np.arange(10, dtype=np.float32).reshape([1, 1, 10])
    window = [3]
    zeros = [0] * len(window)

    node = ng.max_pool(param_1d, [1] * len(window), zeros, zeros, window)
    func = Function([node], [param_1d], "test")
    result = runtime.computation(func, param_1d)(data_1d)[0]
    assert np.allclose(result, (np.arange(8) + 2).reshape(1, 1, 8))

    # --- 1-D, stride 2: every other window maximum ---
    node = ng.max_pool(param_1d, [2], zeros, zeros, window)
    func = Function([node], [param_1d], "test")
    result = runtime.computation(func, param_1d)(data_1d)[0]
    out_len = 4
    assert np.allclose(result, ((np.arange(out_len) + 1) * 2).reshape(1, 1, out_len))

    # --- 2-D, stride 1: 3x3 window over a 10x10 ramp ---
    param_2d = Parameter(fp32, Shape([1, 1, 10, 10]))
    data_2d = np.arange(100, dtype=np.float32).reshape(1, 1, 10, 10)
    window = [3, 3]

    node = ng.max_pool(param_2d, [1, 1], [0, 0], [0, 0], window)
    func = Function([node], [param_2d], "test")
    result = runtime.computation(func, param_2d)(data_2d)[0]
    # Each window's maximum is its bottom-right element on a ramp.
    expected = (np.arange(100).reshape(10, 10))[2:, 2:].reshape(1, 1, 8, 8)
    assert np.allclose(result, expected)

    # --- 2-D, stride 2 ---
    node = ng.max_pool(param_2d, [2, 2], [0, 0], [0, 0], window)
    func = Function([node], [param_2d], "test")
    result = runtime.computation(func, param_2d)(data_2d)[0]
    out_len = 4
    expected = (np.arange(100).reshape(10, 10))[2::2, 2::2].reshape(1, 1, out_len, out_len)
    assert np.allclose(result, expected)
def create_ngraph_function(args: argparse.Namespace) -> ngraph.impl.Function:
    """Create a network on the fly from the source code using ngraph"""
    def shape_and_length(shape: list) -> typing.Tuple[list, int]:
        # Return the shape together with its element count (product of dims).
        length = reduce(lambda x, y: x * y, shape)
        return shape, length

    # Weights are one flat float32 blob; weights_offset tracks the read
    # position as each layer's parameters are sliced off in network order.
    weights = np.fromfile(args.model, dtype=np.float32)
    weights_offset = 0
    padding_begin = padding_end = [0, 0]

    # input
    input_shape = [64, 1, 28, 28]  # fixed batch of 64 single-channel 28x28 images
    param_node = ngraph.parameter(input_shape, np.float32, 'Parameter')

    # convolution 1
    conv_1_kernel_shape, conv_1_kernel_length = shape_and_length([20, 1, 5, 5])
    conv_1_kernel = ngraph.constant(
        weights[0:conv_1_kernel_length].reshape(conv_1_kernel_shape))
    weights_offset += conv_1_kernel_length
    conv_1_node = ngraph.convolution(param_node, conv_1_kernel, [1, 1],
                                     padding_begin, padding_end, [1, 1])

    # add 1 (per-channel bias)
    add_1_kernel_shape, add_1_kernel_length = shape_and_length([1, 20, 1, 1])
    add_1_kernel = ngraph.constant(
        weights[weights_offset:weights_offset + add_1_kernel_length].reshape(add_1_kernel_shape),
    )
    weights_offset += add_1_kernel_length
    add_1_node = ngraph.add(conv_1_node, add_1_kernel)

    # maxpool 1 (2x2 window, stride 2, ceil rounding)
    maxpool_1_node = ngraph.max_pool(add_1_node, [2, 2], padding_begin,
                                     padding_end, [2, 2], 'ceil', None)

    # convolution 2
    conv_2_kernel_shape, conv_2_kernel_length = shape_and_length(
        [50, 20, 5, 5])
    conv_2_kernel = ngraph.constant(
        weights[weights_offset:weights_offset + conv_2_kernel_length].reshape(conv_2_kernel_shape),
    )
    weights_offset += conv_2_kernel_length
    conv_2_node = ngraph.convolution(maxpool_1_node, conv_2_kernel, [1, 1],
                                     padding_begin, padding_end, [1, 1])

    # add 2 (per-channel bias)
    add_2_kernel_shape, add_2_kernel_length = shape_and_length([1, 50, 1, 1])
    add_2_kernel = ngraph.constant(
        weights[weights_offset:weights_offset + add_2_kernel_length].reshape(add_2_kernel_shape),
    )
    weights_offset += add_2_kernel_length
    add_2_node = ngraph.add(conv_2_node, add_2_kernel)

    # maxpool 2
    maxpool_2_node = ngraph.max_pool(add_2_node, [2, 2], padding_begin,
                                     padding_end, [2, 2], 'ceil', None)

    # reshape 1
    reshape_1_dims, reshape_1_length = shape_and_length([2])
    # workaround to get int64 weights from float32 ndarray w/o unnecessary copying
    # (each int64 value occupies two float32 slots, hence the factor of 2)
    dtype_weights = np.frombuffer(
        weights[weights_offset:weights_offset + 2 * reshape_1_length],
        dtype=np.int64,
    )
    reshape_1_kernel = ngraph.constant(dtype_weights)
    weights_offset += 2 * reshape_1_length
    reshape_1_node = ngraph.reshape(maxpool_2_node, reshape_1_kernel, True)

    # matmul 1 (fully connected, weights transposed via transpose_b=True)
    matmul_1_kernel_shape, matmul_1_kernel_length = shape_and_length(
        [500, 800])
    matmul_1_kernel = ngraph.constant(
        weights[weights_offset:weights_offset + matmul_1_kernel_length].reshape(matmul_1_kernel_shape),
    )
    weights_offset += matmul_1_kernel_length
    matmul_1_node = ngraph.matmul(reshape_1_node, matmul_1_kernel, False, True)

    # add 3 (bias)
    add_3_kernel_shape, add_3_kernel_length = shape_and_length([1, 500])
    add_3_kernel = ngraph.constant(
        weights[weights_offset:weights_offset + add_3_kernel_length].reshape(add_3_kernel_shape),
    )
    weights_offset += add_3_kernel_length
    add_3_node = ngraph.add(matmul_1_node, add_3_kernel)

    # ReLU
    relu_node = ngraph.relu(add_3_node)

    # reshape 2 (reuses the same target dims constant as reshape 1)
    reshape_2_kernel = ngraph.constant(dtype_weights)
    reshape_2_node = ngraph.reshape(relu_node, reshape_2_kernel, True)

    # matmul 2 (fully connected)
    matmul_2_kernel_shape, matmul_2_kernel_length = shape_and_length([10, 500])
    matmul_2_kernel = ngraph.constant(
        weights[weights_offset:weights_offset + matmul_2_kernel_length].reshape(matmul_2_kernel_shape),
    )
    weights_offset += matmul_2_kernel_length
    matmul_2_node = ngraph.matmul(reshape_2_node, matmul_2_kernel, False, True)

    # add 4 (bias)
    add_4_kernel_shape, add_4_kernel_length = shape_and_length([1, 10])
    add_4_kernel = ngraph.constant(
        weights[weights_offset:weights_offset + add_4_kernel_length].reshape(add_4_kernel_shape),
    )
    weights_offset += add_4_kernel_length
    add_4_node = ngraph.add(matmul_2_node, add_4_kernel)

    # softmax over the class dimension
    softmax_axis = 1
    softmax_node = ngraph.softmax(add_4_node, softmax_axis)

    # result
    result_node = ngraph.result(softmax_node)
    return ngraph.impl.Function(result_node, [param_node], 'lenet')
def test_max_pool():
    """Exercise ng.max_pool on 1-D and 2-D inputs via explicit backend tensors.

    Data is moved in and out of backend tensors with raw byte-sized
    read/write calls (element count * 4 bytes for float32).
    """
    # test 1d
    element_type = Type.f32
    shape = Shape([1, 1, 10])
    A = Parameter(element_type, shape)
    parameter_list = [A]
    input_arr = np.arange(10, dtype=np.float32).reshape(1, 1, 10)
    window_shape = [3]
    strides = [1] * len(window_shape)
    pads_begin = [0] * len(window_shape)
    pads_end = [0] * len(window_shape)

    model = ng.max_pool(A, strides, pads_begin, pads_end, window_shape)
    function = Function([model], parameter_list, "test")
    backend = Backend.create(test.BACKEND_NAME)

    a = backend.create_tensor(element_type, shape)
    result = backend.create_tensor(element_type, Shape([1, 1, 8]))

    a.write(util.numpy_to_c(input_arr), 10 * 4)  # 10 float32 values

    result_arr = np.zeros(8, dtype=np.float32).reshape(1, 1, 8)
    result.write(util.numpy_to_c(result_arr), 8 * 4)
    handle = backend.compile(function)
    handle.call([result], [a])
    result.read(util.numpy_to_c(result_arr), 32)  # 8 float32 values = 32 bytes

    # Window of 3 over 0..9 -> maxima are 2..9.
    result_arr_ref = (np.arange(8) + 2).reshape(1, 1, 8)
    assert np.allclose(result_arr, result_arr_ref)

    # test 1d with strides
    strides = [2]
    pads_begin = [0] * len(window_shape)
    pads_end = [0] * len(window_shape)

    model = ng.max_pool(A, strides, pads_begin, pads_end, window_shape)
    function = Function([model], parameter_list, "test")

    size = 4
    result = backend.create_tensor(element_type, Shape([1, 1, size]))
    result_arr = np.zeros(size, dtype=np.float32).reshape(1, 1, size)

    # NOTE(review): a fresh backend is created here AFTER the result tensor
    # was allocated on the previous backend instance — looks suspicious;
    # confirm tensors are interchangeable across Backend.create() calls.
    backend = Backend.create(test.BACKEND_NAME)
    result.write(util.numpy_to_c(result_arr), size * 4)
    handle = backend.compile(function)
    handle.call([result], [a])
    result.read(util.numpy_to_c(result_arr), size * 4)

    result_arr_ref = ((np.arange(size) + 1) * 2).reshape(1, 1, size)
    assert np.allclose(result_arr, result_arr_ref)

    # test 2d
    element_type = Type.f32
    shape = Shape([1, 1, 10, 10])
    A = Parameter(element_type, shape)
    parameter_list = [A]

    input_arr = np.arange(100, dtype=np.float32).reshape(1, 1, 10, 10)
    window_shape = [3, 3]
    strides = [1, 1]
    pads_begin = [0, 0]
    pads_end = [0, 0]

    model = ng.max_pool(A, strides, pads_begin, pads_end, window_shape)
    function = Function([model], parameter_list, "test")
    backend = Backend.create(test.BACKEND_NAME)

    a = backend.create_tensor(element_type, shape)
    result = backend.create_tensor(element_type, Shape([1, 1, 8, 8]))

    a.write(util.numpy_to_c(input_arr), 10 * 10 * 4)

    result_arr = np.zeros(64, dtype=np.float32).reshape(1, 1, 8, 8)
    result.write(util.numpy_to_c(result_arr), 8 * 8 * 4)
    handle = backend.compile(function)
    handle.call([result], [a])
    result.read(util.numpy_to_c(result_arr), 8 * 8 * 4)

    # Each 3x3 window's maximum is its bottom-right element on a ramp.
    result_arr_ref = ((np.arange(100).reshape(10, 10))[2:, 2:]).reshape(1, 1, 8, 8)
    assert np.allclose(result_arr, result_arr_ref)

    # test 2d with strides
    strides = [2, 2]
    pads_begin = [0, 0]
    pads_end = [0, 0]

    model = ng.max_pool(A, strides, pads_begin, pads_end, window_shape)
    function = Function([model], parameter_list, "test")
    backend = Backend.create(test.BACKEND_NAME)

    size = 4
    result = backend.create_tensor(element_type, Shape([1, 1, size, size]))
    result_arr = np.zeros(size * size, dtype=np.float32).reshape(1, 1, size, size)

    result.write(util.numpy_to_c(result_arr), size * size * 4)
    handle = backend.compile(function)
    handle.call([result], [a])
    result.read(util.numpy_to_c(result_arr), size * size * 4)

    result_arr_ref = ((np.arange(100).reshape(10, 10))[2::2, 2::2]).reshape(
        1, 1, size, size)
    assert np.allclose(result_arr, result_arr_ref)
def __init__(self, model_adapter, configuration=None, preload=False):
    """Wrap the OpenPose network, validate its outputs and append keypoint NMS.

    :param model_adapter: adapter exposing the loaded network and its layers
    :param configuration: forwarded to the base class
    :param preload: when True, the patched model is loaded at the end of init
    """
    # Base class is initialized with preload=False on purpose: the network
    # is modified below and only then (optionally) loaded.
    super().__init__(model_adapter, configuration, preload=False)
    # Canonical names for the three output blobs this class consumes later.
    self.pooled_heatmaps_blob_name = 'pooled_heatmaps'
    self.heatmaps_blob_name = 'heatmaps'
    self.pafs_blob_name = 'pafs'

    function = ng.function_from_cnn(self.model_adapter.net)
    paf = function.get_output_op(0)
    paf_shape = paf.outputs()[0].get_shape()
    heatmap = function.get_output_op(1)
    heatmap_shape = heatmap.outputs()[0].get_shape()
    # NOTE(review): this raises only when BOTH outputs are non-4D ('and');
    # if either alone must be 4-D, 'or' may have been intended — confirm.
    if len(paf_shape) != 4 and len(heatmap_shape) != 4:
        raise RuntimeError('OpenPose outputs must be 4-dimensional')
    # NOTE(review): same 'and' pattern — raises only when BOTH spatial dims
    # mismatch; verify whether a single mismatched dim should also raise.
    if paf_shape[2] != heatmap_shape[2] and paf_shape[3] != heatmap_shape[
            3]:
        raise RuntimeError(
            'Last two dimensions of OpenPose outputs must match')
    # Identify which output is which by channel count: heatmaps are expected
    # to have twice the channels of the PAFs output; swap if reversed.
    if paf_shape[1] * 2 == heatmap_shape[1]:
        paf, heatmap = heatmap, paf
    elif paf_shape[1] != heatmap_shape[1] * 2:
        raise RuntimeError(
            'Size of second dimension of OpenPose of one output must be two times larger then size '
            'of second dimension of another output')

    # Step back through each Result node to its producer to rename it.
    paf = paf.inputs()[0].get_source_output().get_node()
    paf.set_friendly_name(self.pafs_blob_name)
    heatmap = heatmap.inputs()[0].get_source_output().get_node()
    heatmap.set_friendly_name(self.heatmaps_blob_name)

    # Add keypoints NMS to the network.
    # Heuristic NMS kernel size adjustment depending on the feature maps upsampling ratio.
    p = int(np.round(6 / 7 * self.upsample_ratio))
    k = 2 * p + 1  # kernel k with symmetric pad p keeps the spatial size (stride 1)
    pooled_heatmap = ng.max_pool(heatmap,
                                 kernel_shape=(k, k),
                                 pads_begin=(p, p),
                                 pads_end=(p, p),
                                 strides=(1, 1),
                                 name=self.pooled_heatmaps_blob_name)
    # Rebuild the function so it exposes heatmaps, pooled heatmaps and PAFs.
    f = ng.impl.Function([
        ng.result(heatmap, name=self.heatmaps_blob_name),
        ng.result(pooled_heatmap, name=self.pooled_heatmaps_blob_name),
        ng.result(paf, name=self.pafs_blob_name)
    ], function.get_parameters(), 'hpe')
    self.model_adapter.net = IENetwork(ng.impl.Function.to_capsule(f))
    self.inputs = self.model_adapter.get_input_layers()
    self.outputs = self.model_adapter.get_output_layers()
    # Ratio of the network input height to the heatmap height (computed
    # before the reshape below).
    self.output_scale = self.inputs[self.image_blob_name].shape[
        -2] / self.outputs[self.heatmaps_blob_name].shape[-2]

    if self.target_size is None:
        self.target_size = self.inputs[self.image_blob_name].shape[-2]
    # Round H and W up to the nearest multiple of size_divisor.
    self.h = (self.target_size + self.size_divisor -
              1) // self.size_divisor * self.size_divisor
    input_width = round(self.target_size * self.aspect_ratio)
    self.w = (input_width + self.size_divisor -
              1) // self.size_divisor * self.size_divisor
    default_input_shape = self.inputs[self.image_blob_name].shape
    input_shape = {
        self.image_blob_name: (default_input_shape[:-2] + [self.h, self.w])
    }
    self.logger.debug('\tReshape model from {} to {}'.format(
        default_input_shape, input_shape[self.image_blob_name]))
    super().reshape(input_shape)
    if preload:
        self.load()

    num_joints = self.outputs[self.heatmaps_blob_name].shape[
        1] - 1  # The last channel is for background
    self.decoder = OpenPoseDecoder(num_joints,
                                   score_threshold=self.prob_threshold)