def test_translate_vmul(x_shape):
    a = np.random.randint(-3, 3, x_shape)
    b = np.random.randint(-3, 3, x_shape)
    np_res = a.dot(b)
    with pm.Node("vmul") as pm_graph:
        pm_a = pm.input(name="a", shape=x_shape)
        pm_b = pm.input(name="b", shape=x_shape)
        pm_o = pm.output(name="o", shape=x_shape)
        pm_s = pm.output(name="out")
        pm.elem_mul(pm_a, pm_b, pm_o)
        pm.reduce_sum(pm_o, pm_s, axes=(0,), keepdims=0)
    pm_res = pm_graph("out", {"a": a, "b": b})
    np.testing.assert_allclose(pm_res, np_res)
def get_max_pool(x, ceil_mode=0, kernel_shape=None, pads=None, auto_pad=None,
                 strides=None, shape=None, name=None, out=None):
    if not out:
        out = pm.output(shape=shape, name=name)
    int_fn = np.ceil if ceil_mode != 0 else np.floor
    if auto_pad:
        h_out = int_fn(x.shape[-2] / strides[0])
        w_out = int_fn(x.shape[-1] / strides[1])
        ph = max(0, (h_out - 1) * strides[0] + kernel_shape[0] - x.shape[-2])
        pw = max(0, (w_out - 1) * strides[1] + kernel_shape[1] - x.shape[-1])
        pads = [0, 0, 0, 0]
        if auto_pad == "SAME_LOWER":
            pads[0] = np.floor(ph // 2)
            pads[1] = ph - pads[0]
            pads[2] = np.floor(pw // 2)
            pads[3] = pw - pads[2]
        elif auto_pad == "SAME_UPPER":
            pads[1] = np.floor(ph // 2)
            pads[0] = ph - pads[1]
            pads[3] = np.floor(pw // 2)
            pads[2] = pw - pads[3]
    pm.max_pool(x, out, kernel_shape[0], kernel_shape[1],
                (int(strides[0]), int(strides[1])), (int(pads[0]), int(pads[2])))
    return out
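# Illustrative sketch (not part of the translator): the auto_pad arithmetic used above for a
# concrete case, assuming a 5-pixel spatial dimension, a 3x3 kernel, and stride 2. The total
# padding ph is split between the begin/end entries of `pads`; SAME_LOWER and SAME_UPPER
# differ only in which side absorbs an odd remainder.
def _same_pad_example():
    in_h, k_h, stride = 5, 3, 2
    h_out = np.ceil(in_h / stride)                  # ceil(5 / 2) = 3 output rows
    ph = max(0, (h_out - 1) * stride + k_h - in_h)  # (3 - 1) * 2 + 3 - 5 = 2 total pad rows
    pad_begin = int(ph // 2)                        # 1 row before ...
    pad_end = int(ph - pad_begin)                   # ... and 1 row after
    return pad_begin, pad_end                       # -> (1, 1)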
def get_transpose(data, perm=None, shape=None, name=None, out=None, **kwargs):
    if not out:
        out = pm.output(name=name, shape=shape)
    if perm is None:
        # Default to reversing the axis order (a full transpose): perm must be a
        # permutation of axis indices, not the shape values themselves.
        perm = tuple(reversed(range(len(data.shape))))
    pm.tensor_transpose(data, out, perm=perm)
    return out
def test_conv2d_transpose_shapes(inp_shape, wgt_shape, stride, pad):
    groups = 1
    dilation = 1
    out_pad = 0
    inp = np.random.randint(-15, 15, np.prod(inp_shape)).reshape(inp_shape)
    wgt = np.random.randint(-15, 15, np.prod(wgt_shape)).reshape(wgt_shape)
    # F.conv_transpose2d does not accept integer tensors, and its result was overwritten
    # by the local reference implementation below, so it is left disabled here.
    # torch_res = F.conv_transpose2d(torch.from_numpy(inp), torch.from_numpy(wgt),
    #                                stride=stride, padding=pad)
    torch_res = conv2d_transpose(torch.from_numpy(inp), torch.from_numpy(wgt), stride, pad)
    # np.testing.assert_allclose(tres.numpy(), torch_res.numpy())
    info = {
        'data': inp,
        'w': wgt,
    }
    N, C, H, W = inp.shape
    x = pm.input(name="data", shape=inp_shape)
    w = pm.state(name="w", shape=wgt_shape)
    out = pm.output(name="out")
    graph = pm.conv_transpose(x, w, out, stride, pad)
    tres = graph("out", info)
    np.testing.assert_allclose(tres, torch_res.numpy())
def elem_tanh_grad(self, node):
    grad = self.get_gradient(node)
    out_grad = pm.output(name=f"{node.inputs[0].name}_grad", shape=node.inputs[0].shape)
    pm.elem_tanh_grad(node.inputs[0], grad, out_grad)
    self.update_grad_map(node.inputs[0], out_grad, node)
def max_pool_grad(self, node):
    grad = self.get_gradient(node)
    mpool_grad = pm.output(name=f"{node.inputs[0].name}_grad", shape=node.inputs[0].shape)
    pm.max_pool_grad(node.inputs[0], grad, mpool_grad,
                     node.kernel_size[0], node.kernel_size[1],
                     node.stride, node.pad)
    self.update_grad_map(node.inputs[0], mpool_grad, node)
def test_translate_conv(x_shape, w_shape, params):
    shape_dict = {"n": x_shape[0], "c": x_shape[1], "ih": x_shape[2], "iw": x_shape[3],
                  "nf": w_shape[0], "kh": w_shape[2], "kw": w_shape[3],
                  "stride": params["stride"], "pad": params["pad"]}
    _, input_info, out_info, keys = conv(x_shape, w_shape, params, coarse=True, debug_matrix=False)
    n = pm.parameter(name="n")
    c = pm.parameter(name="ic")
    ih = pm.parameter(name="ih")
    iw = pm.parameter(name="iw")
    nf = pm.parameter(name="nf")
    kh = pm.parameter(name="kh")
    kw = pm.parameter(name="kw")
    x = pm.input(name="data", shape=(n, c, ih, iw))
    w = pm.state(name="w", shape=(nf, c, kh, kw))
    b = pm.state(name="bias", shape=(nf,))  # trailing comma so the shape is a tuple, not a bare node
    stride = pm.parameter(name="stride")
    pad = pm.parameter(name="pad")
    out = pm.output(name="out")
    graph = pm.conv_bias(x, w, b, out, stride, pad)
    tinput_info = copy.deepcopy(input_info)
    res0 = graph("out", tinput_info)
    np.testing.assert_allclose(res0, out_info["out"])
def get_topk(x, k, largest=1, sorted=1, axis=-1, shapes=None, name=None,
             out=None, out_indices=None, **kwargs):
    if not out:
        out = pm.output(name=name[0], shape=shapes[0])
    if not out_indices:
        out_indices = pm.output(name=name[1], shape=shapes[1])
    pm.topk(x, k, out, out_indices, largest=largest, sorted=sorted, axis=axis)
    return out, out_indices
def cross_entropy_grad(self, node):
    grad = self.get_gradient(node)
    inp_grad = pm.output(name=f"{node.inputs[0].name}_grad", shape=node.inputs[0].shape)
    pm.cross_entropy_loss_grad(node.inputs[0], node.inputs[1], grad, inp_grad)
    self.update_grad_map(node.inputs[0], inp_grad, node)
def test_translate_softmax(x_shape, axis):
    x = np.random.randint(0, 5, x_shape).astype(np.float64)  # np.float is removed in recent NumPy
    data = pm.input("x", shape=x.shape)
    out = pm.output("out")
    g = pm.softmax(data, out, axis=axis)  # use the parametrized axis instead of hard-coding 1
    res = g("out", {"x": x})
    np_res = np_softmax(x, axis=axis)
    np.testing.assert_allclose(np_res, res)
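# A minimal sketch of the numpy reference the softmax test compares against, assuming
# `np_softmax` is a standard numerically stable softmax (the actual helper is defined
# elsewhere in the test suite).
def np_softmax(x, axis=-1):
    # Subtract the per-slice max along `axis` so the exponentials cannot overflow.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)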
def test_bnorm():
    shape = (1, 16, 32, 32)
    grad = torch.rand(shape)
    x = torch.rand(shape)
    scale = torch.rand((shape[1],))
    bias = torch.rand((shape[1],))
    mean = torch.rand((shape[1],))
    var = torch.rand((shape[1],))
    torch_res = batchnorm2d_backward(grad, x, scale, bias)
    grad = grad.numpy()
    x = x.numpy()
    scale = scale.numpy()
    bias = bias.numpy()
    mean = mean.numpy()
    var = var.numpy()
    optimizer = "sgd"
    optimizer_kwargs = {"lr": 0.01}
    pm_x = pm.input(name="x", shape=shape)
    pm_grad = pm.input(name="grad", shape=shape)
    pm_scale = pm.state(name="scale", shape=scale.shape)
    pm_bias = pm.state(name="bias", shape=scale.shape)
    pm_mean = pm.state(name="mean", shape=scale.shape)
    pm_var = pm.state(name="var", shape=scale.shape)
    pm_x_grad = pm.output(name="x_grad", shape=shape)
    pm_scale_grad = pm.output(name="scale_grad", shape=scale.shape)
    pm_b_grad = pm.output(name="bias_grad", shape=bias.shape)
    inp_map = {
        'x': x,
        'grad': grad,
        'scale': scale,
        'bias': bias,
        'mean': mean,
        'var': var,
    }
    graph = pm.batchnorm_grad(pm_x, pm_scale, pm_bias, pm_mean, pm_var, pm_grad,
                              pm_x_grad, pm_scale_grad, pm_b_grad,
                              optimizer, optimizer_kwargs)
    rtol, atol = 1.3e-3, 1e-3
    gout = graph("bias_grad", inp_map)
    np.testing.assert_allclose(gout, torch_res.numpy().reshape(gout.shape), rtol=rtol, atol=atol)
def populate_output(self, node):
    if node.shape != pm.DEFAULT_SHAPES[0]:
        indices = list(product(*tuple([np.arange(i) for i in node.shape])))
        for i in indices:
            x = pm.output(graph=node, name=f"{node.name}{i}", root_name=node.name, shape=(1,))
            self.stored_objects[id(x)] = x
def get_elem_if(condition, else_branch=None, then_branch=None, shape=None, name=None, out=None):
    if not out:
        out = pm.output(name=name, shape=shape)
    pm.elem_nonzero(condition, out)
    return out
def update_grad_map(self, input_node, gradient_node, parent_node):
    if input_node.name in self.grad_map:
        assert gradient_node.shape == self.grad_map[input_node.name].shape
        grad_name = f"{self.grad_map[input_node.name].name},{gradient_node.name}"
        acc_grad = pm.output(name=grad_name, shape=gradient_node.shape)
        pm.elem_add(self.grad_map[input_node.name], gradient_node, acc_grad)
        self.grad_map[input_node.name] = acc_grad
    else:
        self.grad_map[input_node.name] = gradient_node
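# A plain-numpy illustration (hypothetical helper, not part of the autodiff pass) of what
# update_grad_map expresses in the graph: the first gradient reaching an input is stored as-is,
# and any later gradient for the same input is accumulated by elementwise addition.
def _accumulate_grad(grad_map, name, new_grad):
    if name in grad_map:
        assert grad_map[name].shape == new_grad.shape
        grad_map[name] = grad_map[name] + new_grad  # mirrors the pm.elem_add accumulation node
    else:
        grad_map[name] = new_grad
    return grad_map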
def get_concat(*inputs, axis=None, shape=None, name=None, out=None):
    if not out:
        out = pm.output(name=name, shape=shape)
    pm.concat(*(inputs + (out,)), axis=axis)
    # indices = [pm.index(0, s - 1) if s > 1 else 0 for s in shape]
    # for idx, i in enumerate(inputs):
    #     indices[axis] = pm.index(idx*i.shape[axis], (idx+1)*i.shape[axis]-1)
    #     j = pm.index(0, i.shape[axis]-1)
    #     out[tuple(indices)] = i[tuple(indices[:axis] + [j] + indices[axis+1:])]
    return out
def get_loop(v_initial, cond=None, max_trip_count=None, name=None, shape=None, out=None):
    if not out:
        out = pm.output(name=name, shape=shape)
    pm.loop(v_initial, out, cond=cond, max_trip_count=max_trip_count)
    return out
def batch_norm_grad(self, node):
    grad = self.get_gradient(node)
    bn_inp_grad = pm.output(name=f"{node.inputs[0].name}_grad", shape=node.inputs[0].shape)
    bn_scale_grad = pm.output(name=f"{node.inputs[1].name}_grad_{node.name}",
                              shape=node.inputs[1].shape)
    bn_bias_grad = pm.output(name=f"{node.inputs[2].name}_grad_{node.name}",
                             shape=node.inputs[2].shape)
    inp = node.inputs[0]
    scale = node.inputs[1]
    bias = node.inputs[2]
    mean = node.inputs[3]
    var = node.inputs[4]
    pm.batchnorm_grad(inp, scale, bias, mean, var, grad,
                      bn_inp_grad, bn_scale_grad, bn_bias_grad,
                      self.optimizer_name, self.optimizer_kwargs)
    self.update_grad_map(node.inputs[0], bn_inp_grad, node)
def get_reduce_max(x, shape=None, name=None, out=None, axes=(0,), **kwargs):
    if not out:
        out = pm.output(name=name, shape=shape)
    if isinstance(axes, Integral):
        axes = (axes,)
    elif isinstance(axes, list):
        axes = tuple(axes)
    else:
        assert isinstance(axes, tuple)
    pm.reduce_max(x, out, axes=axes)
    return out
def get_cross_entropy_loss(scores, labels, ignore_index=-100, reduction="mean",
                           name=None, shape=None, out=None):
    if not out:
        out = pm.output(shape=shape, name=name)
    pm.cross_entropy_loss(scores, labels, out, reduction=reduction)
    return out
def gemm_grad(self, node):
    grad = self.get_gradient(node)
    gemm_inp_grad = pm.output(name=f"{node.inputs[0].name}_grad", shape=node.inputs[0].shape)
    gemm_weight_grad = pm.output(name=f"{node.inputs[1].name}_grad", shape=node.inputs[1].shape)
    if len(node.inputs) > 2:
        gemm_bias_grad = pm.output(name=f"{node.inputs[2].name}_grad", shape=node.inputs[2].shape)
        pm.gemm_grad(node.inputs[0], node.inputs[1], node.inputs[2], grad,
                     gemm_inp_grad, gemm_weight_grad, gemm_bias_grad,
                     self.optimizer_name, self.optimizer_kwargs)
    else:
        pm.gemm_grad_no_bias(node.inputs[0], node.inputs[1], grad,
                             gemm_inp_grad, gemm_weight_grad,
                             self.optimizer_name, self.optimizer_kwargs)
    self.update_grad_map(node.inputs[0], gemm_inp_grad, node)
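# A hedged numpy sketch of the gradients a GEMM backward pass computes, assuming the forward
# convention Y = X @ W + b; the actual layout and transpose flags used by pm.gemm_grad may differ.
def _gemm_backward(x, w, dy):
    dx = dy @ w.T          # gradient w.r.t. the input
    dw = x.T @ dy          # gradient w.r.t. the weight
    db = dy.sum(axis=0)    # gradient w.r.t. the bias
    return dx, dw, db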
def get_scatter(data, indices, updates, axis=0, shape=None, name=None, out=None):
    if not out:
        out = pm.output(name=name, shape=shape)
    pm.scatter_elements(data, indices, updates, out, axis=axis)
    return out
def get_split(input, split=None, axis=-1, name=None, shape=None, out=None, **kwargs):
    if not out:
        out = pm.output(name=name, shape=shape)
    pm.split(input, out, split=split, axis=axis)
    return out
def test_translate_flatten(x_shape):
    x = np.random.randint(0, 5, x_shape)
    data = pm.input("x", shape=x.shape)
    out = pm.output("out")
    g = pm.batch_flatten(data, out)
    res = g("out", {"x": x})
    print(res)
    print(x.reshape(-1))
    np.testing.assert_allclose(res, x.reshape(-1))
def get_elem_clip(data, min=None, max=None, shape=None, name=None, out=None, **kwargs):
    if not out:
        out = pm.output(name=name, shape=shape)
    pm.elem_clip(data, out, min=min, max=max)
    return out
def get_slice(input, starts, ends, axes=-1, steps=1, name=None, shape=None, out=None, **kwargs):
    if not out:
        out = pm.output(name=name, shape=shape)
    # NOTE: no slicing op is emitted here; the allocated output is returned as-is and
    # starts/ends/axes/steps are currently unused.
    return out
def get_lrn(x, alpha=None, beta=None, bias=None, size=None, name=None, shape=None, out=None):
    if not out:
        out = pm.output(name=name, shape=shape)
    pm.lrn(x, out, alpha=alpha, beta=beta, bias=bias, nsize=size)
    return out
def get_dropout(x, ratio=None, training_mode=False, shape=None, name=None, out=None):
    if not out:
        out = pm.output(shape=shape, name=name)
    if training_mode:
        pm.dropout(x, out, ratio=ratio)
    else:
        pm.dropout(x, out)
    return out
def test_translate_elem_mul(x_shape):
    a = np.random.randint(-3, 3, x_shape)
    b = np.random.randint(-3, 3, x_shape)
    np_res = a * b
    graph = pm.Node("elem_mul")
    pm_a = pm.input(name="a", shape=x_shape, graph=graph)
    pm_b = pm.input(name="b", shape=x_shape, graph=graph)
    pm_o = pm.output(name="out", shape=x_shape, graph=graph)
    with graph:
        pm.elem_mul(pm_a, pm_b, pm_o)
    pm_res = graph("out", {"a": a, "b": b})
    np.testing.assert_allclose(pm_res, np_res)
def test_translate_reduce_sum(x_shape):
    data = np.random.randint(-3, 3, x_shape)
    np_res = np.sum(data)
    graph = pm.Node("reduce")
    pm_data = pm.input(name="a", shape=x_shape, graph=graph)
    out = pm.output(name="out", graph=graph)
    axis = (0,)
    keepdims = 0
    with graph:
        pm.reduce_sum(pm_data, out, axes=axis, keepdims=keepdims)
    pm_res = graph("out", {"a": data})
    np.testing.assert_allclose(pm_res, np_res)
def get_conv_transpose(x, w, bias=None, dilations=None, group=None, kernel_shape=None,
                       pads=None, auto_pad=None, output_padding=None, strides=None,
                       shape=None, name=None, out=None):
    if not out:
        out = pm.output(shape=shape, name=name)
    if auto_pad:
        h_out = np.ceil(x.shape[-2] / strides[0])
        w_out = np.ceil(x.shape[-1] / strides[1])
        ph = max(0, (h_out - 1) * strides[0] + kernel_shape[0] - x.shape[-2])
        pw = max(0, (w_out - 1) * strides[1] + kernel_shape[1] - x.shape[-1])
        pads = [0, 0, 0, 0]
        if auto_pad == "SAME_LOWER":
            pads[0] = np.floor(ph // 2)
            pads[1] = ph - pads[0]
            pads[2] = np.floor(pw // 2)
            pads[3] = pw - pads[2]
        elif auto_pad == "SAME_UPPER":
            pads[1] = np.floor(ph // 2)
            pads[0] = ph - pads[1]
            pads[3] = np.floor(pw // 2)
            pads[2] = pw - pads[3]
    if bias:
        pm.conv_transpose_bias(x, w, bias, out, int(strides[0]), int(pads[-2]),
                               out_pad=output_padding)
        return out
    else:
        pm.conv_transpose(x, w, out, int(strides[0]), int(pads[-2]),
                          out_pad=output_padding)
        return out
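# Illustrative sketch (not part of the translator): the standard output-size relation for a
# transposed convolution with unit dilation, which the conv_transpose lowering above relies on.
# With input size H_in, stride s, padding p, kernel k, and output padding op:
#   H_out = (H_in - 1) * s - 2 * p + k + op
def _conv_transpose_out_size(h_in, stride, pad, kernel, out_pad=0):
    return (h_in - 1) * stride - 2 * pad + kernel + out_pad

# e.g. _conv_transpose_out_size(4, stride=2, pad=1, kernel=3) -> 7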