def test_conv_bias():
    """Conv followed by an axiswise bias must survive ConcatAffine untouched.

    With no interleaved scale there is nothing to fold, so both operators and
    both constant tensors must come out of the optimizer unchanged.
    """
    for x_order, w_order in itertools.product(orders4, orders4):
        convolution = Convolution2D(None, ksize=3, stride=1, padding=1)
        axiswise_bias = AxiswiseBias(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(x_order)

        w = ConstantVariable(arange_shaped([4, 3, 3, 5]), OrderNHWC)
        w.change_order(w_order)
        original_w = w.data.copy()

        h, = convolution(x, w)

        b = ConstantVariable(arange_shaped([h.shape_dict[Axis.C]]), OrderC)
        original_b = b.data.copy()
        y, = axiswise_bias(h, b)

        graph, _ = ConcatAffine().optimize(Graph([x], [y]))

        ops = listup_operators(graph)
        # conv -> bias must still be exactly two operators, in order.
        assert len(ops) == 2 and isinstance(ops[0], Convolution2D) and isinstance(ops[1], AxiswiseBias)
        assert np.all(np.equal(ops[0].inputs["w"].data, original_w))
        assert np.all(np.equal(ops[1].inputs["b"].data, original_b))
def test_wide_stride_CNHW():
    """Im2Col with ksize=2, padding=1, stride=2 and CNHW output order.

    Both the WebAssembly and the WebGPU specializations are checked against
    the same reference data, obtained by reordering the NHWC ground truth
    into CNHW via a ConstantVariable.
    """
    v_im, v_col = generate_data_212()

    col_dummy = ConstantVariable(v_col, order=OrderNHWC)
    col_dummy.change_order(OrderCNHW)

    im = Variable(v_im.shape, order=OrderNHWC)

    col_wasm, = WasmIm2Col(None, ksize=2, padding=1, stride=2)(im)
    col_wasm.change_order(OrderCNHW)

    col_webgpu, = WebGPUIm2Col(None, ksize=2, padding=1, stride=2)(im)
    col_webgpu.change_order(OrderCNHW)

    # Fix: dropped the useless f-prefix — the descriptions contain no placeholders.
    generate_kernel_test_case(description="Im2Col output=CNHW stride=2",
                              backend=["webassembly"],
                              graph=Graph([im], [col_wasm]),
                              inputs={im: v_im},
                              expected={col_wasm: col_dummy.data},
                              raise_skip=False)
    generate_kernel_test_case(description="Im2Col output=CNHW stride=2",
                              backend=["webgpu"],
                              graph=Graph([im], [col_webgpu]),
                              inputs={im: v_im},
                              expected={col_webgpu: col_dummy.data})
def test_conv_scale():
    """Conv followed by an axiswise scale: ConcatAffine folds the scale into the filter.

    After optimization only the convolution remains, and its weight must equal
    the original weight multiplied by the per-channel scale broadcast along the
    filter's N axis.
    """
    for x_order, w_order in itertools.product(orders4, orders4):
        convolution = Convolution2D(None, ksize=3, stride=1, padding=1)
        axiswise_scale = AxiswiseScale(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(x_order)

        w = ConstantVariable(arange_shaped([4, 3, 3, 5]), OrderNHWC)
        w.change_order(w_order)
        original_w = w.data.copy()

        h, = convolution(x, w)

        s = ConstantVariable(arange_shaped([h.shape_dict[Axis.C]]), OrderC)
        original_s = s.data.copy()
        y, = axiswise_scale(h, s)

        graph, _ = ConcatAffine().optimize(Graph([x], [y]))

        # Build an index that broadcasts the 1-D scale along the filter's N axis.
        n_position = w_order.axes_dict[Axis.N]
        # noinspection PyTypeChecker
        expander = (None,) * n_position + (Ellipsis,) + (None,) * (3 - n_position)
        expected_w = original_w * original_s[expander]

        ops = listup_operators(graph)
        assert len(ops) == 1 and isinstance(ops[0], Convolution2D)
        assert convolution.outputs["y"] == y
        assert np.all(np.equal(w.data, expected_w))
def fold_constance(self, graph: Graph):
    """Fold this operator applied to a constant: precompute ``1 / sqrt(x0)``.

    The folded constant replaces the operator's output in *graph*, and the
    operator itself is removed.
    """
    constant_in = self.inputs["x0"]  # type: ConstantVariable
    old_output = self.outputs["y"]

    folded = ConstantVariable(1 / np.sqrt(constant_in.data), constant_in.order)
    folded.change_order(old_output.order)

    OptimizeRule.replace_variable(graph, old_output, folded)
    self.remove_all()
def test_change_order_with_compression():
    """NHWC -> CN: singleton H and W axes may be dropped while reordering."""
    source = np.arange(3 * 4).reshape((3, 1, 1, 4))
    var = ConstantVariable(source, OrderNHWC)
    var.change_order(OrderCN)

    # Moving N behind the (singleton) H/W/C axes matches the CN layout.
    expected = source.transpose((1, 2, 3, 0))

    assert var.order == OrderCN
    assert np.all(var.data.ravel() == expected.ravel())
def fold_constance(self, graph: Graph):
    """Fold a tiling of a constant input: precompute ``np.tile`` eagerly.

    The folded constant replaces the operator's output in *graph*, and the
    operator itself is removed.
    """
    source = self.inputs["x"]  # type: ConstantVariable
    old_output = self.outputs["y"]

    folded = ConstantVariable(np.tile(source.data, self.multiplier), source.order)
    folded.change_order(old_output.order)

    OptimizeRule.replace_variable(graph, old_output, folded)
    self.remove_all()
def test_change_order_with_expansion():
    """NC -> CHWN: missing singleton axes are inserted while reordering."""
    source = np.arange(3 * 4).reshape((3, 4))
    var = ConstantVariable(source, OrderNC)
    var.change_order(OrderCHWN)

    # Swapping N and C is all that changes the memory layout here.
    expected = source.T

    assert var.order == OrderCHWN
    assert np.all(var.data.ravel() == expected.ravel())
def test_change_order():
    """Plain 4-D reorder: NHWC -> HWNC moves the batch axis to third place."""
    source = np.arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
    var = ConstantVariable(source, OrderNHWC)
    var.change_order(OrderHWNC)

    expected = source.transpose((1, 2, 0, 3))

    assert var.order == OrderHWNC
    assert np.all(var.data == expected)
def fold_constance(self, graph: Graph):
    """Fold this slicing operator applied to a constant input.

    The constant is sliced eagerly with NumPy. Axes that exist only in the
    output order are created by ``None`` (newaxis) entries appended to the
    index; the intermediate order is the surviving input axes followed by
    the new axes, then reordered to match the output's order.
    """
    x = self.inputs["x"]  # type: ConstantVariable
    y = self.outputs["y"]

    remained_axes_in_x_order = [a for a in x.order.axes if a in y.order.axes]
    new_axes = [a for a in y.order.axes if a not in x.order.axes]

    # Fix: the index must be a tuple. Indexing ndarrays with a plain *list*
    # of slices/None was deprecated in NumPy 1.15 and is an error in current
    # NumPy releases.
    slices = tuple([self.indices[a] for a in x.order.axes] + [None] * len(new_axes))

    new_y = ConstantVariable(x.data[slices], Order(remained_axes_in_x_order + new_axes))
    new_y.change_order(y.order)
    OptimizeRule.replace_variable(graph, y, new_y)
    self.remove_all()
def fold_constance(self):
    """Fold an axis-split of a constant: precompute every output section.

    Each output variable is replaced by a constant holding the corresponding
    section of ``np.split``, reordered to the output's order; the operator is
    detached first.
    """
    source = self.inputs["x"]  # type: ConstantVariable
    old_outputs = [self.outputs[f"y{i}"] for i in range(len(self.outputs))]
    split_axis = self.parameters["axis"]
    sections = self.parameters["sections"]
    self.remove_all()

    pieces = np.split(source.data, sections, source.order.axes_dict[split_axis])
    for old_output, piece in zip(old_outputs, pieces):
        folded = ConstantVariable(piece, source.order)
        folded.change_order(old_output.order)
        old_output.replace(folded)
def fold_constance(self, graph: Graph):
    """Fold a sum-reduction over a constant input into a precomputed constant.

    The reduced axis is removed from the order; the result replaces the
    operator's output in *graph* and the operator is removed.
    """
    source = self.inputs["x"]  # type: ConstantVariable
    old_output = self.outputs["y"]

    surviving_axes = [a for a in source.order.axes if a != self.axis]
    reduced = np.sum(source.data, axis=source.order.axes_dict[self.axis])

    folded = ConstantVariable(reduced, Order(surviving_axes))
    folded.change_order(old_output.order)

    OptimizeRule.replace_variable(graph, old_output, folded)
    self.remove_all()
def test_CNHW():
    """Im2Col with ksize=3, padding=1, stride=1 and CNHW output order.

    Checked on all three backends against reference data obtained by
    reordering the NHWC ground truth into CNHW via a ConstantVariable.
    """
    v_im, v_col = generate_data_311()

    col_dummy = ConstantVariable(v_col, order=OrderNHWC)
    col_dummy.change_order(OrderCNHW)

    im = Variable(v_im.shape, order=OrderNHWC)
    col, = Im2Col(None, ksize=3, padding=1, stride=1, dilation_rate=1)(im)
    col.change_order(OrderCNHW)

    # Fix: dropped the useless f-prefix — the description contains no placeholders.
    generate_kernel_test_case(description="Im2Col output=CNHW",
                              backend=["webgpu", "webgl", "webassembly"],
                              graph=Graph([im], [col]),
                              inputs={im: v_im},
                              expected={col: col_dummy.data})
def template(x_shape=[2, 3], feature_size=5, vocabulary_size=6, x_order=OrderNT, w_order=OrderCN, y_order=OrderNTC, description: str = ""):
    """Kernel test template for the Embedding operator.

    Builds an (x, w) -> y embedding lookup, reorders each variable to the
    requested order, and emits a kernel test case whose input/expected data
    are transposed from the canonical NT / NTC layouts accordingly.

    NOTE(review): ``x_shape=[2, 3]`` is a mutable default argument — harmless
    while callers never mutate it, but a tuple default would be safer.
    """
    x = Variable(x_shape, order=x_order)
    # Integer token ids in [0, vocabulary_size), laid out as OrderNT.
    vx = np.random.randint(low=0, high=vocabulary_size, size=(x.shape_dict[Axis.N], x.shape_dict[Axis.T]))  # OrderNT
    vw = np.random.rand(vocabulary_size, feature_size)  # OrderCN
    # Ground truth: advanced indexing performs the embedding lookup (OrderNTC).
    vy = vw[vx]  # OrderNTC
    w = ConstantVariable(vw, order=OrderCN)
    y, = Embedding(None)(x, w)
    # NOTE(review): these reassignments assume change_order returns self
    # (chainable); x is already in x_order so the first call looks redundant
    # — confirm against Variable.change_order.
    x = x.change_order(x_order)
    w = w.change_order(w_order)
    y = y.change_order(y_order)
    generate_kernel_test_case(
        description=f"Embedding {description}",
        backend=["webgpu", "webassembly"],
        graph=Graph([x], [y]),
        # Transpose canonical-layout data to each variable's actual order.
        inputs={x: vx.transpose([OrderNT.axes_dict[a] for a in x.order.axes])},
        expected={
            y: vy.transpose([OrderNTC.axes_dict[a] for a in y.order.axes])
        })
def fold_constance(self, graph: "graph.Graph"):
    """Fold an axis-relabeling of a constant input into a new constant.

    The data is untouched; each axis of the source order is mapped through
    the ``in_order`` -> ``out_order`` correspondence, then the result is
    reordered to the output's order and substituted into *graph*.
    """
    source = self.inputs["x"]  # type: ConstantVariable
    old_output = self.outputs["y"]
    self.remove_all()

    # Relabel each source axis by its position in in_order.
    relabeled_axes = []
    for axis in source.order.axes:
        relabeled_axes.append(self.out_order.axes[self.in_order.axes.index(axis)])

    folded = ConstantVariable(source.data, Order(relabeled_axes))
    OptimizeRule.replace_variable(graph, old_output, folded.change_order(old_output.order))
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """Rewrite each LSTM to use a single fused weight matrix.

    For every LSTM not yet marked ``LSTMOptimized``, the separate
    ``w_input`` / ``w_hidden`` inputs are replaced by one concatenated
    ``w_all`` (stacked eagerly when both are constants, otherwise via a
    Concat operator), and two pre-allocated buffers (``x_and_h``,
    ``workspace``) are appended as extra inputs for the backend kernel.

    Returns the (mutated in place) graph and whether anything changed.
    """
    flag_changed = False
    for match in traverse.search_sub_structure(graph, [LSTM]):
        lstm = match[0]  # type: LSTM
        # Skip LSTMs already rewritten by a previous pass.
        if lstm.has_attribute(LSTMOptimized):
            continue

        x = lstm.inputs["x"]
        w_input = lstm.inputs["w_input"]
        w_hidden = lstm.inputs["w_hidden"]

        if isinstance(w_input, ConstantVariable) and isinstance(w_hidden, ConstantVariable):
            # Both weights are constants: stack their data eagerly.
            w_input.change_order(OrderCN)
            w_hidden.change_order(OrderCN)
            w_all = ConstantVariable(np.vstack([w_input.data, w_hidden.data]), OrderCN)
        else:
            # At least one weight is computed at runtime: concatenate in-graph.
            w_all, = Concat(None, axis=Axis.C)(w_input, w_hidden)  # type: Variable
            w_all.change_order(OrderCN)

        attr = LSTMOptimized(lstm)

        N = x.shape_dict[Axis.N]
        C1 = attr.C1  # input feature size (per LSTMOptimized)
        C2 = attr.C2  # hidden state size (per LSTMOptimized)

        # Scratch buffers consumed by the optimized kernel:
        # x_and_h holds the concatenated input+hidden vector, workspace the
        # four gate pre-activations.
        x_and_h = Variable([C1 + C2, N], OrderCN)
        workspace = Variable([N, 4 * C2], OrderNC)

        # Swap the two separate weight inputs for the fused set of inputs.
        lstm.remove_input(w_input)
        lstm.remove_input(w_hidden)
        lstm.append_input("x_and_h", x_and_h)
        lstm.append_input("workspace", workspace)
        lstm.append_input("w_all", w_all)
        lstm.attributes.add(attr)
        flag_changed = True

    return graph, flag_changed
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """Compress eligible convolutions via truncated SVD of the filter.

    Each Convolution2D with a constant filter is decomposed into a 1x1
    "squeeze" convolution (C1 -> C3 channels) followed by the original-size
    "expand" convolution (C3 -> C2), but only when the decomposed FLOP count
    is strictly smaller than the original's.

    Returns the (mutated in place) graph and whether anything changed.
    """
    flag_changed = False
    for conv in traverse.filter_nodes(traverse.listup_operators(graph), Convolution2D):
        x = conv.inputs["x"]
        w = conv.inputs["w"]
        y = conv.outputs["y"]
        # Only constant filters can be decomposed offline.
        if not isinstance(w, ConstantVariable):
            continue

        C2 = w.shape_dict[Axis.N]
        KH = w.shape_dict[Axis.H]
        KW = w.shape_dict[Axis.W]
        C1 = w.shape_dict[Axis.C]

        # Don't re-compress an already-decomposed convolution.
        if conv.has_attribute(Convolution2DSvdCompressed):
            continue

        # Restrict to "same"-padded, stride-1, non-dilated convolutions.
        if KH != conv.PH * 2 + 1 or KW != conv.PW * 2 + 1 or conv.SH != 1 or conv.SW != 1 or conv.DH != 1 or conv.DW != 1:
            # TODO: Is this constraint required?
            continue

        # Flatten the filter to (C2*KH*KW, C1) in NHWC layout for the SVD.
        w_copy = ConstantVariable(w.data, w.order)
        w_copy.change_order(OrderNHWC)
        d = w_copy.data.reshape((C2 * KH * KW, C1))
        # _svd presumably truncates to retain the given energy/rank fraction
        # (0.5) — see its definition elsewhere in the project.
        d_expand, d_squeeze = _svd(d, 0.5)
        C3 = d_expand.shape[1]
        """
        Computation complexity:

        before) C1*C2*KH*KW
        after)  C1*C3 + C3*C2*KH*KW

        Decomposition wins iff C1*C2*KH*KW > C1*C3 + C3*C2*KH*KW
                           <=> (C1*C2*KH*KW) / (C1 + C2*KH*KW) > C3
        """
        relative_complexity = (C1 * C3 + C3 * C2 * KH * KW) / (C1 * C2 * KH * KW)
        if relative_complexity >= 1:
            """
            In this case, decomposition makes convolution more complex
            """
            continue

        # Replace conv(x, w) with conv2(conv1(x, w_squeeze), w_expand).
        conv.remove_all()
        w_expand = ConstantVariable(d_expand.reshape([C2, KH, KW, C3]), OrderNHWC)
        w_squeeze = ConstantVariable(d_squeeze.reshape([C3, 1, 1, C1]), OrderNHWC)
        conv1 = Convolution2D(None, ksize=1, stride=1, padding=0, dilation_rate=1)
        conv2 = Convolution2D(None, ksize=conv.ksize, stride=conv.stride, padding=conv.padding, dilation_rate=conv.dilation_rate)
        h, = conv1(x, w_squeeze)
        y_new, = conv2(h, w_expand)

        # Mark both halves so this pass never re-decomposes them.
        conv1.attributes.add(Convolution2DSvdCompressed())
        conv2.attributes.add(Convolution2DSvdCompressed())
        OptimizeRule.replace_variable(graph, y_new.transpose_like(y), y)
        flag_changed = True

    return graph, flag_changed
def test_change_order_with_invalid_compression():
    # H and W are NOT singleton here (3, 2, 2, 4), so compressing NHWC down
    # to a 2-D CN layout would lose information.
    d1 = np.arange(3 * 2 * 2 * 4).reshape((3, 2, 2, 4))
    v = ConstantVariable(d1, OrderNHWC)
    # NOTE(review): despite "invalid" in the test name, nothing asserts that
    # this call fails — presumably change_order is expected to raise here;
    # consider wrapping in pytest.raises(...) and confirming the exception type.
    v.change_order(OrderCN)