def test_rgba2r_2():
    """test_rgba2r_2

    before) v0[R] -{ConvertRGBAtoR}- v1[R] -{ConvertRGBAtoR}- v2[R]

    after)  v0[R] -{Transpose}- v1[R] -{Transpose}- v2[R]
    """
    v0 = Variable((2, 3, 4, 5), OrderNCHW)
    v1, = ConvertRGBAtoR(None)(v0)
    v2, = ConvertRGBAtoR(None)(v1)

    graph = Graph([v0], [v2])
    SimplifyRedundantChannelModeConversion().optimize(graph)

    assert len(graph.inputs) == 1 and graph.inputs[0] == v0
    assert len(graph.outputs) == 1 and graph.outputs[0] == v2

    new_ops = traverse.listup_operators(graph)
    assert len(new_ops) == 2
    assert isinstance(new_ops[0], Transpose)
    assert isinstance(new_ops[1], Transpose)

def test_r2rgba_2():
    """test_r2rgba_2

    before) v0[R] -{ConvertRtoRGBA}- v1[RGBA] -{ConvertRtoRGBA}- v2[RGBA] -{ConvertRtoRGBA}- v3[RGBA]

    after)  v0[R] -{ConvertRtoRGBA}- v1[RGBA] -{ConvertRGBAtoR}- v3[R] -{Transpose}- v4[R] -{ConvertRtoRGBA}- v2[RGBA]

            v2[RGBA] -{ConvertRGBAtoR}- v5[R] -{Transpose}- v6[R] -{ConvertRtoRGBA}- v3[RGBA]
    """
    v0 = Variable((2, 3, 4, 5), OrderNCHW)
    v1, = ConvertRtoRGBA(None)(v0)
    v2, = ConvertRtoRGBA(None)(v1)
    v3, = ConvertRtoRGBA(None)(v2)

    graph = Graph([v0], [v3])
    SimplifyRedundantChannelModeConversion().optimize(graph)

    assert len(graph.inputs) == 1 and graph.inputs[0] == v0
    assert len(graph.outputs) == 1 and graph.outputs[0] == v3

    new_ops = traverse.listup_operators(graph)
    assert len(new_ops) == 7
    assert isinstance(new_ops[0], ConvertRtoRGBA)
    assert isinstance(new_ops[1], ConvertRGBAtoR)
    assert isinstance(new_ops[2], Transpose)
    assert isinstance(new_ops[3], ConvertRtoRGBA)
    assert isinstance(new_ops[4], ConvertRGBAtoR)
    assert isinstance(new_ops[5], Transpose)
    assert isinstance(new_ops[6], ConvertRtoRGBA)

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    global _rgba_support_operators
    flag_changed = False

    for op in traverse.listup_operators(graph):
        if op.__class__ not in _rgba_support_operators:
            # This operator doesn't support RGBA mode
            continue

        if op.get_attribute(ChannelMode)[0].mode == ChannelModeEnum.RGBA:
            # This operator is configured as RGBA mode already
            continue

        y = list(op.outputs.values())[0]
        if any(x.shape != y.shape for x in op.inputs.values()):
            # FIXME: RGBA mode cannot be used when broadcasting is involved
            continue

        op.get_attribute(ChannelMode)[0].mode = ChannelModeEnum.RGBA

        for name, x in list(op.inputs.items()):
            op.remove_input(x)
            x_converted, = ConvertRtoRGBA(None)(x)
            op.append_input(name, x_converted)

        for name, y in list(op.outputs.items()):
            y_dummy = Variable(y.shape, y.order)
            y_converted, = ConvertRGBAtoR(None)(y_dummy)

            for op2 in y.input_to:  # type: Operator
                op2.replace_input(y, y_converted)

            y_dummy.replace(y)
            y.get_attribute(ChannelMode)[0].mode = ChannelModeEnum.RGBA

        flag_changed = True

    return graph, flag_changed

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op1 in traverse.filter_nodes(traverse.listup_operators(graph), Elementwise):  # type: Elementwise
        if len(op1.inputs) <= 1:
            continue

        x0 = op1.inputs["x0"]
        x1 = op1.inputs["x1"]

        if isinstance(op1, ElementwiseAdd):
            op2 = x0.output_from
            op3 = x1.output_from

            if isinstance(op2, ElementwiseAdd) and isinstance(op3, ElementwiseAdd) \
                    and len(x0.input_to) == 1 and len(x1.input_to) == 1:
                #
                # x2 -+
                #     +-[op2: ElementwiseAdd]-> x0 -+
                # x3 -+                             |
                #                                   +-[op1: ElementwiseAdd]-> y
                # x4 -+                             |
                #     +-[op3: ElementwiseAdd]-> x1 -+
                # x5 -+
                #
                x2 = op2.inputs["x0"]
                x3 = op2.inputs["x1"]
                x4 = op3.inputs["x0"]
                x5 = op3.inputs["x1"]

                cs = []
                xs = []
                for x in [x2, x3, x4, x5]:
                    if isinstance(x, ConstantVariable):
                        cs.append(x)
                    else:
                        xs.append(x)

                if len(cs) >= 2:
                    y = op1.outputs["y"]

                    y_new = cs[0]
                    for c in cs[1:]:
                        y_new = y_new + c
                    for x in xs:
                        y_new = y_new + x

                    op1.remove_all()
                    op2.remove_all()
                    op3.remove_all()

                    y.change_order(y_new.order)
                    y_new.replace(y)
                    flag_changed = True

    return graph, flag_changed

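# Illustrative sketch (not part of the library): the branch above regroups two chained
# ElementwiseAdd operators so that constant operands are accumulated first and can be
# folded into a single constant at optimization time. In plain numpy terms, with the same
# names as the diagram (x2/x4 are runtime tensors, c3/c5 stand in for constant x3/x5):
import numpy as np

x2 = np.arange(6.0).reshape(2, 3)   # runtime tensor
x4 = np.ones((2, 3))                # runtime tensor
c3, c5 = 2.0, 5.0                   # constant operands of op2 / op3

folded = c3 + c5                    # computed once, offline
assert np.allclose((x2 + c3) + (x4 + c5), folded + x2 + x4)
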
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.listup_operators(graph):
        if isinstance(op, Tensordot):
            flag_changed |= _replace_output(op, "C", ChannelModeEnum.R)

        elif isinstance(op, Im2Col):
            flag_changed |= _replace_input(op, "im", ChannelModeEnum.R)
            if op.outputs["col"].shape_dict[Axis.C] % 4 == 0:
                flag_changed |= _replace_output(op, "col", ChannelModeEnum.RGBA)
            else:
                flag_changed |= _replace_output(op, "col", ChannelModeEnum.R)

        elif isinstance(op, ConvertRGBAtoR):
            flag_changed |= _replace_input(op, "x0", ChannelModeEnum.RGBA)
            flag_changed |= _replace_output(op, "y", ChannelModeEnum.R)

        elif isinstance(op, ConvertRtoRGBA):
            flag_changed |= _replace_input(op, "x0", ChannelModeEnum.R)
            flag_changed |= _replace_output(op, "y", ChannelModeEnum.RGBA)

        else:
            flag_changed |= _replace_input_all(op, ChannelModeEnum.R)
            flag_changed |= _replace_output_all(op, ChannelModeEnum.R)

    return graph, flag_changed

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), self.pattern):
        flag_changed |= self.optimize_operator(graph, op)

    return graph, flag_changed

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for concat in traverse.filter_nodes(traverse.listup_operators(graph), Concat):
        if len(concat.inputs) == 2:
            # Unrolling is not needed
            continue

        flag_changed = True
        xs = [concat.inputs[f"x{i}"] for i in range(len(concat.inputs))]
        y = concat.outputs["y"]
        concat.remove_all()

        while len(xs) > 1:
            hs = []
            while len(xs) > 0:
                if len(xs) == 1:
                    hs.append(xs.pop(0))
                else:
                    x0, x1 = xs.pop(0), xs.pop(0)
                    h, = Concat(None, axis=concat.axis)(x0, x1)
                    hs.append(h)
            xs = hs

        OptimizeRule.replace_variable(graph, y, xs[0].transpose_like(y))

    return graph, flag_changed

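# Illustrative sketch (not part of the library): the nested while-loops above pair the
# concat inputs two at a time, turning one N-ary Concat into a tree of binary Concats.
# Assuming plain Python lists, the same pairing strategy looks like this:
def pairwise_reduce(items, combine):
    """Reduce `items` with a binary `combine`, pairing elements round by round."""
    while len(items) > 1:
        next_round = []
        while items:
            if len(items) == 1:
                next_round.append(items.pop(0))   # odd element is carried over
            else:
                a, b = items.pop(0), items.pop(0)
                next_round.append(combine(a, b))
        items = next_round
    return items[0]

# For five inputs the binary combines form ((x0+x1) + (x2+x3)) + x4, three levels deep.
assert pairwise_reduce([[0], [1], [2], [3], [4]], lambda a, b: a + b) == [0, 1, 2, 3, 4]
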
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), Deconvolution2D):  # type: Deconvolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
        w, = ReinterpretAxis(None, in_order=OrderNHWC, out_order=Order([Axis.C, a_kh, a_kw, a_filter]))(w)
        x, = ReinterpretAxis(None, in_order=OrderNHWC, out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(x)

        col, = Tensordot(None, axes=a_filter)(x, w)
        col = col.transpose(Order([Axis.N, Axis.H, Axis.W, a_kh, a_kw, Axis.C]))
        col = col.reshape(shape=[*col.shape[0:3], mul(col.shape[3:6])], order=OrderNHWC)

        new_y, = Col2Im(None, ksize=op.ksize, stride=op.stride, padding=op.padding)(col)
        OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

    return graph, flag_changed

def test_use_same_layer_twice():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(4, use_bias=False, activation=None, input_shape=(2,)))
    layer = keras.layers.Dense(4, use_bias=False, activation=None)
    model.add(layer)
    model.add(layer)
    model.build()

    graph = KerasConverter(batch_size=1).convert(model)

    assert_equal(len(graph.inputs), 1)

    ops = traverse.listup_operators(graph)
    assert_equal(len(ops), 3)
    assert_equal(type(ops[0]), Linear)
    assert_equal(type(ops[1]), Linear)
    assert_equal(type(ops[2]), Linear)

    assert_equal(len(graph.outputs), 1)

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), Linear):
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter = Axis()
        if x.ndim == 2:
            w, = ReinterpretAxis(None, in_order=OrderNC, out_order=Order([Axis.C, a_filter]))(w)
            new_y, = Tensordot(None, axes=[Axis.C, a_filter])(x, w)

        elif x.ndim == 4:
            w, = ReinterpretAxis(None, in_order=OrderNHWC, out_order=Order([Axis.C, Axis.H, Axis.W, a_filter]))(w)
            new_y, = Tensordot(None, axes=[[Axis.H, Axis.W, Axis.C], [Axis.H, Axis.W, a_filter]])(x, w)

        else:
            raise NotImplementedError

        OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

    return graph, flag_changed

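# Illustrative sketch (not part of the library): the decomposition above treats a
# fully-connected layer as a tensor contraction over the feature axis. Assuming plain
# numpy arrays in NC / CN layout, the same relation can be checked with np.tensordot:
import numpy as np

x = np.random.rand(2, 5)        # (N, C)
w = np.random.rand(5, 3)        # (C, filter)

y_linear = x @ w                                    # ordinary fully-connected layer
y_tensordot = np.tensordot(x, w, axes=([1], [0]))   # contract C of x with C of w

assert np.allclose(y_linear, y_tensordot)
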
def allocate_variables(cls, graph: Graph, variables: List[Variable]):
    # Check that no constant variable has a shape with an unresolved placeholder.
    dynamic_constants = traverse.filter_nodes(
        [v for v in variables if not Placeholder.check_resolved(v.size)],
        ConstantVariable
    )
    assert len(dynamic_constants) == 0, \
        f"ConstantVariable with unresolved placeholder shape is detected: {dynamic_constants}"

    ops = traverse.listup_operators(graph)
    layout = MemoryLayout()

    lifetime = get_lifetime(graph, ops, variables)  # type: Dict[Variable, Tuple[int, int]]
    offsets = generate_allocation_info(variables, lifetime)  # type: Dict[Variable, Union[int, Placeholder]]
    for variable, offset in offsets.items():
        layout.append(variable, offset)

    layout.data = np.zeros(layout.static_size, dtype=np.float32)
    constant_size = 0
    for var in variables:
        if not isinstance(var, ConstantVariable):
            continue

        allocation = layout[var]
        layout.data[allocation.offset:allocation.offset + allocation.size] = var.data.flatten()
        constant_size += var.data.size

    layout.data = layout.data[:constant_size]

    if flags.VISUALIZE_MEMORY_ALLOCATION:
        _visualize_allocation(ops, variables, layout, lifetime, offsets)

    return layout

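# Illustrative sketch (not part of the library): the allocator above computes a lifetime
# (first and last use, as operator indices) per variable and derives offsets from it,
# presumably so that buffers with disjoint lifetimes can share memory. The overlap test at
# the core of that idea, assuming half-open (start, end) intervals:
def can_share(lifetime_a, lifetime_b):
    """Two buffers may occupy the same offset iff their lifetimes do not overlap."""
    (a_start, a_end), (b_start, b_end) = lifetime_a, lifetime_b
    return a_end <= b_start or b_end <= a_start

assert can_share((0, 3), (3, 5))        # producer/consumer chain: no overlap
assert not can_share((0, 4), (2, 6))    # both alive at operator indices 2..3
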
def test_rgba2r():
    """test_rgba2r

    before) v0 -{ConvertRtoRGBA}- v1 -{ConvertRGBAtoR}- v2

    after)  v0 -{Transpose}- v2
    """
    v0 = Variable((2, 3, 4, 5), OrderNCHW)
    v1, = ConvertRtoRGBA(None)(v0)
    v2, = ConvertRGBAtoR(None)(v1)
    v2.change_order(OrderNHWC)

    v0_original_order = v0.order
    v2_original_order = v2.order

    graph = Graph([v0], [v2])
    SimplifyNonsenseChannelModeConversion().optimize(graph)

    assert len(graph.inputs) == 1 and graph.inputs[0] == v0
    assert len(graph.outputs) == 1 and graph.outputs[0] == v2
    assert v0.order == v0_original_order
    assert len(traverse.listup_operators(graph)) == 1
    assert isinstance(v2.output_from, Transpose) and v2.output_from.inputs["x0"] == v0
    assert v2.order == v2_original_order

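# Illustrative sketch (not part of the library): the test above relies on the fact that
# converting R-mode data to RGBA and straight back preserves the values, so the pair can
# collapse into a single Transpose. Assuming, purely for illustration, that the packing
# simply groups values four at a time with zero padding:
import numpy as np

def r_to_rgba(data):
    padded = np.zeros(((data.size + 3) // 4) * 4, dtype=data.dtype)
    padded[:data.size] = data
    return padded.reshape(-1, 4)      # one RGBA texel per row

def rgba_to_r(texels, size):
    return texels.reshape(-1)[:size]  # drop the zero padding again

v = np.arange(10.0)
assert np.array_equal(rgba_to_r(r_to_rgba(v), v.size), v)
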
def optimize(self, graph: Graph):
    if not (flags.optimize.OPTIMIZE and flags.optimize.CONCAT_AFFINE):
        return graph, False

    flag_changed = False
    while True:
        flag_changed_in_iter = False
        ops = traverse.listup_operators(graph)
        current_seq = []
        for op in ops:
            if isinstance(op, Convolution2D) or isinstance(op, Linear):
                self._start_found(op, current_seq)
            elif (isinstance(op, AxiswiseScale) or isinstance(op, AxiswiseBias)) and \
                    op.parameters["axis"] is Axis.C:
                self._cont_found(op, current_seq)
            else:
                flag_changed_in_iter = self._non_cont_found(current_seq)
                if flag_changed_in_iter:
                    break
        else:
            # Also run the optimization when the sequence ends with conv-scale
            flag_changed_in_iter = self._non_cont_found(current_seq)

        flag_changed |= flag_changed_in_iter
        if not flag_changed_in_iter:
            break

    return graph, flag_changed

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), Convolution2D):  # type: Convolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
        w, = ReinterpretAxis(None, in_order=OrderNHWC, out_order=Order([Axis.C, a_kh, a_kw, a_filter]))(w)

        if op.WH == 1 and op.WW == 1 and op.stride == (1, 1) and op.padding == (0, 0):
            # Projection
            col, = ReinterpretAxis(None, in_order=OrderNHWC, out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(x)
            new_y, = Tensordot(None, [[a_filter], [a_kh, a_kw, a_filter]])(col, w)

        elif op.WH == x.shape_dict[Axis.H] and op.WW == x.shape_dict[Axis.W] and op.padding == (0, 0):
            # Global convolution
            col, = ReinterpretAxis(None, in_order=OrderNHWC, out_order=Order([Axis.N, a_kh, a_kw, a_filter]))(x)
            new_y, = Tensordot(None, [[a_kh, a_kw, a_filter], [a_kh, a_kw, a_filter]])(col, w)

        else:
            # General convolution
            col, = Im2Col(None, ksize=op.ksize, stride=op.stride, padding=op.padding, dilation_rate=op.dilation_rate)(x)
            col, = ReinterpretAxis(None, in_order=OrderNHWC, out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(col)
            new_y, = Tensordot(None, [[a_filter], [a_kh, a_kw, a_filter]])(col, w)

        new_y = new_y.transpose(y.order)
        OptimizeRule.replace_variable(graph, new_y, y)

    return graph, flag_changed

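# Illustrative sketch (not part of the library): the "Projection" branch above treats a
# 1x1 convolution with stride 1 and no padding as a plain contraction over the input
# channel axis. Assuming numpy arrays in NHWC layout for x and (C_out, KH, KW, C_in) for w:
import numpy as np

x = np.random.rand(2, 5, 6, 3)          # (N, H, W, C_in)
w = np.random.rand(4, 1, 1, 3)          # (C_out, KH=1, KW=1, C_in)

# Direct 1x1 convolution: every output pixel is a linear map of the input channels.
y_conv = np.einsum("nhwc,okjc->nhwo", x, w)

# Same result as a tensordot that reduces C_in of x against C_in of w.
y_dot = np.tensordot(x, w.reshape(4, 3), axes=([3], [1]))

assert np.allclose(y_conv, y_dot)
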
def test_conv_scale():
    for order_x, order_w in itertools.product(orders4, orders4):
        conv = Convolution2D(None, ksize=3, stride=1, padding=1)
        scale = AxiswiseScale(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(order_x)

        w_shape = [4, 3, 3, 5]
        w = ConstantVariable(arange_shaped(w_shape), OrderNHWC)
        w.change_order(order_w)
        w_data = w.data.copy()

        h, = conv(x, w)

        s_shape = [h.shape_dict[Axis.C]]
        s = ConstantVariable(arange_shaped(s_shape), OrderC)
        s_data = s.data.copy()

        y, = scale(h, s)

        graph = Graph([x], [y])
        graph, _ = ConcatAffine().optimize(graph)

        # noinspection PyTypeChecker
        expander = (None,) * order_w.axes_dict[Axis.N] + (Ellipsis,) + (None,) * (3 - order_w.axes_dict[Axis.N])
        w_data_expected = w_data * s_data[expander]

        ops = listup_operators(graph)
        assert len(ops) == 1 and isinstance(ops[0], Convolution2D)
        assert conv.outputs["y"] == y
        assert np.all(np.equal(w.data, w_data_expected))

def merged_elementwise_kernel(op: FusedElementwise, memory_layout: MemoryLayout) -> List[Kernel]:
    ops = traverse.listup_operators(op.sub_graph)
    command_buffer, buffer_injector = generate_elementwise_command_buffer(
        ops,
        [_registered_items[op.__class__] for op in ops],
        memory_layout,
        dummy2real=op.dummy2real
    )
    return elementwise_kernel_base(op, command_buffer, buffer_injector)

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), Sgemm):  # type: Sgemm
        A = op.inputs["A"]
        B = op.inputs["B"]
        M = op.M
        N = op.N
        K = op.K
        transpose_A = op.transpose_A
        transpose_B = op.transpose_B

        if transpose_A:
            if TextureShape.get(A) != [M, K]:
                flag_changed = True
                TextureShape.set(A, width=K, height=M)
        else:
            if TextureShape.get(A) != [K, M]:
                flag_changed = True
                TextureShape.set(A, width=M, height=K)

        if transpose_B:
            if TextureShape.get(B) != [K, N]:
                flag_changed = True
                TextureShape.set(B, width=N, height=K)
        else:
            if TextureShape.get(B) != [N, K]:
                flag_changed = True
                TextureShape.set(B, width=K, height=N)

    return graph, flag_changed

def test_nested_model():
    model1 = keras.models.Sequential()
    model1.add(keras.layers.Dense(8, use_bias=False, activation=None, input_shape=(4,)))

    model2 = keras.models.Sequential()
    model2.add(keras.layers.Dense(4, use_bias=False, activation=None, input_shape=(2,)))
    model2.add(model1)
    model2.add(keras.layers.Dense(16, use_bias=False, activation=None))
    model2.build()

    graph = KerasConverter(batch_size=1).convert(model2)

    assert_equal(len(graph.inputs), 1)

    ops = traverse.listup_operators(graph)
    assert_equal(len(ops), 3)
    assert_equal(type(ops[0]), Linear)
    assert_equal(type(ops[1]), Linear)
    assert_equal(type(ops[2]), Linear)

    assert_equal(len(graph.outputs), 1)

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), InplaceOperator):
        attr = op.get_attribute(InplaceOperator)[0]
        v_in = attr.get_input()
        v_out = attr.get_output()
        flag_inplace = True

        if v_in.has_attribute(Input):
            # Input variable cannot be overwritten.
            flag_inplace = False

        if isinstance(v_in, ConstantVariable):
            # Constant variable cannot be overwritten.
            flag_inplace = False

        if any(v_in.stride_dict[a] != v_out.stride_dict[a] for a in v_out.order.axes if a in v_in.order.axes):
            flag_inplace = False

        if flag_inplace != attr.get_status():
            attr.toggle_status(flag_inplace)
            flag_changed = True

    return graph, flag_changed

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.listup_operators(graph):
        if not isinstance(op, Linear):
            continue

        op: Linear

        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]

        assert x.order == OrderNC or x.order == OrderNHWC
        assert w.order == OrderCN or w.order == OrderHWCN
        assert y.order == OrderNC or y.order == OrderNHWC
        assert w.ndim == x.ndim

        flag_changed = True
        op.remove_all()

        sgemm = Sgemm(
            None,
            M=y.shape_dict[Axis.N],
            N=y.size // y.shape_dict[Axis.N],
            K=x.size // x.shape_dict[Axis.N],
            out_shape=y.shape,
            out_order=y.order,
            transpose_A=True,
            transpose_B=True
        )
        new_y, = sgemm(x, w)

        sgemm.replace_output(new_y, y)

    return graph, flag_changed

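# Illustrative sketch (not part of the library): the Sgemm parameters above flatten the
# Linear layer into a single (M, K) x (K, N) matrix multiply, where M is the batch size,
# K the flattened input features per sample and N the flattened output features.
# Assuming plain numpy arrays:
import numpy as np

batch, c_in, c_out = 2, 6, 4
x = np.random.rand(batch, c_in)     # OrderNC
w = np.random.rand(c_in, c_out)     # OrderCN

M = batch
K = x.size // batch                  # flattened input features per sample
N = w.size // K                      # flattened output features

y = x.reshape(M, K) @ w.reshape(K, N)
assert y.shape == (M, N)
assert np.allclose(y, x @ w)
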
def test_conv_bias():
    for order_x, order_w in itertools.product(orders4, orders4):
        conv = Convolution2D(None, ksize=3, stride=1, padding=1)
        bias = AxiswiseBias(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(order_x)

        w_shape = [4, 3, 3, 5]
        w = ConstantVariable(arange_shaped(w_shape), OrderNHWC)
        w.change_order(order_w)
        w_data = w.data.copy()

        h, = conv(x, w)

        b_shape = [h.shape_dict[Axis.C]]
        b = ConstantVariable(arange_shaped(b_shape), OrderC)
        b_data = b.data.copy()

        y, = bias(h, b)

        graph = Graph([x], [y])
        graph, _ = ConcatAffine().optimize(graph)

        w_data_expected = w_data
        b_data_expected = b_data

        ops = listup_operators(graph)
        assert len(ops) == 2 and isinstance(ops[0], Convolution2D) and isinstance(ops[1], AxiswiseBias)
        assert np.all(np.equal(ops[0].inputs["w"].data, w_data_expected))
        assert np.all(np.equal(ops[1].inputs["b"].data, b_data_expected))

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    ops = traverse.listup_operators(graph)
    ops = traverse.filter_nodes(ops, Inplace)
    ops = traverse.filter_nodes(ops, InlineInplace, mode_not=True)

    for op in ops:  # type: Operator
        inplace = op.get_attribute(Inplace)[0]  # type: Inplace

        if isinstance(op, Relu):
            op.attributes.add(InlineInplace(op, lambda exp: f"({exp}>0?{exp}:0)", inplace))
            flag_changed = True

        elif isinstance(op, Elu):
            op.attributes.add(InlineInplace(op, lambda exp: f"({exp}>0?{exp}:(exp({exp})-1))", inplace))
            flag_changed = True

        elif isinstance(op, Tanh):
            op.attributes.add(InlineInplace(op, lambda exp: f"(tanh({exp}))", inplace))
            flag_changed = True

        else:
            continue

    return graph, flag_changed

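# Illustrative sketch (not part of the library): each InlineInplace entry above carries a
# lambda that wraps an input expression string into the operator's own formula, so inlinable
# operators can be chained into one nested expression instead of separate kernels. Composing
# two of the lambdas directly shows the resulting source fragment:
relu_inline = lambda exp: f"({exp}>0?{exp}:0)"
tanh_inline = lambda exp: f"(tanh({exp}))"

assert tanh_inline(relu_inline("x[i]")) == "(tanh((x[i]>0?x[i]:0)))"
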
def allocate_variables(cls, graph: Graph, variables: List[Variable]) -> MemoryLayout:
    ops = traverse.listup_operators(graph)
    layout = MemoryLayout()

    if flags.optimize.OPTIMIZE and flags.optimize.OPTIMIZE_MEMORY_ALLOCATION:
        analysis_list = _analyse_variable_lifetime(graph, ops, variables)
        _optimize_allocation_offset(analysis_list)

        allocation_dict = {item.variable: item.offset for item in analysis_list}
        for var in variables:
            original_var = var
            while "inplace_src" in var.parameters:
                var = var.parameters["inplace_src"]

            layout.append(original_var, allocation_dict[var])

    else:
        for variable in variables:
            layout.append(variable)

    if flags.VISUALIZE_MEMORY_ALLOCATION:
        _visualize_allocation(layout, graph, variables, ops)

    return layout

def _check_condition2(v, sub_graph):
    ops = traverse.listup_operators(sub_graph)
    for op in v.input_to:
        if op not in ops:
            return False

    return True

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op1 in traverse.filter_nodes(traverse.listup_operators(graph), Associative):  # type: Operator
        associative1 = op1.get_attribute(Associative)[0]
        var1, var2 = associative1.vars
        op2 = var1.output_from

        if isinstance(var1, ConstantVariable):
            # Left hand operand (var1) has no child tree
            continue

        if var1 in graph.outputs or len(var1.input_to) > 1:
            # var1 will be removed in this optimize rule
            continue

        if not isinstance(op2, op1.__class__):
            # op1 and op2 must be the same operator class
            continue

        associative2 = op2.get_attribute(Associative)[0]
        var3, var4 = associative2.vars

        if not isinstance(var4, ConstantVariable):
            # No optimization is needed.
            # If either var3 or var4 is constant, then it is var4, because the optimization rule for
            # commutative operators reorders operands so that constant variables are gathered on the
            # right hand side.
            continue

        """
        var3 -+
              +-{op2}- var1 -+
        var4 -+              +-{op1}-
                      var2 -+
        """

        if isinstance(var2, ConstantVariable):
            # Fold var4 and var2
            associative1.reorder(op2)  # (var3*var4)*var2 => var3*(var4*var2)
            flag_changed = True

        else:
            # Sweep out var4
            if not op1.has_attribute(Commutative):
                continue

            associative2 = op2.get_attribute(Associative)[0]
            commutative2 = op2.get_attribute(Commutative)[0]
            if not isinstance(associative2.vars[1], ConstantVariable):
                continue

            commutative2.swap()        # (var3*var4)*var2 => (var4*var3)*var2
            associative1.reorder(op2)  # (var4*var3)*var2 => var4*(var3*var2)
            flag_changed = True

    return graph, flag_changed

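# Illustrative sketch (not part of the library): the "Fold var4 and var2" branch above uses
# associativity to regroup (var3 * var4) * var2 into var3 * (var4 * var2), so that the two
# constant operands can be evaluated once at compile time. With plain numbers:
x, c1, c2 = 7.0, 2.0, 3.0          # x is a runtime value, c1/c2 are constants
folded_constant = c1 * c2          # evaluated offline
assert (x * c1) * c2 == x * folded_constant
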
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), Convolution2D):  # type: Convolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]  # type: ConstantVariable
        old_y = op.outputs["y"]

        flag_changed = True
        op.remove_all()

        assert x.order == OrderNHWC
        assert isinstance(w, ConstantVariable)
        assert old_y.order == OrderNHWC
        w.change_order(OrderNHWC)

        col, = Im2Col(None, ksize=op.ksize, stride=op.stride, padding=op.padding, dilation_rate=op.dilation_rate)(x)
        col.change_order(OrderNHWC)
        ChannelMode.set_mode(col, ChannelModeEnum.RGBA)

        M = col.shape_dict[Axis.N] * col.shape_dict[Axis.H] * col.shape_dict[Axis.W]
        N = w.shape_dict[Axis.N]
        K = col.shape_dict[Axis.C]

        if K > (w.size // N):
            w2_data = np.hstack([w.data.reshape(N, w.size // N), np.zeros([N, K - w.size // N])])
        else:
            w2_data = w.data.reshape(N, w.size // N)

        w = ConstantVariable(w2_data, OrderNC)
        ChannelMode.set_mode(w, ChannelModeEnum.RGBA)

        sgemm = Sgemm(
            None,
            M=M,
            N=N,
            K=K,
            out_shape=[col.shape_dict[Axis.N], col.shape_dict[Axis.H], col.shape_dict[Axis.W], w.shape_dict[Axis.N]],
            out_order=OrderNHWC,
            transpose_A=True,
            transpose_B=False
        )
        new_y, = sgemm(col, w)

        sgemm.replace_output(new_y, old_y)

    return graph, flag_changed

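# Illustrative sketch (not part of the library): when the col buffer's channel count K
# exceeds the real weight size (presumably because of the RGBA packing above), the weights
# are padded with zero columns. Those zero columns guarantee that whatever values sit in
# the extra channels of col, they never contribute to the output:
import numpy as np

N, K_real, K_pad = 4, 6, 8
w = np.random.rand(N, K_real)
w_padded = np.hstack([w, np.zeros((N, K_pad - K_real))])

col = np.random.rand(5, K_real)                                    # 5 output pixels
col_padded = np.hstack([col, np.random.rand(5, K_pad - K_real)])   # arbitrary padding values

assert np.allclose(col @ w.T, col_padded @ w_padded.T)
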
def generate_kernels(graph: Graph, constants_layout: MemoryLayout, variables_layout: MemoryLayout) -> List[Kernel]:
    kernels: List[Kernel] = []

    for op in traverse.listup_operators(graph):
        if isinstance(op, AxiswiseBias):
            kernels += axiswise_bias(op, constants_layout, variables_layout)
        elif isinstance(op, Relu):
            kernels += relu(op, constants_layout, variables_layout)
        elif isinstance(op, Elu):
            kernels += elu(op, constants_layout, variables_layout)
        elif isinstance(op, Tanh):
            kernels += tanh(op, constants_layout, variables_layout)
        elif isinstance(op, LocalResponseNormalization):
            kernels += local_response_normalization(op, constants_layout, variables_layout)
        elif isinstance(op, MaxPooling2D):
            kernels += max_pooling_2d(op, constants_layout, variables_layout)
        elif isinstance(op, AveragePooling2D):
            kernels += average_pooling_2d(op, constants_layout, variables_layout)
        elif isinstance(op, AxiswiseScale):
            kernels += axiswise_scale(op, constants_layout, variables_layout)
        elif isinstance(op, ElementwiseSum):
            kernels += elementwise_sum(op, constants_layout, variables_layout)
        elif isinstance(op, Flatten):
            kernels += flatten(op, constants_layout, variables_layout)
        elif isinstance(op, Sgemm):
            kernels += sgemm(op, constants_layout, variables_layout)
        elif isinstance(op, Im2Col):
            kernels += im2col(op, constants_layout, variables_layout)
        elif isinstance(op, Col2Im):
            kernels += col2im(op, constants_layout, variables_layout)
        elif isinstance(op, ScalarAffine):
            kernels += scalar_affine(op, constants_layout, variables_layout)
        elif isinstance(op, Concat):
            kernels += concat(op, constants_layout, variables_layout)
        else:
            raise NotImplementedError(f"{op} is unknown for WebGPUDescriptorGenerator")

    return kernels

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), Tensordot):
        if not op.has_attribute(UseEigenAttribute):
            op.attributes.add(UseEigenAttribute())
            flag_changed = True
            graph.licenses["eigen"] = EIGEN_LICENSE

    return graph, flag_changed

def generate_kernels(cls, graph: Graph) -> List[Kernel]:
    kernels = []  # type: List[T_KERNEL]

    for op in traverse.listup_operators(graph):
        key = cls.serialize_operator_type(op)
        if key not in cls._handler_map[cls.__name__]:
            raise NotImplementedError(f"[{cls.__name__}] Operator {op} is not handled by any generator handler")

        kernels += cls._handler_map[cls.__name__][key](op)

    return kernels

def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for op in traverse.filter_nodes(traverse.listup_operators(graph), Sgemm):  # type: Sgemm
        if not op.has_attribute(SgemmWithEigen):
            op.attributes.add(SgemmWithEigen(op))
            flag_changed = True
            graph.licenses["eigen"] = EIGEN_LICENSE

    return graph, flag_changed
