def __init__(self):
    sub_rules = [
        OptimizeRuleGroup([
            InsertTranspose(),
            ReplaceConvolutionByIm2Col(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ReplaceDeconvolutionByCol2Im(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ReplaceLinearBySgemm(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ConcatLSTMInputAndHidden(),
            RemoveRedundantOperator(),
            RemoveNoEffectOperator(),
            UpdateInplaceAttribute()
        ]),
        ElementwiseKernelFusion()
    ]

    if flags.DEBUG:
        sub_rules.append(DumpGraph("cg{count}.dot"))

    super(WebGPUOptimizeRule, self).__init__(sub_rules)
def optimize(self, graph: Graph):
    flag_changed = False

    # Some operators do not support splitting but appear only as constants.
    # As a workaround for such cases, apply ConstantFolding to a limited set
    # of operators even when it is disabled.
    cf = ConstantFolding()
    graph, flag_changed_in_cf = cf.optimize(graph, (Transpose,))
    flag_changed |= flag_changed_in_cf

    c_before = traverse.filter_nodes(traverse.listup_variables(graph), ConstantVariable)
    c_size_before = sum([c.size for c in c_before])

    for v in traverse.filter_nodes(traverse.listup_variables(graph), SplitTarget):
        axis = _choose_split_axis(v)
        _split_axis(v, axis, graph)
        flag_changed = True

    c_after = traverse.filter_nodes(traverse.listup_variables(graph), ConstantVariable)
    c_size_after = sum([c.size for c in c_after])

    if c_size_before > c_size_after:
        raise Exception("Total size of constant variables must not decrease by splitting")

    return graph, flag_changed
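# A minimal driver sketch, assuming the OptimizeRule contract used throughout
# this codebase: optimize() returns (graph, flag_changed). The function name
# run_to_fixed_point is hypothetical, for illustration only.
def run_to_fixed_point(rule, graph):
    while True:
        graph, flag_changed = rule.optimize(graph)
        if not flag_changed:
            # No rule fired in the last pass; the graph is stable.
            return graph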
def __init__(self):
    super(SimplifyAssociativeOperator, self).__init__([
        SimplifyAssociativeOperatorRightHand(),
        ConstantFolding(),
        SimplifyAssociativeOperatorLeftHand(),
        ConstantFolding()
    ])
def __init__(self):
    sub_rules = [
        OptimizeRuleGroup([
            InsertTranspose(),
            ReplaceConvolutionByIm2Col(),
            ReplaceDeconvolutionByCol2Im(),
            ReplaceLinearByTensordot(),
            DecomposeSoftmax(),
            FixTensordotTextureShape(),
            MergeTensordotAndElementwiseMul(),
            ConstantFolding(),
            RemoveRedundantOperator(),
            RemoveNoEffectOperator(),
            SplitTexture(),
            UnrollConcat(),
        ]),
        OptimizeRuleGroup([
            InsertTranspose(),
            InsertChannelModeConversion(),
            SimplifyChannelModeConversion(),
            ConstantFolding(),
            RemoveRedundantOperator(),
            RemoveNoEffectOperator(),
        ]),
    ]  # type: List[OptimizeRule]

    if flags.DEBUG:
        sub_rules.append(DumpGraph(f"cg_{config.WEBGL_MAX_TEXTURE_SIZE}_{{count}}.dot"))

    super(WebGLOptimizeRule, self).__init__(sub_rules, repeat=False)
def __init__(self):
    sub_rules = [
        InsertTranspose(),
        ReplaceConvolutionByIm2Col(),
        MergeSgemmAndElementwiseMul(),
        ConstantFolding(),
        ReplaceDeconvolutionByCol2Im(),
        MergeSgemmAndElementwiseMul(),
        ConstantFolding(),
        ReplaceLinearBySgemm(),
        MergeSgemmAndElementwiseMul(),
        ConstantFolding(),
        UseEigen(),
        ElementwiseKernelFusion(),
        UpdateInplaceAttribute()
    ]

    if flags.DEBUG:
        sub_rules.append(DumpGraph("cg{count}.dot"))

    super(WebassemblyOptimizeRule, self).__init__(sub_rules)
def __init__(self):
    sub_rules = [
        OptimizeRuleGroup([
            InsertTranspose(),
            ReplaceConvolutionByIm2Col(),
            ReplaceDeconvolutionByCol2Im(),
            DecomposeSoftmax(),
            ReplaceLinearBySgemm(),
            MergeSgemmAndElementwiseMul(),
            FixSGEMMTextureShape(optimize_channel_mode=False),
            ConstantFolding(),
            SplitTexture(),
        ]),
        OptimizeRuleGroup([
            InsertChannelModeConversion(),
            SimplifyElementwise(),
            RemoveRedundantOperator(),
            SimplifyChannelModeConversion(),
            FixSGEMMTextureShape(optimize_channel_mode=True),
        ]),
        AttachConcatWorkspace(),
    ]

    if flags.DEBUG:
        sub_rules.append(DumpGraph("cg{count}.dot"))

    super(WebGLOptimizeRule, self).__init__(sub_rules, repeat=False)
def test_fold_mul_deep():
    """
    before)

        c0 -+
            +-{Mul}- h1 -+
        c1 -+            +-{Mul}- h2 -+
                    c2 -+             +-{Add}- h4
                                 h3 -+

    after)

        c0*c1*c2 -+
                  +-{Add}- h4
              h3 -+
    """
    c0 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    c1 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    h1 = c0 * c1
    c2 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    h2 = h1 * c2
    h3 = Variable([2, 3, 4, 5], OrderNCHW)
    h4 = h2 + h3

    graph = Graph([h3], [h4])
    ConstantFolding().optimize(graph)

    h2_new = h4.output_from.inputs["x0"]
    assert h2_new is not h2
    assert isinstance(h2_new, ConstantVariable)
    assert np.abs(np.mean(h2_new.data - (c0.data * c1.data * c2.data))) < 1e-5
def test_fold_add():
    """
    before)

        c0 -+
            +-{Add}- h1 -+
        c1 -+            +-{Add}- h3
                    h2 -+

    after)

        c0+c1 -+
               +-{Add}- h3
           h2 -+
    """
    c0 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    c1 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    h1 = c0 + c1
    h2 = Variable([2, 3, 4, 5], OrderNCHW)
    h3 = h1 + h2

    graph = Graph([h2], [h3])
    ConstantFolding().optimize(graph)

    h1_new = h3.output_from.inputs["x0"]
    assert h1_new is not h1
    assert isinstance(h1_new, ConstantVariable)
    assert np.abs(np.mean(h1_new.data - (c0.data + c1.data))) < 1e-5
def __init__(self):
    super(WebassemblyOptimizeRule, self).__init__([
        InsertTranspose(),
        ReplaceConvolutionByIm2Col(),
        MergeSgemmAndElementwiseMul(),
        ConstantFolding(),
        ReplaceDeconvolutionByCol2Im(),
        MergeSgemmAndElementwiseMul(),
        ConstantFolding(),
        ReplaceLinearBySgemm(),
        MergeSgemmAndElementwiseMul(),
        ConstantFolding(),
        OptimizeSgemmEigen(),
        ElementwiseKernelFusion(),
        UpdateInplaceAttribute()
    ])
def __init__(self):
    super(GeneralOptimizeRule, self).__init__()

    self.register(RemoveRedundantOperator())
    self.register(RemoveNoEffectOperator())
    self.register(ReplaceScalarAffine())
    self.register(SimplifyElementwise())
    self.register(ConcatZeroPadding())
    self.register(ConstantFolding())
def __init__(self):
    super(WebGPUOptimizeRule, self).__init__([
        OptimizeRuleGroup([
            InsertTranspose(),
            ReplaceConvolutionByIm2Col(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ReplaceDeconvolutionByCol2Im(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ReplaceLinearBySgemm(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ConcatLSTMInputAndHidden(),
            RemoveRedundantOperator(),
            RemoveNoEffectOperator(),
            UpdateInplaceAttribute()
        ]),
        ElementwiseKernelFusion()
    ])
def __init__(self):
    super(GeneralOptimizeRule, self).__init__([
        RemoveRedundantOperator(),
        RemoveNoEffectOperator(),
        SimplifyElementwise(),
        ConcatZeroPadding(),
        ConstantFolding(),
        Convolution2DSvdCompression(),
        ConvFilterPruning(),
        UpgradeOperatorType()
    ])
def test_fold_sub():
    """
    before)

        c0 -+
            +-{Sub}- h1 -+
        c1 -+            +-{Add}- h3
                    h2 -+

    after)

        c0-c1 -+
               +-{Add}- h3
           h2 -+
    """
    c0 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    c1 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    h1 = c0 - c1
    h2 = Variable([2, 3, 4, 5], OrderNCHW)
    h3 = h1 + h2

    graph = Graph([h2], [h3])
    ConstantFolding().optimize(graph)

    h1_new = h3.output_from.inputs["x0"]
    assert h1_new is not h1
    assert isinstance(h1_new, ConstantVariable)
    assert np.abs(np.mean(h1_new.data - (c0.data - c1.data))) < 1e-5
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False

    for sgemm in traverse.filter_nodes(traverse.listup_operators(graph), Sgemm):  # type: Sgemm
        A = sgemm.inputs["A"]
        B = sgemm.inputs["B"]
        M = sgemm.M
        N = sgemm.N
        K = sgemm.K
        transpose_A = sgemm.transpose_A
        transpose_B = sgemm.transpose_B

        if all([
            self.optimize_channel_mode,
            K % 4 == 0,
            isinstance(A, ConstantVariable) or transpose_A,
            isinstance(B, ConstantVariable) or not transpose_B
        ]):
            # Pack four consecutive elements along the K axis into one RGBA texel.
            # Constant inputs are pre-transposed so that K lies along the texture width.
            if not transpose_A:
                assert isinstance(A, ConstantVariable)
                flag_changed = True
                old_A = A
                A = ConstantVariable(A.data.reshape([K, M]).transpose(), Order([Axis(None), Axis(None)]))
                ChannelMode.set(A, ChannelMode.get(old_A))
                sgemm.replace_input(old_A, A, with_assert=False)
                sgemm.parameters["transpose_A"] = transpose_A = True

            if transpose_B:
                assert isinstance(B, ConstantVariable)
                flag_changed = True
                old_B = B
                B = ConstantVariable(B.data.reshape([K, N]).transpose(), Order([Axis(None), Axis(None)]))
                ChannelMode.set(B, ChannelMode.get(old_B))
                sgemm.replace_input(old_B, B, with_assert=False)
                sgemm.parameters["transpose_B"] = transpose_B = False

            if ChannelMode.get(A) != ChannelModeEnum.RGBA:
                flag_changed = True
                ChannelMode.set(A, ChannelModeEnum.RGBA)

            if ChannelMode.get(B) != ChannelModeEnum.RGBA:
                flag_changed = True
                ChannelMode.set(B, ChannelModeEnum.RGBA)

            texture_shape_A = [M, K // 4] if transpose_A else [K // 4, M]
            texture_shape_B = [K // 4, N] if transpose_B else [N, K // 4]

        else:
            # Fall back to one element per texel (R channel only).
            if ChannelMode.get(A) != ChannelModeEnum.R:
                flag_changed = True
                ChannelMode.set(A, ChannelModeEnum.R)

            if ChannelMode.get(B) != ChannelModeEnum.R:
                flag_changed = True
                ChannelMode.set(B, ChannelModeEnum.R)

            texture_shape_A = [M, K] if transpose_A else [K, M]
            texture_shape_B = [K, N] if transpose_B else [N, K]

        if TextureShape.get(A) != texture_shape_A:
            flag_changed = True
            TextureShape.set(A, height=texture_shape_A[0], width=texture_shape_A[1])

        if TextureShape.get(B) != texture_shape_B:
            flag_changed = True
            TextureShape.set(B, height=texture_shape_B[0], width=texture_shape_B[1])

    if flag_changed:
        graph, _ = ConstantFolding().optimize(graph)

    return graph, flag_changed
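# A standalone sketch of the texture-shape arithmetic above, assuming only
# that RGBA mode packs four K-elements into one texel. The helper name
# sgemm_texture_shapes is hypothetical, for illustration only.
def sgemm_texture_shapes(M, N, K, transpose_A, transpose_B, rgba):
    if rgba:
        # The rule normalizes constants so that transpose_A is True and
        # transpose_B is False; K then lies along the texture width.
        assert K % 4 == 0 and transpose_A and not transpose_B
        return [M, K // 4], [N, K // 4]
    shape_A = [M, K] if transpose_A else [K, M]
    shape_B = [K, N] if transpose_B else [N, K]
    return shape_A, shape_B

# Example: M=8, N=16, K=12 in RGBA mode gives ([8, 3], [16, 3]).
assert sgemm_texture_shapes(8, 16, 12, True, False, rgba=True) == ([8, 3], [16, 3])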
def convert(self, inputs: Sequence["tf.Tensor"], outputs: Sequence["tf.Tensor"],
            order_hints: Optional[Dict[Union["tf.Tensor", "tf.Variable"], Order]] = None) -> Graph:
    """convert(inputs, outputs, order_hints=None)

    Args:
        inputs (list of `tf.Tensor`): TensorFlow input tensors
        outputs (list of `tf.Tensor`): TensorFlow output tensors
        order_hints: Order annotations which help WebDNN's optimizer.

    .. admonition:: example

        Convert a TensorFlow model.

        .. code::

            import tensorflow as tf
            from webdnn.frontend.tensorflow import TensorFlowConverter

            # y = x @ W + b
            x = tf.placeholder(tf.float32, [None, 784])
            W = tf.Variable(tf.zeros([784, 10]))
            b = tf.Variable(tf.zeros([10]))
            y = tf.nn.softmax(tf.matmul(x, W) + b)

            graph = TensorFlowConverter().convert([x], [y])

    Returns:
        (:class:`~webdnn.graph.graph.Graph`): WebDNN IR Graph
    """
    for tensor in inputs:
        shape = [Placeholder() if dim.value is None else dim.value for dim in tensor.shape.dims]
        if isinstance(shape[0], Placeholder):
            shape[0] = self._batch_size
        self.set_variable(tensor, Variable(shape, Order([None] * len(shape))))

    ops = _listup_operations(inputs, outputs)
    for op in ops:
        self._convert_operator(op)
        sub_graph = Graph(
            [self.get_variable(tf_tensor) for tf_tensor in op.inputs if self.has_variable(tf_tensor)],
            [self.get_variable(tf_tensor) for tf_tensor in op.outputs if self.has_variable(tf_tensor)])
        old_outputs = list(sub_graph.outputs)

        # Constant folding improves the possibility of conversion: many tensors are used not only as the
        # main input variables but also as other parameters such as operation indices, and WebDNN does
        # not support operations with dynamic indices.
        OptimizeRuleGroup([ConstantFolding()], repeat=True).optimize(sub_graph)

        # After constant folding, the old variables must be replaced with the new constant variables.
        for tf_tensor in op.outputs:
            if not self.has_variable(tf_tensor):
                # This tensor is not converted (ignored).
                continue

            old_v = self.get_variable(tf_tensor)
            new_v = sub_graph.outputs[old_outputs.index(old_v)]
            if old_v != new_v:
                self.set_variable(tf_tensor, new_v, overwrite=True)

    if order_hints:
        for tensor, order in order_hints.items():
            if isinstance(tensor, tf.Variable):
                tensor = tensor.value()

            variable = self.get_variable(tensor)
            for axis1, axis2 in zip(variable.order.axes, order.axes):
                axis1.unify(axis2)

    # Remove redundant ReinterpretAxis operators
    graph = Graph([self.get_variable(tensor) for tensor in inputs],
                  [self.get_variable(tensor) for tensor in outputs])
    graph, _ = TensorFlowFrontendOptimizeRule().optimize(graph)

    for v in graph.inputs:
        v.attributes.add(Input(v))

    for v in graph.outputs:
        v.attributes.add(Output(v))

    return graph
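# A hedged usage sketch for order_hints, assuming OrderCN from
# webdnn.graph.order is the intended layout for the weight W in the docstring
# example above; the hint pins W's axes so the optimizer need not infer them.
from webdnn.graph.order import OrderCN

graph = TensorFlowConverter().convert([x], [y], order_hints={W: OrderCN})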