Example #1
    def __init__(self):
        sub_rules = [
            OptimizeRuleGroup([
                InsertTranspose(),
                ReplaceConvolutionByIm2Col(),
                MergeSgemmAndElementwiseMul(),
                ConstantFolding(),
                ReplaceDeconvolutionByCol2Im(),
                MergeSgemmAndElementwiseMul(),
                ConstantFolding(),
                ReplaceLinearBySgemm(),
                MergeSgemmAndElementwiseMul(),
                ConstantFolding(),
                ConcatLSTMInputAndHidden(),
                RemoveRedundantOperator(),
                RemoveNoEffectOperator(),
                UpdateInplaceAttribute()
            ]),
            ElementwiseKernelFusion()
        ]

        if flags.DEBUG:
            sub_rules.append(DumpGraph("cg{count}.dot"))

        super(WebGPUOptimizeRule, self).__init__(sub_rules)
Example #2
    def optimize(self, graph: Graph):
        flag_changed = False
        """
        Some operators does not support splitting, but only appear as constant.
        Workaround for such case, use ConstantFolding for limited operators even if it is turned off.
        """
        cf = ConstantFolding()
        graph, flag_changed_in_cf = cf.optimize(graph, (Transpose, ))
        flag_changed |= flag_changed_in_cf

        c_before = traverse.filter_nodes(traverse.listup_variables(graph),
                                         ConstantVariable)
        c_size_before = sum([c.size for c in c_before])

        for v in traverse.filter_nodes(traverse.listup_variables(graph),
                                       SplitTarget):
            axis = _choose_split_axis(v)
            _split_axis(v, axis, graph)
            flag_changed = True

            c_after = traverse.filter_nodes(traverse.listup_variables(graph),
                                            ConstantVariable)
            c_size_after = sum([c.size for c in c_after])

            if c_size_before > c_size_after:
                raise Exception("total size of constant variables must not decrease by splitting")

        return graph, flag_changed
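
The docstring in Example #2 relies on one API detail: ConstantFolding.optimize is called with an optional tuple of operator types, which restricts folding to those operators only. A minimal sketch of that restricted call follows; the import paths are assumptions based on the WebDNN source layout, not something shown in the examples above.

# Sketch: fold only Transpose operators, even when general constant
# folding is disabled. Assumes ConstantFolding.optimize accepts an
# optional tuple of operator types (as in Example #2) and that these
# import paths match the WebDNN tree.
from webdnn.graph.graph import Graph
from webdnn.graph.operators.transpose import Transpose
from webdnn.optimizer.sub_rules.constant_folding import ConstantFolding

def fold_transposes_only(graph: Graph) -> Graph:
    graph, _ = ConstantFolding().optimize(graph, (Transpose,))
    return graph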
Example #3
    def __init__(self):
        super(SimplifyAssociativeOperator, self).__init__([
            SimplifyAssociativeOperatorRightHand(),
            ConstantFolding(),
            SimplifyAssociativeOperatorLeftHand(),
            ConstantFolding()
        ])
Example #4
    def __init__(self):
        sub_rules = [
            OptimizeRuleGroup([
                InsertTranspose(),
                ReplaceConvolutionByIm2Col(),
                ReplaceDeconvolutionByCol2Im(),
                ReplaceLinearByTensordot(),
                DecomposeSoftmax(),
                FixTensordotTextureShape(),
                MergeTensordotAndElementwiseMul(),
                ConstantFolding(),
                RemoveRedundantOperator(),
                RemoveNoEffectOperator(),
                SplitTexture(),
                UnrollConcat(),
            ]),
            OptimizeRuleGroup([
                InsertTranspose(),
                InsertChannelModeConversion(),
                SimplifyChannelModeConversion(),
                ConstantFolding(),
                RemoveRedundantOperator(),
                RemoveNoEffectOperator(),
            ]),
        ]  # type: List[OptimizeRule]

        if flags.DEBUG:
            sub_rules.append(
                DumpGraph(f"cg_{config.WEBGL_MAX_TEXTURE_SIZE}_{{count}}.dot"))

        super(WebGLOptimizeRule, self).__init__(sub_rules, repeat=False)
Example #5
    def __init__(self):
        sub_rules = [
            InsertTranspose(),

            ReplaceConvolutionByIm2Col(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),

            ReplaceDeconvolutionByCol2Im(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),

            ReplaceLinearBySgemm(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),

            UseEigen(),
            ElementwiseKernelFusion(),
            UpdateInplaceAttribute()
        ]

        if flags.DEBUG:
            sub_rules.append(DumpGraph("cg{count}.dot"))

        super(WebassemblyOptimizeRule, self).__init__(sub_rules)
Example #6
    def __init__(self):
        sub_rules = [
            OptimizeRuleGroup([
                InsertTranspose(),
                ReplaceConvolutionByIm2Col(),
                ReplaceDeconvolutionByCol2Im(),
                DecomposeSoftmax(),
                ReplaceLinearBySgemm(),
                MergeSgemmAndElementwiseMul(),
                FixSGEMMTextureShape(optimize_channel_mode=False),
                ConstantFolding(),
                SplitTexture(),
            ]),
            OptimizeRuleGroup([
                InsertChannelModeConversion(),
                SimplifyElementwise(),
                RemoveRedundantOperator(),
                SimplifyChannelModeConversion(),
                FixSGEMMTextureShape(optimize_channel_mode=True),
            ]),
            AttachConcatWorkspace(),
        ]

        if flags.DEBUG:
            sub_rules.append(DumpGraph("cg{count}.dot"))

        super(WebGLOptimizeRule, self).__init__(sub_rules, repeat=False)
Example #7
def test_fold_mul_deep():
    """
    before)

    c0 -+
        +{Mul}-h1-+
    c1 -+         +-{Mul}-h2-+
               c2-+          +-{Add}-h4
                          h3-+

    after)

    c0*c1*c2 -+
              +-{Add}-h4
          h3 -+
    """
    c0 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    c1 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)

    h1 = c0 * c1
    c2 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)

    h2 = h1 * c2
    h3 = Variable([2, 3, 4, 5], OrderNCHW)

    h4 = h2 + h3

    graph = Graph([h3], [h4])

    ConstantFolding().optimize(graph)

    h2_new = h4.output_from.inputs["x0"]

    assert h2_new is not h2
    assert isinstance(h2_new, ConstantVariable)
    assert np.abs(np.mean(h2_new.data - (c0.data * c1.data * c2.data))) < 1e-5
Example #8
def test_fold_add():
    """
    before)

    c0 -+
        +{Add}-h1-+
    c1 -+         +-{Add}-h3
              h2-+

    after)

    c0+c1 -+
           +-{Add}-h3
       h2 -+
    """
    c0 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    c1 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)

    h1 = c0 + c1
    h2 = Variable([2, 3, 4, 5], OrderNCHW)

    h3 = h1 + h2

    graph = Graph([h2], [h3])

    ConstantFolding().optimize(graph)

    h1_new = h3.output_from.inputs["x0"]

    assert h1_new is not h1
    assert isinstance(h1_new, ConstantVariable)
    assert np.abs(np.mean(h1_new.data - (c0.data + c1.data))) < 1e-5
Example #9
    def __init__(self):
        super(WebassemblyOptimizeRule, self).__init__([
            InsertTranspose(),
            ReplaceConvolutionByIm2Col(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ReplaceDeconvolutionByCol2Im(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            ReplaceLinearBySgemm(),
            MergeSgemmAndElementwiseMul(),
            ConstantFolding(),
            OptimizeSgemmEigen(),
            ElementwiseKernelFusion(),
            UpdateInplaceAttribute()
        ])
Example #10
    def __init__(self):
        super(GeneralOptimizeRule, self).__init__()

        self.register(RemoveRedundantOperator())
        self.register(RemoveNoEffectOperator())
        self.register(ReplaceScalarAffine())
        self.register(SimplifyElementwise())
        self.register(ConcatZeroPadding())
        self.register(ConstantFolding())
Example #11
    def __init__(self):
        super(WebGPUOptimizeRule, self).__init__([
            OptimizeRuleGroup([
                InsertTranspose(),
                ReplaceConvolutionByIm2Col(),
                MergeSgemmAndElementwiseMul(),
                ConstantFolding(),
                ReplaceDeconvolutionByCol2Im(),
                MergeSgemmAndElementwiseMul(),
                ConstantFolding(),
                ReplaceLinearBySgemm(),
                MergeSgemmAndElementwiseMul(),
                ConstantFolding(),
                ConcatLSTMInputAndHidden(),
                RemoveRedundantOperator(),
                RemoveNoEffectOperator(),
                UpdateInplaceAttribute()
            ]),
            ElementwiseKernelFusion()
        ])
Example #12
    def __init__(self):
        super(GeneralOptimizeRule, self).__init__([
            RemoveRedundantOperator(),
            RemoveNoEffectOperator(),
            SimplifyElementwise(),
            ConcatZeroPadding(),
            ConstantFolding(),
            Convolution2DSvdCompression(),
            ConvFilterPruning(),
            UpgradeOperatorType()
        ])
Example #13
def test_fold_sub():
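    """
    before)

    c0 -+
        +{Sub}-h1-+
    c1 -+         +-{Add}-h3
              h2-+

    after)

    c0-c1 -+
           +-{Add}-h3
       h2 -+
    """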
    c0 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)
    c1 = ConstantVariable(np.random.rand(2, 3, 4, 5), OrderNCHW)

    h1 = c0 - c1
    h2 = Variable([2, 3, 4, 5], OrderNCHW)

    h3 = h1 + h2

    graph = Graph([h2], [h3])

    ConstantFolding().optimize(graph)

    h1_new = h3.output_from.inputs["x0"]

    assert h1_new is not h1
    assert isinstance(h1_new, ConstantVariable)
    assert np.abs(np.mean(h1_new.data - (c0.data - c1.data))) < 1e-5
Example #14
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for sgemm in traverse.filter_nodes(traverse.listup_operators(graph),
                                           Sgemm):  # type: Sgemm
            A = sgemm.inputs["A"]
            B = sgemm.inputs["B"]
            M = sgemm.M
            N = sgemm.N
            K = sgemm.K
            transpose_A = sgemm.transpose_A
            transpose_B = sgemm.transpose_B

            if all([
                    self.optimize_channel_mode, K % 4 == 0,
                    isinstance(A, ConstantVariable) or transpose_A,
                    isinstance(B, ConstantVariable) or not transpose_B
            ]):
                if not transpose_A:
                    assert isinstance(A, ConstantVariable)
                    flag_changed = True
                    old_A = A
                    A = ConstantVariable(
                        A.data.reshape([K, M]).transpose(),
                        Order([Axis(None), Axis(None)]))
                    ChannelMode.set(A, ChannelMode.get(old_A))
                    sgemm.replace_input(old_A, A, with_assert=False)
                    sgemm.parameters["transpose_A"] = transpose_A = True

                if transpose_B:
                    assert isinstance(B, ConstantVariable)
                    flag_changed = True
                    old_B = B
                    B = ConstantVariable(
                        B.data.reshape([K, N]).transpose(),
                        Order([Axis(None), Axis(None)]))
                    ChannelMode.set(B, ChannelMode.get(old_B))
                    sgemm.replace_input(old_B, B, with_assert=False)
                    sgemm.parameters["transpose_B"] = transpose_B = False

                if ChannelMode.get(A) != ChannelModeEnum.RGBA:
                    flag_changed = True
                    ChannelMode.set(A, ChannelModeEnum.RGBA)

                if ChannelMode.get(B) != ChannelModeEnum.RGBA:
                    flag_changed = True
                    ChannelMode.set(B, ChannelModeEnum.RGBA)

                texture_shape_A = [M, K // 4] if transpose_A else [K // 4, M]
                texture_shape_B = [K // 4, N] if transpose_B else [N, K // 4]

            else:
                if ChannelMode.get(A) != ChannelModeEnum.R:
                    flag_changed = True
                    ChannelMode.set(A, ChannelModeEnum.R)

                if ChannelMode.get(B) != ChannelModeEnum.R:
                    flag_changed = True
                    ChannelMode.set(B, ChannelModeEnum.R)

                texture_shape_A = [M, K] if transpose_A else [K, M]
                texture_shape_B = [K, N] if transpose_B else [N, K]

            if TextureShape.get(A) != texture_shape_A:
                flag_changed = True
                TextureShape.set(A,
                                 height=texture_shape_A[0],
                                 width=texture_shape_A[1])

            if TextureShape.get(B) != texture_shape_B:
                flag_changed = True
                TextureShape.set(B,
                                 height=texture_shape_B[0],
                                 width=texture_shape_B[1])

        if flag_changed:
            graph, _ = ConstantFolding().optimize(graph)

        return graph, flag_changed
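
To make the texture-shape arithmetic of Example #14 concrete: in RGBA mode, four scalars are packed into each texel along the K axis (hence the K % 4 == 0 guard), so the K extent of each texture shrinks by a factor of four, while R mode keeps one scalar per texel. A standalone sketch with made-up sizes (plain Python, not WebDNN API):

# Hypothetical sizes, for illustration only.
M, N, K = 8, 16, 12
transpose_A, transpose_B = True, False  # the orientation the rule normalizes to

# RGBA mode: four scalars per texel along K.
rgba_shape_A = [M, K // 4] if transpose_A else [K // 4, M]  # -> [8, 3]
rgba_shape_B = [K // 4, N] if transpose_B else [N, K // 4]  # -> [16, 3]

# R mode: one scalar per texel, K stays as-is.
r_shape_A = [M, K] if transpose_A else [K, M]  # -> [8, 12]
r_shape_B = [K, N] if transpose_B else [N, K]  # -> [16, 12]

print(rgba_shape_A, rgba_shape_B)  # [8, 3] [16, 3]
print(r_shape_A, r_shape_B)        # [8, 12] [16, 12]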
Example #15
    def convert(
        self,
        inputs: Sequence["tf.Tensor"],
        outputs: Sequence["tf.Tensor"],
        order_hints: Optional[Dict[Union["tf.Tensor", "tf.Variable"],
                                   Order]] = None
    ) -> Graph:
        """convert(model, input_orders=None)

        Args:
            inputs (list of `tf.Tensor`): tensorflow input tensors
            outputs (list of `tf.Tensor`): tensorflow output tensors
            order_hints: Order annotations which helps webdnn's optimizer.

        .. admonition:: example

            Convert TensorFlow model.

            .. code::

                import tensorflow as tf
                from webdnn.frontend.tensorflow import TensorFlowConverter

                # y = x @ W + b
                x = tf.placeholder(tf.float32, [None, 784])
                W = tf.Variable(tf.zeros([784, 10]))
                b = tf.Variable(tf.zeros([10]))
                y = tf.nn.softmax(tf.matmul(x, W) + b)

                graph = TensorFlowConverter().convert([x], [y])

        Returns:
            (:class:`~webdnn.graph.graph.Graph`): WebDNN IR Graph
        """

        for tensor in inputs:
            shape = [
                Placeholder() if dim.value is None else dim.value
                for dim in tensor.shape.dims
            ]
            if isinstance(shape[0], Placeholder):
                shape[0] = self._batch_size
            self.set_variable(tensor,
                              Variable(shape, Order([None] * len(shape))))

        ops = _listup_operations(inputs, outputs)
        for op in ops:
            self._convert_operator(op)
            sub_graph = Graph([
                self.get_variable(tf_tensor)
                for tf_tensor in op.inputs if self.has_variable(tf_tensor)
            ], [
                self.get_variable(tf_tensor)
                for tf_tensor in op.outputs if self.has_variable(tf_tensor)
            ])
            old_outputs = list(sub_graph.outputs)

            # Constant folding improves the possibility of conversion, because many tensors are used not only as the main input variable
            # but also as other parameters such as operation indices, and WebDNN doesn't support dynamic indices.
            OptimizeRuleGroup([ConstantFolding()],
                              repeat=True).optimize(sub_graph)

            # After constant folding, the old output variables need to be replaced with the new constant variables
            for tf_tensor in op.outputs:
                if not self.has_variable(tf_tensor):
                    # This tensor is not converted (ignored)
                    continue

                old_v = self.get_variable(tf_tensor)
                new_v = sub_graph.outputs[old_outputs.index(old_v)]
                if old_v != new_v:
                    self.set_variable(tf_tensor, new_v, overwrite=True)

        if order_hints:
            for tensor, order in order_hints.items():
                if isinstance(tensor, tf.Variable):
                    tensor = tensor.value()

                variable = self.get_variable(tensor)
                for axis1, axis2 in zip(variable.order.axes, order.axes):
                    axis1.unify(axis2)

        # Remove redundant ReinterpretAxis operators
        graph = Graph([self.get_variable(tensor) for tensor in inputs],
                      [self.get_variable(tensor) for tensor in outputs])
        graph, _ = TensorFlowFrontendOptimizeRule().optimize(graph)

        for v in graph.inputs:
            v.attributes.add(Input(v))

        for v in graph.outputs:
            v.attributes.add(Output(v))

        return graph
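
One part of Example #15 that its docstring does not demonstrate is order_hints: the loop near the end unifies each annotated variable's axes with the given Order. A hedged usage sketch extending the docstring's own model; using OrderNC from webdnn.graph.order for the [batch, features] placeholder is an assumption, not something shown above.

import tensorflow as tf
from webdnn.frontend.tensorflow import TensorFlowConverter
from webdnn.graph.order import OrderNC  # assumed to exist alongside OrderNCHW

# y = x @ W + b, the same model as in the docstring example
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Annotate x's axis order so the optimizer can reason about its layout.
graph = TensorFlowConverter().convert([x], [y], order_hints={x: OrderNC})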