Example #1
    def gen_code(cls, tf_op, inputs):

        input_identifier = code_gen.c_safe_identifier(inputs[0].name)

        begin_type = tf_utils.get_parent_of_tensor(inputs[1]).type
        if begin_type != "Const":
            print("Error generating 'Slice' Operation: op_kernel only "
                  "supports Constant begin tensors.")
            return "// Error cannot generate Slice operation with " \
                   "non-const begin tensor!"

        size_type = tf_utils.get_parent_of_tensor(inputs[2]).type
        if size_type != "Const":
            print("Error generating 'Slice' Operation: op_kernel only "
                  "supports Constant size tensors.")
            return "// Error cannot generate Slice operation with " \
                   "non-const size tensor!"

        begin = tf_utils.get_const_tensor(
            tf_utils.get_parent_of_tensor(inputs[1]))
        size = tf_utils.get_const_tensor(
            tf_utils.get_parent_of_tensor(inputs[2]))

        # if -1 was given for any size dimension then set it to the size
        # required to fill the remainder of the input
        for si in range(len(size)):
            if size[si] == -1:
                size[si] = inputs[0].dim_size(si) - begin[si]

        code = "%s %s.slice(Eigen::array<int, 2>(%s), " \
               "Eigen::array<int, 2>({%s}));" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input_identifier,
                code_gen.ndarray_1d_to_literal(begin),
                code_gen.ndarray_1d_to_literal(size))

        # print("Slice operation looks like this. . .")
        # super().print_operation_details(tf_op)

        return code
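
A minimal, runnable sketch (with hypothetical shapes, not taken from a real model) of how the -1 size entries above are resolved to fill the remainder of the input:

    # hypothetical rank-2 input of shape [4, 10]
    input_shape = [4, 10]
    begin = [1, 2]
    size = [2, -1]    # -1 means "to the end of this dimension"
    for si in range(len(size)):
        if size[si] == -1:
            size[si] = input_shape[si] - begin[si]
    print(size)    # [2, 8]
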
Example #2
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        # If the bias tensor needs to be cast to the same type as the input
        bias_cast = ""

        # If the bias tensor needs to be broadcast to the same shape as the input
        bias_broadcast = ""
        input0_shape = tf_utils.np_tensor_shape(inputs[0])
        input1_shape = tf_utils.np_tensor_shape(inputs[1])
        shapes_match = np.array_equal(input0_shape, input1_shape)
        if not shapes_match:

            broadcast_shape = tf_utils.np_tensor_shape(inputs[0])
            broadcast_shape[-1] = 1

            reshape_shape = np.array(([1] * (len(broadcast_shape) - 1)) +
                                     [input1_shape[0]])
            bias_broadcast = "\n    .reshape(Eigen::array<int, %d>(%s))" % \
                             (len(reshape_shape),
                              code_gen.ndarray_1d_to_literal(reshape_shape))
            bias_broadcast += "\n        .broadcast(Eigen::array<int, %d>(%s))" % \
                              (len(broadcast_shape),
                               code_gen.ndarray_1d_to_literal(broadcast_shape))

        code = "%s %s + %s%s%s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, False),
                input0_identifier,
                input1_identifier,
                bias_cast,
                bias_broadcast)
        return code
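
To illustrate the reshape/broadcast bookkeeping above, a small runnable sketch with hypothetical shapes (a rank-2 input and a rank-1 bias); the bias is reshaped to rank 2 and then replicated along the leading dimension:

    import numpy as np

    input0_shape = np.array([8, 16])    # hypothetical input shape
    input1_shape = np.array([16])       # hypothetical bias shape

    broadcast_shape = input0_shape.copy()
    broadcast_shape[-1] = 1                              # [8, 1]
    reshape_shape = np.array([1] * (len(broadcast_shape) - 1) +
                             [input1_shape[0]])          # [1, 16]
    # bias.reshape([1, 16]).broadcast([8, 1]) yields shape [8, 16]
    print(reshape_shape, broadcast_shape)
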
Example #3
    def gen_code(cls, tf_op, inputs):

        output_shape = tf_utils.np_tensor_shape(tf_op.outputs[0])
        input_identifier = code_gen.c_safe_identifier(inputs[0].name)

        code = "%s %s.reshape(Eigen::array<int, %d>(%s));" % \
               (base_op.BaseOpKernel.output_assignment(
                 tf_op, base_op.BaseOpKernel.evaluate_all
               ),
                input_identifier,
                len(output_shape),
                code_gen.ndarray_1d_to_literal(output_shape))
        return code
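
The shape literals in these templates are all produced by code_gen.ndarray_1d_to_literal, which is not shown in these examples. The stand-in below is purely hypothetical; it only assumes the helper joins values inside configurable delimiters, padded with single spaces (which the '  ' comparisons elsewhere in these snippets suggest):

    def ndarray_1d_to_literal(arr, open='{', close='}'):
        # join the values with commas inside the requested delimiters
        return open + ' ' + ', '.join(str(int(v)) for v in arr) + ' ' + close

    print(ndarray_1d_to_literal([1, 28, 28]))                      # { 1, 28, 28 }
    print(ndarray_1d_to_literal([1, 28, 28], open='', close=''))   #  1, 28, 28
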
Example #4
    def add_verification_to_class(self, class_obj, constructor):
        if self.validation_type == 'Full':
            for op in self.list_operations:
                for out in op.outputs:

                    identifier = code_gen.c_safe_identifier(out.name)
                    shape = tf_utils.np_tensor_shape(out)
                    if len(shape) == 0:
                        shape = [1]
                    type = code_gen.get_c_dtype(out.dtype)

                    inner_template = cpp_gen.TemplateInstance()
                    inner_template.add_element(cpp_gen.TypeDefinition(type))
                    inner_template.add_element(str(len(shape)))
                    inner_template.add_element("Eigen::" + self.data_layout)
                    template = cpp_gen.TemplateInstance()
                    template.add_element(
                        cpp_gen.TypeDefinition('Tensor',
                                               namespace='Eigen',
                                               template=inner_template))
                    tensor_type = cpp_gen.TypeDefinition('TensorMap',
                                                         namespace='Eigen',
                                                         template=template)
                    tensor_map_property = cpp_gen.ClassProperty(
                        identifier + "_val", tensor_type)
                    tensor_map_property.access_modifier = "private"
                    class_obj.add(tensor_map_property)
                    lit_suffix = ""
                    if type == "float" or type == "double" or type == "long double":
                        lit_suffix = "Hex"

                    literal_identifier = (class_obj.identifier + "Weights::" +
                                          identifier + "VerificationData" +
                                          lit_suffix)

                    constructor.initialiser_list += [
                        "%s((%s*)%s,%s)" %
                        (identifier + "_val", type, literal_identifier,
                         code_gen.ndarray_1d_to_literal(
                             shape, open='', close=''))
                    ]
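
A sketch (all names hypothetical, including the ModelWeights class) of what one loop iteration contributes: a private Eigen::TensorMap property and its constructor-initialiser entry:

    identifier, c_type, shape = "conv1_out", "float", [1, 24, 24]
    prop = "Eigen::TensorMap<Eigen::Tensor<%s, %d, Eigen::RowMajor>> %s_val;" % \
           (c_type, len(shape), identifier)
    init = "%s_val((%s*)ModelWeights::%sVerificationDataHex, %s)" % \
           (identifier, c_type, identifier,
            ', '.join(str(d) for d in shape))
    print(prop)    # added to the class as a private property
    print(init)    # appended to the constructor initialiser list
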
Example #5
    def add_weights_to_class(self, class_obj, constructor):

        # Add stored tensors to properties and constructor initialiser list
        for t in self.list_training_tensors:

            type = code_gen.get_c_dtype(t.dtype.base_dtype)
            rank = max(1, len(tf_utils.np_tensor_shape(t)))

            inner_template = cpp_gen.TemplateInstance()
            inner_template.add_element(cpp_gen.TypeDefinition(type))
            inner_template.add_element(str(rank))
            inner_template.add_element("Eigen::" + self.data_layout)
            template = cpp_gen.TemplateInstance()
            template.add_element(
                cpp_gen.TypeDefinition('Tensor',
                                       namespace='Eigen',
                                       template=inner_template))
            tensor_type = cpp_gen.TypeDefinition('TensorMap',
                                                 namespace='Eigen',
                                                 template=template)
            tensor_map_property = cpp_gen.ClassProperty(
                code_gen.c_safe_identifier(t.name), tensor_type)
            tensor_map_property.access_modifier = "private"
            class_obj.add(tensor_map_property)

            # For now just use literal values. TODO: add an option to load
            # weights from a file as well.
            literal_name = class_obj.identifier + "Weights::" + \
                           code_gen.c_safe_identifier(t.name) + "Flat"
            if type == "float" or type == "double" or type == "long double":
                literal_name += "Hex"
            shape = code_gen.ndarray_1d_to_literal(tf_utils.np_tensor_shape(t),
                                                   open='',
                                                   close='')
            # convert a rank-0 tensor to rank 1 for Eigen
            if shape == '  ':
                shape = ' 1 '

            constructor.initialiser_list += [
                "%s((%s*)%s,%s)" %
                (code_gen.c_safe_identifier(t.name), type, literal_name, shape)
            ]
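
A short runnable sketch of the rank-0 special case above, assuming ndarray_1d_to_literal pads its output with single spaces (which the '  ' comparison implies); a scalar weight is mapped as a rank-1, size-1 tensor:

    shape = []                      # a rank-0 (scalar) training tensor
    rank = max(1, len(shape))       # promoted to rank 1
    shape_literal = ' ' + ', '.join(str(d) for d in shape) + ' '
    if shape_literal == '  ':       # empty literal -> single element
        shape_literal = ' 1 '
    print(rank, repr(shape_literal))    # 1 ' 1 '
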
Example #6
    def add_parameters_to_methods(self, eval_method, validate_method,
                                  timing_method, class_name):
        parameter_comment = "Input tensors\n"
        for i, input_placeholder in enumerate(self.list_input_placeholders):
            type = code_gen.get_c_dtype(
                input_placeholder.outputs[0].dtype.base_dtype)
            identifier = code_gen.c_safe_identifier(
                input_placeholder.outputs[0].name)
            shape = tf_utils.np_tensor_shape(input_placeholder.outputs[0])
            if len(shape) == 0:
                shape = [1]

            parameter_comment += "[%s] %s %s\n" % (
                type, identifier, str(input_placeholder.outputs[0].shape[1:]))

            eval_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))
            timing_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))

            param_tensor_map = "Eigen::TensorMap<Eigen::Tensor" \
                               "<%s, %d, %s>> %s(%s,%s)" % \
                               (type,
                                len(shape),
                                "Eigen::"+self.data_layout,
                                identifier,
                                identifier+"Param",
                                code_gen.ndarray_1d_to_literal(shape,
                                                               open='',
                                                               close=''))

            val_data_identifier = (class_name + "Weights::" + identifier +
                                   "VerificationDataHex")

            val_tensor_map = (
                "Eigen::TensorMap<Eigen::Tensor"
                "<%s, %d, %s>> %s((%s*)%s,%s)" %
                (type, len(shape), "Eigen::" + self.data_layout, identifier,
                 type, val_data_identifier,
                 code_gen.ndarray_1d_to_literal(shape, open='', close='')))

            comment = None
            if i == 0:
                comment = cpp_gen.Comment("Creating TensorMaps of inputs")

            eval_method.code_block.add_statement(
                cpp_gen.Statement(param_tensor_map, comment))
            timing_method.code_block.add_statement(
                cpp_gen.Statement(param_tensor_map, comment))

            validate_method.code_block.add_statement(
                cpp_gen.Statement(val_tensor_map, comment))

        parameter_comment += "Output tensors\n"
        for out in self.output_tensors:
            type = code_gen.get_c_dtype(out.dtype)
            identifier = code_gen.c_safe_identifier(out.name)
            shape = tf_utils.np_tensor_shape(out)

            parameter_comment += "[%s] %s [%s]\n" % \
                                 (type,
                                  identifier,
                                  code_gen.ndarray_1d_to_literal(shape, open='', close=''))

            eval_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))
            timing_method.parameter_list.add(
                cpp_gen.Parameter(identifier + "Param",
                                  cpp_gen.TypeDefinition(type, ptr_levels=1)))

            # Create buffers to hold the final output tensors in the validate
            # method, which doesn't actually return anything to the calling
            # process.
            dummy_param = "%s %s[%d]" % (type, identifier + "Param",
                                         np.prod(shape))
            dummy_param_comment = cpp_gen.Comment("Dummy parameter for output")
            validate_method.code_block.add_statement(
                cpp_gen.Statement(dummy_param, dummy_param_comment))

            # Tag this tensor as an output so that operation kernels will
            # map the output to the given function parameter instead of a block in the memory map.
            # out.tfmin_is_output = True
            if out.op.type == 'Identity':
                out = out.op.inputs[0]
            out.tfmin_output_identifier = identifier + "Param"

        timing_method.parameter_list.add(
            cpp_gen.Parameter('print',
                              cpp_gen.TypeDefinition('bool'),
                              default='true'))
        eval_method.comment.text += parameter_comment
        timing_method.comment.text += parameter_comment
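
For illustration, the TensorMap statement emitted for one input parameter, built here with hypothetical names, a float dtype, shape [1, 784] and a RowMajor layout:

    c_type, shape, identifier = "float", [1, 784], "input_0"
    stmt = "Eigen::TensorMap<Eigen::Tensor<%s, %d, %s>> %s(%s, %s)" % \
           (c_type, len(shape), "Eigen::RowMajor", identifier,
            identifier + "Param", ', '.join(str(d) for d in shape))
    print(stmt)
    # Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor>>
    #     input_0(input_0Param, 1, 784)
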
Example #7
    def gen_code(cls, tf_op, inputs):

        input0_identifier = code_gen.c_safe_identifier(inputs[0].name)
        input1_identifier = code_gen.c_safe_identifier(inputs[1].name)

        # If the input tensor sizes match then this is a simple element-wise
        # addition. However, if one of the tensors is smaller than the other
        # then this kernel will attempt to 'broadcast' the smaller tensor up
        # to the size of the larger one.
        input0_expression = input0_identifier
        input1_expression = input1_identifier

        input0_shape = tf_utils.np_tensor_shape(inputs[0])
        input1_shape = tf_utils.np_tensor_shape(inputs[1])

        if not np.array_equal(input0_shape, input1_shape):
            # print("Broadcasting needed in Add operation!")

            # print("Old input_0 (%s) input_1 (%s)" %
            #      (input0_shape, input1_shape))

            smaller = None
            # if one shape has lower rank than the other then pad the
            # lower-rank shape with size-1 dimensions
            if input1_shape.size < input0_shape.size:
                smaller = 1
                padding = np.ones(int(input0_shape.size - input1_shape.size),
                                  dtype=int)
                input1_shape = np.concatenate((padding, input1_shape))
                input1_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                       (input1_shape.size,
                                        code_gen.ndarray_1d_to_literal(input1_shape))
            elif input0_shape.size < input1_shape.size:
                smaller = 0
                padding = np.ones(int(input1_shape.size - input0_shape.size),
                                  dtype=int)
                input0_shape = np.concatenate((padding, input0_shape))
                input0_expression += ".reshape(Eigen::array<int, %d>(%s))" % \
                                       (input0_shape.size,
                                        code_gen.ndarray_1d_to_literal(input0_shape))

            # print("New input_0 (%s) input_1 (%s)" %
            #      (input0_shape, input1_shape))

            broadcast_multiplier = np.ones(input1_shape.size, dtype=int)

            for d in range(input0_shape.size):

                if input0_shape[d] != input1_shape[d]:

                    # check error cases where dimensions are not universally smaller on one side
                    if (smaller == 0 and input0_shape[d] > input1_shape[d]) or\
                            (smaller == 1 and input1_shape[d] > input0_shape[d]):
                        print(
                            "Error: Add operation with non-broadcastable sized input tensors!"
                        )
                        return "// Error generating Add operation, non-broadcastable sized input tensors."

                    # check the error case where dimensions are not equal
                    # and the smaller one is not 1
                    if (input0_shape[d] < input1_shape[d] and input0_shape[d] != 1) or \
                            (input1_shape[d] < input0_shape[d] and input1_shape[d] != 1):
                        print(
                            "Error: Add operation with non-broadcastable sized input tensors!"
                        )
                        return "// Error generating Add operation, non-broadcastable sized input tensors."

                    # check if this dimension defines the smallest tensor
                    if smaller is None and input0_shape[d] < input1_shape[d]:
                        smaller = 0
                    elif smaller is None and input1_shape[d] < input0_shape[d]:
                        smaller = 1

                    # update the broadcast multiplier for this dimension
                    if smaller == 0:
                        broadcast_multiplier[d] = input1_shape[d]
                    else:
                        broadcast_multiplier[d] = input0_shape[d]

            broadcast_expression = ".broadcast(Eigen::array<int, %d>(%s))" % \
                                   (broadcast_multiplier.size,
                                    code_gen.ndarray_1d_to_literal(broadcast_multiplier))

            # update the expression for the smaller tensor
            if smaller == 0:
                input0_expression += broadcast_expression
            elif smaller == 1:
                input1_expression += broadcast_expression

        code = "%s %s + %s;" % \
               (base_op.BaseOpKernel.output_assignment(tf_op, True),
                input0_expression,
                input1_expression)

        return code
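
A runnable sketch of the broadcast bookkeeping with hypothetical shapes, adding a [3] tensor to a [2, 3] tensor: the smaller shape is padded to [1, 3] and the multiplier [2, 1] replicates it along the first dimension:

    import numpy as np

    input0_shape = np.array([2, 3])
    input1_shape = np.array([3])        # the smaller input

    padding = np.ones(input0_shape.size - input1_shape.size, dtype=int)
    input1_shape = np.concatenate((padding, input1_shape))    # [1, 3]

    multiplier = np.ones(input1_shape.size, dtype=int)
    for d in range(input0_shape.size):
        if input0_shape[d] != input1_shape[d]:
            multiplier[d] = input0_shape[d]    # input 1 is the smaller one
    print(input1_shape, multiplier)    # [1 3] [2 1]
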
Example #8
    def output_assignment(tf_op, eval=True, idx=0, assignment=True):
        """Generate the code which declares the output tensor of tf_op and,
        if assignment is True, begins the assignment to it.

        Depending on the evaluation mode this is either an Eigen::TensorMap
        into the memory map, a concrete Eigen::Tensor, or an auto expression
        which Eigen evaluates lazily.
        """

        identifier = code_gen.c_safe_identifier(tf_op.outputs[idx].name)
        type = code_gen.get_c_dtype(tf_op.outputs[idx].dtype.base_dtype)
        rank = len(tf_utils.np_tensor_shape(tf_op.outputs[idx]))
        shape_np = tf_utils.np_tensor_shape(tf_op.outputs[idx])
        shape = code_gen.ndarray_1d_to_literal(shape_np, open='', close='')

        # -- special case --
        # if the result of this operation is a model output then
        # create a tensor map to the output buffer
        if hasattr(tf_op.outputs[idx], 'tfmin_output_identifier'):
            code = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
                    (type,
                     rank,
                     BaseOpKernel.data_layout)
            code += " %s((%s*)%s, %s);" % \
                    (identifier,
                     type,
                     tf_op.outputs[idx].tfmin_output_identifier,
                     shape)

            if assignment:
                code += "\n%s = " % identifier

            return code

        # if this operation needs to be concrete or all ops are being evaluated
        if BaseOpKernel.evaluate_all or tf_op.tfmin_concrete_needed:
            eval = True

        # if evaluate is true then create a concrete tensor or
        # map of the operation's result
        if eval:

            if BaseOpKernel.use_memory_map:

                precalculated_offset = None
                if hasattr(tf_op.outputs[idx], '_tfmin_memory_offset'):
                    precalculated_offset = tf_op.outputs[
                        idx]._tfmin_memory_offset

                tensor_map_pointer = "(%s*)(memoryBlock + %s)" % \
                                     (type,
                                      precalculated_offset)

                # if no precalculated_offset was found then assume it is
                # safe to use the memory space of the input to this operation.
                # NOTE this will be safe in most cases but may well explode
                # in some rare cases!! I apologise in advance if this has
                # just happened to you.
                if precalculated_offset is None:
                    input = tf_op.inputs[0]
                    if input.op.type == "Identity":
                        input = input.op.inputs[0]
                    tensor_map_pointer = "%s.data()" % \
                                         code_gen.c_safe_identifier(input.name)

                code = ("\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" %
                        (type, rank, BaseOpKernel.data_layout))

                code += " %s(%s, %s);" % \
                        (identifier,
                         tensor_map_pointer,
                         shape)
            else:
                code = "\nEigen::Tensor<%s, %d, %s> %s =" % \
                        (type,
                         rank,
                         data_layout,
                         identifier)

            if assignment:
                code += "\n%s.device(d) =" % identifier

            return code

        # if this operation is not being evaluated then create
        # an auto type so that the Eigen library produces an evaluator
        # object instead of a concrete tensor.
        else:
            code = "\nauto %s = " % identifier

            return code
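
A small sketch (identifier, type, offset and layout are all hypothetical) of the two main forms this method emits: a concrete TensorMap into the memory block versus a lazy auto expression:

    identifier, c_type, rank, layout = "conv1_out", "float", 2, "Eigen::RowMajor"
    shape, offset = " 1, 784 ", 256

    # eval=True with a memory map: a TensorMap into the shared block,
    # followed by the start of a device assignment
    mapped = "\nEigen::TensorMap<Eigen::Tensor<%s, %d, %s>>" % \
             (c_type, rank, layout)
    mapped += " %s((%s*)(memoryBlock + %d), %s);" % \
              (identifier, c_type, offset, shape)
    mapped += "\n%s.device(d) =" % identifier

    # eval=False: an auto expression Eigen evaluates lazily
    lazy = "\nauto %s = " % identifier
    print(mapped)
    print(lazy)
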
Example #9
    def gen_code(cls, tf_op, inputs):

        # base_op.BaseOpKernel.print_operation_details(tf_op)

        num_split = tf_op.get_attr("num_split")

        # This development version only supports the form where axis is
        # provided by a rank 0 constant operation
        if tf_utils.get_parent_of_tensor(inputs[0]).type != "Const":
            print("Error : Split operation doesn't support computed values "
                  "for axis yet!")
            return "// Error : Couldn't produce split operation with a " \
                   "computed axis dimension."

        # axis is provided by the first input tensor
        axis = tf_utils.get_const_scalar(
            tf_utils.get_parent_of_tensor(inputs[0]))

        # if there is an undefined batch dimension that has been collapsed
        # reduce the axis index by 1
        reduced_rank = len(tf_utils.np_tensor_shape(tf_op.outputs[0]))
        if reduced_rank != tf_op.outputs[0].shape.ndims:
            axis -= (tf_op.outputs[0].shape.ndims - reduced_rank)

        code = ""

        # if num_split is an integer then generate form 1 of this
        # operation where the input tensor is split into
        # num_split tensors, divided evenly along axis
        if type(num_split) is int:

            # verify that the size of dimension 'axis' is a multiple of num_split
            input_axis_size = tf_utils.np_tensor_shape(inputs[1])[axis]
            if input_axis_size % num_split != 0:
                print("Error : Split operation trying to split dimension of "
                      "size %d into %d parts, leaves remainder." %
                      (input_axis_size, num_split))
                return "// Error : Couldn't produce split operation where " \
                       "tensor doesn't divide into num_split parts"

            # Calculate the size in 'axis' of each output slice
            # (integer division; divisibility was checked above)
            size = input_axis_size // num_split

            input1_identifier = code_gen.c_safe_identifier(inputs[1].name)
            rank = len(tf_utils.np_tensor_shape(inputs[1]))

            offset = np.zeros(rank, dtype=int)
            extents = tf_utils.np_tensor_shape(inputs[1])
            extents[axis] = size

            # generate code for each output tensor
            for idx in range(num_split):
                code += base_op.BaseOpKernel.output_assignment(tf_op, idx=idx)

                offset[axis] = idx * size

                code += " %s.slice(Eigen::array<int, %d>(%s), " \
                        "Eigen::array<int, %d>(%s));" % \
                        (input1_identifier,
                         rank,
                         code_gen.ndarray_1d_to_literal(offset),
                         rank,
                         code_gen.ndarray_1d_to_literal(extents)
                         )

        else:  # TODO need to implement this
            code = "// Error Split operation does not currently " \
                   "support arbitrary sized splits"

        return code
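
Finally, a runnable sketch of the even-split bookkeeping with hypothetical values, splitting a [4, 12] tensor into num_split = 3 parts along axis = 1; each slice shares the extents [4, 4] and only the offset along the split axis changes:

    import numpy as np

    input_shape = np.array([4, 12])
    num_split, axis = 3, 1
    size = input_shape[axis] // num_split    # 4

    extents = input_shape.copy()
    extents[axis] = size
    for idx in range(num_split):
        offset = np.zeros(len(input_shape), dtype=int)
        offset[axis] = idx * size
        print(offset, extents)    # offsets [0 0], [0 4], [0 8]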