Example #1
    def test_ref_assign(self):
        """ test behavior of StaticVectorizer on predicated ReferenceAssign """
        va = Variable("a")
        vb = Variable("b")
        vc = Variable("c")
        scheme = Statement(
            ReferenceAssign(va, Constant(3)),
            ConditionBlock(
                (va > vb).modify_attributes(likely=True),
                Statement(ReferenceAssign(vb, va),
                          ReferenceAssign(va, Constant(11)), Return(va)),
            ), ReferenceAssign(va, Constant(7)), Return(vb))
        vectorized_path = StaticVectorizer().extract_vectorizable_path(
            scheme, fallback_policy)

        linearized_most_likely_path = instanciate_variable(
            vectorized_path.linearized_optree,
            vectorized_path.variable_mapping)
        test_result = (isinstance(linearized_most_likely_path, Constant)
                       and linearized_most_likely_path.get_value() == 11)
        if not test_result:
            print("test UT_StaticVectorizer failure")
            print("scheme: {}".format(scheme.get_str()))
            print("linearized_most_likely_path: {}".format(
                linearized_most_likely_path))
        self.assertTrue(test_result)
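For reference, a hand-linearized sketch of the likely path the vectorizer is expected to extract here (assuming the likely=True branch is taken); after variable substitution it collapses to Constant(11), which is what the test asserts:

    # sketch: straight-line equivalent of the likely path through `scheme`
    likely_path = Statement(
        ReferenceAssign(va, Constant(3)),   # va <- 3
        ReferenceAssign(vb, va),            # vb <- va (= 3)
        ReferenceAssign(va, Constant(11)),  # va <- 11
        Return(va),                         # returns Constant(11)
    )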
Example #2
    def generate_scheme(self):
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.precision)
        vy = self.implementation.add_input_variable("y", self.precision)

        Cst0 = Constant(5, precision=self.precision)
        Cst1 = Constant(7, precision=self.precision)
        comp = Comparison(vx,
                          vy,
                          specifier=Comparison.Greater,
                          precision=ML_Bool,
                          tag="comp")
        comp_eq = Comparison(vx,
                             vy,
                             specifier=Comparison.Equal,
                             precision=ML_Bool,
                             tag="comp_eq")

        scheme = Statement(
            ConditionBlock(
                comp, Return(vy, precision=self.precision),
                ConditionBlock(
                    comp_eq,
                    Return(vx + vy * Cst0 - Cst1, precision=self.precision))),
            ConditionBlock(comp_eq, Return(Cst1 * vy,
                                           precision=self.precision)),
            Return(vx * vy, precision=self.precision))

        return scheme
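A plain-Python reference of the control flow encoded by the scheme above (a sketch, not Metalibm code); note the second comp_eq test is only reachable when x < y, where it is always false:

    def reference(x, y):
        if x > y:               # first ConditionBlock (comp)
            return y
        elif x == y:            # nested ConditionBlock (comp_eq)
            return x + y * 5 - 7
        if x == y:              # second ConditionBlock; never true here
            return 7 * y
        return x * y            # final Return(vx * vy)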
Example #3
    def generate_scheme(self):
        # declaring input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        vx2 = vx * vx

        scheme = ConditionBlock(
            vx > 0, Return(vx - 0.33 * vx2 * vx + (2 / 15.0) * vx * vx2 * vx2),
            Return(FP_QNaN(self.precision)))

        return scheme
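The returned expression is the odd degree-5 polynomial x - 0.33 x^3 + (2/15) x^5, close to the Taylor expansion of tanh (which would use 1/3 instead of 0.33). A quick plain-Python check, as a sketch:

    import math

    def poly(x):
        # same polynomial as the scheme above
        x2 = x * x
        return x - 0.33 * x2 * x + (2 / 15.0) * x * x2 * x2

    print(poly(0.25), math.tanh(0.25))  # ~0.24497 vs ~0.24492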
Example #4
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x", FIXED_FORMAT)
        # declaring specific interval for input variable <x>
        vx.set_interval(Interval(-1, 1))

        acc_format = ML_Custom_FixedPoint_Format(6, 58, False)

        c = Constant(2, precision=acc_format, tag="C2")

        ivx = vx
        add_ivx = Addition(
                    c,
                    Multiplication(ivx, ivx, precision=acc_format, tag="mul"),
                    precision=acc_format,
                    tag="add"
                  )
        result = add_ivx

        input_mapping = {ivx: ivx.get_precision().round_sollya_object(0.125)}
        error_eval_map = runtime_error_eval.generate_error_eval_graph(result, input_mapping)

        # dummy scheme to make code generation functional
        scheme = Statement()
        for node in error_eval_map:
            scheme.add(error_eval_map[node])
        scheme.add(Return(result))
        return scheme
Example #5
  def externalize_call(self, optree, arg_list, tag = "foo", result_format = None):
    # determining return format
    return_format = optree.get_precision() if result_format is None else result_format
    assert return_format is not None, "external call result format must be defined"
    # function_name = self.main_code_object.declare_free_function_name(tag)
    function_name = self.name_factory.declare_free_function_name(tag)

    ext_function = CodeFunction(function_name, output_format = return_format)

    # creating argument copy
    arg_map = {}
    arg_index = 0
    for arg in arg_list:
      arg_tag = arg.get_tag(default = "arg_%d" % arg_index)
      arg_index += 1
      arg_map[arg] = ext_function.add_input_variable(arg_tag, arg.get_precision())

    # copying optree while swapping arguments for variables
    optree_copy = optree.copy(copy_map = arg_map)
    # instantiating external function scheme
    if isinstance(optree, ML_ArithmeticOperation):
      function_optree = Statement(Return(optree_copy))
    else:
      function_optree = Statement(optree_copy)
    ext_function.set_scheme(function_optree)
    self.name_factory.declare_function(function_name, ext_function.get_function_object())

    return ext_function
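A hypothetical call site, as a sketch (the names add_node, x and y are illustrative, not from the source): the sub-graph is outlined into a free function, and the returned CodeFunction's FunctionObject can then be applied at call sites.

  # hypothetical usage: outline `add_node` (reading x and y) as "vadd"
  ext_fn = self.externalize_call(add_node, [x, y], tag="vadd")
  vadd_call = ext_fn.get_function_object()(x, y)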
Example #6
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        if self.libm_compliant:
            return RaiseReturn(*args, precision=self.precision, **kwords)
        else:
            return Return(kwords["return_value"], precision=self.precision)
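A hypothetical use of this helper, as a sketch (the exception constant and return value are illustrative): callers are assumed to always pass return_value, so that the non-compliant branch can fall back to a plain Return.

    # hypothetical call: raise ML_FPE_Overflow and return +inf when
    # libm-compliant, otherwise fall back to a plain Return of +inf
    overflow_case = ExpRaiseReturn(ML_FPE_Overflow,
                                   return_value=FP_PlusInfty(self.precision))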
Example #7
    def generate_scheme(self):
        self.var_mapping = {}
        for var_index in range(self.arity):
            # FIXME: maximal arity is 4
            var_tag = ["x", "y", "z", "t"][var_index]
            self.var_mapping[var_tag] = self.implementation.add_input_variable(
                var_tag,
                self.get_input_precision(var_index),
                interval=self.input_intervals[var_index])

        self.function_expr = function_parser(self.function_expr_str,
                                             self.var_mapping)

        Log.report(Log.Info, "evaluating function range")
        evaluate_range(self.function_expr, update_interval=True)
        Log.report(
            LOG_VERBOSE_FUNCTION_EXPR, "scheme is: \n{}",
            self.function_expr.get_str(depth=None, display_interval=True))

        # define copy map to avoid copying input Variables
        copy_map = dict((var, var) for var in self.var_mapping.values())

        function_expr_copy = self.function_expr.copy(copy_map)

        result, scheme = self.instanciate_graph(function_expr_copy,
                                                expand_div=self.expand_div)
        scheme.add(Return(result, precision=self.precision))

        return scheme
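For illustration, a hypothetical instantiation of the parsing step above (the expression string and arity are made up): function_parser resolves the variable tags in the string through var_mapping and returns the corresponding operation graph.

        # hypothetical: with arity 2 and function_expr_str = "x * y + x",
        # the tags "x" and "y" are resolved through self.var_mapping
        expr = function_parser("x * y + x", self.var_mapping)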
Example #8
    def generate_test_wrapper(self, tensor_descriptors, input_tables,
                              output_tables):
        auto_test = CodeFunction("test_wrapper", output_format=ML_Int32)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        failure_report_op = FunctionOperator("report_failure")
        failure_report_function = FunctionObject("report_failure", [], ML_Void,
                                                 failure_report_op)

        printf_success_op = FunctionOperator(
            "printf",
            arg_map={0: "\"test successful %s\\n\"" % function_name},
            void_function=True,
            require_header=["stdio.h"])
        printf_success_function = FunctionObject("printf", [], ML_Void,
                                                 printf_success_op)

        # accumulate element number
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        test_loop = self.get_tensor_test_wrapper(
            tested_function, tensor_descriptors, input_tables, output_tables,
            acc_num, self.generate_tensor_check_loop)

        # common test scheme between scalar and vector functions
        test_scheme = Statement(test_loop, printf_success_function(),
                                Return(Constant(0, precision=ML_Int32)))
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])
Example #9
    def generate_scheme(self):
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.precision)
        vy = self.implementation.add_input_variable("y", self.precision)

        scheme = Return(vx + vy)

        return scheme
Example #10
    def generate_scheme(self):
        size_format = ML_Int32

        # Matrix storage
        in_storage = self.implementation.add_input_variable(
            "buffer_in", ML_Pointer_Format(self.precision))
        kernel_storage = self.implementation.add_input_variable(
            "buffer_kernel", ML_Pointer_Format(self.precision))
        out_storage = self.implementation.add_input_variable(
            "buffer_out", ML_Pointer_Format(self.precision))

        # Matrix sizes
        w = self.implementation.add_input_variable("w", size_format)
        h = self.implementation.add_input_variable("h", size_format)

        # input image: a (w x h) tensor in row-major
        tIn = Tensor(in_storage,
                     TensorDescriptor([w, h], [1, w], self.precision))
        # convolution kernel, with strides derived from its dimensions
        kernel_strides = [1]
        for previous_dim in self.kernel_size[:-1]:
            kernel_strides.append(previous_dim * kernel_strides[-1])
        print("kernel_strides: {}".format(kernel_strides))
        tKernel = Tensor(
            kernel_storage,
            TensorDescriptor(self.kernel_size, kernel_strides, self.precision))
        # output image: a (w x h) tensor in row-major
        tOut = Tensor(out_storage,
                      TensorDescriptor([w, h], [1, w], self.precision))

        index_format = ML_Int32

        # main NDRange description
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        j = Variable("j", precision=index_format, var_type=Variable.Local)
        k_w = Variable("k_w", precision=index_format, var_type=Variable.Local)
        k_h = Variable("k_h", precision=index_format, var_type=Variable.Local)
        result = NDRange([IterRange(i, 0, w - 1),
                          IterRange(j, 0, h - 1)],
                         WriteAccessor(
                             tOut, [i, j],
                             Sum(Sum(Multiplication(
                                 ReadAccessor(tIn, [i + k_w, j - k_h],
                                              self.precision),
                                 ReadAccessor(tKernel, [k_w, k_h],
                                              self.precision)),
                                     IterRange(k_w,
                                               -(self.kernel_size[0] - 1) // 2,
                                               (self.kernel_size[0] - 1) // 2),
                                     precision=self.precision),
                                 IterRange(k_h,
                                           -(self.kernel_size[1] - 1) // 2,
                                           (self.kernel_size[1] - 1) // 2),
                                 precision=self.precision)))

        mdl_scheme = expand_ndrange(result)
        print("mdl_scheme:\n{}".format(mdl_scheme.get_str(depth=None)))
        return Statement(mdl_scheme, Return())
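A plain-Python reference of the NDRange above, as a sketch (boundary handling and the tensor stride machinery are ignored); note the j - k_h input index, mirroring the ReadAccessor:

    def reference_conv2d(img, kernel, w, h, kw, kh):
        out = [[0.0] * h for _ in range(w)]
        for i in range(w):
            for j in range(h):
                acc = 0.0
                for k_w in range(-(kw - 1) // 2, (kw - 1) // 2 + 1):
                    for k_h in range(-(kh - 1) // 2, (kh - 1) // 2 + 1):
                        # kernel index re-centred for a list-of-lists layout
                        acc += (img[i + k_w][j - k_h] *
                                kernel[k_w + (kw - 1) // 2][k_h + (kh - 1) // 2])
                out[i][j] = acc
        return out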
Example #11
    def generate_scheme(self):
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        approx = ReciprocalSeed(vx, precision=self.precision, tag="approx")

        result = approx

        scheme = Return(result, precision=self.precision, debug=debug_multi)

        return scheme
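ReciprocalSeed only provides a low-accuracy approximation of 1/x; a sketch of the Newton-Raphson refinement such a seed typically feeds (an assumption about intended use, not shown in this example):

    def refine_reciprocal(x, r, steps=2):
        # each step roughly doubles the number of correct bits:
        # r_{n+1} = r_n * (2 - x * r_n)
        for _ in range(steps):
            r = r * (2.0 - x * r)
        return r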
Example #12
    def generate_tensor_check_loop(self, tensor_descriptors, input_tables,
                                   output_tables):
        # unpack tensor descriptors tuple
        (input_tensor_descriptor_list,
         output_tensor_descriptor_list) = tensor_descriptors
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_error_detail_function = self.get_printf_error_detail_fct(
            output_tensor_descriptor_list[0])

        NUM_INPUT_ARRAY = len(input_tables)

        # generate the expected table for the whole multi-array
        expected_tables = self.generate_expected_table(tensor_descriptors,
                                                       input_tables)

        # global statement to list all checks
        check_statement = Statement()

        # implement check for each output tensor
        for out_id, out_td in enumerate(output_tensor_descriptor_list):
            # expected values for the (vj)-th entry of the sub-array
            expected_values = [
                TableLoad(expected_tables[out_id], vj, i)
                for i in range(self.accuracy.get_num_output_value())
            ]
            # local result for the (vj)-th entry of the sub-array
            local_result = TableLoad(output_tables[out_id], vj)

            array_len = out_td.get_bounding_size()

            if self.break_error:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj, ) + (local_result, ))),
                    self.accuracy.get_output_print_call(
                        self.function_name, expected_values))
            else:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj, ) + (local_result, ))),
                    self.accuracy.get_output_print_call(
                        self.function_name, expected_values),
                    Return(Constant(1, precision=ML_Int32)))
            check_array_loop = Loop(
                ReferenceAssign(vj, 0), vj < array_len,
                Statement(
                    ConditionBlock(
                        self.accuracy.get_output_check_test(
                            local_result, expected_values),
                        return_statement_break),
                    ReferenceAssign(vj, vj + 1),
                ))
            check_statement.add(check_array_loop)
        return check_statement
Example #13
def generate_function_from_optree(name_factory,
                                  optree,
                                  arg_list,
                                  tag="foo",
                                  result_format=None):
    """ Function which transform a sub-graph @p optree whose inputs are @p arg_list
        into a meta function
        @param optree operation graph to be incorporated as function boday
        @param arg_list list of @p optree's parameters to be used as function arguments
        @param name_factory engine to generate unique function name and to register function
        @param tag string to be used as seed to generate function name
        @param result_format hint to indicate function's return format (if optree is not
            an arithmetic operation (e.g. it already contains a Return node, then @p result_format
            must be used to specify the funciton return format)

        @return CodeFunction object containing the function implementation (plus the function
            would have been declared into name_factory)
        
        """
    # determining return format
    return_format = optree.get_precision() if result_format is None else result_format
    assert return_format is not None, "external call result format must be defined"
    function_name = name_factory.declare_free_function_name(tag)

    ext_function = CodeFunction(function_name, output_format=return_format)

    # creating argument copy
    arg_map = {}
    arg_index = 0
    for arg in arg_list:
        arg_tag = arg.get_tag(default="arg_%d" % arg_index)
        arg_index += 1
        arg_map[arg] = ext_function.add_input_variable(arg_tag,
                                                       arg.get_precision())

    # extracting const tables to make sure they are not duplicated
    table_set = extract_tables(optree)
    arg_map.update({table: table for table in table_set if table.const})

    # copying optree while swapping argument for variables
    optree_copy = optree.copy(copy_map=arg_map)
    # instantiating external function scheme
    if isinstance(optree, ML_ArithmeticOperation):
        function_optree = Statement(Return(optree_copy))
    else:
        function_optree = Statement(optree_copy)
    ext_function.set_scheme(function_optree)
    name_factory.declare_function(function_name,
                                  ext_function.get_function_object())

    return ext_function
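Usage sketch: this helper is invoked in exactly this way by vectorize_function_scheme (Example #22 below) to build the scalar fallback callback.

    scalar_callback_fct = generate_function_from_optree(
        name_factory, scalar_scheme, scalar_arg_list, "scalar_callback",
        scalar_output_format)
    scalar_callback = scalar_callback_fct.get_function_object()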
Example #14
    def generate_scheme(self):
        var = self.implementation.add_input_variable("x", self.precision)
        var_y = self.implementation.add_input_variable("y", self.precision)
        var_z = self.implementation.add_input_variable("z", self.precision)
        mult = Multiplication(var, var_z, precision=self.precision)
        add = Addition(var_y, mult, precision=self.precision)

        test_program = Statement(
            add,
            Return(add)
        )
        return test_program
Example #15
    def generate_scalar_scheme(self, vx, vy):
        div = Division(vx, vy, precision=self.precision)
        div_if = Trunc(div, precision=self.precision)
        rem = Variable("rem",
                       var_type=Variable.Local,
                       precision=self.precision)
        qi = Variable("qi", var_type=Variable.Local, precision=self.precision)
        qi_bound = Constant(S2**self.precision.get_mantissa_size())
        init_rem = FusedMultiplyAdd(-div_if, vy, vx)

        # factorizing 1 / vy to save time
        # NOTES: it makes rem / vy approximate
        # shared_rcp = Division(1, vy, precision=self.precision)

        iterative_fmod = Loop(
            Statement(
                ReferenceAssign(rem, init_rem),
                ReferenceAssign(qi, div_if),
            ),
            Abs(qi) > qi_bound,
            Statement(
                ReferenceAssign(
                    qi,
                    #Trunc(shared_rcp * rem, precision=self.precision)
                    Trunc(rem / vy, precision=self.precision)),
                ReferenceAssign(rem, FMA(-qi, vy, rem))))
        scheme = Statement(
            rem,
            # shared_rcp,
            iterative_fmod,
            ConditionBlock(
                # if rem's sign and vx's sign mismatch
                (rem * vx < 0.0).modify_attributes(tag="update_cond",
                                                   debug=debug_multi),
                Return(rem + vy),
                Return(rem),
            ))
        return scheme
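A plain-Python sketch of the algorithm above (exactness of the FMA-based remainder update aside): compute rem = x - trunc(x / y) * y, then fix the sign so the result matches x's sign.

    import math

    def reference_fmod(x, y):
        rem = x - math.trunc(x / y) * y
        if rem * x < 0.0:       # mirrors the update_cond fix-up
            rem += y
        return rem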
Example #16
    def generate_scheme(self):
        """ main scheme generation """
        input_precision = self.precision
        output_precision = self.precision

        # declaring main input variable
        x_interval = Interval(-10.3, 10.7)
        var_x = self.implementation.add_input_variable("x",
                                                       input_precision,
                                                       interval=x_interval)

        y_interval = Interval(-17.9, 17.2)
        var_y = self.implementation.add_input_variable("y",
                                                       input_precision,
                                                       interval=y_interval)

        z_interval = Interval(-70.3, -57.7)
        var_z = self.implementation.add_input_variable("z",
                                                       input_precision,
                                                       interval=z_interval)

        min_yz = Min(var_z, var_y)

        cst0 = Constant(42.5, tag="cst0", precision=self.precision)
        cst1 = Constant(2.5, tag="cst1", precision=self.precision)
        cst2 = Constant(12.5, tag="cst2", precision=self.precision)

        new_cst = cst0 + cst1 * cst2

        result = min_yz + new_cst

        scheme = ConditionBlock(
            LogicalAnd(
                LogicalOr(cst0 > cst1, LogicalNot(cst1 > cst0)),
                var_x > var_y,
            ), Return(result), Return(cst2))
        return scheme
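A plain-Python reference of the scheme above, as a sketch: the constant sub-condition folds away (42.5 > 2.5 always holds), so the predicate reduces to x > y.

    def reference(x, y, z):
        new_cst = 42.5 + 2.5 * 12.5      # cst0 + cst1 * cst2 = 73.75
        return min(y, z) + new_cst if x > y else 12.5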
Example #17
    def generate_scheme(self):
        size_format = ML_Int32

        # Matrix storage
        A_storage = self.implementation.add_input_variable("buffer_a", ML_Pointer_Format(self.precision))
        B_storage = self.implementation.add_input_variable("buffer_b", ML_Pointer_Format(self.precision))
        C_storage = self.implementation.add_input_variable("buffer_c", ML_Pointer_Format(self.precision))

        # Matrix sizes
        n = self.implementation.add_input_variable("n", size_format)
        m = self.implementation.add_input_variable("m", size_format)
        p = self.implementation.add_input_variable("p", size_format)


        # A is a (n x p) matrix in row-major
        tA = Tensor(A_storage, TensorDescriptor([p, n], [1, p], self.precision))
        # B is a (p x m) matrix in row-major
        tB = Tensor(B_storage, TensorDescriptor([m, p], [1, m], self.precision))
        # C is a (n x m) matrix in row-major
        tC = Tensor(C_storage, TensorDescriptor([m, n], [1, m], self.precision))

        index_format = ML_Int32

        # main NDRange description
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        j = Variable("j", precision=index_format, var_type=Variable.Local)
        k = Variable("k", precision=index_format, var_type=Variable.Local)
        result = NDRange(
            [IterRange(j, 0, m - 1), IterRange(i, 0, n - 1)],
            WriteAccessor(
                tC, [j, i],
                Sum(
                    Multiplication(
                        ReadAccessor(tA, [k, i], self.precision),
                        ReadAccessor(tB, [j, k], self.precision),
                        precision=self.precision),
                    IterRange(k, 0, p - 1),
                    precision=self.precision)))

        #mdl_scheme = expand_ndrange(exchange_loop_order(tile_ndrange(result, {j: 2, i: 2}), [1, 0]))
        if self.vectorize:
            mdl_scheme = expand_ndrange(vectorize_ndrange(result, j, 4))
        else:
            mdl_scheme = expand_ndrange(exchange_loop_order(tile_ndrange(result, {j: 2, i: 2}), [1, 0]))
        print("mdl_scheme:\n{}".format(mdl_scheme.get_str(depth=None, display_precision=True)))
        return Statement(
            mdl_scheme,
            Return()
        )
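A plain-Python reference of the NDRange above, as a sketch: the row-major matrix product C (n x m) = A (n x p) . B (p x m).

    def reference_matmul(A, B, n, m, p):
        C = [[0.0] * m for _ in range(n)]
        for i in range(n):
            for j in range(m):
                C[i][j] = sum(A[i][k] * B[k][j] for k in range(p))
        return C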
Example #18
    def generate_scheme(self):
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.get_input_precision(0))

        bf16_params = ML_NewTable(dimensions=[self.table_size], storage_precision=BFloat16)
        for i in range(self.table_size):
            bf16_params[i] = 1.1**i

        conv_vx = Conversion(TableLoad(bf16_params, vx), precision=ML_Binary32, tag="conv_vx", debug=debug_multi)

        result = conv_vx

        scheme = Return(result, precision=self.precision, debug=debug_multi)

        return scheme
Example #19
    def generate_scalar_scheme(self, vx):
        output_precision = self.precision
        input_precision = vx.get_precision()

        bias = -output_precision.get_bias()
        bound_exp = Max(
            Min(vx, output_precision.get_emax(), precision=input_precision),
            output_precision.get_emin_normal(),
            precision=input_precision) + bias
        scheme = Return(ExponentInsertion(bound_exp,
                                          specifier=ExponentInsertion.NoOffset,
                                          precision=self.precision),
                        tag="result",
                        debug=debug_multi)
        return scheme
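A sketch of the intended numeric behavior (an assumption read off the clamp-plus-ExponentInsertion structure): the integer input is clamped to the representable exponent range, and 2**e is built directly from the exponent field.

    def reference_exp2i(e, emin_normal, emax):
        # clamp, then 2**e via the exponent field (no rounding involved)
        return 2.0 ** min(max(e, emin_normal), emax)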
Example #20
    def generate_scheme(self):
        # declare a new input parameter vx whose tag is "x" and
        # whose format is single precision
        vx = self.implementation.add_input_variable("x", self.get_input_precision(0))

        # declare a new input parameter vy whose tag is "y" and
        # whose format is single precision
        vy = self.implementation.add_input_variable("y", self.get_input_precision(0))

        # declare the main operation graph for the meta-function:
        # a single Statement containing a single Return of
        # the addition of the two input variables in single precision
        main_scheme = Statement(
            Return(vx + vy, precision=ML_Binary32)
        )
        return main_scheme
Example #21
    def generate_scheme(self):
        """ generate an operation unitary bench test scheme
            (graph of operation implementing latency computation
             on a dependent sequence of self.op_class)"""
        unroll_factor = self.unroll_factor
        test_num = self.test_num

        bench_statement = metaop.Statement()
        # floating-point bench
        for op_class in self.operation_map:
            for output_precision in self.operation_map[op_class]:
                for predicate in OPERATOR_BENCH_MAP[op_class]:
                    if predicate(op_class, output_precision, None):
                        op_bench = OPERATOR_BENCH_MAP[op_class][predicate]
                        bench_statement.add(
                            op_bench(output_precision).generate_bench(
                                self.processor, test_num, unroll_factor))
        bench_statement.add(Return(0))

        return bench_statement
Example #22
def vectorize_function_scheme(vectorizer,
                              name_factory,
                              scalar_scheme,
                              scalar_output_format,
                              scalar_arg_list,
                              vector_size,
                              sub_vector_size=None):
    """ Use a vectorization engine @p vectorizer to vectorize the sub-graph @p
        scalar_scheme, that is transforming and inputs and outputs from scalar
        to vectors and performing required internal path duplication """

    sub_vector_size = vector_size if sub_vector_size is None else sub_vector_size

    vec_arg_list, vector_scheme, vector_mask = \
        vectorizer.vectorize_scheme(scalar_scheme, scalar_arg_list,
                                    vector_size, sub_vector_size)

    vector_output_format = vectorize_format(scalar_output_format, vector_size)

    vec_res = Variable("vec_res",
                       precision=vector_output_format,
                       var_type=Variable.Local)

    vector_mask.set_attributes(tag="vector_mask", debug=debug_multi)

    callback_name = "scalar_callback"
    scalar_callback_fct = generate_function_from_optree(
        name_factory, scalar_scheme, scalar_arg_list, callback_name,
        scalar_output_format)
    scalar_callback = scalar_callback_fct.get_function_object()

    if no_scalar_fallback_required(vector_mask):
        function_scheme = Statement(
            Return(vector_scheme, precision=vector_output_format))
    else:
        function_scheme = generate_c_vector_wrapper(vector_size, vec_arg_list,
                                                    vector_scheme, vector_mask,
                                                    vec_res, scalar_callback)

    return vec_res, vec_arg_list, function_scheme, scalar_callback, scalar_callback_fct
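A hypothetical call site, as a sketch (vectorizer, name_factory, the scalar scheme and its arguments come from the surrounding meta-function; the vector size 4 is illustrative):

    vec_res, vec_arg_list, function_scheme, scalar_callback, scalar_callback_fct = \
        vectorize_function_scheme(vectorizer, name_factory, scalar_scheme,
                                  scalar_output_format, scalar_arg_list, 4)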
Example #23
    def generic_atan2_generate(self, _vx, vy=None):
        """ if vy is None, compute atan(_vx), else compute atan2(vy / vx) """

        if vy is None:
            # approximation
            # if abs_vx <= 1.0 then atan(abs_vx) is directly approximated
            # if abs_vx > 1.0 then atan(abs_vx) = pi/2 - atan(1 / abs_vx)
            #
            # for vx >= 0, atan(vx) = atan(abs_vx)
            #
            # for vx < 0, atan(vx) = -atan(abs_vx)
            #                      = -pi/2 + atan(1 / abs_vx)
            vx = _vx
            sign_cond = vx < 0
            abs_vx = Select(vx < 0, -vx, vx, tag="abs_vx", debug=debug_multi)
            bound_cond = abs_vx > 1
            inv_abs_vx = 1 / abs_vx

            # condition to select subtraction
            cond = LogicalOr(LogicalAnd(vx < 0, LogicalNot(bound_cond)),
                             vx > 1,
                             tag="cond",
                             debug=debug_multi)

            # reduced argument
            red_vx = Select(bound_cond,
                            inv_abs_vx,
                            abs_vx,
                            tag="red_vx",
                            debug=debug_multi)

            offset = None
        else:
            # bound_cond is True iff Abs(vy / _vx) > 1.0
            bound_cond = Abs(vy) > Abs(_vx)
            bound_cond.set_attributes(tag="bound_cond", debug=debug_multi)
            # vx and vy are of opposite signs
            #sign_cond = (_vx * vy) < 0
            # using cast to int(signed) and bitwise xor
            # to determine if _vx and vy are of opposite sign rapidly
            fast_sign_cond = BitLogicXor(
                TypeCast(_vx, precision=self.precision.get_integer_format()),
                TypeCast(vy, precision=self.precision.get_integer_format()),
                precision=self.precision.get_integer_format()) < 0
            # sign_cond = (_vx * vy) < 0
            sign_cond = fast_sign_cond
            sign_cond.set_attributes(tag="sign_cond", debug=debug_multi)

            # condition to select subtraction
            # TODO: could be accelerated if LogicalXor existed
            slow_cond = LogicalOr(
                LogicalAnd(sign_cond,
                           LogicalNot(bound_cond)),  # -1 < (vy / _vx) < 0
                LogicalAnd(bound_cond,
                           LogicalNot(sign_cond)),  # (vy / _vx) > 1
                tag="cond",
                debug=debug_multi)
            cond = slow_cond

            numerator = Select(bound_cond,
                               _vx,
                               vy,
                               tag="numerator",
                               debug=debug_multi)
            denominator = Select(bound_cond,
                                 vy,
                                 _vx,
                                 tag="denominator",
                                 debug=debug_multi)
            # reduced argument
            red_vx = Abs(numerator) / Abs(denominator)
            red_vx.set_attributes(tag="red_vx", debug=debug_multi)

            offset = Select(
                _vx > 0,
                Constant(0, precision=self.precision),
                # vx < 0
                Select(
                    sign_cond,
                    # vy > 0
                    Constant(sollya.pi, precision=self.precision),
                    Constant(-sollya.pi, precision=self.precision),
                    precision=self.precision),
                precision=self.precision,
                tag="offset")

        approx_fct = sollya.atan(sollya.x)

        if self.method == "piecewise":
            sign_vx = Select(cond,
                             -1,
                             1,
                             precision=self.precision,
                             tag="sign_vx",
                             debug=debug_multi)

            cst_sign = Select(sign_cond,
                              -1,
                              1,
                              precision=self.precision,
                              tag="cst_sign",
                              debug=debug_multi)
            cst = cst_sign * Select(
                bound_cond, sollya.pi / 2, 0, precision=self.precision)
            cst.set_attributes(tag="cst", debug=debug_multi)

            bound_low = 0.0
            bound_high = 1.0
            num_intervals = self.num_sub_intervals
            error_threshold = S2**-(self.precision.get_mantissa_size() + 8)

            approx, eval_error = piecewise_approximation(
                approx_fct,
                red_vx,
                self.precision,
                bound_low=bound_low,
                bound_high=bound_high,
                max_degree=None,
                num_intervals=num_intervals,
                error_threshold=error_threshold,
                odd=True)

            result = cst + sign_vx * approx
            result.set_attributes(tag="result",
                                  precision=self.precision,
                                  debug=debug_multi)

        elif self.method == "single":
            approx_interval = Interval(0, 1.0)
            # determining the degree of the polynomial approximation
            poly_degree_range = sollya.guessdegree(
                approx_fct / sollya.x, approx_interval,
                S2**-(self.precision.get_field_size() + 2))
            poly_degree = int(sollya.sup(poly_degree_range)) + 4
            Log.report(Log.Info, "poly_degree={}".format(poly_degree))

            # arctan is an odd function, so only odd coefficients are non-zero
            poly_degree_list = list(range(1, poly_degree + 1, 2))
            poly_object, poly_error = Polynomial.build_from_approximation_with_error(
                approx_fct, poly_degree_list,
                [1] + [self.precision.get_sollya_object()] *
                (len(poly_degree_list) - 1), approx_interval)

            odd_predicate = lambda index, _: ((index - 1) % 4 != 0)
            even_predicate = lambda index, _: (index != 1 and
                                               (index - 1) % 4 == 0)

            poly_odd_object = poly_object.sub_poly_cond(odd_predicate,
                                                        offset=1)
            poly_even_object = poly_object.sub_poly_cond(even_predicate,
                                                         offset=1)

            sollya.settings.display = sollya.hexadecimal
            Log.report(Log.Info, "poly_error: {}".format(poly_error))
            Log.report(Log.Info, "poly_odd: {}".format(poly_odd_object))
            Log.report(Log.Info, "poly_even: {}".format(poly_even_object))

            poly_odd = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_odd_object, abs_vx)
            poly_odd.set_attributes(tag="poly_odd", debug=debug_multi)
            poly_even = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_even_object, abs_vx)
            poly_even.set_attributes(tag="poly_even", debug=debug_multi)
            exact_sum = poly_odd + poly_even
            exact_sum.set_attributes(tag="exact_sum", debug=debug_multi)

            # poly_even should be (1 + poly_even)
            result = vx + vx * exact_sum
            result.set_attributes(tag="result",
                                  precision=self.precision,
                                  debug=debug_multi)

        else:
            raise NotImplementedError

        if offset is not None:
            result = result + offset

        std_scheme = Statement(Return(result))
        scheme = std_scheme

        return scheme
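A plain-Python sketch of the argument reduction used in the vy-is-None path: for |x| > 1, atan(|x|) = pi/2 - atan(1/|x|), and the sign selection restores negative inputs.

    import math

    def reference_atan(x):
        ax = abs(x)
        if ax > 1.0:
            res = math.pi / 2 - math.atan(1.0 / ax)   # bound_cond branch
        else:
            res = math.atan(ax)
        return -res if x < 0 else res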
Example #24
    def generate_scheme(self):
        # We wish to compute vx / vy
        vx = self.implementation.add_input_variable(
            "x", self.precision, interval=self.input_intervals[0])
        vy = self.implementation.add_input_variable(
            "y", self.precision, interval=self.input_intervals[1])

        # maximum exponent magnitude (to avoid overflow/underflow during
        # intermediate computations)
        int_prec = self.precision.get_integer_format()
        max_exp_mag = Constant(self.precision.get_emax() - 1,
                               precision=int_prec)

        exact_ex = ExponentExtraction(vx,
                                      tag="exact_ex",
                                      precision=int_prec,
                                      debug=debug_multi)
        exact_ey = ExponentExtraction(vy,
                                      tag="exact_ey",
                                      precision=int_prec,
                                      debug=debug_multi)

        ex = Max(Min(exact_ex, max_exp_mag, precision=int_prec),
                 -max_exp_mag,
                 tag="ex",
                 precision=int_prec)
        ey = Max(Min(exact_ey, max_exp_mag, precision=int_prec),
                 -max_exp_mag,
                 tag="ey",
                 precision=int_prec)

        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        # computing the inverse square root
        init_approx = None

        scaling_factor_x = ExponentInsertion(-ex,
                                             tag="sfx_ei",
                                             precision=self.precision,
                                             debug=debug_multi)
        scaling_factor_y = ExponentInsertion(-ey,
                                             tag="sfy_ei",
                                             precision=self.precision,
                                             debug=debug_multi)

        def test_interval_out_of_bound_risk(x_range, y_range):
            """ Try to determine from x and y's interval if there is a risk
                of underflow or overflow """
            div_range = abs(x_range / y_range)
            underflow_risk = sollya.inf(div_range) < S2**(
                self.precision.get_emin_normal() + 2)
            overflow_risk = sollya.sup(div_range) > S2**(
                self.precision.get_emax() - 2)
            return underflow_risk or overflow_risk

        out_of_bound_risk = (self.input_intervals[0] is None
                             or self.input_intervals[1] is None
                             ) or test_interval_out_of_bound_risk(
                                 self.input_intervals[0],
                                 self.input_intervals[1])
        Log.report(Log.Debug,
                   "out_of_bound_risk: {}".format(out_of_bound_risk))

        # scaled version of vx and vy, to avoid overflow and underflow
        if out_of_bound_risk:
            scaled_vx = vx * scaling_factor_x
            scaled_vy = vy * scaling_factor_y
            scaled_interval = MetaIntervalList(
                [MetaInterval(Interval(-2, -1)),
                 MetaInterval(Interval(1, 2))])
            scaled_vx.set_attributes(tag="scaled_vx",
                                     debug=debug_multi,
                                     interval=scaled_interval)
            scaled_vy.set_attributes(tag="scaled_vy",
                                     debug=debug_multi,
                                     interval=scaled_interval)
            seed_interval = 1 / scaled_interval
            print("seed_interval=1/{}={}".format(scaled_interval,
                                                 seed_interval))
        else:
            scaled_vx = vx
            scaled_vy = vy
            seed_interval = 1 / scaled_vy.get_interval()

        # We need a first approximation to 1 / scaled_vy
        dummy_seed = ReciprocalSeed(EmptyOperand(precision=self.precision),
                                    precision=self.precision)

        if self.processor.is_supported_operation(dummy_seed, self.language):
            init_approx = ReciprocalSeed(scaled_vy,
                                         precision=self.precision,
                                         tag="init_approx",
                                         debug=debug_multi)

        else:
            # generate tabulated version of seed
            raise NotImplementedError

        current_approx_std = init_approx
        # correctly-rounded inverse computation
        num_iteration = self.num_iter

        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()

        # check if inputs are zeros
        x_zero = Test(vx,
                      specifier=Test.IsZero,
                      likely=False,
                      precision=ML_Bool)
        y_zero = Test(vy,
                      specifier=Test.IsZero,
                      likely=False,
                      precision=ML_Bool)

        comp_sign = Test(vx,
                         vy,
                         specifier=Test.CompSign,
                         tag="comp_sign",
                         debug=debug_multi)

        # check if divisor is NaN
        y_nan = Test(vy, specifier=Test.IsNaN, likely=False, precision=ML_Bool)

        # check if inputs are signaling NaNs
        x_snan = Test(vx,
                      specifier=Test.IsSignalingNaN,
                      likely=False,
                      precision=ML_Bool)
        y_snan = Test(vy,
                      specifier=Test.IsSignalingNaN,
                      likely=False,
                      precision=ML_Bool)

        # check if inputs are infinities
        x_inf = Test(vx,
                     specifier=Test.IsInfty,
                     likely=False,
                     tag="x_inf",
                     precision=ML_Bool)
        y_inf = Test(vy,
                     specifier=Test.IsInfty,
                     likely=False,
                     tag="y_inf",
                     debug=debug_multi,
                     precision=ML_Bool)

        scheme = None
        gappa_vx, gappa_vy = None, None

        # initial reciprocal approximation of 1.0 / scaled_vy
        inv_iteration_list, recp_approx = compute_reduced_reciprocal(
            init_approx, scaled_vy, self.num_iter)

        recp_approx.set_attributes(tag="recp_approx", debug=debug_multi)

        # approximation of scaled_vx / scaled_vy
        yerr_last, reduced_div_approx, div_iteration_list = compute_reduced_division(
            scaled_vx, scaled_vy, recp_approx)

        eval_error_range, div_eval_error_range = self.solve_eval_error(
            init_approx, recp_approx, reduced_div_approx, scaled_vx, scaled_vy,
            inv_iteration_list, div_iteration_list, S2**-7, seed_interval)
        eval_error = sup(abs(eval_error_range))
        recp_interval = 1 / scaled_vy.get_interval() + eval_error_range
        recp_approx.set_interval(recp_interval)

        div_interval = (scaled_vx.get_interval() / scaled_vy.get_interval()
                        + div_eval_error_range)
        reduced_div_approx.set_interval(div_interval)
        reduced_div_approx.set_tag("reduced_div_approx")

        if out_of_bound_risk:
            unscaled_result = scaling_div_result(reduced_div_approx, ex,
                                                 scaling_factor_y,
                                                 self.precision)

            subnormal_result = subnormalize_result(recp_approx,
                                                   reduced_div_approx, ex, ey,
                                                   yerr_last, self.precision)
        else:
            unscaled_result = reduced_div_approx
            subnormal_result = reduced_div_approx

        x_inf_or_nan = Test(vx, specifier=Test.IsInfOrNaN, likely=False)
        y_inf_or_nan = Test(vy,
                            specifier=Test.IsInfOrNaN,
                            likely=False,
                            tag="y_inf_or_nan",
                            debug=debug_multi)

        # generate IEEE exception raising only if libm-compliant
        # mode is enabled
        enable_raise = self.libm_compliant

        # managing special cases
        # x inf and y inf
        pre_scheme = ConditionBlock(
            x_inf_or_nan,
            ConditionBlock(
                x_inf,
                ConditionBlock(
                    y_inf_or_nan,
                    Statement(
                        # signaling NaNs raise invalid operation flags
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid))
                        if enable_raise else Statement(),
                        Return(FP_QNaN(self.precision)),
                    ),
                    ConditionBlock(comp_sign,
                                   Return(FP_MinusInfty(self.precision)),
                                   Return(FP_PlusInfty(self.precision)))),
                Statement(
                    ConditionBlock(x_snan, Raise(ML_FPE_Invalid))
                    if enable_raise else Statement(),
                    Return(FP_QNaN(self.precision)))),
            ConditionBlock(
                x_zero,
                ConditionBlock(
                    LogicalOr(y_zero, y_nan, precision=ML_Bool),
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid))
                        if enable_raise else Statement(),
                        Return(FP_QNaN(self.precision))), Return(vx)),
                ConditionBlock(
                    y_inf_or_nan,
                    ConditionBlock(
                        y_inf,
                        Return(
                            Select(comp_sign, FP_MinusZero(self.precision),
                                   FP_PlusZero(self.precision))),
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid))
                            if enable_raise else Statement(),
                            Return(FP_QNaN(self.precision)))),
                    ConditionBlock(
                        y_zero,
                        Statement(
                            Raise(ML_FPE_DivideByZero)
                            if enable_raise else Statement(),
                            ConditionBlock(
                                comp_sign,
                                Return(FP_MinusInfty(self.precision)),
                                Return(FP_PlusInfty(self.precision)))),
                        # managing numerical value result cases
                        Statement(
                            recp_approx,
                            reduced_div_approx,
                            ConditionBlock(
                                Test(unscaled_result,
                                     specifier=Test.IsSubnormal,
                                     likely=False),
                                # result is subnormal
                                Statement(
                                    # inexact flag should have been raised when computing yerr_last
                                    # ConditionBlock(
                                    #    Comparison(
                                    #        yerr_last, 0,
                                    #        specifier=Comparison.NotEqual, likely=True),
                                    #    Statement(Raise(ML_FPE_Inexact, ML_FPE_Underflow))
                                    #),
                                    Return(subnormal_result), ),
                                # result is normal
                                Statement(
                                    # inexact flag should have been raised when computing yerr_last
                                    #ConditionBlock(
                                    #    Comparison(
                                    #        yerr_last, 0,
                                    #        specifier=Comparison.NotEqual, likely=True),
                                    #    Raise(ML_FPE_Inexact)
                                    #),
                                    Return(unscaled_result))),
                        )))))
        # managing rounding mode save and restore
        # to ensure intermediary computations are performed in round-to-nearest
        # clearing exception before final computation

        #rnd_mode = GetRndMode()
        #scheme = Statement(
        #    rnd_mode,
        #    SetRndMode(ML_RoundToNearest),
        #    yerr_last,
        #    SetRndMode(rnd_mode),
        #    unscaled_result,
        #    ClearException(),
        #    pre_scheme
        #)

        scheme = pre_scheme

        return scheme
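A plain-Python sketch of the scaling idea above (special values and subnormal handling omitted): bring x and y into [1, 2) by stripping their exponents, divide the scaled values, then re-apply the exponent difference 2**(ex - ey).

    import math

    def reference_scaled_div(x, y):
        ex = math.frexp(x)[1] - 1     # exponent of x
        ey = math.frexp(y)[1] - 1     # exponent of y
        sx = x * 2.0 ** -ex           # scaled into [1, 2)
        sy = y * 2.0 ** -ey           # scaled into [1, 2)
        return (sx / sy) * 2.0 ** (ex - ey)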
Example #25
    def generate_expr(self,
                      code_object,
                      optree,
                      folded=True,
                      result_var=None,
                      initial=False,
                      language=None,
                      force_variable_storing=False):
        """ code generation function """
        language = self.language if language is None else language

        # search if <optree> has already been processed
        if self.has_memoization(optree):
            return self.get_memoization(optree)

        result = None
        # implementation generation
        if isinstance(optree, CodeVariable):
            result = optree

        elif isinstance(optree, Variable):
            if optree.get_var_type() is Variable.Local:
                final_var = code_object.get_free_var_name(
                    optree.get_precision(),
                    prefix=optree.get_tag(),
                    declare=True)
                result = CodeVariable(final_var, optree.get_precision())
            else:
                result = CodeVariable(optree.get_tag(), optree.get_precision())

        elif isinstance(optree, ML_NewTable):
            # Implementing LeafNode ML_NewTable generation support
            table = optree
            tag = table.get_tag()
            table_name = code_object.declare_table(
                table, prefix=tag if tag is not None else "table")
            result = CodeVariable(table_name, table.get_precision())

        elif isinstance(optree, SwitchBlock):
            switch_value = optree.inputs[0]
            # generating pre_statement
            self.generate_expr(code_object,
                               optree.get_pre_statement(),
                               folded=folded,
                               language=language)

            switch_value_code = self.generate_expr(code_object,
                                                   switch_value,
                                                   folded=folded,
                                                   language=language)
            case_map = optree.get_case_map()

            code_object << "\nswitch(%s) {\n" % switch_value_code.get()
            for case in case_map:
                case_value = case
                case_statement = case_map[case]
                if isinstance(case_value, tuple):
                    for sub_case in case:
                        code_object << "case %s:\n" % sub_case
                else:
                    code_object << "case %s:\n" % case
                code_object.open_level()
                self.generate_expr(code_object,
                                   case_statement,
                                   folded=folded,
                                   language=language)
                code_object.close_level()
            code_object << "}\n"

            return None

        elif isinstance(optree, ReferenceAssign):
            output_var = optree.inputs[0]
            result_value = optree.inputs[1]

            output_var_code = self.generate_expr(code_object,
                                                 output_var,
                                                 folded=False,
                                                 language=language)

            if isinstance(result_value, Constant):
                # generate assignation
                result_value_code = self.generate_expr(code_object,
                                                       result_value,
                                                       folded=folded,
                                                       language=language)
                code_object << self.generate_assignation(
                    output_var_code.get(), result_value_code.get())
            else:
                result_value_code = self.generate_expr(code_object,
                                                       result_value,
                                                       folded=folded,
                                                       language=language)
                code_object << self.generate_assignation(
                    output_var_code.get(), result_value_code.get())
                if optree.get_debug() and not self.disable_debug:
                    code_object << self.generate_debug_msg(
                        result_value,
                        result_value_code,
                        code_object,
                        debug_object=optree.get_debug())

            #code_object << self.generate_assignation(output_var_code.get(), result_value_code.get())
            #code_object << output_var.get_precision().generate_c_assignation(output_var_code, result_value_code)

            return None

        elif isinstance(optree, Loop):
            init_statement = optree.inputs[0]
            exit_condition = optree.inputs[1]
            loop_body = optree.inputs[2]

            self.generate_expr(code_object,
                               init_statement,
                               folded=folded,
                               language=language)
            code_object << "\nfor (;%s;)" % self.generate_expr(
                code_object, exit_condition, folded=False,
                language=language).get()
            code_object.open_level()
            self.generate_expr(code_object,
                               loop_body,
                               folded=folded,
                               language=language)
            code_object.close_level()

            return None

        elif isinstance(optree, ConditionBlock):
            condition = optree.inputs[0]
            if_branch = optree.inputs[1]
            else_branch = optree.inputs[2] if len(optree.inputs) > 2 else None

            # generating pre_statement
            self.generate_expr(code_object,
                               optree.get_pre_statement(),
                               folded=folded,
                               language=language)

            cond_code = self.generate_expr(code_object,
                                           condition,
                                           folded=folded,
                                           language=language)
            if isinstance(condition, BooleanOperation):
                cond_likely = condition.get_likely()
            else:
                # TODO To be refined (for example Constant(True)
                #      should be associated with likely True
                cond_likely = None
                Log.report(
                    Log.Warning,
                    " The following condition has no (usable) likely attribute: {}",
                    condition,
                )
            if cond_likely in [True, False]:
                code_object << "\nif (__builtin_expect(%s, %d)) " % (
                    cond_code.get(), {
                        True: 1,
                        False: 0
                    }[cond_likely])
            else:
                code_object << "\nif (%s) " % cond_code.get()
            self.open_memoization_level()
            code_object.open_level()
            #if_branch_code = self.processor.generate_expr(self, code_object, if_branch, if_branch.inputs, folded)
            if_branch_code = self.generate_expr(code_object,
                                                if_branch,
                                                folded=folded,
                                                language=language)
            code_object.close_level(cr="")
            self.close_memoization_level()
            if else_branch:
                code_object << " else "
                code_object.open_level()
                self.open_memoization_level()
                else_branch_code = self.generate_expr(code_object,
                                                      else_branch,
                                                      folded=folded,
                                                      language=language)
                code_object.close_level()
                self.close_memoization_level()
            else:
                code_object << "\n"

            return None

        elif isinstance(optree, Return):
            if len(optree.inputs) == 0:
                # void return
                code_object << "return;\n"

            else:
                return_result = optree.inputs[0]
                return_code = self.generate_expr(code_object,
                                                 return_result,
                                                 folded=folded,
                                                 language=language)
                code_object << "return %s;\n" % return_code.get()
                return None  #return_code

        elif isinstance(optree, ExceptionOperation):
            if optree.get_specifier() in [
                    ExceptionOperation.RaiseException,
                    ExceptionOperation.ClearException,
                    ExceptionOperation.RaiseReturn
            ]:
                result_code = self.processor.generate_expr(
                    self,
                    code_object,
                    optree,
                    optree.inputs,
                    folded=False,
                    result_var=result_var,
                    language=language)
                code_object << "%s;\n" % result_code.get()
                if optree.get_specifier() == ExceptionOperation.RaiseReturn:
                    if self.libm_compliant:
                        # libm compliant exception management
                        code_object.add_header(
                            "support_lib/ml_libm_compatibility.h")
                        return_value = self.generate_expr(
                            code_object,
                            optree.get_return_value(),
                            folded=folded,
                            language=language)
                        arg_value = self.generate_expr(code_object,
                                                       optree.get_arg_value(),
                                                       folded=folded,
                                                       language=language)
                        function_name = optree.function_name
                        exception_list = [
                            op.get_value() for op in optree.inputs
                        ]
                        if ML_FPE_Inexact in exception_list:
                            exception_list.remove(ML_FPE_Inexact)

                        if len(exception_list) > 1:
                            raise NotImplementedError
                        if ML_FPE_Overflow in exception_list:
                            code_object << "return ml_raise_libm_overflowf(%s, %s, \"%s\");\n" % (
                                return_value.get(), arg_value.get(),
                                function_name)
                        elif ML_FPE_Underflow in exception_list:
                            code_object << "return ml_raise_libm_underflowf(%s, %s, \"%s\");\n" % (
                                return_value.get(), arg_value.get(),
                                function_name)
                        elif ML_FPE_Invalid in exception_list:
                            code_object << "return %s;\n" % return_value.get()
                    else:
                        return_precision = optree.get_return_value(
                        ).get_precision()
                        self.generate_expr(code_object,
                                           Return(optree.get_return_value(),
                                                  precision=return_precision),
                                           folded=folded,
                                           language=language)
                return None
            else:
                result = self.processor.generate_expr(self,
                                                      code_object,
                                                      optree,
                                                      optree.inputs,
                                                      folded=folded,
                                                      result_var=result_var,
                                                      language=language)

        elif isinstance(optree, NoResultOperation):
            result_code = self.processor.generate_expr(self,
                                                       code_object,
                                                       optree,
                                                       optree.inputs,
                                                       folded=False,
                                                       result_var=result_var,
                                                       language=language)
            code_object << "%s;\n" % result_code.get()
            return None

        elif isinstance(optree, PlaceHolder):
            head = optree.get_input(0)
            for tail_node in optree.inputs[1:]:
                if not self.has_memoization(tail_node):
                    self.generate_expr(code_object,
                                       tail_node,
                                       folded=folded,
                                       initial=True,
                                       language=language)

            # generate PlaceHolder's main_value
            head_code = self.generate_expr(code_object,
                                           head,
                                           folded=folded,
                                           initial=initial,
                                           language=language)
            return head_code

        elif isinstance(optree, Statement):
            for op in optree.inputs:
                if not self.has_memoization(op):
                    self.generate_expr(code_object,
                                       op,
                                       folded=folded,
                                       initial=True,
                                       language=language)

            return None
        elif isinstance(optree, Constant):
            generate_pre_process = (self.generate_clear_exception
                                    if optree.get_clearprevious() else None)
            result = self.processor.generate_expr(
                self,
                code_object,
                optree, [],
                generate_pre_process=generate_pre_process,
                folded=folded,
                result_var=result_var,
                language=language)

        else:
            generate_pre_process = (self.generate_clear_exception
                                    if optree.get_clearprevious() else None)
            result = self.processor.generate_expr(
                self,
                code_object,
                optree,
                optree.inputs,
                generate_pre_process=generate_pre_process,
                folded=folded,
                result_var=result_var,
                language=language)

        # registering result into memoization table
        self.add_memoization(optree, result)

        # debug management
        if optree.get_debug() and not self.disable_debug:
            code_object << self.generate_debug_msg(optree, result, code_object)

        if (initial and not isinstance(result, CodeVariable)
                and result is not None):
            final_var = result_var if result_var else code_object.get_free_var_name(
                optree.get_precision(), prefix="result", declare=True)
            code_object << self.generate_assignation(final_var, result.get())
            return CodeVariable(final_var, optree.get_precision())

        return result
Example No. 26
    def generate_scalar_scheme(self, vx, vy):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        vy.set_attributes(tag="y")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #
        # pow(x, y) = x^y
        #           = exp(y * log(x))
        #           = 2^(y * log2(x))
        #           = 2^(y * (log2(m) + e))
        #
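        # Minimal numeric sanity check of the identity above (assumes the
        # standard math module; illustration only, not part of the generated
        # scheme): with x = 1.5 * 2^2 = 6.0 and y = 3,
        # 2^(y * (log2(m) + e)) == x^y == 216
        import math
        assert abs(2.0 ** (3 * (math.log2(1.5) + 2)) - 6.0 ** 3) < 1e-9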
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        m = MantissaExtraction(vx, tag="m", precision=self.precision)

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed, language=None,
            table_getter= lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x) # /sollya.log(self.basis)



        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)


        # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
        #
        # log_approx = log2(Abs(m))
        # r = y * log_approx ~ y * log2(m)
        #
        # NOTE: when e is negative, (y * log2(m)) and (y * e) may cancel out;
        # when e is positive, whatever the sign of y, (y * log2(m)) and
        # (y * e) cannot have opposite signs

        # log2(m) in [0, 1[ so cancellation can occur only if e == -1
        # we split 2^x into 2^x = 2^t0 * 2^t1
        # if e < 0: t0 = y * (log2(m) + e), t1 = 0
        # else:     t0 = y * e,             t1 = y * log2(m)
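        # Numeric illustration of the cancellation risk motivating the split
        # (assumes the math module; not part of the generated scheme): with
        # m = 1.5, e = -1, y = 3, the terms y*log2(m) ~ +1.75 and y*e = -3
        # partially cancel in y * (log2(m) + e)
        import math
        _t = 3 * (math.log2(1.5) - 1)
        assert abs(_t) < 3 * math.log2(1.5) < 3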

        t_cond = e < 0

        # e_y ~ e * y
        e_f = Conversion(e, precision=self.precision)
        #t0 = Select(t_cond, (e_f + log_approx) * vy, Multiplication(e_f, vy), tag="t0")
        #NearestInteger(t0, precision=self.precision, tag="t0_int")

        EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
        LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
        t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
        # fractional parts: frac(y*e) + frac(y*log2(m)) when t_cond holds,
        # frac(y*e) otherwise
        t0_frac = Select(t_cond,
                         FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY),
                         FMA(e_f, vy, -EY),
                         tag="t0_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)

        exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
        exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

        exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision), precision=self.precision, tag="exp2_t0_int")

        t1 = Select(t_cond, Constant(0, precision=self.precision), r)
        exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
        exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

        result_sign = Constant(1.0, precision=self.precision) # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        y_int = NearestInteger(vy, precision=self.precision)
        y_is_integer = Equal(y_int, vy)
        y_is_even = LogicalOr(
            # if y is a number (exc. inf) not smaller than 2**(mantissa_size+1),
            # then it is an integer multiple of 2 => even
            Abs(vy) >= 2**(self.precision.get_mantissa_size()+1),
            LogicalAnd(
                LogicalAnd(
                    y_is_integer,
                    Abs(vy) < 2**(self.precision.get_mantissa_size()+1)),
                # we want to limit the modulo computation to an integer input
                Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
            )
        )
        y_is_odd = LogicalAnd(
            LogicalAnd(
                Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                y_is_integer
            ),
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 1)
        )
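        # Hedged aside on the 2**(p+1) threshold used above (binary64-style
        # illustration only, not part of the generated scheme): past 2**53
        # consecutive doubles are 2 apart, so every representable value
        # there is an even integer
        _big = 2.0 ** 53
        assert _big + 1.0 == _big and (_big + 2.0) - _big == 2.0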


        # special cases management
        special_case_results = Statement(
            # x is sNaN OR y is sNaN
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsSignalingNaN), Test(vy, specifier=Test.IsSignalingNaN)),
                Return(FP_QNaN(self.precision))
            ),
            # pow(x, ±0) is 1 if x is not a signaling NaN
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Return(Constant(1.0, precision=self.precision))
            ),
            # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)),
                Return(Select(Test(vx, specifier=Test.IsPositiveZero), FP_PlusInfty(self.precision), FP_MinusInfty(self.precision))),
            ),
            # pow(±0, −∞) is +∞ with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, +∞) is +0 with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is ±0 for finite y>0 an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)),
                Return(vx),
            ),
            # pow(−1, ±∞) is 1 with no exception
            ConditionBlock(
                LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(+1, y) is 1 for any y (even a quiet NaN)
            ConditionBlock(
                vx == 1,
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(x, +∞) is +0 for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +∞ for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +0 for a number y < 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +∞ for a number y > 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(−∞, y) is −0 for finite y < 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy < 0)),
                Return(FP_MinusZero(self.precision)),
            ),
            # pow(−∞, y) is −∞ for finite y > 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy > 0)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusZero(self.precision)),
            ),
            # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
            # TODO: signal divideByZero exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +0 for finite y>0 and not an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusZero(self.precision)),
            ),
        )
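        # Hedged spot-check of two of the cases above against the host libm
        # (illustration only, assumes the standard math module):
        import math
        assert math.pow(0.0, -math.inf) == math.inf   # pow(±0, -inf) = +inf
        assert math.pow(1.0, math.nan) == 1.0         # pow(+1, y) = 1, any y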

        # manage y = 1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 when computing the identity function
        # (test-case #3)
        result = Statement(
            special_case_results,
            # fallback default cases
            Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
        return result
Example No. 27
    def generate_scheme(self):
        # declaring function input variable
        v_x = [
            self.implementation.add_input_variable(
                "x%d" % index, self.get_input_precision(index))
            for index in range(self.arity)
        ]

        double_format = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble
        }[self.precision]
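        # Hedged aside: a double-double (resp. single-single) value is an
        # unevaluated pair hi + lo where lo captures the rounding error of
        # hi. A minimal Fast2Sum illustration in plain floats (assumes
        # |_a| >= |_b|; illustration only, not this library's API):
        _a, _b = 1.0, 1e-17
        _hi = _a + _b                # rounded sum
        _lo = _b - (_hi - _a)        # exact rounding error of _hi
        assert _hi == 1.0 and _lo == 1e-17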

        # testing Add211
        exact_add = Addition(v_x[0],
                             v_x[1],
                             precision=double_format,
                             tag="exact_add")
        # testing Mul211
        exact_mul = Multiplication(v_x[0],
                                   v_x[1],
                                   precision=double_format,
                                   tag="exact_mul")
        # testing Sub211
        exact_sub = Subtraction(v_x[1],
                                v_x[0],
                                precision=double_format,
                                tag="exact_sub")
        # testing Add222
        multi_add = Addition(exact_add,
                             exact_sub,
                             precision=double_format,
                             tag="multi_add")
        # testing Mul222
        multi_mul = Multiplication(multi_add,
                                   exact_mul,
                                   precision=double_format,
                                   tag="multi_mul")
        # testing Add221 and Add212 and Sub222
        multi_sub = Subtraction(Addition(exact_sub,
                                         v_x[1],
                                         precision=double_format,
                                         tag="add221"),
                                Addition(v_x[0],
                                         multi_mul,
                                         precision=double_format,
                                         tag="add212"),
                                precision=double_format,
                                tag="sub222")
        # testing Mul212 and Mul221
        mul212 = Multiplication(multi_sub,
                                v_x[0],
                                precision=double_format,
                                tag="mul212")
        mul221 = Multiplication(exact_mul,
                                v_x[1],
                                precision=double_format,
                                tag="mul221")
        # testing Sub221 and Sub212
        sub221 = Subtraction(mul212,
                             mul221.hi,
                             precision=double_format,
                             tag="sub221")
        sub212 = Subtraction(sub221,
                             mul212.lo,
                             precision=double_format,
                             tag="sub212")
        # testing FMA2111
        fma2111 = FMA(sub221.lo,
                      sub212.hi,
                      mul221.hi,
                      precision=double_format,
                      tag="fma2111")
        # testing FMA2112
        fma2112 = FMA(fma2111.lo,
                      fma2111.hi,
                      fma2111,
                      precision=double_format,
                      tag="fma2112")
        # testing FMA2212
        fma2212 = FMA(fma2112,
                      fma2112.hi,
                      fma2112,
                      precision=double_format,
                      tag="fma2212")
        # testing FMA2122
        fma2122 = FMA(fma2212.lo,
                      fma2212,
                      fma2212,
                      precision=double_format,
                      tag="fma2122")
        # testing FMA22222
        fma2222 = FMA(fma2122,
                      fma2212,
                      fma2111,
                      precision=double_format,
                      tag="fma2222")
        # testing Add122
        add122 = Addition(fma2222,
                          fma2222,
                          precision=self.precision,
                          tag="add122")
        # testing Add112
        add112 = Addition(add122,
                          fma2222,
                          precision=self.precision,
                          tag="add112")
        # testing Add121
        add121 = Addition(fma2222,
                          add112,
                          precision=self.precision,
                          tag="add121")
        # testing subnormalization
        multi_subnormalize = SpecificOperation(
            Addition(add121, add112, precision=double_format),
            Constant(3, precision=self.precision.get_integer_format()),
            specifier=SpecificOperation.Subnormalize,
            precision=double_format,
            tag="multi_subnormalize")
        result = Conversion(multi_subnormalize, precision=self.precision)

        scheme = Statement(Return(result))

        return scheme
Example No. 28
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        table_size_log = self.table_size_log
        integer_size = 31
        integer_precision = ML_Int32

        max_bound = sup(abs(self.input_intervals[0]))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)
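        # Hedged illustration of this Q1.30 storage format (not library
        # code): a real v in [-2, 2) is stored as the 32-bit integer
        # round(v * 2**30), e.g. 0.5 maps to 2**29
        assert round(0.5 * 2 ** 30) == 2 ** 29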

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(
                local_x
            )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

            sin_local = sin(
                local_x
            )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.get_output_precision()
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(sollya.x), [0, 2],
            [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=self.get_output_precision())

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme
Example No. 29
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)
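        # Cody-and-Waite style constant split: log2_hi keeps few enough bits
        # that k * log2_hi is exact for every k in interval_k, and log2_lo
        # holds the rounded remainder, so the reduction below,
        # r = (vx - k * log2_hi) - k * log2_lo, suffers almost no cancellation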

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]: Variable("k", interval=interval_k,
                                   precision=self.precision),
        }

        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environment")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree
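        # guessdegree above yields a starting degree for expm1(x)/x (minus
        # one); the loop below rebuilds the polynomial at increasing degree
        # until the proven evaluation error meets the accuracy goal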

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while True:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * poly_degree
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {exact_hi_part.get_handle().get_node():
                     approx_interval_split[0]},
                    {exact_hi_part.get_handle().get_node():
                     approx_interval_split[1]},
                    {exact_hi_part.get_handle().get_node():
                     approx_interval_split[2]},
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environment")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
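        # 2^ik is applied in two steps below, 2^(ik - offset) * 2^offset,
        # so that each ExponentInsertion stays within the representable
        # exponent range when ik is close to emax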
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
Example No. 30
    def generate_scalar_scheme(self, vx):
        """ Generating implementation script for hyperic tangent
            meta-function """
        # tanh(x) = sinh(x) / cosh(x)
        #         = (e^x - e^-x) / (e^x + e^-x)
        #         = (e^(2x) - 1) / (e^(2x) + 1)
        #   when x -> +inf, tanh(x) -> 1
        #   when x -> -inf, tanh(x) -> -1
        #   near 0: e^x    ~ 1 + x + x^2 / 2 + x^3 / 6 + ...
        #           e^(-x) ~ 1 - x + x^2 / 2 - x^3 / 6 + ...
        #   when x -> 0, tanh(x) ~ (2 (x + x^3/6 + ...)) / (2 + x^2 + ...) ~ x
        # We can divide the input interval into 3 parts
        # positive, around 0, and finally negative

        # Possible argument reduction
        # x = m.2^E = k * log(2) + r
        # (k != 0) => tanh(x) = (2^(2k) * e^(2r) - 1) / (2^(2k) * e^(2r) + 1)
        #                     = (1 - e^(-2r) / 2^(2k)) / (1 + e^(-2r) / 2^(2k))
        #
        # tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
        #         = ((e^(2x) + 1) - 2) / (e^(2x) + 1)
        #         = 1 - 2 / (e^(2x) + 1)
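        # Hedged numeric check of the last identity (assumes the math module;
        # illustration only, not part of the generated scheme):
        import math
        _x = 0.75
        assert abs(math.tanh(_x) - (1.0 - 2.0 / (math.exp(2.0 * _x) + 1.0))) < 1e-12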

        # tanh is odd so we reduce the computation to the absolute value of
        # vx
        abs_vx = Abs(vx, precision=self.precision)

        # if p is the expected output precision
        # x > (p+2) * log(2) / 2 => tanh(x) = 1 - eps
        #   where eps < 1/2 * 2^-p
        p = self.precision.get_mantissa_size()
        high_bound = (p + 2) * sollya.log(2) / 2
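        # e.g. assuming binary64 with p = 53: high_bound ~ 19.06, beyond
        # which 1 - tanh(x) < 2^-54 and tanh(x) rounds to 1.0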
        near_zero_bound = 0.125
        interval_num = 1024
        Log.report(Log.Verbose,
                   "high_bound={}, near_zero_bound={}, interval_num={}",
                   float(high_bound), near_zero_bound, interval_num)

        interval_size = (high_bound - near_zero_bound) / interval_num
        new_interval_size = S2**int(sollya.log2(interval_size))
        interval_num *= 2
        high_bound = new_interval_size * interval_num + near_zero_bound
        Log.report(Log.Verbose,
                   "high_bound={}, near_zero_bound={}, interval_num={}",
                   float(high_bound), near_zero_bound, interval_num)

        ERROR_THRESHOLD = S2**-p
        Log.report(Log.Info, "ERROR_THRESHOLD={}", ERROR_THRESHOLD)

        # Near 0 approximation
        near_zero_scheme, near_zero_error = self.generate_approx_poly_near_zero(
            sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx)

        # approximation parameters
        poly_degree = 7
        approx_interval = Interval(near_zero_bound, high_bound)

        sollya.settings.points = 117

        approx_scheme, approx_error = piecewise_approximation(
            sollya.tanh,
            abs_vx,
            self.precision,
            bound_low=near_zero_bound,
            bound_high=high_bound,
            num_intervals=interval_num,
            max_degree=poly_degree,
            error_threshold=ERROR_THRESHOLD)
        Log.report(Log.Warning, "approx_error={}".format(approx_error))

        comp_near_zero_bound = abs_vx < near_zero_bound
        comp_near_zero_bound.set_attributes(tag="comp_near_zero_bound",
                                            debug=debug_multi)
        comp_high_bound = abs_vx < high_bound
        comp_high_bound.set_attributes(tag="comp_high_bound",
                                       debug=debug_multi)

        complete_scheme = Select(
            comp_near_zero_bound, near_zero_scheme,
            Select(comp_high_bound, approx_scheme,
                   Constant(1.0, precision=self.precision)))

        scheme = Return(Select(vx < 0, Negation(complete_scheme),
                               complete_scheme),
                        precision=self.precision)
        return scheme