Beispiel #1
0
        def generate_fptaylor(x):
            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real z in [{},{}];".format(x_low, x_high),
                "Definitions", "  retval rnd64= {};".format(poly_expr),
                "Expressions", "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), x, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), x, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
Beispiel #2
0
    def generate_scheme(self):
        x = self.implementation.add_input_variable("x", self.precision)

        n_pi = self.precision.round_sollya_object(sollya.pi, sollya.RN)

        if not self.skip_reduction:
            abs_x = Abs(x, tag="abs_x")

            n_one = self.precision.round_sollya_object(1, sollya.RN)
            one = Constant(n_one, precision=self.precision, tag="one")

            sign_x = CopySign(one, x, tag="sign_x")

            n_invpi = self.precision.round_sollya_object(
                1 / sollya.pi, sollya.RN)
            invpi = Constant(n_invpi, tag="invpi")

            unround_k = Multiplication(abs_x, invpi, tag="unround_k")

            k = Floor(unround_k, precision=self.precision, tag="k")

            pi = Constant(n_pi, tag="pi")

            whole = Multiplication(k, pi, tag="whole")

            r = Subtraction(abs_x, whole, tag="r")

            ik = Conversion(k,
                            precision=ML_Binary32.get_integer_format(),
                            tag="ik")

            part = Modulo(ik,
                          2,
                          precision=ML_Binary32.get_integer_format(),
                          tag="part")

            z = r

        else:
            z = x

        approx_interval = sollya.Interval(-2**-7, n_pi + 2**-7)
        approx_func = sollya.sin(sollya.x)
        builder = Polynomial.build_from_approximation
        poly_object = builder(approx_func, range(1, self.poly_degree + 1, 2),
                              [self.precision] * (self.poly_degree + 1),
                              approx_interval, sollya.relative)

        self.poly_object = poly_object
        schemer = PolynomialSchemeEvaluator.generate_horner_scheme
        poly = schemer(poly_object, z)
        self.poly = poly

        if not self.skip_reduction:
            post_bool = Equal(part, 1, tag="part_eq_1")

            flipped_poly = Select(post_bool, -poly, poly)

            retval = flipped_poly * sign_x

        else:
            retval = poly

        scheme = Return(retval, precision=self.precision)

        return scheme
Beispiel #3
0
        def generate_reduction_fptaylor(x):
            # get sign and abs_x, must be the same at endpoints
            if sollya.sup(x) <= 0:
                sign_x_expr = "-1.0"
                abs_x_expr = "-x"
                abs_x = -x
            elif sollya.inf(x) >= 0:
                sign_x_expr = "1.0"
                abs_x_expr = "x"
                abs_x = x
            else:
                assert False, "Interval must not straddle 0"

            # get k, must be the same at endpoints
            unround_k = abs_x * n_invpi
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if k_low != k_high:
                assert False, "Interval must not straddle multples of pi"
            k = int(k_low)
            part = k % 2

            r_expr = "abs_x - whole"
            r = abs_x - k * n_pi

            z_expr = "r"
            z = r

            if part == 1:
                flipped_poly_expr = "-poly"
            else:
                flipped_poly_expr = "poly"

            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  abs_x rnd64= {};".format(abs_x_expr),
                "  whole rnd64= {} * {};".format(k, n_pi),
                "  r rnd64= abs_x - whole;", "  z rnd64= {};".format(z_expr),
                "  poly rnd64= {};".format(poly_expr),
                "  flipped_poly rnd64= {};".format(flipped_poly_expr),
                "  retval rnd64= flipped_poly*{};".format(sign_x_expr),
                "Expressions", "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), z, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), z, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
Beispiel #4
0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="sinf.c",
                 function_name="sinf"):
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(ML_Binary32)))

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        inv_pi_value = 1 / pi

        # argument reduction
        mod_pi_x = NearestInteger(vx * inv_pi_value)
        red_vx = vx - mod_pi_x * pi

        approx_interval = Interval(0, pi / 2)

        poly_degree = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            sin(sollya.x) / sollya.x, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        poly_object = global_poly_object  #.sub_poly(start_index = 1)

        print "generating polynomial evaluation scheme"
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vx, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        print global_poly_object.get_sollya_object()

        pre_result = vx * _poly

        result = pre_result
        result.set_attributes(tag="result", debug=debug_lftolx)

        # main scheme
        print "MDL scheme"
        scheme = Statement(Return(result))

        #print scheme.get_str(depth = None, display_precision = True)

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        print "MDL fusing FMA"
        scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        #print scheme.get_str(depth = None, display_precision = True)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=ML_Binary32)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        print "silencing operation"
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)
        #print scheme.get_str(depth = None, display_precision = True)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        #print self.result.get(cg)
        output_stream = open("%s.c" % func_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
Beispiel #5
0
 def numeric_emulate(self, input_value):
     if self.sin_output:
         return sin(input_value)
     else:
         return cos(input_value)
Beispiel #6
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def SincosRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        sollya_precision = self.precision.get_sollya_object()
        hi_precision = self.precision.get_field_size() - 8
        cw_hi_precision = self.precision.get_field_size() - 4

        ext_precision = {
            ML_Binary32: ML_Binary64,
            ML_Binary64: ML_Binary64
        }[self.precision]

        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        if self.precision is ML_Binary32:
            ph_bound = S2**10
        else:
            ph_bound = S2**33

        test_ph_bound = Comparison(vx,
                                   ph_bound,
                                   specifier=Comparison.GreaterOrEqual,
                                   precision=ML_Bool,
                                   likely=False)

        # argument reduction
        # m
        frac_pi_index = {ML_Binary32: 10, ML_Binary64: 14}[self.precision]

        C0 = Constant(0, precision=int_precision)
        C1 = Constant(1, precision=int_precision)
        C_offset = Constant(3 * S2**(frac_pi_index - 1),
                            precision=int_precision)

        # 2^m / pi
        frac_pi = round(S2**frac_pi_index / pi, cw_hi_precision, sollya.RN)
        frac_pi_lo = round(S2**frac_pi_index / pi - frac_pi, sollya_precision,
                           sollya.RN)
        # pi / 2^m, high part
        inv_frac_pi = round(pi / S2**frac_pi_index, cw_hi_precision, sollya.RN)
        # pi / 2^m, low part
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)

        # computing k
        vx.set_attributes(tag="vx", debug=debug_multi)

        vx_pi = Addition(Multiplication(vx,
                                        Constant(frac_pi,
                                                 precision=self.precision),
                                        precision=self.precision),
                         Multiplication(vx,
                                        Constant(frac_pi_lo,
                                                 precision=self.precision),
                                        precision=self.precision),
                         precision=self.precision,
                         tag="vx_pi",
                         debug=debug_multi)

        k = NearestInteger(vx_pi,
                           precision=int_precision,
                           tag="k",
                           debug=debug_multi)
        # k in floating-point precision
        fk = Conversion(k,
                        precision=self.precision,
                        tag="fk",
                        debug=debug_multi)

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision,
                                   debug=debug_multi)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision,
                                      debug=debug_multi)

        # Cody-Waite reduction
        red_coeff1 = Multiplication(fk,
                                    inv_frac_pi_cst,
                                    precision=self.precision,
                                    exact=True)
        red_coeff2 = Multiplication(Negation(fk, precision=self.precision),
                                    inv_frac_pi_lo_cst,
                                    precision=self.precision,
                                    exact=True)

        # Should be exact / Sterbenz' Lemma
        pre_sub_mul = Subtraction(vx,
                                  red_coeff1,
                                  precision=self.precision,
                                  exact=True)

        # Fast2Sum
        s = Addition(pre_sub_mul,
                     red_coeff2,
                     precision=self.precision,
                     unbreakable=True,
                     tag="s",
                     debug=debug_multi)
        z = Subtraction(s,
                        pre_sub_mul,
                        precision=self.precision,
                        unbreakable=True,
                        tag="z",
                        debug=debug_multi)
        t = Subtraction(red_coeff2,
                        z,
                        precision=self.precision,
                        unbreakable=True,
                        tag="t",
                        debug=debug_multi)

        red_vx_std = Addition(s, t, precision=self.precision)
        red_vx_std.set_attributes(tag="red_vx_std", debug=debug_multi)

        # To compute sine we offset x by 3pi/2
        # which means add 3  * S2^(frac_pi_index-1) to k
        if self.sin_output:
            Log.report(Log.Info, "Computing Sin")
            offset_k = Addition(k,
                                C_offset,
                                precision=int_precision,
                                tag="offset_k")
        else:
            Log.report(Log.Info, "Computing Cos")
            offset_k = k

        modk = Variable("modk",
                        precision=int_precision,
                        var_type=Variable.Local)
        red_vx = Variable("red_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        # Faster modulo using bitwise logic
        modk_std = BitLogicAnd(offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="modk",
                               debug=debug_multi)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        red_vx.set_interval(approx_interval)

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        Log.report(Log.Info,
                   "building tabulated approximation for sin and cos")

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        # polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        table_index_size = frac_pi_index + 1
        cos_table = ML_NewTable(dimensions=[2**table_index_size, 1],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("cos_table"))

        for i in range(2**(frac_pi_index + 1)):
            local_x = i * pi / S2**frac_pi_index
            cos_local = round(cos(local_x), self.precision.get_sollya_object(),
                              sollya.RN)
            cos_table[i][0] = cos_local

        sin_index = Modulo(modk + 2**(frac_pi_index - 1),
                           2**(frac_pi_index + 1),
                           precision=int_precision,
                           tag="sin_index")  #, debug = debug_multi)
        tabulated_cos = TableLoad(cos_table,
                                  modk,
                                  C0,
                                  precision=self.precision,
                                  tag="tab_cos",
                                  debug=debug_multi)
        tabulated_sin = -TableLoad(cos_table,
                                   sin_index,
                                   C0,
                                   precision=self.precision,
                                   tag="tab_sin",
                                   debug=debug_multi)

        poly_degree_cos = sup(
            guessdegree(cos(sollya.x), approx_interval, S2**
                        -self.precision.get_precision()) + 2)
        poly_degree_sin = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -self.precision.get_precision()) + 2)

        poly_degree_cos_list = range(0, int(poly_degree_cos) + 3)
        poly_degree_sin_list = range(0, int(poly_degree_sin) + 3)

        # cosine polynomial: limiting first and second coefficient precision to 1-bit
        poly_cos_prec_list = [self.precision] * len(poly_degree_cos_list)
        # sine polynomial: limiting first coefficient precision to 1-bit
        poly_sin_prec_list = [self.precision] * len(poly_degree_sin_list)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
        Log.report(Log.Info,
                   "building mathematical polynomials for sin and cos")
        # Polynomial approximations
        Log.report(Log.Info, "cos")
        poly_object_cos, poly_error_cos = Polynomial.build_from_approximation_with_error(
            cos(sollya.x),
            poly_degree_cos_list,
            poly_cos_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "sin")
        poly_object_sin, poly_error_sin = Polynomial.build_from_approximation_with_error(
            sin(sollya.x),
            poly_degree_sin_list,
            poly_sin_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        Log.report(
            Log.Info, "poly error cos: {} / {:d}".format(
                poly_error_cos, int(sollya.log2(poly_error_cos))))
        Log.report(
            Log.Info, "poly error sin: {0} / {1:d}".format(
                poly_error_sin, int(sollya.log2(poly_error_sin))))
        Log.report(Log.Info, "poly cos : %s" % poly_object_cos)
        Log.report(Log.Info, "poly sin : %s" % poly_object_sin)

        # Polynomial evaluation scheme
        poly_cos = polynomial_scheme_builder(
            poly_object_cos.sub_poly(start_index=1),
            red_vx,
            unified_precision=self.precision)
        poly_sin = polynomial_scheme_builder(
            poly_object_sin.sub_poly(start_index=2),
            red_vx,
            unified_precision=self.precision)
        poly_cos.set_attributes(tag="poly_cos", debug=debug_multi)
        poly_sin.set_attributes(tag="poly_sin",
                                debug=debug_multi,
                                unbreakable=True)

        # TwoProductFMA
        mul_cos_x = tabulated_cos * poly_cos
        mul_cos_y = FusedMultiplyAdd(tabulated_cos,
                                     poly_cos,
                                     -mul_cos_x,
                                     precision=self.precision)

        mul_sin_x = tabulated_sin * poly_sin
        mul_sin_y = FusedMultiplyAdd(tabulated_sin,
                                     poly_sin,
                                     -mul_sin_x,
                                     precision=self.precision)

        mul_coeff_sin_hi = tabulated_sin * red_vx
        mul_coeff_sin_lo = FusedMultiplyAdd(tabulated_sin, red_vx,
                                            -mul_coeff_sin_hi)

        mul_cos = Addition(mul_cos_x,
                           mul_cos_y,
                           precision=self.precision,
                           tag="mul_cos")  #, debug = debug_multi)
        mul_sin = Negation(Addition(mul_sin_x,
                                    mul_sin_y,
                                    precision=self.precision),
                           precision=self.precision,
                           tag="mul_sin")  #, debug = debug_multi)
        mul_coeff_sin = Negation(Addition(mul_coeff_sin_hi,
                                          mul_coeff_sin_lo,
                                          precision=self.precision),
                                 precision=self.precision,
                                 tag="mul_coeff_sin")  #, debug = debug_multi)

        mul_cos_x.set_attributes(
            tag="mul_cos_x", precision=self.precision)  #, debug = debug_multi)
        mul_cos_y.set_attributes(
            tag="mul_cos_y", precision=self.precision)  #, debug = debug_multi)
        mul_sin_x.set_attributes(
            tag="mul_sin_x", precision=self.precision)  #, debug = debug_multi)
        mul_sin_y.set_attributes(
            tag="mul_sin_y", precision=self.precision)  #, debug = debug_multi)

        cos_eval_d_1 = (((mul_cos + mul_sin) + mul_coeff_sin) + tabulated_cos)

        cos_eval_d_1.set_attributes(tag="cos_eval_d_1",
                                    precision=self.precision,
                                    debug=debug_multi)

        result_1 = Statement(Return(cos_eval_d_1))

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################
        # payne and hanek argument reduction for large arguments
        ph_k = frac_pi_index
        ph_frac_pi = round(S2**ph_k / pi, 1500, sollya.RN)
        ph_inv_frac_pi = pi / S2**ph_k

        ph_statement, ph_acc, ph_acc_int = generate_payne_hanek(vx,
                                                                ph_frac_pi,
                                                                self.precision,
                                                                n=100,
                                                                k=ph_k)

        # assigning Large Argument Reduction reduced variable
        lar_vx = Variable("lar_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        lar_red_vx = Addition(Multiplication(lar_vx,
                                             inv_frac_pi,
                                             precision=self.precision),
                              Multiplication(lar_vx,
                                             inv_frac_pi_lo,
                                             precision=self.precision),
                              precision=self.precision,
                              tag="lar_red_vx",
                              debug=debug_multi)

        C32 = Constant(2**(ph_k + 1), precision=int_precision, tag="C32")
        ph_acc_int_red = Select(ph_acc_int < C0,
                                C32 + ph_acc_int,
                                ph_acc_int,
                                precision=int_precision,
                                tag="ph_acc_int_red")
        if self.sin_output:
            lar_offset_k = Addition(ph_acc_int_red,
                                    C_offset,
                                    precision=int_precision,
                                    tag="lar_offset_k")
        else:
            lar_offset_k = ph_acc_int_red

        ph_acc_int_red.set_attributes(tag="ph_acc_int_red", debug=debug_multi)
        lar_modk = BitLogicAnd(lar_offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="lar_modk",
                               debug=debug_multi)

        lar_statement = Statement(ph_statement,
                                  ReferenceAssign(lar_vx,
                                                  ph_acc,
                                                  debug=debug_multi),
                                  ReferenceAssign(red_vx,
                                                  lar_red_vx,
                                                  debug=debug_multi),
                                  ReferenceAssign(modk, lar_modk),
                                  prevent_optimization=True)

        test_NaN_or_Inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               tag="NaN_or_Inf",
                               debug=debug_multi)
        return_NaN_or_Inf = Statement(Return(FP_QNaN(self.precision)))

        scheme = ConditionBlock(
            test_NaN_or_Inf, Statement(ClearException(), return_NaN_or_Inf),
            Statement(
                modk, red_vx,
                ConditionBlock(
                    test_ph_bound, lar_statement,
                    Statement(
                        ReferenceAssign(modk, modk_std),
                        ReferenceAssign(red_vx, red_vx_std),
                    )), result_1))

        return scheme
Beispiel #7
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "target: %s " % self.processor.target_name)

        # display parameter information
        Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
        Log.report(Log.Info, "input interval: %s " % self.input_interval)

        accuracy_goal = self.accuracy.get_goal()
        Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

        table_size_log = self.table_size_log
        integer_size = 31
        integer_precision = ML_Int32

        max_bound = sup(abs(self.input_interval))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(
                local_x
            )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

            sin_local = sin(
                local_x
            )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.io_precisions[0]
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(x), [0, 2], [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=self.io_precisions[0])

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme