Exemple #1
0
    def __call__(args):
        """ Print the result of +, - and * for every ordered pair of operands.

            The operand list is made of binary64 special values (infinities,
            signed zeros, quiet and signaling NaN) followed by the numeric
            values 7.0 and -3.0.

            Always returns True. """
        fmt = ML_Binary64
        special_builders = (
            FP_PlusInfty,
            FP_MinusInfty,
            FP_PlusZero,
            FP_MinusZero,
            FP_QNaN,
            FP_SNaN,
            # FP_PlusOmega and FP_MinusOmega are currently disabled
        )
        operands = [build(fmt) for build in special_builders]
        operands.append(NumericValue(7.0))
        operands.append(NumericValue(-3.0))

        binary_ops = {
            "+": operator.__add__,
            "-": operator.__sub__,
            "*": operator.__mul__,
        }
        for symbol, apply_op in binary_ops.items():
            for lhs in operands:
                for rhs in operands:
                    outcome = apply_op(lhs, rhs)
                    print("{} {} {} = {}".format(lhs, symbol, rhs, outcome))

        return True
Exemple #2
0
 def __call__(args):
     """ Check comparison operators on floating-point special values.

         For each precision in (ML_Binary32, ML_Binary64), every entry of
         TEST_CASE is a tuple (lhs, operator symbol, rhs, expected result).
         The operator symbol is resolved through op_map (defined outside
         this method) and the result is compared with the expected value.

         Returns True when every case matches; raises AssertionError on the
         first mismatch. """
     for PRECISION in [ML_Binary32, ML_Binary64]:
         TEST_CASE = [
             (FP_PlusOmega(PRECISION), "<", FP_SNaN(PRECISION), Unordered),
             (FP_PlusOmega(PRECISION), "<=", FP_SNaN(PRECISION), Unordered),
             (FP_PlusInfty(PRECISION), "==", FP_PlusInfty(PRECISION), True),
             (FP_PlusInfty(PRECISION), "==", FP_QNaN(PRECISION), False),
             (FP_PlusZero(PRECISION), "==", FP_MinusZero(PRECISION), True),
             (FP_PlusZero(PRECISION), "!=", FP_MinusZero(PRECISION), False),
             (FP_QNaN(PRECISION), "==", FP_QNaN(PRECISION), False),
             (FP_PlusInfty(PRECISION), "==", -FP_MinusInfty(PRECISION), True),
             (FP_MinusInfty(PRECISION), ">", 0, False),
             (FP_MinusInfty(PRECISION), ">", FP_PlusZero(PRECISION), False),
             (FP_MinusInfty(PRECISION), ">", FP_MinusZero(PRECISION), False),
         ]
         for lhs, op, rhs, expected in TEST_CASE:
             result = op_map[op](lhs, rhs)
             # the message needs five arguments: a missing comma between rhs
             # and result used to turn a failing check into a NameError
             assert result == expected, "failure: {} {} {} = {} vs expected {} ".format(lhs, op, rhs, result, expected)
     return True
Exemple #3
0
 def numeric_emulate(self, vx, n):
     """ Numeric emulation of n-th root

         Args:
             vx: input value, either a special value (NaN, infinity, zero)
                 or a numeric value
             n: root index; its parity is taken from int(n)

         Returns:
             a special value (FP_QNaN, FP_PlusInfty, FP_MinusInfty,
             FP_PlusZero, or vx itself), SOLLYA_INFTY, or a sollya object
             holding the computed root

         Raises:
             NotImplementedError: vx is a special value which is neither a
                 NaN, an infinity nor a zero """
     if FP_SpecialValue.is_special_value(vx):
         if is_nan(vx):
             return FP_QNaN(self.precision)
         elif is_plus_infty(vx):
             return SOLLYA_INFTY
         elif is_minus_infty(vx):
             # Python's % is non-negative, so this also matches odd n < 0
             if int(n) % 2 == 1:
                 return vx
             else:
                 return FP_QNaN(self.precision)
         elif is_zero(vx):
             n_is_odd = int(n) % 2 != 0
             if n_is_odd:
                 if n < 0:
                     # rootn(±0, n) is ±∞ for odd n < 0
                     if is_plus_zero(vx):
                         return FP_PlusInfty(self.precision)
                     else:
                         return FP_MinusInfty(self.precision)
                 elif n > 0:
                     # rootn(±0, n) is ±0 for odd n > 0 (OpenCL-C); this
                     # case used to fall through to the NaN result below
                     return vx
             else:
                 if n < 0:
                     # rootn(±0, n) is +∞ for even n < 0
                     return FP_PlusInfty(self.precision)
                 elif n > 0:
                     # rootn(±0, n) is +0 for even n > 0
                     return FP_PlusZero(self.precision)
             # remaining case (n == 0): NaN
             return FP_QNaN(self.precision)
         else:
             raise NotImplementedError
     # OpenCL-C rootn, x < 0 and y odd: -exp2(log2(-x) / y)
     S2 = sollya.SollyaObject(2)
     if vx < 0:
         if int(n) % 2 != 0:
             if n > 0:
                 v = -bigfloat.root(
                     sollya.SollyaObject(-vx).bigfloat(), int(n))
             else:
                 v = -S2**(sollya.log2(-vx) / n)
         else:
             # even root of a negative number
             return FP_QNaN(self.precision)
     elif n < 0:
         # OpenCL-C definition
         v = S2**(sollya.log2(vx) / n)
     else:
         v = bigfloat.root(sollya.SollyaObject(vx).bigfloat(), int(n))
     return sollya.SollyaObject(v)
Exemple #4
0
    def generate_scheme(self):
        """ Build the evaluation scheme for input "x".

            For x > 0 the scheme returns x - 0.33 * x^3 + (2 / 15) * x^5,
            otherwise it returns a quiet NaN. """
        # declaring input variable
        vx = self.implementation.add_input_variable("x", self.precision)
        x_squared = vx * vx

        # nodes are created in the same order as a single inline expression
        # would create them
        is_positive = vx > 0
        cubic_term = 0.33 * x_squared * vx
        low_order = vx - cubic_term
        quintic_term = (2 / 15.0) * vx * x_squared * x_squared
        positive_branch = Return(low_order + quintic_term)
        fallback_branch = Return(FP_QNaN(self.precision))

        return ConditionBlock(is_positive, positive_branch, fallback_branch)
Exemple #5
0
 def standard_test_cases(self):
     """ Return the list of fixed input tuples (one operand each) which
         must always be part of the test set. """
     parsed_literals = (
         "0xbf50bc3a",
         "0x1.0p-126",
         "0x1.0p-127",
         "-0x1.fffffep126",
         "-infty",
         "infty",
     )
     cases = [(sollya.parse(literal),) for literal in parsed_literals]
     cases.append((FP_QNaN(self.precision),))
     # issue in generic newlib implementation
     cases.append((sollya.parse("0x1.62e302p+6"),))
     return cases
Exemple #6
0
    def numeric_emulate(self, vx, vy):
        """ Numeric emulation of the remainder / quotient of vx by vy

            When self.mode is QUOTIENT_MODE the result is the signed
            quotient reduced modulo 2**self.quotient_size, otherwise it is
            the remainder whose magnitude does not exceed |vy| / 2. """
        quotient_requested = self.mode is QUOTIENT_MODE

        if quotient_requested:
            # invalid value specified by OpenCL-C
            invalid_inputs = (is_nan(vx) or is_nan(vy) or is_zero(vy)
                              or is_infty(vx))
            if invalid_inputs:
                return 0
            # valid value
            if is_infty(vy) or is_zero(vx):
                return 0
        else:
            if is_nan(vx) or is_nan(vy) or is_zero(vy):
                return FP_QNaN(self.precision)
            if is_zero(vx):
                return vx
            if is_infty(vx):
                return FP_QNaN(self.precision)
            if is_infty(vy):
                return vx

        # canonical case shared by both modes: start from the euclidian
        # division then correct it so the remainder is the closest to zero
        remainder = sollya.euclidian_mod(vx, vy)
        quotient = int(sollya.euclidian_div(vx, vy))
        if abs(remainder) > abs(vy * 0.5):
            both_negative = remainder < 0 and vy < 0
            if both_negative or (remainder > 0 and vy > 0):
                # same sign
                remainder -= vy
                quotient += 1
            else:
                # opposite sign
                remainder += vy
                quotient -= 1

        if not quotient_requested:
            return remainder

        wrapped_quotient = abs(quotient) % 2**self.quotient_size
        return -wrapped_quotient if vx / vy < 0 else wrapped_quotient
Exemple #7
0
    def numeric_emulate(self, io_map):
        """ Numeric emulation of the reciprocal 1 / x with a selectable
            rounding mode.

            Args:
                io_map: dict with key "x" (input value) and key "rnd_mode"
                    (integer 0..3 selecting sollya.RN, RU, RD or RZ)

            Returns:
                a dict with the single key "vr_out" holding the emulated
                result. If no predicate matches the input, an error is
                reported through Log.report instead. """
        vx = io_map["x"]
        rnd_mode_i = io_map["rnd_mode"]

        rnd_mode = {
            0: sollya.RN,
            1: sollya.RU,
            2: sollya.RD,
            3: sollya.RZ
        }[rnd_mode_i]

        def div_numeric_emulate(vx):
            """ Round 1 / vx to self.precision using rnd_mode """
            sollya_format = self.precision.get_sollya_object()
            return sollya.round(1.0 / vx, sollya_format, rnd_mode)

        # (predicate, handler) pairs, tried in order; the first predicate
        # accepting vx selects the handler producing the result
        value_mapping = [
            (is_plus_infty, lambda _: 0.0),
            (is_nan, lambda _: FP_QNaN(self.precision)),
            # NOTE(review): 1 / -inf is mapped to a NaN rather than -0;
            # confirm this matches the emulated design
            (is_minus_infty, lambda _: FP_QNaN(self.precision)),
            (is_plus_zero, lambda _: FP_PlusInfty(self.precision)),
            (is_minus_zero, lambda _: FP_MinusInfty(self.precision)),
            # the handler must return a value: it used to return a nested
            # lambda, which stored a function object in "vr_out"
            (is_sv_omega, lambda op: div_numeric_emulate(op.get_value())),
            (lambda op: not (FP_SpecialValue.is_special_value(op)),
             div_numeric_emulate),
        ]
        result = {}
        for predicate, handler in value_mapping:
            if predicate(vx):
                result["vr_out"] = handler(vx)
                return result
        Log.report(Log.Error,
                   "no predicate fits {} in numeric_emulate\n".format(vx))
Exemple #8
0
    def generate_scalar_scheme(self, vx, vy):
        """ Build the operation graph computing pow(vx, vy) for scalar inputs.

            The generic result is evaluated as 2^(y * log2(|x|)), split into
            the product exp2_t1 * exp2_t0_int * exp2_t0_frac. It is preceded
            by a sequence of conditional returns implementing the special
            cases listed in the comments below.

            Args:
                vx: graph node for the base x (tagged "x")
                vy: graph node for the exponent y (tagged "y")

            Returns:
                a Statement made of the special-case blocks followed by the
                default Return """
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        vy.set_attributes(tag="y")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #
        # pow(x, y) = x^(y)
        #             = exp(y * log(x))
        #             = 2^(y * log2(x))
        #             = 2^(y * (log2(m) + e))
        #
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        m = MantissaExtraction(vx, tag="m", precision=self.precision)

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed, language=None,
            table_getter= lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x) # /sollya.log(self.basis)



        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)


        # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
        #
        # log_approx = log2(Abs(m))
        # r = y * log_approx ~ y * log2(m)
        #
        # NOTES: manage cases where e is negative and
        # (y * log2(m)) AND (y * e) could cancel out
        # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT
        # be of opposite signs

        # log2(m) in [0, 1[ so cancellation can occur only if e == -1
        # we split 2^x in 2^x = 2^t0 * 2^t1
        # if e < 0: t0 = y * (log2(m) + e), t1=0
        # else:     t0 = y * log2(m), t1 = y * e

        t_cond = e < 0

        # e_y ~ e * y
        e_f = Conversion(e, precision=self.precision)
        #t0 = Select(t_cond, (e_f + log_approx) * vy, Multiplication(e_f, vy), tag="t0")
        #NearestInteger(t0, precision=self.precision, tag="t0_int")

        EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
        LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
        t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
        t0_frac = Select(t_cond, FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY) ,EY - t0_int, tag="t0_frac")
        #t0_frac.set_attributes(tag="t0_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)

        exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
        exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

        exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision), precision=self.precision, tag="exp2_t0_int")

        t1 = Select(t_cond, Constant(0, precision=self.precision), r)
        exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
        exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

        result_sign = Constant(1.0, precision=self.precision) # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        y_int = NearestInteger(vy, precision=self.precision)
        y_is_integer = Equal(y_int, vy)
        y_is_even = LogicalOr(
            # if y is a number (exc. inf) greater than 2**mantissa_size * 2,
            # then it is an integer multiple of 2 => even
            Abs(vy) >= 2**(self.precision.get_mantissa_size()+1),
            LogicalAnd(
                # both conditions must be combined as a graph node: Python's
                # `and` would evaluate the truthiness of the first node and
                # silently drop it from the expression
                LogicalAnd(
                    y_is_integer,
                    Abs(vy) < 2**(self.precision.get_mantissa_size()+1)
                ),
                # we want to limit the modulo computation to an integer input
                Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
            )
        )
        y_is_odd = LogicalAnd(
            LogicalAnd(
                Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                y_is_integer
            ),
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 1)
        )


        # special cases management
        special_case_results = Statement(
            # x is sNaN OR y is sNaN
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsSignalingNaN), Test(vy, specifier=Test.IsSignalingNaN)),
                Return(FP_QNaN(self.precision))
            ),
            # pow(x, ±0) is 1 if x is not a signaling NaN
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Return(Constant(1.0, precision=self.precision))
            ),
            # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)),
                Return(Select(Test(vx, specifier=Test.IsPositiveZero), FP_PlusInfty(self.precision), FP_MinusInfty(self.precision))),
            ),
            # pow(±0, −∞) is +∞ with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, +∞) is +0 with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusZero(self.precision)),
            ),
            # pow(±0, y) is ±0 for finite y>0 an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)),
                Return(vx),
            ),
            # pow(−1, ±∞) is 1 with no exception
            ConditionBlock(
                LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(+1, y) is 1 for any y (even a quiet NaN)
            ConditionBlock(
                vx == 1,
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(x, +∞) is +0 for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +∞ for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +0 for a number y < 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +∞ for a number y > 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(−∞, y) is −0 for finite y < 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy < 0)),
                Return(FP_MinusZero(self.precision)),
            ),
            # pow(−∞, y) is −∞ for finite y > 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy > 0)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusZero(self.precision)),
            ),
            # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
            # TODO: signal divideByZero exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +0 for finite y>0 and not an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusZero(self.precision)),
            ),
        )

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = Statement(
            special_case_results,
            # fallback default cases
            Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
        return result
Exemple #9
0
    def numeric_emulate(self, vx, vy):
        """ Numeric emulation of pow

            The special cases are tested in sequence, each one documented by
            the rule it implements; the first matching rule gives the
            result. Inputs matching no rule are evaluated as vx**vy on
            sollya objects.

            Args:
                vx: base, special value or numeric value
                vy: exponent, special value or numeric value

            Returns:
                a special value object, a Python number (1 or 1.0), vx
                itself, or a sollya object """

        if is_snan(vx) or is_snan(vy):
            return FP_QNaN(self.precision)
        # pow(x, ±0) is 1 if x is not a signaling NaN
        if is_zero(vy):
            return 1.0
        # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
        if is_plus_zero(vx) and not is_special_value(vy) and (int(vy) == vy) and (int(vy) % 2 == 1) and vy < 0:
            return FP_PlusInfty(self.precision)
        if is_minus_zero(vx) and not is_special_value(vy) and (int(vy) == vy) and (int(vy) % 2 == 1) and vy < 0:
            return FP_MinusInfty(self.precision)
        # pow(±0, −∞) is +∞ with no exception
        # (this rule used to test is_minus_zero(vy), which can never hold
        #  after the is_zero(vy) early return, and to return −∞)
        if is_zero(vx) and is_minus_infty(vy):
            return FP_PlusInfty(self.precision)
        # pow(±0, +∞) is +0 with no exception
        # (this rule used to test is_plus_zero(vy), unreachable as well)
        if is_zero(vx) and is_plus_infty(vy):
            return FP_PlusZero(self.precision)
        # pow(±0, y) is ±0 for finite y>0 an odd integer
        if is_zero(vx) and not is_special_value(vy) and (int(vy) == vy) and (int(vy) % 2 == 1) and vy > 0:
            return vx
        # pow(−1, ±∞) is 1 with no exception
        if vx == -1.0 and is_infty(vy):
            return 1
        # pow(+1, y) is 1 for any y (even a quiet NaN)
        if vx == 1.0:
            return 1.0
        # pow(x, +∞) is +0 for −1<x<1
        if -1 < vx < 1 and is_plus_infty(vy):
            return FP_PlusZero(self.precision)
        # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
        if (vx < -1 or vx > 1) and is_plus_infty(vy):
            return FP_PlusInfty(self.precision)
        # pow(x, −∞) is +∞ for −1<x<1
        if -1 < vx < 1 and is_minus_infty(vy):
            return FP_PlusInfty(self.precision)
        # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
        if is_minus_infty(vy) and (vx < -1 or vx > 1):
            return FP_PlusZero(self.precision)
        # pow(+∞, y) is +0 for a number y < 0
        if is_plus_infty(vx) and vy < 0:
            return FP_PlusZero(self.precision)
        # pow(+∞, y) is +∞ for a number y > 0
        if is_plus_infty(vx) and vy > 0:
            return FP_PlusInfty(self.precision)
        # pow(−∞, y) is −0 for finite y < 0 an odd integer
        if is_minus_infty(vx) and vy < 0 and not is_special_value(vy) and int(vy) == vy and int(vy) % 2 == 1:
            return FP_MinusZero(self.precision)
        # pow(−∞, y) is −∞ for finite y > 0 an odd integer
        if is_minus_infty(vx) and vy > 0 and not is_special_value(vy) and  int(vy) == vy and int(vy) % 2 == 1:
            return FP_MinusInfty(self.precision)
        # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
        if is_minus_infty(vx) and vy < 0 and not is_special_value(vy) and not(int(vy) == vy and int(vy) % 2 == 1):
            return FP_PlusZero(self.precision)
        # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
        if is_minus_infty(vx) and vy > 0 and not is_special_value(vy) and not(int(vy) == vy and int(vy) % 2 == 1):
            return FP_PlusInfty(self.precision)
        # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
        if is_zero(vx) and vy < 0 and not is_special_value(vy) and not(int(vy) == vy and int(vy) % 2 == 1):
            return FP_PlusInfty(self.precision)
        # pow(±0, y) is +0 for finite y>0 and not an odd integer
        if is_zero(vx) and vy > 0 and not is_special_value(vy) and not(int(vy) == vy and int(vy) % 2 == 1):
            return FP_PlusZero(self.precision)
        # TODO
        # pow(x, y) signals the invalid operation exception for finite x<0 and finite non-integer y.
        return sollya.SollyaObject(vx)**sollya.SollyaObject(vy)
Exemple #10
0
    def generate_scheme(self):
        """ Build the operation graph computing the division x / y.

            Two inputs "x" and "y" are declared with the intervals found in
            self.input_intervals. When an input interval is missing, or when
            the quotient range is close to the underflow / overflow limits,
            both operands are first scaled by a power of two derived from
            their clamped exponents. A reciprocal seed of the (scaled)
            divisor is refined by compute_reduced_reciprocal, then
            compute_reduced_division builds the quotient approximation. The
            returned scheme handles NaN, infinite and zero operands before
            returning the numerical result.

            Returns:
                the ConditionBlock tree implementing the division

            Raises:
                NotImplementedError: the target processor does not support
                    ReciprocalSeed for self.language """
        # We wish to compute vx / vy
        vx = self.implementation.add_input_variable(
            "x", self.precision, interval=self.input_intervals[0])
        vy = self.implementation.add_input_variable(
            "y", self.precision, interval=self.input_intervals[1])

        # maximum exponent magnitude (to avoid overflow / underflow during
        # intermediary computations)
        int_prec = self.precision.get_integer_format()
        max_exp_mag = Constant(self.precision.get_emax() - 1,
                               precision=int_prec)

        exact_ex = ExponentExtraction(vx,
                                      tag="exact_ex",
                                      precision=int_prec,
                                      debug=debug_multi)
        exact_ey = ExponentExtraction(vy,
                                      tag="exact_ey",
                                      precision=int_prec,
                                      debug=debug_multi)

        # exponents clamped to [-max_exp_mag, max_exp_mag]
        ex = Max(Min(exact_ex, max_exp_mag, precision=int_prec),
                 -max_exp_mag,
                 tag="ex",
                 precision=int_prec)
        ey = Max(Min(exact_ey, max_exp_mag, precision=int_prec),
                 -max_exp_mag,
                 tag="ey",
                 precision=int_prec)

        # default attributes applied until the matching unset calls below
        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        # NOTE(review): the original comment here read "computing the inverse
        # square root"; the code below builds the scaling factors and the
        # reciprocal seed used by the division
        init_approx = None

        # 2^-ex and 2^-ey
        scaling_factor_x = ExponentInsertion(-ex,
                                             tag="sfx_ei",
                                             precision=self.precision,
                                             debug=debug_multi)
        scaling_factor_y = ExponentInsertion(-ey,
                                             tag="sfy_ei",
                                             precision=self.precision,
                                             debug=debug_multi)

        def test_interval_out_of_bound_risk(x_range, y_range):
            """ Try to determine from x and y's interval if there is a risk
                of underflow or overflow """
            div_range = abs(x_range / y_range)
            underflow_risk = sollya.inf(div_range) < S2**(
                self.precision.get_emin_normal() + 2)
            overflow_risk = sollya.sup(div_range) > S2**(
                self.precision.get_emax() - 2)
            return underflow_risk or overflow_risk

        # a missing input interval is treated as a risk
        out_of_bound_risk = (self.input_intervals[0] is None
                             or self.input_intervals[1] is None
                             ) or test_interval_out_of_bound_risk(
                                 self.input_intervals[0],
                                 self.input_intervals[1])
        Log.report(Log.Debug,
                   "out_of_bound_risk: {}".format(out_of_bound_risk))

        # scaled version of vx and vy, to avoid overflow and underflow
        if out_of_bound_risk:
            scaled_vx = vx * scaling_factor_x
            scaled_vy = vy * scaling_factor_y
            # after scaling, both operands are declared to lie in
            # [-2, -1] U [1, 2]
            scaled_interval = MetaIntervalList(
                [MetaInterval(Interval(-2, -1)),
                 MetaInterval(Interval(1, 2))])
            scaled_vx.set_attributes(tag="scaled_vx",
                                     debug=debug_multi,
                                     interval=scaled_interval)
            scaled_vy.set_attributes(tag="scaled_vy",
                                     debug=debug_multi,
                                     interval=scaled_interval)
            seed_interval = 1 / scaled_interval
            # NOTE(review): unconditional print on stdout, looks like a
            # debug leftover
            print("seed_interval=1/{}={}".format(scaled_interval,
                                                 seed_interval))
        else:
            scaled_vx = vx
            scaled_vy = vy
            seed_interval = 1 / scaled_vy.get_interval()

        # We need a first approximation to 1 / scaled_vy
        # (dummy_seed is only used to query the processor for support)
        dummy_seed = ReciprocalSeed(EmptyOperand(precision=self.precision),
                                    precision=self.precision)

        if self.processor.is_supported_operation(dummy_seed, self.language):
            init_approx = ReciprocalSeed(scaled_vy,
                                         precision=self.precision,
                                         tag="init_approx",
                                         debug=debug_multi)

        else:
            # generate tabulated version of seed
            raise NotImplementedError

        # NOTE(review): current_approx_std and num_iteration are not read
        # again in this method
        current_approx_std = init_approx
        # correctly-rounded inverse computation
        num_iteration = self.num_iter

        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()

        # check if inputs are zeros
        x_zero = Test(vx,
                      specifier=Test.IsZero,
                      likely=False,
                      precision=ML_Bool)
        y_zero = Test(vy,
                      specifier=Test.IsZero,
                      likely=False,
                      precision=ML_Bool)

        # sign comparison of vx and vy; when it holds, the negative result
        # is selected in the special cases below
        comp_sign = Test(vx,
                         vy,
                         specifier=Test.CompSign,
                         tag="comp_sign",
                         debug=debug_multi)

        # check if divisor is NaN
        y_nan = Test(vy, specifier=Test.IsNaN, likely=False, precision=ML_Bool)

        # check if inputs are signaling NaNs
        x_snan = Test(vx,
                      specifier=Test.IsSignalingNaN,
                      likely=False,
                      precision=ML_Bool)
        y_snan = Test(vy,
                      specifier=Test.IsSignalingNaN,
                      likely=False,
                      precision=ML_Bool)

        # check if inputs are infinities
        x_inf = Test(vx,
                     specifier=Test.IsInfty,
                     likely=False,
                     tag="x_inf",
                     precision=ML_Bool)
        y_inf = Test(vy,
                     specifier=Test.IsInfty,
                     likely=False,
                     tag="y_inf",
                     debug=debug_multi,
                     precision=ML_Bool)

        # NOTE(review): gappa_vx and gappa_vy are not read again in this
        # method; scheme is reassigned before being returned
        scheme = None
        gappa_vx, gappa_vy = None, None

        # initial reciprocal approximation of 1.0 / scaled_vy
        inv_iteration_list, recp_approx = compute_reduced_reciprocal(
            init_approx, scaled_vy, self.num_iter)

        recp_approx.set_attributes(tag="recp_approx", debug=debug_multi)

        # approximation of scaled_vx / scaled_vy
        yerr_last, reduced_div_approx, div_iteration_list = compute_reduced_division(
            scaled_vx, scaled_vy, recp_approx)

        # evaluation error ranges of the reciprocal and of the division,
        # used to annotate the approximation nodes with intervals
        eval_error_range, div_eval_error_range = self.solve_eval_error(
            init_approx, recp_approx, reduced_div_approx, scaled_vx, scaled_vy,
            inv_iteration_list, div_iteration_list, S2**-7, seed_interval)
        # NOTE(review): eval_error is not read again in this method
        eval_error = sup(abs(eval_error_range))
        recp_interval = 1 / scaled_vy.get_interval() + eval_error_range
        recp_approx.set_interval(recp_interval)

        div_interval = scaled_vx.get_interval() / scaled_vy.get_interval(
        ) + div_eval_error_range
        reduced_div_approx.set_interval(div_interval)
        reduced_div_approx.set_tag("reduced_div_approx")

        if out_of_bound_risk:
            # results derived from the scaled approximation by the
            # scaling_div_result / subnormalize_result helpers
            unscaled_result = scaling_div_result(reduced_div_approx, ex,
                                                 scaling_factor_y,
                                                 self.precision)

            subnormal_result = subnormalize_result(recp_approx,
                                                   reduced_div_approx, ex, ey,
                                                   yerr_last, self.precision)
        else:
            # no scaling was applied: the approximation is the result
            unscaled_result = reduced_div_approx
            subnormal_result = reduced_div_approx

        x_inf_or_nan = Test(vx, specifier=Test.IsInfOrNaN, likely=False)
        y_inf_or_nan = Test(vy,
                            specifier=Test.IsInfOrNaN,
                            likely=False,
                            tag="y_inf_or_nan",
                            debug=debug_multi)

        # generate IEEE exception raising only if libm-compliant
        # mode is enabled
        enable_raise = self.libm_compliant

        # managing special cases
        # decision tree: x inf/NaN first, then x zero, then y inf/NaN,
        # then y zero, and finally the numerical case
        pre_scheme = ConditionBlock(
            x_inf_or_nan,
            ConditionBlock(
                x_inf,
                # x is infinite
                ConditionBlock(
                    y_inf_or_nan,
                    # inf / (inf or NaN) returns a quiet NaN
                    Statement(
                        # signaling NaNs raise invalid operation flags
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid))
                        if enable_raise else Statement(),
                        Return(FP_QNaN(self.precision)),
                    ),
                    # inf / finite returns a signed infinity
                    ConditionBlock(comp_sign,
                                   Return(FP_MinusInfty(self.precision)),
                                   Return(FP_PlusInfty(self.precision)))),
                # x is a NaN
                Statement(
                    ConditionBlock(x_snan, Raise(ML_FPE_Invalid))
                    if enable_raise else Statement(),
                    Return(FP_QNaN(self.precision)))),
            ConditionBlock(
                x_zero,
                # x is zero: NaN when y is zero or NaN, else x is returned
                ConditionBlock(
                    LogicalOr(y_zero, y_nan, precision=ML_Bool),
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid))
                        if enable_raise else Statement(),
                        Return(FP_QNaN(self.precision))), Return(vx)),
                ConditionBlock(
                    y_inf_or_nan,
                    ConditionBlock(
                        y_inf,
                        # finite / inf returns a signed zero
                        Return(
                            Select(comp_sign, FP_MinusZero(self.precision),
                                   FP_PlusZero(self.precision))),
                        # y is a NaN
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid))
                            if enable_raise else Statement(),
                            Return(FP_QNaN(self.precision)))),
                    ConditionBlock(
                        y_zero,
                        # non-zero finite / zero returns a signed infinity
                        Statement(
                            Raise(ML_FPE_DivideByZero)
                            if enable_raise else Statement(),
                            ConditionBlock(
                                comp_sign,
                                Return(FP_MinusInfty(self.precision)),
                                Return(FP_PlusInfty(self.precision)))),
                        # managing numerical value result cases
                        Statement(
                            recp_approx,
                            reduced_div_approx,
                            ConditionBlock(
                                Test(unscaled_result,
                                     specifier=Test.IsSubnormal,
                                     likely=False),
                                # result is subnormal
                                Statement(
                                    # inexact flag should have been raised when computing yerr_last
                                    # ConditionBlock(
                                    #    Comparison(
                                    #        yerr_last, 0,
                                    #        specifier=Comparison.NotEqual, likely=True),
                                    #    Statement(Raise(ML_FPE_Inexact, ML_FPE_Underflow))
                                    #),
                                    Return(subnormal_result), ),
                                # result is normal
                                Statement(
                                    # inexact flag should have been raised when computing yerr_last
                                    #ConditionBlock(
                                    #    Comparison(
                                    #        yerr_last, 0,
                                    #        specifier=Comparison.NotEqual, likely=True),
                                    #    Raise(ML_FPE_Inexact)
                                    #),
                                    Return(unscaled_result))),
                        )))))
        # managing rounding mode save and restore
        # to ensure intermediary computations are performed in round-to-nearest
        # clearing exception before final computation
        # (currently disabled: the scheme below is pre_scheme unchanged)

        #rnd_mode = GetRndMode()
        #scheme = Statement(
        #    rnd_mode,
        #    SetRndMode(ML_RoundToNearest),
        #    yerr_last,
        #    SetRndMode(rnd_mode),
        #    unscaled_result,
        #    ClearException(),
        #    pre_scheme
        #)

        scheme = pre_scheme

        return scheme
Exemple #11
0
    def generate_scheme(self):
        """Build the operation graph evaluating log(1 + x) for the input "x".

        The input is decomposed as x = m.2^e and 1 + x is rewritten as
        2^e . t with t = 2^-e + m.  A reciprocal seed r of the mantissa of t
        and a table storing log(r) as a (hi, lo) pair reduce the argument:

            log(1 + x) = (e + e_t).log(2) - log(r) + u . P(u),  u = r.t' - 1

        where P is a polynomial approximation of log1p(u) / u (see the
        derivation in the body).

        Special inputs are filtered before the numerical result is returned:
          - x < -1: exceptions are cleared, invalid is raised and a quiet NaN
            is returned;
          - x infinite (and not below -1): +infinity is returned;
          - x NaN: invalid is raised for a signaling NaN and a quiet NaN is
            returned.

        NOTE(review): the derivation below is written for e >= 0; behaviour
        for inputs with a negative exponent should be confirmed.

        Returns:
            the root ConditionBlock of the generated scheme.
        """
        vx = self.implementation.add_input_variable("x", self.precision)
        sollya_precision = self.get_input_precision().sollya_object

        # number of bits kept in the "hi" limbs: it is reduced so that the
        # product of a hi limb by an exponent value is exact
        hi_limb_bits = self.precision.get_field_size() - (self.precision.get_exponent_size() + 1)

        # 2-limb approximation of log(2)
        # hi part precision is reduced to provide exact operation
        # when multiplied by an exponent value
        log2_hi_value = round(log(2), hi_limb_bits, sollya.RN)
        log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)


        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_rcp_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(dummy_rcp_seed, language = None, table_getter = lambda self: self.approx_table_map)

        # table creation
        table_index_size = inv_approx_table.index_size
        log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
        # storing accurate logarithm approximation of value returned
        # by the fast reciprocal operation
        for i in range(0, 2**table_index_size):
            inv_value = inv_approx_table[i]
            value_high = round(log(inv_value), hi_limb_bits, sollya.RN)
            value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low


        neg_input = Comparison(vx, -1, likely=False, precision=ML_Bool, specifier=Comparison.Less, debug=debug_multi, tag="neg_input")
        vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, precision=ML_Bool, debug=debug_multi, tag="nan_or_inf")
        vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan")
        vx_inf    = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf")
        # NOTE(review): vx_subnormal is not referenced by the scheme below
        vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal")

        # for x = m.2^e, such that e >= 0
        #
        # log(1+x) = log(1 + m.2^e)
        #          = log(2^e . 2^-e + m.2^e)
        #          = log(2^e . (2^-e + m))
        #          = log(2^e) + log(2^-e + m)
        #          = e . log(2) + log (2^-e + m)
        #
        # t = (2^-e + m)
        # t = m_t . 2^e_t
        # r ~ 1 / m_t   => r.m_t ~ 1  =>  r.m_t - 1 ~ 0
        #
        # t' = t . 2^-e_t
        #    = 2^(-e-e_t) + m . 2^-e_t
        #
        # if e >= 0, then 2^-e <= 1, then 1 <= m + 2^-e <= 3
        # r = m_r . 2^e_r
        #
        # log(1+x) = e.log(2) + log(r . 2^e_t . 2^-e_t . (2^-e + m) / r)
        #          = e.log(2) + log(r . 2^(-e-e_t) + r.m.2^-e_t) + e_t . log(2)- log(r)
        #          = (e+e_t).log(2) + log(r . t') - log(r)
        #          = (e+e_t).log(2) + P_log1p(r . t' - 1) - log(r)
        #
        #

        # argument reduction
        m = MantissaExtraction(vx, tag="vx", precision=self.precision, debug=debug_multi)
        e = ExponentExtraction(vx, tag="e", precision=int_precision, debug=debug_multi)

        # 2^-e
        TwoMinusE = ExponentInsertion(-e, tag="Two_minus_e", precision=self.precision, debug=debug_multi)
        t = Addition(TwoMinusE, m, precision=self.precision, tag="t", debug=debug_multi)

        m_t = MantissaExtraction(t, tag="m_t", precision=self.precision, debug=debug_multi)
        e_t = ExponentExtraction(t, tag="e_t", precision=int_precision, debug=debug_multi)

        # 2^(-e-e_t)
        TwoMinusEEt = ExponentInsertion(-e-e_t, tag="Two_minus_e_et", precision=self.precision)
        # 2^-e_t
        TwoMinusEt = ExponentInsertion(-e_t, tag="Two_minus_et", precision=self.precision, debug=debug_multi)

        rcp_mt = ReciprocalSeed(m_t, tag="rcp_mt", precision=self.precision, debug=debug_multi)

        # the log table is indexed by the most significant bits of m_t
        INDEX_SIZE = table_index_size
        table_index = generic_mantissa_msb_index_fct(INDEX_SIZE, m_t)
        table_index.set_attributes(tag="table_index", debug=debug_multi)

        log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi)
        log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi)

        inv_err = S2**-6 # TODO: link to target DivisionSeed precision

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        # the polynomial approximates log1p(u) / u; it is multiplied back by
        # the reduced argument when building ext_poly
        approx_fct = sollya.log1p(sollya.x) / (sollya.x)
        poly_degree = sup(guessdegree(approx_fct, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
        Log.report(Log.Debug, "poly_degree is {}", poly_degree)
        global_poly_object = Polynomial.build_from_approximation(approx_fct, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
        poly_object = global_poly_object # .sub_poly(start_index=1)

        # multi-word format used for the accumulations of each base format
        EXT_PRECISION_MAP = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble,
            ML_SingleSingle: ML_TripleSingle,
            ML_DoubleDouble: ML_TripleDouble
        }
        if self.precision not in EXT_PRECISION_MAP:
            Log.report(Log.Error, "no extended precision available for {}", self.precision)

        ext_precision = EXT_PRECISION_MAP[self.precision]

        # pre_rtp = r . t' = r . 2^(-e-e_t) + r . m . 2^-e_t
        pre_rtp = Addition(
            rcp_mt * TwoMinusEEt,
            Multiplication(
                rcp_mt,
                Multiplication(
                    m,
                    TwoMinusEt,
                    precision=self.precision,
                    tag="pre_mult",
                    debug=debug_multi,
                ),
                precision=ext_precision,
                tag="pre_mult2",
                debug=debug_multi,
            ),
            precision=ext_precision,
            tag="pre_rtp",
            debug=debug_multi
        )
        # reduced argument u = r . t' - 1, computed in extended precision
        pre_red_vx = Addition(
            pre_rtp,
            -1,
            precision=ext_precision,
        )

        red_vx = Conversion(pre_red_vx, precision=self.precision, tag="red_vx", debug=debug_multi)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vx, unified_precision=self.precision)

        poly.set_attributes(tag="poly", debug=debug_multi)
        Log.report(Log.Debug, "{}", global_poly_object.get_sollya_object())

        fp_e = Conversion(e + e_t, precision=self.precision, tag="fp_e", debug=debug_multi)


        ext_poly = Multiplication(red_vx, poly, precision=ext_precision)

        # (e + e_t).log(2) - log(r) + u . P(u), accumulated in ext_precision
        pre_result = Addition(
            Addition(
                fp_e * log2_hi,
                fp_e * log2_lo,
                precision=ext_precision
            ),
            Addition(
                Addition(
                    -log_inv_hi,
                    -log_inv_lo,
                    precision=ext_precision
                ),
                ext_poly,
                precision=ext_precision
            ),
            precision=ext_precision
        )

        result = Conversion(pre_result, precision=self.precision, tag="result", debug=debug_multi)


        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(neg_input,
            Statement(
                ClearException(),
                Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision))
            ),
            ConditionBlock(vx_nan_or_inf,
                ConditionBlock(vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan,
                            Raise(ML_FPE_Invalid)
                        ),
                        Return(FP_QNaN(self.precision))
                    )
                ),
                Return(result)
            )
        )
        scheme = pre_scheme
        return scheme
Exemple #12
0
    def generate_scheme(self):
        """Build the operation graph of a floating-point remainder/quotient.

        The scheme computes the remainder of vx / vy together with the
        integer quotient, which is reduced modulo 2**self.quotient_size and
        given the sign of vx / vy.  What is handed back depends on self.mode:
          - QUOTIENT_MODE: the quotient is returned;
          - REMAINDER_MODE: the remainder is returned;
          - FULL_MODE: the quotient is stored through the extra "quo" pointer
            input and the remainder is returned.

        Returns:
            the Statement made of the special-case filters followed by the
            main iterative computation.

        Raises:
            NotImplementedError: if self.mode is none of the modes above.
        """
        int_precision = self.precision.get_integer_format()
        # We wish to compute vx / vy
        vx = self.implementation.add_input_variable("x", self.precision, interval=self.input_intervals[0])
        vy = self.implementation.add_input_variable("y", self.precision, interval=self.input_intervals[1])
        if self.mode is FULL_MODE:
            quo = self.implementation.add_input_variable("quo", ML_Pointer_Format(int_precision))

        i = Variable("i", precision=int_precision, var_type=Variable.Local)
        q = Variable("q", precision=int_precision, var_type=Variable.Local)

        # shorthands building integer / floating-point constants
        CI = lambda v: Constant(v, precision=int_precision)
        CF = lambda v: Constant(v, precision=self.precision)

        vx_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="vx_subnormal")
        vy_subnormal = Test(vy, specifier=Test.IsSubnormal, tag="vy_subnormal")

        DELTA_EXP = self.precision.get_mantissa_size()
        scale_factor = Constant(2.0**DELTA_EXP, precision=self.precision)
        inv_scale_factor = Constant(2.0**-DELTA_EXP, precision=self.precision)

        normalized_vx = Select(vx_subnormal, vx * scale_factor, vx, tag="scaled_vx")
        normalized_vy = Select(vy_subnormal, vy * scale_factor, vy, tag="scaled_vy")

        real_ex = ExponentExtraction(vx, tag="real_ex", precision=int_precision)
        real_ey = ExponentExtraction(vy, tag="real_ey", precision=int_precision)

        # if real_e<x/y> is +1023 then it may Overflow in -real_ex for ExponentInsertion
        # which only supports downto -1022 before falling into subnormal numbers (which are
        # not supported by ExponentInsertion)
        # so each exponent is split in two halves which are inserted separately
        real_ex_h0 = real_ex / 2
        real_ex_h1 = real_ex - real_ex_h0

        real_ey_h0 = real_ey / 2
        real_ey_h1 = real_ey - real_ey_h0

        EI = lambda v: ExponentInsertion(v, precision=self.precision)

        mx = Abs((vx * EI(-real_ex_h0)) * EI(-real_ex_h1), tag="mx")
        my = Abs((vy * EI(-real_ey_h0)) * EI(-real_ey_h1), tag="pre_my")

        # scale_ey is used to regain the unscaling of mx in the first loop
        # if real_ey >= real_ex, the first loop is never executed
        # so a different scaling is required
        mx_unscaling = Select(real_ey < real_ex, real_ey, real_ex)
        ey_half0 = (mx_unscaling) / 2
        ey_half1 = (mx_unscaling) - ey_half0

        scale_ey_half0 = ExponentInsertion(ey_half0, precision=self.precision, tag="scale_ey_half0")
        scale_ey_half1 = ExponentInsertion(ey_half1, precision=self.precision, tag="scale_ey_half1")

        # if only vy is subnormal we want to normalize it
        #normal_cond = LogicalAnd(vy_subnormal, LogicalNot(vx_subnormal))
        normal_cond = vy_subnormal #LogicalAnd(vy_subnormal, LogicalNot(vx_subnormal))
        my = Select(normal_cond, Abs(MantissaExtraction(vy * scale_factor)), my, tag="my")


        # vx / vy = vx * 2^-ex * 2^(ex-ey) / (vy * 2^-ey)
        # vx % vy

        post_mx = Variable("post_mx", precision=self.precision, var_type=Variable.Local)

        # scaling for half comparison
        VY_SCALING = Select(vy_subnormal, 1.0, 0.5, precision=self.precision)
        VX_SCALING = Select(vy_subnormal, 2.0, 1.0, precision=self.precision)

        def LogicalXor(a, b):
            return LogicalOr(LogicalAnd(a, LogicalNot(b)), LogicalAnd(LogicalNot(a), b))

        rem_sign = Select(vx < 0, CF(-1), CF(1), precision=self.precision, tag="rem_sign")
        quo_sign = Select(LogicalXor(vx <0, vy < 0), CI(-1), CI(1), precision=int_precision, tag="quo_sign")

        # iteration budget shared by the three loops below: the generated
        # function returns -1 once it is exhausted
        loop_watchdog = Variable("loop_watchdog", precision=ML_Int32, var_type=Variable.Local)

        loop = Statement(
            real_ex, real_ey, mx, my, loop_watchdog,
            ReferenceAssign(loop_watchdog, 5000),
            ReferenceAssign(q, CI(0)),
            # step 1: one quotient bit per unit of exponent difference
            Loop(
                ReferenceAssign(i, CI(0)), i < (real_ex - real_ey),
                Statement(
                    ReferenceAssign(i, i+CI(1)),
                    ReferenceAssign(q, ((q << 1) + Select(mx >= my, CI(1), CI(0))).modify_attributes(tag="step1_q")),
                    ReferenceAssign(mx, (CF(2) * (mx - Select(mx >= my, my, CF(0)))).modify_attributes(tag="step1_mx")),
                    # loop watchdog
                    ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                    ConditionBlock(loop_watchdog < 0, Return(-1)),
                ),
            ),
            # unscaling remainder
            ReferenceAssign(mx, ((mx * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem")),
            ReferenceAssign(my, ((my * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem_my")),
            # step 2: halve my until it is no longer greater than |vy|
            Loop(
                Statement(), (my > Abs(vy)),
                Statement(
                    ReferenceAssign(q, ((q << 1) + Select(mx >= Abs(my), CI(1), CI(0))).modify_attributes(tag="step2_q")),
                    ReferenceAssign(mx, (mx - Select(mx >= Abs(my), Abs(my), CF(0))).modify_attributes(tag="step2_mx")),
                    ReferenceAssign(my, (my * 0.5).modify_attributes(tag="step2_my")),
                    # loop watchdog
                    ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                    ConditionBlock(loop_watchdog < 0, Return(-1)),
                ),
            ),
            ReferenceAssign(q, q << 1),
            # step 3: subtract |vy| while mx is greater than |vy|
            Loop(
                ReferenceAssign(i, CI(0)), mx > Abs(vy),
                Statement(
                    ReferenceAssign(q, (q + Select(mx > Abs(vy), CI(1), CI(0))).modify_attributes(tag="step3_q")),
                    ReferenceAssign(mx, (mx - Select(mx > Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="step3_mx")),
                    # loop watchdog
                    ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                    ConditionBlock(loop_watchdog < 0, Return(-1)),
                ),
            ),
            ReferenceAssign(q, q + Select(mx >= Abs(vy), CI(1), CI(0))),
            ReferenceAssign(mx, (mx - Select(mx >= Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="pre_half_mx")),
            ConditionBlock(
                # actual comparison is mx > |vy * 0.5|; when vy is subnormal
                # both sides are multiplied by 2 (mx * 2 > |vy|) so that the
                # halving of vy is not subject to rounding
                ((mx * VX_SCALING) > Abs(vy * VY_SCALING)).modify_attributes(tag="half_test"),
                Statement(
                    ReferenceAssign(q, q + CI(1)),
                    ReferenceAssign(mx, (mx - Abs(vy)))
                )
            ),
            ConditionBlock(
                # if the remainder is exactly half the divisor |vy|
                # we need to make sure the quotient is even
                LogicalAnd(
                    Equal(mx * VX_SCALING, Abs(vy * VY_SCALING)),
                    Equal(Modulo(q, CI(2)), CI(1)),
                ),
                Statement(
                    ReferenceAssign(q, q + CI(1)),
                    ReferenceAssign(mx, (mx - Abs(vy)))
                )
            ),
            # the remainder takes the sign of vx
            ReferenceAssign(mx, rem_sign * mx),
            # only the quotient modulo 2**quotient_size is kept, then signed
            ReferenceAssign(q,
                Modulo(TypeCast(q, precision=self.precision.get_unsigned_integer_format()), Constant(2**self.quotient_size, precision=self.precision.get_unsigned_integer_format()), tag="mod_q")
            ),
            ReferenceAssign(q, quo_sign * q),
        )

        # NOTE: QuotientReturn must always precede RemainderReturn: in
        # QUOTIENT_MODE it is the actual Return while the remainder one is an
        # empty statement, in the other modes the remainder one is the Return
        if self.mode is QUOTIENT_MODE:
            #
            QuotientReturn = Return
            RemainderReturn = lambda _: Statement()
        elif self.mode is REMAINDER_MODE:
            QuotientReturn = lambda _: Statement()
            RemainderReturn = Return
        elif self.mode is FULL_MODE:
            QuotientReturn = lambda v: ReferenceAssign(Dereference(quo, precision=int_precision), v)
            RemainderReturn = Return
        else:
            # NotImplemented is a comparison sentinel, not an exception:
            # raising it would fail with a TypeError
            raise NotImplementedError(
                "unsupported mode: {}".format(self.mode))

        # quotient invalid value
        QUO_INVALID_VALUE = 0

        mod_scheme = Statement(
            # x or y is NaN, a NaN is returned
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsNaN), Test(vy, specifier=Test.IsNaN)),
                Statement(
                    QuotientReturn(QUO_INVALID_VALUE),
                    RemainderReturn(FP_QNaN(self.precision))
                ),
            ),
            # y is zero, a NaN is returned
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Statement(
                    QuotientReturn(QUO_INVALID_VALUE),
                    RemainderReturn(FP_QNaN(self.precision))
                ),
            ),
            # x is zero, x is returned
            ConditionBlock(
                Test(vx, specifier=Test.IsZero),
                Statement(
                    QuotientReturn(0),
                    RemainderReturn(vx)
                ),
            ),
            # x is infinite, a NaN is returned
            ConditionBlock(
                Test(vx, specifier=Test.IsInfty),
                Statement(
                    QuotientReturn(QUO_INVALID_VALUE),
                    RemainderReturn(FP_QNaN(self.precision))
                )
            ),
            # y is infinite, x is returned
            ConditionBlock(
                Test(vy, specifier=Test.IsInfty),
                Statement(
                    QuotientReturn(0),
                    RemainderReturn(vx),
                )
            ),
            # |x| < |y / 2|, x is returned
            ConditionBlock(
                Abs(vx) < Abs(vy * 0.5),
                Statement(
                    QuotientReturn(0),
                    RemainderReturn(vx),
                )
            ),
            ConditionBlock(
                Equal(vx, vy),
                Statement(
                    QuotientReturn(1),
                    # intended: 0 with the same sign as x
                    # NOTE(review): under round-to-nearest x - x is +0 even
                    # for negative x; confirm the sign actually produced
                    RemainderReturn(vx - vx),
                ),
            ),
            ConditionBlock(
                Equal(vx, -vy),
                Statement(
                    # quotient is -1
                    QuotientReturn(-1),
                    # intended: 0 with the same sign as x
                    # NOTE(review): same remark as for the x == y case
                    RemainderReturn(vx - vx),
                ),
            ),
            loop,
            QuotientReturn(q),
            RemainderReturn(mx),
        )

        return mod_scheme
Exemple #13
0
    def generate_scheme(self):
        """Assemble the operation graph of the inverse square root scheme.

        Standard inputs (neither NaN nor infinite, and strictly positive) go
        through compute_isqrt, seeded with a ReciprocalSquareRootSeed of the
        input.  Every other input is dispatched as follows:
          - NaN or negative: a quiet NaN is returned;
          - infinite: the constant 0 is returned;
          - equal to zero: +infinity is returned;
          - anything else: -infinity is returned.

        NOTE(review): if the equality test against +0 follows IEEE-754
        semantics, -0 also compares equal, so the -infinity branch looks
        unreachable; confirm the intended handling of -0.

        Returns:
            the Statement wrapping the standard / non-standard dispatch.
        """
        x_var = self.implementation.add_input_variable("x", self.precision)
        x_var.set_attributes(precision=self.precision, tag="vx", debug=debug_multi)
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m Generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        zero_cst = Constant(0, precision=self.precision)
        plus_zero_cst = Constant(FP_PlusZero(self.precision))

        # attributes shared by every rarely-true boolean predicate
        rare_bool = {"precision": ML_Bool, "likely": False}

        is_nan = Test(
            x_var, specifier=Test.IsNaN,
            debug=debug_multi, tag="is_NaN", **rare_bool)
        is_negative = Comparison(
            x_var, zero_cst, specifier=Comparison.Less,
            debug=debug_multi, tag="is_Negative", **rare_bool)
        is_zero = Comparison(
            x_var, plus_zero_cst, specifier=Comparison.Equal,
            debug=debug_multi, tag="Is_Zero", **rare_bool)
        is_inf = Test(
            x_var, specifier=Test.IsInfty,
            debug=debug_multi, tag="is_Inf", **rare_bool)
        is_nan_or_negative = LogicalOr(is_nan, is_negative, **rare_bool)

        is_nan_or_inf = Test(
            x_var, specifier=Test.IsInfOrNaN,
            debug=debug_multi, tag="is_nan_or_inf", **rare_bool)
        is_negative_or_zero = Comparison(
            x_var, zero_cst, specifier=Comparison.LessOrEqual,
            debug=debug_multi, tag="is_Negative_or_zero", **rare_bool)

        # standard path: not (NaN or infinite or <= 0)
        is_standard = LogicalNot(
            LogicalOr(is_nan_or_inf, is_negative_or_zero, **rare_bool),
            precision=ML_Bool,
            likely=True)

        # one return statement per special-case outcome
        return_plus_inf = Statement(Return(FP_PlusInfty(self.precision)))
        return_minus_inf = Statement(Return(FP_MinusInfty(self.precision)))
        return_qnan = Statement(Return(FP_QNaN(self.precision)))
        return_zero = Statement(Return(zero_cst))

        seed = ReciprocalSquareRootSeed(
            x_var, precision=self.precision, tag="sqrt_seed", debug=debug_multi)
        result = compute_isqrt(x_var, seed, self.num_iter, self.precision)

        # special inputs, from the outermost test to the innermost one
        zero_dispatch = ConditionBlock(is_zero, return_plus_inf, return_minus_inf)
        inf_dispatch = ConditionBlock(is_inf, return_zero, zero_dispatch)
        special_dispatch = ConditionBlock(
            is_nan_or_negative, return_qnan, inf_dispatch)

        return Statement(
            ConditionBlock(
                is_standard,
                Statement(Return(result)),
                Statement(special_dispatch)))
Exemple #14
0
    def generate_test_tables(self,
                             test_num,
                             test_ranges=None):
        """ Generate inputs and output table to be shared between auto test
            and max_error tests

            Args:
                test_num: number of randomly sized tests generated after the
                    standard test cases
                test_ranges: list of input intervals, one per input array;
                    None selects the default [Interval(-1.0, 1.0)]

            Returns:
                the tuple (test_total, (table_size_offset_array,
                input_tables), output_array) """
        # a fresh default list is built on each call rather than sharing one
        # mutable object across calls
        if test_ranges is None:
            test_ranges = [Interval(-1.0, 1.0)]

        index_range = self.test_index_range
        test_total = test_num + len(self.standard_test_cases)

        # number of arrays expected as inputs for tested_function
        NUM_INPUT_ARRAY = 1
        # position of the input array in tested_function operands (generally
        # equals to 1 as to 0-th input is often the destination array)
        INPUT_INDEX_OFFSET = 1

        # concatenating standard test array at the beginning of randomly
        # generated array
        TABLE_SIZE_VALUES = [
            len(std_table) for std_table in self.standard_test_cases
        ] + [
            random.randrange(index_range[0], index_range[1] + 1)
            for _ in range(test_num)
        ]
        # offset of each test = sum of the sizes of the tests before it,
        # accumulated in a single pass
        OFFSET_VALUES = []
        running_offset = 0
        for table_size in TABLE_SIZE_VALUES:
            OFFSET_VALUES.append(running_offset)
            running_offset += table_size

        # each row holds the (size, offset) pair of one test
        table_size_offset_array = generate_2d_table(
            test_total,
            2,
            ML_UInt32,
            self.uniquify_name("table_size_array"),
            value_gen=(lambda row_id:
                       (TABLE_SIZE_VALUES[row_id], OFFSET_VALUES[row_id])))
        INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [
            self.get_input_precision(INPUT_INDEX_OFFSET +
                                     table_id).get_data_precision()
            for table_id in range(NUM_INPUT_ARRAY)
        ]
        rng_map = [
            get_precision_rng(precision, test_range)
            for precision, test_range in zip(input_precisions, test_ranges)
        ]

        def make_value_gen(precision, rng):
            """ Bind precision and rng at creation time so that each table
                keeps its own pair whenever the generator ends up called """
            return lambda _: precision.round_sollya_object(
                rng.get_new_value(), sollya.RN)

        # generated table of inputs
        input_tables = [
            generate_1d_table(
                INPUT_ARRAY_SIZE,
                input_precisions[table_id],
                self.uniquify_name("input_table_arg%d" % table_id),
                value_gen=make_value_gen(input_precisions[table_id],
                                         rng_map[table_id]))
            for table_id in range(NUM_INPUT_ARRAY)
        ]

        # generate output_array
        # every output entry is initialized to a quiet NaN
        output_array = generate_1d_table(
            INPUT_ARRAY_SIZE,
            self.precision,
            self.uniquify_name("output_array"),
            const=False,
            value_gen=(lambda _: FP_QNaN(self.precision)))
        return test_total, (table_size_offset_array,
                            input_tables), output_array
Exemple #15
0
  def generate_scheme(self):
    
    def compute_reciprocal(vx):
      inv_seed = ReciprocalSeed(vx, precision = self.precision, tag = "inv_seed", debug = debug_multi)
      nr_1 = 2*inv_seed - vx*inv_seed*inv_seed
      nr_2 = 2*nr_1 - vx*nr_1*nr_1
      nr_3 =2*nr_2 - vx*nr_2*nr_2
      inv_vx = 2*nr_3 - vx*nr_3*nr_3
      
      return inv_vx
      
    vx = self.implementation.add_input_variable("x", self.get_input_precision()) 

    sollya_precision = self.precision.get_sollya_object()
    
    int_precision = {
        ML_Binary32 : ML_Int32,
        ML_Binary64 : ML_Int64
      }[self.precision]
    
    hi_precision = self.precision.get_field_size() - 12
    
    half_pi = round(pi/2, sollya_precision, sollya.RN)
    half_pi_cst = Constant(half_pi, precision = self.precision)
    
    test_sign = Comparison(vx, 0, specifier = Comparison.Less, precision = ML_Bool, debug = debug_multi, tag = "Is_Negative")
    neg_vx = -vx
    
    sign = Variable("sign", precision = self.precision, var_type = Variable.Local)
    abs_vx_std = Variable("abs_vx", precision = self.precision, var_type = Variable.Local)
    red_vx_std = Variable("red_vx", precision = self.precision, var_type = Variable.Local)
    const_index_std = Variable("const_index", precision = int_precision, var_type = Variable.Local)
    
    set_sign = Statement(
        ConditionBlock(test_sign,
          Statement(ReferenceAssign(abs_vx_std, neg_vx), ReferenceAssign(sign, -1)),
          Statement(ReferenceAssign(abs_vx_std, vx), ReferenceAssign(sign, 1))
      ))
      
    if self.precision is ML_Binary32:
      bound = 24
    else:
      bound = 53
      
    test_bound = Comparison(abs_vx_std, S2**bound, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound1 = Comparison(abs_vx_std, 39.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound2 = Comparison(abs_vx_std, 19.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound3 = Comparison(abs_vx_std, 11.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound4 = Comparison(abs_vx_std, 7.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    
    
    
    set_bound = Return(sign*half_pi_cst)
    
    set_bound1 = Statement(
      ReferenceAssign(red_vx_std, -compute_reciprocal(abs_vx_std)),
      ReferenceAssign(const_index_std, 3)
    )
    
    set_bound2 = Statement(
      ReferenceAssign(red_vx_std, (abs_vx_std - 1.5)*compute_reciprocal(1 + 1.5*abs_vx_std)),
      ReferenceAssign(const_index_std, 2)
    )
    
    set_bound3 = Statement(
      ReferenceAssign(red_vx_std, (abs_vx_std - 1.0)*compute_reciprocal(abs_vx_std + 1.0)),
      ReferenceAssign(const_index_std, 1)
    )
    
    set_bound4 = Statement(
      ReferenceAssign(red_vx_std, (abs_vx_std - 0.5)*compute_reciprocal(1 + abs_vx_std*0.5)),
      ReferenceAssign(const_index_std, 0)
    )
    
    set_bound5 = Statement(
      ReferenceAssign(red_vx_std, abs_vx_std),
      ReferenceAssign(const_index_std, 4)
    )
    
    
    cons_table = ML_NewTable(dimensions = [5, 2], storage_precision = self.precision, tag = self.uniquify_name("cons_table"))
    coeff_table = ML_NewTable(dimensions = [11], storage_precision = self.precision, tag = self.uniquify_name("coeff_table"))
    
    cons_hi = round(atan(0.5), hi_precision, sollya.RN)
    cons_table[0][0] = cons_hi
    cons_table[0][1] = round(atan(0.5) - cons_hi, sollya_precision, sollya.RN)
    
    cons_hi = round(atan(1.0), hi_precision, sollya.RN)
    cons_table[1][0] = cons_hi
    cons_table[1][1] = round(atan(1.0) - cons_hi, sollya_precision, sollya.RN)
    
    cons_hi = round(atan(1.5), hi_precision, sollya.RN)
    cons_table[2][0] = cons_hi
    cons_table[2][1] = round(atan(1.5) - cons_hi, sollya_precision, sollya.RN)
    
    cons_hi = round(pi/2, hi_precision, sollya.RN)
    cons_table[3][0] = cons_hi
    cons_table[3][1] = round(pi/2 - cons_hi, sollya_precision, sollya.RN)
    
    cons_table[4][0] = 0.0
    cons_table[4][1] = 0.0
    
    coeff_table[0] = round(3.33333333333329318027e-01, sollya_precision, sollya.RN)
    coeff_table[1] = round(-1.99999999998764832476e-01, sollya_precision, sollya.RN)
    coeff_table[2] = round(1.42857142725034663711e-01, sollya_precision, sollya.RN)
    coeff_table[3] = round(-1.11111104054623557880e-01, sollya_precision, sollya.RN)
    coeff_table[4] = round(9.09088713343650656196e-02, sollya_precision, sollya.RN)
    coeff_table[5] = round(-7.69187620504482999495e-02, sollya_precision, sollya.RN)
    coeff_table[6] = round(6.66107313738753120669e-02, sollya_precision, sollya.RN)
    coeff_table[7] = round(-5.83357013379057348645e-02, sollya_precision, sollya.RN)
    coeff_table[8] = round(4.97687799461593236017e-02, sollya_precision, sollya.RN)
    coeff_table[9] = round(-3.65315727442169155270e-02, sollya_precision, sollya.RN)
    coeff_table[10] = round(1.62858201153657823623e-02, sollya_precision, sollya.RN)
    
    red_vx2 = red_vx_std*red_vx_std
    red_vx4 = red_vx2*red_vx2
    a0 = TableLoad(coeff_table, 0, precision = self.precision)
    a1 = TableLoad(coeff_table, 1, precision = self.precision)
    a2 = TableLoad(coeff_table, 2, precision = self.precision)
    a3 = TableLoad(coeff_table, 3, precision = self.precision)
    a4 = TableLoad(coeff_table, 4, precision = self.precision)
    a5 = TableLoad(coeff_table, 5, precision = self.precision)
    a6 = TableLoad(coeff_table, 6, precision = self.precision)
    a7 = TableLoad(coeff_table, 7, precision = self.precision)
    a8 = TableLoad(coeff_table, 8, precision = self.precision)
    a9 = TableLoad(coeff_table, 9, precision = self.precision)
    a10 = TableLoad(coeff_table, 10, precision = self.precision)
    
    poly_even = red_vx2*(a0 + red_vx4*(a2 + red_vx4*(a4 + red_vx4*(a6 + red_vx4*(a8 + red_vx4*a10)))))
    poly_odd = red_vx4*(a1 + red_vx4*(a3 + red_vx4*(a5 + red_vx4*(a7 + red_vx4*a9))))
    
    
    poly_even.set_attributes(tag = "poly_even", debug = debug_multi)
    poly_odd.set_attributes(tag = "poly_odd", debug = debug_multi)
    
    const_load_hi = TableLoad(cons_table, const_index_std, 0, tag = "const_load_hi", debug = debug_multi)
    const_load_lo = TableLoad(cons_table, const_index_std, 1, tag = "const_load_lo", debug = debug_multi)
    
    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, tag = "nan_or_inf", likely = False)
    test_nan = Test(vx, specifier = Test.IsNaN, debug = debug_multi, tag = "is_nan_test", likely = False)
    test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = debug_multi, tag = "inf_sign", likely = False)
                
        
    result = const_load_hi - ((red_vx_std*(poly_even + poly_odd) - const_load_lo) - red_vx_std)
    result.set_attributes(tag = "result", debug = debug_multi)
    
    std_scheme = Statement(
          sign,
          abs_vx_std,
          red_vx_std,
          const_index_std,
          set_sign,
          ConditionBlock(
            test_bound,
            set_bound,
            ConditionBlock(
              test_bound1,
              set_bound1,
              ConditionBlock(
                test_bound2,
                set_bound2,
                ConditionBlock(
                  test_bound3,
                  set_bound3,
                  ConditionBlock(
                    test_bound4,
                    set_bound4,
                    set_bound5
                  )
                )
              )
            )
          ),
          Return(sign*result)
        )
    infty_return = ConditionBlock(test_positive, Return(half_pi_cst), Return(-half_pi_cst))
    non_std_return = ConditionBlock(test_nan, Return(FP_QNaN(self.precision)), infty_return)
    scheme = ConditionBlock(test_NaN_or_inf, Statement(ClearException(), non_std_return), std_scheme)
    return scheme
Exemple #16
0
    def generate_scheme(self):
        """Build the operation graph evaluating a logarithm of the input.

        The natural logarithm is evaluated first; when
        ``sollya.log(self.basis)`` differs from 1 the result is rescaled by
        ``1 / log(self.basis)`` at the end of ``compute_log``.

        Evaluation scheme, with x = m.2^e:
            log(x) = log(m.2^-tho) + (e + tho).log(2)
            tho = (m > sqrt(2)) ? 1 : 0 is used to avoid catastrophic
            cancellation when e = -1 and m ~ 2
            log(m.2^-tho) = log(m.r) - log(r.2^tho)
            where r = rcp(m) is an approximation of 1/m such that r.m ~ 1

        Special inputs handled by the returned scheme, tested in this order:
            x < 0      -> raises ML_FPE_Invalid, returns a quiet NaN
            infinity   -> returns +infinity
            NaN        -> returns a quiet NaN (ML_FPE_Invalid if signaling)
            zero       -> raises ML_FPE_DivideByZero, returns -infinity
            subnormal  -> evaluated on x.2^100 with a -100 exponent correction

        Returns:
            The root ConditionBlock of the implementation scheme.
        """
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # natural logarithm; the change of basis is applied in compute_log
        log_f = sollya.log(sollya.x)

        # precision (in bits) used to round the "hi" part of every hi/lo
        # constant pair built below
        hi_size = self.precision.get_field_size() - (
            self.precision.get_exponent_size() + 1)

        # log(2) split into a hi part and a lo correction term
        log2_hi_value = round(log_f(2), hi_size, RN)
        log2_lo_value = round(
            log_f(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: log_table[i] holds log(r_i) and log_table_tho[i]
        # holds log(2.r_i), each as a (hi, lo) pair; entry 0 is zero
        table_index_size = inv_approx_table.index_size
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table_tho = ML_NewTable(dimensions=[2**table_index_size, 2],
                                    storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        log_table_tho[0][0] = 0.0
        log_table_tho[0][1] = 0.0
        for i in table_index_range:
            inv_value = inv_approx_table[i]
            value_high = round(log_f(inv_value), hi_size, sollya.RN)
            value_low = round(
                log_f(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

            inv_value_tho = S2 * inv_approx_table[i]
            value_high_tho = round(log_f(inv_value_tho), hi_size, sollya.RN)
            value_low_tho = round(
                log_f(inv_value_tho) - value_high_tho, sollya_precision,
                sollya.RN)
            log_table_tho[i][0] = value_high_tho
            log_table_tho[i][1] = value_low_tho

        def compute_log(_vx, exp_corr_factor=None):
            """Return the node evaluating the logarithm of a normal input.

            _vx: operand node whose mantissa and exponent are extracted.
            exp_corr_factor: optional value added to the extracted exponent
                (used to undo a pre-scaling of the operand); None for no
                correction.
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_multi)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = inv_approx_table.index_function(_vx_mant)
            table_index.set_attributes(tag="table_index", debug=debug_multi)

            tho_cond = _vx_mant > Constant(sollya.sqrt(2),
                                           precision=self.precision)
            tho = Select(tho_cond,
                         Constant(1.0, precision=self.precision),
                         Constant(0.0, precision=self.precision),
                         precision=self.precision,
                         tag="tho",
                         debug=debug_multi)

            int_format = self.precision.get_integer_format()

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            # The seed's integer encoding is masked with -2 before being
            # cast back to floating-point.
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=int_format),
                Constant(-2, precision=int_format),
                precision=int_format),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            # table index 0 selects r = 1.0 (matching the zero table entry)
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_multi)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-6
            # NOTE(review): this interval is centered on 1 whereas _red_vx
            # is r.m - 1 and the polynomial below is built on
            # [-inv_err, inv_err]; kept as-is, confirm which one is intended.
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_multi,
                                   interval=red_interval)

            # log(r) or log(2.r) depending on tho, as a (hi, lo) pair
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)

            Log.report(Log.Info, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            # single definition of the approximated function, shared by the
            # degree estimation and the polynomial construction
            approx_fct = log(1 + sollya.x) / sollya.x
            poly_degree = sup(
                guessdegree(
                    approx_fct, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                approx_fct, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            # presumably drops the degree-0 coefficient, which is re-added
            # below through Add212(_red_vx, ...) -- TODO confirm
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Info, "generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_multi)
            Log.report(Log.Info, poly_object.get_sollya_object())

            corr_exp = Conversion(_vx_exp if exp_corr_factor is None else
                                  _vx_exp + exp_corr_factor,
                                  precision=self.precision) + tho
            corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

            # (m0h, m0l) = _red_vx + _red_vx * _poly
            m0h, m0l = Mul211(_red_vx, _poly)
            m0h, m0l = Add212(_red_vx, m0h, m0l)
            m0h.set_attributes(tag="m0h", debug=debug_multi)
            m0l.set_attributes(tag="m0l")
            # (l0_h, l0_l) = corr_exp * log(2)
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
            rh.set_attributes(tag="rh0", debug=debug_multi)
            rl.set_attributes(tag="rl0", debug=debug_multi)
            # subtract the tabulated log(r.2^tho)
            rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
            rh.set_attributes(tag="rh", debug=debug_multi)
            rl.set_attributes(tag="rl", debug=debug_multi)

            # change of basis: multiply by 1 / log(basis) unless log(basis) is 1
            if sollya.log(self.basis) != 1.0:
                lbh = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis))
                lbl = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis) - lbh)
                rh, rl = Mul222(rh, rl, lbh, lbl)
            return rh

        result = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_multi)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        # NOTE(review): no dedicated exp=-1 path is built below; the message
        # is kept so the log output is unchanged.
        Log.report(Log.Info, "managing exp=-1 case")

        # subnormal inputs are scaled by 2^100 before evaluation and the
        # extracted exponent is corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal = compute_log(vx * S2100, exp_corr_factor=m100)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)), Return(result))))
        return pre_scheme
Exemple #17
0
    def generate_scalar_scheme(self, vx):
        """Build the scalar logarithm scheme for the input node ``vx``.

        The tables and the core evaluation are delegated to
        ``self.generate_log_table`` and ``self.generate_reduced_log``; this
        method wires them together with the special-case handling.

        Approximation scheme, with x = m.2^e:
            log(x) = log(m.2^-tho) + (e + tho).log(2)
            tho = (m > sqrt(2)) ? 1 : 0 is used to avoid catastrophic
            cancellation when e = -1 and m ~ 2
            log(m.2^-tho) = log(m.r) - log(r.2^tho)
            where r = rcp(m) is an approximation of 1/m such that r.m ~ 1

        Special inputs handled by the returned scheme, tested in this order:
            x < 0      -> raises ML_FPE_Invalid, returns a quiet NaN
            infinity   -> returns +infinity
            NaN        -> returns a quiet NaN (ML_FPE_Invalid if signaling)
            zero       -> raises ML_FPE_DivideByZero, returns -infinity
            subnormal  -> evaluated on x.2^100 with a -100 exponent correction

        Args:
            vx: input operation node.

        Returns:
            The root ConditionBlock of the implementation scheme.
        """
        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)

        # table of the reciprocal approximation of the targeted processor
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x)

        # the third returned value (table index range) is not needed here
        log_table, log_table_tho, _ = self.generate_log_table(
            log_f, inv_approx_table)

        # evaluation for normal inputs
        result = self.generate_reduced_log(vx, log_f, inv_approx_table,
                                           log_table, log_table_tho)
        result.set_attributes(tag="result", debug=debug_multi)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        # NOTE(review): no dedicated exp=-1 path is built below; the message
        # is kept so the log output is unchanged.
        Log.report(Log.Info, "managing exp=-1 case")

        # subnormal inputs are scaled by 2^100 before evaluation and the
        # exponent correction -100 is forwarded to the reduced evaluation
        m100 = Constant(-100, precision=int_precision)
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal = self.generate_reduced_log(vx * S2100,
                                                     log_f,
                                                     inv_approx_table,
                                                     log_table,
                                                     log_table_tho,
                                                     exp_corr_factor=m100)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(
                ClearException(), Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision), precision=self.precision)),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision),
                               precision=self.precision),
                    ),
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision),
                               precision=self.precision))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision),
                                   precision=self.precision),
                        ), Return(result_subnormal)), Return(result))))
        return pre_scheme
Exemple #18
0
    def generate_scheme(self):
        """Build the operation graph of a square root implementation.

        The standard path returns ``compute_sqrt`` applied to the input and
        a ReciprocalSquareRootSeed of it, with ``int(self.num_iter)``
        passed as the iteration count. The non-standard path covers:
            NaN or x < 0 -> returns a quiet NaN
            infinity     -> returns +infinity

        Side effects: enables log dumping on stdout and emits Info messages.

        Returns:
            The root ConditionBlock of the implementation scheme.
        """
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)
        vx.set_attributes(precision=self.precision,
                          tag="vx",
                          debug=debug_multi)
        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m Generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        C0 = Constant(0, precision=self.precision)

        C0_plus = Constant(FP_PlusZero(self.precision))
        C0_minus = Constant(FP_MinusZero(self.precision))

        def local_test(specifier, tag):
            """ Local wrapper to generate Test operations on vx;
                the node is tagged "is_<tag>" """
            return Test(vx,
                        specifier=specifier,
                        likely=False,
                        debug=debug_multi,
                        tag="is_%s" % tag,
                        precision=ML_Bool)

        # only the suffix is passed: local_test adds the "is_" prefix itself
        test_NaN = local_test(Test.IsNaN, "NaN")
        test_inf = local_test(Test.IsInfty, "Inf")
        test_NaN_or_Inf = local_test(Test.IsInfOrNaN, "Inf_Or_Nan")

        test_negative = Comparison(vx,
                                   C0,
                                   specifier=Comparison.Less,
                                   debug=debug_multi,
                                   tag="is_Negative",
                                   precision=ML_Bool,
                                   likely=False)
        test_NaN_or_Neg = LogicalOr(test_NaN, test_negative, precision=ML_Bool)

        # standard input: neither NaN, nor infinity, nor negative
        test_std = LogicalNot(LogicalOr(test_NaN_or_Inf,
                                        test_negative,
                                        precision=ML_Bool,
                                        likely=False),
                              precision=ML_Bool,
                              likely=True)

        test_zero = Comparison(vx,
                               C0,
                               specifier=Comparison.Equal,
                               likely=False,
                               debug=debug_multi,
                               tag="Is_Zero",
                               precision=ML_Bool)

        return_NaN_or_neg = Statement(Return(FP_QNaN(self.precision)))
        return_inf = Statement(Return(FP_PlusInfty(self.precision)))

        return_PosZero = Return(C0_plus)
        return_NegZero = Return(C0_minus)

        # initial approximation fed to compute_sqrt
        NR_init = ReciprocalSquareRootSeed(vx,
                                           precision=self.precision,
                                           tag="sqrt_seed",
                                           debug=debug_multi)

        result = compute_sqrt(vx,
                              NR_init,
                              int(self.num_iter),
                              precision=self.precision)

        # NOTE(review): the innermost zero test looks unreachable: this
        # block only runs for NaN, infinite or negative inputs, and those
        # are all caught by the two outer tests. Zero inputs therefore seem
        # to follow the standard path -- confirm that compute_sqrt handles
        # them (including the sign of zero).
        return_non_std = ConditionBlock(
            test_NaN_or_Neg, return_NaN_or_neg,
            ConditionBlock(
                test_inf, return_inf,
                ConditionBlock(test_zero, return_PosZero, return_NegZero)))
        return_std = Return(result)

        scheme = ConditionBlock(test_std, return_std, return_non_std)
        return scheme
Exemple #19
0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="sinf.c",
                 function_name="sinf"):
        """Build a sine implementation and write its C source to disk.

        The constructor does all the work: it declares the function, builds
        the operation graph, runs the optimization passes and emits the
        generated code into "<function name>.c".

        Args:
            precision: format used for the function input, output, the
                polynomial coefficients and the precision instantiation.
            abs_accuracy: accepted but not read by this body.
            libm_compliant: forwarded to the C code generator.
            debug_flag: when false, debug output is disabled in the code
                generator (``disable_debug=not debug_flag``).
            fuse_fma: accepted but not read by this body (the FMA fusing
                pass is always run).
            fast_path_extract: accepted but not read by this body (the fast
                path factorization is always run).
            target: processor object handed to the optimization engine and
                to the code generator.  NOTE: the default is evaluated once
                at definition time, so it is shared between calls.
            output_file: accepted but not read by this body; the file name
                is derived from the function name instead.
            function_name: name of the generated function.
        """
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # NOTE(review): the special-value tests and return_snan below are
        # built but never attached to the scheme returned by this method.
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        # the quiet NaN is built in the function's own format rather than a
        # hard-coded binary32
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        int_precision = self.precision.get_integer_format()

        inv_pi_value = 1 / pi

        # argument reduction: red_vx = x - round(x / pi) * pi
        mod_pi_x = NearestInteger(vx * inv_pi_value)
        red_vx = vx - mod_pi_x * pi

        approx_interval = Interval(0, pi / 2)

        # polynomial approximation of sin(x)/x over approx_interval; the
        # degree is one more than the upper bound reported by guessdegree
        poly_degree = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            sin(sollya.x) / sollya.x, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        poly_object = global_poly_object  #.sub_poly(start_index = 1)

        print("generating polynomial evaluation scheme")
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vx, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        print(global_poly_object.get_sollya_object())

        # NOTE(review): the polynomial is evaluated at red_vx but multiplied
        # by the unreduced vx, and the parity of mod_pi_x is not used to fix
        # the sign -- confirm whether this is intended.
        pre_result = vx * _poly

        result = pre_result
        result.set_attributes(tag="result", debug=debug_lftolx)

        # main scheme
        print("MDL scheme")
        scheme = Statement(Return(result))

        #print scheme.get_str(depth = None, display_precision = True)

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        print("MDL fusing FMA")
        scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print("MDL abstract scheme")
        opt_eng.instantiate_abstract_precision(scheme, None)

        #print scheme.get_str(depth = None, display_precision = True)

        # nodes without an explicit precision default to the function's own
        # format rather than a hard-coded binary32
        print("MDL instantiated scheme")
        opt_eng.instantiate_precision(scheme, default_precision=self.precision)

        print("subexpression sharing")
        opt_eng.subexpression_sharing(scheme)

        print("silencing operation")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)
        #print scheme.get_str(depth = None, display_precision = True)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        #print self.result.get(cg)
        # the context manager closes the file even if code generation or the
        # write raises
        with open("%s.c" % func_implementation.get_name(),
                  "w") as output_stream:
            output_stream.write(self.result.get(cg))
Exemple #20
0
    def generate_scheme(self):
        """Build and return the operation graph of the implementation.

        The graph is made of two paths selected on the input magnitude:

        * a standard path which reduces |x| by multiples of pi / 2**3 and
          dispatches, through a switch on the reduction index, to one
          polynomial approximation of cos(x + i * pi / 2**3) per index;
        * a large-argument path (|x| >= 2**20) which calls an external
          reduction routine and dispatches to a second set of polynomials.

        Returns:
            A Statement holding pre_red_vx_d, red_vx_lo_sub and the
            ConditionBlock choosing between the two paths.

        Raises:
            KeyError: if self.precision is neither ML_Binary32 nor
                ML_Binary64 (lookup of the debug attributes).
        """
        # declaring CodeFunction and retrieving input variable
        # (vx is the absolute value of the input)
        vx = Abs(self.implementation.add_input_variable("x", self.precision),
                 tag="vx")

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        debug_precision = {
            ML_Binary32: debug_ftox,
            ML_Binary64: debug_lftolx
        }[self.precision]

        # NOTE(review): the special-value tests and return trees below
        # (up to specific_return) are built but never attached to the scheme
        # returned by this method.
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        sollya_precision = self.precision.get_sollya_object()
        hi_precision = self.precision.get_field_size() - 3

        # argument reduction
        # frac_pi ~ 2**3 / pi; pi / 2**3 is split into a high part rounded
        # to hi_precision bits and a low part holding the rounding residue
        frac_pi_index = 3
        frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
        inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)
        # computing k = E(x * frac_pi)
        vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
        k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
        fk = Conversion(k, precision=self.precision, tag="fk")

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision)

        # reduced argument in the working precision ...
        red_vx_hi = (vx - inv_frac_pi_cst * fk)
        red_vx_hi.set_attributes(tag="red_vx_hi",
                                 debug=debug_precision,
                                 precision=self.precision)
        red_vx_lo_sub = inv_frac_pi_lo_cst * fk
        red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub",
                                     debug=debug_precision,
                                     unbreakable=True,
                                     precision=self.precision)
        # ... and the same reduction carried out in binary64
        vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
        pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
        pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                       precision=ML_Binary64,
                                       debug=debug_lftolx)
        pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d.set_attributes(tag="pre_red_vx_d",
                                    debug=debug_lftolx,
                                    precision=ML_Binary64)

        # switch index: k modulo 2**(frac_pi_index + 1)
        modk = Modulo(k,
                      2**(frac_pi_index + 1),
                      precision=ML_Int32,
                      tag="switch_value",
                      debug=True)

        # the reduced argument is negated when bit (frac_pi_index - 1) of
        # modk is set
        sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                      2**(frac_pi_index - 1))
        red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
        red_vx.set_attributes(tag="red_vx",
                              debug=debug_precision,
                              precision=self.precision)

        red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
        red_vx_d.set_attributes(tag="red_vx_d",
                                debug=debug_lftolx,
                                precision=ML_Binary64)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        error_goal_approx = S2**-self.precision.get_precision()

        Log.report(Log.Info, "building mathematical polynomial")
        poly_degree_vector = [None] * 2**(frac_pi_index + 1)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        index_relative = []

        # one approximation of cos(x + i * pi / 2**frac_pi_index) per index
        poly_object_vector = [None] * 2**(frac_pi_index + 1)
        for i in range(2**(frac_pi_index + 1)):
            sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
            degree = int(
                sup(guessdegree(sub_func, approx_interval,
                                error_goal_approx))) + 1

            degree_list = range(degree + 1)
            a_interval = approx_interval
            if i == 0:
                # ad-hoc, TODO: to be cleaned
                degree = 6
                degree_list = range(0, degree + 1, 2)
            elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
                # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
                # must be generated
                degree_list = range(1, degree + 1, 2)

            # for these indexes the constant coefficient is kept in binary64
            if i == 3 or i == 5 or i == 7 or i == 9:
                precision_list = [sollya.binary64
                                  ] + [sollya.binary32] * (degree)
            else:
                precision_list = [sollya.binary32] * (degree + 1)

            poly_degree_vector[i] = degree

            # absolute error by default, relative error for the indexes
            # strictly within delta of (but not equal to) the centre index
            constraint = sollya.absolute
            delta = (2**(frac_pi_index - 3))
            centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
            if centered_i < delta and centered_i > -delta and centered_i != 0:
                constraint = sollya.relative
                index_relative.append(i)
            Log.report(
                Log.Info, "generating approximation for %d/%d" %
                (i, 2**(frac_pi_index + 1)))
            poly_object_vector[
                i], _ = Polynomial.build_from_approximation_with_error(
                    sub_func,
                    degree_list,
                    precision_list,
                    a_interval,
                    constraint,
                    error_function=error_function)

        # unified power map for red_sx^n
        upm = {}
        rel_error_list = []

        poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

        # evaluation scheme for each polynomial
        for i in range(2**(frac_pi_index + 1)):
            poly_object = poly_object_vector[i]
            poly_precision = self.precision
            if i == 3 or i == 5 or i == 7 or i == 9:
                # c0 + c1 * red_vx is evaluated in binary64, the remaining
                # terms are added through the binary64 square of the
                # reduced argument
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * red_vx)
                poly_hi.set_precision(ML_Binary64)
                red_vx_d_2 = red_vx_d * red_vx_d
                poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2, offset=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_attributes(unbreakable=True)
            elif i == 4:
                # the degree-1 term is evaluated on the binary64 reduced
                # argument
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=ML_Binary64)
                poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_precision(ML_Binary64)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    red_vx,
                    unified_precision=poly_precision,
                    power_map_=upm)
            #if i == 3:
            #  c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = self.precision)
            #  c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
            #  poly_scheme = (c0 + c1 * red_vx) + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

            poly_scheme.set_attributes(tag="poly_cos%dpi%d" %
                                       (i, 2**(frac_pi_index)),
                                       debug=debug_precision)
            poly_scheme_vector[i] = poly_scheme

            # evaluation error estimate, only computed for index 3 and only
            # when gappa is available
            #try:
            if is_gappa_installed() and i == 3:
                opt_scheme = self.opt_engine.optimization_process(
                    poly_scheme,
                    self.precision,
                    copy=True,
                    fuse_fma=self.fuse_fma)

                tag_map = {}
                self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

                gappa_vx = Variable("red_vx",
                                    precision=self.precision,
                                    interval=approx_interval)

                cg_eval_error_copy_map = {
                    tag_map["red_vx"]: gappa_vx,
                    tag_map["red_vx_d"]: gappa_vx,
                }

                print("opt_scheme")
                print(opt_scheme.get_str(depth=None,
                                         display_precision=True,
                                         memoization_map={}))

                eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_scheme,
                    cg_eval_error_copy_map,
                    gappa_filename="red_arg_%d.g" % i)
                poly_range = cos(approx_interval + i * pi / S2**frac_pi_index)
                rel_error_list.append(eval_error / poly_range)

        #for rel_error in rel_error_list:
        #  print sup(abs(rel_error))

        #return

        # case 17
        #poly17 = poly_object_vector[17]
        #c0 = Constant(coeff(poly17.get_sollya_object(), 0), precision = self.precision)
        #c1 = Constant(coeff(poly17.get_sollya_object(), 1), precision = self.precision)
        #poly_scheme_vector[17] = FusedMultiplyAdd(c1, red_vx, c0, specifier = FusedMultiplyAdd.Standard) + polynomial_scheme_builder(poly17.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

        half = 2**frac_pi_index
        sub_half = 2**(frac_pi_index - 1)

        # determine if the reduced input is within the second and third quarter (not first nor fourth)
        # to negate the cosine output
        factor_cond = BitLogicAnd(BitLogicXor(
            BitLogicRightShift(modk, frac_pi_index),
            BitLogicRightShift(modk, frac_pi_index - 1)),
                                  1,
                                  tag="factor_cond",
                                  debug=True)

        CM1 = Constant(-1, precision=self.precision)
        C1 = Constant(1, precision=self.precision)
        factor = Select(factor_cond,
                        CM1,
                        C1,
                        tag="factor",
                        debug=debug_precision)
        factor2 = Select(Equal(modk, Constant(sub_half)),
                         CM1,
                         C1,
                         tag="factor2",
                         debug=debug_precision)

        switch_map = {}
        if 0:
            # disabled variant: one switch case per index
            for i in range(2**(frac_pi_index + 1)):
                switch_map[i] = Return(poly_scheme_vector[i])
        else:
            # indexes i, half - i, half + i and 2 * half - i share the
            # polynomial of index i; the sign is applied through factor
            for i in range(2**(frac_pi_index - 1)):
                switch_case = (i, half - i)
                #switch_map[i]      = Return(poly_scheme_vector[i])
                #switch_map[half-i] = Return(-poly_scheme_vector[i])
                if i != 0:
                    switch_case = switch_case + (half + i, 2 * half - i)
                    #switch_map[half+i] = Return(-poly_scheme_vector[i])
                    #switch_map[2*half-i] = Return(poly_scheme_vector[i])
                if poly_scheme_vector[i].get_precision() != self.precision:
                    poly_result = Conversion(poly_scheme_vector[i],
                                             precision=self.precision)
                else:
                    poly_result = poly_scheme_vector[i]
                switch_map[switch_case] = Return(factor * poly_result)
            #switch_map[sub_half] = Return(-poly_scheme_vector[sub_half])
            #switch_map[half + sub_half] = Return(poly_scheme_vector[sub_half])
            switch_map[(sub_half, half + sub_half)] = Return(
                factor2 * poly_scheme_vector[sub_half])

        result = SwitchBlock(modk, switch_map)

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################

        # payne and hanek argument reduction for large arguments
        #red_func_name = "payne_hanek_cosfp32" # "payne_hanek_fp32_asm"
        red_func_name = "payne_hanek_fp32_asm"
        payne_hanek_func_op = FunctionOperator(
            red_func_name,
            arg_map={0: FO_Arg(0)},
            require_header=["support_lib/ml_red_arg.h"])
        # NOTE: the external routine is declared with a binary32 argument
        # and a binary64 result, whatever self.precision is
        payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                          ML_Binary64, payne_hanek_func_op)
        payne_hanek_func_op.declare_prototype = payne_hanek_func
        #large_arg_red = FunctionCall(payne_hanek_func, vx)
        large_arg_red = payne_hanek_func(vx)
        red_bound = S2**20

        # the large-argument path is taken when |x| >= 2**20
        cond = Abs(vx) >= red_bound
        cond.set_attributes(tag="cond", likely=False)

        lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
        lar_modk = Modulo(lar_neark,
                          Constant(16, precision=ML_Int64),
                          tag="lar_modk",
                          debug=True)
        # Modulo is supposed to be already performed (by payne_hanek_cosfp32)
        #lar_modk = NearestInteger(large_arg_red, precision = ML_Int64)
        pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                    precision=ML_Binary64)
        pre_lar_red_vx.set_attributes(precision=ML_Binary64,
                                      debug=debug_lftolx,
                                      tag="pre_lar_red_vx")
        # split of the binary64 reduced argument into a working-precision
        # value and the residue lost by that conversion
        lar_red_vx = Conversion(pre_lar_red_vx,
                                precision=self.precision,
                                debug=debug_precision,
                                tag="lar_red_vx")
        lar_red_vx_lo = Conversion(
            pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
            precision=self.precision)
        lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                     precision=self.precision)

        lar_k = 3
        # large arg reduction Universal Power Map
        lar_upm = {}
        lar_switch_map = {}
        approx_interval = Interval(-0.5, 0.5)
        for i in range(2**(lar_k + 1)):
            # NOTE: frac_pi is rebound here to pi / 2**lar_k; the constant of
            # the same name defined for the standard path is no longer
            # needed at this point
            frac_pi = pi / S2**lar_k
            func = cos(frac_pi * i + frac_pi * sollya.x)

            degree = 6
            error_mode = sollya.absolute
            if i % 2**(lar_k) == 2**(lar_k - 1):
                # close to sin(x) cases
                # (sollya.x is used as in the rest of this method rather
                # than relying on a separately imported bare `x`)
                if i == 2**(lar_k - 1):
                    func = -sin(frac_pi * sollya.x)
                else:
                    func = sin(frac_pi * sollya.x)
                degree_list = range(0, degree + 1, 2)
                precision_list = [sollya.binary32] * len(degree_list)
                # func / x is approximated on the even degrees, then the
                # polynomial is re-indexed with sub_poly(offset=-1)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func / sollya.x, degree_list, precision_list,
                    approx_interval, error_mode)
                poly_object = poly_object.sub_poly(offset=-1)
            else:
                degree_list = range(degree + 1)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func, degree_list, precision_list, approx_interval,
                    error_mode)

            if i == 3 or i == 5 or i == 7 or i == 9 or i == 11 or i == 13:
                # c0 + c1 * x and the final sum are evaluated in binary64
                # before conversion to the working precision
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * lar_red_vx)
                poly_hi.set_precision(ML_Binary64)
                pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                pre_poly_scheme.set_attributes(precision=ML_Binary64)
                poly_scheme = Conversion(pre_poly_scheme,
                                         precision=self.precision)
            elif i == 4 or i == 12:
                # the degree-1 term is applied to both parts of the reduced
                # argument (lar_red_vx and lar_red_vx_lo)
                # NOTE(review): c3 and c5 are built but not used below
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                              precision=self.precision)
                c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                              precision=self.precision)
                poly_hi = polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=3),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                       precision=ML_Binary64)
                poly_scheme = Conversion(FusedMultiplyAdd(
                    c1, lar_red_vx, poly_hi, precision=ML_Binary64) +
                                         c1 * lar_red_vx_lo,
                                         precision=self.precision)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
            # poly_scheme = polynomial_scheme_builder(poly_object, lar_red_vx, unified_precision = self.precision, power_map_ = lar_upm)
            poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                       debug=debug_precision)
            lar_switch_map[(i, )] = Return(poly_scheme)

        lar_result = SwitchBlock(lar_modk, lar_switch_map)

        # main scheme
        #Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        # scheme = Statement(ConditionBlock(cond, lar_result, result))

        Log.report(Log.Info, "Construction of the initial MDL scheme")
        scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                           ConditionBlock(cond, lar_result, result))

        return scheme
Exemple #21
0
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # constant computation
        invlog2 = round(1 / log(2), sollya_precision, sollya.RN)
        invlog2_cst = Constant(invlog2, precision=self.precision)

        #v_log2_hi = round(log(2), 16, sollya.RN)
        #v_log2_lo = round(log(2) - v_log2_hi, sollya_precision, sollya.RN)

        #log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi")
        #log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        v_log2_hi = round(
            log(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), sollya.RN)
        v_log2_lo = round(
            log(2) - v_log2_hi, self.precision.sollya_object, sollya.RN)
        log2_hi = Constant(v_log2_hi, precision=self.precision, tag="log2_hi")
        log2_lo = Constant(v_log2_lo, precision=self.precision, tag="log2_lo")

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debug_multi)

        int_precision = self.precision.get_integer_format()

        # table creation
        table_index_size = 7
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("inv_table"))
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        integer_precision = {
            ML_Binary32: ML_UInt32,
            ML_Binary64: ML_UInt64
        }[self.precision]

        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            inv_value = inv_approx_table[
                i]  # (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            value_high = round(
                log(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_multi,
                                          precision=self.precision)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debug_multi),
                self.precision.get_field_size() - 7,
                debug=debug_multi),
                                      0x7f,
                                      tag="table_index",
                                      debug=debug_multi)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=integer_precision),
                Constant(-2, precision=integer_precision),
                precision=integer_precision),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0), 1.0,
                                   pre_arg_red_index)

            #_red_vx        = arg_red_index * _vx_mant - 1.0
            _red_vx = FusedMultiplyAdd(arg_red_index,
                                       _vx_mant,
                                       1.0,
                                       specifier=FusedMultiplyAdd.Subtract)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_multi)

            inv_err = S2**-7
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_multi)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_multi)

            Log.report(Log.Verbose, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + sollya.x) / sollya.x, poly_degree,
                [1] + [self.precision] * (poly_degree), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
            _poly = PolynomialSchemeEvaluator.generate_estrin_scheme(
                poly_object, _red_vx, unified_precision=self.precision)

            _poly.set_attributes(tag="poly", debug=debug_multi)

            corr_exp = Conversion(
                _vx_exp if exp_corr_factor == None else _vx_exp +
                exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_multi)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + (_red_vx +
                                         (_red_vx * _poly +
                                          (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_multi)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                             debug=debug_multi)
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part", debug=debug_multi)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_multi)
            #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part = ((red_vx_lo +
                                    (red_vx_lo * _poly +
                                     (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_multi)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            return exact_log2_hi_exp + pre_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one

        result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one = compute_log(
            vx)
        result.set_attributes(tag="result", debug=debug_multi)
        new_result_one.set_attributes(tag="new_result_one", debug=debug_multi)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debug_multi,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debug_multi)

        # exp=-1 case
        Log.report(Log.Verbose, "managing exp=-1 case")

        result2 = (-log_inv_hi - log2_hi) + (
            (red_vx + poly * red_vx) - log2_lo - log_inv_lo)
        result2.set_attributes(tag="result2", debug=debug_multi)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _, _ = compute_log(vx * S2100,
                                                      exp_corr_factor=m100)

        Log.report(Log.Verbose, "managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_multi)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one",
                                debug=debug_multi,
                                likely=False)

        # main scheme
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))
                        #ConditionBlock(cond_one,
                        #Return(new_result_one),
                        #ConditionBlock(exp_mone,
                        #Return(result2),
                        #Return(result)
                        #)
                        #)
                    ))))
        scheme = pre_scheme

        return scheme
Exemple #22
0
    def generate_scheme(self):
        """Build the operation graph of the table-driven sine/cosine scheme.

        The input ``x`` is reduced to ``x = k * pi / 2**m + r`` where
        ``m = frac_pi_index`` (10 for ML_Binary32, 14 for ML_Binary64) and
        ``k`` is the nearest integer to ``x * 2**m / pi``.  The value
        ``cos(k * pi / 2**m)`` (and the matching sine, read from the same
        table with a quarter-period index shift) is loaded from a table of
        ``2**(m + 1)`` entries and combined with polynomial approximations
        of ``cos(r)`` and ``sin(r)``:

            cos_k * Pcos(r) - sin_k * Psin(r) - sin_k * r + cos_k

        When ``self.sin_output`` is set, ``k`` is shifted by
        ``3 * 2**(m - 1)`` (i.e. 3*pi/2) so that the same cosine evaluation
        yields the sine.

        Inputs with ``x >= ph_bound`` (2**10 for ML_Binary32, 2**33
        otherwise) are reduced with ``generate_payne_hanek``; NaN and
        infinite inputs return a quiet NaN after clearing exceptions.

        Returns:
            The root ``ConditionBlock`` of the generated scheme.

        Raises:
            KeyError: if ``self.precision`` is neither ML_Binary32 nor
                ML_Binary64 (precision-indexed lookup tables below).
        """
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        sollya_precision = self.precision.get_sollya_object()
        # number of bits kept in the high part of the Cody-Waite constants
        cw_hi_precision = self.precision.get_field_size() - 4

        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # threshold above which the large argument reduction is used
        if self.precision is ML_Binary32:
            ph_bound = S2**10
        else:
            ph_bound = S2**33

        # NOTE(review): the comparison is on vx, not on its absolute value,
        # so large negative inputs take the standard path -- confirm this is
        # intended.
        test_ph_bound = Comparison(vx,
                                   ph_bound,
                                   specifier=Comparison.GreaterOrEqual,
                                   precision=ML_Bool,
                                   likely=False)

        # argument reduction
        # m
        frac_pi_index = {ML_Binary32: 10, ML_Binary64: 14}[self.precision]

        # number of table entries (one full period, step pi / 2^m), the
        # matching bit mask and the index shift equivalent to pi / 2
        table_size = 2**(frac_pi_index + 1)
        index_mask = table_size - 1
        quarter_offset = 2**(frac_pi_index - 1)

        C0 = Constant(0, precision=int_precision)
        # NOTE(review): C1 is not referenced in this method; it is kept
        # because building a graph node may not be side-effect free --
        # confirm before dropping it.
        C1 = Constant(1, precision=int_precision)
        C_offset = Constant(3 * S2**(frac_pi_index - 1),
                            precision=int_precision)

        # 2^m / pi
        frac_pi = round(S2**frac_pi_index / pi, cw_hi_precision, sollya.RN)
        frac_pi_lo = round(S2**frac_pi_index / pi - frac_pi, sollya_precision,
                           sollya.RN)
        # pi / 2^m, high part
        inv_frac_pi = round(pi / S2**frac_pi_index, cw_hi_precision, sollya.RN)
        # pi / 2^m, low part
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)

        # computing k
        vx.set_attributes(tag="vx", debug=debug_multi)

        vx_pi = Addition(Multiplication(vx,
                                        Constant(frac_pi,
                                                 precision=self.precision),
                                        precision=self.precision),
                         Multiplication(vx,
                                        Constant(frac_pi_lo,
                                                 precision=self.precision),
                                        precision=self.precision),
                         precision=self.precision,
                         tag="vx_pi",
                         debug=debug_multi)

        k = NearestInteger(vx_pi,
                           precision=int_precision,
                           tag="k",
                           debug=debug_multi)
        # k in floating-point precision
        fk = Conversion(k,
                        precision=self.precision,
                        tag="fk",
                        debug=debug_multi)

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision,
                                   debug=debug_multi)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision,
                                      debug=debug_multi)

        # Cody-Waite reduction
        red_coeff1 = Multiplication(fk,
                                    inv_frac_pi_cst,
                                    precision=self.precision,
                                    exact=True)
        red_coeff2 = Multiplication(Negation(fk, precision=self.precision),
                                    inv_frac_pi_lo_cst,
                                    precision=self.precision,
                                    exact=True)

        # Should be exact / Sterbenz' Lemma
        pre_sub_mul = Subtraction(vx,
                                  red_coeff1,
                                  precision=self.precision,
                                  exact=True)

        # Fast2Sum
        s = Addition(pre_sub_mul,
                     red_coeff2,
                     precision=self.precision,
                     unbreakable=True,
                     tag="s",
                     debug=debug_multi)
        z = Subtraction(s,
                        pre_sub_mul,
                        precision=self.precision,
                        unbreakable=True,
                        tag="z",
                        debug=debug_multi)
        t = Subtraction(red_coeff2,
                        z,
                        precision=self.precision,
                        unbreakable=True,
                        tag="t",
                        debug=debug_multi)

        red_vx_std = Addition(s, t, precision=self.precision)
        red_vx_std.set_attributes(tag="red_vx_std", debug=debug_multi)

        # To compute sine we offset x by 3pi/2
        # which means add 3  * S2^(frac_pi_index-1) to k
        if self.sin_output:
            Log.report(Log.Info, "Computing Sin")
            offset_k = Addition(k,
                                C_offset,
                                precision=int_precision,
                                tag="offset_k")
        else:
            Log.report(Log.Info, "Computing Cos")
            offset_k = k

        # local variables assigned by either the standard or the large
        # argument reduction path (see the final scheme)
        modk = Variable("modk",
                        precision=int_precision,
                        var_type=Variable.Local)
        red_vx = Variable("red_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        # Faster modulo using bitwise logic
        modk_std = BitLogicAnd(offset_k,
                               index_mask,
                               precision=int_precision,
                               tag="modk",
                               debug=debug_multi)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        red_vx.set_interval(approx_interval)

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        Log.report(Log.Info,
                   "building tabulated approximation for sin and cos")

        def error_function(p, f, ai, mod, t):
            # approximation error measured as the sup-norm of f - p over ai;
            # the remaining arguments are accepted but not used
            return dirtyinfnorm(f - p, ai)

        # polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        # table of cos(i * pi / 2^m) over one full period
        cos_table = ML_NewTable(dimensions=[table_size, 1],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("cos_table"))

        for i in range(table_size):
            local_x = i * pi / S2**frac_pi_index
            cos_local = round(cos(local_x), self.precision.get_sollya_object(),
                              sollya.RN)
            cos_table[i][0] = cos_local

        # sin(a) = -cos(a + pi/2): the sine is read from the cosine table
        # with a quarter-period index shift and negated
        sin_index = Modulo(modk + quarter_offset,
                           table_size,
                           precision=int_precision,
                           tag="sin_index")  #, debug = debug_multi)
        tabulated_cos = TableLoad(cos_table,
                                  modk,
                                  C0,
                                  precision=self.precision,
                                  tag="tab_cos",
                                  debug=debug_multi)
        tabulated_sin = -TableLoad(cos_table,
                                   sin_index,
                                   C0,
                                   precision=self.precision,
                                   tag="tab_sin",
                                   debug=debug_multi)

        poly_degree_cos = sup(
            guessdegree(cos(sollya.x), approx_interval, S2**
                        -self.precision.get_precision()) + 2)
        poly_degree_sin = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -self.precision.get_precision()) + 2)

        poly_degree_cos_list = range(0, int(poly_degree_cos) + 3)
        poly_degree_sin_list = range(0, int(poly_degree_sin) + 3)

        # every coefficient of both polynomials is stored in the working
        # precision
        poly_cos_prec_list = [self.precision] * len(poly_degree_cos_list)
        poly_sin_prec_list = [self.precision] * len(poly_degree_sin_list)

        Log.report(Log.Info,
                   "building mathematical polynomials for sin and cos")
        # Polynomial approximations
        Log.report(Log.Info, "cos")
        poly_object_cos, poly_error_cos = Polynomial.build_from_approximation_with_error(
            cos(sollya.x),
            poly_degree_cos_list,
            poly_cos_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "sin")
        poly_object_sin, poly_error_sin = Polynomial.build_from_approximation_with_error(
            sin(sollya.x),
            poly_degree_sin_list,
            poly_sin_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        Log.report(
            Log.Info, "poly error cos: {} / {:d}".format(
                poly_error_cos, int(sollya.log2(poly_error_cos))))
        Log.report(
            Log.Info, "poly error sin: {0} / {1:d}".format(
                poly_error_sin, int(sollya.log2(poly_error_sin))))
        Log.report(Log.Info, "poly cos : %s" % poly_object_cos)
        Log.report(Log.Info, "poly sin : %s" % poly_object_sin)

        # Polynomial evaluation scheme
        # the degree-0 term of cos and the degree-0/1 terms of sin are left
        # out here and added back explicitly in the final sum
        poly_cos = polynomial_scheme_builder(
            poly_object_cos.sub_poly(start_index=1),
            red_vx,
            unified_precision=self.precision)
        poly_sin = polynomial_scheme_builder(
            poly_object_sin.sub_poly(start_index=2),
            red_vx,
            unified_precision=self.precision)
        poly_cos.set_attributes(tag="poly_cos", debug=debug_multi)
        poly_sin.set_attributes(tag="poly_sin",
                                debug=debug_multi,
                                unbreakable=True)

        # TwoProductFMA
        mul_cos_x = tabulated_cos * poly_cos
        mul_cos_y = FusedMultiplyAdd(tabulated_cos,
                                     poly_cos,
                                     -mul_cos_x,
                                     precision=self.precision)

        mul_sin_x = tabulated_sin * poly_sin
        mul_sin_y = FusedMultiplyAdd(tabulated_sin,
                                     poly_sin,
                                     -mul_sin_x,
                                     precision=self.precision)

        mul_coeff_sin_hi = tabulated_sin * red_vx
        mul_coeff_sin_lo = FusedMultiplyAdd(tabulated_sin, red_vx,
                                            -mul_coeff_sin_hi)

        mul_cos = Addition(mul_cos_x,
                           mul_cos_y,
                           precision=self.precision,
                           tag="mul_cos")  #, debug = debug_multi)
        mul_sin = Negation(Addition(mul_sin_x,
                                    mul_sin_y,
                                    precision=self.precision),
                           precision=self.precision,
                           tag="mul_sin")  #, debug = debug_multi)
        mul_coeff_sin = Negation(Addition(mul_coeff_sin_hi,
                                          mul_coeff_sin_lo,
                                          precision=self.precision),
                                 precision=self.precision,
                                 tag="mul_coeff_sin")  #, debug = debug_multi)

        mul_cos_x.set_attributes(
            tag="mul_cos_x", precision=self.precision)  #, debug = debug_multi)
        mul_cos_y.set_attributes(
            tag="mul_cos_y", precision=self.precision)  #, debug = debug_multi)
        mul_sin_x.set_attributes(
            tag="mul_sin_x", precision=self.precision)  #, debug = debug_multi)
        mul_sin_y.set_attributes(
            tag="mul_sin_y", precision=self.precision)  #, debug = debug_multi)

        # cos(a + r) = cos(a) * cos(r) - sin(a) * sin(r)
        cos_eval_d_1 = (((mul_cos + mul_sin) + mul_coeff_sin) + tabulated_cos)

        cos_eval_d_1.set_attributes(tag="cos_eval_d_1",
                                    precision=self.precision,
                                    debug=debug_multi)

        result_1 = Statement(Return(cos_eval_d_1))

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################
        # payne and hanek argument reduction for large arguments
        ph_k = frac_pi_index
        ph_frac_pi = round(S2**ph_k / pi, 1500, sollya.RN)

        ph_statement, ph_acc, ph_acc_int = generate_payne_hanek(vx,
                                                                ph_frac_pi,
                                                                self.precision,
                                                                n=100,
                                                                k=ph_k)

        # assigning Large Argument Reduction reduced variable
        lar_vx = Variable("lar_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        lar_red_vx = Addition(Multiplication(lar_vx,
                                             inv_frac_pi,
                                             precision=self.precision),
                              Multiplication(lar_vx,
                                             inv_frac_pi_lo,
                                             precision=self.precision),
                              precision=self.precision,
                              tag="lar_red_vx",
                              debug=debug_multi)

        # bring a negative integer part back into [0, 2^(m+1))
        C32 = Constant(table_size, precision=int_precision, tag="C32")
        ph_acc_int_red = Select(ph_acc_int < C0,
                                C32 + ph_acc_int,
                                ph_acc_int,
                                precision=int_precision,
                                tag="ph_acc_int_red")
        if self.sin_output:
            lar_offset_k = Addition(ph_acc_int_red,
                                    C_offset,
                                    precision=int_precision,
                                    tag="lar_offset_k")
        else:
            lar_offset_k = ph_acc_int_red

        ph_acc_int_red.set_attributes(tag="ph_acc_int_red", debug=debug_multi)
        lar_modk = BitLogicAnd(lar_offset_k,
                               index_mask,
                               precision=int_precision,
                               tag="lar_modk",
                               debug=debug_multi)

        lar_statement = Statement(ph_statement,
                                  ReferenceAssign(lar_vx,
                                                  ph_acc,
                                                  debug=debug_multi),
                                  ReferenceAssign(red_vx,
                                                  lar_red_vx,
                                                  debug=debug_multi),
                                  ReferenceAssign(modk, lar_modk),
                                  prevent_optimization=True)

        test_NaN_or_Inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               tag="NaN_or_Inf",
                               debug=debug_multi)
        return_NaN_or_Inf = Statement(Return(FP_QNaN(self.precision)))

        # NaN / Inf -> quiet NaN; otherwise pick the reduction path, assign
        # modk and red_vx, then evaluate the shared table + polynomial result
        scheme = ConditionBlock(
            test_NaN_or_Inf, Statement(ClearException(), return_NaN_or_Inf),
            Statement(
                modk, red_vx,
                ConditionBlock(
                    test_ph_bound, lar_statement,
                    Statement(
                        ReferenceAssign(modk, modk_std),
                        ReferenceAssign(red_vx, red_vx_std),
                    )), result_1))

        return scheme
Exemple #23
0
  def generate_scheme(self): 
    """Build the operation graph of a cosine scheme with per-octant polynomials.

    The absolute value of the input ``x`` is reduced by multiples of
    ``pi / 2**frac_pi_index`` (``frac_pi_index`` is 3, i.e. steps of pi/8),
    and one polynomial approximating ``cos(t + i * pi / 8)`` is generated
    for each ``i`` in ``range(16)``.

    NOTE(review): the visible body stops inside the gappa-specific branch
    of the second loop and has no return statement; the rest of the
    function appears to be missing from this excerpt.
    """
    # declaring CodeFunction and retrieving input variable
    # (the scheme works on |x|)
    vx = Abs(self.implementation.add_input_variable("x", self.precision), tag = "vx") 


    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag: 
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    # (injects the input value and the function name into every call)
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    # debug display attribute matching the working precision
    debug_precision = {ML_Binary32: debug_ftox, ML_Binary64: debug_lftolx}[self.precision]


    test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
    test_nan        = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
    test_positive   = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

    test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
    return_snan        = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    # NOTE(review): specific_return and test_nan_or_inf are not referenced in
    # the visible part of this function
    specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
    # return in case of standard (non-special) input

    sollya_precision = self.precision.get_sollya_object()
    # number of bits kept in the high part of pi / 2^frac_pi_index
    hi_precision = self.precision.get_field_size() - 3


    

    # argument reduction
    frac_pi_index = 3
    # 2^3 / pi, and pi / 2^3 split into a high and a low part
    frac_pi     = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
    inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi, sollya_precision, sollya.RN)
    # computing k = E(x * frac_pi)
    vx_pi = Multiplication(vx, frac_pi, precision = self.precision)
    k = NearestInteger(vx_pi, precision = ML_Int32, tag = "k", debug = True)
    fk = Conversion(k, precision = self.precision, tag = "fk")

    inv_frac_pi_cst    = Constant(inv_frac_pi, tag = "inv_frac_pi", precision = self.precision)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo, tag = "inv_frac_pi_lo", precision = self.precision)

    # reduced argument in the working precision: vx - k * (hi + lo)
    red_vx_hi = (vx - inv_frac_pi_cst * fk)
    red_vx_hi.set_attributes(tag = "red_vx_hi", debug = debug_precision, precision = self.precision)
    # NOTE(review): red_vx_lo_sub is tagged but not used afterwards in the
    # visible code; pre_red_vx rebuilds the same product
    red_vx_lo_sub = inv_frac_pi_lo_cst * fk
    red_vx_lo_sub.set_attributes(tag = "red_vx_lo_sub", debug = debug_precision, unbreakable = True, precision = self.precision)
    # same reduction carried out on a binary64 copy of the input
    vx_d = Conversion(vx, precision = ML_Binary64, tag = "vx_d")
    pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
    pre_red_vx_d_hi.set_attributes(tag = "pre_red_vx_d_hi", precision = ML_Binary64, debug = debug_lftolx)
    pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d.set_attributes(tag = "pre_red_vx_d", debug = debug_lftolx, precision = ML_Binary64)


    # k modulo 2^(frac_pi_index+1) = 16, tagged "switch_value"
    modk = Modulo(k, 2**(frac_pi_index+1), precision = ML_Int32, tag = "switch_value", debug = True)

    # the reduced argument is negated when bit 2^(frac_pi_index-1) of modk is set
    sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index-1)), 2**(frac_pi_index-1))
    red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
    red_vx.set_attributes(tag = "red_vx", debug = debug_precision, precision = self.precision)

    red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
    red_vx_d.set_attributes(tag = "red_vx_d", debug = debug_lftolx, precision = ML_Binary64)

    # interval [-pi/16, pi/16] on which every polynomial is generated
    approx_interval = Interval(-pi/(S2**(frac_pi_index+1)), pi / S2**(frac_pi_index+1))

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    error_goal_approx = S2**-self.precision.get_precision()


    Log.report(Log.Info, "building mathematical polynomial")
    poly_degree_vector = [None] * 2**(frac_pi_index+1)



    # approximation error measured as the sup-norm of f - p over ai
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    # indices whose polynomial was generated with a relative error constraint
    index_relative = []

    # one polynomial per index i, approximating cos(x + i * pi / 2^frac_pi_index)
    poly_object_vector = [None] * 2**(frac_pi_index+1)
    for i in range(2**(frac_pi_index+1)):
      sub_func = cos(sollya.x+i*pi/S2**frac_pi_index)
      degree = int(sup(guessdegree(sub_func, approx_interval, error_goal_approx))) + 1

      degree_list = range(degree+1)
      a_interval = approx_interval
      if i == 0:
        # ad-hoc, TODO: to be cleaned
        # (even monomials only, up to degree 6)
        degree = 6
        degree_list = range(0, degree+1, 2)
      elif i % 2**(frac_pi_index) == 2**(frac_pi_index-1):
        # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x) 
        # must be generated
        # (odd monomials only)
        degree_list = range(1, degree+1, 2)

      # indices 3, 5, 7 and 9 get a binary64 constant coefficient
      if i == 3 or i == 5 or i == 7 or i == 9: 
        precision_list =  [sollya.binary64] + [sollya.binary32] *(degree)
      else:
        precision_list = [sollya.binary32] * (degree+1)

      poly_degree_vector[i] = degree 

      constraint = sollya.absolute
      delta = (2**(frac_pi_index - 3))
      centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index-1)
      # NOTE(review): with frac_pi_index = 3, delta is 1, so no integer
      # centered_i satisfies this test and the relative constraint is never
      # selected -- confirm this is intended
      if centered_i < delta and centered_i > -delta and centered_i != 0:
        constraint = sollya.relative
        index_relative.append(i)
      Log.report(Log.Info, "generating approximation for %d/%d" % (i, 2**(frac_pi_index+1)))
      poly_object_vector[i], _ = Polynomial.build_from_approximation_with_error(sub_func, degree_list, precision_list, a_interval, constraint, error_function = error_function) 


    # unified power map for red_sx^n
    # (the same dict is passed to every polynomial scheme built below)
    upm = {}
    rel_error_list = []

    # evaluation scheme of each polynomial, indexed like poly_object_vector
    poly_scheme_vector = [None] * (2**(frac_pi_index+1))

    for i in range(2**(frac_pi_index+1)):
      poly_object = poly_object_vector[i]
      poly_precision = self.precision
      if i == 3 or i == 5 or i == 7 or i == 9: 
          # c0 + c1 * red_vx is evaluated in binary64; the remaining terms
          # are evaluated in the working precision and scaled by red_vx_d^2
          poly_precision = ML_Binary64
          c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = ML_Binary64)
          c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
          poly_hi = (c0 + c1 * red_vx)
          poly_hi.set_precision(ML_Binary64)
          red_vx_d_2 = red_vx_d * red_vx_d
          poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(poly_object.sub_poly(start_index = 2, offset = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
          poly_scheme.set_attributes(unbreakable = True)
      elif i == 4:
          # degree-1 term taken in binary64 on red_vx_d, remaining terms in
          # the working precision
          c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = ML_Binary64)
          poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
          poly_scheme.set_precision(ML_Binary64)
      else:
          poly_scheme = polynomial_scheme_builder(poly_object, red_vx, unified_precision = poly_precision, power_map_ = upm)
      #if i == 3:
      #  c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = self.precision)
      #  c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
      #  poly_scheme = (c0 + c1 * red_vx) + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

      poly_scheme.set_attributes(tag = "poly_cos%dpi%d" % (i, 2**(frac_pi_index)), debug = debug_precision)
      poly_scheme_vector[i] = poly_scheme



      #try:
      # gappa-specific processing, only for index 3 and only when gappa is
      # installed
      if is_gappa_installed() and i == 3:
          opt_scheme = self.opt_engine.optimization_process(poly_scheme, self.precision, copy = True, fuse_fma = self.fuse_fma)

          tag_map = {}
          self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

          gappa_vx = Variable("red_vx", precision = self.precision, interval = approx_interval)

          # both reduced-argument nodes are mapped to the same gappa variable
          cg_eval_error_copy_map = {
              tag_map["red_vx"]:    gappa_vx, 
              tag_map["red_vx_d"]:  gappa_vx,
          }
Exemple #24
0
    def generate_scheme(self):
        """Build the operation graph of the log2(x) implementation.

        The graph is assembled in the following order:

        * declaration of the input variable ``x``;
        * a two-column table holding, for each index of the target's
          division-seed table, log2 of the seed value split into a high
          part and a low part;
        * ``compute_log``, a local helper building the table-based
          evaluation for a given input node;
        * the predicates on the input and the alternative results
          (exponent == -1, subnormal input);
        * the nested ``ConditionBlock`` dispatching between those cases.

        Returns:
            A ``Statement`` made of the main ``result`` node followed by the
            condition tree.
        """
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            """Call RaiseReturn with the input and function name pre-filled."""
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # testing special value inputs
        # NOTE(review): the four tests below and return_snan are not
        # referenced by the scheme returned at the end of this method; the
        # predicates actually used are rebuilt further down (neg_input,
        # vx_nan_or_inf, vx_snan, ...).
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")
        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        # if input is a signaling NaN, raise an invalid exception and returns
        # a quiet NaN
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        # NOTE(review): int_precision is not used later in this method.
        int_precision = self.precision.get_integer_format()

        # log2(vx)
        # r = vx_mant
        # e = vx_exp
        # vx reduced to r in [1, 2[
        # log2(vx) = log2(r * 2^e)
        #          = log2(r) + e
        #
        ## log2(r) is approximated by
        #  log2(r) = log2(inv_seed(r) * r / inv_seed(r)
        #          = log2(inv_seed(r) * r) - log2(inv_seed(r))
        # inv_seed(r) in ]1/2, 1] => log2(inv_seed(r)) in ]-1, 0]
        #
        # inv_seed(r) * r ~ 1
        # we can easily tabulate -log2(inv_seed(r))
        #

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)
        # table creation: column 0 holds the high part and column 1 the low
        # part of log2(inv_approx_table[i][0])
        table_index_size = 7
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("inv_table"))
        # value for index 0 is set to 0.0
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            #print inv_approx_table[i][0], inv_value
            inv_value = inv_approx_table[i][0]
            # the high part is rounded on fewer bits than the full format
            # (field size minus exponent size minus one); the low part is
            # the remainder rounded to the input format
            value_high_bitsize = self.precision.get_field_size() - (
                self.precision.get_exponent_size() + 1)
            value_high = round(log2(inv_value), value_high_bitsize, sollya.RN)
            value_low = round(
                log2(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        def compute_log(_vx, exp_corr_factor=None):
            """Build the table-based log2 evaluation graph for ``_vx``.

            Args:
                _vx: input node whose logarithm is evaluated.
                exp_corr_factor: optional value added to the exponent
                    extracted from ``_vx``; ``None`` means no correction.

            Returns:
                The tuple ``(_result, _poly, _log_inv_lo, _log_inv_hi,
                _red_vx)``: the final result node followed by the
                intermediate nodes reused by the caller.
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # The main table is indexed by the 7 most significant bits
            # of the mantissa
            table_index = inv_approx_table.index_function(_vx_mant)
            table_index.set_attributes(tag="table_index", debug=debuglld)

            # argument reduction
            # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            # index 0 of the table stores 0.0, so the matching seed is
            # forced to exactly 1.0
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            # reduced argument: seed * mantissa - 1
            _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
            inv_err = S2**-inv_approx_table.index_size
            # NOTE(review): red_interval is not used later in this helper.
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            Log.report(Log.Verbose, "building mathematical polynomial")
            # polynomial approximation of log2(1 + x) / x on
            # [-inv_err, inv_err]
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log2(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() * 1.1))) + 1
            sollya.settings.display = sollya.hexadecimal
            global_poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                log2(1 + sollya.x) / sollya.x,
                poly_degree, [self.precision] * (poly_degree + 1),
                approx_interval,
                sollya.absolute,
                error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm(
                    p - f, ai))
            Log.report(
                Log.Info, "poly_degree={}, approx_error={}".format(
                    poly_degree, approx_error))
            # the constant coefficient is handled apart (see the FMA below)
            poly_object = global_poly_object.sub_poly(start_index=1, offset=1)
            #poly_object = global_poly_object.sub_poly(start_index=0,offset=0)

            # presumably these defaults apply to the nodes created until the
            # matching unset calls below -- TODO confirm
            Attributes.set_default_silent(True)
            Attributes.set_default_rounding_mode(ML_RoundToNearest)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            # _poly = pre_poly * _red_vx + c0
            _poly = FMA(pre_poly, _red_vx,
                        global_poly_object.get_cst_coeff(0, self.precision))
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            Log.report(
                Log.Verbose, "sollya global_poly_object: {}".format(
                    global_poly_object.get_sollya_object()))
            Log.report(
                Log.Verbose, "sollya poly_object: {}".format(
                    poly_object.get_sollya_object()))

            # identity test against None: equality could be overloaded by
            # an operand type, identity cannot
            corr_exp = _vx_exp if exp_corr_factor is None else _vx_exp + exp_corr_factor

            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()

            # result = e + (-log_inv_hi + (red_vx * poly - log_inv_lo))
            pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision)
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            _result = exact_log2_hi_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx

        result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_lftolx)

        # specific input value predicate
        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="vx_snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="vx_inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # Specific path for the case exp == -1
        # log2(x) = log2(m) - 1
        #
        # as m in [1, 2[, log2(m) in [0, 1[
        # if r is close to 2, a catastrophic cancellation can occur
        #
        # r = seed(m)
        # log2(x) = log2(seed(m) * m / seed(m)) - 1
        #         = log2(seed(m) * m) - log2(seed(m)) - 1
        #
        # for m really close to 2 => seed(m) = 0.5
        #     => log2(x) = log2(0.5 * m)
        #                =
        result_exp_m1 = (-log_inv_hi - 1.0) + FMA(poly, red_vx, -log_inv_lo)
        result_exp_m1.set_attributes(tag="result_exp_m1", debug=debug_lftolx)

        # subnormal inputs are scaled by 2^100 before evaluation and the
        # extracted exponent is corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _ = compute_log(vx * S2100,
                                                   exp_corr_factor=m100)
        result_subnormal.set_attributes(tag="result_subnormal",
                                        debug=debug_lftolx)

        # NOTE(review): the "close to one" path below (poly_one, result_one,
        # cond_one) is built but never referenced by pre_scheme. It also
        # approximates the natural logarithm (log) rather than log2 --
        # confirm before wiring it into the scheme.
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        # the free variable and the error kind are taken from the sollya
        # module, as everywhere else in this method
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        #   x < 0           -> invalid exception, quiet NaN
        #   x is inf        -> +inf
        #   x is NaN        -> quiet NaN (invalid raised for a signaling NaN)
        #   x is zero       -> divide-by-zero exception, -inf
        #   x is subnormal  -> result_subnormal
        #   x == 1          -> +0
        #   exponent == -1  -> result_exp_m1
        #   otherwise       -> result
        # (the zero test is nested under the vx_subnormal branch)
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Statement(ClearException(), result_subnormal,
                                  Return(result_subnormal))),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result_exp_m1),
                                       Return(result))))))
        scheme = Statement(result, pre_scheme)
        return scheme
Exemple #25
0
  def generate_scheme(self):
    """Build the operation graph of the log(1 + x) implementation.

    Two evaluation paths are built: a direct polynomial in ``x`` used when
    the exponent of ``x`` is below -7 ("ctz" path), and a table-based path
    working on ``x + 1`` (reciprocal seed, split log table, polynomial on
    the reduced argument). A nested ``ConditionBlock`` selects between the
    special cases and these two paths.

    Returns:
        The top-level ``ConditionBlock`` of the scheme.
    """
    vx = self.implementation.add_input_variable("x", self.precision) 
    sollya_precision = self.get_input_precision().sollya_object

    # local overloading of RaiseReturn operation
    # NOTE(review): ExpRaiseReturn is not called anywhere in this method.
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)


    # log(2) split into a high part, rounded on
    # (field size - exponent size - 1) bits, and a low part holding the
    # remainder rounded to the working format
    log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
    log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

    log2_hi = Constant(log2_hi_value, precision = self.precision)
    log2_lo = Constant(log2_lo_value, precision = self.precision)

    # NOTE(review): vx_exp is built a second time, identically, after the
    # table creation below.
    vx_exp  = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    int_precision = self.precision.get_integer_format()

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision = self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    # table creation: column 0 holds the high part and column 1 the low part
    # of log(inv_approx_table[i]); entry 0 is forced to 0.0
    table_index_size = 7
    log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
    log_table[0][0] = 0.0
    log_table[0][1] = 0.0
    for i in range(1, 2**table_index_size):
        #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
        inv_value = inv_approx_table[i] # (1.0 + (inv_approx_table[i] / S2**9) ) * S2**-1
        value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
        log_table[i][0] = value_high
        log_table[i][1] = value_low


    vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    # case close to 0: ctz
    # selected when the exponent of x is below ctz_exp_limit; the result is
    # x * P(x) with P approximating log1p(x) / x on [-2^-7, 2^-7]
    ctz_exp_limit = -7
    ctz_cond = vx_exp < ctz_exp_limit
    ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

    ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
    ctz_poly_object = Polynomial.build_from_approximation(log1p(sollya.x)/sollya.x, ctz_poly_degree, [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision = self.precision)
    ctz_poly.set_attributes(tag = "ctz_poly", debug = debug_lftolx)

    ctz_result = vx * ctz_poly

    # input predicates
    # NOTE(review): neg_input is a strict "x < -1" test, so x == -1 is not
    # caught here and follows the generic paths of the scheme -- confirm
    # that this is intended.
    neg_input = Comparison(vx, -1, likely = False, specifier = Comparison.Less, debug = debugd, tag = "neg_input")
    vx_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debugd, tag = "nan_or_inf")
    vx_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False, debug = debugd, tag = "snan")
    vx_inf  = Test(vx, specifier = Test.IsInfty, likely = False, debug = debugd, tag = "inf")
    vx_subnormal = Test(vx, specifier = Test.IsSubnormal, likely = False, debug = debugd, tag = "vx_subnormal")
    
    # NOTE(review): the three "new_log" objects below are not referenced
    # again in this method.
    log_function_code = CodeFunction("new_log", [Variable("x", precision = ML_Binary64)], output_format = ML_Binary64) 
    log_call_generator = FunctionOperator(log_function_code.get_name(), arity = 1, output_precision = ML_Binary64, declare_prototype = log_function_code)
    newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64,), ML_Binary64, log_call_generator)


    # case away from 0.0
    pre_vxp1 = vx + 1.0
    pre_vxp1.set_attributes(tag = "pre_vxp1", debug = debug_lftolx)
    pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag = "pre_vxp1_exp", debug = debugd)
    # when the exponent of x + 1 exceeds 2^(exponent size - 2), x + 1 is
    # multiplied by 2^-500; otherwise the scaling factor is 2^0
    cm500 = Constant(-500, precision = ML_Int32)
    c0 = Constant(0, precision = ML_Int32)
    cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
    scaling_factor_exp = Select(cond_scaling, cm500, c0)
    scaling_factor = ExponentInsertion(scaling_factor_exp, precision = self.precision, tag = "scaling_factor")

    vxp1 = pre_vxp1 * scaling_factor
    vxp1.set_attributes(tag = "vxp1", debug = debug_lftolx)
    vxp1_exp = ExponentExtraction(vxp1, tag = "vxp1_exp", debug = debugd)

    vxp1_inv = ReciprocalSeed(vxp1, precision = self.precision, tag = "vxp1_inv", debug = debug_lftolx, silent = True)

    # 2^(-exponent of vxp1), used as the seed when the table index is 0
    vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision = self.precision, tag = "vxp1_dirty_inv", debug = debug_lftolx)

    # table index: vxp1 reinterpreted as an integer, shifted right by
    # (field size - 7) and masked to 7 bits
    table_index = BitLogicAnd(BitLogicRightShift(TypeCast(vxp1, precision = int_precision, debug = debuglx), self.precision.get_field_size() - 7, debug = debuglx), 0x7f, tag = "table_index", debug = debuglx) 

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    # the seed is reinterpreted as an unsigned 64-bit integer and ANDed with
    # -2, which clears its least significant bit
    pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision = ML_UInt64), Constant(-2, precision = ML_UInt64), precision = ML_UInt64), precision = self.precision, tag = "pre_arg_red_index", debug = debug_lftolx)
    arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag = "arg_red_index", debug = debug_lftolx)

    # reduced argument: seed * (x + 1) - 1, computed from vxp1 when scaling
    # is active and as (seed * x - 1) + seed otherwise
    red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
    #red_vxp1 = arg_red_index * vxp1 - 1.0
    red_vxp1.set_attributes(tag = "red_vxp1", debug = debug_lftolx)

    log_inv_lo = TableLoad(log_table, table_index, 1, tag = "log_inv_lo", debug = debug_lftolx) 
    log_inv_hi = TableLoad(log_table, table_index, 0, tag = "log_inv_hi", debug = debug_lftolx)

    inv_err = S2**-6 # TODO: link to target DivisionSeed precision

    Log.report(Log.Info, "building mathematical polynomial")
    # polynomial approximation of log(1 + x) / x on [-inv_err, inv_err]
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
    global_poly_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
    poly_object = global_poly_object.sub_poly(start_index = 1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision = self.precision)
    _poly.set_attributes(tag = "poly", debug = debug_lftolx)
    Log.report(Log.Info, global_poly_object.get_sollya_object())


    # NOTE(review): vxp1_inv_exp is not used below; corr_exp is derived from
    # vxp1_exp and scaling_factor_exp instead.
    vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag = "vxp1_inv_exp", debug = debugd)
    corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision = self.precision)# vxp1_inv_exp

    #poly = (red_vxp1) * (1 +  _poly)
    #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

    # NOTE(review): pre_result is not referenced by the returned scheme; the
    # returned value is std_result, assembled step by step below.
    pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo))
    pre_result.set_attributes(tag = "pre_result", debug = debug_lftolx)
    exact_log2_hi_exp = - corr_exp * log2_hi
    exact_log2_hi_exp.set_attributes(tag = "exact_log2_hi_exp", debug = debug_lftolx, prevent_optimization = True)
    #std_result =  exact_log2_hi_exp + pre_result

    exact_log2_lo_exp = - corr_exp * log2_lo
    exact_log2_lo_exp.set_attributes(tag = "exact_log2_lo_exp", debug = debug_lftolx)#, prevent_optimization = True)
    
    # std_result = exact_log2_hi_exp
    #              + (-log_inv_hi + ((red_vxp1 * _poly + init) + red_vxp1))
    # with init = exact_log2_lo_exp - log_inv_lo
    init = exact_log2_lo_exp  - log_inv_lo
    init.set_attributes(tag = "init", debug = debug_lftolx, prevent_optimization = True)
    fma0 = (red_vxp1 * _poly + init) # - log_inv_lo)
    fma0.set_attributes(tag = "fma0", debug = debug_lftolx)
    # step0 is the same node object as fma0, so the call below overwrites
    # the tag set just above
    step0 = fma0 
    step0.set_attributes(tag = "step0", debug = debug_lftolx) #, prevent_optimization = True)
    step1 = step0 + red_vxp1
    step1.set_attributes(tag = "step1", debug = debug_lftolx, prevent_optimization = True)
    step2 = -log_inv_hi + step1
    step2.set_attributes(tag = "step2", debug = debug_lftolx, prevent_optimization = True)
    std_result = exact_log2_hi_exp + step2
    std_result.set_attributes(tag = "std_result", debug = debug_lftolx, prevent_optimization = True)


    # main scheme
    #   x < -1             -> invalid exception, quiet NaN
    #   x is inf           -> +inf
    #   x is NaN           -> quiet NaN (invalid raised for a signaling NaN)
    #   x is subnormal     -> x itself
    #   exponent of x < -7 -> ctz_result
    #   otherwise          -> std_result
    Log.report(Log.Info, "MDL scheme")
    pre_scheme = ConditionBlock(neg_input,
        Statement(
            ClearException(),
            Raise(ML_FPE_Invalid),
            Return(FP_QNaN(self.precision))
        ),
        ConditionBlock(vx_nan_or_inf,
            ConditionBlock(vx_inf,
                Statement(
                    ClearException(),
                    Return(FP_PlusInfty(self.precision)),
                ),
                Statement(
                    ClearException(),
                    ConditionBlock(vx_snan,
                        Raise(ML_FPE_Invalid)
                    ),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(vx_subnormal,
                Return(vx),
                ConditionBlock(ctz_cond,
                    Statement(
                        Return(ctz_result),
                    ),
                    Statement(
                        Return(std_result)
                    )
                )
            )
        )
    )
    scheme = pre_scheme
    return scheme
Exemple #26
0
    def generate_scheme(self):
        #func_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        log2_hi_value = round(
            log10(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log10(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        table_index_size = 7
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in table_index_range:
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            inv_value = inv_approx_table[i][0]
            value_high = round(
                log10(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log10(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        # determining log_table range
        high_index_function = lambda table, i: table[i][0]
        low_index_function = lambda table, i: table[i][1]
        table_high_interval = log_table.get_subset_interval(
            high_index_function, table_index_range)
        table_low_interval = log_table.get_subset_interval(
            low_index_function, table_index_range)

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debuglx),
                self.precision.get_field_size() - 7,
                debug=debuglx),
                                      0x7f,
                                      tag="table_index",
                                      debug=debuglld)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            #if not processor.is_supported_operation(arg_red_index):
            #    if self.precision != ML_Binary32:
            #        arg_red_index = DivisionSeed(Conversion(_vx_mant, precision = ML_Binary32), precision = ML_Binary32,
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-7
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_lftolx,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            print("building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log10(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log10(1 + x) / x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object  #.sub_poly(start_index = 1)

            print("generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            print(global_poly_object.get_sollya_object())

            corr_exp = Conversion(
                _vx_exp if exp_corr_factor == None else _vx_exp +
                exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_ddtolx)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + ((_red_vx * _poly +
                                          (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part", debug=debug_lftolx)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_lftolx)
            #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part = ((red_vx_lo +
                                    (red_vx_lo * _poly +
                                     (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_lftolx)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            _result = exact_log2_hi_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one, corr_exp

        result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one, corr_exp = compute_log(
            vx)
        result.set_attributes(tag="result", debug=debug_lftolx)
        new_result_one.set_attributes(tag="new_result_one", debug=debug_lftolx)

        # building eval error map
        eval_error_map = {
            red_vx:
            Variable("red_vx",
                     precision=self.precision,
                     interval=red_vx.get_interval()),
            log_inv_hi:
            Variable("log_inv_hi",
                     precision=self.precision,
                     interval=table_high_interval),
            log_inv_lo:
            Variable("log_inv_lo",
                     precision=self.precision,
                     interval=table_low_interval),
            corr_exp:
            Variable("corr_exp_g",
                     precision=self.precision,
                     interval=self.precision.get_exponent_interval()),
        }
        # computing gappa error
        if is_gappa_installed():
            poly_eval_error = self.get_eval_error(result, eval_error_map)
            print("poly_eval_error: ", poly_eval_error)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # exp=-1 case
        print("managing exp=-1 case")
        #red_vx_2 = arg_red_index * vx_mant * 0.5
        #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err)
        #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1
        #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute)
        #print "poly_object2: ", poly_object2.get_sollya_object()
        #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision)
        #poly2.set_attributes(tag = "poly2", debug = debug_lftolx)
        #result2 = (poly2 - log_inv_hi - log_inv_lo)

        log_subtract = -log_inv_hi - log2_hi
        log_subtract.set_attributes(tag="log_subtract", debug=debug_lftolx)
        result2 = (log_subtract) + ((poly * red_vx) - (log_inv_lo + log2_lo))
        result2.set_attributes(tag="result2", debug=debug_lftolx)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _, _, _ = compute_log(vx * S2100,
                                                         exp_corr_factor=m100)

        print("managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log10(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log10(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        print("MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))
                        #ConditionBlock(cond_one,
                        #Return(new_result_one),
                        #ConditionBlock(exp_mone,
                        #Return(result2),
                        #Return(result)
                        #)
                        #)
                    ))))
        scheme = pre_scheme
        return scheme
Exemple #27
0
  def generate_scheme(self):
    """Build the evaluation scheme approximating expm1 of the input ``x``.

    The returned scheme is a tree of ``ConditionBlock`` nodes:

    * NaN input returns a quiet NaN; infinite input returns +infinity when
      ``x > 0`` and -1 otherwise;
    * inputs above ``expm1_overflow_bound`` return +infinity and inputs
      below ``expm1_underflow_bound`` return -1;
    * otherwise ``x`` is reduced to ``r = x - k * log(2)`` (``log(2)`` being
      split into ``log2_hi + log2_lo``), a polynomial approximation of
      ``expm1`` over ``r_interval`` is evaluated at ``r`` and the result is
      rebuilt as ``exp_k * (poly + (1 - exp_mk))``. Dedicated branches are
      used when ``ik`` exceeds ``emax`` or ``k`` is at most ``emin_normal``.

    Returns:
        The root ``ConditionBlock`` of the scheme.
    """
    # declaring target and instantiating optimization engine

    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation: every raise/return node
    # built through it carries the input variable and the function name
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    C_m1 = Constant(-1, precision = self.precision)

    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    # only evaluated on the inf/NaN path once NaN has been excluded, where
    # "x > 0" tells +infinity apart from -infinity
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False)

    #  Infnty input
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)

    # Over/Underflow Tests

    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))

    # only ML_Binary32 and ML_Binary64 are mapped here; any other
    # precision raises KeyError
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]

    # Constants

    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)

    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    # log(2) is split into a high part rounded on fewer bits than the
    # format's field size and a low part holding the remainder
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)


    # Reduction
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")

    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)

    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)

    # r = s + t, where s = pre_sub_mul + red_coeff2 and
    # t = red_coeff2 - (s - pre_sub_mul) is the part of red_coeff2 that
    # the first addition did not capture
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)

    r = Addition(s, t, precision = self.precision)

    r.set_attributes(tag = "r", debug = debug_multi)

    r_interval = Interval(-log_2/S2, log_2/S2)

    local_ulp = sup(ulp(exp(r_interval), self.precision))

    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)


    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")

    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)

    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    # only the monomials of index >= 2 go through the Horner scheme; r is
    # added back separately below
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)

    # presumably 2**ik and 2**-ik built by exponent insertion -- confirm
    # against the ExponentInsertion definition
    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)

    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi)

    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")

    # Integer (floor) division on purpose: the offset is subtracted from the
    # integer ik and inserted as an exponent, so it has to stay an int. True
    # division would produce a float under Python 3 (e.g. a non-integral
    # offset when the field size is odd).
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() // 2)
    diff_k = ik - overflow_exp_offset

    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)

    # the scaling is applied in two steps (diff_k, then the offset)
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0

    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False),
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)),
        Return(late_overflow_result)
        )


    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)

    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset

    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)

    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0

    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)

    # raise-and-return when the result is subnormal, plain return otherwise
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)),
            Return(late_underflow_result)
            )

    # Reconstruction

    # exp_k * (poly + 1 - exp_mk)
    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)

    result_scheme = ConditionBlock(
        late_overflow_test,
        late_overflow_return,
        ConditionBlock(
            late_underflow_test,
            late_underflow_return,
            Return(std_result)
            )
        )

    std_return = ConditionBlock(
        overflow_test,
        overflow_return,
        ConditionBlock(
            underflow_test,
            underflow_return,
            result_scheme)
        )

    scheme = ConditionBlock(
        test_NaN_or_inf,
        Statement(specific_return),
        std_return
        )

    return scheme
Exemple #28
0
    def standard_test_cases(self):
        """Return the hand-picked regression inputs for the rootn test bench.

        Each entry is a tuple ``(x, n)`` or ``(x, n, expected)``; judging
        from the accompanying comments, ``x`` is the floating-point input,
        ``n`` the integer root index and ``expected`` the reference result
        when one is imposed.

        Returns:
            ``fp_64_only`` (when the precision is at least 64-bit wide)
            followed by ``fp_32_only`` (when it is exactly 32-bit wide)
            followed by ``general_list``.
        """
        general_list = [
            # ERROR: rootn: inf ulp error at {inf, -2}: *0x0p+0 vs. inf (0x7f800000) at index: 1226
            (FP_PlusInfty(self.precision), -2, FP_PlusZero(self.precision)),
            # ERROR: rootn: inf ulp error at {inf, -2147483648}: *0x0.0000000000000p+0 vs. inf
            (FP_PlusInfty(self.precision), -2147483648,
             FP_PlusZero(self.precision)),
            #
            (FP_PlusZero(self.precision), -1, FP_PlusInfty(self.precision)),
            (FP_MinusInfty(self.precision), 1, FP_MinusInfty(self.precision)),
            (FP_MinusInfty(self.precision), -1, FP_MinusZero(self.precision)),
            # ERROR coucou7: rootn: -inf ulp error at {inf 7f800000, 479638026}: *inf vs. 0x1.000018p+0 (0x3f80000c) at index: 2367
            (FP_PlusInfty(self.precision), 479638026,
             FP_PlusInfty(self.precision)),
            (FP_MinusInfty(self.precision), 479638026),
            #(FP_MinusInfty(self.precision), -479638026),
            #(FP_PlusInfty(self.precision), -479638026),
            # rootn( ±0, n) is ±∞ for odd n< 0.
            (FP_PlusZero(self.precision), -1337, FP_PlusInfty(self.precision)),
            (FP_MinusZero(self.precision), -1337,
             FP_MinusInfty(self.precision)),
            # rootn( ±0, n) is +∞ for even n< 0.
            (FP_PlusZero(self.precision), -1330, FP_PlusInfty(self.precision)),
            # rootn( ±0, n) is +0 for even n> 0.
            # (the range starts at 2: n = 0 is even but yields NaN, which
            #  would contradict the +0 expected here)
            (FP_PlusZero(self.precision), random.randrange(2, 2**31, 2),
             FP_PlusZero(self.precision)),
            (FP_MinusZero(self.precision), random.randrange(2, 2**31, 2),
             FP_PlusZero(self.precision)),
            # rootn( ±0, n) is ±0 for odd n> 0.
            (FP_PlusZero(self.precision), random.randrange(1, 2**31, 2),
             FP_PlusZero(self.precision)),
            (FP_MinusZero(self.precision), random.randrange(1, 2**31, 2),
             FP_MinusZero(self.precision)),
            # rootn( x, n) returns a NaN for x< 0 and n is even.
            # (1.0 - random.random() lies in (0, 1], so x is strictly
            #  negative; random.random() alone may return 0.0)
            (-(1.0 - random.random()), 2 * random.randrange(1, 2**30),
             FP_QNaN(self.precision)),
            # rootn( x, 0 ) returns a NaN
            (random.random(), 0, FP_QNaN(self.precision)),
            # vx=nan
            (sollya.parse("-nan"), -1811577079, sollya.parse("nan")),
            (sollya.parse("-nan"), 832501219, sollya.parse("nan")),
            (sollya.parse("-nan"), -857435762, sollya.parse("nan")),
            (sollya.parse("-nan"), -1503049611, sollya.parse("nan")),
            (sollya.parse("-nan"), 2105620996, sollya.parse("nan")),
            #ERROR: rootn: inf ulp error at {-nan, 832501219}: *-nan vs. -0x1.00000df2bed98p+1
            #ERROR: rootn: inf ulp error at {-nan, -857435762}: *-nan vs. 0x1.0000000000000p+1
            #ERROR: rootn: inf ulp error at {-nan, -1503049611}: *-nan vs. -0x1.0000000000000p+1
            #ERROR: rootn: inf ulp error at {-nan, 2105620996}: *-nan vs. 0x1.00000583c4b7ap+1
            (sollya.parse("-0x1.cd150ap-105"), 105297051),
            (sollya.parse("0x1.ec3bf8p+71"), -1650769017),
            # test-case #12
            (0.1, 17),
            # test-case #11, fails in OpenCL CTS
            (sollya.parse("0x0.000000001d600p-1022"), 14),
            # test-case #10, fails test with dar(2**-23)
            (sollya.parse("-0x1.20aadp-114"), 17),
            # test-case #9
            (sollya.parse("0x1.a44d8ep+121"), 7),
            # test-case #8
            (sollya.parse("-0x1.3ef124p+103"), 3),
            # test-case #7
            (sollya.parse("-0x1.01047ep-2"), 39),
            # test-case #6
            (sollya.parse("-0x1.0105bp+67"), 23),
            # test-case #5
            (sollya.parse("0x1.c1f72p+51"), 6),
            # special cases
            (sollya.parse("0x0p+0"), 1),
            (sollya.parse("0x0p+0"), 0),
            # test-case #3, catastrophic error for n=1
            (sollya.parse("0x1.fc61a2p-121"), 1.0),
            # test-case #4 , k=14 < 0 not supported by bigfloat
            # (sollya.parse("0x1.ad067ap-66"), -14),
        ]
        # NOTE: expected value assumed 32-bit precision output
        fp_32_only = [
            #
            (sollya.parse("0x1.80bb0ep+70"), 377778829,
             sollya.parse("0x1.000002p+0")),
        ]
        # NOTE: the following test-case are only valid if meta-function supports 64-bit integer
        #       2nd_input
        # NOTE(review): several entries below appear more than once; they
        #       are kept as-is so the returned list is unchanged.
        fp_64_only = [
            (sollya.parse("0x1.fffffffffffffp+1023"), -1,
             sollya.parse("0x0.4000000000000p-1022")),
            (sollya.parse("-0x1.fffffffffffffp1023"), -1,
             sollya.parse("-0x0.4000000000000p-1022")),
            #(sollya.parse("-0x1.fffffffffffffp+1023"), 1),
            #(sollya.parse("0x1.fffffffffffffp+1023"), -1),
            # ERROR coucou8: rootn: inf ulp error at {-inf, 1854324695}: *-inf vs. -0x1.0000066bfdd60p+0
            (FP_MinusInfty(self.precision), 1854324695,
             FP_MinusInfty(self.precision)),
            # ERROR: rootn: -60.962402 ulp error at {0x0.000000001d600p-1022, 14}: *0x1.67d4ff97d1fd9p-76 vs. 0x1.67d4ff97d1f9cp-76
            (sollya.parse("0x0.000000001d600p-1022"), 14,
             sollya.parse("0x1.67d4ff97d1fd9p-76")),
            # ERROR: rootn: -430452000.000000 ulp error at {0x1.ffffffff38c00p-306, 384017876}: *0x1.ffffed870ff01p-1 vs. 0x1.ffffebec8d1d2p-1
            (sollya.parse("0x1.ffffffff38c00p-306"), 384017876,
             sollya.parse("0x1.ffffed870ff01p-1")),  # vs. 0x1.ffffebec8d1d2p-1
            # ERROR: rootn: 92996584.000000 ulp error at {0x1.ffffffffdae80p-858, -888750231}: *0x1.00000b36b1173p+0 vs. 0x1.00000b8f6155ep+0
            (sollya.parse("0x1.ffffffffdae80p-858"), -888750231,
             sollya.parse("0x1.00000b36b1173p+0")),
            # ERROR: rootn: 379474.906250 ulp error at {0x0.0000000000022p-1022, -1538297900}: *0x1.00000814a68ffp+0 vs. 0x1.0000081503352p+0
            (sollya.parse("0x0.00000006abfffp-1022"), -1221802473,
             sollya.parse("0x1.00000a01818a4p+0")),
            (sollya.parse("0x1.ffffffffd0a00p-260"), 1108043946,
             sollya.parse("0x1.fffffa9042997p-1")),
            (sollya.parse("0x1.3fffffffff1c0p-927"), -1997086266,
             sollya.parse("0x1.0000056564c5ep+0")),
            (sollya.parse("0x1.ffffffff38c00p-306"), 384017876,
             sollya.parse("0x1.ffffed870ff01p-1")),
            (sollya.parse("0x0.15c000000002ap-1022"), 740015941,
             sollya.parse("0x1.ffffdfc47b57ep-1")),
            (sollya.parse("0x0.00000000227ffp-1022"), -1859058847,
             sollya.parse("0x1.0000069c7a01bp+0")),
            (sollya.parse("0x0.0568000000012p-1022"), -447352599,
             sollya.parse("0x1.00001ab640c38p+0")),
            (sollya.parse("0x0.000000000000dp-1022"), 132283432,
             sollya.parse("0x1.ffff43d1db82ap-1")),
            (sollya.parse("-0x1.c80000000026ap+1023"), 275148531,
             sollya.parse("-0x1.00002b45a7314p+0")),
            (sollya.parse("0x0.022200000000ep-1022"), -1969769414,
             sollya.parse("0x1.000006130e858p+0")),
            (sollya.parse("0x0.0000000000011p-1022"), 851990770,
             sollya.parse("0x1.ffffe2cafaff6p-1")),
            (sollya.parse("0x1.8fffffffff348p-1010"), 526938360,
             sollya.parse("0x1.ffffd372e2b81p-1")),
            (sollya.parse("0x0.0000000000317p-1022"), -1315106194,
             sollya.parse("0x1.0000096973ac9p+0")),
            (sollya.parse("0x1.1ffffffff2d20p-971"), 378658008,
             sollya.parse("0x1.ffffc45e803b2p-1")),
            #
            (sollya.parse("0x0.0568000000012p-1022"), -447352599,
             sollya.parse("0x1.00001ab640c38p+0")),
            #
            (sollya.parse("0x1.ffffffffd0a00p-260"), 1108043946,
             sollya.parse("0x1.fffffa9042997p-1")),
            (FP_MinusZero(self.precision), -21015979,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -85403731,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -180488973,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -1365227287,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -1802885579,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -1681209663,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -1152797721,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -1614890585,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -812655517,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -628647891,
             FP_MinusInfty(self.precision)),
            (sollya.parse("0x1.ffffffffdae80p-858"), -888750231,
             sollya.parse("0x1.00000b36b1173p+0")),
            (sollya.parse("0x0.0568000000012p-1022"), -447352599,
             sollya.parse("0x1.00001ab640c38p+0")),
            (sollya.parse("0x0.00000006abfffp-1022"), -1221802473,
             sollya.parse("0x1.00000a01818a4p+0")),
            (sollya.parse("0x0.0000000000022p-1022"), -1538297900,
             sollya.parse("0x1.00000814a68ffp+0")),
            #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -1889147085}: *-inf vs. inf
            #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -373548013}: *-inf vs. inf
            (FP_MinusZero(self.precision), -1889147085,
             FP_MinusInfty(self.precision)),
            (FP_MinusZero(self.precision), -373548013,
             FP_MinusInfty(self.precision)),
            #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -1889147085}: *-inf vs. inf
            #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -373548013}: *-inf vs. inf
            # [email protected]: PE 0: error[84]: ml_rootn(-0x1.b1a6765727e72p-902, -7.734955e+08/-773495525), result is -0x1.00000d8cb5b3cp+0 vs expected [nan;nan]
            (sollya.parse("-0x1.b1a6765727e72p-902"), -773495525),
            # ERROR: rootn: -40564819207303340847894502572032.000000 ulp error at {-0x0.fffffffffffffp-1022, 1}: *-0x0.fffffffffffffp-1022 vs. -0x1.ffffffffffffep-970
            (sollya.parse("-0x0.fffffffffffffp-1022 "), 1,
             sollya.parse("-0x0.fffffffffffffp-1022 ")),
            # ERROR: rootn: 1125899906842624.000000 ulp error at {-0x1.fffffffffffffp+1023, -1}: *-0x0.4000000000000p-1022 vs. -0x0.0000000000000p+0
            (sollya.parse("-0x1.fffffffffffffp+1023"), -1,
             sollya.parse("-0x0.4000000000000p-1022")),
            (sollya.parse("0x1.fffffffffffffp+1023"), -1,
             sollya.parse("0x0.4000000000000p-1022")),
        ]

        # precision-specific cases come first, generic ones last
        return (fp_64_only if self.precision.get_bit_size() >= 64 else []) \
               + (fp_32_only if self.precision.get_bit_size() == 32 else []) \
               + general_list
Exemple #29
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() / 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
Exemple #30
0
    def generate_scalar_scheme(self, vx, n):
        """Build the operation graph evaluating rootn(vx, n) = vx^(1/n).

        The general path follows the decomposition below (vx = m.2^e):
        a logarithm approximation of the mantissa is divided by n and fed to
        an ML_Exp2 scheme, while e / n is split into an integer part (inserted
        directly as an exponent) and a fractional part (also fed to ML_Exp2).
        Special inputs (NaN, n == 0, negative vx with even n, n == -1,
        infinities, zeros, n == 1) are dispatched before the general path.

        Args:
            vx: input value node; it is tagged "x" as a side effect.
            n: integer root-index node; it is tagged "n" as a side effect.

        Returns:
            The top-level ConditionBlock; every branch ends with a Return.
        """
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        n.set_attributes(tag="n")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          n, positive or null integers
        #
        # rootn(x, n) = x^(1/n)
        #             = exp(1/n * log(x))
        #             = 2^(1/n * log2(x))
        #             = 2^(1/n * (log2(m) + e))
        #

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        # when False, quotients by n use a Division node rather than a
        # multiplication by the reciprocal inv_n
        use_reciprocal = False

        # non-scaled vx used to compute vx^1
        unmodified_vx = vx

        # subnormal inputs are scaled by 2^mantissa_size; the extracted
        # exponent is decreased by the same amount further down
        is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal")
        exp_correction_factor = self.precision.get_mantissa_size()
        mantissa_factor = Constant(2**exp_correction_factor,
                                   tag="mantissa_factor")
        vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx")

        m = MantissaExtraction(vx, tag="m", precision=self.precision)
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        e = Select(is_subnormal,
                   e - exp_correction_factor,
                   e,
                   tag="corrected_e")

        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision,
                                                     basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        # only the table itself is needed here; the two other returned values
        # are unused
        log_table, _log_table_tho, _table_index_range = ml_log.generate_log_table(
            log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(
            Abs(m, precision=self.precision), log_f, inv_approx_table,
            log_table)
        # floating-point version of n
        n_f = Conversion(n, precision=self.precision, tag="n_f")
        inv_n = Division(Constant(1, precision=self.precision), n_f)

        # a zero input forces the logarithm approximation to -infinity
        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision),
                            log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        if use_reciprocal:
            r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi)
        else:
            r = Division(log_approx, n_f, tag="r", debug=debug_multi)

        # e_n ~ e / n
        e_f = Conversion(e, precision=self.precision, tag="e_f")
        if use_reciprocal:
            e_n = Multiplication(e_f, inv_n, tag="e_n")
        else:
            e_n = Division(e_f, n_f, tag="e_n")
        # residual e_f - e_n * n_f, evaluated with a single FMA
        error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n")
        e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int")
        pre_e_n_frac = e_n - e_n_int
        pre_e_n_frac.set_attributes(tag="pre_e_n_frac")
        # fractional part of e / n, corrected by the quotient residual
        e_n_frac = pre_e_n_frac + error_e_n * inv_n
        e_n_frac.set_attributes(tag="e_n_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)
        exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True)
        exp2_r.set_attributes(tag="exp2_r", debug=debug_multi)

        exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac,
                                                       inline_select=True)
        exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi)

        exp2_e_n_int = ExponentInsertion(Conversion(e_n_int,
                                                    precision=int_precision),
                                         precision=self.precision,
                                         tag="exp2_e_n_int")

        n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi)
        n_is_odd = LogicalNot(n_is_even, tag="n_is_odd")
        # for odd n the result takes the sign of vx, for even n it is positive
        result_sign = Select(
            n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        # managing n == -1
        if self.expand_div:
            ml_division_args = ML_Division.get_default_args(
                precision=self.precision, input_formats=[self.precision] * 2)
            ml_division = ML_Division(ml_division_args)
            self.division_implementation = ml_division.implementation
            self.division_implementation.set_scheme(
                ml_division.generate_scheme())
            ml_division_fct = self.division_implementation.get_function_object(
            )
        else:
            ml_division_fct = Division

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = ConditionBlock(
            # NaN input, n == 0, or negative input with even n: result is NaN
            LogicalOr(LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)),
                      LogicalAnd(n_is_even, vx < 0)),
            Return(FP_QNaN(self.precision)),
            Statement(
                ConditionBlock(
                    # rootn(x, -1) is evaluated as 1 / x on the unscaled input
                    Equal(n, -1, tag="n_is_mone"),
                    Return(
                        ml_division_fct(Constant(1, precision=self.precision),
                                        unmodified_vx,
                                        tag="div_res",
                                        precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn(±inf, n) for n < 0 is a zero carrying the sign of
                    # the input (-inf with even n was already sent to the NaN
                    # branch above); for n > 0 the infinity itself is returned.
                    Test(vx, specifier=Test.IsInfty),
                    Statement(
                        ConditionBlock(
                            n < 0,
                            Return(
                                Select(Test(vx,
                                            specifier=Test.IsPositiveInfty),
                                       Constant(FP_PlusZero(self.precision),
                                                precision=self.precision),
                                       Constant(FP_MinusZero(self.precision),
                                                precision=self.precision),
                                       precision=self.precision)),
                            Return(vx),
                        ), ),
                ),
                ConditionBlock(
                    # rootn(±0, n) is ±∞ for odd n < 0.
                    LogicalAnd(LogicalAnd(n_is_odd, n < 0),
                               Equal(vx, 0),
                               tag="n_is_odd_and_neg"),
                    Return(
                        Select(Test(vx, specifier=Test.IsPositiveZero),
                               Constant(FP_PlusInfty(self.precision),
                                        precision=self.precision),
                               Constant(FP_MinusInfty(self.precision),
                                        precision=self.precision),
                               precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn(±0, n) is +∞ for even n < 0.
                    LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)),
                    Return(FP_PlusInfty(self.precision))),
                ConditionBlock(
                    # rootn(±0, n) is +0 for even n > 0.
                    # The result must be +0 even for a -0 input (which is not
                    # caught by the "vx < 0" NaN test above), so the input
                    # cannot simply be forwarded.
                    LogicalAnd(n_is_even, Equal(vx, 0)),
                    Return(FP_PlusZero(self.precision))),
                ConditionBlock(
                    # n == 1 is the identity; otherwise general path
                    Equal(n, 1), Return(unmodified_vx),
                    Return(result_sign * exp2_r * exp2_e_n_int *
                           exp2_e_n_frac))))
        return result