Ejemplo n.º 1
0
    def generate_approx_poly_near_zero(self, function, high_bound, error_bound,
                                       variable):
        """Build an approximation scheme of ``function`` close to zero.

        The quotient ``function / sollya.x`` is approximated on
        ``[2**-100, high_bound]`` using the list of even degrees
        ``0, 2, ...`` and the resulting polynomial, evaluated with a Horner
        scheme, is multiplied back by ``variable``.

        :param function: sollya expression to be approximated
        :param high_bound: upper bound of the approximation interval
        :param error_bound: error target forwarded to ``sollya.guessdegree``
        :param variable: node at which the scheme is evaluated
        :return: pair ``(scheme, approx_error)``
        """

        def error_function(p, f, ai, mod, t):
            # only the polynomial, the function and the interval are used
            return sollya.dirtyinfnorm(p - f, ai)

        # Using 0 as an interval bound raised issues, hence the tiny
        # strictly-positive lower bound.
        lower_bound = 2**-100
        approx_interval = Interval(lower_bound, high_bound)
        reduced_function = function / sollya.x

        guessed_degree = sollya.guessdegree(reduced_function, approx_interval,
                                            error_bound)
        degree = sollya.sup(guessed_degree)
        degree_list = range(0, int(degree) + 4, 2)
        # first coefficient uses format 1, the others the scheme precision
        coeff_formats = [1] + [self.precision] * (len(degree_list) - 1)

        poly_object, approx_error = Polynomial.build_from_approximation_with_error(
            reduced_function,
            degree_list,
            coeff_formats,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        report_msg = "approximation poly: {}\n  with error {}".format(
            poly_object, approx_error)
        Log.report(Log.Info, report_msg)

        horner_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, variable, self.precision)
        return Multiplication(variable, horner_scheme), approx_error
Ejemplo n.º 2
0
    def generate_scheme(self):
        """Build and return the operation graph of the implementation.

        The returned ``ConditionBlock`` is organised as follows:

        * special inputs (NaN / infinity) are dispatched to dedicated
          returns;
        * inputs above / below the early overflow / underflow bounds return
          +infinity / +0 directly;
        * otherwise the input is reduced as
          ``r = (x - k * log2_hi) + (-k * log2_lo)`` with
          ``k = nearest_integer(x * invlog2)``, a polynomial built from an
          approximation of ``expm1`` on ``[-log(2)/2, log(2)/2]`` is
          evaluated, and the result is rebuilt by inserting ``ik`` as an
          exponent, with dedicated late overflow / underflow paths.

        The polynomial degree is increased until the relative error estimate
        is below the error goal derived from ``self.accuracy``; when gappa is
        not installed the search stops after the first attempt.

        :return: root node of the generated scheme
        """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                # without libm compliance no exception is raised, only the
                # value is returned
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input: +inf for +inf, +0 for -inf
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log(2) is split into a high part whose precision is reduced
        # according to the magnitude of k, and a low part holding the rest
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        # k is the rounded value in the floating-point format, ik the same
        # value in the matching integer format
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        # three-way split of the approximation interval, used below for the
        # dichotomy of the evaluation error computation
        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        # evaluation error of the argument reduction
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            # NOTE(review): error_goal stays unbound on this path unless
            # Log.report(Log.Error, ...) aborts execution - confirm
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # half of the error budget is granted to the approximation
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme

        # degree search: the degree is increased until the error goal is met
        while True:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            # the constant and linear terms are added explicitly from the
            # two parts of the reduced argument
            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            sub_poly_error_copy_map = {
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                # one entry per sub-interval of approx_interval_split
                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            # keep the worst relative error over the three sub-intervals
            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                # identity test: the value compared may be a sollya object
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # floor division keeps the offset an integer (true division would
        # yield a float, possibly non-integral, under Python 3)
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        # the scaling is applied in two steps:
        # 2^(ik - offset) * poly, then * 2^offset
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        # same two-step scaling, with the exponent shifted upward first
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
Ejemplo n.º 3
0
        def compute_log(_vx, exp_corr_factor=None):
            """Build the logarithm evaluation graph for ``_vx``.

            The mantissa of ``_vx`` is multiplied by an approximation of its
            inverse (minus one, through an FMA) to get a reduced argument, a
            polynomial built from an approximation of
            ``log2(1 + x) / x`` is evaluated on it, and the result is
            combined with the two values loaded from ``log_table`` and with
            the (optionally corrected) exponent of ``_vx``.

            :param _vx: input node
            :param exp_corr_factor: optional term added to the extracted
                exponent; ignored when ``None``
            :return: tuple ``(_result, _poly, _log_inv_lo, _log_inv_hi,
                _red_vx)``
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # The main table is indexed from the mantissa (original note:
            # by its 7 most significant bits)
            table_index = inv_approx_table.index_function(_vx_mant)
            table_index.set_attributes(tag="table_index", debug=debuglld)

            # argument reduction
            # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            inv_seed = DivisionSeed(_vx_mant,
                                    precision=self.precision,
                                    tag="seed",
                                    debug=debug_lftolx,
                                    silent=True)
            inv_seed_bits = TypeCast(inv_seed, precision=ML_UInt64)
            masked_seed_bits = BitLogicAnd(inv_seed_bits,
                                           Constant(-2, precision=ML_UInt64),
                                           precision=ML_UInt64)
            pre_arg_red_index = TypeCast(masked_seed_bits,
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            # table index 0 uses 1.0 as reduction factor instead of the seed
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
            inv_err = S2**-inv_approx_table.index_size
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # loading the two table values associated with table_index
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            Log.report(Log.Verbose, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log2(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() * 1.1))) + 1
            # NOTE: this modifies a global sollya display setting
            sollya.settings.display = sollya.hexadecimal
            global_poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                log2(1 + sollya.x) / sollya.x,
                poly_degree, [self.precision] * (poly_degree + 1),
                approx_interval,
                sollya.absolute,
                error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm(
                    p - f, ai))
            Log.report(
                Log.Info, "poly_degree={}, approx_error={}".format(
                    poly_degree, approx_error))
            # the constant coefficient is handled separately by the final
            # FMA below
            poly_object = global_poly_object.sub_poly(start_index=1, offset=1)

            Attributes.set_default_silent(True)
            Attributes.set_default_rounding_mode(ML_RoundToNearest)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly = FMA(pre_poly, _red_vx,
                        global_poly_object.get_cst_coeff(0, self.precision))
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            Log.report(
                Log.Verbose, "sollya global_poly_object: {}".format(
                    global_poly_object.get_sollya_object()))
            Log.report(
                Log.Verbose, "sollya poly_object: {}".format(
                    poly_object.get_sollya_object()))

            # identity test: exp_corr_factor may be an operation node, which
            # must not go through an overloaded equality operator
            if exp_corr_factor is None:
                corr_exp = _vx_exp
            else:
                corr_exp = _vx_exp + exp_corr_factor

            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()

            pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision)
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            _result = exact_log2_hi_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx
Ejemplo n.º 4
0
  def generate_scheme(self):
    """Build and return the operation graph of the implementation.

    The returned ``ConditionBlock`` handles, in order:

    * NaN / infinity inputs (NaN -> quiet NaN, infinity -> +infinity or -1
      depending on the sign test);
    * inputs above / below the overflow / underflow bounds (+infinity / -1);
    * the standard path: reduction ``r = x - k * log(2)`` (``log(2)`` being
      split into a high and a low part, with a compensated final addition),
      evaluation of a polynomial built from an approximation of ``expm1``
      and reconstruction through exponent insertion, with dedicated late
      overflow / underflow paths.

    :return: root node of the generated scheme
    """
    # declaring target and instantiating optimization engine

    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    C_m1 = Constant(-1, precision = self.precision)

    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False)

    #  Infnty input: +inf for a positive input, -1 otherwise
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)

    # Over/Underflow Tests

    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))

    # only ML_Binary32 and ML_Binary64 are supported by these lookups
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]

    # Constants

    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)

    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    # log(2) is split into a reduced-precision high part and a low part
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)

    # Reduction
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")

    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)

    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)

    # s is the rounded sum, t the part of red_coeff2 not absorbed by s
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)

    r = Addition(s, t, precision = self.precision)

    r.set_attributes(tag = "r", debug = debug_multi)

    r_interval = Interval(-log_2/S2, log_2/S2)

    local_ulp = sup(ulp(exp(r_interval), self.precision))

    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)

    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")

    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)

    # one format per coefficient, i.e. poly_degree + 1 entries
    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    # the linear term is added explicitly as r
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)

    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)

    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi)

    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")

    # floor division keeps the offset an integer (true division would yield
    # a float, possibly non-integral, under Python 3)
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() // 2)
    diff_k = ik - overflow_exp_offset

    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)

    # scaling applied in two steps: 2^(ik - offset), then 2^offset
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0

    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False),
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)),
        Return(late_overflow_result)
        )

    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)

    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset

    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)

    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0

    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)

    # Statement with two children: the conditional raise, then the return
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)),
        Return(late_underflow_result)
        )

    # Reconstruction

    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)

    result_scheme = ConditionBlock(
        late_overflow_test,
        late_overflow_return,
        ConditionBlock(
            late_underflow_test,
            late_underflow_return,
            Return(std_result)
            )
        )

    std_return = ConditionBlock(
        overflow_test,
        overflow_return,
        ConditionBlock(
            underflow_test,
            underflow_return,
            result_scheme)
        )

    scheme = ConditionBlock(
        test_NaN_or_inf,
        Statement(specific_return),
        std_return
        )

    return scheme
Ejemplo n.º 5
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise polynomial approximation of function.

        The interval [bound_low, bound_high] is split into num_intervals
        sub-intervals of equal size. One polynomial is computed for each
        sub-interval and its coefficients are stored in a constant table.
        The returned scheme computes the sub-interval index from variable,
        loads the matching coefficients and evaluates them with a Horner
        scheme.

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the main interval
        :param max_degree: maximum degree for an approximation on any
            sub-interval; it also sets the width of the coefficient table.
            If None, it is set to the largest degree returned by
            piecewise_approximation_degree_generator
        :param error_threshold: error bound for an approximation on any sub-interval
        :param odd: only used on a sub-interval whose lower bound is a zero
            of function; when set, function(x)/x is approximated with
            monomial degrees 0, 2, 4, ... only
        :param even: only used on a sub-interval whose lower bound is a zero
            of function; when set, function(x)/x is approximated with
            monomial degrees 1, 3, 5, ... only

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and error
            is the maximum approximation error encountered
        :rtype: tuple(ML_Operation, SollyaObject)
    """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # the local polynomial is expressed relatively to the lower bound
        # of the sub-interval
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            local_poly_degree_list = list(
                range(1 if even else 0, degree + 1, 2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(cst_value, local_function,
                                                     local_interval,
                                                     sollya.absolute, accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    # Log.Error (capitalized, like the other Log levels used
                    # in this function) so that the original SollyaError is
                    # reported rather than hidden behind an AttributeError.
                    # NOTE(review): presumably Log.report does not return for
                    # the Error level; if it did, poly_object / approx_error
                    # below would be unbound or stale from the previous
                    # iteration -- confirm.
                    Log.report(
                        Log.Error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)
        # filling the table row, coefficients missing from the polynomial
        # are stored as zeros
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    # clamped to [0, num_intervals - 1] so that the table load stays in range
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio, precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    # poly_var = diff - index * interval_size
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial (coefficients listed from the highest
    # degree down to the constant term)
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
Ejemplo n.º 6
0
    def generate_scalar_scheme(self, vx):
        """ Work-in-progress scheme generator for the gamma function.

            The method builds bigfloat-backed wrappers for gamma and its
            first two derivatives, logs a few threshold values and guesses
            the degree of a polynomial approximation over [1, 2].

            NOTE(review): sys.exit(1) is called unconditionally right after
            the degree is logged, so the interpreter exits at that point and
            every statement after it (including the final return) is
            unreachable.

            :param vx: input node of the scheme
            :return: the scheme node built at the end of the method
                (currently never reached, see note above)
        """
        # approximating the gamma function
        abs_vx = Abs(vx, precision=self.precision)

        # NOTE(review): FCT_LIMIT is not referenced anywhere else in this
        # method
        FCT_LIMIT = 1.0

        omega_value = self.precision.get_omega()

        def sollya_wrap_bigfloat_fct(bfct):
            """ wrap bigfloat's function <bfct> such that it can be used
                on SollyaObject inputs and returns SollyaObject results """
            def fct(x):
                return sollya.SollyaObject(bfct(SollyaObject(x).bigfloat()))

            return fct

        sollya_gamma = sollya_wrap_bigfloat_fct(bigfloat.gamma)
        sollya_digamma = sollya_wrap_bigfloat_fct(bigfloat.digamma)
        # first derivative of gamma is digamma * gamma
        bigfloat_gamma_d0 = lambda x: bigfloat.gamma(x) * bigfloat.digamma(x)
        sollya_gamma_d0 = sollya_wrap_bigfloat_fct(bigfloat_gamma_d0)

        # approximating trigamma with straightforward derivatives formulae of digamma
        # (forward finite difference of digamma with a relative step of U)
        U = 2**-64
        bigfloat_trigamma = lambda x: (
            (bigfloat.digamma(x * (1 + U)) - bigfloat.digamma(x)) / (x * U))
        sollya_trigamma = sollya_wrap_bigfloat_fct(bigfloat_trigamma)

        # second derivative of gamma:
        # (digamma * gamma)' = trigamma * gamma + digamma * gamma'
        bigfloat_gamma_d1 = lambda x: (bigfloat_trigamma(x) * bigfloat.gamma(
            x) + bigfloat_gamma_d0(x) * bigfloat.digamma(x))
        sollya_gamma_d1 = sollya_wrap_bigfloat_fct(bigfloat_gamma_d1)

        def sollya_gamma_fct(x, diff_order, prec):
            """ wrapper to use bigfloat implementation of gamma (and of
                its first and second derivatives) rather than sollya's
                implementation directly.
                This wrapper implements sollya's function API.

                :param x: numerical input value (may be an Interval)
                :param diff_order: differential order (0, 1 or 2)
                :param prec: numerical precision expected (min)
                :raise NotImplementedError: if diff_order is not 0, 1 or 2
            """
            fct = None
            if diff_order == 0:
                fct = sollya_gamma
            elif diff_order == 1:
                fct = sollya_gamma_d0
            elif diff_order == 2:
                fct = sollya_gamma_d1
            else:
                raise NotImplementedError
            with bigfloat.precision(prec):
                if x.is_range():
                    # NOTE(review): only the two endpoints are evaluated;
                    # this encloses the image of x only if fct is monotonic
                    # over x -- confirm
                    lo = sollya.inf(x)
                    hi = sollya.sup(x)
                    return sollya.Interval(fct(lo), fct(hi))
                else:
                    return fct(x)

        # search the lower x such that gamma(x) >= omega
        omega_upper_limit = search_bound_threshold(sollya_gamma, omega_value,
                                                   2, 1000.0, self.precision)
        Log.report(Log.Debug, "gamma(x) = {} limit is {}", omega_value,
                   omega_upper_limit)

        # evaluate gamma(<min-normal-value>)
        lower_x_bound = self.precision.get_min_normal_value()
        value_min = sollya_gamma(lower_x_bound)
        Log.report(Log.Debug, "gamma({}) = {}(log2={})", lower_x_bound,
                   value_min, int(sollya.log2(value_min)))

        # evaluate gamma(<min-subnormal-value>)
        lower_x_bound = self.precision.get_min_subnormal_value()
        value_min = sollya_gamma(lower_x_bound)
        Log.report(Log.Debug, "gamma({}) = {}(log2={})", lower_x_bound,
                   value_min, int(sollya.log2(value_min)))

        # Gamma is defined such that gamma(x+1) = x * gamma(x)
        #
        # we approximate gamma over [1, 2]
        # y in [1, 2]
        # gamma(y) = (y-1) * gamma(y-1)
        # gamma(y-1) = gamma(y) / (y-1)
        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(1, 2)
        approx_fct = sollya.function(sollya_gamma_fct)
        poly_degree = int(
            sup(
                guessdegree(approx_fct, approx_interval, S2**
                            -(self.precision.get_field_size() + 5)))) + 1
        Log.report(Log.Debug, "approximation's poly degree over [1, 2] is {}",
                   poly_degree)

        # NOTE(review): unconditional exit with status 1; everything below
        # this line is unreachable
        sys.exit(1)

        poly_degree_list = list(range(1, poly_degree, 2))
        Log.report(Log.Debug, "poly_degree is {} and list {}", poly_degree,
                   poly_degree_list)
        global_poly_object = Polynomial.build_from_approximation(
            approx_fct, poly_degree_list,
            [self.precision] * len(poly_degree_list), approx_interval,
            sollya.relative)
        Log.report(
            Log.Debug, "inform is {}",
            dirtyinfnorm(approx_fct - global_poly_object.get_sollya_object(),
                         approx_interval))
        poly_object = global_poly_object.sub_poly(start_index=1, offset=1)

        # NOTE(review): ext_precision is not referenced anywhere else in
        # this method
        ext_precision = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble,
        }[self.precision]

        pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, abs_vx, unified_precision=self.precision)

        result = FMA(pre_poly, abs_vx, abs_vx)
        result.set_attributes(tag="result", debug=debug_multi)

        eps_target = S2**-(self.precision.get_field_size() + 5)

        def offset_div_function(fct):
            return lambda offset: fct(sollya.x + offset)

        # empirical numbers
        # NOTE(review): field_size is not referenced anywhere else in this
        # method
        field_size = {ML_Binary32: 6, ML_Binary64: 8}[self.precision]

        # NOTE(review): eps_exp, one_limit_exp and eps (used below) are never
        # assigned in this method, and the split approximations are built
        # from sollya.erf rather than from gamma; presumably left over from
        # an erf implementation -- to be confirmed before re-enabling this
        # part
        near_indexing = SubFPIndexing(eps_exp, 0, 6, self.precision)
        near_approx = generic_poly_split(offset_div_function(sollya.erf),
                                         near_indexing, eps_target,
                                         self.precision, abs_vx)
        near_approx.set_attributes(tag="near_approx", debug=debug_multi)

        def offset_function(fct):
            return lambda offset: fct(sollya.x + offset)

        medium_indexing = SubFPIndexing(1, one_limit_exp, 7, self.precision)

        medium_approx = generic_poly_split(offset_function(sollya.erf),
                                           medium_indexing, eps_target,
                                           self.precision, abs_vx)
        medium_approx.set_attributes(tag="medium_approx", debug=debug_multi)

        # approximation for positive values
        scheme = ConditionBlock(
            abs_vx < eps, Return(result),
            ConditionBlock(
                abs_vx < near_indexing.get_max_bound(), Return(near_approx),
                ConditionBlock(abs_vx < medium_indexing.get_max_bound(),
                               Return(medium_approx),
                               Return(Constant(1.0,
                                               precision=self.precision)))))
        return scheme
Ejemplo n.º 7
0
    def generate_scalar_scheme(self, vx):
        """ Build the operation graph computing sinh(vx).

            The sign of the input is extracted first and the computation is
            done on abs(vx). The argument is reduced with
            k = trunc(abs(vx) * 2^index_size / log(2)) and
            r = abs(vx) - k * log(2) / 2^index_size; exp(r) and exp(-r) are
            approximated by a polynomial, the powers of two with fractional
            exponent come from a table, and both exponentials are combined
            as described in the comments below.

            :param vx: input node of the scheme
            :return: a Statement returning sign * result, or sign * +infinity
                when abs(vx) exceeds the overflow threshold
        """
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # the table index is built from the index_size low bits of k
        index_size = 5

        comp_lo = (vx < 0)
        comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
        sign = Select(comp_lo, -1, 1, precision = self.precision)

        # as sinh is an odd function, we can simplify the input to its absolute
        # value once the sign has been extracted
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2)/2**index_size
        inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
        inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # sinh(x) ~ exp(abs(x))/2    for a big enough x
        # sinh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx    = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
        log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
        log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
        k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag = "r", debug = debug_multi)

        if is_gappa_installed():
            r_eval_error = self.get_eval_error(r_hi, variable_copy_map =
                {
                    vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
                    k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
                })
            # the "{}" placeholder is required for the error value to
            # actually appear in the logged message
            Log.report(Log.Verbose, "r_eval_error: {}", r_eval_error)

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
        # constant coefficient limited to 1 bit, the others use the full format
        precision_list = [1] + [self.precision] * (poly_degree)

        # k = k_hi * 2^index_size + k_lo
        k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi)
        k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi)
        # NOTE(review): pow_exp is not referenced again in this method
        pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi)

        # each row stores 2^(-v / 2^index_size) and 2^(v / 2^index_size), both
        # split into a high part (reduced precision) and a low part;
        # rows [0, 2^index_size) hold v = i and rows [2^index_size,
        # 2 * 2^index_size) hold v = i - 2^index_size
        exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value    = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
            pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
            neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # sinh(x) = 1/2 * (exp(x) - exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)    = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)    = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function)

        Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error))))

        # the constant coefficient is dropped (start_index = 1): poly_pos and
        # poly_neg approximate exp(r) - 1 and exp(-r) - 1
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision)
        poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

        poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision)
        poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

        # only the upper half of the table is addressed
        table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

        # exponents h-1 and -h-1, bounded below by the minimal normal exponent
        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))

        # 2^(h-1)
        pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi)
        # 2^(-h-1)
        pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi)

        # contribution of the table high parts alone
        hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)


        # remaining terms: hi * poly + lo * (1 + poly), scaled by the power
        # of two
        pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

        neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

        result = Addition(
            Subtraction(
                pos_exp,
                neg_exp,
                precision=self.precision,
            ),
            hi_terms,
            precision=self.precision,
            tag="result",
            debug=debug_multi
        )

        # ov_value: asinh of the largest value of the format, rounded down;
        # inputs greater than it are returned as infinity
        ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD)
        ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater)

        # main scheme
        scheme = Statement(
            Return(
                Select(
                    ov_flag,
                    sign*FP_PlusInfty(self.precision),
                    sign*result
                )))

        return scheme
Ejemplo n.º 8
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def SincosRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        sollya_precision = self.precision.get_sollya_object()
        hi_precision = self.precision.get_field_size() - 8
        cw_hi_precision = self.precision.get_field_size() - 4

        ext_precision = {
            ML_Binary32: ML_Binary64,
            ML_Binary64: ML_Binary64
        }[self.precision]

        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        if self.precision is ML_Binary32:
            ph_bound = S2**10
        else:
            ph_bound = S2**33

        test_ph_bound = Comparison(vx,
                                   ph_bound,
                                   specifier=Comparison.GreaterOrEqual,
                                   precision=ML_Bool,
                                   likely=False)

        # argument reduction
        # m
        frac_pi_index = {ML_Binary32: 10, ML_Binary64: 14}[self.precision]

        C0 = Constant(0, precision=int_precision)
        C1 = Constant(1, precision=int_precision)
        C_offset = Constant(3 * S2**(frac_pi_index - 1),
                            precision=int_precision)

        # 2^m / pi
        frac_pi = round(S2**frac_pi_index / pi, cw_hi_precision, sollya.RN)
        frac_pi_lo = round(S2**frac_pi_index / pi - frac_pi, sollya_precision,
                           sollya.RN)
        # pi / 2^m, high part
        inv_frac_pi = round(pi / S2**frac_pi_index, cw_hi_precision, sollya.RN)
        # pi / 2^m, low part
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)

        # computing k
        vx.set_attributes(tag="vx", debug=debug_multi)

        vx_pi = Addition(Multiplication(vx,
                                        Constant(frac_pi,
                                                 precision=self.precision),
                                        precision=self.precision),
                         Multiplication(vx,
                                        Constant(frac_pi_lo,
                                                 precision=self.precision),
                                        precision=self.precision),
                         precision=self.precision,
                         tag="vx_pi",
                         debug=debug_multi)

        k = NearestInteger(vx_pi,
                           precision=int_precision,
                           tag="k",
                           debug=debug_multi)
        # k in floating-point precision
        fk = Conversion(k,
                        precision=self.precision,
                        tag="fk",
                        debug=debug_multi)

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision,
                                   debug=debug_multi)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision,
                                      debug=debug_multi)

        # Cody-Waite reduction
        red_coeff1 = Multiplication(fk,
                                    inv_frac_pi_cst,
                                    precision=self.precision,
                                    exact=True)
        red_coeff2 = Multiplication(Negation(fk, precision=self.precision),
                                    inv_frac_pi_lo_cst,
                                    precision=self.precision,
                                    exact=True)

        # Should be exact / Sterbenz' Lemma
        pre_sub_mul = Subtraction(vx,
                                  red_coeff1,
                                  precision=self.precision,
                                  exact=True)

        # Fast2Sum
        s = Addition(pre_sub_mul,
                     red_coeff2,
                     precision=self.precision,
                     unbreakable=True,
                     tag="s",
                     debug=debug_multi)
        z = Subtraction(s,
                        pre_sub_mul,
                        precision=self.precision,
                        unbreakable=True,
                        tag="z",
                        debug=debug_multi)
        t = Subtraction(red_coeff2,
                        z,
                        precision=self.precision,
                        unbreakable=True,
                        tag="t",
                        debug=debug_multi)

        red_vx_std = Addition(s, t, precision=self.precision)
        red_vx_std.set_attributes(tag="red_vx_std", debug=debug_multi)

        # To compute sine we offset x by 3pi/2
        # which means add 3  * S2^(frac_pi_index-1) to k
        if self.sin_output:
            Log.report(Log.Info, "Computing Sin")
            offset_k = Addition(k,
                                C_offset,
                                precision=int_precision,
                                tag="offset_k")
        else:
            Log.report(Log.Info, "Computing Cos")
            offset_k = k

        modk = Variable("modk",
                        precision=int_precision,
                        var_type=Variable.Local)
        red_vx = Variable("red_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        # Faster modulo using bitwise logic
        modk_std = BitLogicAnd(offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="modk",
                               debug=debug_multi)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        red_vx.set_interval(approx_interval)

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        Log.report(Log.Info,
                   "building tabulated approximation for sin and cos")

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        # polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        table_index_size = frac_pi_index + 1
        cos_table = ML_NewTable(dimensions=[2**table_index_size, 1],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("cos_table"))

        for i in range(2**(frac_pi_index + 1)):
            local_x = i * pi / S2**frac_pi_index
            cos_local = round(cos(local_x), self.precision.get_sollya_object(),
                              sollya.RN)
            cos_table[i][0] = cos_local

        sin_index = Modulo(modk + 2**(frac_pi_index - 1),
                           2**(frac_pi_index + 1),
                           precision=int_precision,
                           tag="sin_index")  #, debug = debug_multi)
        tabulated_cos = TableLoad(cos_table,
                                  modk,
                                  C0,
                                  precision=self.precision,
                                  tag="tab_cos",
                                  debug=debug_multi)
        tabulated_sin = -TableLoad(cos_table,
                                   sin_index,
                                   C0,
                                   precision=self.precision,
                                   tag="tab_sin",
                                   debug=debug_multi)

        poly_degree_cos = sup(
            guessdegree(cos(sollya.x), approx_interval, S2**
                        -self.precision.get_precision()) + 2)
        poly_degree_sin = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -self.precision.get_precision()) + 2)

        poly_degree_cos_list = range(0, int(poly_degree_cos) + 3)
        poly_degree_sin_list = range(0, int(poly_degree_sin) + 3)

        # cosine polynomial: limiting first and second coefficient precision to 1-bit
        poly_cos_prec_list = [self.precision] * len(poly_degree_cos_list)
        # sine polynomial: limiting first coefficient precision to 1-bit
        poly_sin_prec_list = [self.precision] * len(poly_degree_sin_list)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
        Log.report(Log.Info,
                   "building mathematical polynomials for sin and cos")
        # Polynomial approximations
        Log.report(Log.Info, "cos")
        poly_object_cos, poly_error_cos = Polynomial.build_from_approximation_with_error(
            cos(sollya.x),
            poly_degree_cos_list,
            poly_cos_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "sin")
        poly_object_sin, poly_error_sin = Polynomial.build_from_approximation_with_error(
            sin(sollya.x),
            poly_degree_sin_list,
            poly_sin_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        Log.report(
            Log.Info, "poly error cos: {} / {:d}".format(
                poly_error_cos, int(sollya.log2(poly_error_cos))))
        Log.report(
            Log.Info, "poly error sin: {0} / {1:d}".format(
                poly_error_sin, int(sollya.log2(poly_error_sin))))
        Log.report(Log.Info, "poly cos : %s" % poly_object_cos)
        Log.report(Log.Info, "poly sin : %s" % poly_object_sin)

        # Polynomial evaluation scheme
        poly_cos = polynomial_scheme_builder(
            poly_object_cos.sub_poly(start_index=1),
            red_vx,
            unified_precision=self.precision)
        poly_sin = polynomial_scheme_builder(
            poly_object_sin.sub_poly(start_index=2),
            red_vx,
            unified_precision=self.precision)
        poly_cos.set_attributes(tag="poly_cos", debug=debug_multi)
        poly_sin.set_attributes(tag="poly_sin",
                                debug=debug_multi,
                                unbreakable=True)

        # TwoProductFMA
        mul_cos_x = tabulated_cos * poly_cos
        mul_cos_y = FusedMultiplyAdd(tabulated_cos,
                                     poly_cos,
                                     -mul_cos_x,
                                     precision=self.precision)

        mul_sin_x = tabulated_sin * poly_sin
        mul_sin_y = FusedMultiplyAdd(tabulated_sin,
                                     poly_sin,
                                     -mul_sin_x,
                                     precision=self.precision)

        mul_coeff_sin_hi = tabulated_sin * red_vx
        mul_coeff_sin_lo = FusedMultiplyAdd(tabulated_sin, red_vx,
                                            -mul_coeff_sin_hi)

        mul_cos = Addition(mul_cos_x,
                           mul_cos_y,
                           precision=self.precision,
                           tag="mul_cos")  #, debug = debug_multi)
        mul_sin = Negation(Addition(mul_sin_x,
                                    mul_sin_y,
                                    precision=self.precision),
                           precision=self.precision,
                           tag="mul_sin")  #, debug = debug_multi)
        mul_coeff_sin = Negation(Addition(mul_coeff_sin_hi,
                                          mul_coeff_sin_lo,
                                          precision=self.precision),
                                 precision=self.precision,
                                 tag="mul_coeff_sin")  #, debug = debug_multi)

        mul_cos_x.set_attributes(
            tag="mul_cos_x", precision=self.precision)  #, debug = debug_multi)
        mul_cos_y.set_attributes(
            tag="mul_cos_y", precision=self.precision)  #, debug = debug_multi)
        mul_sin_x.set_attributes(
            tag="mul_sin_x", precision=self.precision)  #, debug = debug_multi)
        mul_sin_y.set_attributes(
            tag="mul_sin_y", precision=self.precision)  #, debug = debug_multi)

        cos_eval_d_1 = (((mul_cos + mul_sin) + mul_coeff_sin) + tabulated_cos)

        cos_eval_d_1.set_attributes(tag="cos_eval_d_1",
                                    precision=self.precision,
                                    debug=debug_multi)

        result_1 = Statement(Return(cos_eval_d_1))

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################
        # payne and hanek argument reduction for large arguments
        ph_k = frac_pi_index
        ph_frac_pi = round(S2**ph_k / pi, 1500, sollya.RN)
        ph_inv_frac_pi = pi / S2**ph_k

        ph_statement, ph_acc, ph_acc_int = generate_payne_hanek(vx,
                                                                ph_frac_pi,
                                                                self.precision,
                                                                n=100,
                                                                k=ph_k)

        # assigning Large Argument Reduction reduced variable
        lar_vx = Variable("lar_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        lar_red_vx = Addition(Multiplication(lar_vx,
                                             inv_frac_pi,
                                             precision=self.precision),
                              Multiplication(lar_vx,
                                             inv_frac_pi_lo,
                                             precision=self.precision),
                              precision=self.precision,
                              tag="lar_red_vx",
                              debug=debug_multi)

        C32 = Constant(2**(ph_k + 1), precision=int_precision, tag="C32")
        ph_acc_int_red = Select(ph_acc_int < C0,
                                C32 + ph_acc_int,
                                ph_acc_int,
                                precision=int_precision,
                                tag="ph_acc_int_red")
        if self.sin_output:
            lar_offset_k = Addition(ph_acc_int_red,
                                    C_offset,
                                    precision=int_precision,
                                    tag="lar_offset_k")
        else:
            lar_offset_k = ph_acc_int_red

        ph_acc_int_red.set_attributes(tag="ph_acc_int_red", debug=debug_multi)
        lar_modk = BitLogicAnd(lar_offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="lar_modk",
                               debug=debug_multi)

        lar_statement = Statement(ph_statement,
                                  ReferenceAssign(lar_vx,
                                                  ph_acc,
                                                  debug=debug_multi),
                                  ReferenceAssign(red_vx,
                                                  lar_red_vx,
                                                  debug=debug_multi),
                                  ReferenceAssign(modk, lar_modk),
                                  prevent_optimization=True)

        test_NaN_or_Inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               tag="NaN_or_Inf",
                               debug=debug_multi)
        return_NaN_or_Inf = Statement(Return(FP_QNaN(self.precision)))

        scheme = ConditionBlock(
            test_NaN_or_Inf, Statement(ClearException(), return_NaN_or_Inf),
            Statement(
                modk, red_vx,
                ConditionBlock(
                    test_ph_bound, lar_statement,
                    Statement(
                        ReferenceAssign(modk, modk_std),
                        ReferenceAssign(red_vx, red_vx_std),
                    )), result_1))

        return scheme
Ejemplo n.º 9
0
    def generate_scalar_scheme(self, vx):
        """Build the evaluation scheme for erf, driven by the magnitude of vx.

        The magnitude |vx| selects one of four results:
          * |vx| < eps: an odd polynomial approximation of erf(x) - x,
            evaluated as poly(|vx|) * |vx| + |vx| through a single FMA;
          * |vx| below near_indexing.get_max_bound(): the piecewise
            approximation built by generic_poly_split on the near indexing;
          * |vx| below medium_indexing.get_max_bound(): the piecewise
            approximation built on the medium indexing;
          * otherwise: the constant 1.0.

        Args:
            vx: operation-graph node holding the function input.

        Returns:
            A ConditionBlock whose branches each Return one approximation.

        Raises:
            KeyError: if self.precision is neither ML_Binary32 nor
                ML_Binary64 (the empirical eps table only covers those).

        NOTE(review): every branch is computed from Abs(vx) and no sign is
        re-applied in this block, so a negative input gets the same value as
        its magnitude; confirm that the caller restores the sign.
        """
        abs_vx = Abs(vx, precision=self.precision)

        FCT_LIMIT = 1.0

        # smallest input for which erf rounds to 1.0 in the target precision
        one_limit = search_bound_threshold(sollya.erf, FCT_LIMIT, 1.0, 10.0,
                                           self.precision)
        one_limit_exp = int(sollya.floor(sollya.log2(one_limit)))
        Log.report(Log.Debug, "erf(x) = 1.0 limit is {}, with exp={}",
                   one_limit, one_limit_exp)

        # empirical numbers: exponent of the upper bound of the
        # "near zero" interval, per precision
        eps_exp = {ML_Binary32: -3, ML_Binary64: -5}[self.precision]
        eps = S2**eps_exp

        # accuracy target shared by the near-zero polynomial and by the
        # piecewise approximations
        eps_target = S2**-(self.precision.get_field_size() + 5)

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(0, eps)
        # the function approximated is erf(x) - x; it is odd, so only odd
        # monomials are requested. The "+ x" part is added back exactly by
        # the final FMA below.
        approx_fct = sollya.erf(sollya.x) - (sollya.x)
        poly_degree = int(
            sup(guessdegree(approx_fct, approx_interval, eps_target))) + 1

        poly_degree_list = list(range(1, poly_degree, 2))
        Log.report(Log.Debug, "poly_degree is {} and list {}", poly_degree,
                   poly_degree_list)
        global_poly_object = Polynomial.build_from_approximation(
            approx_fct, poly_degree_list,
            [self.precision] * len(poly_degree_list), approx_interval,
            sollya.relative)
        Log.report(
            Log.Debug, "inform is {}",
            dirtyinfnorm(approx_fct - global_poly_object.get_sollya_object(),
                         approx_interval))
        # drop one power of x from every monomial: the Horner scheme then
        # evaluates p(x) / x and the FMA multiplies it back by |vx|
        poly_object = global_poly_object.sub_poly(start_index=1, offset=1)

        pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, abs_vx, unified_precision=self.precision)

        # result = pre_poly * |vx| + |vx|
        result = FMA(pre_poly, abs_vx, abs_vx)
        result.set_attributes(tag="result", debug=debug_multi)

        def offset_function(fct):
            """Return a factory mapping an offset to fct(x + offset)."""
            return lambda offset: fct(sollya.x + offset)

        near_indexing = SubFPIndexing(eps_exp, 0, 6, self.precision)
        near_approx = generic_poly_split(offset_function(sollya.erf),
                                         near_indexing, eps_target,
                                         self.precision, abs_vx)
        near_approx.set_attributes(tag="near_approx", debug=debug_multi)

        medium_indexing = SubFPIndexing(1, one_limit_exp, 7, self.precision)
        medium_approx = generic_poly_split(offset_function(sollya.erf),
                                           medium_indexing, eps_target,
                                           self.precision, abs_vx)
        medium_approx.set_attributes(tag="medium_approx", debug=debug_multi)

        # approximation for positive values: cascade from the smallest
        # magnitude range to saturation at 1.0
        scheme = ConditionBlock(
            abs_vx < eps, Return(result),
            ConditionBlock(
                abs_vx < near_indexing.get_max_bound(), Return(near_approx),
                ConditionBlock(abs_vx < medium_indexing.get_max_bound(),
                               Return(medium_approx),
                               Return(Constant(1.0,
                                               precision=self.precision)))))
        return scheme
Ejemplo n.º 10
0
    def generate_scheme(self):
        """Build the fixed-point table + polynomial scheme for cosine or sine.

        Steps performed:
          1. derive a fixed-point scaling from the magnitude bound of
             self.input_intervals[0];
          2. fill a fused table of (cos, sin) values on a regular grid;
          3. convert the input to fixed point and split it into a high part
             (table index) and a low part (polynomial argument);
          4. fit degree-2 polynomials (monomials 0 and 2) for cos(x) and
             sin(x)/x on the low-part interval;
          5. delegate recombination to self.generate_cos_scheme or
             self.generate_sin_scheme depending on self.cos_output;
          6. convert the result to the output precision and return it.

        Returns:
            A Statement containing a single Return of the converted result.
        """
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        table_size_log = self.table_size_log
        integer_size = 31

        max_bound = sup(abs(self.input_intervals[0]))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        # number of fractional bits left once the integer part of the
        # largest input fits in integer_size bits
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table: column 0 is cos, column 1 is sin
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")
        # filling table: entry i holds the values at i * 2**(max_bound_log - table_size_log)
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log
            fused_table[i][0] = cos(local_x)
            fused_table[i][1] = sin(local_x)

        # argument reduction evaluation scheme: the input is converted to a
        # signed fixed-point format with scaling_power fractional bits
        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision
        output_precision = self.get_output_precision()
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        # mask with bits (32 - table_size_log - 1) .. 31 set, i.e. the
        # top (table_size_log + 1) bits of a 32-bit word
        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        # high part: reduced input with its low bits cleared
        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        # low part: what remains after removing the high part; this is the
        # argument fed to the polynomial approximations
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        # error measure handed to the polynomial builder: sup-norm of f - p
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation over one table step
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(sollya.x), [0, 2],
            [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        # only the degree-2 coefficient is forwarded to the final scheme
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation (of sin(x) / x)
        sin_poly_degree = 2
        Log.report(Log.Info, "sine poly degree: %d" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        # NOTE(review): this format list has sin_poly_degree + 2 entries for
        # two monomials, whereas the cosine call passes exactly two; kept
        # as-is, confirm which length the polynomial builder expects.
        sin_poly_object, _ = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine: both generators take the
        # same arguments, only the recombination differs
        if self.cos_output:
            generate_output_scheme = self.generate_cos_scheme
        else:
            generate_output_scheme = self.generate_sin_scheme
        scheme = generate_output_scheme(computation_precision,
                                        tabulated_cos, tabulated_sin,
                                        coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=output_precision)

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme
Ejemplo n.º 11
0
  def generate_scheme(self): 
    """Build the pieces of a table-of-polynomials cosine scheme.

    NOTE(review): this listing stops after cg_eval_error_copy_map is built;
    no return statement is visible, so the remainder of the function (and
    its return value) cannot be documented from here.

    Visible steps:
      1. take the absolute value of the input "x";
      2. build the NaN / infinity special-case nodes;
      3. reduce the argument: k = nearest integer to x * 2**frac_pi_index / pi,
         then subtract k * pi / 2**frac_pi_index using a hi/lo constant pair,
         both in self.precision and in ML_Binary64;
      4. fit one polynomial per value of i in [0, 2**(frac_pi_index+1)) to
         cos(x + i * pi / 2**frac_pi_index) over approx_interval;
      5. turn each polynomial into an evaluation scheme;
      6. when gappa is installed, start preparing an error evaluation for i == 3.
    """
    # declaring CodeFunction and retrieving input variable
    # the input is wrapped in Abs: everything below works on |x|
    vx = Abs(self.implementation.add_input_variable("x", self.precision), tag = "vx") 


    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag: 
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    # (injects the input value and function name into every RaiseReturn)
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    # debug display attribute matching the working precision
    debug_precision = {ML_Binary32: debug_ftox, ML_Binary64: debug_lftolx}[self.precision]


    # special-value predicates on the (absolute-valued) input
    test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
    test_nan        = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
    test_positive   = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

    test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
    # signaling NaN: raise the invalid exception and return a quiet NaN
    return_snan        = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
    # return in case of standard (non-special) input

    sollya_precision = self.precision.get_sollya_object()
    # the high part of pi / 2**frac_pi_index is rounded to 3 bits fewer than
    # the mantissa field -- presumably so that its product with a small
    # integer k stays exact; TODO confirm
    hi_precision = self.precision.get_field_size() - 3


    

    # argument reduction
    # frac_pi     ~ 2**frac_pi_index / pi
    # inv_frac_pi ~ pi / 2**frac_pi_index, split into hi + lo parts
    frac_pi_index = 3
    frac_pi     = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
    inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi, sollya_precision, sollya.RN)
    # computing k = E(x * frac_pi)
    vx_pi = Multiplication(vx, frac_pi, precision = self.precision)
    k = NearestInteger(vx_pi, precision = ML_Int32, tag = "k", debug = True)
    fk = Conversion(k, precision = self.precision, tag = "fk")

    inv_frac_pi_cst    = Constant(inv_frac_pi, tag = "inv_frac_pi", precision = self.precision)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo, tag = "inv_frac_pi_lo", precision = self.precision)

    # reduced argument in the working precision: vx - k * hi - k * lo
    red_vx_hi = (vx - inv_frac_pi_cst * fk)
    red_vx_hi.set_attributes(tag = "red_vx_hi", debug = debug_precision, precision = self.precision)
    red_vx_lo_sub = inv_frac_pi_lo_cst * fk
    red_vx_lo_sub.set_attributes(tag = "red_vx_lo_sub", debug = debug_precision, unbreakable = True, precision = self.precision)
    # same reduction carried out in ML_Binary64
    vx_d = Conversion(vx, precision = ML_Binary64, tag = "vx_d")
    # NOTE: red_vx_lo_sub is built above but this subtraction rebuilds the
    # product rather than reusing that node
    pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
    pre_red_vx_d_hi.set_attributes(tag = "pre_red_vx_d_hi", precision = ML_Binary64, debug = debug_lftolx)
    pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d.set_attributes(tag = "pre_red_vx_d", debug = debug_lftolx, precision = ML_Binary64)


    # index of the sub-interval: k modulo 2**(frac_pi_index+1)
    modk = Modulo(k, 2**(frac_pi_index+1), precision = ML_Int32, tag = "switch_value", debug = True)

    # sel_c is true when bit value 2**(frac_pi_index-1) is set in modk;
    # in that case the reduced argument is negated
    sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index-1)), 2**(frac_pi_index-1))
    red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
    red_vx.set_attributes(tag = "red_vx", debug = debug_precision, precision = self.precision)

    red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
    red_vx_d.set_attributes(tag = "red_vx_d", debug = debug_lftolx, precision = ML_Binary64)

    # interval covered by the reduced argument: half a reduction step on
    # each side of zero
    approx_interval = Interval(-pi/(S2**(frac_pi_index+1)), pi / S2**(frac_pi_index+1))

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    error_goal_approx = S2**-self.precision.get_precision()


    Log.report(Log.Info, "building mathematical polynomial")
    # one slot per sub-interval index
    poly_degree_vector = [None] * 2**(frac_pi_index+1)



    # error measure handed to the polynomial builder: sup-norm of f - p
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    # indices whose approximation used a relative error constraint
    index_relative = []

    poly_object_vector = [None] * 2**(frac_pi_index+1)
    for i in range(2**(frac_pi_index+1)):
      # function approximated for sub-interval i: cosine shifted by i reduction steps
      sub_func = cos(sollya.x+i*pi/S2**frac_pi_index)
      degree = int(sup(guessdegree(sub_func, approx_interval, error_goal_approx))) + 1

      degree_list = range(degree+1)
      a_interval = approx_interval
      if i == 0:
        # ad-hoc, TODO: to be cleaned
        # around 0 only even monomials 0, 2, 4, 6 are requested
        degree = 6
        degree_list = range(0, degree+1, 2)
      elif i % 2**(frac_pi_index) == 2**(frac_pi_index-1):
        # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x) 
        # must be generated
        # (odd monomials only)
        degree_list = range(1, degree+1, 2)

      # for these indices the first coefficient is kept in binary64
      # NOTE(review): precision_list always has degree+1 entries, even when
      # degree_list was restricted to even or odd monomials above -- confirm
      # the builder tolerates the longer list
      if i == 3 or i == 5 or i == 7 or i == 9: 
        precision_list =  [sollya.binary64] + [sollya.binary32] *(degree)
      else:
        precision_list = [sollya.binary32] * (degree+1)

      poly_degree_vector[i] = degree 

      # absolute error by default; relative error for indices strictly
      # within delta of (but not equal to) the odd-monomial index.
      # With frac_pi_index = 3, delta is 1, so no integer centered_i
      # satisfies the condition and the relative branch is never taken.
      constraint = sollya.absolute
      delta = (2**(frac_pi_index - 3))
      centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index-1)
      if centered_i < delta and centered_i > -delta and centered_i != 0:
        constraint = sollya.relative
        index_relative.append(i)
      Log.report(Log.Info, "generating approximation for %d/%d" % (i, 2**(frac_pi_index+1)))
      poly_object_vector[i], _ = Polynomial.build_from_approximation_with_error(sub_func, degree_list, precision_list, a_interval, constraint, error_function = error_function) 


    # unified power map for red_sx^n
    # (shared between all schemes through the power_map_ argument)
    upm = {}
    rel_error_list = []

    poly_scheme_vector = [None] * (2**(frac_pi_index+1))

    for i in range(2**(frac_pi_index+1)):
      poly_object = poly_object_vector[i]
      poly_precision = self.precision
      if i == 3 or i == 5 or i == 7 or i == 9: 
          # degree 0 and 1 terms are evaluated separately with a binary64
          # result; the remaining terms are evaluated by the scheme builder
          # and multiplied by the binary64 square of the reduced argument
          poly_precision = ML_Binary64
          c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = ML_Binary64)
          c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
          poly_hi = (c0 + c1 * red_vx)
          poly_hi.set_precision(ML_Binary64)
          red_vx_d_2 = red_vx_d * red_vx_d
          poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(poly_object.sub_poly(start_index = 2, offset = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
          poly_scheme.set_attributes(unbreakable = True)
      elif i == 4:
          # odd-monomial case: the linear term uses the binary64 reduced
          # argument, higher-order terms use the working precision
          c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = ML_Binary64)
          poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
          poly_scheme.set_precision(ML_Binary64)
      else:
          # default: whole polynomial evaluated in the working precision
          poly_scheme = polynomial_scheme_builder(poly_object, red_vx, unified_precision = poly_precision, power_map_ = upm)
      #if i == 3:
      #  c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = self.precision)
      #  c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
      #  poly_scheme = (c0 + c1 * red_vx) + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

      poly_scheme.set_attributes(tag = "poly_cos%dpi%d" % (i, 2**(frac_pi_index)), debug = debug_precision)
      poly_scheme_vector[i] = poly_scheme



      #try:
      # error evaluation is only prepared for i == 3, and only when gappa
      # is available
      if is_gappa_installed() and i == 3:
          # work on an optimized copy of the scheme
          opt_scheme = self.opt_engine.optimization_process(poly_scheme, self.precision, copy = True, fuse_fma = self.fuse_fma)

          # collect the nodes of the optimized copy, indexed by tag
          tag_map = {}
          self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

          # stand-in variable constrained to the approximation interval
          gappa_vx = Variable("red_vx", precision = self.precision, interval = approx_interval)

          # both reduced-argument nodes are mapped to the same stand-in variable
          cg_eval_error_copy_map = {
              tag_map["red_vx"]:    gappa_vx, 
              tag_map["red_vx_d"]:  gappa_vx,
          }
Ejemplo n.º 12
0
    def generate_scheme(self):
        """Build the table + polynomial evaluation scheme for 2**x.

        The input is split as x = vx_int * 2**-index_size + vx_frac with
        vx_int = floor(x * 2**index_size). The integer vx_int is further
        split into a high part (inserted as an exponent through
        ExponentInsertion) and a low part (index into a table of
        2**(j * 2**-index_size) stored as hi/lo pairs). The fractional part
        is handled by a polynomial approximation of 2**x on
        [0, 2**-index_size] whose constant term is dropped.

        Returns:
            A Statement returning +infinity when the overflow test holds and
            the reconstructed value otherwise.

        Raises:
            KeyError: if self.precision is neither ML_Binary32 nor
                ML_Binary64 (no matching integer format is defined).
        """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # log2 of the number of table steps per unit of x
        index_size = 3

        approx_interval = Interval(0.0, 2**-index_size)
        error_goal_approx = 2**-(self.precision.get_precision())
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # vx_int = floor(x * 2**index_size), kept in floating point
        vx_int = Floor(vx * 2**index_size,
                       precision=self.precision,
                       tag="vx_int",
                       debug=debug_multi)
        # remainder of x after removing the table grid point
        vx_frac = vx - (vx_int * 2**-index_size)
        vx_frac.set_attributes(tag="vx_frac",
                               debug=debug_multi,
                               unbreakable=True)
        poly_degree = sup(
            guessdegree(2**(sollya.x), approx_interval, error_goal_approx)) + 1
        # the degree comes back as a sollya object: convert it explicitly
        # before using it as a list repetition count
        precision_list = [1] + [self.precision] * int(poly_degree)

        vx_integer = Conversion(vx_int,
                                precision=int_precision,
                                tag="vx_integer",
                                debug=debug_multi)
        # high part: becomes the exponent of the result
        vx_int_hi = BitLogicRightShift(vx_integer,
                                       Constant(index_size),
                                       tag="vx_int_hi",
                                       debug=debug_multi)
        # low part: selects the table entry
        vx_int_lo = Modulo(vx_integer,
                           2**index_size,
                           tag="vx_int_lo",
                           debug=debug_multi)
        pow_exp = ExponentInsertion(Conversion(vx_int_hi,
                                               precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # table of 2**(j * 2**-index_size); rows [2**index_size, 2 * 2**index_size)
        # repeat the values of rows [0, 2**index_size).
        # column 0 holds the low (rounding error) part, column 1 the high part
        exp2_table = ML_Table(dimensions=[2 * 2**index_size, 2],
                              storage_precision=self.precision,
                              tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            exp2_value = SollyaObject(2)**((input_value) * 2**-index_size)
            hi_value = round(exp2_value, self.precision.get_sollya_object(),
                             RN)
            lo_value = round(exp2_value - hi_value,
                             self.precision.get_sollya_object(), RN)
            exp2_table[i][0] = lo_value
            exp2_table[i][1] = hi_value

        # error measure handed to the polynomial builder: sup-norm of f - p
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        # reported through Log (dumped to stdout, see set_dump_stdout above)
        # instead of a Python-2-only print statement
        Log.report(
            Log.Info, "poly_approx_error: {} {}".format(
                poly_approx_error, float(log2(poly_approx_error))))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        # the constant term is dropped: poly approximates 2**vx_frac - 1
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         vx_frac,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        # always read from the upper half of the table
        # NOTE(review): presumably so that a negative remainder from Modulo
        # still lands inside the table -- confirm Modulo's sign convention
        table_index = Addition(vx_int_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        lo_value_load = TableLoad(exp2_table,
                                  table_index,
                                  0,
                                  tag="lo_value_load",
                                  debug=debug_multi)
        hi_value_load = TableLoad(exp2_table,
                                  table_index,
                                  1,
                                  tag="hi_value_load",
                                  debug=debug_multi)

        # (hi + lo) * (1 + poly) * 2**vx_int_hi, summed from the smallest
        # terms to the largest
        result = (hi_value_load +
                  (hi_value_load * poly +
                   (lo_value_load + lo_value_load * poly))) * pow_exp
        # overflow when the exponent part exceeds the format's emax
        ov_flag = Comparison(vx_int_hi,
                             Constant(self.precision.get_emax(),
                                      precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme
Ejemplo n.º 13
0
    def generate_scheme(self):
        """Build and return the operation graph for the cosine implementation.

        The scheme works on the absolute value of the input ``x`` and is
        made of two paths selected by ``Abs(vx) >= 2**20``:

        * standard path: ``k = nearestint(vx * 2**3 / pi)`` is computed, the
          reduced argument ``vx - k * pi / 2**3`` is evaluated with a
          hi/lo split of the constant, and ``k mod 16`` selects (through a
          ``SwitchBlock``) one of the polynomials approximating
          ``cos(x + i * pi / 8)`` on ``[-pi/16, pi/16]``;
        * large-argument path: the external function
          ``payne_hanek_fp32_asm`` (declared here as taking a
          ``ML_Binary32`` and returning a ``ML_Binary64``) provides the
          reduced value, whose nearest integer modulo 16 selects one of 16
          polynomials built on ``[-0.5, 0.5]``.

        Returns:
            A ``Statement`` holding the reduction nodes and the
            ``ConditionBlock`` choosing between the two paths.

        Raises:
            KeyError: if ``self.precision`` is neither ``ML_Binary32`` nor
                ``ML_Binary64`` (debug attribute lookup below).
        """
        # declaring CodeFunction and retrieving input variable
        vx = Abs(self.implementation.add_input_variable("x", self.precision),
                 tag="vx")

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        debug_precision = {
            ML_Binary32: debug_ftox,
            ML_Binary64: debug_lftolx
        }[self.precision]

        # NOTE: the special-value nodes below (test_nan_or_inf ...
        # specific_return) are built but are not referenced by the scheme
        # returned at the end of this method.
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))), infty_return)

        # return in case of standard (non-special) input
        sollya_precision = self.precision.get_sollya_object()
        # the high part of pi / 2**frac_pi_index is rounded on fewer bits
        # than the low part
        hi_precision = self.precision.get_field_size() - 3

        # argument reduction
        frac_pi_index = 3
        frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
        inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)
        # computing k = E(x * frac_pi)
        vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
        k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
        fk = Conversion(k, precision=self.precision, tag="fk")

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision)

        # reduced argument in the working precision: vx - k * (hi + lo)
        red_vx_hi = (vx - inv_frac_pi_cst * fk)
        red_vx_hi.set_attributes(tag="red_vx_hi",
                                 debug=debug_precision,
                                 precision=self.precision)
        red_vx_lo_sub = inv_frac_pi_lo_cst * fk
        red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub",
                                     debug=debug_precision,
                                     unbreakable=True,
                                     precision=self.precision)
        # same reduction carried out in ML_Binary64
        vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
        pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
        pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                       precision=ML_Binary64,
                                       debug=debug_lftolx)
        pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d.set_attributes(tag="pre_red_vx_d",
                                    debug=debug_lftolx,
                                    precision=ML_Binary64)

        # k modulo 2 * 2**frac_pi_index selects the polynomial
        modk = Modulo(k,
                      2**(frac_pi_index + 1),
                      precision=ML_Int32,
                      tag="switch_value",
                      debug=True)

        # the reduced argument is negated when bit (frac_pi_index - 1) of
        # modk is set
        sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                      2**(frac_pi_index - 1))
        red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
        red_vx.set_attributes(tag="red_vx",
                              debug=debug_precision,
                              precision=self.precision)

        red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
        red_vx_d.set_attributes(tag="red_vx_d",
                                debug=debug_lftolx,
                                precision=ML_Binary64)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        error_goal_approx = S2**-self.precision.get_precision()

        Log.report(Log.Info, "building mathematical polynomial")
        poly_degree_vector = [None] * 2**(frac_pi_index + 1)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        index_relative = []

        # indexes whose polynomial gets a binary64 constant coefficient and
        # a binary64 evaluation of its leading terms
        wide_indexes = (3, 5, 7, 9)

        poly_object_vector = [None] * 2**(frac_pi_index + 1)
        for i in range(2**(frac_pi_index + 1)):
            sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
            degree = int(
                sup(guessdegree(sub_func, approx_interval,
                                error_goal_approx))) + 1

            degree_list = range(degree + 1)
            a_interval = approx_interval
            if i == 0:
                # ad-hoc, TODO: to be cleaned
                # (only even degrees up to 6 are requested around 0)
                degree = 6
                degree_list = range(0, degree + 1, 2)
            elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
                # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
                # must be generated
                degree_list = range(1, degree + 1, 2)

            if i in wide_indexes:
                precision_list = [sollya.binary64
                                  ] + [sollya.binary32] * (degree)
            else:
                precision_list = [sollya.binary32] * (degree + 1)

            poly_degree_vector[i] = degree

            constraint = sollya.absolute
            delta = (2**(frac_pi_index - 3))
            centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
            # NOTE: with frac_pi_index = 3, delta equals 1, so no integer
            # centered_i other than 0 lies strictly between -delta and delta
            if centered_i < delta and centered_i > -delta and centered_i != 0:
                constraint = sollya.relative
                index_relative.append(i)
            Log.report(
                Log.Info, "generating approximation for %d/%d" %
                (i, 2**(frac_pi_index + 1)))
            poly_object_vector[
                i], _ = Polynomial.build_from_approximation_with_error(
                    sub_func,
                    degree_list,
                    precision_list,
                    a_interval,
                    constraint,
                    error_function=error_function)

        # unified power map for red_sx^n
        upm = {}
        rel_error_list = []

        poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

        for i in range(2**(frac_pi_index + 1)):
            poly_object = poly_object_vector[i]
            poly_precision = self.precision
            if i in wide_indexes:
                # c0 + c1 * red_vx is evaluated in binary64, the remaining
                # terms are built from the sub-polynomial starting at
                # index 2 and scaled by red_vx_d**2
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * red_vx)
                poly_hi.set_precision(ML_Binary64)
                red_vx_d_2 = red_vx_d * red_vx_d
                poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2, offset=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_attributes(unbreakable=True)
            elif i == 4:
                # degree-1 term evaluated with the binary64 reduced argument
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=ML_Binary64)
                poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_precision(ML_Binary64)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    red_vx,
                    unified_precision=poly_precision,
                    power_map_=upm)

            poly_scheme.set_attributes(tag="poly_cos%dpi%d" %
                                       (i, 2**(frac_pi_index)),
                                       debug=debug_precision)
            poly_scheme_vector[i] = poly_scheme

            # evaluation error of the i == 3 polynomial is measured with
            # gappa when available; the collected value is not used by the
            # returned scheme
            if is_gappa_installed() and i == 3:
                opt_scheme = self.opt_engine.optimization_process(
                    poly_scheme,
                    self.precision,
                    copy=True,
                    fuse_fma=self.fuse_fma)

                tag_map = {}
                self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

                gappa_vx = Variable("red_vx",
                                    precision=self.precision,
                                    interval=approx_interval)

                cg_eval_error_copy_map = {
                    tag_map["red_vx"]: gappa_vx,
                    tag_map["red_vx_d"]: gappa_vx,
                }

                # single-argument print calls behave identically as Python 2
                # statements and Python 3 function calls
                print("opt_scheme")
                print(opt_scheme.get_str(depth=None,
                                         display_precision=True,
                                         memoization_map={}))

                eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_scheme,
                    cg_eval_error_copy_map,
                    gappa_filename="red_arg_%d.g" % i)
                poly_range = cos(approx_interval + i * pi / S2**frac_pi_index)
                rel_error_list.append(eval_error / poly_range)

        half = 2**frac_pi_index
        sub_half = 2**(frac_pi_index - 1)

        # determine if the reduced input is within the second and third quarter (not first nor fourth)
        # to negate the cosine output
        factor_cond = BitLogicAnd(BitLogicXor(
            BitLogicRightShift(modk, frac_pi_index),
            BitLogicRightShift(modk, frac_pi_index - 1)),
                                  1,
                                  tag="factor_cond",
                                  debug=True)

        CM1 = Constant(-1, precision=self.precision)
        C1 = Constant(1, precision=self.precision)
        factor = Select(factor_cond,
                        CM1,
                        C1,
                        tag="factor",
                        debug=debug_precision)
        factor2 = Select(Equal(modk, Constant(sub_half)),
                         CM1,
                         C1,
                         tag="factor2",
                         debug=debug_precision)

        # Each polynomial of index i < sub_half is shared by the switch
        # values i, half - i, half + i and 2 * half - i (only the first two
        # for i == 0); the sign is applied through `factor`.
        switch_map = {}
        for i in range(2**(frac_pi_index - 1)):
            switch_case = (i, half - i)
            if i != 0:
                switch_case = switch_case + (half + i, 2 * half - i)
            if poly_scheme_vector[i].get_precision() != self.precision:
                poly_result = Conversion(poly_scheme_vector[i],
                                         precision=self.precision)
            else:
                poly_result = poly_scheme_vector[i]
            switch_map[switch_case] = Return(factor * poly_result)
        # sub_half and half + sub_half share one polynomial, signed by factor2
        switch_map[(sub_half, half + sub_half)] = Return(
            factor2 * poly_scheme_vector[sub_half])

        result = SwitchBlock(modk, switch_map)

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################

        # payne and hanek argument reduction for large arguments
        red_func_name = "payne_hanek_fp32_asm"
        payne_hanek_func_op = FunctionOperator(
            red_func_name,
            arg_map={0: FO_Arg(0)},
            require_header=["support_lib/ml_red_arg.h"])
        payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                          ML_Binary64, payne_hanek_func_op)
        payne_hanek_func_op.declare_prototype = payne_hanek_func
        large_arg_red = payne_hanek_func(vx)
        red_bound = S2**20

        cond = Abs(vx) >= red_bound
        cond.set_attributes(tag="cond", likely=False)

        lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
        # NOTE(review): an earlier comment stated that the modulo is supposed
        # to be already performed by payne_hanek_cosfp32; it is applied again
        # here -- confirm against the support library
        lar_modk = Modulo(lar_neark,
                          Constant(16, precision=ML_Int64),
                          tag="lar_modk",
                          debug=True)
        # fractional part of the reduced value, then its hi/lo split in the
        # working precision
        pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                    precision=ML_Binary64)
        pre_lar_red_vx.set_attributes(precision=ML_Binary64,
                                      debug=debug_lftolx,
                                      tag="pre_lar_red_vx")
        lar_red_vx = Conversion(pre_lar_red_vx,
                                precision=self.precision,
                                debug=debug_precision,
                                tag="lar_red_vx")
        lar_red_vx_lo = Conversion(
            pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
            precision=self.precision)
        lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                     precision=self.precision)

        lar_k = 3
        # large arg reduction Universal Power Map
        lar_upm = {}
        lar_switch_map = {}
        approx_interval = Interval(-0.5, 0.5)
        # loop-invariant scaling of the polynomial variable; kept under its
        # own name so that it does not shadow the rounded `frac_pi` above
        lar_frac_pi = pi / S2**lar_k
        lar_wide_indexes = (3, 5, 7, 9, 11, 13)
        for i in range(2**(lar_k + 1)):
            func = cos(lar_frac_pi * i + lar_frac_pi * sollya.x)

            degree = 6
            error_mode = sollya.absolute
            if i % 2**(lar_k) == 2**(lar_k - 1):
                # close to sin(x) cases: func / x is approximated with even
                # degrees, then sub_poly(offset=-1) is applied
                # (presumably shifting the coefficient indexes back by one
                # -- confirm against Polynomial.sub_poly)
                if i == 2**(lar_k - 1):
                    func = -sin(lar_frac_pi * sollya.x)
                else:
                    func = sin(lar_frac_pi * sollya.x)
                degree_list = range(0, degree + 1, 2)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func / sollya.x, degree_list, precision_list,
                    approx_interval, error_mode)
                poly_object = poly_object.sub_poly(offset=-1)
            else:
                degree_list = range(degree + 1)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func, degree_list, precision_list, approx_interval,
                    error_mode)

            if i in lar_wide_indexes:
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * lar_red_vx)
                poly_hi.set_precision(ML_Binary64)
                pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                pre_poly_scheme.set_attributes(precision=ML_Binary64)
                poly_scheme = Conversion(pre_poly_scheme,
                                         precision=self.precision)
            elif i == 4 or i == 12:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                # NOTE: c3 and c5 are built but not used in this branch
                c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                              precision=self.precision)
                c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                              precision=self.precision)
                poly_hi = polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=3),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                       precision=ML_Binary64)
                # the degree-1 term also accounts for the low part of the
                # reduced argument
                poly_scheme = Conversion(FusedMultiplyAdd(
                    c1, lar_red_vx, poly_hi, precision=ML_Binary64) +
                                         c1 * lar_red_vx_lo,
                                         precision=self.precision)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
            poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                       debug=debug_precision)
            lar_switch_map[(i, )] = Return(poly_scheme)

        lar_result = SwitchBlock(lar_modk, lar_switch_map)

        # main scheme
        Log.report(Log.Info, "Construction of the initial MDL scheme")
        scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                           ConditionBlock(cond, lar_result, result))

        return scheme
Ejemplo n.º 14
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=sollya.S2**-24):
    """Build a table-driven piecewise polynomial approximation scheme.

    The range ``[bound_low, bound_high]`` is cut into ``num_intervals``
    sub-intervals of equal size.  For each of them a polynomial of degree at
    most ``max_degree`` is generated for ``function`` shifted to the lower
    bound of the sub-interval, and its coefficients are stored in one row of
    a coefficient table.  The returned scheme computes the row index from
    ``variable``, subtracts the matching offset and evaluates the indexed
    polynomial with a Horner scheme.

    Args:
        function: callable applied to sollya expressions and to numeric
            bounds (it is called with ``sollya.x``, ``sollya.x + offset``
            and a sub-interval lower bound).
        variable: operation node holding the input of the approximation.
        precision: format used for the table storage, the polynomial
            coefficients and the generated operations.
        bound_low: lower bound of the approximation range.
        bound_high: upper bound of the approximation range.
        num_intervals: number of sub-intervals (table rows).
        max_degree: maximal polynomial degree; every table row holds
            ``max_degree + 1`` coefficients.
        error_threshold: error target handed to ``sollya.guessdegree``.

    Returns:
        A pair ``(poly_scheme, max_approx_error)``: the evaluation scheme
        and the largest absolute approximation error reported over all
        sub-intervals.

    Raises:
        SollyaError: re-raised (after printing the requested degree) when
            the polynomial generation fails on a sub-interval.
    """
    num_coeffs = max_degree + 1
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(dimensions=[num_intervals, num_coeffs],
                              storage_precision=precision,
                              tag="coeff_table")

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # the polynomial variable is an offset from the sub-interval lower
        # bound; the index below is obtained by rounding to nearest, hence
        # the interval symmetric around 0
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        # guessdegree yields an interval of degrees: keep its upper bound
        # as a plain integer so that it can be compared with max_degree and
        # used in range()
        local_degree = int(
            sollya.sup(
                sollya.guessdegree(local_function, local_interval,
                                   error_threshold)))
        degree = min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            # NOTE(review): this branch approximates the unshifted function
            # around 0, which matches the shifted variable only when
            # subint_low is 0 -- confirm for other zeros
            degree_list = range(1, degree + 1)
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x),
                degree_list, [precision] * len(degree_list),
                Interval(-subint_high, subint_high),
                sollya.absolute,
                error_function=error_function)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError:
                print("degree: {}".format(degree))
                raise
        # every column of the row is written: coefficients missing from the
        # polynomial (forced-zero constant term, or degrees above the one
        # selected for this sub-interval) are stored as 0.0 so that all rows
        # can be evaluated with the same number of terms
        for ci in range(num_coeffs):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       precision=precision)
    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    # clamped to [0, num_intervals - 1]
    index = Max(0,
                Min(
                    NearestInteger(Multiplication(diff,
                                                  delta_ratio,
                                                  precision=precision),
                                   precision=ML_Int32), num_intervals - 1),
                tag="index",
                debug=True,
                precision=ML_Int32)
    # offset of the input from the lower bound of the selected sub-interval
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=True)
    # generating indexed polynomial: all table columns are loaded (highest
    # degree first) instead of relying on the degree of the last sub-interval
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(num_coeffs)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
Ejemplo n.º 15
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        index_size = 3

        vx = Abs(vx)
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # argument reduction
        arg_reg_value = log(2) / 2**index_size
        inv_log2_value = round(1 / arg_reg_value,
                               self.precision.get_sollya_object(), RN)
        inv_log2_cst = Constant(inv_log2_value,
                                precision=self.precision,
                                tag="inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2  for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^21024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision(
        ) - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value,
                              self.precision.get_sollya_object(), RN)
        log2_hi_value_cst = Constant(log2_hi_value,
                                     tag="log2_hi_value",
                                     precision=self.precision)
        log2_lo_value_cst = Constant(log2_lo_value,
                                     tag="log2_lo_value",
                                     precision=self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
        k_log2 = Multiplication(k,
                                log2_hi_value_cst,
                                precision=self.precision,
                                exact=True,
                                tag="k_log2",
                                unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        r_eval_error = self.get_eval_error(
            r_hi,
            variable_copy_map={
                vx:
                Variable("vx",
                         interval=Interval(0, 715),
                         precision=self.precision),
                k:
                Variable("k",
                         interval=Interval(0, 1024),
                         precision=int_precision)
            })
        print "r_eval_error: ", r_eval_error

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k,
                               precision=int_precision,
                               tag="k_integer",
                               debug=debug_multi)
        k_hi = BitLogicRightShift(k_integer,
                                  Constant(index_size),
                                  tag="k_int_hi",
                                  precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer,
                      2**index_size,
                      tag="k_int_lo",
                      precision=int_precision,
                      debug=debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        exp_table = ML_Table(dimensions=[2 * 2**index_size, 4],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**((input_value) *
                                                 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value) *
                                                  2**-index_size)
            pos_value_hi = round(exp_value, self.precision.get_sollya_object(),
                                 RN)
            pos_value_lo = round(exp_value - pos_value_hi,
                                 self.precision.get_sollya_object(), RN)
            neg_value_hi = round(mexp_value,
                                 self.precision.get_sollya_object(), RN)
            neg_value_lo = round(mexp_value - neg_value_hi,
                                 self.precision.get_sollya_object(), RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # cosh(x) = 1/2 * (exp(x) + exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # cosh(x) = exp(r) * 2^(h-1) * 2^(l * 2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l * 2^-index_size)
        #
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        print "poly_approx_error: ", poly_approx_error, float(
            log2(poly_approx_error))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            r,
            unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            -r,
            unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        k_plus = Max(
            Subtraction(k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
        pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

        pos_exp = (
            pos_value_load_hi +
            (pos_value_load_hi * poly_pos +
             (pos_value_load_lo + pos_value_load_lo * poly_pos))) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        neg_exp = (
            neg_value_load_hi +
            (neg_value_load_hi * poly_neg +
             (neg_value_load_lo + neg_value_load_lo * poly_neg))) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        result = Addition(pos_exp,
                          neg_exp,
                          precision=self.precision,
                          tag="result",
                          debug=debug_multi)

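        # overflow threshold: acosh of the format's max value, rounded down;
        # inputs with |x| strictly greater than it return +infinity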
        ov_value = round(acosh(self.precision.get_max_value()),
                         self.precision.get_sollya_object(), RD)
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme