예제 #1
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() / 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
예제 #2
0
  def generate_scheme(self):
    # declaring target and instantiating optimization engine

    vx = self.implementation.add_input_variable("x", self.precision)
    
    Log.set_dump_stdout(True)
    
    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag: 
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")
    
    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)
    
    C_m1 = Constant(-1, precision = self.precision)
    
    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False);
    
    #  Infnty input
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)
    
    # Over/Underflow Tests
    
    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))
    
    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))
    
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]
    
    # Constants
    
    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)
    
    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))
    
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)


    # Reduction
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")
    
    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)
    
    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)
    
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)
    
    r = Addition(s, t, precision = self.precision)
    
    r.set_attributes(tag = "r", debug = debug_multi)
    
    r_interval = Interval(-log_2/S2, log_2/S2)
    
    local_ulp = sup(ulp(exp(r_interval), self.precision))
    
    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)
    
    
    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")
    
    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)
    
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)
    
    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)
    
    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)
    
    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi) 
    
    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")
    
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
    diff_k = ik - overflow_exp_offset 
    
    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)
    
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0
    
    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False), 
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), 
        Return(late_overflow_result)
        )


    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
    
    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset
    
    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)
    
    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0
    
    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)
    
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal, 
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), 
            Return(late_underflow_result)
            )
    
    # Reconstruction
    
    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)
    
    result_scheme = ConditionBlock(
        late_overflow_test, 
        late_overflow_return, 
        ConditionBlock(
            late_underflow_test, 
            late_underflow_return, 
            Return(std_result)
            )
        )
        
    std_return = ConditionBlock(
        overflow_test, 
        overflow_return, 
        ConditionBlock(
            underflow_test, 
            underflow_return, 
            result_scheme)
        )
        
    scheme = ConditionBlock(
        test_NaN_or_inf, 
        Statement(specific_return), 
        std_return
        )

    return scheme
예제 #3
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        r_interval = Interval(-0.5, 0.5)

        local_ulp = sup(ulp(2**r_interval, self.precision))
        Log.report(Log.Info, "ulp: ", local_ulp)
        error_goal = S2**-1 * local_ulp
        Log.report(Log.Info, "error goal: ", error_goal)

        sollya_precision = {
            ML_Binary32: sollya.binary32,
            ML_Binary64: sollya.binary64
        }[self.precision]
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        #Argument Reduction
        vx_int = NearestInteger(vx,
                                precision=int_precision,
                                tag='vx_int',
                                debug=debug_multi)
        vx_intf = Conversion(vx_int, precision=self.precision)
        vx_r = vx - vx_intf
        vx_r.set_attributes(tag="vx_r", debug=debug_multi)
        degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2
        precision_list = [1] + [self.precision] * degree

        exp_X = ExponentInsertion(vx_int,
                                  tag="exp_X",
                                  debug=debug_multi,
                                  precision=self.precision)

        #Polynomial Approx
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x) - 1, degree, precision_list, r_interval,
            sollya.absolute)
        Log.report(Log.Info, "Poly : %s" % poly_object)
        Log.report(Log.Info, "poly_error : ", poly_error)
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         vx_r,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        #Handling special cases
        oflow_bound = Constant(self.precision.get_emax() + 1,
                               precision=self.precision)
        subnormal_bound = self.precision.get_emin_subnormal()
        uflow_bound = self.precision.get_emin_normal()
        Log.report(Log.Info, "oflow : ", oflow_bound)
        #print "uflow : ", uflow_bound
        #print "sub : ", subnormal_bound
        test_overflow = Comparison(vx,
                                   oflow_bound,
                                   specifier=Comparison.GreaterOrEqual)
        test_overflow.set_attributes(tag="oflow_test",
                                     debug=debug_multi,
                                     likely=False,
                                     precision=ML_Bool)

        test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less)
        test_underflow.set_attributes(tag="uflow_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        test_subnormal = Comparison(vx,
                                    subnormal_bound,
                                    specifier=Comparison.Greater)
        test_subnormal.set_attributes(tag="sub_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        subnormal_offset = -(uflow_bound - vx_int)
        subnormal_offset.set_attributes(tag="offset", debug=debug_multi)
        exp_offset = ExponentInsertion(subnormal_offset,
                                       precision=self.precision,
                                       debug=debug_multi,
                                       tag="exp_offset")
        exp_min = ExponentInsertion(uflow_bound,
                                    precision=self.precision,
                                    debug=debug_multi,
                                    tag="exp_min")
        subnormal_result = exp_offset * exp_min * poly + exp_offset * exp_min

        test_std = LogicalOr(test_overflow,
                             test_underflow,
                             precision=ML_Bool,
                             tag="std_test",
                             likely=False)

        #Reconstruction
        result = exp_X * poly + exp_X
        result.set_attributes(tag="result", debug=debug_multi)

        C0 = Constant(0, precision=self.precision)

        return_inf = Return(FP_PlusInfty(self.precision))
        return_C0 = Return(C0)
        return_sub = Return(subnormal_result)
        return_std = Return(result)

        non_std_statement = Statement(
            ConditionBlock(
                test_overflow, return_inf,
                ConditionBlock(test_subnormal, return_sub, return_C0)))

        scheme = Statement(
            ConditionBlock(test_std, non_std_statement, return_std))

        return scheme
예제 #4
0
    def generate_scalar_scheme(self, vx, inline_select=False):
        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # r_interval = Interval(0, 1.0)
        index_size = 3
        r_interval = Interval(-2**(-index_size), 2**-index_size)

        local_ulp = sup(ulp(2**r_interval, self.precision))
        Log.report(Log.Info, "ulp: ", local_ulp)
        error_goal = S2**-1 * local_ulp
        Log.report(Log.Info, "error goal: ", error_goal)

        sollya_precision = {
            ML_Binary32: sollya.binary32,
            ML_Binary64: sollya.binary64
        }[self.precision]
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # Argument Reduction
        # r = x - floor(x), r >= 0
        vx_floor = Floor(vx,
                         precision=self.precision,
                         tag='vx_floor',
                         debug=debug_multi)
        vx_int = Conversion(vx_floor,
                            precision=int_precision,
                            tag="vx_int",
                            debug=debug_multi)
        vx_intf = vx_floor  # Conversion(vx_int, precision = self.precision)
        vx_r = vx - vx_intf
        r_hi = NearestInteger(vx_r * 2**index_size,
                              precision=self.precision,
                              tag="r_hi",
                              debug=debug_multi)
        # clamping r_hi_int within table-size to make sure
        # it does not exceeds hi_part_table when used to index it
        r_hi_int = Max(
            Min(
                Conversion(r_hi,
                           precision=int_precision,
                           tag="r_hi_int",
                           debug=debug_multi), 2**index_size + 1), 0)
        r_lo = vx_r - r_hi * 2**-index_size
        r_lo.set_attributes(tag="r_lo", debug=debug_multi)
        vx_r.set_attributes(tag="vx_r", debug=debug_multi)
        degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2
        precision_list = [1] + [self.precision] * degree

        exp_X = ExponentInsertion(vx_int,
                                  tag="exp_X",
                                  debug=debug_multi,
                                  precision=self.precision)

        #Polynomial Approx
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x) - 1, degree, precision_list, r_interval,
            sollya.absolute)
        Log.report(Log.Info, "Poly : %s" % poly_object)
        Log.report(Log.Info, "poly_error : ", poly_error)
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         r_lo,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        hi_part_table = ML_NewTable(dimensions=[2**index_size + 1],
                                    storage_precision=self.precision,
                                    tag=self.uniquify_name("exp2_table"),
                                    const=True)
        for i in range(2**index_size + 1):
            input_value = i * 2**-index_size
            tab_value = self.precision.round_sollya_object(
                sollya.SollyaObject(2)**(input_value))
            hi_part_table[i] = tab_value

        hi_part_value = TableLoad(hi_part_table,
                                  r_hi_int,
                                  precision=self.precision,
                                  tag="hi_part_value",
                                  debug=debug_multi)

        #Handling special cases
        oflow_bound = Constant(self.precision.get_emax() + 1,
                               precision=self.precision)
        subnormal_bound = self.precision.get_emin_subnormal()
        uflow_bound = self.precision.get_emin_normal()
        Log.report(Log.Info, "oflow : ", oflow_bound)
        #print "uflow : ", uflow_bound
        #print "sub : ", subnormal_bound
        test_overflow = Comparison(vx,
                                   oflow_bound,
                                   specifier=Comparison.GreaterOrEqual)
        test_overflow.set_attributes(tag="oflow_test",
                                     debug=debug_multi,
                                     likely=False,
                                     precision=ML_Bool)

        test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less)
        test_underflow.set_attributes(tag="uflow_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        test_subnormal = Comparison(vx,
                                    subnormal_bound,
                                    specifier=Comparison.Greater)
        test_subnormal.set_attributes(tag="sub_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        subnormal_offset = -(uflow_bound - vx_int)
        subnormal_offset.set_attributes(tag="offset", debug=debug_multi)
        exp_offset = ExponentInsertion(subnormal_offset,
                                       precision=self.precision,
                                       debug=debug_multi,
                                       tag="exp_offset")
        exp_min = ExponentInsertion(uflow_bound,
                                    precision=self.precision,
                                    debug=debug_multi,
                                    tag="exp_min")
        subnormal_result = hi_part_value * exp_offset * exp_min * poly + hi_part_value * exp_offset * exp_min

        test_std = LogicalOr(test_overflow,
                             test_underflow,
                             precision=ML_Bool,
                             tag="std_test",
                             likely=False,
                             debug=debug_multi)

        #Reconstruction
        result = hi_part_value * exp_X * poly + hi_part_value * exp_X
        result.set_attributes(tag="result", debug=debug_multi)

        C0 = Constant(0, precision=self.precision)

        if inline_select:
            scheme = Select(
                test_std,
                Select(test_overflow, FP_PlusInfty(self.precision),
                       Select(
                           test_subnormal,
                           subnormal_result,
                           C0,
                       )),
                result,
            )
            return scheme

        else:
            return_inf = Return(FP_PlusInfty(self.precision))
            return_C0 = Return(C0)
            return_sub = Return(subnormal_result)
            return_std = Return(result)

            non_std_statement = Statement(
                ConditionBlock(
                    test_overflow, return_inf,
                    ConditionBlock(test_subnormal, return_sub, return_C0)))

            scheme = Statement(
                ConditionBlock(test_std, non_std_statement, return_std))

            return scheme
예제 #5
0
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        #try:
        if 1:
            #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            Log.report(Log.Info, "eval error: %s" % eval_error)
        #except:
        #    Log.report(Log.Info, "gappa error evaluation failed")
        print r.get_str(depth = None, display_precision = True, display_attribute = True)
        print opt_r.get_str(depth = None, display_precision = True, display_attribute = True)

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print "ulp: ", local_ulp 
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree

        return


        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            #print "poly: ", poly.get_str(depth = None, display_precision = True)
            #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print "global_poly_error: ", global_poly_error, global_rel_poly_error 
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print "test: ", flag
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        #print scheme.get_str(depth = None, display_precision = True)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        #self.result.add_header("support_lib/ml_types.h")
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()