예제 #1
0
  def generate_scheme(self):
    """Build and return the operation graph evaluating expm1(x).

    The returned scheme is a tree of ``ConditionBlock`` nodes which, in
    order, handles:

    * NaN / infinite inputs (NaN -> qNaN, +inf -> +inf, -inf -> -1),
    * early overflow (x above the overflow bound -> +inf),
    * early underflow (x below the underflow bound -> -1),
    * late overflow / underflow detected on the reduced exponent,
    * the standard path ``2^k * (poly(r) + (1 - 2^-k))``.

    Reads ``self.implementation``, ``self.precision``, ``self.debug_flag``
    and ``self.function_name``. ``self.precision`` must be ``ML_Binary32``
    or ``ML_Binary64`` (other formats raise ``KeyError`` in the format
    lookup tables below).

    Returns:
        The root ``ConditionBlock`` of the generated scheme.
    """
    # input variable of the generated function
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation: always attaches the input
    # value and the function name to the raised exception node
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    # expm1(-inf) and the early-underflow result are both -1
    C_m1 = Constant(-1, precision = self.precision)

    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    # only evaluated once the input is known to be infinite: selects +inf vs -inf
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False)

    #  Infnty input
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)

    # Over/Underflow Tests

    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))

    # sollya rounding format and integer format matching self.precision
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]

    # Constants

    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)

    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    # log(2) is split into a short high part and a low correction part
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)


    # Reduction: k = nearest_integer(x / log(2)), r = x - k * log(2)
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")

    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)

    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)

    # s + t sums pre_sub_mul and red_coeff2; t = red_coeff2 - (s - pre_sub_mul)
    # is the part of red_coeff2 not captured by s
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)

    r = Addition(s, t, precision = self.precision)

    r.set_attributes(tag = "r", debug = debug_multi)

    r_interval = Interval(-log_2/S2, log_2/S2)

    local_ulp = sup(ulp(exp(r_interval), self.precision))

    print("ulp: ", local_ulp)
    # target approximation error: half of the largest ulp of exp(r)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)


    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")

    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)

    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    # the degree-1 term is added back explicitly as r below, so only the
    # terms of degree >= 2 go through the evaluation scheme
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)

    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)

    # diff = 1 - 2^-k, so that 2^k * (poly + diff) = 2^k * (1 + poly) - 1
    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi)

    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")

    # Floor division keeps the offset an integer: it is subtracted from the
    # integer exponent ik and inserted as an exponent. True division ("/")
    # would produce a float on Python 3.
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() // 2)
    diff_k = ik - overflow_exp_offset

    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)

    # 2^k is applied in two steps: 2^(k - offset) then 2^offset
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0

    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False),
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)),
        Return(late_overflow_result)
        )


    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)

    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset

    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)

    # same two-step scaling as above, with the exponent shifted upwards first
    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0

    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)

    # Statement of two operations: the conditional underflow raise (no else
    # branch), followed by the plain return of the result
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)
            ),
        Return(late_underflow_result)
        )

    # Reconstruction

    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)

    result_scheme = ConditionBlock(
        late_overflow_test,
        late_overflow_return,
        ConditionBlock(
            late_underflow_test,
            late_underflow_return,
            Return(std_result)
            )
        )

    std_return = ConditionBlock(
        overflow_test,
        overflow_return,
        ConditionBlock(
            underflow_test,
            underflow_return,
            result_scheme)
        )

    scheme = ConditionBlock(
        test_NaN_or_inf,
        Statement(specific_return),
        std_return
        )

    return scheme
예제 #2
0
    def generate_scheme(self):
        """Build and return the operation graph evaluating exp(x).

        The returned scheme is a tree of ``ConditionBlock`` nodes which, in
        order, handles:

        * NaN / infinite inputs (sNaN raises invalid, NaN -> qNaN,
          +inf -> +inf, -inf -> +0),
        * early overflow / underflow on the raw input,
        * late overflow / underflow detected on the reduced exponent,
        * the standard path ``2^ik * poly``.

        The polynomial degree is searched iteratively: starting from a sollya
        ``guessdegree`` estimate, the degree is increased until the sum of
        the approximation error and the gappa-computed evaluation error,
        relative to exp on each sub-interval, is below the error goal. When
        gappa is not installed the search stops after the first attempt.

        Reads ``self.implementation``, ``self.precision``,
        ``self.debug_flag``, ``self.function_name``, ``self.libm_compliant``,
        ``self.accuracy``, ``self.opt_engine`` and ``self.gappa_engine``, and
        calls ``self.optimise_scheme``.

        Returns:
            The root ``ConditionBlock`` of the generated scheme.
        """
        # input variable of the generated function
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation: exceptions are only
        # raised in libm-compliant mode, otherwise the value is returned
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        # only evaluated once the input is known to be infinite
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log(2) is split into a high part, whose width is reduced by the
        # bit size of the largest |k| plus 2, and a low correction part
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction: k = nearest_integer(x / log(2)), computed both
        # in the floating-point format (k) and in the integer format (ik)
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        # three-way split of the approximation interval, used as the
        # dichotomy for the polynomial evaluation error
        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        # nodes replaced by bounded variables for the error evaluation of r
        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            # NOTE(review): error_goal stays unbound on this path; this
            # relies on Log.report(Log.Error, ...) not returning - confirm
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # the approximation itself is given half of the overall error goal
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        # initial degree estimate, never below 2
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme

        # degree search: exits when the error goal is met, or right after
        # the first attempt when gappa is unavailable
        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            # terms of degree >= 2 only; the lower-order part is rebuilt
            # explicitly from exact_hi_part and exact_lo_part below
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            # exp(r) ~ 1 + r + sub_poly(r), with r = exact_hi + exact_lo
            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            # both error evaluations replace the two parts of r by bounded
            # variables
            sub_poly_error_copy_map = {
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                # one evaluation-error query per sub-interval of exact_hi
                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            # keep the worst relative error over the three sub-intervals
            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                # identity test: the other operand may be a sollya object
                # with overloaded comparison operators
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # Floor division keeps the offset an integer: it is used both as an
        # integer-format Constant and as an inserted exponent. True division
        # ("/") would produce a float on Python 3.
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        # 2^ik is applied in two steps: 2^(ik - offset) then 2^offset
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        # same two-step scaling, with the exponent shifted upwards first
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        # Statement of two operations: the conditional underflow raise (no
        # else branch), followed by the plain return of the result
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
예제 #3
0
 def numeric_emulate(self, input_value):
   """Return the reference value expm1(input_value) for this function.

   The computation is delegated entirely to the ``expm1`` callable in
   scope; ``self`` is not read.
   """
   expected_value = expm1(input_value)
   return expected_value