def __init__(self, args=DefaultArgTemplate):
    # initializing base class
    ML_FunctionBasis.__init__(self, args)
    # function specific arguments
    self.cgpe_index = args.cgpe_index
    self.tbl_index_size = args.tbl_index_size
    self.no_subnormal = args.no_subnormal
    self.no_fma = args.no_fma
    self.no_rcp = args.no_rcp
    self.log_radix = {
        "e": EXP_1,
        "2": S2,
        "10": S10
    }[args.log_radix]
    self.force_division = args.force_division

    # TODO: beware, dict indexing relies on Python matching numerical values
    #       to keys that may have been built through different construction
    #       methods (we try to make sure every value is constructed through
    #       sollya.parse, but this is not really safe): to be IMPROVED
    LOG_EMUL_FCT_MAP = {
        S2: sollya.log2,
        EXP_1: sollya.log,
        S10: sollya.log10
    }
    if self.log_radix in LOG_EMUL_FCT_MAP:
        Log.report(Log.Info, "radix {} is part of standard radices", self.log_radix)
        self.log_emulation_function = LOG_EMUL_FCT_MAP[self.log_radix]
    else:
        Log.report(Log.Info, "radix {} is not part of standard radices {{2, e, 10}}", self.log_radix)
        self.log_emulation_function = lambda v: sollya.log(v) / sollya.log(self.log_radix)
    # .. update output and function name
    self.function_name = "LOG{}".format(args.log_radix)
    self.output_file = "{}.c".format(self.function_name)
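Note: the TODO above concerns keying LOG_EMUL_FCT_MAP with sollya constants. A minimal sketch of a more robust alternative, assuming only the standard radix strings "2", "e" and "10" are accepted (the names below are illustrative, not part of the original class):

LOG_EMUL_FCT_MAP_BY_STRING = {
    "2": sollya.log2,
    "e": sollya.log,
    "10": sollya.log10,
}

def select_log_emulation(radix_string, log_radix_value):
    # dispatch on the user-supplied radix string instead of comparing sollya objects
    if radix_string in LOG_EMUL_FCT_MAP_BY_STRING:
        return LOG_EMUL_FCT_MAP_BY_STRING[radix_string]
    # fall back to the change-of-base formula for non-standard radices
    return lambda v: sollya.log(v) / sollya.log(log_radix_value)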
Example #2
        def generate_fptaylor(x):
            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join(
                ["Variables",
                 "  real z in [{},{}];".format(x_low, x_high),
                 "Definitions",
                 "  retval rnd64= {};".format(poly_expr),
                 "Expressions",
                 "  retval;"])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {**config,
                                              "--rel-error": "true",
                                              "--abs-error": "true"})
                rnd_rel_err = float(res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {**config,
                                                  "--rel-error": "false",
                                                  "--abs-error": "true"})
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     x,
                                     sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     x,
                                     sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
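For orientation, the query assembled above is plain FPTaylor input text; with a hypothetical degree-2 polynomial on [0.5, 1.0] it would read roughly:

Variables
  real z in [0.5,1.0];
Definitions
  retval rnd64= 1.0 + z * (1.0 + z * 0.5);
Expressions
  retval;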
Example #3
    def generate_scheme(self):
        x = self.implementation.add_input_variable("x", self.precision)

        n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN)

        if not self.skip_reduction:
            n_invlog2 = self.precision.round_sollya_object(
                1 / sollya.log(2), sollya.RN)
            invlog2 = Constant(n_invlog2, tag="invlog2")

            unround_k = Multiplication(x, invlog2, tag="unround_k")

            k = Floor(unround_k, precision=self.precision, tag="k")

            log2 = Constant(n_log2, tag="log2")

            whole = Multiplication(k, log2, tag="whole")

            r = Subtraction(x, whole, tag="r")

        else:
            r = x

        approx_interval = sollya.Interval(-2**-10, n_log2 + 2**-10)
        approx_func = sollya.exp(sollya.x)
        builder = Polynomial.build_from_approximation
        sollya.settings.prec = 2**10
        poly_object = builder(approx_func, self.poly_degree,
                              [self.precision] * (self.poly_degree + 1),
                              approx_interval, sollya.relative)
        self.poly_object = poly_object
        schemer = PolynomialSchemeEvaluator.generate_horner_scheme
        poly = schemer(poly_object, r)

        if not self.skip_reduction:
            ik = Conversion(k,
                            precision=ML_Binary32.get_integer_format(),
                            tag="ik")

            twok = ExponentInsertion(ik, precision=self.precision, tag="twok")

            retval = Multiplication(poly, twok, tag="retval")

        else:
            retval = poly

        scheme = Return(retval)

        return scheme
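The reduction in this scheme relies on the identity exp(x) = 2**k * exp(r), with k = floor(x / log(2)) and r = x - k*log(2); a plain-float sanity check of that identity (a standalone sketch, not Metalibm code):

import math

x = 3.7
k = math.floor(x / math.log(2))
r = x - k * math.log(2)
# the scheme evaluates a polynomial approximating exp(r) and scales it by 2**k
assert math.isclose(math.exp(x), 2**k * math.exp(r), rel_tol=1e-12)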
Example #4
    def generate_scheme(self):
        x = self.implementation.add_input_variable("x", self.precision)

        if not self.skip_reduction:
            x_exp_int = ExponentExtraction(x, tag="x_exp_int")
            x_exp = Conversion(x_exp_int, precision=self.precision, tag="x_exp")

            x_man = MantissaExtraction(x, tag="x_man")

            r = Multiplication(x_man, 0.5, tag="r")

        else:
            r = x

        approx_interval = sollya.Interval(0.5-2**-10, 1+2**-10)
        approx_func = sollya.log(sollya.x)
        builder = Polynomial.build_from_approximation
        sollya.settings.prec = 2**10
        poly_object = builder(approx_func,
                              self.poly_degree,
                              [self.precision]*(self.poly_degree+1),
                              approx_interval,
                              sollya.relative)
        self.poly_object = poly_object
        schemer = PolynomialSchemeEvaluator.generate_horner_scheme
        poly = schemer(poly_object, r, unified_precision=self.precision)

        if not self.skip_reduction:
            n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN)
            log2 = Constant(n_log2, tag="log2")

            x_mul = Addition(x_exp, 1, tag="x_mul")

            offset = Multiplication(x_mul, log2, tag="offset")

            retval = Addition(offset, poly, tag="retval")

        else:
            retval = poly

        scheme = Return(retval)

        return scheme
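The reconstruction here uses log(x) = (x_exp + 1) * log(2) + log(x_man * 0.5), where x = x_man * 2**x_exp with x_man in [1, 2); a plain-float check via math.frexp, which returns x = f * 2**E with f in [0.5, 1), i.e. f = x_man * 0.5 and E = x_exp + 1 (standalone sketch, not Metalibm code):

import math

x = 13.0
f, E = math.frexp(x)  # x == f * 2**E with f in [0.5, 1)
# offset corresponds to (x_exp + 1) * log(2) = E * log(2); poly approximates log(f)
assert math.isclose(math.log(x), E * math.log(2) + math.log(f), rel_tol=1e-12)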
Example #5
    def determine_error(self):
        sollya.settings.display = sollya.hexadecimal
        n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN)

        poly_expr = str(sollya.horner(self.poly_object.get_sollya_object()))
        poly_expr = poly_expr.replace("_x_", "z")
        poly_expr = poly_expr.replace("z^0x1p1", "z*z")

        config = fptaylor.CHECK_CONFIG.copy()
        del config["--abs-error"]
        config["--opt"] = "gelpia"
        config["--rel-error-threshold"] = "0.0"
        config["--intermediate-opt"] = "false"
        config["--uncertainty"] = "false"
        config["--opt-timeout"] = 100000 # log is returning inf
        config["--opt-f-rel-tol"] = "1e0"
        config["--opt-f-abs-tol"] = "0.0"
        config["--opt-x-rel-tol"] = "0.0"
        config["--opt-x-abs-tol"] = "0.0"

        def generate_fptaylor(x):
            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join(
                ["Variables",
                 "  real z in [{},{}];".format(x_low, x_high),
                 "Definitions",
                 "  retval rnd64= {};".format(poly_expr),
                 "Expressions",
                 "  retval;"])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {**config,
                                              "--rel-error": "true",
                                              "--abs-error": "true"})
                rnd_rel_err = float(res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {**config,
                                                  "--rel-error": "false",
                                                  "--abs-error": "true"})
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     x,
                                     sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     x,
                                     sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err

        def generate_reduction_fptaylor(x):
            unround_e = sollya.log2(x)
            e_low = sollya.floor(sollya.inf(unround_e))
            e_high = sollya.floor(sollya.sup(unround_e))
            if e_low != e_high:
                assert False, "Interval must not straddle a binade"
            e = int(e_low) + 1
            z = x / (2**e) * 0.5
            query = "\n".join(
                ["Variables",
                 "  real z in [{},{}];".format(sollya.inf(z), sollya.sup(z)),
                 "Definitions",
                 "  poly rnd64= {};".format(poly_expr),
                 "  retval rnd64= {}*{} + poly;".format(e, n_log2),
                 "Expressions",
                 "  retval;"])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {**config,
                                              "--rel-error": "true",
                                              "--abs-error": "true"})
                rnd_rel_err = float(res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {**config,
                                                  "--rel-error": "false",
                                                  "--abs-error": "true"})
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     z,
                                     sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     z,
                                     sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err
            return rel_err, abs_err

        def split_domain(starting_domain, slivers):
            in_domains = [starting_domain]

            out_domains = list()
            while len(in_domains) > 0:
                I = in_domains.pop()
                unround_e = sollya.log2(I)
                e_low = sollya.floor(sollya.inf(unround_e))
                e_high = sollya.floor(sollya.sup(unround_e))
                #print("in: [{}, {}] ({}, {})".format(float(sollya.inf(I)), float(sollya.sup(I)), int(e_low), int(e_high)))
                if e_low == e_high:
                    #print("  accepted")
                    out_domains.append(I)
                    continue
                e_range = sollya.Interval(e_low, e_low+1)
                I_range = 2**e_range
                for _ in range(100):
                    mid = sollya.mid(I_range)
                    e = sollya.floor(sollya.log2(mid))
                    if e == e_low:
                        I_range = sollya.Interval(mid, sollya.sup(I_range))
                    else:
                        I_range = sollya.Interval(sollya.inf(I_range), mid)

                    divider_high = sollya.sup(I_range)
                    divider_low = sollya.inf(I_range)

                lower_part = sollya.Interval(sollya.inf(I), divider_low)
                upper_part = sollya.Interval(divider_high, sollya.sup(I))
                #print("  -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part))))
                #print("  -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part))))
                in_domains.append(upper_part)
                in_domains.append(lower_part)
            in_domains = out_domains

            # subdivide each section into 2**subd sections
            for _ in range(slivers):
                out_domains = list()
                for I in in_domains:
                    mid = sollya.mid(I)
                    out_domains.append(sollya.Interval(sollya.inf(I), mid))
                    out_domains.append(sollya.Interval(mid, sollya.sup(I)))
                in_domains = out_domains

            in_domains = set(in_domains)
            in_domains = sorted(in_domains, key=lambda x:float(sollya.inf(x)))
            in_domains = [d for d in in_domains if sollya.inf(d) != sollya.sup(d)]
            return in_domains
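        # Illustration of split_domain (hypothetical numbers): starting from
        # [0.75, 3.0] with slivers=1, the first loop cuts at the binade
        # boundaries around 1.0 and 2.0, and the subdivision loop then halves
        # each piece once, giving roughly [0.75, 0.875], [0.875, 1.0],
        # [1.0, 1.5], [1.5, 2.0], [2.0, 2.5], [2.5, 3.0].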


        if self.skip_reduction:
            starting_domain = sollya.Interval(0.5, 1.0)
        else:
            reduction_e = 12
            starting_domain = sollya.Interval(2**(-reduction_e), 2**reduction_e)

        # analyse each piece
        in_domains = split_domain(starting_domain, self.slivers)
        errors = list()
        for I in in_domains:
            if self.skip_reduction:
                rel_err, abs_err = generate_fptaylor(I)
            else:
                rel_err, abs_err = generate_reduction_fptaylor(I)
            print("{}\t{}\t{}\t{}".format(float(sollya.inf(I)),
                                          float(sollya.sup(I)),
                                          float(abs_err),
                                          float(rel_err)))
            errors.append((I, abs_err, rel_err))

        def generate_json(errors, domain):
            errors = [err for err in errors if err[0] in domain]
            errors.sort(key=lambda err: err[2])
            epsilon = errors[0][2]
            delta = max(err[1] for err in errors)

            d = {
                "cname": self.function_name,
                "delta": float(delta),
                "domain": [float(sollya.inf(domain)),
                           float(sollya.sup(domain)),],
                "epsilon": float(epsilon),
                "operation": "log"
            }
            return d

        if self.skip_reduction:
            d = generate_json(errors, sollya.Interval(0.5, 1.0))
            json_str = json.dumps(d, sort_keys=True, indent=4)
            json_str = "spec: " + json_str.replace("\n", "\nspec: ")
            print(json_str)

        else:
            specs = list()
            for e in range(1, reduction_e):
                d = generate_json(errors, sollya.Interval(2**(-e), 2**e))
                specs.append(d)
            for i in range(len(specs)):
                d = specs[i]
                if i == len(specs)-1:
                    json_str = json.dumps(d, sort_keys=True, indent=4)
                    json_str = "spec: " + json_str.replace("\n", "\nspec: ")
                    print(json_str)
                    break
                nd = specs[i+1]
                if d["epsilon"] == nd["epsilon"] and d["delta"] == nd["delta"]:
                    continue
                json_str = json.dumps(d, sort_keys=True, indent=4)
                json_str = "spec: " + json_str.replace("\n", "\nspec: ")
                print(json_str)
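Each emitted block is the JSON produced by generate_json, with every line prefixed by "spec: "; with hypothetical error values it would look like:

spec: {
spec:     "cname": "my_log",
spec:     "delta": 1.2e-16,
spec:     "domain": [
spec:         0.5,
spec:         1.0
spec:     ],
spec:     "epsilon": 3.4e-16,
spec:     "operation": "log"
spec: }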
Example #6
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log1pf.c",
                 function_name="log1pf"):
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # debug utilities
        debugf = ML_Debug(display_format="%f")
        debuglf = ML_Debug(display_format="%lf")
        debugx = ML_Debug(display_format="%x")
        debuglx = ML_Debug(display_format="%\"PRIx64\"", )
        debugd = ML_Debug(display_format="%d",
                          pre_process=lambda v: "(int) %s" % v)
        debugld = ML_Debug(display_format="%ld")
        #debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)
        debug_ddtolx = ML_Debug(
            display_format="%\"PRIx64\" %\"PRIx64\"",
            pre_process=lambda v:
            "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" %
            (v, v))
        debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}",
                            pre_process=lambda v: "%s.hi, %s.lo" % (v, v))

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        log2_hi_value = round(
            log(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), sollya.RN)
        log2_lo_value = round(
            log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)
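        # log(2) is split into a hi/lo pair: log2_hi keeps only
        # field_size - (exponent_size + 1) bits so that products with the small
        # integer exponent stay representable without rounding, and log2_lo
        # holds the remaining bits (a Cody-and-Waite-style constant split)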

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        table_index_size = 7
        log_table = ML_Table(dimensions=[2**table_index_size, 2],
                             storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            inv_value = (1.0 + (inv_approx_table[i][0] / S2**9)) * S2**-1
            value_high = round(
                log(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low
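        # each table row stores log(inv_value) as a hi word (rounded to fewer
        # bits, like log2_hi above) plus a lo word with the remainder, so the
        # pair approximates log(inv_value) to roughly twice the working precision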

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        # case close to 0: ctz
        ctz_exp_limit = -7
        ctz_cond = vx_exp < ctz_exp_limit
        ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

        ctz_poly_degree = sup(
            guessdegree(
                log1p(sollya.x) / sollya.x, ctz_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        ctz_poly_object = Polynomial.build_from_approximation(
            log1p(sollya.x) / sollya.x, ctz_poly_degree,
            [self.precision] * (ctz_poly_degree + 1), ctz_interval,
            sollya.absolute)

        print "generating polynomial evaluation scheme"
        ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            ctz_poly_object, vx, unified_precision=self.precision)
        ctz_poly.set_attributes(tag="ctz_poly", debug=debug_lftolx)

        ctz_result = vx * ctz_poly

        neg_input = Comparison(vx,
                               -1,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")

        log_function_code = CodeFunction(
            "new_log", [Variable("x", precision=ML_Binary64)],
            output_format=ML_Binary64)
        log_call_generator = FunctionOperator(
            log_function_code.get_name(),
            arity=1,
            output_precision=ML_Binary64,
            declare_prototype=log_function_code)
        newlog_function = FunctionObject(log_function_code.get_name(),
                                         (ML_Binary64, ), ML_Binary64,
                                         log_call_generator)

        # case away from 0.0
        pre_vxp1 = vx + 1.0
        pre_vxp1.set_attributes(tag="pre_vxp1", debug=debug_lftolx)
        pre_vxp1_exp = ExponentExtraction(pre_vxp1,
                                          tag="pre_vxp1_exp",
                                          debug=debugd)
        cm500 = Constant(-500, precision=ML_Int32)
        c0 = Constant(0, precision=ML_Int32)
        cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size() -
                                          2)
        scaling_factor_exp = Select(cond_scaling, cm500, c0)
        scaling_factor = ExponentInsertion(scaling_factor_exp,
                                           precision=self.precision,
                                           tag="scaling_factor")

        vxp1 = pre_vxp1 * scaling_factor
        vxp1.set_attributes(tag="vxp1", debug=debug_lftolx)
        vxp1_exp = ExponentExtraction(vxp1, tag="vxp1_exp", debug=debugd)

        vxp1_inv = DivisionSeed(vxp1,
                                precision=self.precision,
                                tag="vxp1_inv",
                                debug=debug_lftolx,
                                silent=True)

        vxp1_dirty_inv = ExponentInsertion(-vxp1_exp,
                                           precision=self.precision,
                                           tag="vxp1_dirty_inv",
                                           debug=debug_lftolx)

        table_index = BitLogicAnd(BitLogicRightShift(
            TypeCast(vxp1, precision=int_precision, debug=debuglx),
            self.precision.get_field_size() - 7,
            debug=debuglx),
                                  0x7f,
                                  tag="table_index",
                                  debug=debuglx)

        # argument reduction
        # TODO: detect if single operand inverse seed is supported by the targeted architecture
        pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv,
                                                          precision=ML_UInt64),
                                                 Constant(-2,
                                                          precision=ML_UInt64),
                                                 precision=ML_UInt64),
                                     precision=self.precision,
                                     tag="pre_arg_red_index",
                                     debug=debug_lftolx)
        arg_red_index = Select(Equal(table_index, 0),
                               vxp1_dirty_inv,
                               pre_arg_red_index,
                               tag="arg_red_index",
                               debug=debug_lftolx)

        red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0,
                          (arg_red_index * vx - 1.0) + arg_red_index)
        #red_vxp1 = arg_red_index * vxp1 - 1.0
        red_vxp1.set_attributes(tag="red_vxp1", debug=debug_lftolx)

        log_inv_lo = TableLoad(log_table,
                               table_index,
                               1,
                               tag="log_inv_lo",
                               debug=debug_lftolx)
        log_inv_hi = TableLoad(log_table,
                               table_index,
                               0,
                               tag="log_inv_hi",
                               debug=debug_lftolx)

        inv_err = S2**-6  # TODO: link to target DivisionSeed precision

        print "building mathematical polynomial"
        approx_interval = Interval(-inv_err, inv_err)
        poly_degree = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        poly_object = global_poly_object.sub_poly(start_index=1)

        print "generating polynomial evaluation scheme"
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vxp1, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        print(global_poly_object.get_sollya_object())

        vxp1_inv_exp = ExponentExtraction(vxp1_inv,
                                          tag="vxp1_inv_exp",
                                          debug=debugd)
        corr_exp = -vxp1_exp + scaling_factor_exp  # vxp1_inv_exp

        #poly = (red_vxp1) * (1 +  _poly)
        #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

        pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly +
                                    (-corr_exp * log2_lo - log_inv_lo))
        pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
        exact_log2_hi_exp = -corr_exp * log2_hi
        exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                         debug=debug_lftolx,
                                         prevent_optimization=True)
        #std_result =  exact_log2_hi_exp + pre_result

        exact_log2_lo_exp = -corr_exp * log2_lo
        exact_log2_lo_exp.set_attributes(
            tag="exact_log2_lo_exp",
            debug=debug_lftolx)  #, prevent_optimization = True)

        init = exact_log2_lo_exp - log_inv_lo
        init.set_attributes(tag="init",
                            debug=debug_lftolx,
                            prevent_optimization=True)
        fma0 = (red_vxp1 * _poly + init)  # - log_inv_lo)
        fma0.set_attributes(tag="fma0", debug=debug_lftolx)
        step0 = fma0
        step0.set_attributes(
            tag="step0", debug=debug_lftolx)  #, prevent_optimization = True)
        step1 = step0 + red_vxp1
        step1.set_attributes(tag="step1",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        step2 = -log_inv_hi + step1
        step2.set_attributes(tag="step2",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        std_result = exact_log2_hi_exp + step2
        std_result.set_attributes(tag="std_result",
                                  debug=debug_lftolx,
                                  prevent_optimization=True)

        # main scheme
        print "MDL scheme"
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal, Return(vx),
                    ConditionBlock(ctz_cond, Statement(Return(ctz_result), ),
                                   Statement(Return(std_result))))))
        scheme = pre_scheme

        #print scheme.get_str(depth = None, display_precision = True)

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        print "MDL fusing FMA"
        scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        #print scheme.get_str(depth = None, display_precision = True)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=ML_Binary32)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        print "silencing operation"
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)
        #print scheme.get_str(depth = None, display_precision = True)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        #print self.result.get(cg)
        with open("%s.c" % func_implementation.get_name(), "w") as output_stream:
            output_stream.write(self.result.get(cg))
Example #7
  def generate_scheme(self):
    # declaring target and instantiating optimization engine

    vx = self.implementation.add_input_variable("x", self.precision)
    
    Log.set_dump_stdout(True)
    
    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag: 
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")
    
    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)
    
    C_m1 = Constant(-1, precision = self.precision)
    
    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False)
    
    #  Infinity input
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)
    
    # Over/Underflow Tests
    
    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))
    
    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))
    
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]
    
    # Constants
    
    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)
    
    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))
    
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)


    # Reduction
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")
    
    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)
    
    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)
    
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)
    
    r = Addition(s, t, precision = self.precision)
    
    r.set_attributes(tag = "r", debug = debug_multi)
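    # the (s, z, t) sequence above is a Fast2Sum-style compensation: z is the part of
    # red_coeff2 actually absorbed into s, t = red_coeff2 - z is the rounding error,
    # and r = s + t yields a corrected reduced argument for vx - k*log(2)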
    
    r_interval = Interval(-log_2/S2, log_2/S2)
    
    local_ulp = sup(ulp(exp(r_interval), self.precision))
    
    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)
    
    
    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")
    
    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)
    
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)
    
    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)
    
    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)
    
    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi) 
    
    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")
    
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() // 2)
    diff_k = ik - overflow_exp_offset 
    
    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)
    
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0
    
    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False), 
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), 
        Return(late_overflow_result)
        )


    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
    
    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset
    
    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)
    
    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0
    
    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)
    
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)),
        Return(late_underflow_result)
        )
    
    # Reconstruction
    
    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)
    
    result_scheme = ConditionBlock(
        late_overflow_test, 
        late_overflow_return, 
        ConditionBlock(
            late_underflow_test, 
            late_underflow_return, 
            Return(std_result)
            )
        )
        
    std_return = ConditionBlock(
        overflow_test, 
        overflow_return, 
        ConditionBlock(
            underflow_test, 
            underflow_return, 
            result_scheme)
        )
        
    scheme = ConditionBlock(
        test_NaN_or_inf, 
        Statement(specific_return), 
        std_return
        )

    return scheme
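The reconstruction std_result = exp_k * (poly + diff), with diff = 1 - exp_mk, relies on the identity expm1(x) = 2**k * (expm1(r) + 1 - 2**-k); a plain-float sanity check of that identity (standalone sketch, not Metalibm code):

import math

x = 2.3
k = round(x / math.log(2))
r = x - k * math.log(2)
# 2**k * (expm1(r) + 1 - 2**-k) == 2**k * exp(r) - 1 == expm1(x)
assert math.isclose(math.expm1(x), 2**k * (math.expm1(r) + (1.0 - 2.0**-k)), rel_tol=1e-12)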
Example #8
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # constant computation
        invlog2 = round(1 / log(2), sollya_precision, sollya.RN)
        invlog2_cst = Constant(invlog2, precision=self.precision)

        #v_log2_hi = round(log(2), 16, sollya.RN)
        #v_log2_lo = round(log(2) - v_log2_hi, sollya_precision, sollya.RN)

        #log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi")
        #log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        v_log2_hi = round(
            log(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), sollya.RN)
        v_log2_lo = round(
            log(2) - v_log2_hi, self.precision.sollya_object, sollya.RN)
        log2_hi = Constant(v_log2_hi, precision=self.precision, tag="log2_hi")
        log2_lo = Constant(v_log2_lo, precision=self.precision, tag="log2_lo")

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debug_multi)

        int_precision = self.precision.get_integer_format()

        # table creation
        table_index_size = 7
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("inv_table"))
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        integer_precision = {
            ML_Binary32: ML_UInt32,
            ML_Binary64: ML_UInt64
        }[self.precision]

        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            inv_value = inv_approx_table[i]  # (1.0 + (inv_approx_table[i][0] / S2**9)) * S2**-1
            value_high = round(
                log(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_multi,
                                          precision=self.precision)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debug_multi),
                self.precision.get_field_size() - 7,
                debug=debug_multi),
                                      0x7f,
                                      tag="table_index",
                                      debug=debug_multi)
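            # table_index extracts the 7 most-significant fraction bits of the
            # mantissa (shift by field_size - 7, then mask with 0x7f), matching
            # the 2**table_index_size = 128 rows of log_table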

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=integer_precision),
                Constant(-2, precision=integer_precision),
                precision=integer_precision),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0), 1.0,
                                   pre_arg_red_index)

            #_red_vx        = arg_red_index * _vx_mant - 1.0
            _red_vx = FusedMultiplyAdd(arg_red_index,
                                       _vx_mant,
                                       1.0,
                                       specifier=FusedMultiplyAdd.Subtract)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_multi)

            inv_err = S2**-7
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_multi)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_multi)

            Log.report(Log.Verbose, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + sollya.x) / sollya.x, poly_degree,
                [1] + [self.precision] * (poly_degree), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
            _poly = PolynomialSchemeEvaluator.generate_estrin_scheme(
                poly_object, _red_vx, unified_precision=self.precision)

            _poly.set_attributes(tag="poly", debug=debug_multi)

            corr_exp = Conversion(
                _vx_exp if exp_corr_factor is None else _vx_exp + exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_multi)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + (_red_vx +
                                         (_red_vx * _poly +
                                          (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_multi)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                             debug=debug_multi)
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part", debug=debug_multi)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_multi)
            #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part = ((red_vx_lo +
                                    (red_vx_lo * _poly +
                                     (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_multi)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            return exact_log2_hi_exp + pre_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one

        result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one = compute_log(
            vx)
        result.set_attributes(tag="result", debug=debug_multi)
        new_result_one.set_attributes(tag="new_result_one", debug=debug_multi)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debug_multi,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debug_multi)

        # exp=-1 case
        Log.report(Log.Verbose, "managing exp=-1 case")

        result2 = (-log_inv_hi - log2_hi) + (
            (red_vx + poly * red_vx) - log2_lo - log_inv_lo)
        result2.set_attributes(tag="result2", debug=debug_multi)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _, _ = compute_log(vx * S2100,
                                                      exp_corr_factor=m100)

        Log.report(Log.Verbose, "managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_multi)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one",
                                debug=debug_multi,
                                likely=False)

        # main scheme
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))
                        #ConditionBlock(cond_one,
                        #Return(new_result_one),
                        #ConditionBlock(exp_mone,
                        #Return(result2),
                        #Return(result)
                        #)
                        #)
                    ))))
        scheme = pre_scheme

        return scheme
Example #9
        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_multi,
                                          precision=self.precision)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debug_multi),
                self.precision.get_field_size() - 7,
                debug=debug_multi),
                                      0x7f,
                                      tag="table_index",
                                      debug=debug_multi)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=integer_precision),
                Constant(-2, precision=integer_precision),
                precision=integer_precision),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0), 1.0,
                                   pre_arg_red_index)

            #_red_vx        = arg_red_index * _vx_mant - 1.0
            _red_vx = FusedMultiplyAdd(arg_red_index,
                                       _vx_mant,
                                       1.0,
                                       specifier=FusedMultiplyAdd.Subtract)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_multi)

            inv_err = S2**-7
            red_interval = Interval(1 - inv_err, 1 + inv_err)
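            # the masked reciprocal seed guarantees
            # arg_red_index * _vx_mant in [1 - 2**-7, 1 + 2**-7], so the
            # reduced argument _red_vx lies in [-inv_err, inv_err]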

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_multi)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_multi)

            Log.report(Log.Verbose, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + sollya.x) / sollya.x, poly_degree,
                [1] + [self.precision] * (poly_degree), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
            _poly = PolynomialSchemeEvaluator.generate_estrin_scheme(
                poly_object, _red_vx, unified_precision=self.precision)

            _poly.set_attributes(tag="poly", debug=debug_multi)

            corr_exp = Conversion(
                _vx_exp if exp_corr_factor == None else _vx_exp +
                exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_multi)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
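            # reconstruction: with x = m * 2^E and m * arg_red_index = 1 + _red_vx,
            #   log(x) = E * log(2) - log(arg_red_index) + log(1 + _red_vx)
            #          = corr_exp * (log2_hi + log2_lo)
            #            - (_log_inv_hi + _log_inv_lo) + _red_vx * (1 + _poly)
            # the nesting below accumulates the small terms first to limit
            # rounding error before the dominant corr_exp * log2_hi is added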
            pre_result = -_log_inv_hi + (_red_vx +
                                         (_red_vx * _poly +
                                          (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_multi)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                             debug=debug_multi)
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part", debug=debug_multi)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_multi)
            #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part = ((red_vx_lo +
                                    (red_vx_lo * _poly +
                                     (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_multi)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            return exact_log2_hi_exp + pre_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one
Beispiel #10
0
  def generate_argument_reduction(self, memory_limit):
    best_arg_reduc = None

    best_arg_reduc = self.eval_argument_reduction(6,10,12,13)
    best_arg_reduc['sizeof_tables'] = best_arg_reduc['sizeof_table1'] + best_arg_reduc['sizeof_table2']
    best_arg_reduc['degree_poly1'] = 4
    best_arg_reduc['degree_poly2'] = 8
    return best_arg_reduc
    # iterate through all possible parameters, and return the best argument reduction
    # the order of importance of the characteristics of a good argument reduction is:
    #   1- the argument reduction is valid
    #   2- the degrees of the polynomials obtained are minimal
    #   3- the memory used is minimal
    # An argument reduction is valid iff:
    #   - the memory used is less than memory_limit
    #   - y-1 and z-1 fit into a uint64_t
    #   - the second argument reduction is useful (i.e. it adds at least 1 bit to the argument reduction)
    # From those validity constraints we deduce some bounds on the parameters to reduce the search space:
    # (note that those bounds are implied by, but not equivalent to, the constraints)
    #   size1 <= log2(memory_limit/17)                                       (memory_limit on the first table)
    #   prec1 < 13 + size1                                                   (y-1 fits into a uint64_t)
    #   size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)         (memory_limit on both tables)
    #   size2 >= 1 - log2(midinterval)                                       (second arg red should be useful)
    #   prec2 < 12 - prec1 - log2((y-y1)/y1),  for all possible y            (z-1 fits into a uint64_t)
    # note: it is hard to deduce a tight bound on prec2 from the last inequality
    # a good approximation is  size2 ~= max[for y]( - log2((y-y1)/y1) ), but using it may eliminate valid argument reductions
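    # worked example (illustrative values only): with memory_limit = 100000 bytes,
    # max_size1 = floor(log2(100000/17)) = 12, and for size1 = 12 the prec1 loop
    # scans prec1 from 12 up to 12 + size1 = 24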

    #self.eval_argument_reduction(12, 20, 22, 14)

    min_size1 = 1
    max_size1 = floor(log(memory_limit/17)/log(2)).getConstantAsInt()
    for size1 in range(max_size1, min_size1-1, -1):
      
      min_prec1 = size1
      max_prec1 = 12 + size1
      for prec1 in range(min_prec1,max_prec1+1):
        
        # we need sizeof_table1 and mid_interval for the bound on size2 and prec2
        first_arg_reduc = self.eval_argument_reduction(size1, prec1, prec1, prec1)
        mid_interval = first_arg_reduc['mid_interval']
        sizeof_table1 = first_arg_reduc['sizeof_table1']

        if not(0 <= inf(mid_interval) and sup(mid_interval) < S2**(64 - 52 - prec1)):
          continue
        if not(first_arg_reduc['sizeof_table1'] < memory_limit):
          continue
        
        min_size2 = 1 - ceil(log(sup(mid_interval))/log(2)).getConstantAsInt()
        max_size2 = floor(log((memory_limit - sizeof_table1)/(17 * sup(mid_interval)))/log(2)).getConstantAsInt()
        # during execution of the prec2 loop, the interval of valid values for prec2 can shrink,
        # so min_prec2 and max_prec2 are set here and not before the prec2 loop
        # (because they are modified inside the body of the loop, for the next iteration of size2)
        min_prec2 = 0
        max_prec2 = 12 + max_size2 - prec1
        for size2 in range(max_size2,min_size2-1,-1):
          
          max_prec2 = min(max_prec2, 12 + size2 - prec1)
          for prec2 in range(max_prec2,min_prec2-1,-1):
            
            #print '=====\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{})\t====='.format(size1,min_size1,max_size1,prec1,min_prec1,max_prec1,size2,min_size2,max_size2,prec2,min_prec2,max_prec2)
            #print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss #memory used by the programm

            arg_reduc = self.eval_argument_reduction(size1, prec1, size2, prec2)
            mid_interval = arg_reduc['mid_interval']
            out_interval = arg_reduc['out_interval']
            sizeof_tables = arg_reduc['sizeof_table1'] + arg_reduc['sizeof_table2']
            if not(0 <= inf(out_interval) and sup(out_interval) < S2**(64-52-prec1-prec2)):
              max_prec2 = prec2 - 1
              continue
            if memory_limit < sizeof_tables:
              continue
            #assert(prec2 < 12 + size2 - prec1) # test the approximation size2 ~= max[for y]( - log2((y-y1)/y1))

            # guess the degree of the two polynomials (relative error <= 2^-52 and absolute error <= 2^-120)
            # note: we exclude zero from out_interval to not perturb sollya (log(1+x)/x is not well defined on 0)
            sollya_out_interval = Interval(S2**(-52-prec1-prec2), sup(out_interval))
            guess_degree_poly1 = guessdegree(log(1+sollya.x)/sollya.x, sollya_out_interval, S2**-52)
            guess_degree_poly2 = guessdegree(log(1+sollya.x), sollya_out_interval, S2**-120)
            # TODO: detect when guessdegree return multiple possible degree, and find the right one
            if False and inf(guess_degree_poly1) != sup(guess_degree_poly1):
              print("improvable guess_degree_poly1:", guess_degree_poly1)
            if False and inf(guess_degree_poly2) != sup(guess_degree_poly2):
              print("improvable guess_degree_poly2:", guess_degree_poly2)
            degree_poly1 = sup(guess_degree_poly1).getConstantAsInt() + 1
            degree_poly2 = sup(guess_degree_poly2).getConstantAsInt()
            
            if ((best_arg_reduc is not None)
            and (best_arg_reduc['degree_poly1'] < degree_poly1 or best_arg_reduc['degree_poly2'] < degree_poly2)):
              min_prec2 = prec2 + 1
              break

            if ((best_arg_reduc is None)
             or (best_arg_reduc['degree_poly1'] > degree_poly1)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] > degree_poly2)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] == degree_poly2 and best_arg_reduc['sizeof_tables'] > sizeof_tables)):
              arg_reduc['degree_poly1'] = degree_poly1
              arg_reduc['degree_poly2'] = degree_poly2
              arg_reduc['sizeof_tables'] = sizeof_tables
              best_arg_reduc = arg_reduc
              #print "\n   --new best--  \n", arg_reduc, "\n"
    #print "\nBest arg reduc: \n", best_arg_reduc, "\n"
    return best_arg_reduc
Beispiel #11
0
    def generate_scalar_scheme(self, vx):
        """ Generating implementation script for hyperic tangent
            meta-function """
        # tanh(x) = sinh(x) / cosh(x)
        #         = (e^x - e^-x) / (e^x + e^-x)
        #         = (e^(2x) - 1) / (e^(2x) + 1)
        #   when x -> +inf, tanh(x) -> 1
        #   when x -> -inf, tanh(x) -> -1
        #   ~0 e^x    ~ 1 + x - x^2 / 2 + x^3 / 6 + ...
        #      e^(-x) ~ 1 - x - x^2 / 2- x^3/6 + ...
        #   when x -> 0, tanh(x) ~ (2 (x + x^3/6 + ...)) / (2 - x^2 + ...) ~ x
        # We can divide the input interval into 3 parts
        # positive, around 0, and finally negative

        # Possible argument reduction
        # x = m.2^E = k * log(2) + r
        # (k != 0) => tanh(x) = (2k * e^(2r) - 1) / (2k * e^(2r) + 1)
        #                     = (1 - 1 * e^(-2r) / 2k) / (1 + e^(-2r) / 2k)
        #
        # tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
        #         = (e^(2x) + 1 - 1- 1) / (e^(2x) + 1)
        #         = 1 - 2 / (e^(2x) + 1)

        # tanh is odd so we reduce the computation to the absolute value of
        # vx
        abs_vx = Abs(vx, precision=self.precision)

        # if p is the expected output precision
        # x > (p+2) * log(2) / 2 => tanh(x) = 1 - eps
        #   where eps < 1/2 * 2^-p
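        #   (derivation: tanh(x) = 1 - 2 / (e^(2x) + 1); for 2x > (p+2) * log(2)
        #    we get e^(2x) > 2^(p+2), hence eps = 2 / (e^(2x) + 1) < 2^-(p+1))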
        p = self.precision.get_mantissa_size()
        high_bound = (p + 2) * sollya.log(2) / 2
        near_zero_bound = 0.125
        interval_num = 1024
        Log.report(Log.Verbose,
                   "high_bound={}, near_zero_bound={}, interval_num={}",
                   float(high_bound), near_zero_bound, interval_num)

        interval_size = (high_bound - near_zero_bound) / (1024)
        new_interval_size = S2**int(sollya.log2(interval_size))
        interval_num *= 2
        high_bound = new_interval_size * interval_num + near_zero_bound
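        # the sub-interval width is rounded to a power of two and the piece
        # count doubled; high_bound is recomputed so it lies exactly
        # interval_num widths above near_zero_bound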
        Log.report(Log.Verbose,
                   "high_bound={}, near_zero_bound={}, interval_num={}",
                   float(high_bound), near_zero_bound, interval_num)

        ERROR_THRESHOLD = S2**-p
        Log.report(Log.Info, "ERROR_THRESHOLD={}", ERROR_THRESHOLD)

        # Near 0 approximation
        near_zero_scheme, near_zero_error = self.generate_approx_poly_near_zero(
            sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx)

        # approximation parameters
        poly_degree = 7
        approx_interval = Interval(near_zero_bound, high_bound)

        sollya.settings.points = 117

        approx_scheme, approx_error = piecewise_approximation(
            sollya.tanh,
            abs_vx,
            self.precision,
            bound_low=near_zero_bound,
            bound_high=high_bound,
            num_intervals=interval_num,
            max_degree=poly_degree,
            error_threshold=ERROR_THRESHOLD)
        Log.report(Log.Warning, "approx_error={}".format(approx_error))

        comp_near_zero_bound = abs_vx < near_zero_bound
        comp_near_zero_bound.set_attributes(tag="comp_near_zero_bound",
                                            debug=debug_multi)
        comp_high_bound = abs_vx < high_bound
        comp_high_bound.set_attributes(tag="comp_high_bound",
                                       debug=debug_multi)

        complete_scheme = Select(
            comp_near_zero_bound, near_zero_scheme,
            Select(comp_high_bound, approx_scheme,
                   Constant(1.0, precision=self.precision)))

        scheme = Return(Select(vx < 0, Negation(complete_scheme),
                               complete_scheme),
                        precision=self.precision)
        return scheme
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))
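        # illustration (binary32 values assumed): emax = 127 gives
        # precision_max_value = 2^128, hence exp_overflow_bound = ceil(128 * log(2)) = 89;
        # emin_subnormal = -149 gives exp_underflow_bound = floor(-149 * log(2)) = -104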


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)
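        # Cody-Waite style split of log(2): log2_hi keeps only log2_hi_precision
        # bits so that the product k * log2_hi can be computed exactly for any k
        # in interval_k, while log2_lo carries the remainder at working precision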

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))
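        # r = x - k * log(2) is built in two exact-tagged steps (hi part first,
        # then the lo correction), leaving r in roughly [-log(2)/2, log(2)/2]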

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        #try:
        if 1:
            #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            Log.report(Log.Info, "eval error: %s" % eval_error)
        #except:
        #    Log.report(Log.Info, "gappa error evaluation failed")
        print(r.get_str(depth = None, display_precision = True, display_attribute = True))
        print(opt_r.get_str(depth = None, display_precision = True, display_attribute = True))

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print "ulp: ", local_ulp 
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree



        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            #print "poly: ", poly.get_str(depth = None, display_precision = True)
            #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print "global_poly_error: ", global_poly_error, global_rel_poly_error 
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print "test: ", flag
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        #print scheme.get_str(depth = None, display_precision = True)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        #self.result.add_header("support_lib/ml_types.h")
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
Beispiel #13
0
    def determine_error(self):
        sollya.settings.display = sollya.hexadecimal
        n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN)
        n_invlog2 = self.precision.round_sollya_object(1 / sollya.log(2),
                                                       sollya.RN)

        poly_expr = str(sollya.horner(self.poly_object.get_sollya_object()))
        poly_expr = poly_expr.replace("_x_", "r")
        poly_expr = poly_expr.replace("r^0x1p1", "r*r")

        config = fptaylor.CHECK_CONFIG.copy()
        del config["--abs-error"]
        config["--opt"] = "bb-eval"
        config["--rel-error-threshold"] = "0.0"
        config["--intermediate-opt"] = "false"
        config["--uncertainty"] = "false"

        def generate_fptaylor(x):
            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  r rnd64= x;",
                "  retval rnd64= {};".format(poly_expr), "Expressions",
                "  retval;"
            ])
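            # illustration only (interval and polynomial assumed): for x in
            # [0.125, 0.25] and a degree-2 polynomial, the generated query text
            # would resemble:
            #   Variables
            #     real x in [0.125,0.25];
            #   Definitions
            #     r rnd64= x;
            #     retval rnd64= 1.0 + r * (1.0 + r * 0.5);
            #   Expressions
            #     retval;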

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), x, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), x, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err

        def generate_reduction_fptaylor(x):

            # get k, must be the same at endpoints
            unround_k = x * n_invlog2
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if not (k_low == k_high or (k_low == -1 and sollya.sup(x) == 0)):
                assert False, "Interval must not straddle multples of log(2)"
            k = int(k_low)
            r = x - k * n_log2

            twok = 2**k
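            # reduction identity: on this piece k = floor(x / log(2)) is constant,
            # r = x - k * log(2) lies in [0, log(2)), and exp(x) = 2^k * exp(r);
            # the query below encodes this reconstruction so FPTaylor can bound
            # its rounding error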

            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  whole rnd64= {} * {};".format(k, n_log2),
                "  r rnd64= x - whole;", "  poly rnd64= {};".format(poly_expr),
                "  retval rnd64= poly*{};".format(twok), "Expressions",
                "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), r, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), r, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err

        def split_domain(starting_domain, slivers):
            in_domains = [starting_domain]

            # abs
            out_domains = list()
            for I in in_domains:
                if sollya.inf(I) < 0 and sollya.sup(I) > 0:
                    out_domains.append(sollya.Interval(sollya.inf(I), 0))
                    out_domains.append(sollya.Interval(0, sollya.sup(I)))
                else:
                    out_domains.append(I)
            in_domains = out_domains

            # k
            out_domains = list()
            while len(in_domains) > 0:
                I = in_domains.pop()
                unround_mult = I * n_invlog2
                mult_low = sollya.floor(sollya.inf(unround_mult))
                mult_high = sollya.floor(sollya.sup(unround_mult))
                #print("in: [{}, {}] ({}, {})".format(float(sollya.inf(I)), float(sollya.sup(I)), int(mult_low), int(mult_high)))
                if mult_low == mult_high or (mult_low == -1
                                             and mult_high == 0):
                    #print("  accepted")
                    out_domains.append(I)
                    continue

                k_range = sollya.Interval(mult_low, mult_low + 1.5)
                I_range = k_range * n_log2
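                # bisect (100 iterations) to locate the point where
                # floor(x / log(2)) steps from mult_low to mult_low + 1, so each
                # sub-interval pushed back below sees a single k value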
                for _ in range(100):
                    mid = sollya.mid(I_range)
                    k = sollya.floor(mid * n_invlog2)
                    if k == mult_low:
                        I_range = sollya.Interval(mid, sollya.sup(I_range))
                    else:
                        I_range = sollya.Interval(sollya.inf(I_range), mid)

                divider_high = sollya.sup(I_range)
                divider_low = sollya.inf(I_range)

                lower_part = sollya.Interval(sollya.inf(I), divider_low)
                upper_part = sollya.Interval(divider_high, sollya.sup(I))
                #print("  -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part))))
                #print("  -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part))))
                in_domains.append(upper_part)
                in_domains.append(lower_part)
            in_domains = out_domains

            # subdivide each section into 2**subd sections
            for _ in range(slivers):
                out_domains = list()
                for I in in_domains:
                    mid = sollya.mid(I)
                    out_domains.append(sollya.Interval(sollya.inf(I), mid))
                    out_domains.append(sollya.Interval(mid, sollya.sup(I)))
                in_domains = out_domains

            in_domains = set(in_domains)
            in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x)))
            in_domains = [
                d for d in in_domains if sollya.inf(d) != sollya.sup(d)
            ]
            return in_domains

        if self.skip_reduction:
            starting_domain = sollya.Interval(0, n_log2)
        else:
            reduction_k = 40
            starting_domain = sollya.Interval(-reduction_k * n_log2,
                                              reduction_k * n_log2)

        # analyse each piece
        in_domains = split_domain(starting_domain, self.slivers)
        errors = list()
        for I in in_domains:
            if self.skip_reduction:
                rel_err, abs_err = generate_fptaylor(I)
            else:
                rel_err, abs_err = generate_reduction_fptaylor(I)
            print("{}\t{}\t{}\t{}".format(float(sollya.inf(I)),
                                          float(sollya.sup(I)), float(abs_err),
                                          float(rel_err)))
            errors.append((I, abs_err, rel_err))

        def generate_json(errors, domain):
            errors = [err for err in errors if err[0] in domain]
            errors.sort(key=lambda err: err[2])
            epsilon = errors[0][2]
            delta = max(err[1] for err in errors)

            d = {
                "cname": self.function_name,
                "delta": float(delta),
                "domain": [
                    float(sollya.inf(domain)),
                    float(sollya.sup(domain)),
                ],
                "epsilon": float(epsilon),
                "operation": "exp"
            }
            return d
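        # shape of an emitted spec block (values purely illustrative), once
        # prefixed with "spec: " below:
        # spec: {
        # spec:     "cname": "my_exp",
        # spec:     "delta": 1e-18,
        # spec:     "domain": [0.0, 0.6931471805599453],
        # spec:     "epsilon": 2.2e-16,
        # spec:     "operation": "exp"
        # spec: }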

        if self.skip_reduction:
            d = generate_json(errors, sollya.Interval(0, n_log2))
            json_str = json.dumps(d, sort_keys=True, indent=4)
            json_str = "spec: " + json_str.replace("\n", "\nspec: ")
            print(json_str)

        else:
            specs = list()
            for k in range(1, reduction_k):
                d = generate_json(errors,
                                  sollya.Interval(-k * n_log2, k * n_log2))
                specs.append(d)
            for i in range(len(specs)):
                d = specs[i]
                if i == len(specs) - 1:
                    json_str = json.dumps(d, sort_keys=True, indent=4)
                    json_str = "spec: " + json_str.replace("\n", "\nspec: ")
                    print(json_str)
                    break
                nd = specs[i + 1]
                if d["epsilon"] == nd["epsilon"] and d["delta"] == nd["delta"]:
                    continue
                json_str = json.dumps(d, sort_keys=True, indent=4)
                json_str = "spec: " + json_str.replace("\n", "\nspec: ")
                print(json_str)
Beispiel #14
0
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(ML_Binary32)))

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        table_index_size = 7
        log_table = ML_Table(dimensions=[2**table_index_size, 2],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("inv_table"))
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            #print inv_approx_table[i][0], inv_value
            inv_value = inv_approx_table[i][0]
            value_high = round(
                log2(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log2(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low
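        # each entry i of log_table holds log2 of the i-th reciprocal-seed value,
        # split into a high part rounded to a reduced number of bits and a low
        # correction part at working precision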

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debuglx),
                self.precision.get_field_size() - 7,
                debug=debuglx),
                                      0x7f,
                                      tag="table_index",
                                      debug=debuglld)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            #if not processor.is_supported_operation(arg_red_index):
            #    if self.precision != ML_Binary32:
            #        arg_red_index = DivisionSeed(Conversion(_vx_mant, precision = ML_Binary32), precision = ML_Binary32,
            _red_vx = arg_red_index * _vx_mant - 1.0
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
            inv_err = S2**-7
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            print "building mathematical polynomial"
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log2(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log2(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=0)

            Attributes.set_default_silent(True)
            Attributes.set_default_rounding_mode(ML_RoundToNearest)

            print "generating polynomial evaluation scheme"
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            print "sollya global_poly_object"
            print global_poly_object.get_sollya_object()
            print "sollya poly_object"
            print poly_object.get_sollya_object()

            corr_exp = _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_ddtolx)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            #pre_result = -_log_inv_hi + (_red_vx + (_red_vx * _poly + (- _log_inv_lo)))
            pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = corr_exp
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            _result = corr_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx

        result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_lftolx)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # exp=-1 case
        print "managing exp=-1 case"
        #red_vx_2 = arg_red_index * vx_mant * 0.5
        #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err)
        #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1
        #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute)
        #print "poly_object2: ", poly_object2.get_sollya_object()
        #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision)
        #poly2.set_attributes(tag = "poly2", debug = debug_lftolx)
        #result2 = (poly2 - log_inv_hi - log_inv_lo)

        result2 = (-log_inv_hi - 1.0) + ((poly * red_vx) - log_inv_lo)
        result2.set_attributes(tag="result2", debug=debug_lftolx)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _ = compute_log(vx * S2100,
                                                   exp_corr_factor=m100)
        result_subnormal.set_attributes(tag="result_subnormal",
                                        debug=debug_lftolx)

        print "managing close to 1.0 cases"
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        print "MDL scheme"
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Statement(ClearException(), result_subnormal,
                                  Return(result_subnormal))),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))
                        #ConditionBlock(cond_one,
                        #Return(new_result_one),
                        #ConditionBlock(exp_mone,
                        #Return(result2),
                        #Return(result)
                        #)
                        #)
                    ))))
        scheme = Statement(result, pre_scheme)
        return scheme
Beispiel #15
0
        def generate_reduction_fptaylor(x):
            unround_e = sollya.log2(x)
            e_low = sollya.floor(sollya.inf(unround_e))
            e_high = sollya.floor(sollya.sup(unround_e))
            if e_low != e_high:
                assert False, "Interval must not stradle a binade"
            e = int(e_low) + 1
            z = x / (2**e) * 0.5
            query = "\n".join(
                ["Variables",
                 "  real z in [{},{}];".format(sollya.inf(z), sollya.sup(z)),
                 "Definitions",
                 "  poly rnd64= {};".format(poly_expr),
                 "  retval rnd64= {}*{} + poly;".format(e, n_log2),
                 "Expressions",
                 "  retval;"])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {**config,
                                              "--rel-error": "true",
                                              "--abs-error": "true"})
                rnd_rel_err = float(res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {**config,
                                                  "--rel-error": "false",
                                                  "--abs-error": "true"})
                    rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     z,
                                     sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.log(sollya.x),
                                     z,
                                     sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err
            return rel_err, abs_err
Beispiel #16
0
    def generate_scalar_scheme(self, vx, n):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        n.set_attributes(tag="n")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          n, positive or null integers
        #
        # rootn(x, n) = x^(1/n)
        #             = exp(1/n * log(x))
        #             = 2^(1/n * log2(x))
        #             = 2^(1/n * (log2(m) + e))
        #

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        use_reciprocal = False

        # non-scaled vx used to compute vx^1
        unmodified_vx = vx

        is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal")
        exp_correction_factor = self.precision.get_mantissa_size()
        mantissa_factor = Constant(2**exp_correction_factor,
                                   tag="mantissa_factor")
        vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx")

        m = MantissaExtraction(vx, tag="m", precision=self.precision)
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        e = Select(is_subnormal,
                   e - exp_correction_factor,
                   e,
                   tag="corrected_e")

        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision,
                                                     basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(
            log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(
            Abs(m, precision=self.precision), log_f, inv_approx_table,
            log_table)
        # floating-point version of n
        n_f = Conversion(n, precision=self.precision, tag="n_f")
        inv_n = Division(Constant(1, precision=self.precision), n_f)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision),
                            log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        if use_reciprocal:
            r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi)
        else:
            r = Division(log_approx, n_f, tag="r", debug=debug_multi)

        # e_n ~ e / n
        e_f = Conversion(e, precision=self.precision, tag="e_f")
        if use_reciprocal:
            e_n = Multiplication(e_f, inv_n, tag="e_n")
        else:
            e_n = Division(e_f, n_f, tag="e_n")
        error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n")
        e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int")
        pre_e_n_frac = e_n - e_n_int
        pre_e_n_frac.set_attributes(tag="pre_e_n_frac")
        e_n_frac = pre_e_n_frac + error_e_n * inv_n
        e_n_frac.set_attributes(tag="e_n_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)
        exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True)
        exp2_r.set_attributes(tag="exp2_r", debug=debug_multi)

        exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac,
                                                       inline_select=True)
        exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi)

        exp2_e_n_int = ExponentInsertion(Conversion(e_n_int,
                                                    precision=int_precision),
                                         precision=self.precision,
                                         tag="exp2_e_n_int")

        n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi)
        n_is_odd = LogicalNot(n_is_even, tag="n_is_odd")
        result_sign = Select(
            n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        # managing n == -1
        if self.expand_div:
            ml_division_args = ML_Division.get_default_args(
                precision=self.precision, input_formats=[self.precision] * 2)
            ml_division = ML_Division(ml_division_args)
            self.division_implementation = ml_division.implementation
            self.division_implementation.set_scheme(
                ml_division.generate_scheme())
            ml_division_fct = self.division_implementation.get_function_object(
            )
        else:
            ml_division_fct = Division

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = ConditionBlock(
            LogicalOr(LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)),
                      LogicalAnd(n_is_even, vx < 0)),
            Return(FP_QNaN(self.precision)),
            Statement(
                ConditionBlock(
                    Equal(n, -1, tag="n_is_mone"),
                    #Return(Division(Constant(1, precision=self.precision), unmodified_vx, tag="div_res", precision=self.precision)),
                    Return(
                        ml_division_fct(Constant(1, precision=self.precision),
                                        unmodified_vx,
                                        tag="div_res",
                                        precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn(±inf, n) is ±0 for n < 0 and ±inf for n > 0
                    Test(vx, specifier=Test.IsInfty),
                    Statement(
                        ConditionBlock(
                            n < 0,
                            #LogicalAnd(n_is_odd, n < 0),
                            Return(
                                Select(Test(vx,
                                            specifier=Test.IsPositiveInfty),
                                       Constant(FP_PlusZero(self.precision),
                                                precision=self.precision),
                                       Constant(FP_MinusZero(self.precision),
                                                precision=self.precision),
                                       precision=self.precision)),
                            Return(vx),
                        ), ),
                ),
                ConditionBlock(
                    # rootn(±0, n) is ±∞ for odd n < 0.
                    LogicalAnd(LogicalAnd(n_is_odd, n < 0),
                               Equal(vx, 0),
                               tag="n_is_odd_and_neg"),
                    Return(
                        Select(Test(vx, specifier=Test.IsPositiveZero),
                               Constant(FP_PlusInfty(self.precision),
                                        precision=self.precision),
                               Constant(FP_MinusInfty(self.precision),
                                        precision=self.precision),
                               precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn( ±0, n) is +∞ for even n< 0.
                    LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)),
                    Return(FP_PlusInfty(self.precision))),
                ConditionBlock(
                    # rootn(±0, n) is +0 for even n > 0.
                    LogicalAnd(n_is_even, Equal(vx, 0)),
                    Return(vx)),
                ConditionBlock(
                    Equal(n, 1), Return(unmodified_vx),
                    Return(result_sign * exp2_r * exp2_e_n_int *
                           exp2_e_n_frac))))
        return result
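
A quick numeric check of the reduction rootn(x, n) = 2^((log2(m) + e) / n) stated in the comments above, using only the Python standard library; rootn_ref is an illustrative helper, not part of the source:

import math

def rootn_ref(x, n):
    """Reference rootn for positive x and n > 0, following the m.2^e reduction."""
    m, e = math.frexp(x)          # x = m * 2**e with m in [0.5, 1)
    m, e = 2.0 * m, e - 1         # renormalize so that m lies in [1, 2)
    return 2.0 ** ((math.log2(m) + e) / n)

assert abs(rootn_ref(8.0, 3) - 2.0) < 1e-12
assert abs(rootn_ref(81.0, 4) - 3.0) < 1e-12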
Example #17
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x", self.precision)
        sollya_precision = self.get_input_precision().sollya_object

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
                kwords["arg_value"] = vx
                kwords["function_name"] = self.function_name
                return RaiseReturn(*args, **kwords)

        # 2-limb approximation of log(2)
        # hi part precision is reduced to provide exact operation
        # when multiplied by an exponent value
        log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)


        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_rcp_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(dummy_rcp_seed, language = None, table_getter = lambda self: self.approx_table_map)

        # table creation
        table_index_size = inv_approx_table.index_size
        log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
        # storing accurate logarithm approximation of value returned
        # by the fast reciprocal operation
        for i in range(0, 2**table_index_size):
            inv_value = inv_approx_table[i]
            value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low


        neg_input = Comparison(vx, -1, likely=False, precision=ML_Bool, specifier=Comparison.Less, debug=debug_multi, tag="neg_input")
        vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, precision=ML_Bool, debug=debug_multi, tag="nan_or_inf")
        vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan")
        vx_inf    = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf")
        vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal")

        # for x = m.2^e, such that e >= 0
        #
        # log(1+x) = log(1 + m.2^e)
        #          = log(2^e . 2^-e + m.2^e)
        #          = log(2^e . (2^-e + m))
        #          = log(2^e) + log(2^-e + m)
        #          = e . log(2) + log (2^-e + m)
        #
        # t = (2^-e + m)
        # t = m_t . 2^e_t
        # r ~ 1 / m_t   => r.m_t ~ 1, i.e. r.m_t - 1 ~ 0
        #
        # t' = t . 2^-e_t
        #    = 2^(-e-e_t) + m . 2^-e_t
        #
        # if e >= 0, then 2^-e <= 1, hence 1 <= m + 2^-e <= 3
        # r = m_r . 2^e_r
        #
        # log(1+x) = e.log(2) + log(r . 2^e_t . 2^-e_t . (2^-e + m) / r)
        #          = e.log(2) + log(r . 2^(-e-e_t) + r.m.2^-e_t) + e_t.log(2) - log(r)
        #          = (e+e_t).log(2) + log(r . t') - log(r)
        #          = (e+e_t).log(2) + P_log1p(r . t' - 1) - log(r)
        # (a numeric check of this reduction is sketched after this function)
        #
        #

        # argument reduction
        m = MantissaExtraction(vx, tag="vx", precision=self.precision, debug=debug_multi)
        e = ExponentExtraction(vx, tag="e", precision=int_precision, debug=debug_multi)

        # 2^-e
        TwoMinusE = ExponentInsertion(-e, tag="Two_minus_e", precision=self.precision, debug=debug_multi)
        t = Addition(TwoMinusE, m, precision=self.precision, tag="t", debug=debug_multi)

        m_t = MantissaExtraction(t, tag="m_t", precision=self.precision, debug=debug_multi)
        e_t = ExponentExtraction(t, tag="e_t", precision=int_precision, debug=debug_multi)

        # 2^(-e-e_t)
        TwoMinusEEt = ExponentInsertion(-e-e_t, tag="Two_minus_e_et", precision=self.precision)
        TwoMinusEt = ExponentInsertion(-e_t, tag="Two_minus_et", precision=self.precision, debug=debug_multi)

        rcp_mt = ReciprocalSeed(m_t, tag="rcp_mt", precision=self.precision, debug=debug_multi)

        INDEX_SIZE = table_index_size
        table_index = generic_mantissa_msb_index_fct(INDEX_SIZE, m_t)
        table_index.set_attributes(tag="table_index", debug=debug_multi)

        log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi) 
        log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi)

        inv_err = S2**-6 # TODO: link to target DivisionSeed precision

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        approx_fct = sollya.log1p(sollya.x) / (sollya.x)
        poly_degree = sup(guessdegree(approx_fct, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
        Log.report(Log.Debug, "poly_degree is {}", poly_degree)
        global_poly_object = Polynomial.build_from_approximation(approx_fct, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
        poly_object = global_poly_object # .sub_poly(start_index=1)

        EXT_PRECISION_MAP = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble,
            ML_SingleSingle: ML_TripleSingle,
            ML_DoubleDouble: ML_TripleDouble
        }
        if self.precision not in EXT_PRECISION_MAP:
            Log.report(Log.Error, "no extended precision available for {}", self.precision)

        ext_precision = EXT_PRECISION_MAP[self.precision]

        # pre_rtp = r . 2^(-e-e_t) + m .2^-e_t
        pre_rtp = Addition(
            rcp_mt * TwoMinusEEt,
            Multiplication(
                rcp_mt,
                Multiplication(
                    m,
                    TwoMinusEt,
                    precision=self.precision,
                    tag="pre_mult",
                    debug=debug_multi,
                ),
                precision=ext_precision,
                tag="pre_mult2",
                debug=debug_multi,
            ),
            precision=ext_precision,
            tag="pre_rtp",
            debug=debug_multi
        )
        pre_red_vx = Addition(
            pre_rtp,
            -1,
            precision=ext_precision,
        )

        red_vx = Conversion(pre_red_vx, precision=self.precision, tag="red_vx", debug=debug_multi)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vx, unified_precision=self.precision)

        poly.set_attributes(tag="poly", debug=debug_multi)
        Log.report(Log.Debug, "{}", global_poly_object.get_sollya_object())

        fp_e = Conversion(e + e_t, precision=self.precision, tag="fp_e", debug=debug_multi)


        ext_poly = Multiplication(red_vx, poly, precision=ext_precision)

        pre_result = Addition(
            Addition(
                fp_e * log2_hi,
                fp_e * log2_lo,
                precision=ext_precision
            ),
            Addition(
                Addition(
                    -log_inv_hi,
                    -log_inv_lo,
                    precision=ext_precision
                ),
                ext_poly,
                precision=ext_precision
            ),
            precision=ext_precision
        )

        result = Conversion(pre_result, precision=self.precision, tag="result", debug=debug_multi)


        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(neg_input,
            Statement(
                ClearException(),
                Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision))
            ),
            ConditionBlock(vx_nan_or_inf,
                ConditionBlock(vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan,
                            Raise(ML_FPE_Invalid)
                        ),
                        Return(FP_QNaN(self.precision))
                    )
                ),
                Return(result)
            )
        )
        scheme = pre_scheme
        return scheme
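
The argument reduction derived in the comments of the scheme above holds for any positive r, not only for the reciprocal seed; a numeric sketch (values are illustrative) of the identity log(1+x) = (e+e_t).log(2) + log(r.t.2^-e_t) - log(r):

import math

x = 13.7                                          # any x with exponent e >= 0
m, e = math.frexp(x); m, e = 2.0 * m, e - 1       # x = m * 2**e, m in [1, 2)
t = 2.0**-e + m                                   # t = 2^-e + m
m_t, e_t = math.frexp(t); m_t, e_t = 2.0 * m_t, e_t - 1
r = 1.0 / m_t                                     # stand-in for ReciprocalSeed(m_t)
lhs = math.log1p(x)
rhs = (e + e_t) * math.log(2.0) + math.log(r * t * 2.0**-e_t) - math.log(r)
assert abs(lhs - rhs) < 1e-12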
Example #18
    def generate_reduced_log_split(self,
                                   _vx_mant,
                                   log_f,
                                   inv_approx_table,
                                   log_table,
                                   log_table_tho=None,
                                   corr_exp=None,
                                   tho_cond=None):
        """ Generate a logarithm approximation (log_f(_vx_mant) + corr_exp) for a reduced argument
            _vx_mant which is assumed to be within [1, 2[ (i.e. an extracted mantissa)

            Adding the exponent correction (optional). """

        log2_hi_value = round(
            log_f(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log_f(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        table_index = inv_approx_table.index_function(_vx_mant)

        table_index.set_attributes(tag="table_index", debug=debug_multi)

        rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
        r = Multiplication(rcp, _vx_mant, precision=self.precision, tag="r")

        int_format = self.precision.get_integer_format()

        # argument reduction
        # TODO: detect if single operand inverse seed is supported by the targeted architecture
        pre_arg_red_index = TypeCast(BitLogicAnd(
            TypeCast(ReciprocalSeed(_vx_mant,
                                    precision=self.precision,
                                    tag="seed",
                                    debug=debug_multi,
                                    silent=True),
                     precision=int_format),
            Constant(-2, precision=int_format),
            precision=int_format),
                                     precision=self.precision,
                                     tag="pre_arg_red_index",
                                     debug=debug_multi)

        C0 = Constant(0, precision=table_index.get_precision())
        index_comp_0 = Equal(table_index,
                             C0,
                             tag="index_comp_0",
                             debug=debug_multi)

        arg_red_index = Select(index_comp_0,
                               1.0,
                               pre_arg_red_index,
                               tag="arg_red_index",
                               debug=debug_multi)
        #_red_vx        = arg_red_index * _vx_mant - 1.0
        _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
        inv_err = S2**-6
        red_interval = Interval(1 - inv_err, 1 + inv_err)
        _red_vx.set_attributes(tag="_red_vx",
                               debug=debug_multi,
                               interval=red_interval)

        # return in case of standard (non-special) input
        if tho_cond is not None:
            assert not log_table_tho is None
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)
        else:
            assert log_table_tho is None
            _log_inv_lo = TableLoad(log_table, table_index, 1)
            _log_inv_hi = TableLoad(log_table, table_index, 0)

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        poly_degree = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree, [self.precision] * (poly_degree + 1),
            approx_interval, sollya.absolute)
        poly_object = global_poly_object.sub_poly(start_index=1)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, _red_vx, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_multi)
        Log.report(Log.Info, "{}", poly_object.get_sollya_object())

        # _poly approximates log(1+r)/r (constant term excluded)
        # _red_vx + _red_vx * _poly approximates log(1 + _red_vx)

        m0h, m0l = Mul211(_red_vx, _poly)
        m0h, m0l = Add212(_red_vx, m0h, m0l)
        m0h.set_attributes(tag="m0h", debug=debug_multi)
        m0l.set_attributes(tag="m0l")
        if corr_exp is not None:
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
        else:
            # bypass the exponent addition when no exponent correction is provided
            rh, rl = m0h, m0l
        rh.set_attributes(tag="rh0", debug=debug_multi)
        rl.set_attributes(tag="rl0", debug=debug_multi)
        rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
        rh.set_attributes(tag="rh", debug=debug_multi)
        rl.set_attributes(tag="rl", debug=debug_multi)

        # FIXME: log<self.basis>(vx) is computed as log(vx) / log(self.basis)
        # which could be optimized for some value of self.basis (e.g. 2)
        if sollya.log(self.basis) != 1.0:
            lbh = self.precision.round_sollya_object(1 /
                                                     sollya.log(self.basis))
            lbl = self.precision.round_sollya_object(1 /
                                                     sollya.log(self.basis) -
                                                     lbh)
            rh, rl = Mul222(rh, rl, lbh, lbl)
            return rh
        else:
            return rh
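
A sketch of the two-word constant splitting used above for log_f(2) and for 1/log(basis): the hi part keeps few enough significant bits that its product with an exponent-sized integer is exact, and the lo part carries the remainder. The binary64 field and exponent sizes below are assumptions of this sketch:

import sollya

field_size, exponent_size = 52, 11                  # binary64, assumed
hi_precision = field_size - (exponent_size + 1)

log2_hi = sollya.round(sollya.log(2), hi_precision, sollya.RN)
log2_lo = sollya.round(sollya.log(2) - log2_hi, 53, sollya.RN)   # full significand

# e * log2_hi is exact for any exponent-sized integer e, while
# log2_hi + log2_lo approximates log(2) to roughly twice the working precision
print(log2_hi, log2_lo)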
Example #19
    def generate_scalar_scheme(self, vx, vy):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        vy.set_attributes(tag="y")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          y a floating-point number
        #
        # pow(x, y) = x^y
        #             = exp(y * log(x))
        #             = 2^(y * log2(x))
        #             = 2^(y * (log2(m) + e))
        #
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        m = MantissaExtraction(vx, tag="m", precision=self.precision)

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed, language=None,
            table_getter= lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x) # /sollya.log(self.basis)



        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)


        # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
        #
        # log_approx = log2(Abs(m))
        # r = y * log_approx ~ y * log2(m)
        #
        # NOTES: manage cases where e is negative and
        # (y * log2(m)) AND (y * e) could cancel out
        # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT
        # be of opposite signs

        # log2(m) in [0, 1[ so cancellation can occur only if e == -1
        # we split 2^x in 2^x = 2^t0 * 2^t1
        # if e < 0: t0 = y * (log2(m) + e), t1=0
        # else:     t0 = y * log2(m), t1 = y * e

        t_cond = e < 0

        # e_y ~ e * y
        e_f = Conversion(e, precision=self.precision)
        #t0 = Select(t_cond, (e_f + log_approx) * vy, Multiplication(e_f, vy), tag="t0")
        #NearestInteger(t0, precision=self.precision, tag="t0_int")

        EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
        LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
        t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
        t0_frac = Select(t_cond, FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY) ,EY - t0_int, tag="t0_frac")
        #t0_frac.set_attributes(tag="t0_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)

        exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
        exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

        exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision), precision=self.precision, tag="exp2_t0_int")

        t1 = Select(t_cond, Constant(0, precision=self.precision), r)
        exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
        exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

        result_sign = Constant(1.0, precision=self.precision) # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        y_int = NearestInteger(vy, precision=self.precision)
        y_is_integer = Equal(y_int, vy)
        y_is_even = LogicalOr(
            # if y is a number (exc. inf) greater than 2**mantissa_size * 2,
            # then it is an integer multiple of 2 => even
            Abs(vy) >= 2**(self.precision.get_mantissa_size()+1),
            LogicalAnd(
                LogicalAnd(y_is_integer, Abs(vy) < 2**(self.precision.get_mantissa_size()+1)),
                # we want to limit the modulo computation to an integer input
                Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
            )
        )
        y_is_odd = LogicalAnd(
            LogicalAnd(
                Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                y_is_integer
            ),
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 1)
        )


        # special cases management
        special_case_results = Statement(
            # x is sNaN OR y is sNaN
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsSignalingNaN), Test(vy, specifier=Test.IsSignalingNaN)),
                Return(FP_QNaN(self.precision))
            ),
            # pow(x, ±0) is 1 if x is not a signaling NaN
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Return(Constant(1.0, precision=self.precision))
            ),
            # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)),
                Return(Select(Test(vx, specifier=Test.IsPositiveZero), FP_PlusInfty(self.precision), FP_MinusInfty(self.precision))),
            ),
            # pow(±0, −∞) is +∞ with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(±0, +∞) is +0 with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is ±0 for finite y>0 an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)),
                Return(vx),
            ),
            # pow(−1, ±∞) is 1 with no exception
            ConditionBlock(
                LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(+1, y) is 1 for any y (even a quiet NaN)
            ConditionBlock(
                vx == 1,
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(x, +∞) is +0 for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +∞ for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +0 for a number y < 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +∞ for a number y > 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(−∞, y) is −0 for finite y < 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy < 0)),
                Return(FP_MinusZero(self.precision)),
            ),
            # pow(−∞, y) is −∞ for finite y > 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy > 0)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusZero(self.precision)),
            ),
            # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
            # TODO: signal divideByZero exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +0 for finite y>0 and not an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusZero(self.precision)),
            ),
        )

        # fallback: generic evaluation of x^y once the special cases above
        # have been filtered out
        result = Statement(
            special_case_results,
            # fallback default cases
            Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
        return result
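
The y_is_even / y_is_odd classification above relies on the fact that any binary64 value of magnitude at least 2^(p+1) is an even integer. A plain-Python sketch of the same classification; the constant 53 and the helper name are assumptions of this sketch, not taken from the source:

import math

MANTISSA_SIZE = 53   # binary64 significand width, assumed

def classify_parity(y):
    """Mirror of the parity tests used for pow's special cases (finite y)."""
    if abs(y) >= 2.0 ** (MANTISSA_SIZE + 1):
        return "even"                 # float spacing >= 2: y is an even integer
    if y != math.floor(y):
        return "non-integer"
    return "odd" if int(y) % 2 else "even"

assert classify_parity(3.0) == "odd"
assert classify_parity(-6.0) == "even"
assert classify_parity(0.5) == "non-integer"
assert classify_parity(2.0 ** 60) == "even"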
Example #20
    def generate_scalar_scheme(self, vx):

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        int_precision = self.precision.get_integer_format()

        vx_exp = ExponentExtraction(vx,
                                    tag="vx_exp",
                                    precision=int_precision,
                                    debug=debug_multi)

        #---------------------
        # Approximation scheme
        #---------------------
        # log(x) = log(m.2^e) = log(m.2^(e-tho+tho))
        #        = log(m.2^-tho) + (e+tho) log(2)
        #  tho = (m > sqrt(2)) ? 1 : 0  is used to avoid catastrophic cancellation
        #  when e = -1 and m ~ 2
        #
        #
        # log(m.2^-tho) = log(m.r/r.2^-tho) = log(m.r) + log(2^-tho/r)
        #             = log(m.r) - log(r.2^tho)
        #     where r = rcp(m) an approximation of 1/m such that r.m ~ 1

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)

        # table of the reciprocal approximation of the targeted processor
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        log_table, log_table_tho, table_index_range = self.generate_log_table(
            log_f, inv_approx_table)

        # determining log_table range
        high_index_function = lambda table, i: table[i][0]
        low_index_function = lambda table, i: table[i][1]
        table_high_interval = log_table.get_subset_interval(
            high_index_function, table_index_range)
        table_low_interval = log_table.get_subset_interval(
            low_index_function, table_index_range)

        result = self.generate_reduced_log(vx, log_f, inv_approx_table,
                                           log_table, log_table_tho)
        result.set_attributes(tag="result", debug=debug_multi)

        if False:
            # building eval error map
            eval_error_map = {
                red_vx:
                Variable("red_vx",
                         precision=self.precision,
                         interval=red_vx.get_interval()),
                log_inv_hi:
                Variable("log_inv_hi",
                         precision=self.precision,
                         interval=table_high_interval),
                log_inv_lo:
                Variable("log_inv_lo",
                         precision=self.precision,
                         interval=table_low_interval),
                corr_exp:
                Variable("corr_exp_g",
                         precision=self.precision,
                         interval=self.precision.get_exponent_interval()),
            }
            # computing gappa error
            if is_gappa_installed():
                poly_eval_error = self.get_eval_error(result, eval_error_map)
                Log.report(Log.Info, "poly_eval_error: ", poly_eval_error)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debug_multi,
                         likely=False)

        # exp=-1 case
        Log.report(Log.Info, "managing exp=-1 case")
        #red_vx_2 = arg_red_index * vx_mant * 0.5
        #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err)
        #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1
        #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute)
        #print "poly_object2: ", poly_object2.get_sollya_object()
        #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision)
        #poly2.set_attributes(tag = "poly2", debug = debug_multi)
        #result2 = (poly2 - log_inv_hi - log_inv_lo)

        m100 = Constant(-100, precision=int_precision)
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal = self.generate_reduced_log(vx * S2100,
                                                     log_f,
                                                     inv_approx_table,
                                                     log_table,
                                                     log_table_tho,
                                                     exp_corr_factor=m100)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(
                ClearException(), Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision), precision=self.precision)),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision),
                               precision=self.precision),
                    ),
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision),
                               precision=self.precision))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision),
                                   precision=self.precision),
                        ), Return(result_subnormal)), Return(result))))
        scheme = pre_scheme
        return scheme
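
A numeric check of the 'tho' reduction described in the approximation-scheme comments above: shifting the mantissa down by one binade when m > sqrt(2) keeps m.2^-tho close to 1 and avoids the cancellation at e = -1. The test values are illustrative:

import math

def log_via_tho(x):
    m, e = math.frexp(x); m, e = 2.0 * m, e - 1   # x = m * 2**e, m in [1, 2)
    tho = 1 if m > math.sqrt(2.0) else 0
    return math.log(m * 2.0**-tho) + (e + tho) * math.log(2.0)

for x in (0.98, 1.03, 0.5, 1234.5):
    assert abs(log_via_tho(x) - math.log(x)) < 1e-12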
Example #21
  def generate_scheme(self):
    memory_limit = 2500

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = input_var
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    ### Constants computations ###

    v_log2_hi = nearestint(log(2) * 2**52) * 2**-52
    v_log2_lo = round(log(2) - v_log2_hi, 64+53, sollya.RN)
    log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi")
    log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo")
   
    print "\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)"
    arg_reduc = self.generate_argument_reduction(memory_limit)

    print "\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory".format(arg_reduc['size1'],arg_reduc['prec1'],arg_reduc['size2'],arg_reduc['prec2'],arg_reduc['degree_poly1'],arg_reduc['degree_poly2'],arg_reduc['sizeof_tables']) 
    
    print "\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table1'], arg_reduc['sizeof_table1'])
    inv_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False),
                           tag = self.uniquify_name("inv_table_1"))
    log_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_1"))
    for i in xrange(0, arg_reduc['length_table1']-1):
      x1 = 1 + i/S2**arg_reduc['size1']
      inv_x1 = ceil(S2**arg_reduc['prec1']/x1)*S2**-arg_reduc['prec1']
      log_x1 = floor(log(x1) * S2**(128-11))*S2**(11-128)
      inv_table_1[i] = inv_x1 #Constant(inv_x1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False))
      log_table_1[i] = log_x1 #Constant(log_x1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))

    print "\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table2'], arg_reduc['sizeof_table2'])
    inv_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False),
                           tag = self.uniquify_name("inv_table_2"))
    log_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_2"))
    for i in xrange(0, arg_reduc['length_table2']-1):
      y1 = 1 + i/S2**arg_reduc['size2']
      inv_y1 = ceil(S2**arg_reduc['prec2']/y1) * S2**-arg_reduc['prec2']
      log_y1 = floor(log(inv_y1) * S2**(128-11))*S2**(11-128)
      inv_table_2[i] = inv_y1 #Constant(inv_y1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False))
      log_table_2[i] = log_y1 #Constant(log_y1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))
    
    ### Evaluation Scheme ###
    
    print "\n\033[1mGenerate the evaluation scheme:\033[0m"
    input_var = self.implementation.add_input_variable("input_var", self.precision) 
    ve = ExponentExtraction(input_var, tag = "x_exponent", debug = debugd)
    vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = ML_Custom_FixedPoint_Format(0,52,False), debug = debug_lftolx)
    #vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = self.precision, debug = debug_lftolx)

    print "filtering and handling special cases"
    test_is_special_cases = LogicalNot(Test(input_var, specifier = Test.IsIEEENormalPositive, likely = True, debug = debugd, tag = "is_special_cases"))
    handling_special_cases = Statement(
      ConditionBlock(
        Test(input_var, specifier = Test.IsSignalingNaN, debug = True),
        ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision))
      ),
      ConditionBlock(
        Test(input_var, specifier = Test.IsNaN, debug = True),
        Return(input_var)
      )#,
      # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN)
      # all that remains is x is a subnormal positive
      #Statement(
      #  ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))),
      #  ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision()))))
      #)
    )
    
    print "doing the argument reduction"
    v_dx = vx
    v_x1 = Conversion(v_dx, tag = 'x1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size1'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_x = TypeCast(v_x1, tag = 'index_x',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_x1.get_precision().get_c_bit_size(), 0, False))
    v_inv_x = TableLoad(inv_table_1, v_index_x, tag = 'inv_x')
    v_x = Addition(v_dx, 1, tag = 'x',
                   precision = ML_Custom_FixedPoint_Format(1,52,False))
    v_dy = Multiplication(v_x, v_inv_x, tag = 'dy',
                          precision = ML_Custom_FixedPoint_Format(0,52+arg_reduc['prec1'],False))
    v_y1 = Conversion(v_dy, tag = 'y1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size2'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_y = TypeCast(v_y1, tag = 'index_y',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_y1.get_precision().get_c_bit_size(), 0, False))
    v_inv_y = TableLoad(inv_table_2, v_index_y, tag = 'inv_y')
    v_y = Addition(v_dy, 1, tag = 'y',
                   precision = ML_Custom_FixedPoint_Format(1,52+arg_reduc['prec2'],False))
    # note that we limit the number of bits used to represent dz to 64.
    # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2))
    v_dz = Multiplication(v_y, v_inv_y, tag = 'z',
                          precision = ML_Custom_FixedPoint_Format(64-52-arg_reduc['prec1']-arg_reduc['prec2'],52+arg_reduc['prec1']+arg_reduc['prec2'],False))
    # reduce the number of bits used to represent dz. we can do that
    
    print "doing the first polynomial evaluation"
    global_poly1_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, arg_reduc['degree_poly1']-1, [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'], fixed, sollya.absolute)
    poly1_object = global_poly1_object.sub_poly(start_index = 1)
    print global_poly1_object
    print poly1_object
    poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(poly1_object, v_dz, unified_precision = v_dz.get_precision())
    return ConditionBlock(test_is_special_cases, handling_special_cases, Return(poly1))

    #approx_interval = Interval(0, 27021597764222975*S2**-61)
    
    #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size())))
    #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute)
    #poly_object = global_poly_object.sub_poly(start_index = 1)
    #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
    #_poly.set_attributes(tag = "poly", debug = debug_lftolx)

    """
Example #22
        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_multi)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = inv_approx_table.index_function(_vx_mant)

            table_index.set_attributes(tag="table_index", debug=debug_multi)

            tho_cond = _vx_mant > Constant(sollya.sqrt(2),
                                           precision=self.precision)
            tho = Select(tho_cond,
                         Constant(1.0, precision=self.precision),
                         Constant(0.0, precision=self.precision),
                         precision=self.precision,
                         tag="tho",
                         debug=debug_multi)

            rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
            r = Multiplication(rcp,
                               _vx_mant,
                               precision=self.precision,
                               tag="r")

            int_format = self.precision.get_integer_format()

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=int_format),
                Constant(-2, precision=int_format),
                precision=int_format),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_multi)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-6
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_multi,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)

            Log.report(Log.Info, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Info, "generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_multi)
            Log.report(Log.Info, poly_object.get_sollya_object())

            corr_exp = Conversion(_vx_exp if exp_corr_factor is None else
                                  _vx_exp + exp_corr_factor,
                                  precision=self.precision) + tho
            corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

            # _poly approximates log(1+r)/r (constant term excluded)
            # _red_vx + _red_vx * _poly approximates log(1 + _red_vx)

            m0h, m0l = Mul211(_red_vx, _poly)
            m0h, m0l = Add212(_red_vx, m0h, m0l)
            m0h.set_attributes(tag="m0h", debug=debug_multi)
            m0l.set_attributes(tag="m0l")
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
            rh.set_attributes(tag="rh0", debug=debug_multi)
            rl.set_attributes(tag="rl0", debug=debug_multi)
            rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
            rh.set_attributes(tag="rh", debug=debug_multi)
            rl.set_attributes(tag="rl", debug=debug_multi)

            if sollya.log(self.basis) != 1.0:
                lbh = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis))
                lbl = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis) - lbh)
                rh, rl = Mul222(rh, rl, lbh, lbl)
                return rh
            else:
                return rh
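
The Mul211 / Add212 / Add222 / Mul222 calls above operate on unevaluated hi/lo pairs so that the table values, the exponent contribution and the polynomial can be combined without losing the low-order bits. A minimal sketch of the underlying idea built on Knuth's error-free TwoSum; two_sum and add22 are illustrative helpers, not the metalibm operators:

def two_sum(a, b):
    """Error-free addition: returns (s, err) with a + b == s + err exactly."""
    s = a + b
    bb = s - a
    err = (a - (s - bb)) + (b - bb)
    return s, err

def add22(xh, xl, yh, yl):
    """Roughly what an Add222-style operator computes on two hi/lo pairs."""
    sh, sl = two_sum(xh, yh)
    sl += xl + yl
    return two_sum(sh, sl)

hi, lo = add22(1.0, 1e-20, 3.0, -2e-20)
assert hi == 4.0
assert abs(lo + 1e-20) < 1e-30      # the low words were not lost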
Example #23
 def numeric_emulate(self, input_value):
     return log(input_value)
Example #24
 def numeric_emulate(self, input_value):
     return sollya.log(input_value) / sollya.log(self.basis)
Example #25
    def generate_scalar_scheme(self, vx):
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
                Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        index_size = 5

        comp_lo = (vx < 0)
        comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
        sign = Select(comp_lo, -1, 1, precision = self.precision)

        # as sinh is an odd function, we can simplify the input to its absolute
        # value once the sign has been extracted
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2)/2**index_size
        inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
        inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # sinh(x) ~ exp(abs(x))/2    for a big enough x
        # sinh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx    = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
        log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
        log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
        k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag = "r", debug = debug_multi)

        if is_gappa_installed():
                r_eval_error = self.get_eval_error(r_hi, variable_copy_map =
                    {
                        vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
                        k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
                    })
                Log.report(Log.Verbose, "r_eval_error: ", r_eval_error)

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi)
        k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi)

        exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value    = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
            pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
            neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # sinh(x) = 1/2 * (exp(x) - exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)    = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)    = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function)

        Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error))))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision)
        poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

        poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision)
        poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

        table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))

        # 2^(h-1)
        pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi)
        # 2^(-h-1)
        pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi)

        hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)


        pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

        neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

        result = Addition(
            Subtraction(
                pos_exp,
                neg_exp,
                precision=self.precision,
            ),
            hi_terms,
            precision=self.precision,
            tag="result",
            debug=debug_multi
        )

        # ov_value
        ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD)
        ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater)

        # main scheme
        scheme = Statement(
            Return(
                Select(
                    ov_flag,
                    sign*FP_PlusInfty(self.precision),
                    sign*result
                )))

        return scheme
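
# To make the decomposition sketched in the long comment above concrete, here is a small
# plain-Python check of the reconstruction
#   sinh(x) = exp(r)*2^(h-1)*2^(l/2^index_size) - exp(-r)*2^(-h-1)*2^(-l/2^index_size)
# using ordinary doubles only (no hi/lo arithmetic; the sample value is arbitrary):

import math

index_size = 5
arg_red = math.log(2.0) / 2**index_size
x = 3.7
k = math.trunc(x / arg_red)                     # k = h * 2^index_size + l
h, l = k >> index_size, k % 2**index_size
r = x - k * arg_red                             # reduced argument, |r| <= arg_red
pos = math.exp(r) * 2.0**(h - 1) * 2.0**(l / 2**index_size)
neg = math.exp(-r) * 2.0**(-h - 1) * 2.0**(-l / 2**index_size)
print(pos - neg, math.sinh(x))                  # both ~ 20.21, agreeing closely
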
Example #26
0
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        log2_hi_value = round(
            log_f(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log_f(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        int_precision = self.precision.get_integer_format()

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debug_multi)

        #---------------------
        # Approximation scheme
        #---------------------
        # log10(x) = log10(m.2^e) = log10(m.2^(e-t+t))
        #           = log10(m.2^-t) + (e+t) log10(2)
        #  t = (m > sqrt(2)) ? 1 : 0  is used to avoid catastrophic cancellation
        #  when e = -1 and m ~ 2
        #
        #
        # log10(m.2^-t) = log10(m.r/r.2^-t) = log10(m.r) + log10(2^-t/r)
        #               = log10(m.r) - log10(r.2^t)
        #     where r = rcp(m) an approximation of 1/m such that r.m ~ 1

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        table_index_size = inv_approx_table.index_size
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table_tho = ML_NewTable(dimensions=[2**table_index_size, 2],
                                    storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        log_table_tho[0][0] = 0.0
        log_table_tho[0][1] = 0.0
        hi_size = self.precision.get_field_size() - (
            self.precision.get_exponent_size() + 1)
        for i in table_index_range:
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            inv_value = inv_approx_table[i]
            value_high = round(log_f(inv_value), hi_size, sollya.RN)
            value_low = round(
                log_f(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

            inv_value_tho = S2 * inv_approx_table[i]
            value_high_tho = round(log_f(inv_value_tho), hi_size, sollya.RN)
            value_low_tho = round(
                log_f(inv_value_tho) - value_high_tho, sollya_precision,
                sollya.RN)
            log_table_tho[i][0] = value_high_tho
            log_table_tho[i][1] = value_low_tho

        # determining log_table range
        high_index_function = lambda table, i: table[i][0]
        low_index_function = lambda table, i: table[i][1]
        table_high_interval = log_table.get_subset_interval(
            high_index_function, table_index_range)
        table_low_interval = log_table.get_subset_interval(
            low_index_function, table_index_range)

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_multi)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = inv_approx_table.index_function(_vx_mant)

            table_index.set_attributes(tag="table_index", debug=debug_multi)

            tho_cond = _vx_mant > Constant(sollya.sqrt(2),
                                           precision=self.precision)
            tho = Select(tho_cond,
                         Constant(1.0, precision=self.precision),
                         Constant(0.0, precision=self.precision),
                         precision=self.precision,
                         tag="tho",
                         debug=debug_multi)

            rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
            r = Multiplication(rcp,
                               _vx_mant,
                               precision=self.precision,
                               tag="r")

            int_format = self.precision.get_integer_format()

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=int_format),
                Constant(-2, precision=int_format),
                precision=int_format),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_multi)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-6
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_multi,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)

            Log.report(Log.Info, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Info, "generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_multi)
            Log.report(Log.Info, poly_object.get_sollya_object())

            corr_exp = Conversion(_vx_exp if exp_corr_factor == None else
                                  _vx_exp + exp_corr_factor,
                                  precision=self.precision) + tho
            corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

            # _poly approximates log10(1+r)/r
            # _poly * red_vx approximates log10(x)

            m0h, m0l = Mul211(_red_vx, _poly)
            m0h, m0l = Add212(_red_vx, m0h, m0l)
            m0h.set_attributes(tag="m0h", debug=debug_multi)
            m0l.set_attributes(tag="m0l")
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
            rh.set_attributes(tag="rh0", debug=debug_multi)
            rl.set_attributes(tag="rl0", debug=debug_multi)
            rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
            rh.set_attributes(tag="rh", debug=debug_multi)
            rl.set_attributes(tag="rl", debug=debug_multi)

            if sollya.log(self.basis) != 1.0:
                lbh = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis))
                lbl = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis) - lbh)
                rh, rl = Mul222(rh, rl, lbh, lbl)
                return rh
            else:
                return rh
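
# Add212 / Add222 / Mul211 above are the usual double-word (hi + lo) building blocks.
# They rest on error-free transformations such as the ones below, shown here as a
# plain-Python sketch (the standard Knuth / Dekker formulations, not the metalibm
# generators themselves):

def two_sum(a, b):
    # Knuth: a + b == s + e exactly, for any ordering of a and b
    s = a + b
    bp = s - a
    e = (a - (s - bp)) + (b - bp)
    return s, e

def fast_two_sum(a, b):
    # Dekker: same property, but requires |a| >= |b|
    s = a + b
    e = b - (s - a)
    return s, e

print(two_sum(1.0, 2.0**-60))    # (1.0, 8.673617379884035e-19)
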

        result = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_multi)

        if False:
            # building eval error map
            eval_error_map = {
                red_vx:
                Variable("red_vx",
                         precision=self.precision,
                         interval=red_vx.get_interval()),
                log_inv_hi:
                Variable("log_inv_hi",
                         precision=self.precision,
                         interval=table_high_interval),
                log_inv_lo:
                Variable("log_inv_lo",
                         precision=self.precision,
                         interval=table_low_interval),
                corr_exp:
                Variable("corr_exp_g",
                         precision=self.precision,
                         interval=self.precision.get_exponent_interval()),
            }
            # computing gappa error
            if is_gappa_installed():
                poly_eval_error = self.get_eval_error(result, eval_error_map)
                Log.report(Log.Info, "poly_eval_error: ", poly_eval_error)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debug_multi,
                         likely=False)

        # exp=-1 case
        Log.report(Log.Info, "managing exp=-1 case")
        #red_vx_2 = arg_red_index * vx_mant * 0.5
        #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err)
        #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1
        #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute)
        #print "poly_object2: ", poly_object2.get_sollya_object()
        #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision)
        #poly2.set_attributes(tag = "poly2", debug = debug_multi)
        #result2 = (poly2 - log_inv_hi - log_inv_lo)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal = compute_log(vx * S2100, exp_corr_factor=m100)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)), Return(result))))
        scheme = pre_scheme
        return scheme
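
# The decomposition in the comments of this scheme (reduce by a reciprocal approximation
# r of the mantissa, add back (e + t)*log(2)) can be checked directly with ordinary
# doubles.  A small illustrative sketch, with a crude 3-digit reciprocal standing in for
# ReciprocalSeed:

import math

x = 0.4375
m, e = math.frexp(x)                       # x = m * 2^e, m in [0.5, 1)
m, e = 2.0 * m, e - 1                      # renormalize to m in [1, 2)
t = 1 if m > math.sqrt(2.0) else 0         # avoids cancellation when e = -1, m ~ 2
r = round(1.0 / m, 3)                      # crude stand-in for the reciprocal seed
rebuilt = math.log(m * r) - math.log(r * 2.0**t) + (e + t) * math.log(2.0)
print(rebuilt, math.log(x))                # both ~ -0.8267
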
Example #27
0
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # testing special value inputs
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")
        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        # if input is a signaling NaN, raise an invalid exception and returns
        # a quiet NaN
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = self.precision.get_integer_format()

        # log2(vx)
        # r = vx_mant
        # e = vx_exp
        # vx reduced to r in [1, 2[
        # log2(vx) = log2(r * 2^e)
        #          = log2(r) + e
        #
        ## log2(r) is approximated by
        #  log2(r) = log2(inv_seed(r) * r / inv_seed(r))
        #          = log2(inv_seed(r) * r) - log2(inv_seed(r))
        # inv_seed(r) in ]1/2, 1] => log2(inv_seed(r)) in ]-1, 0]
        #
        # inv_seed(r) * r ~ 1
        # we can easily tabulate -log2(inv_seed(r))
        #

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)
        # table creation
        table_index_size = 7
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("inv_table"))
        # value for index 0 is set to 0.0
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            #print inv_approx_table[i][0], inv_value
            inv_value = inv_approx_table[i][0]
            value_high_bitsize = self.precision.get_field_size() - (
                self.precision.get_exponent_size() + 1)
            value_high = round(log2(inv_value), value_high_bitsize, sollya.RN)
            value_low = round(
                log2(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # The main table is indexed by the 7 most significant bits
            # of the mantissa
            table_index = inv_approx_table.index_function(_vx_mant)
            table_index.set_attributes(tag="table_index", debug=debuglld)

            # argument reduction
            # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
            inv_err = S2**-inv_approx_table.index_size
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            Log.report(Log.Verbose, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log2(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() * 1.1))) + 1
            sollya.settings.display = sollya.hexadecimal
            global_poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                log2(1 + sollya.x) / sollya.x,
                poly_degree, [self.precision] * (poly_degree + 1),
                approx_interval,
                sollya.absolute,
                error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm(
                    p - f, ai))
            Log.report(
                Log.Info, "poly_degree={}, approx_error={}".format(
                    poly_degree, approx_error))
            poly_object = global_poly_object.sub_poly(start_index=1, offset=1)
            #poly_object = global_poly_object.sub_poly(start_index=0,offset=0)

            Attributes.set_default_silent(True)
            Attributes.set_default_rounding_mode(ML_RoundToNearest)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly = FMA(pre_poly, _red_vx,
                        global_poly_object.get_cst_coeff(0, self.precision))
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            Log.report(
                Log.Verbose, "sollya global_poly_object: {}".format(
                    global_poly_object.get_sollya_object()))
            Log.report(
                Log.Verbose, "sollya poly_object: {}".format(
                    poly_object.get_sollya_object()))

            corr_exp = _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor

            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()

            pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision)
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            _result = exact_log2_hi_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx

        result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_lftolx)

        # specific input value predicate
        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="vx_snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="vx_inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # Specific handling for the case exp == -1
        # log2(x) = log2(m) - 1
        #
        # as m in [1, 2[, log2(m) in [0, 1[
        # if r is close to 2, a catastrophic cancellation can occur
        #
        # r = seed(m)
        # log2(x) = log2(seed(m) * m / seed(m)) - 1
        #         = log2(seed(m) * m) - log2(seed(m)) - 1
        #
        # for m really close to 2 => seed(m) = 0.5
        #     => log2(x) = log2(0.5 * m)
        #                = log2(m) - 1
        result_exp_m1 = (-log_inv_hi - 1.0) + FMA(poly, red_vx, -log_inv_lo)
        result_exp_m1.set_attributes(tag="result_exp_m1", debug=debug_lftolx)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _ = compute_log(vx * S2100,
                                                   exp_corr_factor=m100)
        result_subnormal.set_attributes(tag="result_subnormal",
                                        debug=debug_lftolx)

        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Statement(ClearException(), result_subnormal,
                                  Return(result_subnormal))),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result_exp_m1),
                                       Return(result))))))
        scheme = Statement(result, pre_scheme)
        return scheme
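
# The reduced argument above, _red_vx = FMA(arg_red_index, _vx_mant, -1.0), relies on the
# single rounding of the FMA: computing r*m first would round it to 1.0 and throw away
# exactly the bits the polynomial needs.  A plain-Python illustration, using exact
# rational arithmetic as a stand-in for the fused operation (values are contrived so the
# effect is obvious):

from fractions import Fraction

m = 1.0 + 2.0**-30                 # a mantissa value (exactly representable)
r = 1.0 - 2.0**-30                 # an approximation of 1/m (exactly representable)
naive = r * m - 1.0                # r*m rounds to 1.0 first: gives 0.0
fused = float(Fraction(r) * Fraction(m) - 1)   # one rounding, as an FMA would do
print(naive, fused)                # 0.0 vs -8.673617379884035e-19 (= -2**-60)
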
Example #28
0
  def generate_scheme(self):
    """Produce an abstract scheme for the logarithm.

    This abstract scheme will be used by the code generation backend.
    """
    if self.precision not in [ML_Binary32, ML_Binary64]:
        Log.report(Log.Error, "The demanded precision is not supported")

    vx = self.implementation.add_input_variable("x", self.precision)


    def default_bool_convert(optree, precision=None, **kw):
        return bool_convert(optree, precision, -1, 0, **kw) \
                if isinstance(self.processor, VectorBackend) \
                else bool_convert(optree, precision, 1, 0, **kw)

    precision = self.precision.sollya_object
    int_prec = self.precision.get_integer_format()
    Log.report(Log.Info, "int_prec is %s" % int_prec)
    uint_prec = self.precision.get_unsigned_integer_format()


    Log.report(Log.Info, "MDL constants")
    cgpe_scheme_idx = int(self.cgpe_index)
    table_index_size = int(self.tbl_index_size)
    #
    table_nb_elements = 2**(table_index_size)
    table_dimensions = [2*table_nb_elements]  # two values are stored for each element
    field_size = Constant(self.precision.get_field_size(),
                          precision = int_prec,
                          tag = 'field_size')
    if self.log_radix == EXP_1:
      log2_hi = Constant(
        round(log(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log(2) - round(log(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    elif self.log_radix == 10:
      log2_hi = Constant(
        round(log10(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log10(2) - round(log10(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    # ... if log_radix == '2' then log2(2) == 1

    # subnormal_mask aims at trapping positive subnormals except zero.
    # That's why we will subtract 1 from the integer bitstring of the input, and
    # then compare for Less (strict) the resulting integer bitstring to this
    # mask, e.g.  0x7fffff for binary32.
    if self.no_subnormal == False:
      subnormal_mask = Constant((1 << self.precision.get_field_size()) - 1,
                                precision = int_prec, tag = 'subnormal_mask')
    fp_one = Constant(1.0, precision = self.precision, tag = 'fp_one')
    fp_one_as_uint = TypeCast(fp_one, precision = uint_prec,
                              tag = 'fp_one_as_uint')
    int_zero = Constant(0, precision = int_prec, tag = 'int_zero')
    int_one  = Constant(1, precision = int_prec, tag = 'int_one')
    table_mantissa_half_ulp = Constant(
            1 << (self.precision.field_size - table_index_size - 1),
            precision = int_prec
            )
    table_s_exp_index_mask = Constant(
            ~((table_mantissa_half_ulp.get_value() << 1) - 1),
            precision = uint_prec
            )

    Log.report(Log.Info, "MDL table")
    # The table holds approximations of -log(2^tau * r_i) so we first compute
    # the index value for which tau changes from 1 to 0.
    cut = sqrt(2.)
    tau_index_limit = floor(table_nb_elements * (2./cut - 1))
    sollya_logtbl = [
      (-log1p(float(i) / table_nb_elements)
      + (0 if i <= tau_index_limit else log(2.))) / log(self.log_radix)
      for i in range(table_nb_elements)
    ]
    # ...
    init_logtbl_hi = [
            round(sollya_logtbl[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl_lo = [
            round(sollya_logtbl[i] - init_logtbl_hi[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl = [tmp[i] for i in range(len(init_logtbl_hi)) for tmp in [init_logtbl_hi, init_logtbl_lo]]
    log1p_table = ML_NewTable(dimensions = table_dimensions,
                              storage_precision = self.precision,
                              init_data = init_logtbl,
                              tag = 'ml_log1p_table')
    # ...
    if self.no_rcp:
      sollya_rcptbl = [
        (1/((1+float(i)/table_nb_elements)+2**(-1-int(self.tbl_index_size))))
        for i in range(table_nb_elements)
      ]
      init_rcptbl = [
            round(sollya_rcptbl[i],
                  int(self.tbl_index_size)+1, # self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
      ]
      rcp_table = ML_NewTable(dimensions = [table_nb_elements],
                              storage_precision = self.precision,
                              init_data = init_rcptbl,
                              tag = 'ml_rcp_table')
    # ...

    Log.report(Log.Info, 'MDL unified subnormal handling')
    vx_as_int = TypeCast(vx, precision = int_prec, tag = 'vx_as_int')
    if self.no_subnormal == False:
      vx_as_uint = TypeCast(vx, precision = uint_prec, tag = 'vx_as_uint')
      # Avoid the 0.0 case by subtracting 1 from vx_as_int
      tmp = Comparison(vx_as_int - 1, subnormal_mask,
                       specifier = Comparison.Less)
      is_subnormal = default_bool_convert(
        tmp, # Will catch negative values as well as NaNs with sign bit set
        precision = int_prec)
      is_subnormal.set_attributes(tag = "is_subnormal")
      if not(isinstance(self.processor, VectorBackend)):
        is_subnormal = Subtraction(Constant(0, precision = int_prec),
                                   is_subnormal,
                                   precision = int_prec)

      #################################################
      # Vectorizable integer based subnormal handling #
      #################################################
      # 1. lzcnt
      # custom lzcount-like for subnormal numbers using FPU (see draft article)
      Zi = BitLogicOr(vx_as_uint, fp_one_as_uint, precision = uint_prec, tag="Zi")
      Zf = Subtraction(
        TypeCast(Zi, precision = self.precision),
        fp_one,
        precision = self.precision,
        tag="Zf")
      # Zf exponent is -(nlz(x) - exponent_size).
      # 2. compute shift value
      # Vectorial comparison on x86+sse/avx is going to look like
      # '|0x00|0xff|0x00|0x00|' and that's why we use Negate.
      # But for scalar code generation, comparison will rather be either 0 or 1
      # in C. Thus mask below won't be correct for a scalar implementation.
      # FIXME: Can we know the backend that will be called and choose in
      # consequence? Should we make something arch-agnostic instead?
      #
      n_value = BitLogicAnd(
        Addition(
          DirtyExponentExtraction(Zf, self.precision),
          Constant(
            self.precision.get_bias(),
            precision = int_prec),
          precision = int_prec),
        is_subnormal,
        precision = int_prec,
        tag = "n_value")
      alpha = Negation(n_value, tag="alpha")
      #
      # 3. shift left
      # renormalized_mantissa = BitLogicLeftShift(vx_as_int, value)
      normal_vx_as_int = BitLogicLeftShift(vx_as_int, alpha)
      # 4. set exponent to the right value
      # Compute the exponent to add : (p-1)-(value) + 1 = p-1-value
      # The final "+ 1" comes from the fact that once renormalized, the
      # floating-point datum has a biased exponent of 1
      #tmp0 = Subtraction(
      #        field_size,
      #        value,
      #        precision = int_prec,
      #        tag="tmp0")
      # Set the value to 0 if the number is not subnormal
      #tmp1 = BitLogicAnd(tmp0, is_subnormal)
      #renormalized_exponent = BitLogicLeftShift(
      #        tmp1,
      #        field_size
      #        )
    else: # no_subnormal == True
      normal_vx_as_int = vx_as_int
      
    #normal_vx_as_int = renormalized_mantissa + renormalized_exponent
    normal_vx = TypeCast(normal_vx_as_int, precision = self.precision,
                         tag = 'normal_vx')
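
# The OR-with-1.0 trick described in the comments above ("custom lzcount-like for
# subnormal numbers using FPU") can be reproduced bit-for-bit in plain Python; the sketch
# below only illustrates the idea for one positive binary64 subnormal:

import struct

def bits(x):      # bit pattern of a binary64 as an unsigned 64-bit integer
    return struct.unpack('<Q', struct.pack('<d', x))[0]

def float_of(b):
    return struct.unpack('<d', struct.pack('<Q', b))[0]

x = 2.0**-1060                       # a positive subnormal input
zi = bits(x) | bits(1.0)             # Zi: OR the payload into the significand of 1.0
zf = float_of(zi) - 1.0              # Zf: exact, equals mantissa(x) * 2^-52
e_zf = (bits(zf) >> 52) - 1023       # unbiased exponent of Zf
nlz = 11 - e_zf                      # = exponent_size - exponent(Zf), as the comment says
print(nlz, 64 - bits(x).bit_length())   # both 49
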

    # alpha = BitLogicAnd(field_size, is_subnormal, tag = 'alpha')
    # XXX Extract the mantissa, see if this is supported in the x86 vector
    # backend or if it still uses the support_lib.
    vx_mantissa = MantissaExtraction(normal_vx, precision = self.precision)

    Log.report(Log.Info, "MDL scheme")
    if self.force_division == True:
      rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    elif self.no_rcp == False:
      rcp_m = ReciprocalSeed(vx_mantissa, precision = self.precision)
      if not self.processor.is_supported_operation(rcp_m):
        if self.precision == ML_Binary64:
          # Try using a binary32 FastReciprocal
          binary32_m = Conversion(vx_mantissa, precision = ML_Binary32)
          rcp_m = ReciprocalSeed(binary32_m, precision = ML_Binary32)
          rcp_m = Conversion(rcp_m, precision = ML_Binary64)
        if not self.processor.is_supported_operation(rcp_m):
          # FIXME An approximation table could be used instead but for vector
          # implementations another GATHER would be required.
          # However this may well be better than a division...
          rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    else: # ... use a look-up table
      rcp_shift = BitLogicLeftShift(normal_vx_as_int, self.precision.get_exponent_size() + 1)
      rcp_idx = BitLogicRightShift(rcp_shift, self.precision.get_exponent_size() + 1 + self.precision.get_field_size() - int(self.tbl_index_size))
      rcp_m = TableLoad(rcp_table, rcp_idx, tag = 'rcp_idx',
                        debug = debug_multi)
    #  
    rcp_m.set_attributes(tag = 'rcp_m')

    # exponent is normally either 0 or -1, since m is in [1, 2). Possible
    # optimization?
    # exponent = ExponentExtraction(rcp_m, precision = self.precision,
    #         tag = 'exponent')

    ri_round = TypeCast(
            Addition(
                TypeCast(rcp_m, precision = int_prec),
                table_mantissa_half_ulp,
                precision = int_prec
                ),
            precision = uint_prec
            )
    ri_fast_rndn = BitLogicAnd(
            ri_round,
            table_s_exp_index_mask,
            tag = 'ri_fast_rndn',
            precision = uint_prec
            )
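
# ri_fast_rndn above rounds the reciprocal onto the table grid entirely in the integer
# domain: add half an ulp of the truncated format, then mask the low mantissa bits away.
# The same trick in plain Python (struct-based; table_index_size = 7 is assumed for this
# example):

import struct

def bits(x): return struct.unpack('<Q', struct.pack('<d', x))[0]
def float_of(b): return struct.unpack('<d', struct.pack('<Q', b))[0]

k = 7                                   # table_index_size
half_ulp = 1 << (52 - k - 1)            # half an ulp of a (1+k)-bit significand
mask = ~((half_ulp << 1) - 1) & (2**64 - 1)
rcp = 0.6087                            # some reciprocal approximation in [0.5, 1)
ri = float_of((bits(rcp) + half_ulp) & mask)
print(ri)                               # 0.609375: rcp rounded to 1+k significand bits
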
    # u = m * ri - 1
    ul = None
    if self.no_rcp == True: # ... u does not fit on a single word
      tmp_u, tmp_ul = Mul211(vx_mantissa,         
                             TypeCast(ri_fast_rndn, precision = self.precision), 
                             fma = (self.no_fma == False))
      fp_minus_one = Constant(-1.0, precision = self.precision, tag = 'fp_minus_one')
      u, ul = Add212(fp_minus_one, tmp_u, tmp_ul)      
      u.set_attributes(tag='uh')
      ul.set_attributes(tag='ul')
    elif self.no_fma == False:
      u = FusedMultiplyAdd(
        vx_mantissa,
        TypeCast(ri_fast_rndn, precision = self.precision),
        fp_one,
        specifier = FusedMultiplyAdd.Subtract,
        tag = 'u')
    else: # disable FMA
      # tmph + tmpl = m * ri, where tmph ~ 1
      tmph, tmpl = Mul211(vx_mantissa,         
                          TypeCast(ri_fast_rndn, precision = self.precision), 
                          fma = False)
      # u_tmp = tmph - 1 ... exact due to Sterbenz
      u_tmp = Subtraction(tmph, fp_one, precision = self.precision)
      # u = u_tmp + tmpl ... exact since the result u is representable as a single word
      u = Addition(u_tmp, tmpl, precision = self.precision, tag = 'u')
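
# A plain-Python sketch of the idea behind the no-FMA branch above: obtain the exact
# product m*ri as a hi + lo pair (here with the classical Dekker/Veltkamp splitting,
# which may differ from what Mul211 actually generates), subtract 1 from the hi part
# exactly, then fold the lo part back in:

def two_prod(a, b):
    # Dekker/Veltkamp error-free product: a * b == p + e exactly (no FMA needed)
    split = 2**27 + 1
    p = a * b
    ah = (a * split) - ((a * split) - a); al = a - ah
    bh = (b * split) - ((b * split) - b); bl = b - bh
    e = ((ah * bh - p) + ah * bl + al * bh) + al * bl
    return p, e

m  = 1.0 + 2.0**-30
ri = 1.0 - 2.0**-30
tmph, tmpl = two_prod(m, ri)          # tmph ~ 1, tmpl holds the low-order part
u = (tmph - 1.0) + tmpl               # tmph - 1.0 is exact (Sterbenz)
print(u)                              # -8.673617379884035e-19, i.e. m*ri - 1
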
    
    unneeded_bits = Constant(
            self.precision.field_size - table_index_size,
            precision=uint_prec,
            tag="unneeded_bits"
            )
    assert self.precision.field_size - table_index_size >= 0
    ri_bits = BitLogicRightShift(
            ri_fast_rndn,
            unneeded_bits,
            precision = uint_prec,
            tag = "ri_bits"
            )
    # Retrieve mantissa's MSBs + first bit of exponent, for tau computation in case
    # exponent is 0 (i.e. biased 127, i.e. first bit of exponent is set.).
    # In this particular case, i = 0 but tau is 1
    # table_index does not need to be as long as uint_prec might be,
    # try and keep it the size of size_t.
    size_t_prec = ML_UInt32
    signed_size_t_prec = ML_Int32
    table_index_mask = Constant(
            (1 << (table_index_size + 1)) - 1,
            precision = size_t_prec
            )
    table_index = BitLogicAnd(
            Conversion(ri_bits, precision = size_t_prec),
            table_index_mask,
            tag = 'table_index',
            precision = size_t_prec
            )
    # Compute tau using the tau_index_limit value.
    tmp = default_bool_convert(
            Comparison(
                TypeCast(table_index, precision = signed_size_t_prec),
                Constant(tau_index_limit, precision = signed_size_t_prec),
                specifier = Comparison.Greater
                if isinstance(self.processor, VectorBackend)
                else Comparison.LessOrEqual
                ),
            precision = signed_size_t_prec,
            tag="tmp"
            )
    # A true tmp will typically be -1 for VectorBackends, but 1 for standard C.
    tau = Conversion(
        Addition(tmp, Constant(1, precision=signed_size_t_prec), precision = signed_size_t_prec, tag="pre_add")
            if isinstance(self.processor, VectorBackend)
            else tmp,
            precision=int_prec,
            tag="pre_tau"
        )
    tau.set_attributes(tag = 'tau')
    # Update table_index: keep only table_index_size bits
    table_index_hi = BitLogicAnd(
            table_index,
            Constant((1 << table_index_size) - 1, precision = size_t_prec),
            precision = size_t_prec
            )
    # table_index_hi = table_index_hi << 1
    table_index_hi = BitLogicLeftShift(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_hi"
            )
    # table_index_lo = table_index_hi + 1
    table_index_lo = Addition(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_lo"
            )

    tbl_hi = TableLoad(log1p_table, table_index_hi, tag = 'tbl_hi',
                       debug = debug_multi)
    tbl_lo = TableLoad(log1p_table, table_index_lo, tag = 'tbl_lo',
                       debug = debug_multi)
    # Compute exponent e + tau - alpha, but first subtract the bias.
    if self.no_subnormal == False:
      tmp_eptau = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
      exponent = Subtraction(tmp_eptau, alpha, precision = int_prec)
    else:
      exponent = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
    #
    fp_exponent = Conversion(exponent, precision = self.precision,
                             tag = 'fp_exponent')

    Log.report(Log.Info, 'MDL polynomial approximation')
    if self.log_radix == EXP_1:
      sollya_function = log(1 + sollya.x)
    elif self.log_radix == 2:
      sollya_function = log2(1 + sollya.x)
    elif self.log_radix == 10:
      sollya_function = log10(1 + sollya.x)
    # ...
    if self.force_division == True: # rcp accuracy is 2^(-p)
      boundrcp = 2**(-self.precision.get_precision())
    else:
      boundrcp = 1.5 * 2**(-12)           # ... see Intel intrinsics guide
      if self.precision in [ML_Binary64]:
        if not self.processor.is_supported_operation(rcp_m):
          boundrcp = (1+boundrcp)*(1+2**(-24)) - 1
        else:
          boundrcp = 2**(-14)             # ... see Intel intrinsics guide
    arg_red_mag = boundrcp + 2**(-table_index_size-1) + boundrcp * 2**(-table_index_size-1)
    if self.no_rcp == False:
      approx_interval = Interval(-arg_red_mag, arg_red_mag)
    else:
      approx_interval = Interval(-2**(-int(self.tbl_index_size)+1),2**(-int(self.tbl_index_size)+1))
    max_eps = 2**-(2*(self.precision.get_field_size()))
    Log.report(Log.Info, "max acceptable error for polynomial = {}".format(float.hex(max_eps)))
    poly_degree = sup(
            guessdegree(
                sollya_function,
                approx_interval,
                max_eps,
                )
            )
    Log.report(Log.Info, "poly degree is ", poly_degree)
    if self.log_radix == EXP_1:
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(2, int(poly_degree) + 1), # Force 1st 2 coeffs to 0 and 1, resp.
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree - 1),
        approx_interval,
        sollya.absolute,
        0 + sollya._x_) # Force the first 2 coefficients to 0 and 1, resp.
    else: # ... == '2' or '10'
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(1, int(poly_degree) + 1), # Force 1st coeff to 0
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree),
        approx_interval,
        sollya.absolute,
        0) # Force the first coefficients to 0

    Log.report(Log.Info, str(poly_object))

    constant_precision = ML_SingleSingle if self.precision == ML_Binary32 \
            else ML_DoubleDouble if self.precision == ML_Binary64 \
            else None
    if is_cgpe_available():
        log1pu_poly = PolynomialSchemeEvaluator.generate_cgpe_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision, scheme_id = cgpe_scheme_idx
                )
    else:
        Log.report(Log.Warning,
                "CGPE not available, falling back to std poly evaluator")
        log1pu_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision
                )

    # XXX Dirty implementation of double-(self.precision) poly
    def dirty_poly_node_conversion(node, variable_h, variable_l, use_fma):
        return dirty_multi_node_expand(
          node, self.precision, mem_map={variable_h: (variable_h, variable_l)}, fma=use_fma)
    log1pu_poly_hi, log1pu_poly_lo = dirty_poly_node_conversion(log1pu_poly, u, ul,
                                                                use_fma=(self.no_fma == False))

    log1pu_poly_hi.set_attributes(tag = 'log1pu_poly_hi')
    log1pu_poly_lo.set_attributes(tag = 'log1pu_poly_lo')

    # Compute log(2) * (e + tau - alpha)
    if self.log_radix != 2: # 'e' or '10'
      log2e_hi, log2e_lo = Mul212(fp_exponent, log2_hi, log2_lo, 
                                  fma = (self.no_fma == False))
   
    # Add log1p(u)
    if self.log_radix != 2: # 'e' or '10'
      tmp_res_hi, tmp_res_lo = Add222(log2e_hi, log2e_lo,
                                      log1pu_poly_hi, log1pu_poly_lo)
    else:
      tmp_res_hi, tmp_res_lo = Add212(fp_exponent,
                                      log1pu_poly_hi, log1pu_poly_lo)

    # Add -log(2^(tau)/m) approximation retrieved by two table lookups
    logx_hi = Add122(tmp_res_hi, tmp_res_lo, tbl_hi, tbl_lo)[0]
    logx_hi.set_attributes(tag = 'logx_hi')

    scheme = Return(logx_hi, precision = self.precision)

    return scheme
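
# Ignoring the subnormal correction alpha and working in radix e, the scheme above
# reconstructs log(x) as (e + tau)*log(2) + log1p(u) + (-log(2^tau * r)), where r
# approximates 1/m and u = r*m - 1.  The identity itself holds for either value of tau,
# which a few lines of plain Python confirm (illustrative values only):

import math

x = 0.7
m, e = math.frexp(x)
m, e = 2.0 * m, e - 1                     # x = m * 2^e with m in [1, 2)
r = 0.71875                               # stand-in for the rounded reciprocal of m
u = r * m - 1.0                           # reduced argument
for tau in (0, 1):
    rebuilt = (e + tau) * math.log(2.0) + math.log1p(u) - math.log(2.0**tau * r)
    print(rebuilt, math.log(x))           # both lines print ~ -0.356675
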
Example #29
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)
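        # note: log2_hi keeps only log2_hi_precision bits (Cody-Waite style
        # split) so that the product k * log2_hi below is exact for any k in
        # interval_k; the discarded part goes into log2_lo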

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree
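        # guessdegree only gives a starting degree for expm1(x)/x; the loop
        # below rebuilds the polynomial and re-checks the Gappa evaluation
        # error, bumping the degree until the relative error goal is met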

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while True:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # increment the polynomial degree to compensate for the initial decrement
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() / 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
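        # near-overflow reconstruction: 2^ik * poly is computed as
        # 2^(ik - offset) * poly * 2^offset so that neither ExponentInsertion
        # nor the intermediate product overflows prematurely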
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
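To make the structure of this exponential scheme easier to follow, here is a minimal plain-Python sketch of the same reduction/reconstruction math, using fdlibm-style split constants for log(2); math.expm1 stands in for the generated Estrin polynomial, and the overflow/underflow branches are omitted.

import math

# fdlibm-style Cody-Waite split of log(2); LOG2_HI has zeroed trailing bits
# so that k * LOG2_HI is exact for the relevant range of k
LOG2_HI = 6.93147180369123816490e-01
LOG2_LO = 1.90821492927058770002e-10
INV_LOG2 = 1.44269504088896338700e+00

def exp_sketch(x):
    # argument reduction: x = k*log(2) + r with |r| <= log(2)/2 (roughly)
    k = round(x * INV_LOG2)
    r = (x - k * LOG2_HI) - k * LOG2_LO
    # the generated scheme evaluates a polynomial approximating expm1 on r;
    # math.expm1 is used here as a stand-in
    p = math.expm1(r)
    # reconstruction: exp(x) = 2^k * (1 + expm1(r))
    return math.ldexp(1.0 + p, k)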
Example #30
  def generate_scheme(self):
    vx = self.implementation.add_input_variable("x", self.precision) 
    sollya_precision = self.get_input_precision().sollya_object

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)


    log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
    log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

    log2_hi = Constant(log2_hi_value, precision = self.precision)
    log2_lo = Constant(log2_lo_value, precision = self.precision)

    vx_exp  = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    int_precision = self.precision.get_integer_format()

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision = self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    # table creation
    table_index_size = 7
    log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
    log_table[0][0] = 0.0
    log_table[0][1] = 0.0
    for i in range(1, 2**table_index_size):
        #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
        inv_value = inv_approx_table[i] # (1.0 + (inv_approx_table[i] / S2**9) ) * S2**-1
        value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
        log_table[i][0] = value_high
        log_table[i][1] = value_low


    vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    # case close to 0: ctz
    ctz_exp_limit = -7
    ctz_cond = vx_exp < ctz_exp_limit
    ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)
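    # close-to-zero branch: when the exponent of x is below ctz_exp_limit,
    # log1p(x) is evaluated directly as x * P(x) with P(x) ~ log1p(x)/x,
    # avoiding the loss of x's low bits that computing 1 + x would cause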

    ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
    ctz_poly_object = Polynomial.build_from_approximation(log1p(sollya.x)/sollya.x, ctz_poly_degree, [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision = self.precision)
    ctz_poly.set_attributes(tag = "ctz_poly", debug = debug_lftolx)

    ctz_result = vx * ctz_poly

    neg_input = Comparison(vx, -1, likely = False, specifier = Comparison.Less, debug = debugd, tag = "neg_input")
    vx_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debugd, tag = "nan_or_inf")
    vx_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False, debug = debugd, tag = "snan")
    vx_inf  = Test(vx, specifier = Test.IsInfty, likely = False, debug = debugd, tag = "inf")
    vx_subnormal = Test(vx, specifier = Test.IsSubnormal, likely = False, debug = debugd, tag = "vx_subnormal")
    
    log_function_code = CodeFunction("new_log", [Variable("x", precision = ML_Binary64)], output_format = ML_Binary64) 
    log_call_generator = FunctionOperator(log_function_code.get_name(), arity = 1, output_precision = ML_Binary64, declare_prototype = log_function_code)
    newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64,), ML_Binary64, log_call_generator)


    # case away from 0.0
    pre_vxp1 = vx + 1.0
    pre_vxp1.set_attributes(tag = "pre_vxp1", debug = debug_lftolx)
    pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag = "pre_vxp1_exp", debug = debugd)
    cm500 = Constant(-500, precision = ML_Int32)
    c0 = Constant(0, precision = ML_Int32)
    cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
    scaling_factor_exp = Select(cond_scaling, cm500, c0)
    scaling_factor = ExponentInsertion(scaling_factor_exp, precision = self.precision, tag = "scaling_factor")

    vxp1 = pre_vxp1 * scaling_factor
    vxp1.set_attributes(tag = "vxp1", debug = debug_lftolx)
    vxp1_exp = ExponentExtraction(vxp1, tag = "vxp1_exp", debug = debugd)

    vxp1_inv = ReciprocalSeed(vxp1, precision = self.precision, tag = "vxp1_inv", debug = debug_lftolx, silent = True)

    vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision = self.precision, tag = "vxp1_dirty_inv", debug = debug_lftolx)

    table_index = BitLogicAnd(BitLogicRightShift(TypeCast(vxp1, precision = int_precision, debug = debuglx), self.precision.get_field_size() - 7, debug = debuglx), 0x7f, tag = "table_index", debug = debuglx) 

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision = ML_UInt64), Constant(-2, precision = ML_UInt64), precision = ML_UInt64), precision = self.precision, tag = "pre_arg_red_index", debug = debug_lftolx)
    arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag = "arg_red_index", debug = debug_lftolx)

    red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
    #red_vxp1 = arg_red_index * vxp1 - 1.0
    red_vxp1.set_attributes(tag = "red_vxp1", debug = debug_lftolx)
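    # arg_red_index approximates 1/(1 + x) to roughly the reciprocal-seed
    # accuracy, so red_vxp1 = arg_red_index * (1 + x) - 1 stays within about
    # [-inv_err, inv_err] (see inv_err below); the two Select branches compute
    # the same quantity, reassociated for accuracy in the unscaled case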

    log_inv_lo = TableLoad(log_table, table_index, 1, tag = "log_inv_lo", debug = debug_lftolx) 
    log_inv_hi = TableLoad(log_table, table_index, 0, tag = "log_inv_hi", debug = debug_lftolx)

    inv_err = S2**-6 # TODO: link to target DivisionSeed precision

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
    global_poly_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
    poly_object = global_poly_object.sub_poly(start_index = 1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision = self.precision)
    _poly.set_attributes(tag = "poly", debug = debug_lftolx)
    Log.report(Log.Info, global_poly_object.get_sollya_object())


    vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag = "vxp1_inv_exp", debug = debugd)
    corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision = self.precision)# vxp1_inv_exp

    #poly = (red_vxp1) * (1 +  _poly)
    #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

    pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo))
    pre_result.set_attributes(tag = "pre_result", debug = debug_lftolx)
    exact_log2_hi_exp = - corr_exp * log2_hi
    exact_log2_hi_exp.set_attributes(tag = "exact_log2_hi_exp", debug = debug_lftolx, prevent_optimization = True)
    #std_result =  exact_log2_hi_exp + pre_result

    exact_log2_lo_exp = - corr_exp * log2_lo
    exact_log2_lo_exp.set_attributes(tag = "exact_log2_lo_exp", debug = debug_lftolx)#, prevent_optimization = True)
    
    init = exact_log2_lo_exp  - log_inv_lo
    init.set_attributes(tag = "init", debug = debug_lftolx, prevent_optimization = True)
    fma0 = (red_vxp1 * _poly + init) # - log_inv_lo)
    fma0.set_attributes(tag = "fma0", debug = debug_lftolx)
    step0 = fma0 
    step0.set_attributes(tag = "step0", debug = debug_lftolx) #, prevent_optimization = True)
    step1 = step0 + red_vxp1
    step1.set_attributes(tag = "step1", debug = debug_lftolx, prevent_optimization = True)
    step2 = -log_inv_hi + step1
    step2.set_attributes(tag = "step2", debug = debug_lftolx, prevent_optimization = True)
    std_result = exact_log2_hi_exp + step2
    std_result.set_attributes(tag = "std_result", debug = debug_lftolx, prevent_optimization = True)


    # main scheme
    Log.report(Log.Info, "MDL scheme")
    pre_scheme = ConditionBlock(neg_input,
        Statement(
            ClearException(),
            Raise(ML_FPE_Invalid),
            Return(FP_QNaN(self.precision))
        ),
        ConditionBlock(vx_nan_or_inf,
            ConditionBlock(vx_inf,
                Statement(
                    ClearException(),
                    Return(FP_PlusInfty(self.precision)),
                ),
                Statement(
                    ClearException(),
                    ConditionBlock(vx_snan,
                        Raise(ML_FPE_Invalid)
                    ),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(vx_subnormal,
                Return(vx),
                ConditionBlock(ctz_cond,
                    Statement(
                        Return(ctz_result),
                    ),
                    Statement(
                        Return(std_result)
                    )
                )
            )
        )
    )
    scheme = pre_scheme
    return scheme
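For readability, here is a minimal plain-Python sketch of the reduction idea behind this log1p scheme: a coarse reciprocal of 1 + x (mimicking ReciprocalSeed plus the 7-bit table index) turns log(1 + x) into -log(r) + log1p(u) with u = r*(1 + x) - 1 small. math.log and math.log1p stand in for the table values and the generated Horner polynomial, and the near-zero branch is kept separate as in the scheme above.

import math

TABLE_BITS = 7  # matches table_index_size above

def log1p_sketch(x):
    # near-zero branch: no reduction, a polynomial in x is enough
    if abs(x) < 2.0 ** -7:
        return math.log1p(x)            # stands in for vx * ctz_poly

    y = 1.0 + x
    # coarse reciprocal of y, mimicking ReciprocalSeed / the table indexing:
    # keep only ~TABLE_BITS bits of 1/m, where y = m * 2^e with m in [0.5, 1)
    m, e = math.frexp(y)
    r = math.ldexp(round((1.0 / m) * 2**TABLE_BITS) / 2**TABLE_BITS, -e)
    u = r * y - 1.0                     # small, bounded by the seed accuracy
    # log(y) = -log(r) + log(1 + u): -log(r) would come from the log table,
    # log1p(u) from the generated Horner polynomial
    return -math.log(r) + math.log1p(u)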