Esempio n. 1
0
    def misc(self):
        """Generate the C implementation and a Gappa evaluation-error script.

        Emits the C code for <exp_implementation> to <output_file>, then
        builds a Gappa goal bounding the error of the current reciprocal
        approximation against the exact value 1 / gappa_vy and runs Gappa
        on the resulting script.
        """
        print("Gappa script generation")

        # C code generation: emit the definition with static constants and
        # the required headers, then dump everything to output_file
        cg = CCodeGenerator(processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = exp_implementation.get_definition(cg,
                                                        C_Code,
                                                        static_cst=True)
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        self.result.add_header("support_lib/ml_special_values.h")

        # context manager guarantees the stream is closed even if get()
        # or write() raises (original used open/close without try/finally)
        with open(output_file, "w") as output_stream:
            output_stream.write(self.result.get(cg))

        seed_var = Variable("seed",
                            precision=self.precision,
                            interval=Interval(0.5, 1))
        # map abstract nodes to bounded Gappa variables: this both limits
        # the DAG exploration and provides the input intervals of the proof
        cg_eval_error_copy_map = {
            gappa_init_approx.get_handle().get_node():
            seed_var,
            gappa_vx.get_handle().get_node():
            Variable("x", precision=self.precision, interval=Interval(1, 2)),
            gappa_vy.get_handle().get_node():
            Variable("y", precision=self.precision, interval=Interval(1, 2)),
        }
        # exact reciprocal 1 / y used as the reference value of the goal
        G1 = Constant(1, precision=ML_Exact)
        exact = G1 / gappa_vy
        exact.set_precision(ML_Exact)
        exact.set_tag("div_exact")
        # goal: error between the current approximation and the exact value
        gappa_goal = gappa_current_approx.get_handle().get_node() - exact
        gappa_goal.set_precision(ML_Exact)
        gappacg = GappaCodeGenerator(target,
                                     declare_cst=False,
                                     disable_debug=True)
        gappa_code = gappacg.get_interval_code(gappa_goal,
                                               cg_eval_error_copy_map)

        new_exact_node = exact.get_handle().get_node()

        # register the Newton-Raphson specific hint rules of each iteration
        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_node)

        # hypothesis: the seed is within 2^-7 of the exact reciprocal
        seed_wrt_exact = seed_var - new_exact_node
        seed_wrt_exact.set_precision(ML_Exact)
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact,
                               Interval(-S2**-7, S2**-7))

        try:
            eval_error = execute_gappa_script_extract(
                gappa_code.get(gappacg))["goal"]
            # bug fix: the value used to sit OUTSIDE the print call
            # (`print("eval_error: "), eval_error`) and was never displayed
            print("eval_error: ", eval_error)
        except Exception:
            # narrowed from a bare except so SystemExit/KeyboardInterrupt
            # still propagate
            print("error during gappa run")
Esempio n. 2
0
  def evaluate_argument_reduction(self, in_interval, in_prec, inv_size, inv_prec):
    """Evaluate one argument-reduction step with Gappa.

    Builds the exact DAG of y = x * round(1 / trunc(x)) with x = 1 + dx,
    asks Gappa for the interval of dy = y - 1, and derives the size of the
    reciprocal table implied by (inv_size, inv_prec).

    :param in_interval: interval of the fixed-point input dx
    :param in_prec: fractional bit count of the input format
    :param inv_size: fractional bits kept in the truncated x1 (table index)
    :param inv_prec: fractional bits of the rounded reciprocal
    :return: dict with keys 'out_interval', 'length_table', 'sizeof_table'
    """
    one = Constant(1, precision = ML_Exact, tag = "one")

    # fixed-point input dx; the reduced argument is x = 1 + dx
    dx =     Variable("dx",
                      precision = ML_Custom_FixedPoint_Format(0, in_prec, False),
                      interval = in_interval)

    # do the argument reduction
    x =       Addition(dx, one, tag = "x",
                       precision = ML_Exact)
    # x truncated to inv_size fractional bits (used as the table index)
    x1 =    Conversion(x, tag = "x1",
                       precision = ML_Custom_FixedPoint_Format(0, inv_size, False),
                       rounding_mode = ML_RoundTowardMinusInfty)
    s = Multiplication(dx, Constant(S2**inv_size, precision = ML_Exact),
                       precision = ML_Exact,
                       tag = "interval_index_table")
    inv_x1 =  Division(one, x1, tag = "ix1",
                       precision = ML_Exact)
    # exact reciprocal rounded up to inv_prec fractional bits
    inv_x = Conversion(inv_x1,  tag = "ix",
                       precision = ML_Custom_FixedPoint_Format(1, inv_prec, False),
                       rounding_mode = ML_RoundTowardPlusInfty)
    y = Multiplication(x, inv_x, tag = "y",
                       precision = ML_Exact)
    dy =   Subtraction(y, one,  tag = "dy",
                       precision = ML_Exact)

    # add the necessary goals and hints
    dx_gappa = Variable("dx_gappa", interval = dx.get_interval(), precision = dx.get_precision())
    swap_map = {dx: dx_gappa}

    # goal: dy (result of the argument reduction)
    gappa_code = self.gappa_engine.get_interval_code_no_copy(dy.copy(swap_map), bound_list = [swap_map[dx]])
    #self.gappa_engine.add_goal(gappa_code, s.copy(swap_map)) # range of index of table
    # hints. are the ones with isApprox=True really necessary ?
    self.gappa_engine.add_hint(gappa_code, x.copy(swap_map), x1.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, inv_x1.copy(swap_map), inv_x.copy(swap_map), isApprox = True)
    # hint: x1 * ix1 -> 1 { x1 <> 0 }
    self.gappa_engine.add_hint(gappa_code,
                               Multiplication(x1, inv_x1, precision = ML_Exact).copy(swap_map), one,
                               Comparison(swap_map[inv_x1], Constant(0, precision = ML_Exact),
                                          specifier = Comparison.NotEqual, precision = ML_Bool))
    # execute and parse the result
    result = execute_gappa_script_extract(gappa_code.get(self.gappa_engine))
    out_interval = result['goal']
    length_table = 1 + floor(sup(in_interval) * S2**inv_size).getConstantAsInt()
    # integer division: a byte count must stay an int (plain `/` would
    # produce a float under python3; `//` is identical on python2 ints)
    sizeof_table = length_table * (16 + ML_Custom_FixedPoint_Format(1, inv_prec, False).get_c_bit_size() // 8)
    return {
      'out_interval': out_interval,
      'length_table': length_table,
      'sizeof_table': sizeof_table,
    }
Esempio n. 3
0
  def generate_scheme(self):
    """ Build the function scheme and demonstrate Gappa interval evaluation:
        generate a gappa script bounding two expressions of the input <x>,
        register a manual hint and an extra goal, run gappa when installed,
        and return a dummy Return(vx) scheme. """
    # declaring function input variable
    vx = self.implementation.add_input_variable("x", ML_Binary32)
    # declaring specific interval for input variable <x>
    vx.set_interval(Interval(-1, 1))

    # declaring free Variable y (only used inside the manual gappa hint)
    vy = Variable("y", precision = ML_Exact)

    # declaring expression with vx variable
    expr = vx * vx - vx * 2
    # declaring second expression with vx variable
    expr2 = vx * vx - vx

    # optimizing expressions (defining every unknown precision as the
    # default one + some optimization as FMA merging if enabled)
    opt_expr = self.optimise_scheme(expr)
    opt_expr2 = self.optimise_scheme(expr2)

    # setting specific tag name for optimized expression (to be extracted
    # from gappa script results)
    opt_expr.set_tag("goal")
    opt_expr2.set_tag("new_goal")

    # defining default goal for gappa execution
    gappa_goal = opt_expr

    # declaring EXACT expression to be used as hint in Gappa's script
    annotation = self.opt_engine.exactify(vy * (1 / vy))

    # the dict var_bound is used to limit the DAG part to be explored when
    # generating the gappa script; each pair (key, value) indicates a node
    # to stop at <key> and a node to replace it with during the generation:
    # <value>, which must be a Variable instance with a defined interval.
    # vx.get_handle().get_node() is used to retrieve the node instantiating
    # the abstract node <vx> after the call to self.optimise_scheme
    var_bound = {
      vx.get_handle().get_node(): Variable("x", precision = ML_Binary32, interval = vx.get_interval())
    } 
    # generating gappa code to determine interval for <opt_expr>
    gappa_code = self.gappa_engine.get_interval_code(opt_expr, var_bound)

    # add a manual hint to the gappa code
    # which states that vy * (1 / vy) -> 1 { vy <> 0 };
    self.gappa_engine.add_hint(gappa_code, annotation, Constant(1, precision = ML_Exact), Comparison(vy, Constant(0, precision = ML_Integer), specifier = Comparison.NotEqual, precision = ML_Bool))

    # adding the expression <opt_expr2> as an extra goal in the gappa script
    self.gappa_engine.add_goal(gappa_code, opt_expr2)

    # executing gappa on the script generated from <gappa_code>
    # extract the result and store them into <gappa_result>
    # which is a dict indexed by the goals' tag
    if is_gappa_installed():
        gappa_result = execute_gappa_script_extract(gappa_code.get(self.gappa_engine))
        Log.report(Log.Info, "eval error: ", gappa_result["new_goal"])
    else:
        Log.report(Log.Warning, "gappa was not installed: unable to check execute_gappa_script_extract")

    # dummy scheme to make functional code generation possible
    scheme = Statement(Return(vx))

    return scheme
Esempio n. 4
0
  def eval_argument_reduction(self, size1, prec1, size2, prec2):
    """ Evaluate a two-stage argument reduction with Gappa (python2 code).

        Stage 1: x = 1 + dx, x1 = trunc(x) to size1 bits, y = x * round(1/x1)
        Stage 2: y1 = trunc(y) to size2 bits, z = y * round(1/y1)
        Returns a dict with the intermediate/output intervals and the
        lengths/sizes of the two reciprocal tables. """
    one = Constant(1, precision = ML_Exact, tag = "one")
    # input dx covers the full 52-bit mantissa range [0, 1 - 2^-52]
    dx =     Variable("dx",
                      precision = ML_Custom_FixedPoint_Format(0, 52, False),
                      interval = Interval(0, 1 - S2**-52))

    # do the argument reduction
    x =       Addition(dx, one, tag = "x",
                       precision = ML_Exact)
    # x truncated to size1 fractional bits (index into table 1)
    x1 =    Conversion(x, tag = "x1",
                       precision = ML_Custom_FixedPoint_Format(0, size1, False),
                       rounding_mode = ML_RoundTowardMinusInfty)
    s = Multiplication(Subtraction(x1, one, precision = ML_Exact),
                       Constant(S2**size1, precision = ML_Exact),
                       precision = ML_Exact,
                       tag = "indexTableX")
    inv_x1 =  Division(one, x1, tag = "ix1",
                       precision = ML_Exact)
    inv_x = Conversion(inv_x1,  tag = "ix",
                       precision = ML_Custom_FixedPoint_Format(1, prec1, False),
                       rounding_mode = ML_RoundTowardPlusInfty)
    y = Multiplication(x, inv_x, tag = "y",
                       precision = ML_Exact)
    dy =   Subtraction(y, one,  tag = "dy", 
                       precision = ML_Exact)
    # NOTE(review): tag "y" duplicates the tag of node <y> above —
    # presumably this should be "y1"; confirm against the gappa output
    y1 =    Conversion(y, tag = "y",
                       precision = ML_Custom_FixedPoint_Format(0,size2,False),
                       rounding_mode = ML_RoundTowardMinusInfty)
    t = Multiplication(Subtraction(y1, one, precision = ML_Exact),
                       Constant(S2**size2, precision = ML_Exact),
                       precision = ML_Exact,
                       tag = "indexTableY")
    inv_y1 =  Division(one, y1, tag = "iy1",
                       precision = ML_Exact)
    inv_y = Conversion(inv_y1, tag = "iy",
                       precision = ML_Custom_FixedPoint_Format(1,prec2,False),
                       rounding_mode = ML_RoundTowardPlusInfty)
    z = Multiplication(y, inv_y, tag = "z",
                       precision = ML_Exact)
    dz =   Subtraction(z, one, tag = "dz",
                       precision = ML_Exact)


    # add the necessary goals and hints
    dx_gappa = Variable("dx_gappa", interval = dx.get_interval(), precision = dx.get_precision())
    swap_map = {dx: dx_gappa}
    # goals (main goal: dz, the result of the argument reduction)
    gappa_code = self.gappa_engine.get_interval_code_no_copy(dz.copy(swap_map), bound_list = [dx_gappa])
    self.gappa_engine.add_goal(gappa_code, dy.copy(swap_map))
    self.gappa_engine.add_goal(gappa_code, s.copy(swap_map)) # range of index of table 1
    self.gappa_engine.add_goal(gappa_code, t.copy(swap_map)) # range of index of table 2
    # hints. are the ones with isAppox=True really necessary ?
    self.gappa_engine.add_hint(gappa_code, x.copy(swap_map), x1.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, y.copy(swap_map), y1.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, inv_x1.copy(swap_map), inv_x.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, inv_y1.copy(swap_map), inv_y.copy(swap_map), isApprox = True)
    # hint: x1 * ix1 -> 1 { x1 <> 0 }
    self.gappa_engine.add_hint(gappa_code,
                               Multiplication(x1, inv_x1, precision = ML_Exact).copy(swap_map), one,
                               Comparison(swap_map[inv_x1], Constant(0, precision = ML_Exact),
                                          specifier = Comparison.NotEqual, precision = ML_Bool))
    # hint: y1 * iy1 -> 1 { y1 <> 0 }
    self.gappa_engine.add_hint(gappa_code,
                               Multiplication(y1, inv_y1, precision = ML_Exact).copy(swap_map), one,
                               Comparison(swap_map[inv_y1], Constant(0, precision = ML_Exact),
                                          specifier = Comparison.NotEqual, precision = ML_Bool))
    # NOTE(review): <toto> is referenced by no goal/hint — looks like a
    # leftover debugging hypothesis; confirm before removing
    toto = Variable("toto", precision = ML_Binary64)
    self.gappa_engine.add_hypothesis(gappa_code, toto, Interval(0, S2**-52))
    
    # execute and parse the result
    result = execute_gappa_script_extract(gappa_code.get(self.gappa_engine))
    self.gappa_engine.clear_memoization_map() # avoid memory leak
    #print result['indexTableX'], result['indexTableY']
    length_table1 = 1 + floor(sup(result['indexTableX'])).getConstantAsInt()
    length_table2 = 1 + floor(sup(result['indexTableY'])).getConstantAsInt()
    # disabled consistency check between the dy-derived and goal-derived
    # table-2 lengths (kept for debugging; `if False` makes it dead code)
    if False and (length_table2 != 1 + floor(sup(result['dy']) * S2**size2).getConstantAsInt()):
      print "(dy*2**size2:", 1 + floor(sup(result['dy']*S2**size2)).getConstantAsInt(), ")"
      print "(indexTableY:", 1 + floor(sup(result['indexTableY'])).getConstantAsInt(), ")"
      print result['indexTableY'], result['dy']
      sys.exit(1)
    return {
      # arguments
      'size1': size1, 'prec1': prec1, 'size2': size2, 'prec2': prec2,
      # size of the tables (NOTE: `/8` relies on python2 integer division)
      'length_table1': length_table1,
      'length_table2': length_table2,
      'sizeof_table1': length_table1 * (16 + ML_Custom_FixedPoint_Format(1,prec1,False).get_c_bit_size()/8),
      'sizeof_table2': length_table2 * (16 + ML_Custom_FixedPoint_Format(1,prec2,False).get_c_bit_size()/8),
      # intervals
      'in_interval': dx.get_interval(),
      'mid_interval': result['dy'],
      'out_interval': result['goal'],
    }
Esempio n. 5
0
    def solve_eval_error(self, gappa_init_approx, gappa_current_approx,
                         div_approx, gappa_vx, gappa_vy, inv_iteration_list,
                         div_iteration_list, seed_accuracy, seed_interval):
        """ compute the evaluation error of reciprocal approximation of
            (1 / gappa_vy) and of the division approximation

            :param seed_accuracy: absolute error for seed value
            :type seed_accuracy: SollyaObject
            :param seed_interval: interval of the seed value
            :return: (recp_eval_error, div_eval_error) gappa result intervals

        """
        seed_var = Variable("seed",
                            precision=self.precision,
                            interval=seed_interval)
        # map abstract nodes to bounded Gappa variables: this both limits
        # the DAG exploration and provides the input intervals of the proof
        cg_eval_error_copy_map = {
            gappa_init_approx.get_handle().get_node():
            seed_var,
            gappa_vy.get_handle().get_node():
            Variable("y", precision=self.precision, interval=Interval(1, 2)),
            gappa_vx.get_handle().get_node():
            Variable("x", precision=self.precision, interval=Interval(1, 2)),
        }

        yerr_last = div_iteration_list[-1].yerr

        # copying cg_eval_error_copy_map to allow mutation during
        # optimise_scheme while keeping a clean copy for later use
        optimisation_copy_map = cg_eval_error_copy_map.copy()
        gappa_current_approx = self.optimise_scheme(gappa_current_approx,
                                                    copy=optimisation_copy_map)
        div_approx = self.optimise_scheme(div_approx,
                                          copy=optimisation_copy_map)
        yerr_last = self.optimise_scheme(yerr_last, copy=optimisation_copy_map)
        yerr_last.get_handle().set_node(yerr_last)
        # exact reciprocal and exact division used as reference values
        G1 = Constant(1, precision=ML_Exact)
        exact_recp = G1 / gappa_vy
        exact_recp.set_precision(ML_Exact)
        exact_recp.set_tag("exact_recp")
        recp_approx_error_goal = gappa_current_approx - exact_recp
        recp_approx_error_goal.set_attributes(precision=ML_Exact,
                                              tag="recp_approx_error_goal")

        gappacg = GappaCodeGenerator(self.processor,
                                     declare_cst=False,
                                     disable_debug=True)
        gappa_code = GappaCodeObject()

        exact_div = gappa_vx * exact_recp
        exact_div.set_attributes(precision=ML_Exact, tag="exact_div")
        div_approx_error_goal = div_approx - exact_div
        div_approx_error_goal.set_attributes(precision=ML_Exact,
                                             tag="div_approx_error_goal")

        # idiom: list(d) iterates keys, equivalent to [op for op in d]
        bound_list = list(cg_eval_error_copy_map)

        gappacg.add_goal(gappa_code, yerr_last)

        gappa_code = gappacg.get_interval_code(
            [recp_approx_error_goal, div_approx_error_goal],
            bound_list,
            cg_eval_error_copy_map,
            gappa_code=gappa_code,
            register_bound_hypothesis=False)

        # register each bound variable's interval as an explicit hypothesis
        # (register_bound_hypothesis=False above disabled the automatic one)
        for node in bound_list:
            gappacg.add_hypothesis(gappa_code, cg_eval_error_copy_map[node],
                                   cg_eval_error_copy_map[node].get_interval())

        new_exact_recp_node = exact_recp.get_handle().get_node()
        new_exact_div_node = exact_div.get_handle().get_node()

        # adding specific hints for Newton-Raphson reciprocal iteration
        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_recp_node)

        for div_iter in div_iteration_list:
            div_iter.get_hint_rules(gappacg, gappa_code, new_exact_recp_node,
                                    new_exact_div_node)

        # hypothesis: the seed is within seed_accuracy of the exact reciprocal
        seed_wrt_exact = seed_var - new_exact_recp_node
        seed_wrt_exact.set_attributes(precision=ML_Exact, tag="seed_wrt_exact")
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact,
                               Interval(-seed_accuracy, seed_accuracy))

        try:
            gappa_results = execute_gappa_script_extract(
                gappa_code.get(gappacg))
            recp_eval_error = gappa_results["recp_approx_error_goal"]
            div_eval_error = gappa_results["div_approx_error_goal"]
            print("eval error(s): recp={}, div={}".format(
                recp_eval_error, div_eval_error))
        except Exception:
            # narrowed from a bare except; the two `= None` assignments that
            # used to follow this re-raise were unreachable and were removed
            print("error during gappa run")
            raise
        return recp_eval_error, div_eval_error
Esempio n. 6
0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 num_iter=3,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="__divsf3.c",
                 function_name="__divsf3"):
        # declaring CodeFunction and retrieving input variable
        self.precision = precision
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name,
                                          output_format=precision)
        vx = exp_implementation.add_input_variable("x", precision)
        vy = exp_implementation.add_input_variable("y", precision)
        processor = target

        class NR_Iteration(object):
            """ One Newton-Raphson refinement step of a reciprocal
                approximation: new_approx = approx + (1 - divisor*approx)*approx,
                optionally built from explicit FMA nodes. """
            def __init__(self, approx, divisor, force_fma=False):
                # approx: current approximation of 1/divisor
                # force_fma: when True, build error/update with explicit
                # FusedMultiplyAdd nodes instead of separate mul/add
                self.approx = approx
                self.divisor = divisor
                self.force_fma = force_fma
                if force_fma:
                    # error = 1 - divisor * approx (SubtractNegate specifier)
                    self.error = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                    # new_approx = error * approx + approx
                    self.new_approx = FusedMultiplyAdd(
                        self.error,
                        self.approx,
                        self.approx,
                        specifier=FusedMultiplyAdd.Standard)
                else:
                    self.error = 1 - divisor * approx
                    self.new_approx = self.approx + self.error * self.approx

            def get_new_approx(self):
                # refined approximation produced by this iteration
                return self.new_approx

            def get_hint_rules(self, gcg, gappa_code, exact):
                """ Register the Gappa rewrite hints for this iteration;
                    <exact> is the node of the exact reciprocal. """
                divisor = self.divisor.get_handle().get_node()
                approx = self.approx.get_handle().get_node()
                new_approx = self.new_approx.get_handle().get_node()

                # all hint expressions below are built with exact precision
                Attributes.set_default_precision(ML_Exact)

                if self.force_fma:
                    rule0 = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                else:
                    rule0 = 1.0 - divisor * approx
                rule1 = 1.0 - divisor * (approx - exact) - 1.0

                # relate the new error (rule2) to the quadratic form rule3,
                # presumably expressing the NR quadratic convergence —
                # confirm against the gappa proof before altering
                rule2 = new_approx - exact
                subrule = approx * (2 - divisor * approx)
                rule3 = (new_approx - subrule
                         ) - (approx - exact) * (approx - exact) * divisor

                if self.force_fma:
                    new_error = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                    rule4 = FusedMultiplyAdd(new_error, approx, approx)
                else:
                    rule4 = approx + (1 - divisor * approx) * approx

                # restore the ambient default precision
                Attributes.unset_default_precision()

                # registering hints
                gcg.add_hint(gappa_code, rule0, rule1)
                gcg.add_hint(gappa_code, rule2, rule3)
                gcg.add_hint(gappa_code, subrule, rule4)

        debugf = ML_Debug(display_format="%f")
        debuglf = ML_Debug(display_format="%lf")
        debugx = ML_Debug(display_format="%x")
        debuglx = ML_Debug(display_format="%lx")
        debugd = ML_Debug(display_format="%d")
        #debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)
        debug_ddtolx = ML_Debug(
            display_format="%\"PRIx64\" %\"PRIx64\"",
            pre_process=lambda v:
            "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" %
            (v, v))
        debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}",
                            pre_process=lambda v: "%s.hi, %s.lo" % (v, v))

        ex = Max(Min(ExponentExtraction(vx), 1020),
                 -1020,
                 tag="ex",
                 debug=debugd)
        ey = Max(Min(ExponentExtraction(vy), 1020),
                 -1020,
                 tag="ey",
                 debug=debugd)

        exact_ex = ExponentExtraction(vx, tag="exact_ex")
        exact_ey = ExponentExtraction(vy, tag="exact_ey")

        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        # computing the inverse square root
        init_approx = None

        scaling_factor_x = ExponentInsertion(-ex, tag="sfx_ei")
        scaling_factor_y = ExponentInsertion(-ey, tag="sfy_ei")

        scaled_vx = vx * scaling_factor_x
        scaled_vy = vy * scaling_factor_y

        scaled_vx.set_attributes(debug=debug_lftolx, tag="scaled_vx")
        scaled_vy.set_attributes(debug=debug_lftolx, tag="scaled_vy")

        scaled_vx.set_precision(ML_Binary64)
        scaled_vy.set_precision(ML_Binary64)

        # forcing vx precision to make processor support test
        init_approx_precision = DivisionSeed(scaled_vx,
                                             scaled_vy,
                                             precision=self.precision,
                                             tag="seed",
                                             debug=debug_lftolx)
        if not processor.is_supported_operation(init_approx_precision):
            if self.precision != ML_Binary32:
                px = Conversion(
                    scaled_vx, precision=ML_Binary32, tag="px",
                    debug=debugf) if self.precision != ML_Binary32 else vx
                py = Conversion(
                    scaled_vy, precision=ML_Binary32, tag="py",
                    debug=debugf) if self.precision != ML_Binary32 else vy

                init_approx_fp32 = Conversion(DivisionSeed(
                    px, py, precision=ML_Binary32, tag="seed", debug=debugf),
                                              precision=self.precision,
                                              tag="seed_ext",
                                              debug=debug_lftolx)
                if not processor.is_supported_operation(init_approx_fp32):
                    Log.report(
                        Log.Error,
                        "The target %s does not implement inverse square root seed"
                        % processor)
                else:
                    init_approx = init_approx_fp32
            else:
                Log.report(
                    Log.Error,
                    "The target %s does not implement inverse square root seed"
                    % processor)
        else:
            init_approx = init_approx_precision

        current_approx_std = init_approx
        # correctly-rounded inverse computation
        num_iteration = num_iter

        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()

        def compute_div(_init_approx, _vx=None, _vy=None, scale_result=None):
            inv_iteration_list = []
            Attributes.set_default_rounding_mode(ML_RoundToNearest)
            Attributes.set_default_silent(True)
            _current_approx = _init_approx
            for i in range(num_iteration):
                new_iteration = NR_Iteration(
                    _current_approx,
                    _vy,
                    force_fma=False if (i != num_iteration - 1) else True)
                inv_iteration_list.append(new_iteration)
                _current_approx = new_iteration.get_new_approx()
                _current_approx.set_attributes(tag="iter_%d" % i,
                                               debug=debug_lftolx)

            def dividend_mult(div_approx,
                              inv_approx,
                              dividend,
                              divisor,
                              index,
                              force_fma=False):
                #yerr = dividend - div_approx * divisor
                yerr = FMSN(div_approx, divisor, dividend)
                yerr.set_attributes(tag="yerr%d" % index, debug=debug_lftolx)
                #new_div = div_approx + yerr * inv_approx
                new_div = FMA(yerr, inv_approx, div_approx)
                new_div.set_attributes(tag="new_div%d" % index,
                                       debug=debug_lftolx)
                return new_div

            # multiplication correction iteration
            # to get correctly rounded full division
            _current_approx.set_attributes(tag="final_approx",
                                           debug=debug_lftolx)
            current_div_approx = _vx * _current_approx
            num_dividend_mult_iteration = 1
            for i in range(num_dividend_mult_iteration):
                current_div_approx = dividend_mult(current_div_approx,
                                                   _current_approx, _vx, _vy,
                                                   i)

            # last iteration
            yerr_last = FMSN(current_div_approx, _vy,
                             _vx)  #, clearprevious = True)
            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()
            last_div_approx = FMA(yerr_last,
                                  _current_approx,
                                  current_div_approx,
                                  rounding_mode=ML_GlobalRoundMode)

            yerr_last.set_attributes(tag="yerr_last", debug=debug_lftolx)

            pre_result = last_div_approx
            pre_result.set_attributes(tag="unscaled_div_result",
                                      debug=debug_lftolx)
            if scale_result != None:
                #result = pre_result * ExponentInsertion(ex) * ExponentInsertion(-ey)
                scale_factor_0 = Max(Min(scale_result, 950),
                                     -950,
                                     tag="scale_factor_0",
                                     debug=debugd)
                scale_factor_1 = Max(Min(scale_result - scale_factor_0, 950),
                                     -950,
                                     tag="scale_factor_1",
                                     debug=debugd)
                scale_factor_2 = scale_result - (scale_factor_1 +
                                                 scale_factor_0)
                scale_factor_2.set_attributes(debug=debugd,
                                              tag="scale_factor_2")

                result = ((pre_result * ExponentInsertion(scale_factor_0)) *
                          ExponentInsertion(scale_factor_1)
                          ) * ExponentInsertion(scale_factor_2)
            else:
                result = pre_result
            result.set_attributes(tag="result", debug=debug_lftolx)

            ext_pre_result = FMA(yerr_last,
                                 _current_approx,
                                 current_div_approx,
                                 precision=ML_DoubleDouble,
                                 tag="ext_pre_result",
                                 debug=debug_ddtolx)
            subnormal_pre_result = SpecificOperation(
                ext_pre_result,
                ex - ey,
                precision=self.precision,
                specifier=SpecificOperation.Subnormalize,
                tag="subnormal_pre_result",
                debug=debug_lftolx)
            sub_scale_factor = ex - ey
            sub_scale_factor_0 = Max(Min(sub_scale_factor, 950),
                                     -950,
                                     tag="sub_scale_factor_0",
                                     debug=debugd)
            sub_scale_factor_1 = Max(Min(sub_scale_factor - sub_scale_factor_0,
                                         950),
                                     -950,
                                     tag="sub_scale_factor_1",
                                     debug=debugd)
            sub_scale_factor_2 = sub_scale_factor - (sub_scale_factor_1 +
                                                     sub_scale_factor_0)
            sub_scale_factor_2.set_attributes(debug=debugd,
                                              tag="sub_scale_factor_2")
            #subnormal_result = (subnormal_pre_result * ExponentInsertion(ex, tag ="sr_ex_ei")) * ExponentInsertion(-ey, tag = "sr_ey_ei")
            subnormal_result = (
                subnormal_pre_result *
                ExponentInsertion(sub_scale_factor_0)) * ExponentInsertion(
                    sub_scale_factor_1,
                    tag="sr_ey_ei") * ExponentInsertion(sub_scale_factor_2)
            subnormal_result.set_attributes(debug=debug_lftolx,
                                            tag="subnormal_result")
            return result, subnormal_result, _current_approx, inv_iteration_list

        def bit_match(fp_optree, bit_id, likely=False, **kwords):
            return NotEqual(BitLogicAnd(
                TypeCast(fp_optree, precision=ML_Int64), 1 << bit_id),
                            0,
                            likely=likely,
                            **kwords)

        def extract_and_inject_sign(sign_source,
                                    sign_dest,
                                    int_precision=ML_Int64,
                                    fp_precision=self.precision,
                                    **kwords):
            int_sign_dest = sign_dest if isinstance(
                sign_dest.get_precision(), ML_Fixed_Format) else TypeCast(
                    sign_dest, precision=int_precision)
            return TypeCast(BitLogicOr(
                BitLogicAnd(TypeCast(sign_source, precision=int_precision),
                            1 << (self.precision.bit_size - 1)),
                int_sign_dest),
                            precision=fp_precision)

        # Special-value predicates on the raw inputs, used by the
        # special-case handling schemes below.  All are tagged unlikely so
        # the fast path is not penalized.
        x_zero = Test(vx, specifier=Test.IsZero, likely=False)
        y_zero = Test(vy, specifier=Test.IsZero, likely=False)

        # True when vx and vy have opposite signs (decides the sign of
        # infinities / zeros returned on special cases).
        comp_sign = Test(vx,
                         vy,
                         specifier=Test.CompSign,
                         tag="comp_sign",
                         debug=debuglx)

        y_nan = Test(vy, specifier=Test.IsNaN, likely=False)

        # signaling NaNs must additionally raise ML_FPE_Invalid
        x_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False)
        y_snan = Test(vy, specifier=Test.IsSignalingNaN, likely=False)

        x_inf = Test(vx, specifier=Test.IsInfty, likely=False, tag="x_inf")
        y_inf = Test(vy,
                     specifier=Test.IsInfty,
                     likely=False,
                     tag="y_inf",
                     debug=debugd)

        # Placeholders filled by the target-specific branches below; the
        # gappa_* nodes are later used to build the Gappa error-evaluation
        # script.
        scheme = None
        gappa_vx, gappa_vy = None, None
        gappa_init_approx = None
        gappa_current_approx = None

        if isinstance(processor, K1B_Processor):
            print "K1B specific generation"

            gappa_vx = vx
            gappa_vy = vy

            fast_init_approx = DivisionSeed(vx,
                                            vy,
                                            precision=self.precision,
                                            tag="fast_init_approx",
                                            debug=debug_lftolx)
            slow_init_approx = DivisionSeed(scaled_vx,
                                            scaled_vy,
                                            precision=self.precision,
                                            tag="slow_init_approx",
                                            debug=debug_lftolx)

            gappa_init_approx = fast_init_approx

            specific_case = bit_match(fast_init_approx,
                                      0,
                                      tag="b0_specific_case_bit",
                                      debug=debugd)
            y_subnormal_or_zero = bit_match(fast_init_approx,
                                            1,
                                            tag="b1_y_sub_or_zero",
                                            debug=debugd)
            x_subnormal_or_zero = bit_match(fast_init_approx,
                                            2,
                                            tag="b2_x_sub_or_zero",
                                            debug=debugd)
            y_inf_or_nan = bit_match(fast_init_approx,
                                     3,
                                     tag="b3_y_inf_or_nan",
                                     debug=debugd)
            inv_underflow = bit_match(fast_init_approx,
                                      4,
                                      tag="b4_inv_underflow",
                                      debug=debugd)
            x_inf_or_nan = bit_match(fast_init_approx,
                                     5,
                                     tag="b5_x_inf_or_nan",
                                     debug=debugd)
            mult_error_underflow = bit_match(fast_init_approx,
                                             6,
                                             tag="b6_mult_error_underflow",
                                             debug=debugd)
            mult_dividend_underflow = bit_match(
                fast_init_approx,
                7,
                tag="b7_mult_dividend_underflow",
                debug=debugd)
            mult_dividend_overflow = bit_match(fast_init_approx,
                                               8,
                                               tag="b8_mult_dividend_overflow",
                                               debug=debugd)
            direct_result_flag = bit_match(fast_init_approx,
                                           9,
                                           tag="b9_direct_result_flag",
                                           debug=debugd)
            div_overflow = bit_match(fast_init_approx,
                                     10,
                                     tag="b10_div_overflow",
                                     debug=debugd)

            # bit11/eb large = bit_match(fast_init_approx, 11)
            # bit12 = bit_match(fast_init_approx, 11)

            #slow_result, slow_result_subnormal, _, _ = compute_div(slow_init_approx, scaled_vx, scaled_vy, scale_result = (ExponentInsertion(ex, tag = "eiy_sr"), ExponentInsertion(-ey, tag ="eiy_sr")))
            slow_result, slow_result_subnormal, _, _ = compute_div(
                slow_init_approx, scaled_vx, scaled_vy, scale_result=ex - ey)
            fast_result, fast_result_subnormal, fast_current_approx, inv_iteration_list = compute_div(
                fast_init_approx, vx, vy, scale_result=None)
            gappa_current_approx = fast_current_approx

            pre_scheme = ConditionBlock(
                NotEqual(specific_case,
                         0,
                         tag="specific_case",
                         likely=True,
                         debug=debugd),
                Return(fast_result),
                ConditionBlock(
                    Equal(direct_result_flag, 0, tag="direct_result_case"),
                    Return(fast_init_approx),
                    ConditionBlock(
                        x_subnormal_or_zero | y_subnormal_or_zero
                        | inv_underflow | mult_error_underflow
                        | mult_dividend_overflow | mult_dividend_underflow,
                        ConditionBlock(
                            x_zero | y_zero,
                            Return(fast_init_approx),
                            ConditionBlock(
                                Test(slow_result, specifier=Test.IsSubnormal),
                                Return(slow_result_subnormal),
                                Return(slow_result)),
                        ),
                        ConditionBlock(
                            x_inf_or_nan,
                            Return(fast_init_approx),
                            ConditionBlock(
                                y_inf_or_nan,
                                Return(fast_init_approx),
                                ConditionBlock(
                                    NotEqual(div_overflow,
                                             0,
                                             tag="div_overflow_case"),
                                    Return(
                                        RoundedSignedOverflow(
                                            fast_init_approx,
                                            tag="signed_inf")),
                                    #Return(extract_and_inject_sign(fast_init_approx, FP_PlusInfty(self.precision) , tag = "signed_inf")),
                                    Return(FP_SNaN(self.precision))))))))

            scheme = Statement(fast_result, pre_scheme)

        else:
            print "generic generation"

            x_inf_or_nan = Test(vx, specifier=Test.IsInfOrNaN, likely=False)
            y_inf_or_nan = Test(vy,
                                specifier=Test.IsInfOrNaN,
                                likely=False,
                                tag="y_inf_or_nan",
                                debug=debugd)

            result, subnormal_result, gappa_current_approx, inv_iteration_list = compute_div(
                current_approx_std,
                scaled_vx,
                scaled_vy,
                scale_result=(ExponentInsertion(ex), ExponentInsertion(-ey)))
            gappa_vx = scaled_vx
            gappa_vy = scaled_vy
            gappa_init_approx = init_approx

            # x inf and y inf
            pre_scheme = ConditionBlock(
                x_inf_or_nan,
                ConditionBlock(
                    x_inf,
                    ConditionBlock(
                        y_inf_or_nan,
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision)),
                        ),
                        ConditionBlock(comp_sign,
                                       Return(FP_MinusInfty(self.precision)),
                                       Return(FP_PlusInfty(self.precision)))),
                    Statement(ConditionBlock(x_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    x_zero,
                    ConditionBlock(
                        y_zero | y_nan,
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision))), Return(vx)),
                    ConditionBlock(
                        y_inf_or_nan,
                        ConditionBlock(
                            y_inf,
                            Return(
                                Select(comp_sign, FP_MinusZero(self.precision),
                                       FP_PlusZero(self.precision))),
                            Statement(
                                ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                                Return(FP_QNaN(self.precision)))),
                        ConditionBlock(
                            y_zero,
                            Statement(
                                Raise(ML_FPE_DivideByZero),
                                ConditionBlock(
                                    comp_sign,
                                    Return(FP_MinusInfty(self.precision)),
                                    Return(FP_PlusInfty(self.precision)))),
                            ConditionBlock(
                                Test(result,
                                     specifier=Test.IsSubnormal,
                                     likely=False),
                                Statement(
                                    ConditionBlock(
                                        Comparison(
                                            yerr_last,
                                            0,
                                            specifier=Comparison.NotEqual,
                                            likely=True),
                                        Statement(
                                            Raise(ML_FPE_Inexact,
                                                  ML_FPE_Underflow))),
                                    Return(subnormal_result),
                                ),
                                Statement(
                                    ConditionBlock(
                                        Comparison(
                                            yerr_last,
                                            0,
                                            specifier=Comparison.NotEqual,
                                            likely=True),
                                        Raise(ML_FPE_Inexact)),
                                    Return(result)))))))
            rnd_mode = GetRndMode()
            scheme = Statement(rnd_mode, SetRndMode(ML_RoundToNearest),
                               yerr_last, SetRndMode(rnd_mode), pre_result,
                               ClearException(), result, pre_scheme)

        opt_eng = OptimizationEngine(processor)

        # fusing FMA
        if fuse_fma:
            print "MDL fusing FMA"
            scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=self.precision)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        #print "silencing operation"
        #opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        #print scheme.get_str(depth = None, display_precision = True)

        # check processor support
        print "checking processor support"
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        #opt_eng.factorize_fast_path(scheme)

        print "Gappa script generation"

        cg = CCodeGenerator(processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = exp_implementation.get_definition(cg,
                                                        C_Code,
                                                        static_cst=True)
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        self.result.add_header("support_lib/ml_special_values.h")

        output_stream = open(output_file, "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
        seed_var = Variable("seed",
                            precision=self.precision,
                            interval=Interval(0.5, 1))
        cg_eval_error_copy_map = {
            gappa_init_approx.get_handle().get_node():
            seed_var,
            gappa_vx.get_handle().get_node():
            Variable("x", precision=self.precision, interval=Interval(1, 2)),
            gappa_vy.get_handle().get_node():
            Variable("y", precision=self.precision, interval=Interval(1, 2)),
        }
        G1 = Constant(1, precision=ML_Exact)
        exact = G1 / gappa_vy
        exact.set_precision(ML_Exact)
        exact.set_tag("div_exact")
        gappa_goal = gappa_current_approx.get_handle().get_node() - exact
        gappa_goal.set_precision(ML_Exact)
        gappacg = GappaCodeGenerator(target,
                                     declare_cst=False,
                                     disable_debug=True)
        gappa_code = gappacg.get_interval_code(gappa_goal,
                                               cg_eval_error_copy_map)

        new_exact_node = exact.get_handle().get_node()

        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_node)

        seed_wrt_exact = seed_var - new_exact_node
        seed_wrt_exact.set_precision(ML_Exact)
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact,
                               Interval(-S2**-7, S2**-7))

        try:
            eval_error = execute_gappa_script_extract(
                gappa_code.get(gappacg))["goal"]
            print "eval_error: ", eval_error
        except:
            print "error during gappa run"
Esempio n. 7
0
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 num_iter = 3,
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "__divsf3.c", 
                 function_name = "__divsf3"):
        # declaring CodeFunction and retrieving input variable
        self.precision = precision
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = precision)
        vx = exp_implementation.add_input_variable("x", precision) 
        vy = exp_implementation.add_input_variable("y", precision) 

        class NR_Iteration(object):
            """One Newton-Raphson refinement step for a reciprocal
            approximation: given approx ~ 1/divisor, builds
            new_approx = approx + (1 - divisor * approx) * approx,
            either as plain mul/add nodes or as fused multiply-add nodes.
            Also provides the matching Gappa hint rules for error analysis.
            """
            def __init__(self, approx, divisor, force_fma = False):
                # approx:  current reciprocal approximation node
                # divisor: divisor node (the value whose reciprocal is sought)
                # force_fma: build the step with FMA nodes instead of
                #            separate multiply/add nodes
                self.approx = approx
                self.divisor = divisor
                self.force_fma = force_fma
                if force_fma:
                    # error = 1 - divisor * approx  (SubtractNegate FMA form)
                    self.error = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                    self.new_approx = FusedMultiplyAdd(self.error, self.approx, self.approx, specifier = FusedMultiplyAdd.Standard)
                else:
                    self.error = 1 - divisor * approx
                    self.new_approx = self.approx + self.error * self.approx

            def get_new_approx(self):
                """Return the refined approximation node built by this step."""
                return self.new_approx

            def get_hint_rules(self, gcg, gappa_code, exact):
                """Register Gappa rewriting hints for this iteration.

                gcg:        GappaCodeGenerator instance
                gappa_code: Gappa code object the hints are added to
                exact:      node standing for the exact reciprocal value
                NOTE(review): the hint expressions are built under a
                temporary ML_Exact default precision; their exact symbolic
                shape matters to Gappa, do not restructure them.
                """
                # work on the handle-resolved nodes (post-optimization copies)
                divisor = self.divisor.get_handle().get_node()
                approx = self.approx.get_handle().get_node()
                new_approx = self.new_approx.get_handle().get_node()

                Attributes.set_default_precision(ML_Exact)


                # rule0 ~ rule1: the residual 1 - d*a rewritten in terms of
                # the approximation error (approx - exact)
                if self.force_fma:
                    rule0 = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                else:
                    rule0 = 1.0 - divisor * approx
                rule1 = 1.0 - divisor * (approx - exact) - 1.0

                # rule2 ~ rule3: error of the new approximation expressed via
                # the quadratic-convergence identity of the NR step
                rule2 = new_approx - exact
                subrule = approx * (2 - divisor * approx)
                rule3 = (new_approx - subrule) - (approx - exact) * (approx - exact) * divisor

                # subrule ~ rule4: the NR update in its evaluated form
                if self.force_fma:
                    new_error = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                    rule4 = FusedMultiplyAdd(new_error, approx, approx)
                else:
                    rule4 = approx + (1 - divisor * approx) * approx

                Attributes.unset_default_precision()

                # registering hints
                gcg.add_hint(gappa_code, rule0, rule1)
                gcg.add_hint(gappa_code, rule2, rule3)
                gcg.add_hint(gappa_code, subrule, rule4)

        debugf        = ML_Debug(display_format = "%f")
        debuglf       = ML_Debug(display_format = "%lf")
        debugx        = ML_Debug(display_format = "%x")
        debuglx       = ML_Debug(display_format = "%lx")
        debugd        = ML_Debug(display_format = "%d")
        debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_ddtolx  = ML_Debug(display_format = "%\"PRIx64\" %\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" % (v, v))
        debug_dd      = ML_Debug(display_format = "{.hi=%lf, .lo=%lf}", pre_process = lambda v: "%s.hi, %s.lo" % (v, v))

        ex = Min(ExponentExtraction(vx, tag = "ex", debug = debugd), 1020)
        ey = Min(ExponentExtraction(vy, tag = "ey", debug = debugd), 1020)

        scaling_factor_x = ExponentInsertion(-ex) #ConditionalAllocation(Abs(ex) > 100, -ex, 0)
        scaling_factor_y = ExponentInsertion(-ey) #ConditionalAllocation(Abs(ey) > 100, -ey, 0)

        scaled_vx = vx * scaling_factor_x
        scaled_vy = vy * scaling_factor_y

        scaled_vx.set_attributes(debug = debug_lftolx, tag = "scaled_vx")
        scaled_vy.set_attributes(debug = debug_lftolx, tag = "scaled_vy")

        px = Conversion(scaled_vx, precision = ML_Binary32, tag = "px", debug=debugf) if self.precision != ML_Binary32 else vx
        py = Conversion(scaled_vy, precision = ML_Binary32, tag = "py", debug=debugf) if self.precision != ML_Binary32 else vy

        pre_init_approx = DivisionSeed(px, py, precision = ML_Binary32, tag = "seed", debug = debugf)  
        init_approx = Conversion(pre_init_approx, precision = self.precision, tag = "seedd", debug = debug_lftolx) if self.precision != ML_Binary32 else pre_init_approx

        current_approx = init_approx 
        # correctly-rounded inverse computation
        num_iteration = num_iter
        inv_iteration_list = []

        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        for i in range(num_iteration):
            new_iteration = NR_Iteration(current_approx, scaled_vy, force_fma = False if (i != num_iteration - 1) else True)
            inv_iteration_list.append(new_iteration)
            current_approx = new_iteration.get_new_approx()
            current_approx.set_attributes(tag = "iter_%d" % i, debug = debug_lftolx)


        def dividend_mult(div_approx, inv_approx, dividend, divisor, index, force_fma = False):
            """One residual-correction step on a quotient approximation.

            residual = dividend - div_approx * divisor is the remainder left
            by the current quotient estimate; adding residual * inv_approx
            (inv_approx ~ 1/divisor) cancels most of it.  <index> only
            serves to tag the intermediate nodes for debugging.
            NOTE(review): force_fma is accepted but unused here.
            """
            residual = dividend - div_approx * divisor
            residual.set_attributes(tag = "yerr%d" % index, debug = debug_lftolx)
            refined = div_approx + residual * inv_approx
            refined.set_attributes(tag = "new_div%d" % index, debug = debug_lftolx)
            return refined

        # multiplication correction iteration
        # to get correctly rounded full division
        current_approx.set_attributes(tag = "final_approx", debug = debug_lftolx)
        current_div_approx = scaled_vx * current_approx
        num_dividend_mult_iteration = 1
        for i in range(num_dividend_mult_iteration):
            current_div_approx = dividend_mult(current_div_approx, current_approx, scaled_vx, scaled_vy, i)


        # last iteration
        yerr_last = FMSN(current_div_approx, scaled_vy, scaled_vx) #, clearprevious = True)
        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()
        last_div_approx = FMA(yerr_last, current_approx, current_div_approx)

        yerr_last.set_attributes(tag = "yerr_last", debug = debug_lftolx)

        pre_result = last_div_approx
        pre_result.set_attributes(tag = "unscaled_div_result", debug = debug_lftolx)
        result = pre_result * ExponentInsertion(ex) * ExponentInsertion(-ey)
        result.set_attributes(tag = "result", debug = debug_lftolx)


        x_inf_or_nan = Test(vx, specifier = Test.IsInfOrNaN, likely = False)
        y_inf_or_nan = Test(vy, specifier = Test.IsInfOrNaN, likely = False, tag = "y_inf_or_nan", debug = debugd)
        comp_sign = Test(vx, vy, specifier = Test.CompSign, tag = "comp_sign", debug = debuglx )
        x_zero = Test(vx, specifier = Test.IsZero, likely = False)
        y_zero = Test(vy, specifier = Test.IsZero, likely = False)

        y_nan = Test(vy, specifier = Test.IsNaN, likely = False)

        x_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False)
        y_snan = Test(vy, specifier = Test.IsSignalingNaN, likely = False)

        x_inf = Test(vx, specifier = Test.IsInfty, likely = False, tag = "x_inf")
        y_inf = Test(vy, specifier = Test.IsInfty, likely = False, tag = "y_inf", debug = debugd)

        # determining an extended precision 
        ext_precision_map = {
            ML_Binary32: ML_Binary64,
            ML_Binary64: ML_DoubleDouble,
        }
        ext_precision = ext_precision_map[self.precision]

        ext_pre_result = FMA(yerr_last, current_approx, current_div_approx, precision = ext_precision, tag = "ext_pre_result", debug = debug_ddtolx)
        subnormal_result = None
        if isinstance(ext_precision, ML_Compound_FP_Format):
            subnormal_pre_result = SpecificOperation(ext_pre_result, ex - ey, precision = self.precision, specifier = SpecificOperation.Subnormalize, tag = "subnormal_pre_result", debug = debug_lftolx)
            subnormal_result = (subnormal_pre_result * ExponentInsertion(ex)) * ExponentInsertion(-ey)
        else:
            subnormal_result = Conversion(ext_pre_result * ExponentInsertion(ex - ey, tag = "final_scaling_factor", precision = ext_precision), precision = self.precision)


        # x inf and y inf 
        pre_scheme = ConditionBlock(x_inf_or_nan, 
            ConditionBlock(x_inf,
                ConditionBlock(y_inf_or_nan, 
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision)),
                    ),
                    ConditionBlock(comp_sign, Return(FP_MinusInfty(self.precision)), Return(FP_PlusInfty(self.precision)))
                ),
                Statement(
                    ConditionBlock(x_snan, Raise(ML_FPE_Invalid)),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(x_zero,
                ConditionBlock(y_zero | y_nan,
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision))
                    ),
                    Return(vx)
                ),
                ConditionBlock(y_inf_or_nan,
                    ConditionBlock(y_inf,
                        Return(Select(comp_sign, FP_MinusZero(self.precision), FP_PlusZero(self.precision))),
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision))
                        )
                    ),
                    ConditionBlock(y_zero,
                        Statement(
                            Raise(ML_FPE_DivideByZero),
                            ConditionBlock(comp_sign, 
                                Return(FP_MinusInfty(self.precision)),
                                Return(FP_PlusInfty(self.precision))
                            )
                        ),
                        ConditionBlock(Test(result, specifier = Test.IsSubnormal, likely = False),
                            Statement(
                                ConditionBlock(Comparison(yerr_last, 0, specifier = Comparison.NotEqual, likely = True),
                                    Statement(Raise(ML_FPE_Inexact, ML_FPE_Underflow))
                                ),
                                Return(subnormal_result),
                            ),
                            Statement(
                                ConditionBlock(Comparison(yerr_last, 0, specifier = Comparison.NotEqual, likely = True),
                                    Raise(ML_FPE_Inexact)
                                ),
                                Return(result)
                            )
                        )
                    )
                )
            )
        )

        rnd_mode = GetRndMode()
        scheme = Statement(rnd_mode, SetRndMode(ML_RoundToNearest), yerr_last, SetRndMode(rnd_mode), pre_result, ClearException(), result, pre_scheme)


        processor = target

        opt_eng = OptimizationEngine(processor)

        # fusing FMA
        if fuse_fma:
            print "MDL fusing FMA"
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)


        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        #print "silencing operation"
        #opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        #print scheme.get_str(depth = None, display_precision = True)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        #opt_eng.factorize_fast_path(scheme)
        
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        self.result.add_header("support_lib/ml_special_values.h")

        output_stream = open(output_file, "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
        seed_var = Variable("seed", precision = self.precision, interval = Interval(0.5, 1))
        cg_eval_error_copy_map = {
            init_approx.get_handle().get_node(): seed_var,
            scaled_vx.get_handle().get_node(): Variable("x", precision = self.precision, interval = Interval(1, 2)),
            scaled_vy.get_handle().get_node(): Variable("y", precision = self.precision, interval = Interval(1, 2)),
        }
        G1 = Constant(1, precision = ML_Exact)
        exact = G1 / scaled_vy
        exact.set_precision(ML_Exact)
        exact.set_tag("div_exact")
        gappa_goal = current_approx.get_handle().get_node() - exact
        gappa_goal.set_precision(ML_Exact)
        gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
        gappa_code = gappacg.get_interval_code(gappa_goal, cg_eval_error_copy_map)

        new_exact_node = exact.get_handle().get_node()

        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_node)

        seed_wrt_exact = seed_var - new_exact_node
        seed_wrt_exact.set_precision(ML_Exact)
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact, Interval(-S2**-7, S2**-7))

        eval_error = execute_gappa_script_extract(gappa_code.get(gappacg))["goal"]
        print "eval_error: ", eval_error