def generate_scheme(self):
    """Build the entity scheme exercising a FixedPointPosition anchor.

    Three input signals (``x``, ``y``, ``z``) are declared with the format
    parsed from ``"FU3.<width - 3>"``.  ``x`` is compared against a
    FixedPointPosition built on ``x`` itself (position -3, aligned
    ``FromPointToMSB``) and the comparison selects between ``x`` and ``y``,
    each converted to ``self.precision``.  The selection is exposed as the
    ``result`` output signal.

    :return: single-element list containing ``self.implementation``
    """
    integer_bits = 3
    fractional_bits = self.width - integer_bits
    in_format = hdl_precision_parser(
        "FU%d.%d" % (integer_bits, fractional_bits))
    # parsed like the input format but not referenced by the scheme below
    out_format = hdl_precision_parser(
        "FS%d.%d" % (integer_bits, fractional_bits))

    # entity inputs; "z" is declared but does not feed the result
    var_x = self.implementation.add_input_signal("x", in_format)
    var_y = self.implementation.add_input_signal("y", in_format)
    self.implementation.add_input_signal("z", in_format)

    anchor = FixedPointPosition(
        var_x,
        -3,
        align=FixedPointPosition.FromPointToMSB,
        tag="anchor",
    )
    above_anchor = var_x > anchor

    x_converted = Conversion(var_x, precision=self.precision)
    y_converted = Conversion(var_y, precision=self.precision)
    selected = Select(above_anchor, x_converted, y_converted)

    self.implementation.add_output_signal("result", selected)
    return [self.implementation]
def legalize_Select(optree):
    """Legalize a Select node by aligning its value operands on its format.

    The "if" (index 1) and "else" (index 2) operands are wrapped in a
    Conversion towards the Select precision whenever their bit size differs
    from the bit size of the Select precision.  The condition operand
    (index 0) is left untouched.

    :param optree: Select node to legalize (modified in place)
    :return: the same ``optree`` object
    """
    precision = optree.get_precision()
    if precision is None:
        Log.report(Log.Error, "None precision for Select:\n{}", optree)
    target_bit_size = precision.get_bit_size()

    # operands 1 and 2 are the two selectable values
    for operand_index in (1, 2):
        operand = optree.get_input(operand_index)
        if operand.get_precision().get_bit_size() != target_bit_size:
            optree.set_input(
                operand_index, Conversion(operand, precision=precision))
    return optree
def legalize_invsqrt_seed(optree):
    """Expand a ReciprocalSquareRootSeed node into elementary operations.

    The approximation is evaluated in ML_Binary32 (the operand is converted
    first when the node format differs) as the product of:

    - a value loaded from ``invsqrt_approx_table``,
    - ``ExponentInsertion(-e / 2)`` where ``e`` is the operand exponent,
    - a correction selected on ``e % 2``: 1.0 when it is 0, ``S2**0.5`` when
      it is -1 and ``S2**-0.5`` otherwise.

    The result is converted back to the node format when that format is not
    ML_Binary32.
    """
    assert isinstance(optree, ReciprocalSquareRootSeed)
    op_prec = optree.get_precision()
    approx_prec = ML_Binary32

    op_input = optree.get_input(0)
    if op_prec != approx_prec:
        op_input = Conversion(op_input, precision=ML_Binary32)

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    op_exp = ExponentExtraction(
        op_input, tag="op_exp", debug=debug_multi, precision=ML_Int32)

    # exponent of the approximation: -e / 2
    negated_exp = Negation(op_exp, precision=ML_Int32)
    half_divisor = Constant(2, precision=ML_Int32)
    neg_half_exp = Division(negated_exp, half_divisor, precision=ML_Int32)
    approx_exp = ExponentInsertion(
        neg_half_exp, tag="approx_exp", debug=debug_multi,
        precision=approx_prec)

    # correction factor driven by the parity of the exponent
    op_exp_parity = Modulo(
        op_exp, Constant(2, precision=ML_Int32), precision=ML_Int32)
    parity_is_zero = Equal(op_exp_parity, Constant(0, precision=ML_Int32))
    unit_factor = Constant(1.0, precision=approx_prec)
    parity_is_minus_one = Equal(
        op_exp_parity, Constant(-1, precision=ML_Int32))
    sqrt2_factor = Constant(S2**0.5, precision=approx_prec)
    inv_sqrt2_factor = Constant(S2**-0.5, precision=approx_prec)
    odd_correction = Select(
        parity_is_minus_one, sqrt2_factor, inv_sqrt2_factor,
        precision=approx_prec)
    approx_exp_correction = Select(
        parity_is_zero, unit_factor, odd_correction,
        precision=approx_prec, tag="approx_exp_correction",
        debug=debug_multi)

    table_index = invsqrt_approx_table.get_index_function()(op_input)
    table_index.set_attributes(tag="invsqrt_index", debug=debug_multi)

    table_value = TableLoad(
        invsqrt_approx_table, table_index, precision=approx_prec)
    exponent_scaling = Multiplication(
        approx_exp_correction, approx_exp, precision=approx_prec)
    approx = Multiplication(
        table_value, exponent_scaling,
        tag="invsqrt_approx", debug=debug_multi, precision=approx_prec)

    if approx_prec != op_prec:
        return Conversion(approx, precision=op_prec)
    return approx
def sw_legalize_concatenation(node):
    """Rewrite a two-operand RTL Concatenation with software operations.

    Both operands are converted to the node format; the first one is
    shifted left by the bit size of the second operand's format and the two
    are merged with a bitwise OR.

    :param node: Concatenation node, must have exactly two inputs
    :return: BitLogicOr node in the format of ``node``
    """
    assert len(node.inputs) == 2
    high_part = node.get_input(0)
    low_part = node.get_input(1)
    out_format = node.get_precision()

    widened_high = Conversion(high_part, precision=out_format)
    low_width = low_part.get_precision().get_bit_size()
    shifted_high = BitLogicLeftShift(
        widened_high, low_width, precision=out_format)
    widened_low = Conversion(low_part, precision=out_format)
    return BitLogicOr(shifted_high, widened_low, precision=out_format)
def legalize_reciprocal_seed(optree):
    """Expand a ReciprocalSeed node into elementary operations.

    The computation runs in the storage format of
    ``generic_inv_approx_table`` (the operand is converted first when the
    node format differs).  The approximation is the product of a table
    value, ``ExponentInsertion(-e)`` where ``e`` is the operand exponent,
    and ``CopySign(operand, 1.0)``.  When a conversion was inserted on the
    operand, the result is converted back to the node format.
    """
    assert isinstance(optree, ReciprocalSeed)
    requested_prec = optree.get_precision()
    operand = optree.get_input(0)
    table_format = generic_inv_approx_table.get_storage_precision()

    work_prec = requested_prec
    needs_back_conversion = False
    if requested_prec != table_format:
        operand = Conversion(operand, precision=table_format)
        work_prec = table_format
        needs_back_conversion = True

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    int_prec = work_prec.get_integer_format()

    one = Constant(1.0, precision=work_prec)
    op_sign = CopySign(operand, one, precision=work_prec)
    op_exp = ExponentExtraction(
        operand, tag="op_exp", debug=debug_multi, precision=int_prec)
    neg_exp = Negation(op_exp, precision=int_prec)
    approx_exp = ExponentInsertion(
        neg_exp, tag="approx_exp", debug=debug_multi, precision=work_prec)

    table_index = generic_inv_approx_table.get_index_function()(operand)
    table_index.set_attributes(tag="inv_index", debug=debug_multi)

    table_value = TableLoad(
        generic_inv_approx_table, table_index, precision=work_prec)
    signed_scaling = Multiplication(approx_exp, op_sign, precision=work_prec)
    approx = Multiplication(
        table_value, signed_scaling,
        tag="inv_approx", debug=debug_multi, precision=work_prec)

    if needs_back_conversion:
        return Conversion(approx, precision=requested_prec)
    return approx
def propagate_format_to_input(new_format, optree, input_index_list):
    """Push ``new_format`` onto selected inputs of ``optree``.

    For every index listed in ``input_index_list``:

    - an input without precision receives ``new_format`` and the
      propagation recurses on the indexes returned by
      ``does_node_propagate_format`` for that input;
    - an input whose format already equals ``new_format`` is left as is;
    - a constant input is replaced by a copy carrying ``new_format`` when
      the format is fixed-point and the constant fits, otherwise an error
      is reported;
    - any other input is wrapped into a Conversion to ``new_format``.
    """
    for op_index in input_index_list:
        op_input = optree.get_input(op_index)

        if op_input.get_precision() is None:
            op_input.set_precision(new_format)
            sub_index_list = does_node_propagate_format(op_input)
            propagate_format_to_input(new_format, op_input, sub_index_list)
            continue

        if test_format_equality(new_format, op_input.get_precision()):
            continue

        if not is_constant(op_input):
            optree.set_input(
                op_index, Conversion(op_input, precision=new_format))
            continue

        # constant input with a mismatching format
        if not is_fixed_point(new_format):
            Log.report(
                Log.Error,
                "format {} during propagation to input {} of {} is not a fixed-point format",
                new_format, op_input, optree)
        elif format_does_fit(op_input, new_format):
            # only render the node when the message will be displayed
            rendered = (op_input.get_str(display_precision=True)
                        if Log.is_level_enabled(Log.Info) else "")
            Log.report(
                Log.Info,
                "Simplify Constant Conversion {} to larger Constant: {}",
                rendered, str(new_format))
            resized_constant = op_input.copy()
            resized_constant.set_precision(new_format)
            optree.set_input(op_index, resized_constant)
        else:
            rendered = (op_input.get_str(display_precision=True)
                        if Log.is_level_enabled(Log.Error) else "")
            Log.report(
                Log.Error,
                "Constant is about to be reduced to a too constrained format: {}",
                rendered)
def generate_scheme(self):
    """Build a four-stage pipelined add/subtract chain.

    Inputs ``x`` and ``y`` use ``self.input_precision``.  A new pipeline
    stage is started before each of the following operations:
    ``x + y``, ``(x + y) - y``, ``((x + y) - y) - x`` and a final addition of
    ``x``.  The output ``vr_out`` is the third value converted to
    ``self.output_precision``; the final addition is built but not routed
    to the output.

    :return: single-element list containing ``self.implementation``
    """
    Log.report(Log.Info, "input_precision is {}".format(self.input_precision))
    Log.report(Log.Info, "output_precision is {}".format(self.output_precision))

    # entity inputs
    var_x = self.implementation.add_input_signal("x", self.input_precision)
    var_y = self.implementation.add_input_signal("y", self.input_precision)
    for signal in (var_x, var_y):
        signal.set_attributes(debug=debug_fixed)

    self.implementation.start_new_stage()
    stage1_sum = var_x + var_y

    self.implementation.start_new_stage()
    stage2_diff = stage1_sum - var_y

    self.implementation.start_new_stage()
    pre_result = stage2_diff - var_x

    self.implementation.start_new_stage()
    # built in the last stage but not connected to the output
    post_result = pre_result + var_x

    output_value = Conversion(pre_result, precision=self.output_precision)
    self.implementation.add_output_signal("vr_out", output_value)
    return [self.implementation]
def generate_dummy_scheme(self):
    """Build a scheme summing every term produced by the input stage map.

    ``self.instanciate_inputs()`` returns a mapping from stage index to a
    list of ``(constructor, operands)`` pairs.  Stages are visited in
    sorted order; for a stage index that is not None the pipeline is
    advanced until it reaches that stage.  Each pair is turned into a term
    and all terms are accumulated with Addition.  The accumulated value,
    converted to ``self.precision``, drives the ``result_o`` output.

    :return: single-element list containing ``self.implementation``
    """
    Log.report(
        Log.Info,
        "generating MultArray with output precision {precision}".format(
            precision=self.precision))

    stage_map = self.instanciate_inputs()
    accumulator = None

    for stage_id in sorted(stage_map.keys()):
        # synchronizing pipeline stage (None means "no stage constraint")
        if stage_id is not None:
            while stage_id > self.implementation.get_current_stage():
                self.implementation.start_new_stage()

        for ctor, operand_list in stage_map[stage_id]:
            term = ctor(*tuple(operand_list))
            accumulator = (term if accumulator is None
                           else Addition(accumulator, term))

    output_value = Conversion(accumulator, precision=self.precision)
    self.implementation.add_output_signal("result_o", output_value)
    return [self.implementation]
def generate_scheme(self):
    """Build a three-stage scheme computing ``Max(0, x - y) + x``.

    Inputs ``x`` and ``y`` share the format ``fixed_point(3, width - 3)``.
    ``x - y`` is built in the initial stage, ``Max(0, x - y)`` after a first
    stage boundary and the final addition with ``x`` after a second one.
    The sum, converted to the same fixed-point layout, drives ``vr_out``.

    :return: single-element list containing ``self.implementation``
    """
    Log.report(Log.Info, "width parameter is {}".format(self.width))
    integer_bits = 3
    fractional_bits = self.width - integer_bits

    in_format = fixed_point(integer_bits, fractional_bits)
    out_format = fixed_point(integer_bits, fractional_bits)

    # entity inputs
    var_x = self.implementation.add_input_signal("x", in_format)
    var_y = self.implementation.add_input_signal("y", in_format)
    for signal in (var_x, var_y):
        signal.set_attributes(debug=debug_fixed)

    difference = var_x - var_y
    # zero constant is still instantiated although Max() below receives a
    # plain 0 literal
    zero = Constant(0)

    self.implementation.start_new_stage()
    pre_result = Max(0, difference)

    self.implementation.start_new_stage()
    output_value = Conversion(pre_result + var_x, precision=out_format)

    self.implementation.add_output_signal("vr_out", output_value)
    return [self.implementation]
def generate_scheme(self):
    """Build a scheme selecting between the constant 1 and ``x + y``.

    Inputs ``x`` and ``y`` share the format ``fixed_point(3, width - 3)``.
    The output ``vr_out`` is ``Select(x > 1, 1, x + y)`` converted to the
    same fixed-point layout.

    :return: single-element list containing ``self.implementation``
    """
    Log.report(Log.Info, "width parameter is {}".format(self.width))
    integer_bits = 3
    fractional_bits = self.width - integer_bits

    in_format = fixed_point(integer_bits, fractional_bits)
    out_format = fixed_point(integer_bits, fractional_bits)

    # entity inputs
    var_x = self.implementation.add_input_signal("x", in_format)
    var_y = self.implementation.add_input_signal("y", in_format)
    for signal in (var_x, var_y):
        signal.set_attributes(debug=debug_fixed)

    x_above_one = var_x > 1
    x_above_one.set_attributes(tag="test", debug=debug_std)

    large_add = var_x + var_y

    pre_result = Select(
        x_above_one,
        1,
        large_add,
        tag="pre_result",
        debug=debug_fixed,
    )

    output_value = Conversion(pre_result, precision=out_format)
    self.implementation.add_output_signal("vr_out", output_value)
    return [self.implementation]
def get_output_check_statement(output_signal, output_tag, output_value):
    """Generate the check of an output signal against its expected value.

    :param output_signal: signal node to check
    :param output_tag: name inserted in the report message
    :param output_value: expected value node
    :return: pair ``(test_pass_cond, check_statement)`` where
        ``test_pass_cond`` is the equality comparison and
        ``check_statement`` is a ConditionBlock issuing a Report of the
        signal value (rendered as a string) when the comparison is false
    """
    test_pass_cond = Comparison(
        output_signal,
        output_value,
        specifier=Comparison.Equal,
        precision=ML_Bool,
    )
    test_failed = LogicalNot(test_pass_cond, precision=ML_Bool)

    message_prefix = " result for {}: ".format(output_tag)

    # a single ML_StdLogic signal is converted directly, anything else is
    # first cast to a std_logic_vector of the same bit size
    if output_signal.get_precision() is ML_StdLogic:
        printable_signal = output_signal
    else:
        vector_format = ML_StdLogicVectorFormat(
            output_signal.get_precision().get_bit_size())
        printable_signal = TypeCast(output_signal, precision=vector_format)

    signal_text = Conversion(printable_signal, precision=ML_String)
    message = Concatenation(message_prefix, signal_text, precision=ML_String)

    check_statement = ConditionBlock(test_failed, Report(message))
    return test_pass_cond, check_statement
def legalize_integer_nearest(optree):
    """Rewrite a NearestInteger node through an integer intermediate.

    The node format (v4float32 or ML_Binary32) is mapped to the matching
    integer format (v4int32 or ML_Int32).  The operand is rounded by a
    NearestInteger in that integer format and the result is converted back
    to the node format.  This conversion is lossy.

    :raises KeyError: when the node format is neither v4float32 nor
        ML_Binary32
    """
    op_input = optree.get_input(0)
    integer_format_of = {
        v4float32: v4int32,
        ML_Binary32: ML_Int32,
    }
    node_format = optree.get_precision()
    int_precision = integer_format_of[node_format]

    rounded = NearestInteger(op_input, precision=int_precision)
    return Conversion(rounded, precision=node_format)
def signal_str_conversion(optree, op_format):
    """Convert ``optree``, whose format is ``op_format``, to ML_String.

    A node in ML_StdLogic format is converted directly; any other node is
    first cast to an ML_StdLogicVectorFormat of the same bit size.
    """
    if op_format is ML_StdLogic:
        printable = optree
    else:
        vector_format = ML_StdLogicVectorFormat(op_format.get_bit_size())
        printable = TypeCast(optree, precision=vector_format)
    return Conversion(printable, precision=ML_String)
def simplify_inverse(optree, processor):
    """Replace ``optree`` by a load from the processor's division seed table.

    The table is retrieved by asking ``processor`` for the recursive
    implementation of a dummy DivisionSeed node in the format of ``optree``.
    The table is indexed with its own index function applied to the first
    input of ``optree``, plus a constant 0 as second index.  When the table
    storage format differs from the node format, the loaded value is
    converted to the node format.
    """
    dummy_var = Variable("dummy_var_seed", precision=optree.get_precision())
    dummy_div_seed = DivisionSeed(dummy_var, precision=optree.get_precision())
    inv_approx_table = processor.get_recursive_implementation(
        dummy_div_seed,
        language=None,
        table_getter=lambda self: self.approx_table_map,
    )

    seed_input = optree.inputs[0]
    zero_index = Constant(0, precision=ML_Int32)

    same_format = (
        optree.get_precision() == inv_approx_table.get_storage_precision())
    table_index = inv_approx_table.get_index_function()(seed_input)

    if same_format:
        return TableLoad(
            inv_approx_table, table_index, zero_index,
            precision=optree.get_precision())

    loaded = TableLoad(
        inv_approx_table, table_index, zero_index,
        precision=inv_approx_table.get_storage_precision())
    return Conversion(loaded, precision=optree.get_precision())
def insert_conversion_when_required(op_input, final_precision):
    """Return ``op_input`` in ``final_precision``, converting only if needed.

    :param op_input: input operation node
    :type op_input: ML_Operation
    :param final_precision: target format
    :type final_precision: ML_Format
    :return: ``op_input`` itself when its precision already equals
        ``final_precision``, otherwise a Conversion node wrapping it
    :rtype: ML_Operation
    """
    needs_conversion = op_input.get_precision() != final_precision
    if not needs_conversion:
        return op_input
    return Conversion(op_input, precision=final_precision)
def generate_scheme(self):
    """Build a scheme loading a BFloat16 table entry and widening it.

    A one-dimensional table of ``self.table_size`` BFloat16 entries is
    filled with ``1.1**i``.  The function input ``x`` is used as the table
    index and the loaded value is converted to ML_Binary32 before being
    returned.

    :return: Return node of the scheme
    """
    # declaring function input variable
    input_format = self.get_input_precision(0)
    vx = self.implementation.add_input_variable("x", input_format)

    bf16_params = ML_NewTable(
        dimensions=[self.table_size], storage_precision=BFloat16)
    for index in range(self.table_size):
        bf16_params[index] = 1.1**index

    loaded = TableLoad(bf16_params, vx)
    conv_vx = Conversion(
        loaded, precision=ML_Binary32, tag="conv_vx", debug=debug_multi)

    return Return(conv_vx, precision=self.precision, debug=debug_multi)
def generate_scheme(self):
    """Build a scheme mixing a three-level Select tree with a Max.

    Inputs ``x`` and ``y`` share the format ``fixed_point(3, width - 3)``.
    With ``c`` a zero constant and ``sub = x - y`` the selection tree is::

        c > sub ? (c < y ? sub : ((c > x and c < y) ? x : c)) : y

    The output ``vr_out`` is the sum of ``Max(0, x - y)`` and that
    selection, converted to the same fixed-point layout.

    :return: single-element list containing ``self.implementation``
    """
    Log.report(Log.Info, "width parameter is {}".format(self.width))
    integer_bits = 3
    fractional_bits = self.width - integer_bits

    in_format = fixed_point(integer_bits, fractional_bits)
    out_format = fixed_point(integer_bits, fractional_bits)

    # entity inputs
    var_x = self.implementation.add_input_signal("x", in_format)
    var_y = self.implementation.add_input_signal("y", in_format)
    for signal in (var_x, var_y):
        signal.set_attributes(debug=debug_fixed)

    # comparison node is tagged for debug but does not feed the output
    test = var_x > 1
    test.set_attributes(tag="test", debug=debug_std)

    sub = var_x - var_y
    c = Constant(0)

    # nodes are created outermost condition first, innermost select last
    sub_is_negative = c > sub
    y_is_positive = c < var_y
    last_level_cond = LogicalAnd(
        c > var_x, c < var_y, tag="last_lev_cond")
    last_level_select = Select(
        last_level_cond, var_x, c, tag="last_lev_sel")
    middle_select = Select(
        y_is_positive, sub, last_level_select, tag="pre_select")
    pre_result_select = Select(
        sub_is_negative, middle_select, var_y, tag="pre_result_select")

    pre_result = Max(0, var_x - var_y, tag="pre_result")

    total = Addition(pre_result, pre_result_select, tag="add")
    output_value = Conversion(total, precision=out_format)

    self.implementation.add_output_signal("vr_out", output_value)
    return [self.implementation]
def legalize_vector_reduction_test(optree):
    """Legalize a vector test (e.g. IsMaskNotAnyZero) into basic operations.

    The operand is converted to a vector of 32-bit integers with the same
    number of lanes, the whole vector is cast to a single wide integer and
    that integer is compared for equality with zero.

    :raises KeyError: when the operand vector size is not 2, 4 or 8
    """
    op_input = optree.get_input(0)
    vector_size = op_input.get_precision().get_vector_size()

    lane_format_by_size = {2: v2int32, 4: v4int32, 8: v8int32}
    conv_format = lane_format_by_size[vector_size]
    wide_format_by_size = {2: ML_Int64, 4: ML_Int128, 8: ML_Int256}
    cast_format = wide_format_by_size[vector_size]

    as_int_vector = Conversion(op_input, precision=conv_format)
    as_wide_int = TypeCast(as_int_vector, precision=cast_format)
    zero = Constant(0, precision=cast_format)
    return Comparison(
        as_wide_int,
        zero,
        specifier=Comparison.Equal,
        precision=ML_Bool,
    )
def comp_3to2(a, b, c):
    """3 digits to 2 digits compressor.

    Built from single-bit logic operations in ML_StdLogic format:

    - digit = a xor (b xor c)
    - carry = (a and b) or ((a and c) or (c and b))

    :param a: first input bit node
    :param b: second input bit node
    :param c: third input bit node
    :return: pair ``(carry, digit)``
    """
    digit = BitLogicXor(
        a,
        BitLogicXor(b, c, precision=ML_StdLogic),
        precision=ML_StdLogic)
    carry = BitLogicOr(
        BitLogicAnd(a, b, precision=ML_StdLogic),
        BitLogicOr(
            BitLogicAnd(a, c, precision=ML_StdLogic),
            BitLogicAnd(c, b, precision=ML_StdLogic),
            precision=ML_StdLogic),
        precision=ML_StdLogic)
    return carry, digit
def generate_bitfield_extraction(target_format, input_node, lo_index, hi_index):
    """Extract bits ``lo_index`` to ``hi_index`` (inclusive) of ``input_node``.

    The input is shifted right by ``lo_index`` (no shift node is emitted for
    a zero shift), reinterpreted as an unsigned integer as wide as the input
    base format and masked to ``hi_index - lo_index + 1`` bits.  When both
    the input base format and ``target_format`` are fixed-point, the masked
    value is reinterpreted as an unsigned fixed-point format carrying the
    fractional size of ``target_format`` before the final conversion.

    :return: Conversion node in ``target_format``
    """
    shift_amount = lo_index
    field_width = hi_index - lo_index + 1

    input_format = input_node.get_precision().get_base_format()
    cast_format = None
    if is_fixed_point(input_format) and is_fixed_point(target_format):
        frac_size = target_format.get_frac_size()
        int_size = input_format.get_bit_size() - frac_size
        cast_format = ML_Custom_FixedPoint_Format(
            int_size, frac_size, signed=False)

    # 1st step: shifting the input node the right amount
    if shift_amount == 0:
        shifted_node = input_node
    else:
        shifted_node = BitLogicRightShift(
            input_node,
            Constant(shift_amount, precision=ML_Int32),
            precision=input_format)

    raw_format = ML_Custom_FixedPoint_Format(
        input_format.get_bit_size(), 0, signed=False)

    # 2nd step: masking the input node
    # TODO/FIXME: check that mask does not overflow or wrap-around
    raw_node = TypeCast(shifted_node, precision=raw_format)
    field_mask = Constant((2**field_width - 1), precision=raw_format)
    masked_node = BitLogicAnd(raw_node, field_mask, precision=raw_format)

    if cast_format is not None:
        casted_node = TypeCast(masked_node, precision=cast_format)
    else:
        casted_node = masked_node

    return Conversion(casted_node, precision=target_format)
def generate_exp_insertion(optree, result_precision):
    """Expand an ExponentInsertion of ``optree`` into integer operations.

    The exponent (converted to the integer work format when it is not
    already in it) is added to the negated bias of ``result_precision``,
    shifted left by the field size of ``result_precision`` and the resulting
    bit pattern is cast to ``result_precision``.  For vector results the
    work format comes from VECTOR_TYPE_MAP and the constants are replicated
    once per lane.

    :param optree: exponent node, its (scalar) format must be a standard
        integer format
    :param result_precision: format of the generated value
    """
    if result_precision.is_vector_format():
        scalar_format = optree.precision.get_scalar_format()
        vector_size = optree.precision.get_vector_size()
        result_scalar_format = result_precision.get_scalar_format()
        # determine the working format (for expression)
        work_format = VECTOR_TYPE_MAP[
            result_scalar_format.get_integer_format()][vector_size]
        bias_cst = [-result_precision.get_scalar_format().get_bias()] * vector_size
        shift_cst = [
            result_precision.get_scalar_format().get_field_size()
        ] * vector_size
    else:
        scalar_format = optree.precision
        work_format = result_precision.get_integer_format()
        bias_cst = -result_precision.get_bias()
        shift_cst = result_precision.get_field_size()

    if not is_std_integer_format(scalar_format):
        Log.report(
            Log.Error,
            "{} should be a std integer format in generate_exp_insertion {} with precision {}",
            scalar_format, optree, result_precision)
    assert is_std_integer_format(scalar_format)

    if optree.precision is work_format:
        exponent = optree
    else:
        exponent = Conversion(optree, precision=work_format)

    bias = Constant(bias_cst, precision=work_format)
    biased_exponent = Addition(exponent, bias, precision=work_format)

    shift = Constant(shift_cst, precision=work_format)
    result = BitLogicLeftShift(biased_exponent, shift, precision=work_format)
    return TypeCast(result, precision=result_precision)
def generate_scalar_scheme(self, vx, n):
    """Build the operation graph evaluating rootn(vx, n) = vx^(1/n).

    The general path evaluates 2^(log2(m) / n) * 2^(e / n), where m and e
    are the mantissa and exponent of (a rescaled) vx.  e / n is split into
    its nearest integer, inserted directly as an exponent, and a fractional
    remainder which goes through exp2.  Special inputs (NaN, n == 0,
    negative vx with even n, n == -1, infinities, zeros and n == 1) are
    handled by dedicated branches in front of the general path.

    :param vx: floating-point input node (re-tagged "x")
    :param n: integer root index node (re-tagged "n")
    :return: ConditionBlock whose branches each Return a result
    """
    # fixing inputs' node tag
    vx.set_attributes(tag="x")
    n.set_attributes(tag="n")

    int_precision = self.precision.get_integer_format()

    # assuming x = m.2^e (m in [1, 2[)
    #          n, positive or null integers
    #
    # rootn(x, n) = x^(1/n)
    #             = exp(1/n * log(x))
    #             = 2^(1/n * log2(x))
    #             = 2^(1/n * (log2(m) + e))
    #
    # approximation log2(m)

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision=self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(
        dummy_div_seed,
        language=None,
        table_getter=lambda self: self.approx_table_map)

    log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

    # hard-wired switch: with False the divisions by n_f below are used
    # instead of multiplications by inv_n
    use_reciprocal = False

    # non-scaled vx used to compute vx^1
    unmodified_vx = vx

    # subnormal inputs are rescaled by 2^(mantissa size) and the extracted
    # exponent is corrected by the same amount
    is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal")
    exp_correction_factor = self.precision.get_mantissa_size()
    mantissa_factor = Constant(2**exp_correction_factor,
                               tag="mantissa_factor")
    vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx")

    m = MantissaExtraction(vx, tag="m", precision=self.precision)
    e = ExponentExtraction(vx, tag="e", precision=int_precision)
    e = Select(is_subnormal, e - exp_correction_factor, e,
               tag="corrected_e")

    # base-2 logarithm of |m| obtained from the generic log meta-function
    ml_log_args = ML_GenericLog.get_default_args(precision=self.precision,
                                                 basis=2)
    ml_log = ML_GenericLog(ml_log_args)
    log_table, log_table_tho, table_index_range = ml_log.generate_log_table(
        log_f, inv_approx_table)
    log_approx = ml_log.generate_reduced_log_split(
        Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)
    # floating-point version of n
    n_f = Conversion(n, precision=self.precision, tag="n_f")
    inv_n = Division(Constant(1, precision=self.precision), n_f)

    # a zero input forces the logarithm to -infinity
    log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision),
                        log_approx)
    log_approx.set_attributes(tag="log_approx", debug=debug_multi)
    if use_reciprocal:
        r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi)
    else:
        r = Division(log_approx, n_f, tag="r", debug=debug_multi)

    # e_n ~ e / n
    e_f = Conversion(e, precision=self.precision, tag="e_f")
    if use_reciprocal:
        e_n = Multiplication(e_f, inv_n, tag="e_n")
    else:
        e_n = Division(e_f, n_f, tag="e_n")
    # error_e_n = e_f - e_n * n_f, folded back (scaled by inv_n) into the
    # fractional part of e / n
    error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n")
    e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int")
    pre_e_n_frac = e_n - e_n_int
    pre_e_n_frac.set_attributes(tag="pre_e_n_frac")
    e_n_frac = pre_e_n_frac + error_e_n * inv_n
    e_n_frac.set_attributes(tag="e_n_frac")

    # 2^r and 2^(fractional part of e/n) both go through the exp2
    # meta-function; the integer part is inserted directly as an exponent
    ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
    ml_exp2 = ML_Exp2(ml_exp2_args)
    exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True)
    exp2_r.set_attributes(tag="exp2_r", debug=debug_multi)

    exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac,
                                                   inline_select=True)
    exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi)

    exp2_e_n_int = ExponentInsertion(Conversion(e_n_int,
                                                precision=int_precision),
                                     precision=self.precision,
                                     tag="exp2_e_n_int")

    n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi)
    n_is_odd = LogicalNot(n_is_even, tag="n_is_odd")
    # sign factor of the general path: a CopySign of vx and 1.0 for odd n,
    # the literal 1 for even n
    result_sign = Select(
        n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

    # managing n == -1: 1 / x goes either through an expanded ML_Division
    # implementation (stored in self.division_implementation) or through a
    # plain Division node
    if self.expand_div:
        ml_division_args = ML_Division.get_default_args(
            precision=self.precision, input_formats=[self.precision] * 2)
        ml_division = ML_Division(ml_division_args)
        self.division_implementation = ml_division.implementation
        self.division_implementation.set_scheme(
            ml_division.generate_scheme())
        ml_division_fct = self.division_implementation.get_function_object(
        )
    else:
        ml_division_fct = Division

    # manage n=1 separately to avoid catastrophic propagation of errors
    # between log2 and exp2 to eventually compute the identity function
    # test-case #3
    result = ConditionBlock(
        # NaN input, n == 0, or negative input with even n: quiet NaN
        LogicalOr(LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)),
                  LogicalAnd(n_is_even, vx < 0)),
        Return(FP_QNaN(self.precision)),
        Statement(
            ConditionBlock(
                # n == -1: reciprocal of the unscaled input
                Equal(n, -1, tag="n_is_mone"),
                Return(
                    ml_division_fct(Constant(1, precision=self.precision),
                                    unmodified_vx,
                                    tag="div_res",
                                    precision=self.precision)),
            ),
            ConditionBlock(
                # infinite input: a zero for n < 0 (+0 for +inf, -0
                # otherwise), the input itself for the remaining n
                Test(vx, specifier=Test.IsInfty),
                Statement(
                    ConditionBlock(
                        n < 0,
                        Return(
                            Select(Test(vx,
                                        specifier=Test.IsPositiveInfty),
                                   Constant(FP_PlusZero(self.precision),
                                            precision=self.precision),
                                   Constant(FP_MinusZero(self.precision),
                                            precision=self.precision),
                                   precision=self.precision)),
                        Return(vx),
                    ),
                ),
            ),
            ConditionBlock(
                # rootn(±0, n) is ±∞ for odd n < 0.
                LogicalAnd(LogicalAnd(n_is_odd, n < 0),
                           Equal(vx, 0),
                           tag="n_is_odd_and_neg"),
                Return(
                    Select(Test(vx, specifier=Test.IsPositiveZero),
                           Constant(FP_PlusInfty(self.precision),
                                    precision=self.precision),
                           Constant(FP_MinusInfty(self.precision),
                                    precision=self.precision),
                           precision=self.precision)),
            ),
            ConditionBlock(
                # rootn(±0, n) is +∞ for even n < 0.
                LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)),
                Return(FP_PlusInfty(self.precision))),
            ConditionBlock(
                # zero input with even n (n < 0 was caught above): the
                # input is returned as is
                # NOTE(review): this keeps the sign of a -0 input -- confirm
                # this is the intended result
                LogicalAnd(n_is_even, Equal(vx, 0)),
                Return(vx)),
            ConditionBlock(
                # n == 1 is the identity, otherwise the general path
                Equal(n, 1),
                Return(unmodified_vx),
                Return(result_sign * exp2_r * exp2_e_n_int *
                       exp2_e_n_frac))))
    return result
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise approximation

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the
                              main interval
        :param max_degree: maximum degree for an approximation on any
                           sub-interval, when None the maximum of the
                           degrees suggested by the degree generator is used
        :param error_threshold: error bound for an approximation on any
                                sub-interval
        :param odd: on a sub-interval whose lower bound is a zero of the
                    function, only use every other monomial degree
        :param even: same as odd, and additionally start the monomial
                     degrees at 1 instead of 0

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and
            error is the maximum approximation error encountered
        :rtype tuple(ML_Operation, SollyaObject): """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)

    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # the polynomial variable is the offset from subint_low
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            local_poly_degree_list = list(
                range(1 if even else 0, degree + 1, 2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree,
                    [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(cst_value,
                                                     local_function,
                                                     local_interval,
                                                     sollya.absolute,
                                                     accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    # reported at the Log.Error level, spelled as in every
                    # other error report of this code base
                    Log.report(
                        Log.Error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)

        # missing monomials are stored as explicit zero coefficients
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))

    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    # clamped to [0, num_intervals - 1]
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio,
                                       precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    # polynomial variable: offset from the start of the selected interval
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
def check_processor_support(self,
                            optree,
                            memoization_map=None,
                            debug=False,
                            language=C_Code):
    """ check if all precision-instantiated operation are supported
        by the processor

        The graph rooted at optree is walked recursively.  Structural
        nodes (ConditionBlock, Statement, Loop, Return, ReferenceAssign,
        PlaceHolder, SwitchBlock) are only traversed; every other non-leaf
        node must be supported by self.processor for the requested
        language, possibly after being replaced in place by a support
        simplification.

        :param optree: root node of the graph to check
        :param memoization_map: nodes already checked (a fresh dict is
                                used when None)
        :param debug: enable debug print and is forwarded to the first
                      processor support query
        :param language: target language used for every processor
                         support query
        :return: True (an unsupported operation is reported through
                 Log.Error) """
    memoization_map = {} if memoization_map is None else memoization_map
    if debug:
        print("checking processor support: ",
              self.processor.__class__)  # Debug print
    if optree in memoization_map:
        return True
    if not isinstance(optree, ML_LeafNode):
        for inp in optree.inputs:
            self.check_processor_support(inp,
                                         memoization_map,
                                         debug=debug,
                                         language=language)

        if isinstance(optree, ConditionBlock):
            self.check_processor_support(optree.get_pre_statement(),
                                         memoization_map,
                                         debug=debug,
                                         language=language)
        elif isinstance(
                optree,
                (Statement, Loop, Return, ReferenceAssign, PlaceHolder)):
            # structural nodes: nothing to check beyond their inputs
            pass
        elif isinstance(optree, SwitchBlock):
            for op in optree.get_extra_inputs():
                # TODO: assert case is integer constant
                self.check_processor_support(op,
                                             memoization_map,
                                             debug=debug,
                                             language=language)
        elif not self.processor.is_supported_operation(
                optree, debug=debug, language=language):
            # inputs are recorded for the diagnostic printed on failure;
            # no operand format escalation is attempted
            old_list = optree.inputs

            # checking final processor support (same language as the
            # initial query)
            if not self.processor.is_supported_operation(
                    optree, language=language):
                # look for possible simplification
                if self.has_support_simplification(optree):
                    simplified_tree = self.get_support_simplification(
                        optree)
                    Log.report(
                        Log.Verbose, "simplifying %s" %
                        optree.get_str(depth=2, display_precision=True))
                    Log.report(
                        Log.Verbose, "into %s" % simplified_tree.get_str(
                            depth=2, display_precision=True))
                    optree.change_to(simplified_tree)
                    if self.processor.is_supported_operation(
                            optree, language=language):
                        memoization_map[optree] = True
                        return True

                print(optree)  # Error print
                print("pre escalation: ", old_list)  # Error print
                print(self.processor.get_operation_keys(
                    optree))  # Error print
                print(
                    optree.get_str(display_precision=True,
                                   display_id=True,
                                   memoization_map={}))  # Error print
                Log.report(Log.Error, "unsupported operation\n")
    # memoization
    memoization_map[optree] = True
    return True
def recursive_support_check(optree):
    """Check that `optree` and all of its inputs are supported.

    Relies on `memoization_map`, `processor`, `debug` and `language` from the
    enclosing scope. A node is registered in `memoization_map` before its
    inputs are visited, so a node reached a second time is not re-processed.

    Returns:
        True on every successful path (already visited, leaf node, node
        not submitted to the processor, supported operation, or operation
        supported after simplification). False is only returned if the
        unsupported-operation report does not interrupt execution.
    """
    if optree in memoization_map:
        return True

    # memoization
    memoization_map[optree] = True
    if isinstance(optree, ML_LeafNode):
        return True

    for inp in optree.inputs:
        recursive_support_check(inp)

    # nodes which are not submitted to the processor
    structural_classes = (
        ConditionBlock, Statement, ConditionalBranch, UnconditionalBranch,
        BasicBlock, PhiNode, Loop, Return, ReferenceAssign, PlaceHolder,
    )
    if isinstance(optree, structural_classes):
        return True

    if isinstance(optree, SwitchBlock):
        for op in optree.get_extra_inputs():
            # TODO: assert case is integer constant
            recursive_support_check(op)
        return True

    if processor.is_supported_operation(optree, debug=debug, language=language):
        return True

    # NOTE: operand format escalation (driven by type_escalation) used to sit
    # here behind a `while False:` guard; it was unreachable and has been
    # removed.
    old_list = optree.inputs

    # look for possible simplification
    if has_support_simplification(optree):
        simplified_tree = get_support_simplification(optree, processor)
        Log.report(
            Log.Verbose, "simplifying %s" %
            optree.get_str(depth=2, display_precision=True))
        Log.report(
            Log.Verbose, "into %s" %
            simplified_tree.get_str(depth=2, display_precision=True))
        optree.change_to(simplified_tree)
        if processor.is_supported_operation(optree, language=language):
            memoization_map[optree] = True
            return True

    print("pre escalation node is: ", old_list)  # Error print
    print("languages is {}".format(language))
    print("Operation' keys are: {}".format(
        processor.get_operation_keys(optree)))  # Error print
    print("Operation tree is: \n", optree.get_str(
        display_precision=True, depth=1, display_id=True,
        memoization_map=None))  # Error print
    Log.report(
        Log.Error,
        "unsupported operation in PassCheckProcessorSupport's check_processor_support {}:\n{}",
        processor, optree)
    # falsy, like the implicit None previously returned on this path
    return False
def generate_scheme(self):
    """Build a scheme chaining multi-word additions, subtractions,
    multiplications and FMAs, followed by a subnormalization step.

    Each node is tagged after the operation variant it is meant to exercise
    (e.g. "add221", "fma2122"). The final value is converted back to
    self.precision and returned.

    Returns:
        Statement wrapping the Return of the converted result.
    """
    # declaring function input variables (x0, x1, ...)
    operands = []
    for index in range(self.arity):
        operands.append(
            self.implementation.add_input_variable(
                "x%d" % index, self.get_input_precision(index)))

    # two-word format matching the scheme precision
    wide_formats = {
        ML_Binary32: ML_SingleSingle,
        ML_Binary64: ML_DoubleDouble,
    }
    double_format = wide_formats[self.precision]

    def wide(op_class, tag, *args):
        # node whose result is in the two-word format
        return op_class(*args, precision=double_format, tag=tag)

    def narrow_add(tag, lhs, rhs):
        # addition whose result is in the base precision
        return Addition(lhs, rhs, precision=self.precision, tag=tag)

    # testing Add211
    exact_add = wide(Addition, "exact_add", operands[0], operands[1])
    # testing Mul211
    exact_mul = wide(Multiplication, "exact_mul", operands[0], operands[1])
    # testing Sub211
    exact_sub = wide(Subtraction, "exact_sub", operands[1], operands[0])
    # testing Add222
    multi_add = wide(Addition, "multi_add", exact_add, exact_sub)
    # testing Mul222
    multi_mul = wide(Multiplication, "multi_mul", multi_add, exact_mul)

    # testing Add221 and Add212 and Sub222
    add221 = wide(Addition, "add221", exact_sub, operands[1])
    add212 = wide(Addition, "add212", operands[0], multi_mul)
    multi_sub = wide(Subtraction, "sub222", add221, add212)

    # testing Mul212 and Mul221
    # NOTE(review): both products below take (two-word, one-word) operands,
    # so "mul212" looks like a second Mul221 case -- confirm intent
    mul212 = wide(Multiplication, "mul212", multi_sub, operands[0])
    mul221 = wide(Multiplication, "mul221", exact_mul, operands[1])

    # testing Sub221 and Sub212
    # NOTE(review): "sub212" also has a two-word left operand and a one-word
    # right operand -- confirm intent
    sub221 = wide(Subtraction, "sub221", mul212, mul221.hi)
    sub212 = wide(Subtraction, "sub212", sub221, mul212.lo)

    # testing FMA2111
    fma2111 = wide(FMA, "fma2111", sub221.lo, sub212.hi, mul221.hi)
    # testing FMA2112
    fma2112 = wide(FMA, "fma2112", fma2111.lo, fma2111.hi, fma2111)
    # testing FMA2212
    fma2212 = wide(FMA, "fma2212", fma2112, fma2112.hi, fma2112)
    # testing FMA2122
    fma2122 = wide(FMA, "fma2122", fma2212.lo, fma2212, fma2212)
    # testing FMA2222
    fma2222 = wide(FMA, "fma2222", fma2122, fma2212, fma2111)

    # testing Add122
    add122 = narrow_add("add122", fma2222, fma2222)
    # testing Add112
    add112 = narrow_add("add112", add122, fma2222)
    # testing Add121
    add121 = narrow_add("add121", fma2222, add112)

    # testing subnormalization
    subnormalize_input = Addition(add121, add112, precision=double_format)
    subnormalize_arg = Constant(
        3, precision=self.precision.get_integer_format())
    multi_subnormalize = SpecificOperation(
        subnormalize_input,
        subnormalize_arg,
        specifier=SpecificOperation.Subnormalize,
        precision=double_format,
        tag="multi_subnormalize")

    result = Conversion(multi_subnormalize, precision=self.precision)
    return Statement(Return(result))
def generate_scheme(self):
    """Build the fixed-point, table-based cosine / sine evaluation scheme.

    Steps performed below:
      1. the input is converted to a signed fixed-point format whose
         integer/fraction split is derived from the bound of
         self.input_intervals[0];
      2. a fused table of 2**self.table_size_log (cos, sin) pairs is filled;
      3. the reduced argument is split into a high part (masked) and a low
         part (remainder), and a table index is extracted by right shift;
      4. polynomial approximations of cos(x) and sin(x)/x on monomials
         [0, 2] are built over the low-part interval;
      5. self.cos_output selects generate_cos_scheme or generate_sin_scheme,
         whose result is converted to the output precision.

    Returns:
        Statement wrapping the Return of the converted result.
    """
    # declaring CodeFunction and retrieving input variable
    vx = self.implementation.add_input_variable("x", self.precision)

    table_size_log = self.table_size_log
    integer_size = 31
    integer_precision = ML_Int32  # NOTE(review): not used in this function

    # number of bits required to cover the largest input magnitude
    max_bound = sup(abs(self.input_intervals[0]))
    max_bound_log = int(ceil(log2(max_bound)))
    Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
    # remaining bits out of integer_size, used as fractional size below
    scaling_power = integer_size - max_bound_log
    Log.report(Log.Info, "scaling power: %s " % scaling_power)

    storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

    Log.report(Log.Info, "tabulating cosine and sine")
    # cosine and sine fused table: column 0 holds cos, column 1 holds sin
    fused_table = ML_NewTable(
        dimensions=[2**table_size_log, 2],
        storage_precision=storage_precision,
        tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
    # filling table
    for i in range(2**table_size_log):
        # i-th sample point: i * 2**(max_bound_log - table_size_log)
        local_x = i / S2**table_size_log * S2**max_bound_log
        cos_local = cos(
            local_x
        )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())
        sin_local = sin(
            local_x
        )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())
        fused_table[i][0] = cos_local
        fused_table[i][1] = sin_local

    # argument reduction evaluation scheme
    # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

    red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                   scaling_power,
                                                   signed=True)
    Log.report(
        Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
        red_vx_precision.get_c_bit_size())
    # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
    red_vx = Conversion(vx,
                        precision=red_vx_precision,
                        tag="red_vx",
                        debug=debug_fixed32)

    computation_precision = red_vx_precision  # self.precision
    output_precision = self.get_output_precision()
    Log.report(Log.Info,
               "computation_precision is %s" % computation_precision)
    Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
    Log.report(Log.Info, "output_precision      is %s" % output_precision)

    # mask with bits [31 .. 32 - table_size_log - 1] set, i.e. the
    # (table_size_log + 1) most significant bits of a 32-bit word
    hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
    hi_mask = Constant(hi_mask_value, precision=ML_Int32)
    Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

    # high part: reduced argument reinterpreted as an integer and masked,
    # then reinterpreted back in the fixed-point format
    red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                hi_mask,
                                precision=ML_Int32,
                                tag="red_vx_hi_int",
                                debug=debugd)
    red_vx_hi = TypeCast(red_vx_hi_int,
                         precision=red_vx_precision,
                         tag="red_vx_hi",
                         debug=debug_fixed32)
    # low part: what remains once the high part is removed
    red_vx_lo = red_vx - red_vx_hi
    red_vx_lo.set_attributes(precision=red_vx_precision,
                             tag="red_vx_lo",
                             debug=debug_fixed32)
    # table index: integer view of the reduced argument, shifted right
    table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                     scaling_power -
                                     (table_size_log - max_bound_log),
                                     precision=ML_Int32,
                                     tag="table_index",
                                     debug=debugd)

    tabulated_cos = TableLoad(fused_table,
                              table_index,
                              0,
                              tag="tab_cos",
                              precision=storage_precision,
                              debug=debug_fixed32)
    tabulated_sin = TableLoad(fused_table,
                              table_index,
                              1,
                              tag="tab_sin",
                              precision=storage_precision,
                              debug=debug_fixed32)

    # approximation error measured as dirtyinfnorm(f - p) over the interval
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    Log.report(Log.Info, "building polynomial approximation for cosine")
    # cosine polynomial approximation
    poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
    Log.report(Log.Info, "poly_interval=%s " % poly_interval)
    # NOTE(review): cos_poly_degree is not used below
    cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

    Log.report(Log.Verbose, "cosine polynomial approximation")
    cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
        cos(sollya.x), [0, 2],
        [0] + [computation_precision.get_bit_size()],
        poly_interval,
        sollya.absolute,
        error_function=error_function)
    #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
    Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
    cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
    coeff_C0 = cos_coeff_list[0][1]  # NOTE(review): not used below
    coeff_C2 = Constant(cos_coeff_list[1][1],
                        precision=ML_Custom_FixedPoint_Format(-1,
                                                              32,
                                                              signed=True))

    Log.report(Log.Info, "building polynomial approximation for sine")

    # sine polynomial approximation (sin(x) / x is the approximated function)
    sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
    Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
    Log.report(Log.Verbose, "sine polynomial approximation")
    # NOTE(review): the format list below has sin_poly_degree + 2 entries
    # for 2 monomials, unlike the cosine call above -- confirm it is intended
    sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
        sin(sollya.x) / sollya.x, [0, 2], [0] +
        [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
        poly_interval,
        sollya.absolute,
        error_function=error_function)
    sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
    coeff_S0 = sin_coeff_list[0][1]  # NOTE(review): not used below
    coeff_S2 = Constant(sin_coeff_list[1][1],
                        precision=ML_Custom_FixedPoint_Format(-1,
                                                              32,
                                                              signed=True))

    # scheme selection between sine and cosine
    if self.cos_output:
        scheme = self.generate_cos_scheme(computation_precision,
                                          tabulated_cos, tabulated_sin,
                                          coeff_S2, coeff_C2, red_vx_lo)
    else:
        scheme = self.generate_sin_scheme(computation_precision,
                                          tabulated_cos, tabulated_sin,
                                          coeff_S2, coeff_C2, red_vx_lo)

    result = Conversion(scheme, precision=self.get_output_precision())

    Log.report(
        Log.Verbose, "result operation tree :\n %s " % result.get_str(
            display_precision=True, depth=None, memoization_map={}))
    scheme = Statement(Return(result))

    return scheme
def insert_conversion_when_required(op_input, final_precision):
    """Return `op_input` in `final_precision`.

    The node is returned untouched when its precision already matches
    `final_precision`; otherwise it is wrapped into a Conversion node
    targeting `final_precision`.
    """
    needs_conversion = op_input.get_precision() != final_precision
    return (Conversion(op_input, precision=final_precision)
            if needs_conversion else op_input)
def generate_scalar_scheme(self, vx, vy):
    """Build the scalar evaluation scheme of pow(vx, vy).

    With x = m.2^e, the result is evaluated as
    2^(y * log_approx(|m|)) * 2^(y * e), the exponent being split into an
    integer part (inserted as an exponent) and fractional parts (evaluated
    through ML_Exp2). A list of special cases is tested before the default
    evaluation.

    Args:
        vx: node for the base x (its tag is set to "x").
        vy: node for the exponent y (its tag is set to "y").

    Returns:
        Statement made of the special-case ConditionBlocks followed by the
        Return of the default result.
    """
    # fixing inputs' node tag
    vx.set_attributes(tag="x")
    vy.set_attributes(tag="y")

    int_precision = self.precision.get_integer_format()

    # assuming x = m.2^e (m in [1, 2[)
    #          n, positive or null integers
    #
    # pow(x, n) = x^(y)
    #             = exp(y * log(x))
    #             = 2^(y * log2(x))
    #             = 2^(y * (log2(m) + e))
    #
    e = ExponentExtraction(vx, tag="e", precision=int_precision)
    m = MantissaExtraction(vx, tag="m", precision=self.precision)

    # approximation log2(m)

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision=self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(
        dummy_div_seed, language=None,
        table_getter=lambda self: self.approx_table_map)

    log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

    ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2)
    ml_log = ML_GenericLog(ml_log_args)
    log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table)
    log_approx = ml_log.generate_reduced_log_split(
        Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

    log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
    log_approx.set_attributes(tag="log_approx", debug=debug_multi)
    r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)

    # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
    #
    # log_approx = log2(Abs(m))
    # r = y * log_approx ~ y * log2(m)
    #
    # NOTES: manage cases where e is negative and
    # (y * log2(m)) AND (y * e) could cancel out
    # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT
    # be of opposite signs
    # log2(m) in [0, 1[ so cancellation can occur only if e == -1
    # we split 2^x in 2^x = 2^t0 * 2^t1
    # if e < 0: t0 = y * (log2(m) + e), t1 = 0
    # else:     t0 = y * e,             t1 = y * log2(m)
    t_cond = e < 0

    # e_y ~ e * y
    e_f = Conversion(e, precision=self.precision)

    EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
    LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
    # residual e * y - EY, evaluated with a single FMA
    ey_frac = FMA(e_f, vy, -EY)
    t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
    # When e >= 0, t0 = y * e and t0_int = EY, so the fractional part is
    # e * y - EY. The previous expression (EY - t0_int) was always 0 in that
    # branch, dropping the fractional part of e * y.
    t0_frac = Select(t_cond,
                     ey_frac + FMA(log_approx, vy, -LY),
                     ey_frac,
                     tag="t0_frac")

    ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
    ml_exp2 = ML_Exp2(ml_exp2_args)

    exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
    exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

    exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision),
                                    precision=self.precision, tag="exp2_t0_int")

    t1 = Select(t_cond, Constant(0, precision=self.precision), r)
    exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
    exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

    result_sign = Constant(1.0, precision=self.precision)  # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

    y_int = NearestInteger(vy, precision=self.precision)
    y_is_integer = Equal(y_int, vy)
    y_is_even = LogicalOr(
        # if y is a number (exc. inf) greater than 2**mantissa_size * 2,
        # then it is an integer multiple of 2 => even
        Abs(vy) >= 2**(self.precision.get_mantissa_size()+1),
        LogicalAnd(
            # Both conditions must be graph nodes: a Python `and` between
            # nodes is evaluated at generation time and silently dropped
            # y_is_integer.
            # we want to limit the modulo computation to an integer input
            LogicalAnd(
                y_is_integer,
                Abs(vy) < 2**(self.precision.get_mantissa_size()+1)
            ),
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
        )
    )
    y_is_odd = LogicalAnd(
        LogicalAnd(
            Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
            y_is_integer
        ),
        # "remainder != 0" rather than "remainder == 1": the remainder of a
        # negative odd integer may be -1, which must still count as odd
        LogicalNot(
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
        )
    )

    # special cases management
    special_case_results = Statement(
        # x is sNaN OR y is sNaN
        ConditionBlock(
            LogicalOr(Test(vx, specifier=Test.IsSignalingNaN),
                      Test(vy, specifier=Test.IsSignalingNaN)),
            Return(FP_QNaN(self.precision))
        ),
        # pow(x, ±0) is 1 if x is not a signaling NaN
        ConditionBlock(
            Test(vy, specifier=Test.IsZero),
            Return(Constant(1.0, precision=self.precision))
        ),
        # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)),
            Return(Select(Test(vx, specifier=Test.IsPositiveZero),
                          FP_PlusInfty(self.precision),
                          FP_MinusInfty(self.precision))),
        ),
        # pow(±0, −∞) is +∞ with no exception
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       Test(vy, specifier=Test.IsNegativeInfty)),
            Return(FP_PlusInfty(self.precision)),
        ),
        # pow(±0, +∞) is +0 with no exception
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       Test(vy, specifier=Test.IsPositiveInfty)),
            Return(FP_PlusZero(self.precision)),
        ),
        # pow(±0, y) is ±0 for finite y>0 an odd integer
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)),
            Return(vx),
        ),
        # pow(−1, ±∞) is 1 with no exception
        ConditionBlock(
            LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
            Return(Constant(1.0, precision=self.precision)),
        ),
        # pow(+1, y) is 1 for any y (even a quiet NaN)
        ConditionBlock(
            # explicit Equal node, like the other equality conditions here
            Equal(vx, 1),
            Return(Constant(1.0, precision=self.precision)),
        ),
        # pow(x, +∞) is +0 for −1<x<1
        ConditionBlock(
            LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
            Return(FP_PlusZero(self.precision))
        ),
        # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
        ConditionBlock(
            LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
            Return(FP_PlusInfty(self.precision))
        ),
        # pow(x, −∞) is +∞ for −1<x<1
        ConditionBlock(
            LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
            Return(FP_PlusInfty(self.precision))
        ),
        # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
        ConditionBlock(
            LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
            Return(FP_PlusZero(self.precision))
        ),
        # pow(+∞, y) is +0 for a number y < 0
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
            Return(FP_PlusZero(self.precision))
        ),
        # pow(+∞, y) is +∞ for a number y > 0
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
            Return(FP_PlusInfty(self.precision))
        ),
        # pow(−∞, y) is −0 for finite y < 0 an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(y_is_odd, vy < 0)),
            Return(FP_MinusZero(self.precision)),
        ),
        # pow(−∞, y) is −∞ for finite y > 0 an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(y_is_odd, vy > 0)),
            Return(FP_MinusInfty(self.precision)),
        ),
        # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
            Return(FP_PlusZero(self.precision)),
        ),
        # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
            Return(FP_PlusInfty(self.precision)),
        ),
        # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
        # TODO: signal divideByZero exception
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
            Return(FP_PlusInfty(self.precision)),
        ),
        # pow(±0, y) is +0 for finite y>0 and not an odd integer
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
            Return(FP_PlusZero(self.precision)),
        ),
    )

    # manage n=1 separately to avoid catastrophic propagation of errors
    # between log2 and exp2 to eventually compute the identity function
    # test-case #3
    result = Statement(
        special_case_results,
        # fallback default cases
        Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
    return result
def generate_bench(self, processor, test_num=1000, unroll_factor=10):
    """Generate a performance bench for self.op_class.

    The bench builds a chain of `unroll_factor` dependent self.op_class
    operations, runs it in a loop, measures the elapsed timestamp difference
    around the loop and prints the number of computed elements, the elapsed
    cycles and the resulting cycles-per-element ratio.

    Args:
        processor: target providing get_current_timestamp().
        test_num: requested number of elementary operations.
        unroll_factor: number of chained operations per loop iteration.

    Returns:
        Statement implementing the bench.
    """
    initial_inputs = [
        Constant(random.uniform(inf(self.init_interval),
                                sup(self.init_interval)),
                 precision=precision)
        for precision in self.input_precisions
    ]

    var_inputs = [
        Variable("var_%d" % i,
                 precision=FormatAttributeWrapper(precision, ["volatile"]),
                 var_type=Variable.Local)
        for i, precision in enumerate(self.input_precisions)
    ]

    printf_timing_op = FunctionOperator(
        "printf",
        arg_map={
            0: "\"%s[%s] %%lld elts computed "\
               "in %%lld cycles =>\\n %%.3f CPE \\n\"" % (
                self.bench_name,
                self.output_precision.get_display_format()
            ),
            1: FO_Arg(0),
            2: FO_Arg(1),
            3: FO_Arg(2),
            4: FO_Arg(3)
        },
        void_function=True
    )
    printf_timing_function = FunctionObject(
        "printf",
        [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
        ML_Void, printf_timing_op)
    timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

    void_function_op = FunctionOperator("(void)", arity=1, void_function=True)
    void_function = FunctionObject("(void)", [self.output_precision],
                                   ML_Void, void_function_op)

    # initialization of operation inputs
    init_assign = metaop.Statement()
    for var_input, init_value in zip(var_inputs, initial_inputs):
        init_assign.push(ReferenceAssign(var_input, init_value))

    # test loop
    loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
    # Integer division: the loop bound is an ML_Int64 constant, and true
    # division would produce a float. At least one iteration is run so the
    # reported ratio is always defined.
    loop_count = max(1, test_num // unroll_factor)
    # number of elementary operations actually executed by the loop
    computed_num = loop_count * unroll_factor
    test_num_cst = Constant(loop_count, precision=ML_Int64, tag="test_num")

    # Goal build a chain of dependant operation to measure
    # elementary operation latency
    local_inputs = tuple(var_inputs)
    local_result = self.op_class(*local_inputs,
                                 precision=self.output_precision,
                                 unbreakable=True)
    for _ in range(unroll_factor - 1):
        # each new operation consumes the previous result as first operand
        local_inputs = tuple([local_result] + var_inputs[1:])
        local_result = self.op_class(*local_inputs,
                                     precision=self.output_precision,
                                     unbreakable=True)
    # renormalisation
    local_result = self.renorm_function(local_result)

    # variable assignation to build dependency chain
    var_assign = Statement()
    var_assign.push(ReferenceAssign(var_inputs[0], local_result))
    final_value = var_inputs[0]

    # loop increment value
    loop_increment = 1

    test_loop = Loop(
        ReferenceAssign(loop_i, Constant(0, precision=ML_Int32)),
        loop_i < test_num_cst,
        Statement(var_assign,
                  ReferenceAssign(loop_i, loop_i + loop_increment)),
    )

    # bench scheme
    test_scheme = Statement(
        ReferenceAssign(timer, processor.get_current_timestamp()),
        init_assign,
        test_loop,
        ReferenceAssign(
            timer,
            Subtraction(processor.get_current_timestamp(), timer,
                        precision=ML_Int64)),
        # prevent intermediary variable simplification
        void_function(final_value),
        printf_timing_function(
            final_value,
            Constant(computed_num, precision=ML_Int64),
            timer,
            Division(Conversion(timer, precision=ML_Binary64),
                     Constant(computed_num, precision=ML_Binary64),
                     precision=ML_Binary64))
        # ,Return(Constant(0, precision = ML_Int32))
    )
    return test_scheme