def _run_one_hot_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the one hot op."""
  op_model = _new_regression_op_model(model, kop)

  # Delay tracks operand_bit_count[0] * operand_bit_count[0]: the highest
  # priority gate is ~1 gate (actually, a wire) while the lowest priority
  # gate needs O(n) gates (a reduction tree over the higher priority
  # inputs); sum(1..n) is quadratic.
  product = _new_expression(op_model)
  _set_multiply_expression(product)
  _set_operand_bit_count_expression_factor(product.lhs_expression, 0)
  _set_operand_bit_count_expression_factor(product.rhs_expression, 0)

  for input_width in _bitwidth_sweep(0):
    # lsb / msb priority use the same logic, just mirror-imaged.
    point = _build_data_point_bit_types(
        op, kop, input_width + 1, [input_width], stub,
        attributes=[('lsb_prio', 'true')])
    model.data_points.append(point)
    logging.info('# one_hot: %s, %s input bits --> %s', op, str(input_width),
                 str(model.data_points[-1].delay))

  # Validate model
  delay_model.DelayModel(model)
def _run_one_hot_select_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the one hot select op."""
  op_model = _new_regression_op_model(model, kop)

  # Delay modeled as operand_bit_count(0) * result_bit_count.
  product = _new_expression(op_model)
  _set_multiply_expression(product)
  _set_operand_bit_count_expression_factor(product.lhs_expression, 0)
  _set_result_bit_count_expression_factor(product.rhs_expression)

  # Enumerate cases and bitwidth.
  for num_cases in _operand_count_sweep():
    for bit_count in _bitwidth_sweep(0):
      # One-hot selector: one selector bit per case, then the cases.
      operands = [num_cases] + [bit_count] * num_cases
      model.data_points.append(
          _build_data_point_bit_types(op, kop, bit_count, operands, stub))
      logging.info('# one_hot_select_op: %s, %s bits, %s cases --> %s', op,
                   str(bit_count), str(num_cases),
                   str(model.data_points[-1].delay))

  # Validate model
  delay_model.DelayModel(model)
def _run_linear_bin_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the given linear bin_op and adds it to the model."""
  # Delay is modeled as linear in the result bit count.
  _new_regression_op_model(model, kop, result_bit_count=True)
  points = _run_nary_op(op, kop, stub, num_inputs=2)
  model.data_points.extend(points)
  # Validate model
  delay_model.DelayModel(model)
def test_regression_estimator_cross_validation_passes(self):
  """A model whose delay tracks result_bit_count should pass k-fold validation."""

  def gen_operation(result_bit_count, operand_bit_count):
    return 'op: "kFoo" bit_count: %d operands { } operands { bit_count: %d }' % (
        result_bit_count, operand_bit_count)

  def gen_data_point(result_bit_count, operand_bit_count, delay):
    return 'data_points{{ operation {{ {} }} delay: {} delay_offset: 0}}'.format(
        gen_operation(result_bit_count, operand_bit_count), delay)

  # (result_bit_count, operand_bit_count, delay): delay equals
  # result_bit_count exactly, while operand_bit_count is uncorrelated noise.
  samples = [
      (1, 2, 1), (2, 2, 2), (4, 1, 4), (5, 111, 5), (7, 13, 7),
      (8, 2, 8), (10, 12, 10), (15, 6, 15), (20, 40, 20), (30, 15, 30),
      (31, 2, 31), (35, 2, 35), (40, 30, 40), (45, 9, 45), (50, 4, 50),
      (55, 400, 55), (70, 10, 70), (100, 50, 100), (125, 15, 125),
      (150, 100, 150),
  ]
  data_points_str = [gen_data_point(r, o, d) for r, o, d in samples]
  proto_text = """
  op_models {
    op: "kFoo"
    estimator {
      regression {
        expressions {
          factor {
            source: RESULT_BIT_COUNT
          }
        }
        kfold_validator {
          max_data_point_error: 0.15
          max_fold_geomean_error: 0.075
        }
      }
    }
  }
  """
  proto_text = proto_text + '\n'.join(data_points_str)
  # Should construct without raising.
  delay_model.DelayModel(
      text_format.Parse(proto_text, delay_model_pb2.DelayModel()))
def test_regression_estimator_cross_validation_insufficient_data_for_folds(
    self):
  """Requesting more folds than data points should raise a model Error."""

  def gen_operation(result_bit_count, operand_bit_count):
    return 'op: "kFoo" bit_count: %d operands { } operands { bit_count: %d }' % (
        result_bit_count, operand_bit_count)

  def gen_data_point(result_bit_count, operand_bit_count, delay):
    return 'data_points{{ operation {{ {} }} delay: {} delay_offset: 0}}'.format(
        gen_operation(result_bit_count, operand_bit_count), delay)

  # Only 6 data points -- fewer than the 8 folds requested below.
  samples = [
      (1, 2, 100),
      (4, 1, 125),
      (4, 6, 150),
      (7, 13, 175),
      (10, 12, 200),
      (30, 15, 400),
  ]
  data_points_str = [gen_data_point(r, o, d) for r, o, d in samples]
  proto_text = """
  op_models {
    op: "kFoo"
    estimator {
      regression {
        expressions {
          factor {
            source: OPERAND_BIT_COUNT
            operand_number: 1
          }
        }
        expressions {
          factor {
            source: RESULT_BIT_COUNT
          }
        }
        kfold_validator {
          num_cross_validation_folds: 8
          max_data_point_error: 99.0
          max_fold_geomean_error: 99.0
        }
      }
    }
  }
  """
  proto_text = proto_text + '\n'.join(data_points_str)
  with self.assertRaises(delay_model.Error) as e:
    delay_model.DelayModel(
        text_format.Parse(proto_text, delay_model_pb2.DelayModel()))
  self.assertEqualIgnoringWhitespaceAndFloats(
      'kFoo: Too few data points to cross '
      'validate: 6 data points, 8 folds', str(e.exception))
def _run_single_bit_result_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub,
    num_inputs: int) -> None:
  """Runs characterization for an op that always produce a single-bit output."""
  # Delay modeled on the bit count of operand 0.
  _new_regression_op_model(model, kop, operand_bit_counts=[0])
  for operand_width in _bitwidth_sweep(0):
    logging.info('# reduction_op: %s, %s bits', op, str(operand_width))
    operands = [operand_width] * num_inputs
    model.data_points.append(
        _build_data_point_bit_types(op, kop, 1, operands, stub))
  # Validate model
  delay_model.DelayModel(model)
def _run_unary_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub,
    signed=False) -> None:
  """Runs characterization for the given unary_op and adds it to the model."""
  op_model = _new_regression_op_model(model, kop)
  factor = _new_expression(op_model)
  # For signed ops the sign bit is discounted from the result width.
  if signed:
    _set_result_bit_count_expression_factor(factor, add_constant=-1)
  else:
    _set_result_bit_count_expression_factor(factor, add_constant=0)
  model.data_points.extend(_run_nary_op(op, kop, stub, num_inputs=1))
  # Validate model
  delay_model.DelayModel(model)
def _run_encode_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the encode op."""
  _new_regression_op_model(model, kop, operand_bit_counts=[0])
  # input_bits should be at least 2 bits.
  for input_bits in _bitwidth_sweep(0):
    # Output is just wide enough to index the highest input bit position.
    output_bits = bits.min_bit_count_unsigned(input_bits - 1)
    point = _build_data_point_bit_types(op, kop, output_bits, [input_bits],
                                        stub)
    model.data_points.append(point)
    logging.info('# encode_op: %s, %s input bits --> %s', op, str(input_bits),
                 str(model.data_points[-1].delay))
  # Validate model
  delay_model.DelayModel(model)
def main(argv):
  """Loads a DelayModel prototext file and plots each op's estimators."""
  if len(argv) > 2:
    raise app.UsageError('Too many command-line arguments.')
  with open(argv[1], 'rb') as f:
    contents = f.read()
  parsed = text_format.Parse(contents, delay_model_pb2.DelayModel())
  dm = delay_model.DelayModel(parsed)
  for op in dm.ops():
    op_model = dm.op_model(op)
    # Plot the op's primary estimator, then each specialization.
    maybe_plot_op_model(op_model.estimator)
    for kind, estimator in op_model.specializations.items():
      kind_name = delay_model_pb2.SpecializationKind.Name(kind)
      maybe_plot_op_model(estimator, kind_name)
def _run_nary_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the given nary_op and adds it to the model."""
  op_model = _new_regression_op_model(model, kop)
  product = _new_expression(op_model)
  _set_multiply_expression(product)
  _set_result_bit_count_expression_factor(product.lhs_expression)
  # Note - for most operand counts, this works much better as
  # operand_count-1. However, for low operand count (e.g. 2,4)
  # we can fit multiple ops inside a LUT, so the number of operands
  # has little effect until higher operand counts. So, weirdly,
  # we get lower error overall by just using operand_count...
  _set_operand_count_expression_factor(product.rhs_expression)
  for num_operands in _operand_count_sweep():
    points = _run_nary_op(op, kop, stub, num_inputs=num_operands)
    model.data_points.extend(points)
  # Validate model
  delay_model.DelayModel(model)
def _run_dynamic_bit_slice_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the dynamic bit slice op."""
  add_op_model = _new_regression_op_model(model, kop)
  # ~= result_bit_count * operand_bit_count[1] (start bits)
  # Hard to model this well - in theory, this should be something
  # more like result_bit_count * 2 ^ start bits. However,
  # as we add more result bits, more work gets eliminated / reduced
  # (iff 2 ^ start bits + result width > input bits).
  mul_expr = _new_expression(add_op_model)
  _set_multiply_expression(mul_expr)
  _set_result_bit_count_expression_factor(mul_expr.lhs_expression)
  _set_operand_bit_count_expression_factor(mul_expr.rhs_expression, 1)

  # input_bits should be at least 2 bits
  idx = 0  # Running index used only for progress logging.
  for input_bits in _bitwidth_sweep(2):
    # Start-index operand width: from 3 up to just enough bits to address
    # the last input bit.
    for start_bits in range(
        3, bits.min_bit_count_unsigned(input_bits - 1) + 1):
      # Slice widths swept with a module-level stride constant -- presumably
      # a sampling-density knob; confirm against BITWIDTH_STRIDE_DEGREES's
      # definition elsewhere in this file.
      for node_bits in range(1, input_bits, BITWIDTH_STRIDE_DEGREES[2]):
        model.data_points.append(
            _build_data_point_bit_types(op, kop, node_bits,
                                        [input_bits, start_bits], stub,
                                        attributes=[('width',
                                                     str(node_bits))]))
        logging.info(
            '# idx: %s, dynamic_bit_slice_op: %s, %s start bits, '
            '%s input bits, %s width --> %s', str(idx), op, str(start_bits),
            str(input_bits), str(node_bits),
            str(model.data_points[-1].delay))
        idx = idx + 1

  # Validate model
  delay_model.DelayModel(model)
def _run_quadratic_bin_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub,
    signed: bool = False) -> None:
  """Runs characterization for the quadratic bin_op and adds it to the model."""
  op_model = _new_regression_op_model(model, kop)
  # Delay grows with result_bit_count * result_bit_count.
  # For signed ops each factor is offset by -1 because the sign bit is
  # special.
  if signed:
    offset = -1
  else:
    offset = 0
  product = _new_expression(op_model)
  _set_multiply_expression(product)
  for side in (product.lhs_expression, product.rhs_expression):
    _set_result_bit_count_expression_factor(side, add_constant=offset)
  model.data_points.extend(_run_nary_op(op, kop, stub, num_inputs=2))
  # Validate model
  delay_model.DelayModel(model)
def main(argv):
  """Renders the delay-lookup template for the model given on the command line."""
  if len(argv) > 2:
    raise app.UsageError('Too many command-line arguments.')
  with open(argv[1], 'rb') as f:
    contents = f.read()
  dm = delay_model.DelayModel(
      text_format.Parse(contents, delay_model_pb2.DelayModel()))
  env = jinja2.Environment(undefined=jinja2.StrictUndefined)
  tmpl_text = runfiles.get_contents_as_text(
      'xls/delay_model/generate_delay_lookup.tmpl')
  # e.g. 'my_model_name' -> 'MyModelName'.
  camel_case = ''.join(
      part.capitalize() for part in FLAGS.model_name.split('_'))
  rendered = env.from_string(tmpl_text).render(
      delay_model=dm,
      name=FLAGS.model_name,
      precedence=FLAGS.precedence,
      camel_case_name=camel_case)
  print('// DO NOT EDIT: this file is AUTOMATICALLY GENERATED and should not '
        'be changed.')
  print(rendered)
def _run_select_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the select op."""
  add_op_model = _new_regression_op_model(model, kop)
  # operand_count * result_bit_count
  # Alternatively, try pow(2, operand_bit_count(0)) * result_bit_count
  expr = _new_expression(add_op_model)
  _set_multiply_expression(expr)
  _set_operand_count_expression_factor(expr.lhs_expression, add_constant=-2)
  _set_result_bit_count_expression_factor(expr.rhs_expression)

  # Enumerate cases and bitwidth.
  # Note: at 7 and 8 cases, there is a weird dip in LUTs at around 40 bits wide
  # Why? No idea...
  for num_cases in _operand_count_sweep():
    for bit_count in _bitwidth_sweep(0):
      # Handle differently if num_cases is a power of 2.
      # Selector width is just wide enough to index the last case.
      select_bits = bits.min_bit_count_unsigned(num_cases - 1)
      if math.pow(2, select_bits) == num_cases:
        # Power-of-2 case count: the selector covers every case exactly, so
        # operands are [selector] + num_cases case values.
        model.data_points.append(
            _build_data_point_bit_types(
                op, kop, bit_count,
                [select_bits] + ([bit_count] * num_cases), stub))
      else:
        # Non-power-of-2 case count: one extra case-width operand is added
        # -- presumably the select's default value; confirm against the
        # kSel IR signature.
        model.data_points.append(
            _build_data_point_bit_types(
                op, kop, bit_count,
                [select_bits] + ([bit_count] * (num_cases + 1)), stub))
      logging.info('# select_op: %s, %s bits, %s cases --> %s', op,
                   str(bit_count), str(num_cases),
                   str(model.data_points[-1].delay))

  # Validate model
  delay_model.DelayModel(model)
def test_regression_estimator_cross_validation_data_point_exceeds_geomean_error(
    self):
  """A factor uncorrelated with delay should fail the geomean error bound."""

  def gen_operation(result_bit_count, operand_bit_count):
    return 'op: "kFoo" bit_count: %d operands { } operands { bit_count: %d }' % (
        result_bit_count, operand_bit_count)

  def gen_data_point(result_bit_count, operand_bit_count, delay):
    return 'data_points{{ operation {{ {} }} delay: {} delay_offset: 0}}'.format(
        gen_operation(result_bit_count, operand_bit_count), delay)

  # (result_bit_count, operand_bit_count, delay): delay tracks
  # result_bit_count, while operand_bit_count is pure noise.
  samples = [
      (1, 2, 1), (2, 2, 2), (4, 1, 4), (5, 111, 5), (7, 13, 7),
      (8, 2, 8), (10, 12, 10), (15, 6, 15), (20, 40, 20), (30, 15, 30),
      (31, 2, 31), (35, 2, 35), (40, 30, 40), (45, 9, 45), (50, 4, 50),
      (55, 400, 55), (70, 10, 70), (100, 50, 100), (125, 15, 125),
      (150, 100, 150),
  ]
  data_points_str = [gen_data_point(r, o, d) for r, o, d in samples]
  proto_text = """
  op_models {
    op: "kFoo"
    estimator {
      regression {
        expressions {
          factor {
            source: OPERAND_BIT_COUNT
            operand_number: 1
          }
        }
        kfold_validator {
          max_fold_geomean_error: 0.1
        }
      }
    }
  }
  """
  proto_text = proto_text + '\n'.join(data_points_str)
  # Build regression model with operand_bit_count (uncorrelated with delay)
  # as only factor.
  with self.assertRaises(delay_model.Error) as e:
    delay_model.DelayModel(
        text_format.Parse(proto_text, delay_model_pb2.DelayModel()))
  self.assertEqualIgnoringWhitespaceAndFloats(
      'kFoo: Regression model failed '
      'k-fold cross validation for test set with geomean error 0.0 > max 0.0',
      str(e.exception))
  self.assertIn('> max 0.1', str(e.exception))
def test_regression_estimator_cross_validation_data_point_exceeds_max_error(
    self):
  """A single outlier data point should trip the per-point error bound."""

  def gen_operation(result_bit_count, operand_bit_count):
    return 'op: "kFoo" bit_count: %d operands { } operands { bit_count: %d }' % (
        result_bit_count, operand_bit_count)

  def gen_data_point(result_bit_count, operand_bit_count, delay):
    return 'data_points{{ operation {{ {} }} delay: {} delay_offset: 0}}'.format(
        gen_operation(result_bit_count, operand_bit_count), delay)

  # (result_bit_count, operand_bit_count, delay): delay equals
  # result_bit_count everywhere except the single outlier below.
  samples = [
      (1, 2, 1), (2, 2, 2), (4, 1, 4), (5, 111, 5), (7, 13, 7),
      (8, 2, 8), (10, 12, 10), (15, 6, 15), (20, 40, 20),
      (30, 15, 50),  # Outlier
      (31, 2, 31), (35, 2, 35), (40, 30, 40), (45, 9, 45), (50, 4, 50),
      (55, 400, 55), (70, 10, 70), (100, 50, 100), (125, 15, 125),
      (150, 100, 150),
  ]
  data_points_str = [gen_data_point(r, o, d) for r, o, d in samples]
  proto_text = """
  op_models {
    op: "kFoo"
    estimator {
      regression {
        expressions {
          factor {
            source: RESULT_BIT_COUNT
          }
        }
        kfold_validator {
          max_data_point_error: 0.3
        }
      }
    }
  }
  """
  proto_text = proto_text + '\n'.join(data_points_str)
  with self.assertRaises(delay_model.Error) as e:
    delay_model.DelayModel(
        text_format.Parse(proto_text, delay_model_pb2.DelayModel()))
  self.assertEqualIgnoringWhitespaceAndFloats(
      'kFoo: Regression model failed k-fold '
      'cross validation for data point (30, 50) with absolute error 0.0'
      ' > max 0.0', str(e.exception))
  self.assertIn('> max 0.3', str(e.exception))
def run_characterization(
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization via 'stub', DelayModel to stdout as prototext.

  Characterizes every supported op in a fixed order, accumulating data points
  into a single DelayModel proto, validates it, and prints it as prototext
  (preceded by the proto-file/proto-message header comments).
  """
  model = delay_model_pb2.DelayModel()

  # Bin ops
  _run_linear_bin_op_and_add('add', 'kAdd', model, stub)
  _run_linear_bin_op_and_add('sub', 'kSub', model, stub)
  # Observed shift data is noisy.
  _run_linear_bin_op_and_add('shll', 'kShll', model, stub)
  _run_linear_bin_op_and_add('shrl', 'kShrl', model, stub)
  _run_linear_bin_op_and_add('shra', 'kShra', model, stub)
  _run_quadratic_bin_op_and_add('sdiv', 'kSDiv', model, stub, signed=True)
  _run_quadratic_bin_op_and_add('smod', 'kSMod', model, stub, signed=True)
  _run_quadratic_bin_op_and_add('udiv', 'kUDiv', model, stub)
  _run_quadratic_bin_op_and_add('umod', 'kUMod', model, stub)

  # Unary ops
  _run_unary_op_and_add('neg', 'kNeg', model, stub, signed=True)
  _run_unary_op_and_add('not', 'kNot', model, stub)

  # Nary ops
  _run_nary_op_and_add('and', 'kAnd', model, stub)
  _run_nary_op_and_add('nand', 'kNand', model, stub)
  _run_nary_op_and_add('nor', 'kNor', model, stub)
  _run_nary_op_and_add('or', 'kOr', model, stub)
  _run_nary_op_and_add('xor', 'kXor', model, stub)

  # Reduction ops
  _run_reduction_op_and_add('and_reduce', 'kAndReduce', model, stub)
  _run_reduction_op_and_add('or_reduce', 'kOrReduce', model, stub)
  _run_reduction_op_and_add('xor_reduce', 'kXorReduce', model, stub)

  # Comparison ops
  _run_comparison_op_and_add('eq', 'kEq', model, stub)
  _run_comparison_op_and_add('ne', 'kNe', model, stub)
  # Note: Could optimize for sign - accuracy gains from
  # sign have been marginal so far, though. These ops
  # also cost less than smul / sdiv anyway.
  _run_comparison_op_and_add('sge', 'kSGe', model, stub)
  _run_comparison_op_and_add('sgt', 'kSGt', model, stub)
  _run_comparison_op_and_add('sle', 'kSLe', model, stub)
  _run_comparison_op_and_add('slt', 'kSLt', model, stub)
  _run_comparison_op_and_add('uge', 'kUGe', model, stub)
  _run_comparison_op_and_add('ugt', 'kUGt', model, stub)
  _run_comparison_op_and_add('ule', 'kULe', model, stub)
  _run_comparison_op_and_add('ult', 'kULt', model, stub)

  # Select ops
  # For functions only called for 1 op, could just encode
  # op and kOp into function. However, perfer consistency
  # and readability of passing them in as args.
  # Note: Select op observed data is really weird, see _run_select_op_and_add
  _run_select_op_and_add('sel', 'kSel', model, stub)
  _run_one_hot_select_op_and_add('one_hot_sel', 'kOneHotSel', model, stub)

  # Encode ops
  _run_encode_op_and_add('encode', 'kEncode', model, stub)
  _run_decode_op_and_add('decode', 'kDecode', model, stub)

  # Dynamic bit slice op
  _run_dynamic_bit_slice_op_and_add('dynamic_bit_slice', 'kDynamicBitSlice',
                                    model, stub)

  # One hot op
  _run_one_hot_op_and_add('one_hot', 'kOneHot', model, stub)

  # Mul ops
  # Note: Modeling smul w/ sign bit as with sdiv decreases accuracy.
  _run_mul_op_and_add('smul', 'kSMul', model, stub)
  _run_mul_op_and_add('umul', 'kUMul', model, stub)

  # Array ops
  _run_array_index_op_and_add('array_index', 'kArrayIndex', model, stub)
  _run_array_update_op_and_add('array_update', 'kArrayUpdate', model, stub)

  # Add free ops: ops modeled as having a fixed delay of 0.
  for free_op in FREE_OPS:
    entry = model.op_models.add(op=free_op)
    entry.estimator.fixed = 0

  # Final validation
  delay_model.DelayModel(model)

  print('# proto-file: xls/delay_model/delay_model.proto')
  print('# proto-message: xls.delay_model.DelayModel')
  print(model)
def _run_mul_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for a mul op."""

  def _get_big_op_bitcount_expr():
    """Returns larger bit count of the two operands."""
    expr = delay_model_pb2.DelayExpression()
    _set_max_expression(expr)
    _set_operand_bit_count_expression_factor(expr.lhs_expression, 0)
    _set_operand_bit_count_expression_factor(expr.rhs_expression, 1)
    return expr

  def _get_small_op_bitcount_expr():
    """Returns smaller bit count of the two operands."""
    expr = delay_model_pb2.DelayExpression()
    _set_min_expression(expr)
    _set_operand_bit_count_expression_factor(expr.lhs_expression, 0)
    _set_operand_bit_count_expression_factor(expr.rhs_expression, 1)
    return expr

  def _get_zero_expr():
    """Returns a constant 0 expression."""
    expr = delay_model_pb2.DelayExpression()
    _set_constant_expression(expr, 0)
    return expr

  def _get_meaningful_width_bits_expr():
    """Returns the maximum number of meaningful result bits.

    This is operand_bit_count(0) + operand_bit_count(1) -- any result bits
    beyond that are always zero.
    """
    expr = delay_model_pb2.DelayExpression()
    _set_add_expression(expr)
    _set_operand_bit_count_expression_factor(expr.lhs_expression, 0)
    _set_operand_bit_count_expression_factor(expr.rhs_expression, 1)
    return expr

  def _get_bounded_width_offset_domain(begin_expr, end_expr):
    """Gives the bounded offset of result_bit_count.

    Gives the bounded offset of result_bit_count into the range
    [begin_expr, end_expr] e.g.:

    for begin_expr = 2, end_expr = 4, we map:
      1 --> 0
      2 --> 0
      3 --> 1
      4 --> 2
      5 --> 2
      6 --> 2
      etc.

    expr = min(end_expr, max(begin_expr, result_bit_count)) - begin_expr

    Args:
      begin_expr: begin of range.
      end_expr: end of range.

    Returns:
      Bounded offset of result_bit_count.
    """
    expr = delay_model_pb2.DelayExpression()
    _set_sub_expression(expr)
    expr.rhs_expression.CopyFrom(begin_expr)
    min_expr = expr.lhs_expression
    _set_min_expression(min_expr)
    min_expr.lhs_expression.CopyFrom(end_expr)
    max_expr = min_expr.rhs_expression
    _set_max_expression(max_expr)
    max_expr.lhs_expression.CopyFrom(begin_expr)
    _set_result_bit_count_expression_factor(max_expr.rhs_expression)
    return expr

  def _get_rectangle_area(width_expr, height_expr):
    """Return the area of a rectangle of dimensions width_expr * height_expr."""
    expr = delay_model_pb2.DelayExpression()
    _set_multiply_expression(expr)
    expr.lhs_expression.CopyFrom(width_expr)
    expr.rhs_expression.CopyFrom(height_expr)
    return expr

  def _get_triangle_area(width_expr):
    """Return the area of a isosceles right triangle.

    expression: _get_rectangle_area(width_expr, width_expr) / 2

    Args:
      width_expr: Width expression.

    Returns:
      The area of a isosceles right triangle.
    """
    expr = delay_model_pb2.DelayExpression()
    _set_divide_expression(expr)
    sqr_expression = _get_rectangle_area(width_expr, width_expr)
    expr.lhs_expression.CopyFrom(sqr_expression)
    _set_constant_expression(expr.rhs_expression, 2)
    return expr

  def _get_partial_triangle_area(width_expr, max_width_expr):
    """Return the area of a partial isosceles right triangle.

            | /|            -
            |/ |            |
           /|  |            |
          / |  |            |  height = maximum_width
         /| |  |            |
        / | |ar|ea          |
       /  | |  |            |
      /___|_|__|            -
          |_|
          width
      |________|
      maximum_width

    expr = rectangle_area(width, maximum_width) - triangle_area(width)

    Args:
      width_expr: Width expression.
      max_width_expr: Max width expression.

    Returns:
      Area of partial isosceles right triangle.
    """
    expr = delay_model_pb2.DelayExpression()
    _set_sub_expression(expr)
    rectangle_expr = _get_rectangle_area(width_expr, max_width_expr)
    expr.lhs_expression.CopyFrom(rectangle_expr)
    triangle_expr = _get_triangle_area(width_expr)
    expr.rhs_expression.CopyFrom(triangle_expr)
    return expr

  # Compute for multiply can be divided into 3 regions, keyed off where the
  # result bit count falls relative to the operand widths.
  # Regions:
  #        A         B     C
  #   |---------|-------|----
  #             |       |   /|
  #             |       |  / |
  #             |       | /  |   height = min(op[0], op[1])
  #             |       |/   |
  #   ----------|-------|----
  #   |_________________|
  #    max(op[0], op[1])
  #                     |____|
  #                     min(op[0], op[1])
  #   |______________________|
  #    max(op[0], op[1]) + min(op[0], op[1])
  #
  # *Math works out the same whether op[0] or op[1] is larger.
  # expr = area(region C) + (area(region B) + area(region A))

  # Top level add
  add_op_model = _new_regression_op_model(model, kop)
  outer_add_expr = _new_expression(add_op_model)
  _set_add_expression(outer_add_expr)

  # precompute min/max(op[0], op[1])
  big_op_bitcount_expr = _get_big_op_bitcount_expr()
  small_op_bitcount_expr = _get_small_op_bitcount_expr()

  # Region C: result width within [0, min(op widths)] -- triangular growth.
  region_c_domain_expr = _get_bounded_width_offset_domain(
      _get_zero_expr(), small_op_bitcount_expr)
  region_c_area_expr = _get_triangle_area(region_c_domain_expr)
  outer_add_expr.lhs_expression.CopyFrom(region_c_area_expr)

  # Inner add
  inner_add_expr = outer_add_expr.rhs_expression
  _set_add_expression(inner_add_expr)

  # Region B: result width within [min, max] of op widths -- rectangular.
  region_b_domain_expr = _get_bounded_width_offset_domain(
      small_op_bitcount_expr, big_op_bitcount_expr)
  region_b_area_expr = _get_rectangle_area(region_b_domain_expr,
                                           small_op_bitcount_expr)
  inner_add_expr.lhs_expression.CopyFrom(region_b_area_expr)

  # Region A: result width beyond max(op widths) -- partial triangle.
  region_a_domain_expr = _get_bounded_width_offset_domain(
      big_op_bitcount_expr, _get_meaningful_width_bits_expr())
  region_a_area_expr = _get_partial_triangle_area(region_a_domain_expr,
                                                  small_op_bitcount_expr)
  inner_add_expr.rhs_expression.CopyFrom(region_a_area_expr)

  # All bit counts should be at least 2
  for mplier_count in _bitwidth_sweep(2):
    for mcand_count in _bitwidth_sweep(2):
      for node_count in _bitwidth_sweep(2):
        model.data_points.append(
            _build_data_point_bit_types(op, kop, node_count,
                                        [mplier_count, mcand_count], stub))
        logging.info(
            '# mul: %s, %s * %s, %s node count, result_bits --> %s', op,
            str(mplier_count), str(mcand_count), str(node_count),
            str(model.data_points[-1].delay))

  # Validate model
  delay_model.DelayModel(model)
def _run_array_update_op_and_add(
    op: str, kop: str, model: delay_model_pb2.DelayModel,
    stub: synthesis_service_pb2_grpc.SynthesisServiceStub) -> None:
  """Runs characterization for the ArrayUpdate op."""
  add_op_model = _new_regression_op_model(model, kop)

  # Area is a function of #elements*weight + elements*bitwidth*weight.
  #
  # This seems to hold across a range of element counts, bitwidth, and number
  # of dimensions i.e.
  #
  # The weight isn't an artifact of where we sampled data - It is actually
  # ~constant rather than being something like the ratio of #elements to
  # #bitwidths or similar.
  def _set_addressable_element_count_expression(elm_expr):
    # operand_bit_count(0) / operand_bit_count(1): total array bits divided
    # by element bits, i.e. the number of addressable elements.
    _set_divide_expression(elm_expr)
    _set_operand_bit_count_expression_factor(elm_expr.lhs_expression, 0)
    _set_operand_bit_count_expression_factor(elm_expr.rhs_expression, 1)

  # First regression term: #elements.
  elm_expr = _new_expression(add_op_model)
  _set_addressable_element_count_expression(elm_expr)
  # Second regression term: #elements * element bit width.
  mul_expr = _new_expression(add_op_model)
  _set_multiply_expression(mul_expr)
  _set_addressable_element_count_expression(mul_expr.lhs_expression)
  _set_operand_bit_count_expression_factor(mul_expr.rhs_expression, 1)

  for num_dims in range(1, 3):
    for array_dimension_sizes in _yield_array_dimension_sizes(num_dims):
      # If single-dimension array, increase number of elements.
      if num_dims == 1:
        assert len(array_dimension_sizes) == 1
        array_dimension_sizes[0] = array_dimension_sizes[0] * 2
      for element_bit_count in _bitwidth_sweep(3):
        # Element width first, then the array dimensions.
        array_and_element_dimensions = [element_bit_count
                                       ] + array_dimension_sizes
        # Format dimension args: the array being updated, the new element,
        # and one index operand per array dimension (innermost last).
        operand_dimensions = [array_and_element_dimensions]
        operand_dimensions.append([element_bit_count])
        for dim in reversed(array_dimension_sizes):
          operand_dimensions.append([bits.min_bit_count_unsigned(dim - 1)])
        # Record data point
        result = _build_data_point(op, kop, array_and_element_dimensions,
                                   operand_dimensions, stub)
        # Append flattened-operand entries so the regression factors (which
        # read operand bit counts) see the total array bits and element bits.
        array_operand = result.operation.operands.add()
        array_operand.bit_count = functools.reduce(
            operator.mul, array_and_element_dimensions, 1)
        new_elm_operand = result.operation.operands.add()
        new_elm_operand.bit_count = element_bit_count
        model.data_points.append(result)
        logging.info('%s: %s --> %s', str(kop),
                     ','.join(str(item) for item in operand_dimensions),
                     str(result.delay))

  # Validate model
  delay_model.DelayModel(model)