def start(self, args):
    """Build a ``Program`` from the parsed top-level items in ``args``.

    ``Declaration`` items become the program's declarations; ``AbstractLoop``
    and ``Assignment`` items become its body. Any other item type raises
    ``RuntimeError``.

    Every variable that is accessed in the body but is neither a declared
    name nor a loop variable is treated as an implicit constant. These are
    the constants created when the bounds and steps of loop vars are not
    explicitly stated. They are emitted as ``Const`` nodes sorted by name.
    """
    decls, body = [], []
    for arg in args:
        kind = type(arg)
        if kind == Declaration:
            decls.append(arg)
        elif kind in [AbstractLoop, Assignment]:
            body.append(arg)
        else:
            raise RuntimeError('Unsupported syntax in main program')

    # Names that are definitely not constants: declared names and loop vars.
    non_consts = {decl.name for decl in decls}
    non_consts.update(
        shape.loop_var.var
        for stmt in body
        for loop in get_loops(stmt)
        for shape in loop.loop_shapes
    )

    # Whatever else is accessed in the body is an implicit constant.
    implicit_names = {
        access.var
        for stmt in body
        for access in get_accesses(stmt)
        if access.var not in non_consts
    }
    consts = [Const(name) for name in sorted(implicit_names)]

    return Program(decls, body, consts)
def transform(self, pattern, tries=None):
    """Yield clones of ``pattern`` whose loop shapes were randomly reordered.

    The dependence analysis runs once up front. Each attempt clones the
    analyzed pattern, picks a random order for every loop, and yields the
    clone only when every chosen order is reported as permutable by
    ``is_permutable``. Attempts continue while ``tries.next()`` is truthy;
    when ``tries`` is omitted an ``NTries(10)`` budget is used.
    """
    dependence_graph, pattern_with_ids = analyze_dependence(pattern)
    if tries is None:
        tries = NTries(10)

    while tries.next():
        candidate = pattern_with_ids.clone()
        for loop in get_loops(candidate):
            order = randomize_loop_order(loop)
            if not is_permutable(dependence_graph, loop, order):
                # One illegal loop invalidates the whole candidate.
                break
            reorder(order, loop.loop_shapes)
        else:
            # No loop was rejected, so the candidate is legal.
            yield candidate
def create_instance_with_fixed_size(pattern, size):
    """Create an instance of ``pattern`` where every loop runs from 0 to size-1.

    A ``VariableMap`` with ``default_max=size`` is built, and for each
    distinct loop variable the ``<var>_greater_eq`` and ``<var>_less_eq``
    entries are pinned (min == max) to ``0`` and ``size - 1`` respectively.
    The result of ``create_instance(pattern, var_map)`` is returned.
    """
    def pin(var_map, name, value):
        # Setting min and max to the same value fixes the variable exactly.
        var_map.set_min(name, value)
        var_map.set_max(name, value)

    def set_exact_loop_bounds(var_map, loop_var, min_val, max_val):
        pin(var_map, f'{loop_var}_greater_eq', min_val)
        pin(var_map, f'{loop_var}_less_eq', max_val)

    loop_vars = gather_loop_vars(gather_loop_shapes(get_loops(pattern)))

    var_map = VariableMap(default_max=size)
    for loop_var in set(loop_vars):
        set_exact_loop_bounds(var_map, loop_var, 0, size - 1)

    return create_instance(pattern, var_map)
def try_once(): random_pattern = replace_constant_variables_blindly(pattern, var_map) cvars = get_scalar_cvars(random_pattern) accesses = get_accesses(random_pattern) cloned_var_map = var_map.clone() # Set array sizes in var map for decl in random_pattern.decls: for dimension in range(decl.n_dimensions): size = decl.sizes[dimension] if size is not None: dim_var = dimension_var(decl.name, dimension) # TODO: we don't really need min cloned_var_map.set_min(dim_var, 0) # TODO: maybe allow more room for the max cloned_var_map.set_max(dim_var, size) index_constraints = generate_index_constraints(accesses, cvars, cloned_var_map) loop_shape_constraints = [] for loop in get_loops(random_pattern): loop_shape_constraints += generate_loop_shape_constraints(loop.loop_shapes, cvars, cloned_var_map) bound_constraints = generate_bound_constraints(random_pattern.decls, cvars, cloned_var_map) l.debug('Index constraints:\n' + '\n'.join(map(str, index_constraints))) l.debug('Loop shape constraints:\n' + '\n'.join(map(str, loop_shape_constraints))) l.debug('Bound constraints:\n' + '\n'.join(map(str, bound_constraints))) assert(len(index_constraints) > 0) invert_index_constraints = Not(And(index_constraints)) constraints = [invert_index_constraints] + loop_shape_constraints + bound_constraints solver = Solver() solver.set('timeout', 10000) solver.add(constraints) status = solver.check() if status != unsat: l.debug(f'Constraints are not unsatisfiable ({status}). ' 'May result in index out of bound') l.debug('Constraints:\n' + '\n'.join(map(str, constraints))) if status == sat: l.debug(f'Model:\n{solver.model()}') return None constraints = index_constraints + loop_shape_constraints + bound_constraints solver = Solver() solver.set('timeout', 10000) solver.add(constraints) status = solver.check() if status != sat: l.debug(f'Constraints are not satisfiable ({status}). 
' 'May result in no iterations') l.debug('\n'.join(map(str, constraints))) return None bounds = determine_array_access_bounds(random_pattern.decls, accesses, cvars, constraints, cloned_var_map, l) if bounds is None: return None # assign types once we're sure it's a valid program nonlocal types if types is None: types = TypeAssignment() assign_types(random_pattern, types) return Instance(random_pattern, bounds)
def transform(self, pattern):
    """Yield randomly unrolled variants of ``pattern``, without end.

    For every yielded variant, each loop variable of the pattern draws a
    random unroll factor from ``[1, self.max_factor]``. A factor of 1 leaves
    that loop variable untouched. A loop variable is only unrolled when its
    lower bound, upper bound and step are all integer ``Literal`` nodes;
    otherwise it is logged as not unrollable and skipped.

    An unrolled loop variable is replaced by two loops:

    * the unrolled loop, whose step is ``step * factor`` and whose body
      holds ``factor`` copies of the original body, each rewritten through
      ``UnrollReplacer(loop_var, k * step)`` for ``k`` in ``range(factor)``
      (presumably offsetting the loop variable - confirm in UnrollReplacer);
    * the remainder loop, which covers the leftover iterations with the
      original step and an unmodified copy of the body.

    Loop shapes listed before the unrolled variable stay in an enclosing
    loop that keeps the original node id; shapes listed after it are
    re-created inside each body copy.

    This is an infinite generator: the caller decides when to stop.
    """
    pattern_with_ids = assign_node_ids(pattern)

    loop_vars = []
    for loop in get_loops(pattern_with_ids):
        loop_id = loop.attributes['node_id']
        for shape in loop.loop_shapes:
            assert(type(shape.loop_var) == Access)
            loop_vars.append((loop_id, shape.loop_var.var))

    def find_depth(node, loop_id, loop_var, current_depth):
        """Return ``(depth, shape index)`` of the loop var, or ``None``.

        Every statement of a body is searched. Returning from the first
        statement only would assign a meaningless depth to any loop that
        is not on the first-statement chain.
        """
        if type(node) == Program:
            children = node.body
        elif type(node) == AbstractLoop:
            if node.attributes['node_id'] == loop_id:
                for i, shape in enumerate(node.loop_shapes):
                    if shape.loop_var.var == loop_var:
                        return (current_depth, i)
                raise RuntimeError(f'Loop var {loop_var} not found in {node.pprint()}')
            children = node.body
        else:
            # Leaf statements contain no loops.
            return None

        for stmt in children:
            found = find_depth(stmt, loop_id, loop_var, current_depth + 1)
            if found is not None:
                return found
        return None

    def depth(id_var_pair):
        loop_id, loop_var = id_var_pair
        found = find_depth(pattern_with_ids, loop_id, loop_var, 0)
        # Keep the sort key comparable even if the traversal cannot reach
        # the loop; such entries are simply handled last.
        return (0, 0) if found is None else found

    def is_int_literal(node):
        return type(node) == Literal and node.ty == int

    # Deepest loops first and, within one loop, the last loop var first.
    # Unrolling a loop var keeps the loop's node id only on the shapes that
    # precede it, so earlier vars of the same loop must be handled later.
    sorted_loop_vars = sorted(loop_vars, key=depth, reverse=True)

    # TODO: assign unique node ids
    while True:
        cloned = pattern_with_ids.clone()
        for loop_id, loop_var in sorted_loop_vars:
            factor = random.randint(1, self.max_factor)
            if factor == 1:
                continue

            # Locate the loop in the clone; the last match wins.
            loop = None
            for candidate in get_loops(cloned):
                if candidate.attributes['node_id'] == loop_id:
                    loop = candidate
            assert(loop is not None)

            loop_shapes_before = []
            loop_var_index = None
            unroll_shape = None
            remainder_shape = None
            step_val = None
            is_unrollable = True
            for i, shape in enumerate(loop.loop_shapes):
                if shape.loop_var.var != loop_var:
                    loop_shapes_before.append(shape)
                    continue

                loop_var_index = i

                # Build the unroll shape
                # only support literals for simplicity
                logger.info('trying')
                if not (is_int_literal(shape.greater_eq) and
                        is_int_literal(shape.less_eq) and
                        is_int_literal(shape.step)):
                    is_unrollable = False
                    break
                logger.info('passed')

                step_val = shape.step.val
                unroll_greater_eq = shape.greater_eq.val
                unroll_step = step_val * factor
                # Number of complete groups of `factor` iterations.
                unroll_n_iterations = (
                    (shape.less_eq.val - shape.greater_eq.val + step_val)
                    // (step_val * factor))
                unroll_less_eq = (unroll_greater_eq
                                  + (unroll_n_iterations - 1) * unroll_step)
                unroll_shape = LoopShape(shape.loop_var.clone(),
                                         Literal(int, unroll_greater_eq),
                                         Literal(int, unroll_less_eq),
                                         Literal(int, unroll_step))

                # Build the remainder shape: it starts right after the last
                # complete group and keeps the original step and end.
                remainder_greater_eq = unroll_less_eq + unroll_step
                remainder_less_eq = shape.less_eq.val
                remainder_step = step_val
                remainder_shape = LoopShape(shape.loop_var.clone(),
                                            Literal(int, remainder_greater_eq),
                                            Literal(int, remainder_less_eq),
                                            Literal(int, remainder_step))
                break

            if not is_unrollable:
                logger.info('%s is not unrollable', loop_var)
                continue

            assert(loop_var_index is not None)
            assert(unroll_shape is not None)
            assert(remainder_shape is not None)

            loop_shapes_after = list(loop.loop_shapes[loop_var_index+1:])

            # One rewritten copy of the body per unrolled iteration.
            unrolled_body = []
            for copy_index in range(factor):
                replacer = UnrollReplacer(loop_var, copy_index * step_val)
                unrolled_innermost_body = []
                for stmt in loop.body:
                    unrolled_stmt = stmt.clone()
                    unrolled_stmt.replace(replacer)
                    unrolled_innermost_body.append(unrolled_stmt)
                if len(loop_shapes_after) == 0:
                    unrolled_body += unrolled_innermost_body
                else:
                    shapes = [shape.clone() for shape in loop_shapes_after]
                    unrolled_body.append(AbstractLoop(shapes, unrolled_innermost_body))

            remainder_innermost_body = [stmt.clone() for stmt in loop.body]
            if len(loop_shapes_after) == 0:
                remainder_body = remainder_innermost_body
            else:
                shapes = [shape.clone() for shape in loop_shapes_after]
                remainder_body = [AbstractLoop(shapes, remainder_innermost_body)]

            unrolled_loop = AbstractLoop([unroll_shape], unrolled_body)
            remainder_loop = AbstractLoop([remainder_shape], remainder_body)

            # The unroll sequence is the unrolled loop followed by the remainder loop
            if len(loop_shapes_before) == 0:
                unroll_sequence = [unrolled_loop, remainder_loop]
            else:
                # The surrounding loop needs to preserve the loop_id
                # since the surrounding loops may be unrolled as well
                unroll_sequence = [AbstractLoop(loop_shapes_before,
                                                [unrolled_loop, remainder_loop],
                                                loop_id)]

            # Replace the original loop with the unroll sequence
            index = loop.surrounding_loop.find_stmt(loop)
            loop.surrounding_loop.remove_stmt(loop)
            loop.surrounding_loop.insert_stmts(index, unroll_sequence)
        yield cloned