def transform(self):
    '''The implementation of the abstract transform method for OpenCL

    The device properties are obtained from self.getDeviceProps() only while
    the module-level dev_props is still None; that value is also stored in
    self.props when self.props is None.  The transformation arguments are
    merged into the global metadata before the statement is transformed, and
    the result of self.openclify is returned.
    '''
    global dev_props

    # the device is queried once; later calls reuse the module-level value
    if dev_props is None:
        dev_props = self.getDeviceProps()
    if self.props is None:
        self.props = dev_props

    # read the transformation arguments and record them in the global metadata
    transf_arg_vals = self.readTransfArgs(self.perf_params, self.transf_args)
    g.Globals().metadata.update(transf_arg_vals)

    # transform the statement and hand back the result
    return self.openclify(self.stmt, transf_arg_vals)
def transform(self):
    '''To unroll-and-jam the enclosed for-loop

    Returns a compound statement that holds the declaration of the loop index,
    an OpenMP pragma (only when self.parallelize is set), the unrolled loop and
    the clean-up loop that executes the remaining iterations.  When
    self.ufactor is 1 the loop is only rebuilt and no clean-up loop is emitted.
    '''
    loop_ast = orio.module.loop.ast
    int_type = loop_ast.NumLitExp.INT

    def omp_pragma_for(for_loop):
        # build the OpenMP pragma that privatizes the index names of the given loop
        inames = self.flib.getLoopIndexNames(for_loop)
        if inames:
            return loop_ast.Pragma('omp parallel for private(%s)' % ','.join(inames))
        return loop_ast.Pragma('omp parallel for')

    def as_operand(exp):
        # numeric literals are used as they are; any other expression is
        # parenthesized so that operator precedence stays explicit
        if isinstance(exp, loop_ast.NumLitExp):
            return exp
        return loop_ast.ParenthExp(exp)

    # get rid of compound statement that contains only a single statement
    while (isinstance(self.stmt.stmt, loop_ast.CompStmt)
           and len(self.stmt.stmt.stmts) == 1):
        self.stmt.stmt = self.stmt.stmt.stmts[0]

    # extract for-loop structure
    for_loop_info = self.flib.extractForLoopInfo(self.stmt)
    index_id, lbound_exp, ubound_exp, stride_exp, loop_body = for_loop_info
    index_decl = ast.VarDecl('int', [index_id.name])

    # when ufactor = 1, no transformation will be applied
    if self.ufactor == 1:
        orig_loop = self.flib.createForLoop(index_id, lbound_exp, ubound_exp,
                                            stride_exp, loop_body)
        if self.parallelize:
            return ast.CompStmt([index_decl, omp_pragma_for(orig_loop), orig_loop])
        return ast.CompStmt([index_decl, orig_loop])

    # start generating the unrolled loop

    # compute lower bound --> new_LB = LB
    new_lbound_exp = lbound_exp.replicate()

    # compute upper bound --> new_UB = UB-ST*(UF-1)
    unrolled_reach = loop_ast.BinOpExp(
        stride_exp.replicate(),
        loop_ast.NumLitExp(self.ufactor - 1, int_type),
        loop_ast.BinOpExp.MUL)
    new_ubound_exp = self.cfolder.fold(
        loop_ast.BinOpExp(ubound_exp.replicate(), unrolled_reach,
                          loop_ast.BinOpExp.SUB))

    # compute stride --> new_ST = UF*ST
    new_stride_exp = self.cfolder.fold(
        loop_ast.BinOpExp(loop_ast.NumLitExp(self.ufactor, int_type),
                          stride_exp.replicate(),
                          loop_ast.BinOpExp.MUL))

    # obtain info about whether or not to introduce new variables
    self.__computeNewVarsIntro(loop_body.replicate())

    # compute unrolled statements: copy number i of the body uses index + i*ST
    unrolled_stmt_seqs = []
    for i in range(self.ufactor):
        body_copy = loop_body.replicate()
        increment_exp = self.cfolder.fold(
            loop_ast.BinOpExp(loop_ast.NumLitExp(i, int_type),
                              stride_exp.replicate(),
                              loop_ast.BinOpExp.MUL))
        ns = self.__addIdentWithExp(body_copy, index_id.name, increment_exp)
        ns = self.cfolder.fold(ns)
        if isinstance(ns, loop_ast.CompStmt):
            unrolled_stmt_seqs.append(ns.stmts)
        else:
            unrolled_stmt_seqs.append([ns])

    # compute the unrolled loop body by jamming/fusing the unrolled statements
    if self.do_jamming:
        unrolled_loop_body = self.__jamStmts(unrolled_stmt_seqs)
    else:
        # plain concatenation of the copies (linear, and needs no reduce())
        unrolled_stmts = [stmt for seq in unrolled_stmt_seqs for stmt in seq]
        unrolled_loop_body = loop_ast.CompStmt(unrolled_stmts)

    # generate the unrolled loop
    # NOTE(review): lbound_init is not part of the returned statement; it is
    # still built because getcounter() presumably advances a global counter
    # that other generated names depend on -- confirm before removing.
    lbound_name = 'orio_lbound' + str(g.Globals().getcounter())
    lbound_name_exp = loop_ast.IdentExp(lbound_name)
    lbound_init = loop_ast.VarDeclInit('int', lbound_name_exp, new_lbound_exp)
    loop = self.flib.createForLoop(index_id, new_lbound_exp, new_ubound_exp,
                                   new_stride_exp, unrolled_loop_body)

    # generate the cleanup-loop lower-bound expression
    #   cleanup_LB = UB - ((UB - LB + ST) % (UF*ST)) + ST
    # The unrolled loop starts at LB, advances by UF*ST and stops once
    # index > UB - ST*(UF-1), so it runs floor((UB-LB+ST) / (UF*ST)) times and
    # the first index it leaves untouched is the value above.  For ST = 1 this
    # is UB - ((UB - LB + 1) % UF) + 1; using the literal 1 for every stride
    # skipped remainder iterations whenever ST != 1.
    # The lower bound is always generated explicitly (the CUDA submodule needs one).
    t = loop_ast.BinOpExp(
        loop_ast.ParenthExp(ubound_exp.replicate()),
        loop_ast.ParenthExp(lbound_exp.replicate()),
        loop_ast.BinOpExp.SUB)
    t = loop_ast.BinOpExp(t, as_operand(stride_exp.replicate()),
                          loop_ast.BinOpExp.ADD)
    t = loop_ast.BinOpExp(
        loop_ast.ParenthExp(t),
        as_operand(new_stride_exp.replicate()),
        loop_ast.BinOpExp.MOD)
    t = loop_ast.BinOpExp(
        loop_ast.ParenthExp(ubound_exp.replicate()),
        loop_ast.ParenthExp(t),
        loop_ast.BinOpExp.SUB)
    cleanup_lbound_exp = loop_ast.BinOpExp(
        loop_ast.ParenthExp(t),
        as_operand(stride_exp.replicate()),
        loop_ast.BinOpExp.ADD)
    cleanup_lbound_exp = self.cfolder.fold(cleanup_lbound_exp)

    # generate the clean-up loop
    # NOTE(review): cleanup_lbound_init is unused as well; kept for the same
    # reason as lbound_init above.
    cleanup_lbound_name = 'orio_lbound' + str(g.Globals().getcounter())
    cleanup_lbound_name_exp = loop_ast.IdentExp(cleanup_lbound_name)
    cleanup_lbound_init = loop_ast.VarDeclInit(
        'int', cleanup_lbound_name_exp, cleanup_lbound_exp)
    cleanup_loop = self.flib.createForLoop(index_id, cleanup_lbound_exp,
                                           ubound_exp, stride_exp, loop_body)

    # generate the transformed statement
    if self.parallelize:
        stmts = [index_decl, omp_pragma_for(loop), loop, cleanup_loop]
    else:
        stmts = [index_decl, loop, cleanup_loop]

    # return the transformed statement
    return loop_ast.CompStmt(stmts)
def generate(self, tnode, indent=' ', extra_indent=' '):
    '''To generate code that corresponds to the given AST

    tnode        -- the AST node to translate
    indent       -- whitespace placed in front of each generated statement
    extra_indent -- whitespace added to indent for every nested level

    Returns the generated code as a string.  Unknown operators, unknown node
    types and exceptions raised while generating compound, if and for
    statements are reported through g.err.
    '''
    s = ''

    if isinstance(tnode, ast.NumLitExp):
        s += str(tnode.val)

    elif isinstance(tnode, ast.StringLitExp):
        s += str(tnode.val)

    elif isinstance(tnode, ast.IdentExp):
        s += str(tnode.name)

    elif isinstance(tnode, ast.ArrayRefExp):
        s += self.generate(tnode.exp, indent, extra_indent)
        s += '[' + self.generate(tnode.sub_exp, indent, extra_indent) + ']'

    elif isinstance(tnode, ast.FunCallExp):
        s += self.generate(tnode.exp, indent, extra_indent) + '('
        s += ','.join(
            [self.generate(x, indent, extra_indent) for x in tnode.args])
        s += ')'

    elif isinstance(tnode, ast.UnaryExp):
        s = self.generate(tnode.exp, indent, extra_indent)
        # (operator, text before the operand, text after the operand);
        # the first entry equal to the node's operator is used
        unary_forms = (
            (tnode.PLUS, '+', ''),
            (tnode.MINUS, '-', ''),
            (tnode.LNOT, '!', ''),
            (tnode.PRE_INC, ' ++', ''),
            (tnode.PRE_DEC, ' --', ''),
            (tnode.POST_INC, '', '++ '),
            (tnode.POST_DEC, '', '-- '),
            (tnode.DEREF, '*', ''),
            (tnode.ADDRESSOF, '&', ''),
        )
        for op_type, before, after in unary_forms:
            if tnode.op_type == op_type:
                s = before + s + after
                break
        else:
            g.err(
                'orio.module.loop.codegen internal error: unknown unary operator type: %s'
                % tnode.op_type)

    elif isinstance(tnode, ast.BinOpExp):
        s += self.generate(tnode.lhs, indent, extra_indent)
        # (operator, text placed between the operands); first match is used
        binary_forms = (
            (tnode.MUL, ' * '),
            (tnode.DIV, ' / '),
            (tnode.MOD, ' % '),
            (tnode.ADD, ' + '),
            (tnode.SUB, ' - '),
            (tnode.LT, ' < '),
            (tnode.GT, ' > '),
            (tnode.LE, ' <= '),
            (tnode.GE, ' >= '),
            (tnode.EQ, ' == '),
            (tnode.NE, ' != '),
            (tnode.LOR, ' || '),
            (tnode.LAND, ' && '),
            (tnode.COMMA, ', '),
            (tnode.EQ_ASGN, ' = '),
        )
        for op_type, op_text in binary_forms:
            if tnode.op_type == op_type:
                s += op_text
                break
        else:
            g.err(
                'orio.module.loop.codegen internal error: unknown binary operator type: %s'
                % tnode.op_type)
        s += self.generate(tnode.rhs, indent, extra_indent)

    elif isinstance(tnode, ast.ParenthExp):
        s += '(' + self.generate(tnode.exp, indent, extra_indent) + ')'

    elif isinstance(tnode, ast.Comment):
        s += indent
        if tnode.text:
            s += '/*' + tnode.text + '*/'
        s += '\n'

    elif isinstance(tnode, ast.ExpStmt):
        if tnode.getLabel():
            s += tnode.getLabel() + ':'
        s += indent
        if tnode.exp:
            s += self.generate(tnode.exp, indent, extra_indent)
        s += ';\n'

    elif isinstance(tnode, ast.GotoStmt):
        if tnode.getLabel():
            s += tnode.getLabel() + ':'
        s += indent
        if tnode.target:
            s += 'goto ' + tnode.target + ';\n'

    elif isinstance(tnode, ast.CompStmt):
        try:
            tmp = tnode.meta.get('id')
            # when marker loops are enabled, a compound statement carrying an
            # 'id' is preceded by a one-iteration for loop named after that id
            if tmp and g.Globals().marker_loops:
                fake_scope_loop = 'for (int %s=0; %s < 1; %s++)' % (
                    tmp, tmp, tmp)
                s += indent + fake_scope_loop
                indent += extra_indent
            s += indent + '{\n'

            # declarations are de-duplicated per compound statement
            self.alldecls = set()
            for stmt in tnode.stmts:
                g.debug('generating code for stmt type: %s' %
                        stmt.__class__.__name__, obj=self, level=7)
                s += self.generate(stmt, indent + extra_indent, extra_indent)
                g.debug('code so far:' + s, obj=self, level=7)

            s += indent + '}\n'
        except Exception as e:
            g.err(
                'orio.module.loop.codegen:%s: encountered an error in C code generation for CompStmt: %s %s'
                % (tnode.line_no, e.__class__, e))

    elif isinstance(tnode, ast.IfStmt):
        try:
            if tnode.getLabel():
                s += tnode.getLabel() + ':'
            s += indent + 'if (' + self.generate(tnode.test, indent,
                                                 extra_indent) + ') '
            if isinstance(tnode.true_stmt, ast.CompStmt):
                # keep only the text from the opening brace on, so that the
                # brace follows 'if (...) ' on the same line
                tstmt_s = self.generate(tnode.true_stmt, indent, extra_indent)
                s += tstmt_s[tstmt_s.index('{'):]
                if tnode.false_stmt:
                    # drop the last character so 'else' follows the '}'
                    s = s[:-1] + ' else '
            else:
                s += '\n'
                s += self.generate(tnode.true_stmt, indent + extra_indent,
                                   extra_indent)
                if tnode.false_stmt:
                    s += indent + 'else '
            if tnode.false_stmt:
                if isinstance(tnode.false_stmt, ast.CompStmt):
                    tstmt_s = self.generate(tnode.false_stmt, indent,
                                            extra_indent)
                    s += tstmt_s[tstmt_s.index('{'):]
                else:
                    s += '\n'
                    s += self.generate(tnode.false_stmt,
                                       indent + extra_indent, extra_indent)
        except Exception as e:
            g.err(
                'orio.module.loop.codegen:%s: encountered an error in C code generation for IfStmt: %s %s '
                % (tnode.line_no, e.__class__, e))

    elif isinstance(tnode, ast.ForStmt):
        try:
            tmp = tnode.meta.get('id')
            fake_loop = False

            # a marker loop is emitted only when the enclosing compound or for
            # statement does not carry an 'id' itself
            parent_with_id = False
            if tnode.parent:
                if isinstance(tnode.parent, (ast.CompStmt, ast.ForStmt)):
                    if tnode.parent.meta.get('id'):
                        parent_with_id = True
            if not parent_with_id and tmp and g.Globals().marker_loops:
                fake_loop = True
                fake_scope_loop = 'for (int %s=0; %s < 1; %s++)' % (
                    tmp, tmp, tmp)
                s += indent + fake_scope_loop + ' {\n'
                indent += extra_indent

            # In some cases, we wish loop index variables to be accessible after the
            # corresponding loop. For example, the remainder loop generated by register
            # tiling reuses the index variable from the preceding loop, hence, it is
            # declared before the actual loop, so that it can be accessed later.
            local_decl = True
            if tnode.init and tnode.meta.get('declare_vars_outside'):
                s += indent + 'int %s;\n' % ', '.join(
                    tnode.meta['declare_vars_outside'])
                local_decl = False

            s += indent + 'for ('
            if tnode.init:
                # the index is declared inside the loop header unless it was
                # declared in front of the loop above
                if isinstance(tnode.init, ast.BinOpExp) and local_decl:
                    s += 'int '
                s += self.generate(tnode.init, indent, extra_indent)
            s += '; '
            if tnode.test:
                s += self.generate(tnode.test, indent, extra_indent)
            s += '; '
            if tnode.iter:
                s += self.generate(tnode.iter, indent, extra_indent)
            s += ') '

            if isinstance(tnode.stmt, ast.CompStmt):
                stmt_s = self.generate(tnode.stmt, indent, extra_indent)
                s += stmt_s[stmt_s.index('{'):]
                self.alldecls = set()
            else:
                s += '\n'
                s += self.generate(tnode.stmt, indent + extra_indent,
                                   extra_indent)

            if fake_loop and tmp:
                s += indent + '} // ' + fake_scope_loop + '\n'
        except Exception as e:
            g.err(
                'orio.module.loop.codegen:%s: encountered an error in C code generation: %s %s'
                % (tnode.line_no, e.__class__, e))

    elif isinstance(tnode, ast.TransformStmt):
        g.err(
            'orio.module.loop.codegen internal error: a transformation statement is never generated as an output'
        )

    elif isinstance(tnode, ast.VarDecl):
        qual = ''
        if tnode.qualifier.strip():
            qual = str(tnode.qualifier) + ' '
        sv = indent + qual + str(tnode.type_name) + ' '
        sv += ', '.join(tnode.var_names)
        sv += ';\n'
        # emit each distinct declaration only once
        if sv not in self.alldecls:
            s += sv
            self.alldecls.add(sv)

    elif isinstance(tnode, ast.VarDeclInit):
        qual = ''
        if tnode.qualifier.strip():
            qual = str(tnode.qualifier) + ' '
        s += indent + qual + str(tnode.type_name) + ' '
        s += self.generate(tnode.var_name, indent, extra_indent)
        s += '=' + self.generate(tnode.init_exp, indent, extra_indent)
        # end the line like the other statements do: a pragma is emitted
        # without a leading newline, so it must not follow on the same line
        s += ';\n'

    elif isinstance(tnode, ast.Pragma):
        s += '#pragma ' + str(tnode.pstring) + '\n'

    elif isinstance(tnode, ast.Container):
        s += self.generate(tnode.ast, indent, extra_indent)

    elif isinstance(tnode, ast.DeclStmt):
        for d in tnode.vars():
            s += self.generate(d, indent, '')

    else:
        g.err(
            'orio.module.loop.codegen internal error: unrecognized type of AST: %s\n%s'
            % (tnode.__class__.__name__, str(tnode)))

    return s