def inspect_it(self, sentence, kernel):
    """Translate one line of Urutu source and return the updated CUDA kernel text.

    The line is split on tabs (empty fields give the nesting depth), the last
    field is tokenized with ``shlex`` and the tokens decide which handler runs:
    thread/block declarations, device-function calls, array declarations,
    ``for``/``if``/``else``, ``cpu`` calls, ``Urmod`` boundaries, or the
    generic ``self.checkvars`` fallback.

    Args:
        sentence: one tab-indented line of the Urutu function source.
        kernel: the CUDA source accumulated so far.

    Returns:
        The kernel text after handling the line.  A ``cpu`` call or an
        ``Urmod`` line pushes the finished text onto ``self.kernel_final``;
        in those cases the returned value may be the header of a fresh
        ``__global__`` function.
    """
    # NOTE(review): the source this was restored from had lost its line
    # structure; block nesting was reconstructed from statement order.
    phrase = sentence.split('\t')
    if '#' in phrase:
        # Return the kernel untouched (not None) so the caller can keep
        # accumulating into the value it gets back.
        return kernel

    # Close one brace per indentation level that this line dropped.
    tab = phrase.count('')
    if tab < self.tabs and self.device_scope == False:
        kernel += "}\n" * (self.tabs - tab)
    self.tabs = tab

    lexer = shlex.shlex(phrase[-1])
    token = lexer.get_token()
    if token in ('@', 'def', '', '#', '//', '"""'):
        return kernel
    stmt = []
    # Compare by value: identity on strings only works through interning.
    while token != lexer.eof:
        stmt.append(token)
        token = lexer.get_token()

    # Every Python ``and`` becomes a C ``&&`` (not just the first one).
    stmt = [" && " if tok == 'and' else tok for tok in stmt]

    # Declare each thread/block index variable the kernel uses, once.
    # The helper modules are looked up lazily, only when actually needed.
    for axis, name in enumerate(('tx', 'ty', 'tz')):
        if name in self.keys and name not in self.var_nam:
            kernel, self.threads_dec[axis] = getattr(threads, name)(
                self.threads_dec[axis], kernel)
            self.var_nam.append(name)
            self.type_vars.append("int")
    for axis, name in enumerate(('bx', 'by', 'bz')):
        if name in self.keys and name not in self.var_nam:
            kernel, self.blocks_dec[axis] = getattr(blocks, name)(
                self.blocks_dec[axis], kernel)
            self.var_nam.append(name)
            self.type_vars.append("int")

    if any(stmt.count(dim) == 1 for dim in ('Tx', 'Ty', 'Tz')):
        (self.var_nam, self.var_val, self.threads, ker,
         self.type_vars) = threads.threads_decl(
            stmt, self.var_nam, self.var_val, self.threads, self.type_vars)
        kernel += ker
        if '=' in stmt:
            id_eq = stmt.index('=')
            type_, ptr_, var_, val_ = self.checktype(
                stmt[:id_eq], stmt[id_eq + 1:])
            kernel += type_ + ptr_ + var_ + "= " + val_ + ";\n"
        # NOTE(review): assumed to return for every Tx/Ty/Tz line, as the
        # OpenCL sibling does - confirm against the original layout.
        return kernel

    if any(stmt.count(dim) == 1 for dim in ('Bx', 'By', 'Bz')):
        (self.var_nam, self.var_val, self.blocks, ker,
         self.type_vars) = blocks.blocks_decl(
            stmt, self.var_nam, self.var_val, self.blocks, self.type_vars)
        kernel += ker
        if '=' in stmt:
            id_eq = stmt.index('=')
            type_, ptr_, var_, val_ = self.checktype(
                stmt[:id_eq], stmt[id_eq + 1:])
            self.type_vars.append(type_)
            # NOTE(review): ptr_ (not var_) is what gets recorded in
            # var_nam here - kept as found, confirm it is intended.
            self.var_nam.append(ptr_)
            kernel += type_ + ptr_ + var_ + "= " + val_ + ";\n"
        return kernel

    for name in self.device_func_name:
        if name in stmt:
            kernel += self.device_create_func(
                self.device_func_name.index(name), name, stmt)
            # The buffered device-function body goes in front of the kernel.
            kernel = self.device_body_buff + "}\n" + kernel
            self.device_body_buff = ""
            return kernel

    if stmt[0] in ('__global', '__shared', '__register', '__constant'):
        self.decarrays(stmt)
        return kernel
    if 'for' in stmt:
        kernel += self._for(stmt, kernel)
        return kernel
    if 'if' in stmt:
        return kernel + grammar.keyword(stmt, kernel)
    if 'else' in stmt:
        return kernel + "else{\n"

    def start_kernel(suffix):
        # Build the header of the next __global__ function and reset the
        # per-kernel "index variable already declared" flags.
        header = ("__global__ void " + self.global_func + "_" + str(suffix)
                  + "(" + self.kernel_args + "){\n")
        self.ismap.append(True)
        self.threads_dec = [False, False, False]
        self.blocks_dec = [False, False, False]
        return header

    if 'cpu' not in stmt:
        for tok in stmt:
            if tok not in self.moved and tok in self.arg_nam:
                self.movedata(tok)
    else:
        self.kernel_final.append(kernel + "}")
        self.cpu_id += 1
        cpu_call = {
            'id': self.cpu_id,
            'ln': self.sentences.index(sentence),
            'return': [],
            'src': "",
            'args': [],
            'htod': [],
            'dtoh': [],
        }
        # A bare ``cpu.f(...)`` call has no '=', so do not let index() raise.
        if '=' in stmt and stmt.index('=') > 0:
            cpu_call['return'].append(stmt[0])

        cpu_at = stmt.index('cpu')
        func_at = cpu_at + 2  # tokens are: 'cpu', '.', <function name>, '('
        for arg in stmt[func_at + 2:-1]:
            if arg != ',':
                if arg in self.arguments:
                    cpu_call['args'].append(arg)
                    if arg in self.moved:
                        cpu_call['dtoh'].append(arg)
                else:
                    print("Pass the array accessed by the CPU function to Urutu Kernel")
        # Drop the 'cpu' '.' prefix by position; list.remove() would delete
        # the first equal token instead (e.g. the '.' inside ``1.5``).
        del stmt[cpu_at:func_at]
        cpu_call['src'] = stmt
        cpu_call['isnext'] = False

        if not self.cpu_info:
            self.run_gpu(self.kernel_final[-1])
            self.gpu_id += 1
            kernel = start_kernel(self.gpu_id)
            self.remove_workitems()
            self.kernel = kernel
        elif self.cpu_info[-1]['ln'] + 1 != cpu_call['ln']:
            # Previous cpu call is not on the preceding line.
            self.cpu_info[-1]['isnext'] = False
            print("Running on CPU")
            self.run_cpu()
            self.kernel_final.append(kernel + "}")
            self.gpu_id += 1
            kernel = start_kernel(self.gpu_id)
            self.remove_workitems()
            self.kernel = kernel
        else:
            # Previous cpu call is on the line directly above this one.
            self.cpu_info[-1]['isnext'] = True
        self.cpu_info.append(cpu_call)
        return kernel

    if 'Urmod' in stmt:
        self.num_mod += 1
        self.modules.append(stmt[2:])
        self.kernel_final.append(kernel + "}")
        kernel = start_kernel(len(self.modules) + 1)
        self.kernel = kernel
        return kernel

    if len(self.cpu_info) > 0 and self.return_kernel == False:
        self.run_cpu()
    return self.checkvars(stmt, phrase[-1], kernel)
def inspect_it(self, sentence, kernel):
    """Translate one line of Urutu source and return the updated CUDA kernel text.

    The line is split on tabs (empty fields give the nesting depth), the last
    field is tokenized with ``shlex`` and the tokens decide which handler runs:
    thread/block declarations, device-function calls, array declarations,
    ``for``/``if``/``else``, ``cpu`` calls, ``Urmod`` boundaries, or the
    generic ``self.checkvars`` fallback.

    Args:
        sentence: one tab-indented line of the Urutu function source.
        kernel: the CUDA source accumulated so far.

    Returns:
        The kernel text after handling the line.  A ``cpu`` call or an
        ``Urmod`` line pushes the finished text onto ``self.kernel_final``;
        in those cases the returned value may be the header of a fresh
        ``__global__`` function.
    """
    # NOTE(review): the source this was restored from had lost its line
    # structure; block nesting was reconstructed from statement order.
    phrase = sentence.split("\t")
    if "#" in phrase:
        # Return the kernel untouched (not None) so the caller can keep
        # accumulating into the value it gets back.
        return kernel

    # Close one brace per indentation level that this line dropped.
    tab = phrase.count("")
    if tab < self.tabs and self.device_scope == False:
        kernel += "}\n" * (self.tabs - tab)
    self.tabs = tab

    lexer = shlex.shlex(phrase[-1])
    token = lexer.get_token()
    if token in ("@", "def", "", "#", "//", '"""'):
        return kernel
    stmt = []
    # Compare by value: identity on strings only works through interning.
    while token != lexer.eof:
        stmt.append(token)
        token = lexer.get_token()

    # Every Python ``and`` becomes a C ``&&`` (not just the first one).
    stmt = [" && " if tok == "and" else tok for tok in stmt]

    # Declare each thread/block index variable the kernel uses, once.
    # The helper modules are looked up lazily, only when actually needed.
    for axis, name in enumerate(("tx", "ty", "tz")):
        if name in self.keys and name not in self.var_nam:
            kernel, self.threads_dec[axis] = getattr(threads, name)(
                self.threads_dec[axis], kernel
            )
            self.var_nam.append(name)
            self.type_vars.append("int")
    for axis, name in enumerate(("bx", "by", "bz")):
        if name in self.keys and name not in self.var_nam:
            kernel, self.blocks_dec[axis] = getattr(blocks, name)(
                self.blocks_dec[axis], kernel
            )
            self.var_nam.append(name)
            self.type_vars.append("int")

    if any(stmt.count(dim) == 1 for dim in ("Tx", "Ty", "Tz")):
        (
            self.var_nam,
            self.var_val,
            self.threads,
            ker,
            self.type_vars,
        ) = threads.threads_decl(
            stmt, self.var_nam, self.var_val, self.threads, self.type_vars
        )
        kernel += ker
        if "=" in stmt:
            id_eq = stmt.index("=")
            type_, ptr_, var_, val_ = self.checktype(
                stmt[:id_eq], stmt[id_eq + 1 :]
            )
            kernel += type_ + ptr_ + var_ + "= " + val_ + ";\n"
        # NOTE(review): assumed to return for every Tx/Ty/Tz line, as the
        # OpenCL sibling does - confirm against the original layout.
        return kernel

    if any(stmt.count(dim) == 1 for dim in ("Bx", "By", "Bz")):
        (
            self.var_nam,
            self.var_val,
            self.blocks,
            ker,
            self.type_vars,
        ) = blocks.blocks_decl(
            stmt, self.var_nam, self.var_val, self.blocks, self.type_vars
        )
        kernel += ker
        if "=" in stmt:
            id_eq = stmt.index("=")
            type_, ptr_, var_, val_ = self.checktype(
                stmt[:id_eq], stmt[id_eq + 1 :]
            )
            self.type_vars.append(type_)
            # NOTE(review): ptr_ (not var_) is what gets recorded in
            # var_nam here - kept as found, confirm it is intended.
            self.var_nam.append(ptr_)
            kernel += type_ + ptr_ + var_ + "= " + val_ + ";\n"
        return kernel

    for name in self.device_func_name:
        if name in stmt:
            kernel += self.device_create_func(
                self.device_func_name.index(name), name, stmt
            )
            # The buffered device-function body goes in front of the kernel.
            kernel = self.device_body_buff + "}\n" + kernel
            self.device_body_buff = ""
            return kernel

    if stmt[0] in ("__global", "__shared", "__register", "__constant"):
        self.decarrays(stmt)
        return kernel
    if "for" in stmt:
        kernel += self._for(stmt, kernel)
        return kernel
    if "if" in stmt:
        return kernel + grammar.keyword(stmt, kernel)
    if "else" in stmt:
        return kernel + "else{\n"

    def start_kernel(suffix):
        # Build the header of the next __global__ function and reset the
        # per-kernel "index variable already declared" flags.
        header = (
            "__global__ void "
            + self.global_func
            + "_"
            + str(suffix)
            + "("
            + self.kernel_args
            + "){\n"
        )
        self.ismap.append(True)
        self.threads_dec = [False, False, False]
        self.blocks_dec = [False, False, False]
        return header

    if "cpu" not in stmt:
        for tok in stmt:
            if tok not in self.moved and tok in self.arg_nam:
                self.movedata(tok)
    else:
        self.kernel_final.append(kernel + "}")
        self.cpu_id += 1
        cpu_call = {
            "id": self.cpu_id,
            "ln": self.sentences.index(sentence),
            "return": [],
            "src": "",
            "args": [],
            "htod": [],
            "dtoh": [],
        }
        # A bare ``cpu.f(...)`` call has no '=', so do not let index() raise.
        if "=" in stmt and stmt.index("=") > 0:
            cpu_call["return"].append(stmt[0])

        cpu_at = stmt.index("cpu")
        func_at = cpu_at + 2  # tokens are: 'cpu', '.', <function name>, '('
        for arg in stmt[func_at + 2 : -1]:
            if arg != ",":
                if arg in self.arguments:
                    cpu_call["args"].append(arg)
                    if arg in self.moved:
                        cpu_call["dtoh"].append(arg)
                else:
                    print("Pass the array accessed by the CPU function to Urutu Kernel")
        # Drop the 'cpu' '.' prefix by position; list.remove() would delete
        # the first equal token instead (e.g. the '.' inside ``1.5``).
        del stmt[cpu_at:func_at]
        cpu_call["src"] = stmt
        cpu_call["isnext"] = False

        if not self.cpu_info:
            self.run_gpu(self.kernel_final[-1])
            self.gpu_id += 1
            kernel = start_kernel(self.gpu_id)
            self.remove_workitems()
            self.kernel = kernel
        elif self.cpu_info[-1]["ln"] + 1 != cpu_call["ln"]:
            # Previous cpu call is not on the preceding line.
            self.cpu_info[-1]["isnext"] = False
            print("Running on CPU")
            self.run_cpu()
            self.kernel_final.append(kernel + "}")
            self.gpu_id += 1
            kernel = start_kernel(self.gpu_id)
            self.remove_workitems()
            self.kernel = kernel
        else:
            # Previous cpu call is on the line directly above this one.
            self.cpu_info[-1]["isnext"] = True
        self.cpu_info.append(cpu_call)
        return kernel

    if "Urmod" in stmt:
        self.num_mod += 1
        self.modules.append(stmt[2:])
        self.kernel_final.append(kernel + "}")
        kernel = start_kernel(len(self.modules) + 1)
        self.kernel = kernel
        return kernel

    if len(self.cpu_info) > 0 and self.return_kernel == False:
        self.run_cpu()
    return self.checkvars(stmt, phrase[-1], kernel)
def inspect_it(self, sentence, kernel):
    """Translate one line of Urutu source and return the updated OpenCL kernel text.

    The line is split on tabs (empty fields give the nesting depth), the last
    field is tokenized with ``shlex`` and the tokens decide which handler runs:
    device-function calls, thread/block declarations, array declarations,
    ``for``/``if``/``else``, ``Urmod`` boundaries, or the generic
    ``self.checkvars`` fallback.

    Args:
        sentence: one tab-indented line of the Urutu function source.
        kernel: the OpenCL source accumulated so far.

    Returns:
        The kernel text after handling the line.  For an ``Urmod`` line the
        finished text is pushed onto ``self.kernel_final`` and the header of
        a new ``__kernel`` function is returned instead.
    """
    # NOTE(review): the source this was restored from had lost its line
    # structure; block nesting was reconstructed from statement order.
    phrase = sentence.split('\t')
    if '#' in phrase:
        # Return the kernel untouched (not None) so the caller can keep
        # accumulating into the value it gets back.
        return kernel

    # Close one brace per indentation level that this line dropped.
    tab = phrase.count('')
    if tab < self.tabs:
        kernel += "}\n" * (self.tabs - tab)
    self.tabs = tab

    lexer = shlex.shlex(phrase[-1])
    token = lexer.get_token()
    if token in ('@', 'def', '', '#', '//', '"""'):
        return kernel
    stmt = []
    # Compare by value: identity on strings only works through interning.
    while token != lexer.eof:
        stmt.append(token)
        token = lexer.get_token()

    for name in self.device_func_name:
        if name in stmt:
            kernel += self.device_create_func(
                self.device_func_name.index(name), name, stmt)
            # The buffered device-function body goes in front of the kernel.
            kernel = self.device_body_buff + "}\n" + kernel
            self.device_body_buff = ""
            return kernel

    # Emit index-variable declarations for the ids the kernel uses.  The
    # helper modules are looked up lazily, only when actually needed.
    if 'tx' in self.keys or '__shared' in self.keys:
        kernel, self.threads_dec[0] = threads.tx(self.threads_dec[0], kernel)
    for axis, name in ((1, 'ty'), (2, 'tz')):
        if name in self.keys:
            kernel, self.threads_dec[axis] = getattr(threads, name)(
                self.threads_dec[axis], kernel)
    # Each block index is guarded by its own key; 'by' used to be guarded
    # by 'bx', so it was skipped whenever only 'by' appeared.
    for axis, name in enumerate(('bx', 'by', 'bz')):
        if name in self.keys:
            kernel, self.blocks_dec[axis] = getattr(blocks, name)(
                self.blocks_dec[axis], kernel)

    if any(stmt.count(dim) == 1 for dim in ('Tx', 'Ty', 'Tz')):
        threads.threads_decl(
            stmt, self.var_nam, self.var_val, self.threads, self.type_vars)
        return kernel
    if any(stmt.count(dim) == 1 for dim in ('Bx', 'By', 'Bz')):
        blocks.blocks_decl(stmt, self.var_nam, self.var_val, self.blocks)
        return kernel

    if stmt[0] in ('__global', '__shared', '__register', '__constant'):
        self.decarrays(stmt)
        return kernel
    if 'for' in stmt:
        kernel += self._for(stmt, kernel)
        return kernel
    if 'if' in stmt:
        return kernel + grammar.keyword(stmt, kernel)
    if 'else' in stmt:
        kernel = kernel + "else{\n "
        self.tabs += 1
        return kernel

    if 'Urmod' in stmt:
        self.modules.append(stmt[2:])
        self.kernel_final.append(kernel + "}")
        # "void " needs its trailing space, otherwise the return type fuses
        # with the function name in the generated source.
        header = ("__kernel void " + self.global_func + "_"
                  + str(len(self.modules) + 1)
                  + "(" + self.kernel_args + "){\n")
        kernel = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + header
        self.threads_dec = [False, False, False]
        self.blocks_dec = [False, False, False]
        self.kernel = kernel
        return kernel

    return self.checkvars(stmt, phrase[-1], kernel)
def inspect_it(self, sentence, kernel):
    """Translate one line of Urutu source and return the updated OpenCL kernel text.

    The line is split on tabs (empty fields give the nesting depth), the last
    field is tokenized with ``shlex`` and the tokens decide which handler runs:
    device-function calls, thread/block declarations, array declarations,
    ``for``/``if``/``else``, ``Urmod`` boundaries, or the generic
    ``self.checkvars`` fallback.

    Args:
        sentence: one tab-indented line of the Urutu function source.
        kernel: the OpenCL source accumulated so far.

    Returns:
        The kernel text after handling the line.  For an ``Urmod`` line the
        finished text is pushed onto ``self.kernel_final`` and the header of
        a new ``__kernel`` function is returned instead.
    """
    # NOTE(review): the source this was restored from had lost its line
    # structure; block nesting was reconstructed from statement order.
    phrase = sentence.split('\t')
    if '#' in phrase:
        # Return the kernel untouched (not None) so the caller can keep
        # accumulating into the value it gets back.
        return kernel

    # Close one brace per indentation level that this line dropped.
    tab = phrase.count('')
    if tab < self.tabs:
        kernel += "}\n" * (self.tabs - tab)
    self.tabs = tab

    lexer = shlex.shlex(phrase[-1])
    token = lexer.get_token()
    if token in ('@', 'def', '', '#', '//', '"""'):
        return kernel
    stmt = []
    # Compare by value: identity on strings only works through interning.
    while token != lexer.eof:
        stmt.append(token)
        token = lexer.get_token()

    for name in self.device_func_name:
        if name in stmt:
            kernel += self.device_create_func(
                self.device_func_name.index(name), name, stmt)
            # The buffered device-function body goes in front of the kernel.
            kernel = self.device_body_buff + "}\n" + kernel
            self.device_body_buff = ""
            return kernel

    # Emit index-variable declarations for the ids the kernel uses.  The
    # helper modules are looked up lazily, only when actually needed.
    if 'tx' in self.keys or '__shared' in self.keys:
        kernel, self.threads_dec[0] = threads.tx(self.threads_dec[0], kernel)
    for axis, name in ((1, 'ty'), (2, 'tz')):
        if name in self.keys:
            kernel, self.threads_dec[axis] = getattr(threads, name)(
                self.threads_dec[axis], kernel)
    # Each block index is guarded by its own key; 'by' used to be guarded
    # by 'bx', so it was skipped whenever only 'by' appeared.
    for axis, name in enumerate(('bx', 'by', 'bz')):
        if name in self.keys:
            kernel, self.blocks_dec[axis] = getattr(blocks, name)(
                self.blocks_dec[axis], kernel)

    if any(stmt.count(dim) == 1 for dim in ('Tx', 'Ty', 'Tz')):
        threads.threads_decl(
            stmt, self.var_nam, self.var_val, self.threads, self.type_vars)
        return kernel
    if any(stmt.count(dim) == 1 for dim in ('Bx', 'By', 'Bz')):
        blocks.blocks_decl(stmt, self.var_nam, self.var_val, self.blocks)
        return kernel

    if stmt[0] in ('__global', '__shared', '__register', '__constant'):
        self.decarrays(stmt)
        return kernel
    if 'for' in stmt:
        kernel += self._for(stmt, kernel)
        return kernel
    if 'if' in stmt:
        return kernel + grammar.keyword(stmt, kernel)
    if 'else' in stmt:
        kernel = kernel + "else{\n "
        self.tabs += 1
        return kernel

    if 'Urmod' in stmt:
        self.modules.append(stmt[2:])
        self.kernel_final.append(kernel + "}")
        # "void " needs its trailing space, otherwise the return type fuses
        # with the function name in the generated source.
        header = ("__kernel void " + self.global_func + "_"
                  + str(len(self.modules) + 1)
                  + "(" + self.kernel_args + "){\n")
        kernel = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + header
        self.threads_dec = [False, False, False]
        self.blocks_dec = [False, False, False]
        self.kernel = kernel
        return kernel

    return self.checkvars(stmt, phrase[-1], kernel)