def visit_lowcreate(self, lc):
    # Lower a create..sync construct to C with inline asm:
    # allocate a family, configure it, create it, then sync.
    cr = self.cur_scope.creates[lc.label]

    # compute the calling convention
    c = regmagic.mapcall(cr.args, funcname="create", loc=cr.loc)
    lc.callconv = c["nargs"]

    newbl = Block(loc=cr.loc)
    lbl = cr.label

    # generate the function pointer
    if cr.funtype == cr.FUN_ID:
        if lc.lowfun is not None:
            funvar = lc.lowfun
        else:
            # not yet split
            funvar = Opaque(cr.fun)
    else:
        assert cr.funtype == cr.FUN_VAR
        n = "C$mtF$%s" % lbl
        t = "C$mtF$%s" % lbl
        thetype = CTypeDecl(loc=cr.loc, name=t,
                            ctype=CType(items=Opaque(text="void (*") + CTypeHead() + ")(void)"))
        self.cur_scope.decls += thetype
        funvar = CVarDecl(loc=cr.loc, name=n, ctype=CTypeUse(tdecl=thetype))
        self.cur_scope.decls += funvar
        if lc.lowfun is not None:
            thefun = lc.lowfun
        else:
            # not yet split
            thefun = CVarUse(decl=cr.fun)
        newbl += CVarSet(loc=cr.loc, decl=funvar,
                         rhs=CCast(ctype=CTypeUse(tdecl=thetype), expr=thefun)) + ";"
        funvar = CVarUse(decl=funvar)

    # prepare structure for memory-passed arguments
    if c["gl_mem_offset"] is not None:
        maname = "C$mtM$%s" % lbl
        mat = "C$mtM$%s" % lbl
        thestruct = Opaque("struct {")
        for d in c["memlayout"]:
            thestruct = thestruct + (Opaque(loc=d["loc"]) + d["ctype"] + " " + d["name"] + ";")
        thestruct = thestruct + "}"
        thetype = CTypeDecl(loc=cr.loc, name=mat, ctype=thestruct)
        self.cur_scope.decls += thetype
        mavar = CVarDecl(loc=cr.loc, name=maname, ctype=CTypeUse(tdecl=thetype))
        self.cur_scope.decls += mavar
        mavar = CVarUse(decl=mavar)
        lc.mavar = mavar

    # Now handle body between create..sync
    newbl += lc.body.accept(self)

    # On to the real stuff. First try to allocate.
    fidvar = cr.cvar_fid
    usefvar = CVarUse(decl=fidvar)
    if cr.extras.has_attr("exclusive"):
        if not self.newisa:
            die("exclusive create not supported on this target", cr)
        allocinsn = "allocatex"
    elif lc.target_next is None:
        if cr.extras.has_attr("nowait"):
            warn("this create may fail and no alternative is available", cr)
            allocinsn = "allocate"
        else:
            allocinsn = "allocates"
    else:
        if cr.extras.has_attr("forcewait"):
            allocinsn = "allocates"
        else:
            allocinsn = "allocate"
    if allocinsn == "allocates" and not self.newisa:
        die("suspending create is not supported on this target", cr)

    start = CVarUse(decl=cr.cvar_start)
    limit = CVarUse(decl=cr.cvar_limit)
    step = CVarUse(decl=cr.cvar_step)
    block = CVarUse(decl=cr.cvar_block)
    strategyuse = CVarUse(decl=cr.cvar_strategy)

    newbl += (flatten(cr.loc,
                      '__asm__ __volatile__("%s %%2, %%0\\t! MT: CREATE %s"'
                      ' : "=r"(' % (allocinsn, lbl))
              + usefvar + ') : "0"(' + CVarUse(decl=cr.cvar_place)
              + '), "rP"(' + strategyuse + "));")

    if lc.target_next is not None:
        if self.newisa:
            failval = 0
        else:
            failval = -1
        newbl += (flatten(cr.loc, " if (__builtin_expect(%d == (" % failval)
                  + usefvar + "), 0)) "
                  + CGoto(target=lc.target_next)) + ";"

    newbl += (flatten(cr.loc,
                      '__asm__ ("setstart %%0, %%2\\t! MT: CREATE %s"'
                      ' : "=r"(' % lbl)
              + usefvar + ') : "0"(' + usefvar + '), "rP"(' + start + ")); "
              '__asm__ ("setstep %%0, %%2\\t! MT: CREATE %s"' % lbl
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '), "rP"(' + step + ")); "
              '__asm__ ("setblock %%0, %%2\\t! MT: CREATE %s"' % lbl
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '), "rP"(' + block + ")); ")

    if self.newisa:
        newbl += (Opaque('__asm__ ("setlimit %%0, %%2\\t! MT: CREATE %s"' % lbl)
                  + ' : "=r"(' + usefvar + ') : "0"(' + usefvar
                  + '), "rP"(' + limit + ")); ")
    else:
        # FIXME: this "-1" business is an ugly hack: uT-LEON3 was based on a
        # screwed-up simulator source which used inclusive limits.
        newbl += (Opaque('__asm__ ("setlimit %%0, %%2\\t! MT: CREATE %s"' % lbl)
                  + ' : "=r"(' + usefvar + ') : "0"(' + usefvar
                  + '), "rP"((' + limit + ")-1)); "
                  '__asm__ ("setthread %%0, %%2\\t! MT: CREATE %s"' % lbl
                  + ' : "=r"(' + usefvar + ') : "0"(' + usefvar
                  + '), "rP"(' + funvar + ")); ")

    argregs = set()
    crc = Scope()
    aregn = 0
    gargs = []
    for g in c["gislots"]:
        name = g["name"]
        r = regmagic.vname_to_legacy("l%d" % aregn)
        var = CVarDecl(loc=cr.loc,
                       name="C$aR$%s$%s" % (cr.label, name),
                       ctype=g["ctype"],
                       init=CVarUse(decl=cr.arg_dic[name].cvar),
                       reg=(not self.newisa) and r or None)
        crc.decls += var
        gargs.append(CVarUse(decl=var))
        aregn += 1
        argregs.add(r)

    if c["gl_mem_offset"] is not None:
        # one extra global var for the memory-passed argument structure
        # (note: 'name' here is left over from the last global slot above)
        r = regmagic.vname_to_legacy("l%d" % aregn)
        var = CVarDecl(loc=cr.loc,
                       name="C$aR$%s$%s" % (cr.label, name),
                       ctype=mat + "*",
                       init=Opaque(text="&") + mavar,
                       reg=(not self.newisa) and r or None)
        crc.decls += var
        gargs.append(CVarUse(decl=var))
        aregn += 1
        argregs.add(r)

    collect = Block()
    sargs = []
    for s in c["sislots"]:
        name = s["name"]
        r = regmagic.vname_to_legacy("l%d" % aregn)
        arg_cvar = cr.arg_dic[name].cvar
        var = CVarDecl(loc=cr.loc,
                       name="C$aR$%s$%s" % (cr.label, name),
                       ctype=s["ctype"],
                       init=CVarUse(decl=arg_cvar),
                       reg=(not self.newisa) and r or None)
        crc.decls += var
        sargs.append(CVarUse(decl=var))
        collect += CVarSet(decl=arg_cvar, rhs=CVarUse(decl=var)) + Opaque(";")
        aregn += 1
        argregs.add(r)

    if not self.newisa:
        # build reg arg lists; start with fidvar/shareds first, as these
        # need to reference each other and gcc has a limit on the number
        # of back references
        olist = Opaque('"=r"(') + usefvar + ")"
        ilist = Opaque('"0"(') + usefvar + ")"
        roff = 1
        for v in sargs:
            olist += Opaque(', "=r"(') + v + ")"
            ilist += Opaque(', "%d"(' % roff) + v + ")"
            roff += 1
        for v in gargs:
            ilist += Opaque(', "r"(') + v + ")"
        crc += (flatten(cr.loc,
                        ' __asm__ __volatile__("create %%0, %%0\\t! MT: CREATE %s DRAIN(%s)'
                        '\\n\\tmov %%0, %%0\\t! MT: SYNC %s" : '
                        % (lbl, ",".join(argregs).replace("%", "%%"), lbl))
                + olist + " : " + ilist + ' : "memory");')
        if cr.sync_type != "normal":
            warn("detached create not supported on this target, using normal sync instead", cr)
    else:
        crc += (flatten(cr.loc_end,
                        ' __asm__ __volatile__("crei %%2, %%0\\t! MT: CREATE %s"' % lbl)
                + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + "),"
                + ' "r"(' + funvar + ') : "memory");')
        aoff = 0
        for a in gargs:
            crc += (flatten(cr.loc_end,
                            ' __asm__("putg %%2, %%0, %d\\t! MT: set global"'
                            ' : "=r"(' % aoff)
                    + usefvar + ') : "0"(' + usefvar + '), "r"(' + a + "));")
            aoff += 1
        aoff = 0
        for a in sargs:
            crc += (flatten(cr.loc_end,
                            ' __asm__("puts %%2, %%0, %d\\t! MT: set shared"'
                            ' : "=r"(' % aoff)
                    + usefvar + ') : "0"(' + usefvar + '), "r"(' + a + "));")
            aoff += 1
        if cr.sync_type == "normal":
            crc += (flatten(cr.loc_end,
                            ' __asm__ __volatile__("sync %%0, %%1; '
                            ' mov %%1, %%1\\t! MT: SYNC %s"' % lbl)
                    + ' : "=r"(' + usefvar + '), "=r"(' + CVarUse(decl=cr.cvar_exitcode)
                    + ') : "0"(' + usefvar + ') : "memory");')
            aoff = 0
            for a in sargs:
                crc += (flatten(cr.loc_end,
                                ' __asm__("gets %%0, %d, %%1; '
                                ' mov %%1, %%1\\t! MT: get shared"' % aoff)
                        + ' : "=r"(' + usefvar + '), "=r"(' + a
                        + ') : "0"(' + usefvar + "));")
                aoff += 1
            crc += (flatten(cr.loc_end,
                            ' __asm__ __volatile__("release %%0\\t! MT: SYNC %s"' % lbl)
                    + ' : : "r"(' + usefvar + "));")

    # Alias the shared arguments back
    crc += collect
    newbl += crc
    return newbl

def visit_lowcreate(self, lc):
    #print "IN LOWC (v = %x, d = %x, lc = %x)" % (id(self), id(self.__dict__), id(lc))
    # Lower a create..sync construct: allocate a family, configure it,
    # create it with crei, then sync and pull shareds back.
    cr = self.cur_scope.creates[lc.label]

    # compute the calling convention
    c = regmagic.mapcall(cr.args, funcname = "create", loc = cr.loc)

    newbl = Block(loc = cr.loc)
    lbl = cr.label

    # generate allocate + test for alternative
    fidvar = cr.cvar_fid
    start = CVarUse(decl = cr.cvar_start)
    limit = CVarUse(decl = cr.cvar_limit)
    step = CVarUse(decl = cr.cvar_step)
    block = CVarUse(decl = cr.cvar_block)
    usefvar = CVarUse(decl = fidvar)

    if cr.extras.has_attr('exclusive'):
        allocinsn = 'allocate/x'
    elif lc.target_next is None:
        if cr.extras.has_attr('nowait'):
            warn("this create may fail and no alternative is available", cr)
            allocinsn = 'allocate'
        else:
            allocinsn = 'allocate/s'
    else:
        if cr.extras.has_attr('forcewait'):
            allocinsn = 'allocate/s'
        else:
            allocinsn = 'allocate'

    strategyuse = CVarUse(decl = cr.cvar_strategy)
    newbl += (flatten(cr.loc,
                      '__asm__ __volatile__("%s %%2, %%1, %%0\\t# MT: CREATE %s"'
                      ' : "=r"(' % (allocinsn, lbl))
              + usefvar + ') : "rI"(' + strategyuse
              + '), "r"(' + CVarUse(decl = cr.cvar_place) + '));')

    if lc.target_next is not None:
        newbl += (flatten(cr.loc, ' if (!__builtin_expect(!!(')
                  + usefvar + '), 1)) '
                  + CGoto(target = lc.target_next)) + ';'

    # generate the function pointer
    if cr.funtype == cr.FUN_ID:
        if lc.lowfun is not None:
            funvar = lc.lowfun
        else:
            # not yet split
            funvar = Opaque(cr.fun)
    else:
        assert cr.funtype == cr.FUN_VAR
        n = 'C$mtF$%s' % lbl
        t = 'C$mtF$%s' % lbl
        thetype = CTypeDecl(loc = cr.loc, name = t,
                            ctype = CType(items = Opaque(text = "void (*") + CTypeHead() + ')(void)'))
        self.cur_scope.decls += thetype
        funvar = CVarDecl(loc = cr.loc, name = n, ctype = CTypeUse(tdecl = thetype))
        self.cur_scope.decls += funvar
        if lc.lowfun is not None:
            thefun = lc.lowfun
        else:
            # not yet split
            thefun = CVarUse(decl = cr.fun)
        newbl += CVarSet(loc = cr.loc, decl = funvar,
                         rhs = CCast(ctype = CTypeUse(tdecl = thetype), expr = thefun)) + ';'
        funvar = CVarUse(decl = funvar)

    # prepare memory structure for memory-passed arguments
    ### FIXME: move stuff to cur_scope
    if c['gl_mem_offset'] is not None:
        maname = "C$mtM$%s" % lbl
        mat = 'C$mtM$%s' % lbl
        thestruct = Opaque('struct {')
        for d in c['memlayout']:
            thestruct = thestruct + (Opaque(loc = d['loc']) + d['ctype'] + ' ' + d['name'] + ';')
        thestruct = thestruct + '}'
        thetype = CTypeDecl(loc = cr.loc, name = mat, ctype = thestruct)
        self.cur_scope.decls += thetype
        mavar = CVarDecl(loc = cr.loc, name = maname, ctype = CTypeUse(tdecl = thetype))
        self.cur_scope.decls += mavar
        mavar = CVarUse(decl = mavar)
        lc.mavar = mavar

    # generate create
    newbl += (flatten(cr.loc,
                      '__asm__ ("setstart %%0, %%2\\t# MT: CREATE %s"'
                      ' : "=r"(' % lbl)
              + usefvar + ') : "0"(' + usefvar + '), "rI"(' + start + ')); '
              + '__asm__ ("setlimit %%0, %%2\\t# MT: CREATE %s"' % lbl
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '), "rI"(' + limit + ')); '
              + '__asm__ ("setstep %%0, %%2\\t# MT: CREATE %s"' % lbl
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '), "rI"(' + step + ')); '
              + '__asm__ ("setblock %%0, %%2\\t# MT: CREATE %s"' % lbl
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '), "rI"(' + block + ')); '
              + '__asm__ __volatile__("crei %%0, 0(%%2)\\t# MT: CREATE %s"' % lbl
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '),'
              + ' "r"(' + funvar + ') : "memory");')

    lc.callconv = c['nargs']
    lc.fidvar = usefvar

    newbl += lc.body.accept(self)

    # done with body, now handle sync.
    # First of all, if there were some memory-passed arguments, we need to
    # push the address of the argument structure to the child family. A
    # memory barrier is required because the remote thread(s) may access
    # the memory as soon as putg completes.
    if c['gl_mem_offset'] is not None:
        newbl += (flatten(cr.loc_end,
                          ' __asm__ ("wmb; putg %%2, %%0, %d\\t#MT: set offset for memargs"'
                          % c['gl_mem_offset'])
                  + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '),'
                  + ' "r"(&' + mavar + '));')

    # now, on to the sync.
    if cr.sync_type == 'normal':
        # normal, synchronized create:
        # first wait for the child family to terminate.
        newbl += (flatten(cr.loc_end,
                          '__asm__ __volatile__("sync %%0, %%1; '
                          ' mov %%1, $31\\t# MT: SYNC %s"' % lbl)
                  + ' : "=r"(' + usefvar
                  + '), "=r"(' + CVarUse(decl = cr.cvar_exitcode)
                  + ') : "0"(' + usefvar + ') : "memory");')

        # then pull the shared arguments back.
        for name, arg in c['nargs'].iteritems():
            crarg = cr.arg_dic[name]
            if not crarg.seen_get:
                # geta() is not used, so no need to retrieve
                continue
            if arg['mode'] == 'reg' and arg['cat'] == 'sh':
                if arg['species'] == 'f':
                    insn1 = 'fgets'
                    insn2 = 'fmov'
                    rspec = 'f'
                else:
                    insn1 = 'gets'
                    insn2 = 'mov'
                    rspec = 'r'
                regnr = arg['regnr']
                argvar = crarg.cvar
                # FIXME: perform "mov" after all "get" have been issued!
                newbl += (flatten(cr.loc_end,
                                  ' __asm__ ('
                                  '"%(insn1)s %%0, %(regnr)d, %%1; '
                                  ' %(insn2)s %%1, %%1'
                                  '\\t# MT: get shared"' % locals())
                          + ' : "=r"(' + usefvar
                          + '), "=%(rspec)s"(' % locals()
                          + CVarUse(decl = argvar)
                          + ') : "0"(' + usefvar + '));')

    if cr.sync_type != 'spawn':
        # for normal sync and detach, release resources;
        # spawn will sync+release later.
        newbl += (flatten(cr.loc_end,
                          ' __asm__ __volatile__("release %%0\\t#MT: SYNC %s"' % lbl)
                  + ' : : "r"(' + usefvar + '));')

    return newbl

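
# Standalone sketch, not part of the visitor: the instruction selection for
# pulling one shared argument back after sync, mirroring the species test in
# the normal-sync branch above. The function name is hypothetical.
def _sketch_get_shared_insns(species):
    # 'f' selects the floating-point get/move pair and the "f" register
    # constraint; anything else uses the integer variants with "r".
    if species == 'f':
        return 'fgets', 'fmov', 'f'
    return 'gets', 'mov', 'r'

# e.g.:
#   _sketch_get_shared_insns('f') == ('fgets', 'fmov', 'f')
#   _sketch_get_shared_insns('i') == ('gets', 'mov', 'r')
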
def visit_lowcreate(self, lc):
    #print "IN LOWC (v = %x, d = %x, lc = %x)" % (id(self), id(self.__dict__), id(lc))
    # Lower a create..sync construct: allocate a family and register/thread
    # resources, configure the family, create it, then sync.
    cr = self.cur_scope.creates[lc.label]

    # compute the calling convention
    c = regmagic.mapcall(cr.args, funcname = "create", loc = cr.loc)

    newbl = Block(loc = cr.loc)
    lbl = cr.label

    # generate allocate + test for alternative
    fidvar = cr.cvar_fid

    has_globals = False
    nr_globals = c['nrargregs']['gli']
    if nr_globals > 0:
        has_globals = True
        cr.cvar_rb = CVarDecl(loc = cr.loc, name = 'C$Fb$%s' % cr.label, ctype = 'long')
        self.cur_scope.decls += cr.cvar_rb
        gblvar = CVarUse(decl = cr.cvar_rb)
        lc.gblvar = gblvar

    start = CVarUse(decl = cr.cvar_start)
    limit = CVarUse(decl = cr.cvar_limit)
    step = CVarUse(decl = cr.cvar_step)
    block = CVarUse(decl = cr.cvar_block)
    usefvar = CVarUse(decl = fidvar)

    newbl += (flatten(cr.loc,
                      '__asm__ __volatile__("f_alloc %%0\\t! MT: CREATE %s"'
                      ' : "=r"(' % lbl)
              + usefvar + '));')

    if lc.target_next is not None:
        newbl += (flatten(cr.loc, ' if (!__builtin_expect(!!(')
                  + usefvar + '), 1)) '
                  + CGoto(target = lc.target_next)) + ';'

    if has_globals:
        # select the size of the remote register block to allocate
        if nr_globals > 8:
            mask = 4  # binary 100
        elif nr_globals > 4:
            mask = 6  # binary 110
        else:
            mask = 7  # binary 111
        newbl += (flatten(cr.loc,
                          '__asm__ __volatile__("r_allocsrb %%1, %%0\\t! MT: CREATE %s"'
                          ' : "=r"(' % lbl)
                  + gblvar + ') : "rI"(%d));' % mask)
        if lc.target_next is not None:
            newbl += (flatten(cr.loc, ' if (!__builtin_expect(!!(')
                      + gblvar + '), 1)) {'
                      + '__asm__ __volatile__("f_fence %%0, 31\\t! MT: ABORT1 CREATE %s" : : "r"(' % lbl
                      + usefvar + '));'
                      + CGoto(target = lc.target_next) + ';};')
        newbl += (flatten(cr.loc,
                          '__asm__ ("f_mapg %%0, %%2\\t! MT: CREATE %s" : "=r"(' % lbl)
                  + usefvar + ') : "r"(' + usefvar + '), "r"(' + gblvar + '));')

    # generate the function pointer
    if cr.funtype == cr.FUN_ID:
        if lc.lowfun is not None:
            funvar = lc.lowfun
        else:
            # not yet split
            funvar = Opaque(cr.fun)
    else:
        assert cr.funtype == cr.FUN_VAR
        n = 'C$mtF$%s' % lbl
        t = 'C$mtF$%s' % lbl
        thetype = CTypeDecl(loc = cr.loc, name = t,
                            ctype = CType(items = Opaque(text = "void (*") + CTypeHead() + ')(void)'))
        self.cur_scope.decls += thetype
        funvar = CVarDecl(loc = cr.loc, name = n, ctype = CTypeUse(tdecl = thetype))
        self.cur_scope.decls += funvar
        if lc.lowfun is not None:
            thefun = lc.lowfun
        else:
            # not yet split
            thefun = CVarUse(decl = cr.fun)
        newbl += CVarSet(loc = cr.loc, decl = funvar,
                         rhs = CCast(ctype = CTypeUse(tdecl = thetype), expr = thefun)) + ';'
        funvar = CVarUse(decl = funvar)

    # prepare the thread group
    tgname = "C$htg$%s" % lbl
    tgdecl = CVarDecl(loc = cr.loc, name = tgname, ctype = 'long')
    tgvar = CVarUse(decl = tgdecl)
    self.cur_scope.decls += tgdecl

    newbl += (flatten(cr.loc, ' if (!__builtin_expect(!!(')
              + block + '), 1)) ' + block + ' = -1;')
    newbl += (flatten(cr.loc,
                      '__asm__ __volatile__ ("t_allochtg %%1, %%0, %%0\\t! MT: CREATE %s FUN %%2 ' % lbl)
              + funvar + '": "=&r"(' + tgvar + ') : "r"(' + block + '), '
              + '"r"(' + funvar + '));')

    if lc.target_next is not None:
        newbl += (flatten(cr.loc, ' if (!__builtin_expect(!!(')
                  + tgvar + '), 1)) {'
                  + '__asm__ __volatile__("f_fence %%0, 31\\t! MT: ABORT2 CREATE %s" : : "r"(' % lbl
                  + usefvar + '));'
                  + CGoto(target = lc.target_next) + ';};')

    newbl += (flatten(cr.loc,
                      '__asm__ ("f_maphtg %%0, %%2\\t! MT: CREATE %s"' % lbl)
              + ': "=r"(' + usefvar + ') : "r"(' + usefvar + '), "r"(' + tgvar + '));')

    # prepare memory structure for memory-passed arguments
    ### FIXME: move stuff to cur_scope
    if c['gl_mem_offset'] is not None:
        maname = "C$mtM$%s" % lbl
        mat = 'C$mtM$%s' % lbl
        thestruct = Opaque('struct {')
        for d in c['memlayout']:
            thestruct = thestruct + (Opaque(loc = d['loc']) + d['ctype'] + ' ' + d['name'] + ';')
        thestruct = thestruct + '}'
        thetype = CTypeDecl(loc = cr.loc, name = mat, ctype = thestruct)
        self.cur_scope.decls += thetype
        mavar = CVarDecl(loc = cr.loc, name = maname, ctype = CTypeUse(tdecl = thetype))
        self.cur_scope.decls += mavar
        mavar = CVarUse(decl = mavar)
        lc.mavar = mavar

    # generate create
    newbl += (flatten(cr.loc,
                      '__asm__ ("f_set_blocksize %%0, %%2\\t! MT: CREATE %s"'
                      ' : "=r"(' % lbl)
              + usefvar + ') : "0"(' + usefvar + '), "rI"(' + limit + ')); '
              + '__asm__ ("f_set_gridsize %%0, %%2\\t! MT: CREATE %s"' % lbl
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '), "rI"(' + step + ')); ')

    lc.callconv = c['nargs']
    lc.fidvar = usefvar

    newbl += lc.body.accept(self)

    # done with body, now handle sync.
    # First of all, if there were some memory-passed arguments, we need to
    # push the address of the argument structure to the child family. A
    # memory barrier is required because the remote thread(s) may access
    # the memory as soon as r_write completes.
    if c['gl_mem_offset'] is not None:
        newbl += (flatten(cr.loc_end,
                          ' __asm__ ("wmb; r_write %%2, %%3\\t!MT: set offset for memargs in %%0"')
                  + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '),'
                  + ' "r"(' + gblvar + '+%d)' % c['gl_mem_offset']
                  + ', "r"(&' + mavar + '));')

    # actually create the family
    newbl += (flatten(cr.loc_end,
                      '__asm__ __volatile__("f_create %%0, %%2, %%0\\t! MT: CREATE %s"' % lbl)
              + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '),'
              + ' "r"(' + funvar + ') : "memory");')

    # now, on to the sync.
    if cr.sync_type == 'normal':
        # wait for the child family to terminate.
        newbl += (flatten(cr.loc_end,
                          '__asm__ __volatile__("f_fence %%0, 31; nop;'
                          ' t_wait\\t! MT: SYNC %s"' % lbl)
                  + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + ') : "memory");')
    elif cr.sync_type == 'detach':
        # automatically release resources upon termination
        newbl += (flatten(cr.loc_end,
                          '__asm__ __volatile__("f_fence %%0, 30; nop;'
                          '\\t! MT: SYNC %s"' % lbl)
                  + ' : "=r"(' + usefvar + ') : "0"(' + usefvar + '));')

    return newbl
