def consumePrintToFile(self, ctxt, numOutVar):
    """Emit code that writes the query result to ``queryresult.csv``.

    All statements are appended to ``ctxt.codegen.finish``. The emitted code
    opens the file, prints one header line with the attribute names, prints
    one line per result row and closes the file again.

    Args:
        ctxt: translation context; supplies ``codegen.finish`` and the
            attribute file with the output columns.
        numOutVar: expression for the number of result rows to print.
    """
    codeout = ctxt.codegen.finish
    outRelation = self.algExpr.outRelation

    outFile = Variable.val(ptr(CType.FILE), "outFile", codeout)
    emit(assign(outFile, fopen("queryresult.csv", "w")), codeout)

    # header line: one entry per output attribute
    for att in outRelation.values():
        emit(fprintf(outFile, att.name + ", "), codeout)
    emit(fprintf(outFile, "\\n", []), codeout)

    # body: one line per result row
    loopVar = Variable.val(CType.INT, "pv")
    with ForLoop(
            assign(declare(loopVar), intConst(0)),
            smaller(loopVar, numOutVar),
            assignAdd(loopVar, intConst(1)),
            codeout):
        for attId, att in outRelation.items():
            ovar = ctxt.attFile.ocolFile[attId]
            if att.dataType == Type.STRING:
                # strings are printed by the stringPrint helper, which gets
                # the char column, the row's column entry and the file
                offs = ovar.arrayAccess(loopVar)
                charCol = ctxt.attFile.incolFile[att.id][1]
                emit(call("stringPrint", [charCol, offs, outFile]), codeout)
            else:
                emit(
                    fprintf(outFile, CType.printFormat[ovar.dataType] + " ",
                            [ovar.arrayAccess(loopVar)]), codeout)
        emit(fprintf(outFile, "\\n", []), codeout)

    # release the file handle that was opened above
    emit(call("fclose", [outFile]), codeout)
def htInsertFilter(self, ctxt):
    """Emit code that inserts the current build tuple into the filter table.

    The code is guarded by the pipeline's active flag. It hashes the build
    key attributes, materializes the payload and loops on
    ``qlib.Fct.HASH_AGG_BUCKET`` until ``checkEquality`` reports that the
    returned bucket holds an equal payload.

    Args:
        ctxt: translation context (code generator and pipeline variables).
    """
    codegen = ctxt.codegen
    opId = str(self.algExpr.opId)
    with IfClause(ctxt.vars.activeVar, codegen):
        # compute a non-unique hash over join attributes; seeded with 0 like
        # the hash variables of the other insert/probe routines
        hashVar = Variable.val(CType.UINT64, "hash" + opId, codegen, intConst(0))
        Hash.attributes(self.algExpr.buildKeyAttributes, hashVar, ctxt)

        # find bucket
        bucketVar = Variable.val(CType.INT, "bucket", codegen, intConst(0))
        payl = self.payload.materialize("payl" + opId, codegen, ctxt)
        bucketFound = Variable.val(CType.INT, "bucketFound", codegen, intConst(0))
        numLookups = Variable.val(CType.INT, "numLookups", codegen, intConst(0))
        with WhileLoop(notLogic(bucketFound), codegen):
            # allocate empty bucket or get tid from bucket
            emit(
                assign(
                    bucketVar,
                    call(qlib.Fct.HASH_AGG_BUCKET, [
                        self.htmem.ht, self.htmem.numEntries, hashVar,
                        numLookups, addressof(payl)
                    ])), codegen)
            # verify grouping attributes from bucket
            probepayl = Variable.val(
                self.payload.getType(), "probepayl", codegen,
                member(self.htmem.ht.arrayAccess(bucketVar), "payload"))
            self.payload.checkEquality(bucketFound, payl, probepayl, ctxt)
def __init__(self, tid, scanType, isTempScan, table, scanRelation, algExpr, ctxt):
    """Open a scan over ``table`` and load the scanned attributes.

    For ``scanType.KERNEL`` a new kernel is opened, the thread-index, step,
    flush and active variables are declared and a while loop on the negated
    flush flag is started. For ``scanType.INNER`` a for loop over the table
    is started and the previous active flag is saved in ``outerActive``.
    Afterwards the columns of ``scanRelation`` are mapped and, guarded by the
    active flag, dematerialized at ``tid``.

    Args:
        tid: variable used as tuple id of the scan.
        scanType: scan kind; compared against its ``KERNEL``/``INNER`` members.
        isTempScan: truthy when ``table`` is a temporary table.
        table: table descriptor; ``table["size"]`` bounds the scan.
        scanRelation: mapping of attribute id to attribute to load.
        algExpr: algebra expression that owns the scan (names the kernel).
        ctxt: translation context.
    """
    self.scanType = scanType
    self.ctxt = ctxt
    codegen = ctxt.codegen
    pipeVars = ctxt.vars

    if scanType == scanType.KERNEL:
        kernelName = ident.scanKernel(algExpr) + str(algExpr.opId)
        codegen.openKernel(Kernel(kernelName))
        tid.declareAssign(intConst(0), codegen)

        # global thread index and the total number of threads
        pipeVars.loopVar = Variable.val(CType.UINT, "loopVar")
        pipeVars.loopVar.declareAssign(
            add(mul(blockIdx_x(), blockDim_x()), threadIdx_x()), codegen)
        pipeVars.stepVar = Variable.val(CType.UINT, "step")
        pipeVars.stepVar.declareAssign(mul(blockDim_x(), gridDim_x()), codegen)

        pipeVars.flushVar = Variable.val(CType.UINT, "flushPipeline", codegen, intConst(0))
        pipeVars.activeVar = Variable.val(CType.INT, "active", codegen, intConst(0))

        commentOperator("scan", codegen)
        self.kernelLoop = WhileLoop(notLogic(pipeVars.flushVar), codegen)
        emit(assign(pipeVars.scanTid, pipeVars.loopVar), codegen)
        emit(assign(pipeVars.activeVar, smaller(pipeVars.loopVar, table["size"])), codegen)
        comment("flush pipeline if no new elements", codegen)
        emit(
            assign(
                pipeVars.flushVar,
                notLogic(ballotIntr(qlib.Const.ALL_LANES, pipeVars.activeVar))),
            codegen)
    elif scanType == scanType.INNER:
        # inner scan loop: remember the enclosing activity and restore it
        # at the start of every iteration
        self.outerActive = Variable.val(CType.INT, "outerActive" + str(algExpr.opId))
        self.outerActive.declareAssign(ctxt.vars.activeVar, codegen)
        self.innerLoop = ForLoop(
            assign(declare(tid), intConst(0)),
            smaller(tid, table["size"]),
            increment(tid),
            codegen)
        emit(assign(ctxt.vars.activeVar, self.outerActive), codegen)

    # map data columns
    for attr in scanRelation.values():
        if isTempScan:
            ctxt.attFile.mapTemptableInputAttribute(attr, table)
        else:
            ctxt.attFile.mapInputAttribute(attr, table)

    # dematerialize
    with IfClause(ctxt.vars.activeVar, codegen):
        for attr in scanRelation.values():
            ctxt.attFile.dematerializeAttribute(attr, tid)
def consumePrintResultSample(self, ctxt, numOutVar):
    """Emit code that prints the first rows of the result to stdout.

    At most ten rows (and never more than ``numOutVar``) are printed as
    ``name: value`` pairs; ``[...]`` is printed when the result has more
    rows than the limit. The code goes to ``ctxt.codegen.finish``.

    Args:
        ctxt: translation context.
        numOutVar: expression for the number of result rows.
    """
    target = ctxt.codegen.finish
    rowVar = Variable.val(CType.INT, "pv")
    printLimit = intConst(10)

    loopInit = assign(declare(rowVar), intConst(0))
    loopCond = andLogic(smaller(rowVar, printLimit), smaller(rowVar, numOutVar))
    loopStep = assignAdd(rowVar, intConst(1))
    with ForLoop(loopInit, loopCond, loopStep, target):
        for attId, att in self.algExpr.outRelation.items():
            emit(printf(att.name + ": "), target)
            ovar = ctxt.attFile.ocolFile[attId]
            if att.dataType != Type.STRING:
                emit(
                    printf(CType.printFormat[ovar.dataType],
                           [ovar.arrayAccess(rowVar)]), target)
            else:
                # strings go through the stringPrint helper
                offs = ovar.arrayAccess(rowVar)
                charCol = ctxt.attFile.incolFile[att.id][1]
                emit(call("stringPrint", [charCol, offs]), target)
            emit(printf(" "), target)
        emit(printf("\\n", []), target)

    # indicate that rows were left out
    with IfClause(larger(numOutVar, printLimit), target):
        emit(printf("[...]\\n"), target)
    emit(printf("\\n"), target)
def consume(self, ctxt):
    """Emit lane-activity profiling code and pass on to the parent operator.

    One statistics counter is created for each possible number of active
    lanes (0 through 32). In the kernel, the lane with warplane 0 increments
    the counter that matches the popcount of the warp-wide ballot over the
    active flag. The counter printouts are wrapped in ``<pN>``/``</pN>``
    markers in the ``finish`` code.

    Args:
        ctxt: translation context.
    """
    codegen = ctxt.codegen
    tag = "p" + str(self.algExpr.opId)
    numBins = 33  # 0..32 active lanes

    codegen.currentKernel.annotate("P" + str(self.algExpr.opId))

    # counters are created between the opening and closing marker so their
    # printouts end up inside the markers
    emit(printf("<" + tag + ">\\n"), codegen.finish)
    counters = [
        codegen.newStatisticsCounter(
            "its" + str(lanes) + "active_" + tag, str(lanes) + ", ")
        for lanes in range(numBins)
    ]
    emit(printf("</" + tag + ">\\n\\n"), codegen.finish)

    numActiveProfile = Variable.val(CType.INT, "numActiveProfile" + "_" + tag, codegen)
    activeLanes = popcount(ballotIntr(qlib.Const.ALL_LANES, ctxt.vars.activeVar))
    emit(assign(numActiveProfile, activeLanes), codegen)

    with IfClause(equals(codegen.warplane(), intConst(0)), codegen):
        for lanes, counter in enumerate(counters):
            with IfClause(equals(numActiveProfile, intConst(lanes)), codegen):
                emit(atomicAdd(counter, intConst(1)), codegen)

    self.parent.consume(ctxt)
def bufDeclareSmem(self, bufferVars):
    """Declare the buffer index and one shared array per buffered variable.

    The arrays have ``KernelCall.defaultBlockSize`` entries and are declared
    in the kernel's init code. They are stored in ``self.buffers`` keyed by
    ``v.get()`` of the buffered variable.

    Args:
        bufferVars: not read here; the method iterates ``self.bufferVars``.
    """
    codegen = self.ctxt.codegen

    self.buf_ix = Variable.val(CType.INT, "bufIdx")
    self.buf_ix.declare(codegen)

    comment("shared memory variables for divergence buffers", codegen.init())

    # initialize shared memory buffers and store in dict by variable name
    self.buffers = {}
    for bufferedVar in self.bufferVars:
        sharedBuf = Variable.val(bufferedVar.dataType, ident.divergenceBuffer(bufferedVar))
        sharedBuf.declareSharedArray(
            intConst(KernelCall.defaultBlockSize), codegen.init())
        self.buffers[bufferedVar.get()] = sharedBuf
def __init__(self, relation, tableName, matType, sizeEstimate, ctxt):
    """Emit code that writes the active tuples of a warp to an output table.

    A counter ``nout_<tableName>`` is declared, mapped for writing,
    initialized with 0 and added to the current kernel. Each active lane
    gets a write position ``wp``; the attributes of ``relation`` are then
    mapped according to ``matType`` and materialized at ``wp``.

    With ``useWarpScan`` (always enabled here) lane 0 reserves space for the
    whole warp with a single ``atomicAdd``. The base position is shuffled to
    all lanes and each lane adds the popcount of the ballot mask restricted
    to ``codegen.prefixlanes()``.

    Args:
        relation: mapping of attribute id to attribute to materialize.
        tableName: name of the output table (used in the counter name).
        matType: ``MaterializationType.RESULT`` or ``TEMPTABLE``.
        sizeEstimate: size estimate passed to the output column mapping.
        ctxt: translation context.
    """
    pipeVars = ctxt.vars
    codegen = ctxt.codegen
    self.relation = relation
    self.tableName = tableName

    # output counter
    self.numOut = Variable.val(CType.INT, "nout_" + tableName, codegen.declare)
    codegen.gpumem.mapForWrite(self.numOut)
    codegen.gpumem.initVar(self.numOut, intConst(0))
    codegen.currentKernel.addVar(self.numOut)

    # write position of the current lane
    wp = Variable.val(CType.INT, "wp", codegen)
    self.useWarpScan = True
    if not self.useWarpScan:
        # one atomic per active lane (fixed: used to read the nonexistent
        # ctxt.activeVar and an undefined name `codewrite`)
        with IfClause(pipeVars.activeVar, codegen):
            emit(assign(wp, atomicAdd(self.numOut, intConst(1))), codegen)
    else:
        mask = Variable.val(CType.INT, "writeMask", codegen)
        numactive = Variable.val(CType.INT, "numProj", codegen)
        emit(
            assign(
                mask,
                ballotIntr(qlib.Const.ALL_LANES, intConst(pipeVars.activeVar))),
            codegen)
        emit(assign(numactive, popcount(mask)), codegen)
        # lane 0 reserves the output range for the whole warp
        with IfClause(equals(codegen.warplane(), intConst(0)), codegen):
            emit(assign(wp, atomicAdd(self.numOut, numactive)), codegen)
        # hand the base position to all lanes, then add the per-lane offset
        emit(
            assign(wp, shuffleIntr(qlib.Const.ALL_LANES, wp, intConst(0))),
            codegen)
        emit(
            assign(
                wp,
                add(wp, popcount(andBitwise(mask, codegen.prefixlanes())))),
            codegen)

    with IfClause(pipeVars.activeVar, codegen):
        for att in relation.values():
            if matType == MaterializationType.RESULT:
                ctxt.attFile.mapOutputAttribute(att, sizeEstimate)
            elif matType == MaterializationType.TEMPTABLE:
                ctxt.attFile.mapTemptableOutputAttribute(
                    att, self.getTable(), sizeEstimate)
            ctxt.attFile.materializeAttribute(att, wp, matType)
def newStatisticsCounter(self, varname, text):
    """Create an unsigned counter that the current kernel can update.

    The counter is declared with 0 in the ``declare`` code, mapped for
    writing, initialized with ``0u`` and added to the current kernel. A
    ``printf`` of ``text`` followed by the counter value is appended to the
    ``finish`` code.

    Args:
        varname: name of the counter variable.
        text: label printed in front of the counter value.

    Returns:
        The counter variable.
    """
    counter = Variable.val(CType.UINT, varname)
    counter.declareAssign(intConst(0), self.declare)
    self.gpumem.mapForWrite(counter)
    self.gpumem.initVar(counter, "0u")
    self.currentKernel.addVar(counter)
    # the counter is unsigned, so print it with %u rather than %i
    emit(printf(text + "%u\\n", [counter]), self.finish)
    return counter
def stringConstant(self, token):
    """Declare a string constant in the init code and return its variable.

    Constants are numbered with ``self.constCounter`` and named ``c<N>``.
    The variable is initialized with a ``stringConstant`` call that receives
    the quoted token and ``len(token)``.

    Args:
        token: text of the constant (inserted between double quotes as is).

    Returns:
        The variable that holds the constant.
    """
    self.constCounter += 1
    constVar = Variable.val(CType.STR_TYPE, "c" + str(self.constCounter))
    declaration = declare(constVar)
    quoted = '"' + token + '"'
    initializer = call("stringConstant", [quoted, len(token)])
    emit(assign(declaration, initializer), self.init())
    return constVar
def warpid(self):
    """Return the current kernel's ``warpid`` variable, creating it on demand.

    On first use for a kernel the variable is declared in the init code as
    ``threadIdx_x() / 32`` and stored on the kernel object.
    """
    kernel = self.currentKernel
    if not hasattr(kernel, "warpid"):
        kernel.warpid = Variable.val(CType.UINT, "warpid")
        emit(
            assign(declare(kernel.warpid), div(threadIdx_x(), intConst(32))),
            self.init())
    return kernel.warpid
def warplane(self):
    """Return the current kernel's ``warplane`` variable, creating it on demand.

    On first use for a kernel the variable is declared in the init code as
    ``threadIdx_x() % 32`` and stored on the kernel object.
    """
    kernel = self.currentKernel
    if not hasattr(kernel, "warplane"):
        kernel.warplane = Variable.val(CType.UINT, "warplane")
        emit(
            assign(declare(kernel.warplane), modulo(threadIdx_x(), intConst(32))),
            self.init())
    return kernel.warplane
def close(self):
    """Close the buffered loop that ``open`` started.

    Recomputes the ballot mask and the number of active lanes, closes the
    main while loop and then lets the buffer helper flush the remaining
    active tuples to the buffer.
    """
    ctxt = self.ctxt
    # refresh activity mask and count for the loop condition
    emit(
        assign(
            self.mask,
            ballotIntr(qlib.Const.ALL_LANES, intConst(ctxt.vars.activeVar))),
        ctxt.codegen)
    emit(assign(self.numactive, popcount(self.mask)), ctxt.codegen)
    # closes main buffer loop
    self.whileLoop.close()
    # write remaining active tuples to buffer
    self.bufferHelper.consumeFlushToBuffer(self.numactive, self.mask)
def htProbeMultiMatchSemiAnti(self, ctxt):
    """Emit the per-lane semi/anti-join probe of a multi-match hash table.

    Each probing lane gets a match range from ``qlib.Fct.HASH_PROBE_MULTI``
    and walks it until a build tuple passes the key equality check (and the
    join conditions, if any) or the range is exhausted. For a semi join the
    active flag is and-ed with the match flag, for an anti join with its
    negation. Finally the parent operator consumes the tuple.

    Args:
        ctxt: translation context.
    """
    codegen = ctxt.codegen
    algExpr = self.algExpr
    suffix = str(algExpr.opId)

    # match range and step of the current lane
    self.endVar = Variable.val(CType.INT, "matchEnd" + suffix, codegen, intConst(0))
    self.offsetVar = Variable.val(CType.INT, "matchOffset" + suffix, codegen, intConst(0))
    self.matchStepVar = Variable.val(CType.INT, "matchStep" + suffix, codegen, intConst(1))
    filterMatch = Variable.val(CType.INT, "filterMatch" + suffix, codegen, intConst(0))
    probeActive = Variable.val(CType.INT, "probeActive" + suffix, codegen, ctxt.vars.activeVar)
    hashVar = Variable.val(CType.UINT64, "hash" + suffix, codegen, intConst(0))

    # look up the match range for the probe key
    with IfClause(probeActive, codegen):
        Hash.attributes(algExpr.probeKeyAttributes, hashVar, ctxt)
        probeCall = call(qlib.Fct.HASH_PROBE_MULTI, [
            self.htmem.ht, self.htmem.numEntries, hashVar, self.offsetVar,
            self.endVar
        ])
        emit(assign(probeActive, probeCall), codegen)

    # walk the range until a match is found or the range ends
    with WhileLoop(probeActive, codegen):
        payl = Variable.val(self.htmem.payload.dataType, "payl", codegen)
        emit(assign(payl, self.htmem.payload.arrayAccess(self.offsetVar)), codegen)
        self.payload.dematerialize(payl, ctxt)
        emit(assign(filterMatch, intConst(1)), codegen)
        Hash.checkEquality(filterMatch, algExpr.buildKeyAttributes,
                           algExpr.probeKeyAttributes, ctxt)
        if algExpr.conditions is not None:
            emit(assignAnd(filterMatch, algExpr.conditions.translate(ctxt)), codegen)
        emit(assignAdd(self.offsetVar, self.matchStepVar), codegen)
        emit(assignAnd(probeActive, notLogic(filterMatch)), codegen)
        emit(assignAnd(probeActive, smaller(self.offsetVar, self.endVar)), codegen)

    if algExpr.joinType == Join.SEMI:
        emit(assignAnd(ctxt.vars.activeVar, filterMatch), codegen)
    if algExpr.joinType == Join.ANTI:
        emit(assignAnd(ctxt.vars.activeVar, notLogic(filterMatch)), codegen)
    self.parent.consume(ctxt)
def open(self):
    """Start the divergence-buffer loop and return ``self``.

    Declares the activity mask, the active-lane count and the bail-out
    limit, lets the buffer helper declare its buffers, opens the main while
    loop (closed again by ``close``) and emits the refill step.

    The bail-out limit is 0 while the pipeline is flushing and
    ``self.threshold`` otherwise; the loop runs while buffer count plus
    active lanes exceeds it.
    """
    ctxt = self.ctxt
    codegen = ctxt.codegen

    commentOperator("divergence buffer", self.opId, codegen)
    comment(
        "ensures that the thread activity in each warp (32 threads) lies above a given threshold",
        codegen)
    comment(
        "depending on the buffer count inactive lanes are either refilled or flushed to the buffer",
        codegen)

    suffix = str(self.opId) + "_"
    self.mask = Variable.val(CType.INT, "activemask" + suffix)
    self.numactive = Variable.val(CType.INT, "numactive" + suffix)
    self.bailout = Variable.val(CType.INT, "minTuplesInFlight" + suffix)

    warpActivity = ballotIntr(qlib.Const.ALL_LANES, ctxt.vars.activeVar)
    self.mask.declareAssign(warpActivity, codegen)
    self.numactive.declareAssign(popcount(self.mask), codegen)

    # declare buffer variables ( shuffle registers or shared memory )
    self.bufferHelper.consumeDeclareBuffer(self.bufferVars)

    bailoutValue = inlineIf(ctxt.vars.flushVar, intConst(0), intConst(self.threshold))
    emit(assign(declare(self.bailout), bailoutValue), codegen)

    # starts main buffer loop
    tuplesInFlight = add(ctxt.vars.buffercount, self.numactive)
    self.whileLoop = WhileLoop(larger(tuplesInFlight, self.bailout), codegen)

    # refill active lanes if activity below threshold
    self.bufferHelper.consumeRefillFromBuffer(self.numactive, self.mask, self.threshold)
    return self
def consumeDeclareBuffer(self, bufferVars):
    """Declare the helper variables and buffers of the divergence buffer.

    ``bufferBase`` is declared in the init code as ``warpid * 32``; ``scan``
    and ``remaining`` are declared in the kernel code. A copy of
    ``bufferVars`` is kept in ``self.bufferVars`` and the buffers are then
    declared according to ``self.buftype``.

    Args:
        bufferVars: variables to buffer at this pipeline stage.
    """
    codegen = self.ctxt.codegen

    self.bufferbase = Variable.val(CType.INT, "bufferBase")
    baseDecl = declare(self.bufferbase)
    baseValue = mul(codegen.warpid(), intConst(32))
    emit(assign(baseDecl, baseValue), codegen.init())

    self.scan = Variable.val(CType.INT, "scan")
    self.scan.declare(codegen)
    self.numRemaining = Variable.val(CType.INT, "remaining")
    self.numRemaining.declare(codegen)

    # remember variables that need to be buffered at this pipeline stage
    self.bufferVars = bufferVars.copy()

    # each buffer type has its own declaration routine
    if self.buftype is BufferType.SMEM:
        self.bufDeclareSmem(bufferVars)
    if self.buftype is BufferType.REG:
        self.bufDeclareReg(bufferVars)
def htInsertMultiMatch(self, ctxt):
    """Emit the insert of a build tuple into a multi-match hash table.

    The current kernel receives a ``qlib.Fct.HASH_COUNT_MULTI`` call, the
    library kernel ``scanMultiHT`` is queued as kernel call and a mirror
    kernel (suffix ``_ins``) receives the payload materialization and the
    ``qlib.Fct.HASH_INSERT_MULTI`` call.

    Args:
        ctxt: translation context.
    """
    codegen = ctxt.codegen
    suffix = str(self.algExpr.opId)

    # execute only when current thread has active elements
    with IfClause(ctxt.vars.activeVar, codegen):
        # compute a (possibly) non-unique hash over all join attributes
        hashVar = Variable.val(CType.UINT64, "hash" + suffix, codegen, intConst(0))
        with IfClause(ctxt.vars.activeVar, codegen):
            Hash.attributes(self.algExpr.buildKeyAttributes, hashVar, ctxt)

        # offset variable shared with the scan kernel and the mirror kernel
        htRangeOffset = Variable.val(CType.INT, "offs" + suffix)
        codegen.gpumem.local(htRangeOffset, intConst(0))
        scanArgs = [
            self.htmem.ht.getGPU(), self.htmem.numEntries,
            htRangeOffset.getGPU()
        ]
        codegen.kernelCalls.append(KernelCall.library("scanMultiHT", scanArgs))

        codegen.openMirrorKernel("_ins")

        # count call goes to the current kernel
        countArgs = [self.htmem.ht, self.htmem.numEntries, hashVar]
        emit(call(qlib.Fct.HASH_COUNT_MULTI, countArgs), codegen.currentKernel)

        # insert call goes to the mirror kernel
        codegen.mirrorKernel.addVar(htRangeOffset)
        payl = self.payload.materialize("payl", codegen.mirrorKernel, ctxt)
        insertArgs = [
            self.htmem.ht, self.htmem.payload, htRangeOffset,
            self.htmem.numEntries, hashVar,
            addressof(payl)
        ]
        emit(call(qlib.Fct.HASH_INSERT_MULTI, insertArgs), codegen.mirrorKernel)
def bufDeclareReg(self, bufferVars):
    """Declare the variables used by the register-based divergence buffer.

    Declares the shared ``shuffleSourceLane`` array, the per-operator
    ``sourceLane``/``activeDest``/``activeSource`` variables and, for each
    buffered variable, a buffer and a shuffle buffer. Buffers are stored in
    ``self.buffers`` and ``self.shuffleBuffers`` keyed by ``v.get()``.

    Args:
        bufferVars: not read here; the method iterates ``self.bufferVars``.
    """
    codegen = self.ctxt.codegen
    suffix = str(self.opId) + "_"

    comment("register variables for divergence buffers", codegen)

    self.shuffleSourceLane = Variable.val(CType.INT, "shuffleSourceLane")
    self.shuffleSourceLane.declareSharedArray(
        intConst(KernelCall.defaultBlockSize), codegen.init())

    self.sourceLane = Variable.val(CType.INT, "sourceLane" + suffix)
    self.sourceLane.declare(codegen.init())
    self.activeDest = Variable.val(CType.INT, "activeDest" + suffix)
    self.activeDest.declare(codegen.init())
    self.activeSource = Variable.val(CType.INT, "activeSource" + suffix)
    self.activeSource.declare(codegen.init())

    self.buffers = {}
    self.shuffleBuffers = {}
    for bufferedVar in self.bufferVars:
        regBuf = Variable.val(bufferedVar.dataType, ident.registerBuffer(bufferedVar))
        shuffleBuf = Variable.val(bufferedVar.dataType, ident.registerShuffleBuffer(bufferedVar))
        regBuf.declare(codegen.init())
        shuffleBuf.declare(codegen.init())
        self.buffers[bufferedVar.get()] = regBuf
        self.shuffleBuffers[bufferedVar.get()] = shuffleBuf
def prefixlanes(self):
    """Return the current kernel's ``prefixlanes`` variable, creating it on demand.

    On first use for a kernel the variable is declared in the init code as
    ``bitmask32f() >> (32 - warplane)`` and stored on the kernel object.
    """
    kernel = self.currentKernel
    if not hasattr(kernel, "prefixlanes"):
        kernel.prefixlanes = Variable.val(CType.UINT, "prefixlanes")
        declaration = declare(kernel.prefixlanes)
        # self.warplane() may itself emit the warplane declaration first
        laneMask = shiftRight(bitmask32f(), sub(intConst(32), self.warplane()))
        emit(assign(declaration, laneMask), self.init())
    return kernel.prefixlanes
def __init__(self, ctxt, bufferVars, threshold=0.8):
    """Set up a divergence-buffered loop and open it immediately.

    Args:
        ctxt: translation context; receives ``vars.buffercount``.
        bufferVars: variables to buffer at this pipeline stage.
        threshold: fraction of 32 lanes; stored as ``int(32 * threshold)``.
    """
    self.ctxt = ctxt
    self.bufferVars = bufferVars
    self.hasBalancingCode = False
    self.threshold = int(32 * threshold)

    # buffer fill counter, declared with 0 in the init code
    counter = Variable.val(CType.INT, "buffercount")
    counter.declareAssign(intConst(0), ctxt.codegen.init())
    ctxt.vars.buffercount = counter

    self.bufferHelper = BufferHelperSharedMemory(ctxt, BufferType.SMEM)
    self.open()
def produce(self, ctxt):
    """Open a kernel scan over the operator's table and feed the parent.

    For a temp-table scan the ``nout_<table name>`` variable is added to
    the current kernel before the parent operator consumes.

    Args:
        ctxt: translation context; ``ctxt.vars.scanTid`` is set here.
    """
    expr = self.algExpr
    ctxt.vars.scanTid = Variable.tidLit(expr.table, expr.scanTableId)
    scan = ScanLoop(ctxt.vars.scanTid, ScanType.KERNEL, expr.isTempScan,
                    expr.table, expr.outRelation, expr, ctxt)
    with scan:
        if expr.isTempScan:
            tempSize = Variable.val(CType.INT, "nout_" + expr.table["name"])
            ctxt.codegen.currentKernel.addVar(tempSize)
        # call parent operator
        self.parent.consume(ctxt)
def translate ( self, ctxt ):
    """Emit an if / else-if / else chain for the CASE expression.

    A variable ``casevar<exprId>`` is declared; every WHEN/THEN pair
    assigns its THEN value to the variable under its WHEN condition, and
    the ELSE expression (if present) is assigned in a final else branch.

    Args:
        ctxt: translation context.

    Returns:
        ``var.get()`` of the variable that holds the CASE result.
    """
    var = Variable.val ( ctxt.codegen.langType ( self.type ), "casevar" + str(self.exprId) )
    var.declare ( ctxt.codegen )

    # first WHEN opens the chain
    w0, t0 = self.exprListWhenThen[0]
    with lang.IfClause ( w0.translate ( ctxt ), ctxt.codegen ):
        lang.emit ( lang.assign ( var, t0.translate ( ctxt ) ), ctxt.codegen )

    # remaining WHENs continue it
    for w, t in self.exprListWhenThen[1:]:
        with lang.ElseIfClause ( w.translate ( ctxt ), ctxt.codegen ):
            lang.emit ( lang.assign ( var, t.translate ( ctxt ) ), ctxt.codegen )

    # identity test: does not go through any comparison operator
    # defined by the expression object
    if self.exprElse is not None:
        with lang.ElseClause ( ctxt.codegen ):
            lang.emit ( lang.assign ( var, self.exprElse.translate ( ctxt ) ), ctxt.codegen )
    return var.get()
def consumeHashTable(self, ctxt):
    """Emit a scan over the aggregation hash table and feed the parent.

    With grouping, only buckets whose ``lock.lock`` member equals
    ``OnceLock::LOCK_DONE`` stay active and their payload is dematerialized.
    The aggregation attributes are then dematerialized and every avg
    aggregate is divided by the count attribute (cast to the aggregate's
    language type).

    Args:
        ctxt: translation context.
    """
    htmem = self.htmem
    expr = self.algExpr

    ctxt.vars.scanTid = Variable.tidLit(htmem.getTable(expr.opId), expr.opId)
    expr.table = htmem.getTable(expr.opId)
    expr.scanTableId = 1

    with ScanLoop(ctxt.vars.scanTid, ScanType.KERNEL, False,
                  htmem.getTable(expr.opId), dict(), expr, ctxt):
        codegen = ctxt.codegen
        commentOperator("scan aggregation ht", expr.opId, codegen)
        htmem.addToKernel(codegen.currentKernel)

        if expr.doGroup:
            # keep only buckets marked as done
            with IfClause(ctxt.vars.activeVar, codegen):
                lockState = member(htmem.ht.arrayAccess(ctxt.vars.scanTid), "lock.lock")
                emit(
                    assignAnd(ctxt.vars.activeVar,
                              equals(lockState, "OnceLock::LOCK_DONE")),
                    codegen)
            # load the grouping attributes from the bucket payload
            with IfClause(ctxt.vars.activeVar, codegen):
                payl = Variable.val(self.payload.getType(), "payl")
                payl.declareAssign(
                    member(htmem.ht.arrayAccess(ctxt.vars.scanTid), "payload"),
                    codegen)
                self.payload.dematerialize(payl, ctxt)

        with IfClause(ctxt.vars.activeVar, codegen):
            htmem.dematerializeAggregationAttributes(ctxt.vars.scanTid, ctxt)
            # avg = accumulated value / count
            for att in expr.avgAggregates.values():
                countAtt = expr.countAttr
                attType = codegen.langType(att.dataType)
                quotient = div(ctxt.attFile.access(att),
                               cast(attType, ctxt.attFile.access(countAtt)))
                emit(assign(ctxt.attFile.access(att), quotient), codegen)

        # call parent operator
        self.parent.consume(ctxt)
def htProbeFilter(self, ctxt):
    """Emit the semi/anti-join probe against the filter hash table.

    Active lanes hash the probe keys and call ``qlib.Fct.HASH_AGG_CHECK``
    repeatedly until either a located bucket passes the key equality check
    (and the join conditions, if any) or the call reports no further
    candidate. For a semi join the active flag is and-ed with the match
    flag, for an anti join with its negation. The parent operator consumes
    afterwards.

    Args:
        ctxt: translation context.
    """
    codegen = ctxt.codegen
    expr = self.algExpr
    suffix = str(expr.opId)

    with IfClause(ctxt.vars.activeVar, codegen):
        hashVar = Variable.val(CType.UINT64, "hash" + suffix, codegen, intConst(0))
        Hash.attributes(expr.probeKeyAttributes, hashVar, ctxt)

        numLookups = Variable.val(CType.INT, "numLookups" + suffix, codegen, intConst(0))
        location = Variable.val(CType.INT, "location" + suffix, codegen, intConst(0))
        filterMatch = Variable.val(CType.INT, "filterMatch" + suffix, codegen, intConst(0))
        probeCall = call(qlib.Fct.HASH_AGG_CHECK, [
            self.htmem.ht, self.htmem.numEntries, hashVar, numLookups,
            location
        ])
        activeProbe = Variable.val(CType.INT, "activeProbe" + suffix, codegen, intConst(1))

        keepProbing = andLogic(notLogic(filterMatch), activeProbe)
        with WhileLoop(keepProbing, codegen):
            emit(assign(activeProbe, probeCall), codegen)
            # verify grouping attributes from bucket
            with IfClause(activeProbe, codegen):
                probepayl = Variable.val(
                    self.payload.getType(), "probepayl", codegen,
                    member(self.htmem.ht.arrayAccess(location), "payload"))
                self.payload.dematerialize(probepayl, ctxt)
                emit(assign(filterMatch, intConst(1)), codegen)
                Hash.checkEquality(filterMatch, expr.buildKeyAttributes,
                                   expr.probeKeyAttributes, ctxt)
                if expr.conditions is not None:
                    emit(assignAnd(filterMatch, expr.conditions.translate(ctxt)),
                         codegen)

        if expr.joinType == Join.SEMI:
            emit(assignAnd(ctxt.vars.activeVar, filterMatch), codegen)
        if expr.joinType == Join.ANTI:
            emit(assignAnd(ctxt.vars.activeVar, notLogic(filterMatch)), codegen)

    self.parent.consume(ctxt)
def htInsertSingleMatch(self, ctxt):
    """Emit the insert of a build tuple into a single-match hash table.

    Guarded by the active flag, the payload is materialized, the build key
    attributes are hashed and ``qlib.Fct.HASH_BUILD_UNIQUE`` is called with
    the table, its size, the hash and the payload address.

    Args:
        ctxt: translation context.
    """
    codegen = ctxt.codegen
    opId = str(self.algExpr.opId)
    # execute only when current thread has active elements
    with IfClause(ctxt.vars.activeVar, codegen):
        # prepare payload
        payl = self.payload.materialize("payl" + opId, codegen, ctxt)
        # compute a non-unique hash over join attributes; seeded with 0 like
        # the hash variables of the probe routines
        hashVar = Variable.val(CType.UINT64, "hash" + opId, codegen, intConst(0))
        Hash.attributes(self.algExpr.buildKeyAttributes, hashVar, ctxt)
        # do hash insert call
        emit(
            call(qlib.Fct.HASH_BUILD_UNIQUE, [
                self.htmem.ht, self.htmem.numEntries, hashVar,
                addressof(payl)
            ]), codegen)
def htProbeMultiMatchMultiBroadcastSemiAnti(self, ctxt):
    """Emit the warp-cooperative semi/anti-join probe of a multi-match table.

    Every probing lane first looks up its own match range with
    ``qlib.Fct.HASH_PROBE_MULTI``. The emitted outer loop then repeatedly
    selects probe lanes ("leaders"), assigns groups of lanes to each
    leader's match range, shuffles the leader's range and buffered
    registers to those lanes and lets them check matches in steps of 32.
    The group result is combined with a ballot restricted to the group's
    lanes (``siblingsMask``) and shuffled from ``firstBroadcastDest`` to
    update the active flag (and-ed with the match flag for semi joins, with
    its negation for anti joins). Afterwards the buffered register values
    are restored and the parent operator consumes.

    Args:
        ctxt: translation context.
    """
    commentOperator("semi/anti multiprobe multi broadcast", self.algExpr.opId, ctxt.codegen)
    # match range variables: the *Buf variables receive the range of the
    # lane's own probe, the plain ones the range the lane works on
    endVar = Variable.val(CType.INT, "matchEnd" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    endVarBuf = Variable.val(CType.INT, "matchEndBuf" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    offsetVar = Variable.val(CType.INT, "matchOffset" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    offsetVarBuf = Variable.val(CType.INT, "matchOffsetBuf" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    filterMatch = Variable.val(CType.INT, "filterMatch" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    probeActive = Variable.val(CType.INT, "probeActive" + str(self.algExpr.opId), ctxt.codegen, ctxt.vars.activeVar)
    # registers that are overwritten by the broadcast: probe keys and the
    # probe-side attributes used by the join conditions
    bufferAtts = dict()
    bufferAtts.update(self.algExpr.probeKeyAttributes)
    bufferAtts.update(self.algExpr.conditionProbeAttributes)
    bufferVars = []
    for id, att in bufferAtts.items():
        var = ctxt.attFile.regFile[id]
        # buffer variable: copy of the register variable under a new name
        bufVar = copy.deepcopy(var)
        bufVar.name = bufVar.name + "_bc_buf" + str(self.algExpr.opId)
        bufVar.declare(ctxt.codegen)
        bufferVars.append((var, bufVar))
    hashVar = Variable.val(CType.UINT64, "hash" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    # each probing lane looks up its own match range
    with IfClause(probeActive, ctxt.codegen):
        Hash.attributes(self.algExpr.probeKeyAttributes, hashVar, ctxt)
        emit(
            assign(
                probeActive,
                call(qlib.Fct.HASH_PROBE_MULTI, [
                    self.htmem.ht, self.htmem.numEntries, hashVar,
                    offsetVarBuf, endVarBuf
                ])), ctxt.codegen)
    # ballot mask of the lanes that still have an unprocessed probe
    activeProbes = Variable.val(CType.UINT, "activeProbes" + str(self.algExpr.opId))
    activeProbes.declareAssign(
        ballotIntr(qlib.Const.ALL_LANES, probeActive), ctxt.codegen)
    # number of tuples in each buffered match
    numbuf = Variable.val(CType.INT, "num" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    emit(assign(numbuf, sub(endVarBuf, offsetVarBuf)), ctxt.codegen)
    # ballot mask of the lanes whose match range has at least 32 tuples
    wideProbes = Variable.val(CType.UINT, "wideProbes" + str(self.algExpr.opId))
    wideProbes.declareAssign(
        ballotIntr(qlib.Const.ALL_LANES,
                   largerEqual(numbuf, intConst(32))), ctxt.codegen)
    # write register state to buffer to prevent overwriting
    for var, bufVar in bufferVars:
        emit(assign(bufVar, var), ctxt.codegen)
    # outer loop: runs until all buffered probes are processed
    with WhileLoop(larger(activeProbes, intConst(0)), ctxt.codegen):
        tupleLane = Variable.val(CType.UINT, "tupleLane", ctxt.codegen)
        broadcastLane = Variable.val(CType.UINT, "broadcastLane", ctxt.codegen)
        numFilled = Variable.val(CType.INT, "numFilled", ctxt.codegen, intConst(0))
        num = Variable.val(CType.INT, "num", ctxt.codegen, intConst(0))
        siblingsMask = Variable.val(CType.UINT, "siblingsMask", ctxt.codegen)
        firstBroadcastDest = Variable.val(CType.INT, "firstBroadcastDest", ctxt.codegen, intConst(-1))
        # assignment loop: hand lanes to leaders until 32 lanes are filled
        # or no probe is left
        with WhileLoop(
                andLogic(smaller(numFilled, intConst(32)), activeProbes),
                ctxt.codegen) as l:
            # select leader
            with IfClause(larger(wideProbes, intConst(0)), ctxt.codegen):
                emit(assign(tupleLane, sub(ffsIntr(wideProbes), 1)), ctxt.codegen)
                emit(
                    assignSub(wideProbes, (shiftLeft(intConst(1), tupleLane))),
                    ctxt.codegen)
            with ElseClause(ctxt.codegen):
                emit(assign(tupleLane, sub(ffsIntr(activeProbes), 1)), ctxt.codegen)
            # broadcast leader number of matches
            emit(
                assign(
                    num,
                    shuffleIntr(qlib.Const.ALL_LANES, numbuf, tupleLane)),
                ctxt.codegen)
            # stop when lanes are already assigned and this leader's
            # matches would exceed 32 lanes
            with IfClause(
                    andLogic(numFilled, larger(add(numFilled, num), 32)),
                    ctxt.codegen):
                l.break_()
            # the leader remembers the first lane that works on its range
            with IfClause(equals(ctxt.codegen.warplane(), tupleLane), ctxt.codegen):
                emit(assign(firstBroadcastDest, numFilled), ctxt.codegen)
            # lanes from numFilled upwards take over the leader's range
            with IfClause(largerEqual(ctxt.codegen.warplane(), numFilled), ctxt.codegen):
                emit(assign(broadcastLane, tupleLane), ctxt.codegen)
                emit(
                    assign(offsetVar, sub(ctxt.codegen.warplane(), numFilled)),
                    ctxt.codegen)
                # mask of the lanes [numFilled, numFilled + num)
                emit(assign(siblingsMask, qlib.Const.ALL_LANES), ctxt.codegen)
                emit(
                    assignAnd(
                        siblingsMask,
                        inverse(
                            shiftRight(qlib.Const.ALL_LANES,
                                       sub(intConst(32), numFilled)))),
                    ctxt.codegen)
                emit(
                    assignAnd(
                        siblingsMask,
                        inverse(
                            shiftLeft(qlib.Const.ALL_LANES,
                                      add(numFilled, num)))),
                    ctxt.codegen)
            emit(assignAdd(numFilled, num),
                 ctxt.codegen)
            # mark buffered probe tuple as processed
            emit(
                assignSub(activeProbes, (shiftLeft(intConst(1), tupleLane))),
                ctxt.codegen)
        # shuffle gather offset
        emit(
            assignAdd(
                offsetVar,
                shuffleIntr(qlib.Const.ALL_LANES, offsetVarBuf,
                            broadcastLane)), ctxt.codegen)
        # shuffle gather end
        emit(
            assign(
                endVar,
                shuffleIntr(qlib.Const.ALL_LANES, endVarBuf,
                            broadcastLane)), ctxt.codegen)
        # shuffle other register vars
        for var, bufVar in bufferVars:
            emit(
                assign(
                    var,
                    shuffleIntr(qlib.Const.ALL_LANES, bufVar,
                                broadcastLane)), ctxt.codegen)
        emit(assign(filterMatch, intConst(0)), ctxt.codegen)
        emit(assign(probeActive, smaller(offsetVar, endVar)), ctxt.codegen)
        # match loop: runs while any lane has an unchecked build tuple
        with WhileLoop(anyIntr(qlib.Const.ALL_LANES, probeActive), ctxt.codegen):
            with IfClause(probeActive, ctxt.codegen):
                payl = Variable.val(self.htmem.payload.dataType, "payl", ctxt.codegen)
                emit(
                    assign(payl, self.htmem.payload.arrayAccess(offsetVar)),
                    ctxt.codegen)
                self.payload.dematerialize(payl, ctxt)
                emit(assign(filterMatch, intConst(1)), ctxt.codegen)
                Hash.checkEquality(filterMatch,
                                   self.algExpr.buildKeyAttributes,
                                   self.algExpr.probeKeyAttributes, ctxt)
                if self.algExpr.conditions is not None:
                    emit(
                        assignAnd(filterMatch,
                                  self.algExpr.conditions.translate(ctxt)),
                        ctxt.codegen)
            # a match of any lane in the same group counts for the group
            emit(
                assign(
                    filterMatch,
                    larger(
                        andBitwise(
                            ballotIntr(qlib.Const.ALL_LANES, filterMatch),
                            siblingsMask), intConst(0))), ctxt.codegen)
            emit(assignAnd(probeActive, notLogic(filterMatch)), ctxt.codegen)
            # advance by 32 tuples
            emit(assignAdd(offsetVar, intConst(32)), ctxt.codegen)
            emit(assignAnd(probeActive, smaller(offsetVar, endVar)), ctxt.codegen)
        # fetch the group result from the lane stored in firstBroadcastDest
        emit(
            assign(
                filterMatch,
                shuffleIntr(qlib.Const.ALL_LANES, filterMatch,
                            firstBroadcastDest)), ctxt.codegen)
        # only lanes that were selected as leader (firstBroadcastDest was
        # set) update their active flag
        with IfClause(largerEqual(firstBroadcastDest, intConst(0)), ctxt.codegen):
            if self.algExpr.joinType == Join.SEMI:
                emit(assignAnd(ctxt.vars.activeVar, filterMatch), ctxt.codegen)
            if self.algExpr.joinType == Join.ANTI:
                emit(assignAnd(ctxt.vars.activeVar, notLogic(filterMatch)), ctxt.codegen)
    # restore register state from the buffer variables
    for var, bufVar in bufferVars:
        emit(assign(var, bufVar), ctxt.codegen)
    self.parent.consume(ctxt)
def consume(self, ctxt):
    """Emit the aggregation step for the tuple of the current lane.

    With grouping, an aggregation hash table with a grouping payload is
    created and the emitted code searches the tuple's bucket with
    ``qlib.Fct.HASH_AGG_BUCKET`` until ``checkEquality`` confirms the
    payload. Without grouping a table of size 1 is used and the bucket
    index stays 0. Active lanes then update every aggregate of the bucket
    with the matching atomic operation. The table is stored in
    ``self.htmem``.

    Args:
        ctxt: translation context.
    """
    codegen = ctxt.codegen
    expr = self.algExpr
    commentOperator("aggregation", codegen)

    # create aggregation hash table with grouping payload
    if expr.doGroup:
        self.payload = Payload("apayl" + str(expr.opId), expr.groupAttributes, ctxt)
        htmem = HashTableMemory.createAgg("aht" + str(expr.opId),
                                          expr.tupleNum * 2.0, self.payload,
                                          codegen)
    else:
        htmem = HashTableMemory(1, codegen)

    # create and initialize aggregation buckets
    htmem.addAggregationAttributes(expr.aggregateAttributes,
                                   expr.aggregateTuples, ctxt)
    htmem.addToKernel(codegen.currentKernel)

    # find bucket
    bucketVar = Variable.val(CType.INT, "bucket", codegen, intConst(0))
    if expr.doGroup:
        with IfClause(ctxt.vars.activeVar, codegen):
            # payload
            hashVar = Variable.val(CType.UINT64, "hash" + str(expr.opId),
                                   codegen, intConst(0))
            Hash.attributes(expr.groupAttributes, hashVar, ctxt)
            payl = self.payload.materialize("payl", codegen, ctxt)
            bucketFound = Variable.val(CType.INT, "bucketFound", codegen, intConst(0))
            numLookups = Variable.val(CType.INT, "numLookups", codegen, intConst(0))
            with WhileLoop(notLogic(bucketFound), codegen):
                # allocate empty bucket or get tid from bucket
                emit(
                    assign(
                        bucketVar,
                        call(qlib.Fct.HASH_AGG_BUCKET, [
                            htmem.ht, htmem.numEntries, hashVar, numLookups,
                            addressof(payl)
                        ])), codegen)
                # verify grouping attributes from bucket
                probepayl = Variable.val(
                    self.payload.getType(), "probepayl", codegen,
                    member(htmem.ht.arrayAccess(bucketVar), "payload"))
                self.payload.checkEquality(bucketFound, payl, probepayl, ctxt)

    # atomic summation of aggregates
    with IfClause(ctxt.vars.activeVar, codegen):
        for aggId, (inId, reduction) in expr.aggregateTuples.items():
            typ = codegen.langType(expr.aggregateAttributes[aggId].dataType)
            agg = addressof(htmem.accessAggregationAttribute(aggId, bucketVar))

            # count: add 1, skipping inputs that are flagged as null
            if reduction == Reduction.COUNT:
                atomAdd = atomicAdd(agg, cast(typ, intConst(1)))
                if inId in ctxt.attFile.isNullFile:
                    with IfClause(notLogic(ctxt.attFile.isNullFile[inId]), codegen):
                        emit(atomAdd, codegen)
                else:
                    emit(atomAdd, codegen)
                continue

            # all other reductions read the input attribute
            val = cast(typ, ctxt.attFile.access(expr.aggregateInAttributes[inId]))
            if reduction == Reduction.MIN:
                emit(atomicMin(agg, val), codegen)
            elif reduction == Reduction.MAX:
                emit(atomicMax(agg, val), codegen)
            elif reduction in (Reduction.SUM, Reduction.AVG):
                # avg only accumulates the sum in this method
                emit(atomicAdd(agg, val), codegen)

    self.htmem = htmem
def htProbeMultiMatchMultiBroadcast(self, ctxt):
    """Emit the warp-cooperative probe of a multi-match hash table.

    Every probing lane first looks up its own match range with
    ``qlib.Fct.HASH_PROBE_MULTI``. The emitted outer loop then repeatedly
    selects probe lanes ("leaders"), assigns lanes to each leader's match
    range, shuffles the leader's range and buffered registers to those
    lanes and lets them step through the range 32 tuples at a time. In
    every step the lanes with a build tuple check key equality and the
    parent operator consumes inside the match loop.

    Args:
        ctxt: translation context; ``ctxt.innerLoopCount`` is incremented
            around the match loop.
    """
    commentOperator("multiprobe multi broadcast", self.algExpr.opId, ctxt.codegen)
    # match range variables: the *Buf variables receive the range of the
    # lane's own probe, the plain ones the range the lane works on
    endVar = Variable.val(CType.INT, "matchEnd" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    endVarBuf = Variable.val(CType.INT, "matchEndBuf" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    offsetVar = Variable.val(CType.INT, "matchOffset" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    offsetVarBuf = Variable.val(CType.INT, "matchOffsetBuf" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    probeActive = Variable.val(CType.INT, "probeActive" + str(self.algExpr.opId), ctxt.codegen, ctxt.vars.activeVar)
    # registers that are overwritten by the broadcast: output of the right
    # child and the probe-side attributes used by the join conditions
    bufferAtts = dict()
    bufferAtts.update(self.algExpr.rightChild.outRelation)
    bufferAtts.update(self.algExpr.conditionProbeAttributes)
    bufferVars = []
    for id, att in bufferAtts.items():
        var = ctxt.attFile.regFile[id]
        # buffer variable: copy of the register variable under a new name
        bufVar = copy.deepcopy(var)
        bufVar.name = bufVar.name + "_bcbuf" + str(self.algExpr.opId)
        bufVar.declare(ctxt.codegen)
        bufferVars.append((var, bufVar))
    hashVar = Variable.val(CType.UINT64, "hash" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    # each probing lane looks up its own match range
    with IfClause(probeActive, ctxt.codegen):
        Hash.attributes(self.algExpr.probeKeyAttributes, hashVar, ctxt)
        emit(
            assign(
                probeActive,
                call(qlib.Fct.HASH_PROBE_MULTI, [
                    self.htmem.ht, self.htmem.numEntries, hashVar,
                    offsetVarBuf, endVarBuf
                ])), ctxt.codegen)
    # ballot mask of the lanes that still have an unprocessed probe
    activeProbes = Variable.val(CType.UINT, "activeProbes" + str(self.algExpr.opId))
    activeProbes.declareAssign(
        ballotIntr(qlib.Const.ALL_LANES, probeActive), ctxt.codegen)
    # number of tuples in each buffered match
    numbuf = Variable.val(CType.INT, "num" + str(self.algExpr.opId), ctxt.codegen, intConst(0))
    emit(assign(numbuf, sub(endVarBuf, offsetVarBuf)), ctxt.codegen)
    # ballot mask of the lanes whose match range has at least 32 tuples
    wideProbes = Variable.val(CType.UINT, "wideProbes" + str(self.algExpr.opId))
    wideProbes.declareAssign(
        ballotIntr(qlib.Const.ALL_LANES,
                   largerEqual(numbuf, intConst(32))), ctxt.codegen)
    # write register state to buffer to prevent overwriting
    for var, bufVar in bufferVars:
        emit(assign(bufVar, var),
             ctxt.codegen)
    # outer loop: runs until all buffered probes are processed
    with WhileLoop(larger(activeProbes, intConst(0)), ctxt.codegen):
        tupleLane = Variable.val(CType.UINT, "tupleLane", ctxt.codegen)
        broadcastLane = Variable.val(CType.UINT, "broadcastLane", ctxt.codegen)
        numFilled = Variable.val(CType.INT, "numFilled", ctxt.codegen, intConst(0))
        num = Variable.val(CType.INT, "num", ctxt.codegen, intConst(0))
        # assignment loop: hand lanes to leaders until 32 lanes are filled
        # or no probe is left
        with WhileLoop(
                andLogic(smaller(numFilled, intConst(32)), activeProbes),
                ctxt.codegen) as l:
            # select leader
            with IfClause(larger(wideProbes, intConst(0)), ctxt.codegen):
                emit(assign(tupleLane, sub(ffsIntr(wideProbes), 1)), ctxt.codegen)
                emit(
                    assignSub(wideProbes, (shiftLeft(intConst(1), tupleLane))),
                    ctxt.codegen)
            with ElseClause(ctxt.codegen):
                emit(assign(tupleLane, sub(ffsIntr(activeProbes), 1)), ctxt.codegen)
            # broadcast leader number of matches
            emit(
                assign(
                    num,
                    shuffleIntr(qlib.Const.ALL_LANES, numbuf, tupleLane)),
                ctxt.codegen)
            # stop when lanes are already assigned and this leader's
            # matches would exceed 32 lanes
            with IfClause(
                    andLogic(numFilled, larger(add(numFilled, num), 32)),
                    ctxt.codegen):
                l.break_()
            # lanes from numFilled upwards take over the leader's range
            with IfClause(largerEqual(ctxt.codegen.warplane(), numFilled), ctxt.codegen):
                emit(assign(broadcastLane, tupleLane), ctxt.codegen)
                emit(
                    assign(offsetVar, sub(ctxt.codegen.warplane(), numFilled)),
                    ctxt.codegen)
            emit(assignAdd(numFilled, num), ctxt.codegen)
            # mark buffered probe tuple as processed
            emit(
                assignSub(activeProbes, (shiftLeft(intConst(1), tupleLane))),
                ctxt.codegen)
        # shuffle gather offset
        emit(
            assignAdd(
                offsetVar,
                shuffleIntr(qlib.Const.ALL_LANES, offsetVarBuf,
                            broadcastLane)), ctxt.codegen)
        # shuffle gather end
        emit(
            assign(
                endVar,
                shuffleIntr(qlib.Const.ALL_LANES, endVarBuf,
                            broadcastLane)), ctxt.codegen)
        # shuffle other register vars
        for var, bufVar in bufferVars:
            emit(
                assign(
                    var,
                    shuffleIntr(qlib.Const.ALL_LANES, bufVar,
                                broadcastLane)), ctxt.codegen)
        emit(assign(probeActive, smaller(offsetVar, endVar)), ctxt.codegen)
        ctxt.innerLoopCount += 1
        # match loop: runs while any lane has an unchecked build tuple
        with WhileLoop(anyIntr(qlib.Const.ALL_LANES, probeActive), ctxt.codegen):
            emit(assign(ctxt.vars.activeVar, probeActive),
                 ctxt.codegen)
            #ctxt.codegen.laneActivityProfile ( ctxt )
            # NOTE(review): this overwrites the assignment just above; it
            # looks like that one only served the disabled profiling hook
            emit(assign(ctxt.vars.activeVar, intConst(0)), ctxt.codegen)
            payl = Variable.val(self.htmem.payload.dataType, "payl", ctxt.codegen)
            with IfClause(probeActive, ctxt.codegen):
                emit(
                    assign(payl, self.htmem.payload.arrayAccess(offsetVar)),
                    ctxt.codegen)
                self.payload.dematerialize(payl, ctxt)
                # active only if the key equality check succeeds
                emit(assign(ctxt.vars.activeVar, intConst(1)), ctxt.codegen)
                Hash.checkEquality(ctxt.vars.activeVar,
                                   self.algExpr.buildKeyAttributes,
                                   self.algExpr.probeKeyAttributes, ctxt)
            # advance by 32 tuples
            emit(assignAdd(offsetVar, intConst(32)), ctxt.codegen)
            emit(assignAnd(probeActive, smaller(offsetVar, endVar)), ctxt.codegen)
            self.parent.consume(ctxt)
        ctxt.innerLoopCount -= 1
def htProbeMultiMatch(self, ctxt):
    """Emit probe code that walks a range of hash-table payload entries.

    The method declares per-operator match variables, emits a call to
    ``qlib.Fct.HASH_PROBE_MULTI`` for lanes whose ``probeActive`` flag is
    set, and then emits a loop that reads ``self.htmem.payload`` at
    ``self.offsetVar``, re-checks the join keys and passes every candidate
    to ``self.parent.consume``.  The offset advances by
    ``self.matchStepVar`` until it reaches ``self.endVar``.  For
    ``Join.OUTER`` an additional block re-activates lanes that never found
    a match.

    Args:
        ctxt: translation context; this method uses ``ctxt.codegen``,
            ``ctxt.vars.activeVar``, ``ctxt.attFile`` and
            ``ctxt.innerLoopCount``.

    Side effects:
        Sets ``self.endVar``, ``self.offsetVar``, ``self.matchStepVar`` and
        ``self.probeWithDivergenceBuffer`` on the operator.
    """
    opSuffix = str(self.algExpr.opId)

    def declareInt(prefix, initial):
        # Every per-operator variable here is an INT named <prefix><opId>.
        return Variable.val(CType.INT, prefix + opSuffix, ctxt.codegen, initial)

    self.endVar = declareInt("matchEnd", intConst(0))
    self.offsetVar = declareInt("matchOffset", intConst(0))
    self.matchStepVar = declareInt("matchStep", intConst(1))
    matchFound = declareInt("matchFound", intConst(0))
    probeActive = declareInt("probeActive", ctxt.vars.activeVar)

    isOuterJoin = self.algExpr.joinType == Join.OUTER
    if isOuterJoin:
        doOuter = declareInt("doOuter", intConst(1))
        outerActive = declareInt("outerActive", ctxt.vars.activeVar)
        # Build-side attributes are null exactly when no match was found.
        for buildAtt in self.buildRelation.values():
            ctxt.attFile.isNullFile[buildAtt.id] = notLogic(matchFound)

    # Hash and probe only on lanes that currently hold an active tuple.
    hashVar = Variable.val(CType.UINT64, "hash" + opSuffix, ctxt.codegen,
                           intConst(0))
    with IfClause(probeActive, ctxt.codegen):
        Hash.attributes(self.algExpr.probeKeyAttributes, hashVar, ctxt)
        probeArgs = [
            self.htmem.ht,
            self.htmem.numEntries,
            hashVar,
            self.offsetVar,
            self.endVar,
        ]
        probeCall = call(qlib.Fct.HASH_PROBE_MULTI, probeArgs)
        emit(assign(probeActive, probeCall), ctxt.codegen)
    emit(assign(ctxt.vars.activeVar, probeActive), ctxt.codegen)

    # -- open probe loop --
    # The buffered variant is switched off right here, so the plain
    # while-loop branch is the one that runs.
    self.probeWithDivergenceBuffer = False
    if self.probeWithDivergenceBuffer:
        loopVars = [self.offsetVar, self.endVar]
        loopVars.extend(ctxt.attFile.regFile[attId]
                        for attId in self.algExpr.leftChild.outRelation)
        probeLoop = BufferedLoop(ctxt, loopVars, 0.8)
        unrollDepth = self.unrollDepth
    else:
        keepGoing = anyIntr(qlib.Const.ALL_LANES, ctxt.vars.activeVar)
        probeLoop = WhileLoop(keepGoing, ctxt.codegen)
        unrollDepth = 1

    ctxt.innerLoopCount += 1
    with UnrolledForLoop(unrollDepth, ctxt.codegen):
        emit(assign(probeActive, ctxt.vars.activeVar), ctxt.codegen)
        payl = Variable.val(self.htmem.payload.dataType, "payl", ctxt.codegen)
        with IfClause(probeActive, ctxt.codegen):
            currentEntry = self.htmem.payload.arrayAccess(self.offsetVar)
            emit(assign(payl, currentEntry), ctxt.codegen)
            self.payload.dematerialize(payl, ctxt)
            Hash.checkEquality(
                ctxt.vars.activeVar,
                self.algExpr.buildKeyAttributes,
                self.algExpr.probeKeyAttributes,
                ctxt,
            )
            emit(assignAdd(matchFound, ctxt.vars.activeVar), ctxt.codegen)

        self.parent.consume(ctxt)

        # Advance to the next candidate entry.
        emit(assignAdd(self.offsetVar, self.matchStepVar), ctxt.codegen)
        # A lane stays active only while its offset is below the end marker.
        stillInRange = smaller(self.offsetVar, self.endVar)
        emit(assignAnd(probeActive, stillInRange), ctxt.codegen)
        emit(assign(ctxt.vars.activeVar, probeActive), ctxt.codegen)

        # Outer join: once no lane is probing any more, re-activate lanes
        # that never matched so they pass through with null build attributes.
        if isOuterJoin:
            noLaneProbing = notLogic(anyIntr(qlib.Const.ALL_LANES, probeActive))
            with IfClause(andLogic(noLaneProbing, doOuter), ctxt.codegen):
                with IfClause(notLogic(matchFound), ctxt.codegen):
                    emit(assign(ctxt.vars.activeVar, outerActive), ctxt.codegen)
                emit(assign(doOuter, intConst(0)), ctxt.codegen)

    # -- close probe loop --
    probeLoop.close()
    ctxt.innerLoopCount -= 1
def htProbeSingleMatch(self, ctxt):
    """Emit probe code that looks for one matching hash-table entry.

    The emitted code hashes the probe key attributes on active lanes, calls
    ``qlib.Fct.HASH_PROBE_UNIQUE`` to obtain a payload pointer, and then
    loops: it dematerializes the payload, compares build and probe keys and
    calls the probe function again while the keys do not match.  Afterwards
    the parent operator consumes the result.

    Join-type handling:
        * ``Join.INNER``: ``ctxt.vars.activeVar`` is set to ``bucketFound``.
        * ``Join.OUTER``: every build-side attribute gets the null
          indicator ``not bucketFound`` in ``ctxt.attFile.isNullFile``.

    Args:
        ctxt: translation context; this method uses ``ctxt.codegen``,
            ``ctxt.vars.activeVar`` and ``ctxt.attFile``.
    """
    opSuffix = str(self.algExpr.opId)

    # compute a (possibly) non-unique hash over all join attributes
    hashVar = Variable.val(CType.UINT64, "hash" + opSuffix, ctxt.codegen,
                           intConst(0))
    with IfClause(ctxt.vars.activeVar, ctxt.codegen):
        Hash.attributes(self.algExpr.probeKeyAttributes, hashVar, ctxt)

    payl = Variable.ptr(self.payload.getType(), "probepayl" + opSuffix,
                        ctxt.codegen)
    # numLookups is handed to every probe call; nothing in this method
    # reads it back afterwards.
    numLookups = Variable.val(CType.INT, "numLookups" + opSuffix,
                              ctxt.codegen, intConst(0))

    # first probe, executed only when the current thread has an active tuple
    with IfClause(ctxt.vars.activeVar, ctxt.codegen):
        emit(
            assign(
                ctxt.vars.activeVar,
                call(qlib.Fct.HASH_PROBE_UNIQUE, [
                    self.htmem.ht, self.htmem.numEntries, hashVar,
                    numLookups, addressof(payl)
                ])), ctxt.codegen)

    bucketFound = Variable.val(CType.INT, "bucketFound" + opSuffix,
                               ctxt.codegen, intConst(0))
    # NOTE(review): the initial value wraps a variable in intConst(); the
    # sibling probe methods pass ctxt.vars.activeVar directly. Left as is --
    # confirm that intConst() accepts a variable here.
    probeActive = Variable.val(CType.INT, "probeActive" + opSuffix,
                               ctxt.codegen, intConst(ctxt.vars.activeVar))

    with WhileLoop(andLogic(probeActive, notLogic(bucketFound)),
                   ctxt.codegen):
        paylVal = Variable.val(self.payload.typeName, "jprobepayl" + opSuffix)
        emit(assign(declare(paylVal), deref(payl)), ctxt.codegen)
        self.payload.dematerialize(paylVal, ctxt)
        # assume a hit, then let the key comparison clear the flag
        emit(assign(bucketFound, intConst(1)), ctxt.codegen)
        Hash.checkEquality(bucketFound, self.algExpr.buildKeyAttributes,
                           self.algExpr.probeKeyAttributes, ctxt)
        # keys differ: probe again for the next candidate
        with IfClause(notLogic(bucketFound), ctxt.codegen):
            emit(
                assign(
                    probeActive,
                    call(qlib.Fct.HASH_PROBE_UNIQUE, [
                        self.htmem.ht, self.htmem.numEntries, hashVar,
                        numLookups, addressof(payl)
                    ])), ctxt.codegen)

    if self.algExpr.joinType == Join.INNER:
        emit(assign(ctxt.vars.activeVar, bucketFound), ctxt.codegen)
    if self.algExpr.joinType == Join.OUTER:
        # remember null indicator for each attribute from build relation.
        # buildRelation is a mapping (htProbeMultiMatch iterates .items()
        # and reads .id on the value), so iterate the values: iterating the
        # mapping itself yields keys, which carry no .id attribute.
        for nullable in self.buildRelation.values():
            ctxt.attFile.isNullFile[nullable.id] = notLogic(bucketFound)

    # consume for parent operators
    self.parent.consume(ctxt)
def htProbeMultiMatchSingleBroadcastSemiAnti(self, ctxt):
    """Emit semi/anti-join probe code that handles one probe lane at a time.

    Emitted structure:
      1. Active lanes hash their probe keys and call
         ``qlib.Fct.HASH_PROBE_MULTI`` with the buffered offset/end
         variables.
      2. ``ballotIntr`` collects ``probeActive`` of all lanes into
         ``activeProbes``; probe-side registers are copied to ``_bcbuf``
         buffer variables.
      3. While ``activeProbes`` is non-zero, the lane given by
         ``ffsIntr(activeProbes) - 1`` is selected and its offset, end and
         registers are distributed with ``shuffleIntr``.  Each lane starts
         at that offset plus its own ``warplane()`` and steps by 32.
      4. ``filterMatch`` is combined across lanes with ``anyIntr``; the
         selected lane then ANDs ``ctxt.vars.activeVar`` with
         ``filterMatch`` (``Join.SEMI``) or its negation (``Join.ANTI``).
      5. Registers are restored from the buffers and the parent operator
         consumes.

    Args:
        ctxt: translation context; this method uses ``ctxt.codegen``,
            ``ctxt.vars.activeVar`` and ``ctxt.attFile.regFile``.
    """
    opSuffix = str(self.algExpr.opId)
    commentOperator("semi/anti multiprobe single broadcast",
                    self.algExpr.opId, ctxt.codegen)

    def declareInt(prefix, initial):
        # Every per-operator variable here is an INT named <prefix><opId>.
        return Variable.val(CType.INT, prefix + opSuffix, ctxt.codegen, initial)

    endVar = declareInt("matchEnd", intConst(0))
    endVarBuf = declareInt("matchEndBuf", intConst(0))
    offsetVar = declareInt("matchOffset", intConst(0))
    offsetVarBuf = declareInt("matchOffsetBuf", intConst(0))
    filterMatch = declareInt("filterMatch", intConst(0))
    probeActive = declareInt("probeActive", ctxt.vars.activeVar)

    # Probe-side registers that get overwritten by the shuffles below:
    # probe keys first, then attributes referenced by the join condition.
    bufferedAtts = dict(self.algExpr.probeKeyAttributes)
    bufferedAtts.update(self.algExpr.conditionProbeAttributes)
    bufferVars = []
    for attId in bufferedAtts:
        regVar = ctxt.attFile.regFile[attId]
        regBuf = copy.deepcopy(regVar)
        regBuf.name += "_bcbuf" + opSuffix
        regBuf.declare(ctxt.codegen)
        bufferVars.append((regVar, regBuf))

    hashVar = Variable.val(CType.UINT64, "hash" + opSuffix, ctxt.codegen,
                           intConst(0))
    with IfClause(probeActive, ctxt.codegen):
        Hash.attributes(self.algExpr.probeKeyAttributes, hashVar, ctxt)
        probeArgs = [
            self.htmem.ht,
            self.htmem.numEntries,
            hashVar,
            offsetVarBuf,
            endVarBuf,
        ]
        probeCall = call(qlib.Fct.HASH_PROBE_MULTI, probeArgs)
        emit(assign(probeActive, probeCall), ctxt.codegen)

    activeProbes = Variable.val(CType.UINT, "activeProbes" + opSuffix)
    activeProbes.declareAssign(
        ballotIntr(qlib.Const.ALL_LANES, probeActive), ctxt.codegen)

    # save register state to the buffers before it gets overwritten
    for regVar, regBuf in bufferVars:
        emit(assign(regBuf, regVar), ctxt.codegen)

    with WhileLoop(larger(activeProbes, intConst(0)), ctxt.codegen):
        tupleLane = Variable.val(CType.UINT, "tupleLane", ctxt.codegen)
        emit(assign(tupleLane, sub(ffsIntr(activeProbes), 1)), ctxt.codegen)

        def fromSelectedLane(bufferedVar):
            # Value of bufferedVar as held by the currently selected lane.
            return shuffleIntr(qlib.Const.ALL_LANES, bufferedVar, tupleLane)

        # every lane starts at the selected offset plus its own lane index
        laneOffset = add(fromSelectedLane(offsetVarBuf),
                         ctxt.codegen.warplane())
        emit(assign(offsetVar, laneOffset), ctxt.codegen)
        emit(assign(endVar, fromSelectedLane(endVarBuf)), ctxt.codegen)
        for regVar, regBuf in bufferVars:
            emit(assign(regVar, fromSelectedLane(regBuf)), ctxt.codegen)

        # clear the selected lane's bit so it is not picked again
        laneBit = shiftLeft(intConst(1), tupleLane)
        emit(assignSub(activeProbes, (laneBit)), ctxt.codegen)

        emit(assign(filterMatch, intConst(0)), ctxt.codegen)
        emit(assign(probeActive, smaller(offsetVar, endVar)), ctxt.codegen)

        with WhileLoop(anyIntr(qlib.Const.ALL_LANES, probeActive),
                       ctxt.codegen):
            with IfClause(probeActive, ctxt.codegen):
                payl = Variable.val(self.htmem.payload.dataType, "payl",
                                    ctxt.codegen)
                currentEntry = self.htmem.payload.arrayAccess(offsetVar)
                emit(assign(payl, currentEntry), ctxt.codegen)
                self.payload.dematerialize(payl, ctxt)
                # assume a hit, then let key and condition checks clear it
                emit(assign(filterMatch, intConst(1)), ctxt.codegen)
                Hash.checkEquality(
                    filterMatch,
                    self.algExpr.buildKeyAttributes,
                    self.algExpr.probeKeyAttributes,
                    ctxt,
                )
                if self.algExpr.conditions is not None:
                    conditionExpr = self.algExpr.conditions.translate(ctxt)
                    emit(assignAnd(filterMatch, conditionExpr), ctxt.codegen)

            # a hit on any lane counts for all lanes and ends the search
            anyHit = anyIntr(qlib.Const.ALL_LANES, filterMatch)
            emit(assign(filterMatch, anyHit), ctxt.codegen)
            emit(assignAnd(probeActive, notLogic(filterMatch)), ctxt.codegen)
            # step of 32 -- presumably the number of lanes; confirm
            emit(assignAdd(offsetVar, intConst(32)), ctxt.codegen)
            emit(assignAnd(probeActive, smaller(offsetVar, endVar)),
                 ctxt.codegen)

        # only the lane whose tuple was just probed updates its active flag
        isSelectedLane = equals(ctxt.codegen.warplane(), tupleLane)
        with IfClause(isSelectedLane, ctxt.codegen):
            joinType = self.algExpr.joinType
            if joinType == Join.SEMI:
                emit(assignAnd(ctxt.vars.activeVar, filterMatch),
                     ctxt.codegen)
            if joinType == Join.ANTI:
                emit(assignAnd(ctxt.vars.activeVar, notLogic(filterMatch)),
                     ctxt.codegen)

    # restore register state from the buffers
    for regVar, regBuf in bufferVars:
        emit(assign(regVar, regBuf), ctxt.codegen)

    self.parent.consume(ctxt)