def visitSum(self, node: AST.Sum):
    '''
    expr_out
    i = start
    for (j = 0; j < (end - start); j++)
      expr_in = prog_in
      expr_out = expr_out + expr_in
      i++

    1.  for i in [0, C]:
    2.    expr_out[i] = expr_out[i] + shr(expr_in[i])
    '''
    var_idf = node.name
    self.decls[var_idf] = Type.Int()

    (prog_in, expr_in) = self.visit(node.expr)

    start, end = node.start, node.end

    expr_out = self.getTempVar()
    type_out = node.type

    var = IR.Var(var_idf)
    var_iter = self.getTempIterator()
    iters = self.getTempIterators(type_out.dim)

    (scale_out, height_shr, height_noshr) = self.getScaleForTreeSum(
        self.scales[expr_in.idf], end - start)
    intv_out = self.getIntervalForTreeSum(
        self.intvs[expr_in.idf], end - start)

    # Tree sum to sum output of each iteration
    expr_in_idx = IRUtil.addIndex(expr_in, iters)
    expr_out_idx = IRUtil.addIndex(expr_out, iters)

    cmd1 = IR.Memset(expr_out, type_out.size())
    cmd2 = IR.Assn(expr_out_idx, IRUtil.add(
        expr_out_idx, IRUtil.shr(expr_in_idx, height_shr)))
    treeSum = IRUtil.loop(type_out.shape, iters, [cmd2])

    # Final program to sum output of each iteration
    prog_sum = [cmd1,
                IR.Assn(var, IR.Int(start)),
                IR.For(var_iter, 0, IRUtil.lt(var_iter, IR.Int(end - start)),
                       prog_in.cmd_l + treeSum + [IR.Assn(var, IRUtil.inc(var))])
                ]

    prog_out = IR.Prog(prog_sum)

    self.decls[expr_out.idf] = type_out
    self.scales[expr_out.idf] = scale_out
    self.intvs[expr_out.idf] = intv_out

    return (prog_out, expr_out)
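
# A minimal runnable sketch (not part of the compiler) of the accumulation
# that the emitted loop performs: each summand is right-shifted by
# `height_shr` before being added, so the running sum stays within the word
# length, and the result is interpreted at scale_in + height_shr. The helper
# name is hypothetical.
def _sketch_tree_sum_accumulate(values, height_shr):
    acc = 0
    for v in values:
        acc += v >> height_shr  # pre-shift each term to make room for the sum
    return acc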
def printSuffix(self, expr: IR.Expr):
    self.out.printf('\n')

    type = self.decls[expr.idf]

    if Type.isInt(type):
        self.out.printf('return ', indent=True)
        self.print(expr)
        self.out.printf(';\n')
    elif Type.isTensor(type):
        idfr = expr.idf
        exponent = self.scales[expr.idf]
        num = 2 ** exponent

        if type.dim == 0:
            self.out.printf('cout << ', indent=True)
            self.out.printf('float(' + idfr + ')*' + str(num))
            self.out.printf(' << endl;\n')
        else:
            iters = []
            for i in range(type.dim):
                s = chr(ord('i') + i)
                tempVar = IR.Var(s)
                iters.append(tempVar)
            expr_1 = IRUtil.addIndex(expr, iters)
            cmds = IRUtil.loop(type.shape, iters,
                               [IR.PrintAsFloat(expr_1, exponent)])
            self.print(IR.Prog(cmds))
    else:
        assert False

    self.out.decreaseIndent()
    self.out.printf('}\n', indent=True)

    self.out.close()
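
# A minimal sketch (not part of the compiler) of the de-quantization that the
# emitted `float(x) * 2**exponent` expression performs: `exponent` is the
# (typically negative) fixed-point scale, so multiplying by 2**exponent
# recovers the real value. The helper name is hypothetical.
def _sketch_dequantize(x_fixed: int, exponent: int) -> float:
    return float(x_fixed) * (2.0 ** exponent)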
def visitMathExp(self, node: AST.Func):
    # Tunable parameter
    MIN = 0.1

    (prog_in, expr_in) = self.visit(node.expr)

    type_in = node.expr.type

    scale_in = self.scales[expr_in.idf]
    intv_in = self.intvs[expr_in.idf]

    '''
    1.  y = ((int) (exp(((float)e) / shr1) * shr2))
    '''

    maxExp = np.exp(-MIN)

    expr_out = self.getTempVar()

    scale_out = self.getScale(maxExp)
    intv_out = self.getInterval(scale_out, maxExp, maxExp)

    shr1 = IR.Int(2 ** -scale_in)
    shr2 = IR.Int(2 ** -scale_out)

    expr_in_idx = IRUtil.addIndex(expr_in, [IRUtil.zero] * type_in.dim)
    expr_out_idx = IRUtil.addIndex(expr_out, [IRUtil.zero] * type_in.dim)

    cmd0 = IR.Comment('exp(' + expr_in.idf + ')')
    cmd_assn = IR.Assn(expr_out_idx, IRUtil.castToInt(IRUtil.mul(IR.Exp(
        IRUtil.div(IRUtil.castToFloat(expr_in_idx), shr1)), shr2)))

    prog_exp = IR.Prog([cmd0, cmd_assn])

    prog_out = IRUtil.concatPrograms(prog_in, prog_exp)

    self.decls[expr_out.idf] = type_in
    self.scales[expr_out.idf] = scale_out
    self.intvs[expr_out.idf] = intv_out

    return (prog_out, expr_out)
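
# Illustrative sketch (not part of the compiler) of the arithmetic behind the
# emitted statement `y = ((int) (exp(((float)e) / shr1) * shr2))`:
# shr1 = 2**-scale_in converts the fixed-point input to a float, and
# shr2 = 2**-scale_out quantizes the float result back. Names are hypothetical.
import math

def _sketch_float_exp(x_fixed: int, scale_in: int, scale_out: int) -> int:
    shr1 = 2 ** -scale_in   # scales are negative, so these are powers of two > 1
    shr2 = 2 ** -scale_out
    return int(math.exp(float(x_fixed) / shr1) * shr2)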
def visitIndex(self, node: AST.Index):
    (prog_in, expr_in) = self.visit(node.expr)
    (prog_idx, expr_idx) = self.visit(node.index)

    prog_out = IRUtil.concatPrograms(prog_in, prog_idx)
    expr_out = IRUtil.addIndex(expr_in, [expr_idx])

    return (prog_out, expr_out)
def visitUop(self, node: AST.Uop):
    (prog_in, expr_in) = self.visit(node.expr)

    op = node.op

    if op == SeeDotParser.ADD:
        return (prog_in, expr_in)
    assert op == SeeDotParser.SUB

    type_out = node.type

    # e : Int
    if Type.isInt(type_out):
        prog_out = prog_in
        expr_out = IRUtil.negate(expr_in)
    # e : Tensor(), or Tensor(..)
    else:
        expr_out = self.getTempVar()
        iters = self.getTempIterators(type_out.dim)

        scale_out = self.scales[expr_in.idf]
        (m, M) = self.intvs[expr_in.idf]
        intv_out = (-M, -m)

        expr_in_idx = IRUtil.addIndex(expr_in, iters)
        expr_out_idx = IRUtil.addIndex(expr_out, iters)
        rhs = IRUtil.negate(expr_in_idx)
        loop = IRUtil.loop(type_out.shape, iters, [IR.Assn(expr_out_idx, rhs)])
        prog_uop = IR.Prog(loop)

        prog_out = IRUtil.concatPrograms(prog_in, prog_uop)

        self.decls[expr_out.idf] = type_out
        self.scales[expr_out.idf] = scale_out
        self.intvs[expr_out.idf] = intv_out

    return (prog_out, expr_out)
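
# Sketch (not part of the compiler) of the interval arithmetic for unary
# minus: the scale is unchanged and the value interval (m, M) simply flips
# to (-M, -m). The helper name is hypothetical.
def _sketch_negate_interval(m: int, M: int):
    return (-M, -m)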
def visitSgn(self, node: AST.Func):
    (prog_in, expr_in) = self.visit(node.expr)

    expr_out = self.getTempVar()
    type_in = node.expr.type

    expr_in_idx = IRUtil.addIndex(expr_in, [IRUtil.zero] * type_in.dim)

    cmd0 = IR.Comment('sgn(' + expr_in.idf + ')')
    cmd1 = IR.Assn(expr_out, IRUtil.cond_zero(expr_in_idx,
                                              IRUtil.one, IRUtil.zero))

    prog_sgn = IR.Prog([cmd0, cmd1])

    prog_out = IRUtil.concatPrograms(prog_in, prog_sgn)

    self.decls[expr_out.idf] = Type.Int()

    return (prog_out, expr_out)
def printSuffix(self, expr: IR.Expr):
    self.out.printf('\n')

    type = self.decls[expr.idf]

    if Type.isInt(type):
        self.out.printf('return ', indent=True)
        self.print(expr)
        self.out.printf(';\n')
    elif Type.isTensor(type):
        idfr = expr.idf
        exponent = self.scales[expr.idf]
        num = 2 ** exponent

        if type.dim == 0:
            self.out.printf('Serial.println(', indent=True)
            self.out.printf('float(' + idfr + ')*' + str(num))
            self.out.printf(', 6);\n')
        else:
            iters = []
            for i in range(type.dim):
                s = chr(ord('i') + i)
                tempVar = IR.Var(s)
                iters.append(tempVar)
            expr_1 = IRUtil.addIndex(expr, iters)
            cmds = IRUtil.loop(type.shape, iters,
                               [IR.PrintAsFloat(expr_1, exponent)])
            self.print(IR.Prog(cmds))
    else:
        assert False

    self.out.decreaseIndent()
    self.out.printf('}\n', indent=True)

    self.out.close()

    with open(os.path.join(self.outputDir, "ram.usage"), "w") as f:
        f.write("Estimate RAM usage :: %d bytes" % (self.maxRAMestimate))
def printSuffix(self, expr: IR.Expr):
    self.out.printf('\n')

    # Emit cleanup code that frees the dynamically allocated input tensor X
    if config.vbwEnabled and forFixed():
        bw = self.varsForBitwidth['X']
        typ_str = "int%d_t" % bw
        size = self.decls['X'].shape
        sizestr = ''.join([("[%d]" % i) for i in size])
        Xindexstr = ''
        Xintstar = ''.join(["*" for i in size])
        for i in range(len(size)):
            Xindexstr += (("[i%d]" % (i - 1)) if i > 0 else "")
            self.out.printf("for (int i%d = 0; i%d < %d; i%d ++ ){\n" %
                            (i, i, size[i], i), indent=True)
            self.out.increaseIndent()
        for i in range(len(size) - 1, -1, -1):
            self.out.decreaseIndent()
            self.out.printf("}\n", indent=True)
            self.out.printf("delete[] X%s;\n" % (Xindexstr), indent=True)
            # Drop the innermost "[iN]" (4 characters) from the index string
            Xindexstr = Xindexstr[:-4] if len(Xindexstr) > 0 else Xindexstr
        assert len(size) < 10, "Too simple logic for printing indices used, cannot handle 10+ Dim Tensors"

    type = self.decls[expr.idf]

    if Type.isInt(type):
        self.out.printf('return ', indent=True)
        self.print(expr)
        self.out.printf(';\n')
    elif Type.isTensor(type):
        idfr = expr.idf
        exponent = self.scales[expr.idf]
        num = 2 ** exponent

        if type.dim == 0:
            self.out.printf('cout << ', indent=True)
            self.out.printf('float(' + idfr + ')*' + str(num))
            self.out.printf(' << endl;\n')
        else:
            iters = []
            for i in range(type.dim):
                s = chr(ord('i') + i)
                tempVar = IR.Var(s)
                iters.append(tempVar)
            expr_1 = IRUtil.addIndex(expr, iters)
            cmds = IRUtil.loop(type.shape, iters,
                               [IR.PrintAsFloat(expr_1, exponent)])
            self.print(IR.Prog(cmds))
    else:
        assert False

    self.out.decreaseIndent()
    self.out.printf('}\n', indent=True)

    def isInt(a):
        try:
            int(a)
            return True
        except (TypeError, ValueError):
            return False

    # Emit a dispatcher that invokes one of the generated fixed-point
    # inference functions based on a switch index
    if forFixed():
        if (int(self.printSwitch) if isInt(self.printSwitch) else -2) > -1:
            self.out.printf("const int switches = %d;\n" %
                            (int(self.printSwitch)), indent=True)
            self.out.printf('void seedotFixedSwitch(int i, MYINT **X_temp, int& res) {\n', indent=True)
            self.out.increaseIndent()
            self.out.printf('switch(i) {\n', indent=True)
            self.out.increaseIndent()
            for i in range(int(self.printSwitch)):
                self.out.printf('case %d: res = seedotFixed%d(X_temp); return;\n' %
                                (i, i + 1), indent=True)
            self.out.printf('default: res = -1; return;\n', indent=True)
            self.out.decreaseIndent()
            self.out.printf('}\n', indent=True)
            self.out.decreaseIndent()
            self.out.printf('}\n', indent=True)

    if debugCompiler():
        print("Closing File after outputting cpp code: ID " + self.idStr)

    self.out.close()
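
# For reference, the dispatcher emitted above looks like this (reconstructed
# from the printf format strings, shown for printSwitch == 2):
#
#   const int switches = 2;
#   void seedotFixedSwitch(int i, MYINT **X_temp, int& res) {
#       switch(i) {
#           case 0: res = seedotFixed1(X_temp); return;
#           case 1: res = seedotFixed2(X_temp); return;
#           default: res = -1; return;
#       }
#   }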
def visitReshape(self, node: AST.Reshape):
    (prog_in, expr_in) = self.visit(node.expr)

    '''
    reshape(A, n, h, w)

    cmd1:  t1 = t2 = t3 = 0;
    loop2: for n in 0:N:
             for h in 0:H:
               for w in 0:W:
    cmd3:          B[n][h][w] = A[t1][t2][t3]
    cmd4:          t3++;
    cmd5:          if (t3 == WW)
                     t3 = 0;
                     t2++;
                     if (t2 == HH)
                       t2 = 0;
                       t1++;
    '''

    type_in = node.expr.type
    type_out = node.type

    # Compute scaling factors
    scale_out = self.scales[expr_in.idf]
    intv_out = self.intvs[expr_in.idf]

    # Declare variables
    expr_out = self.getTempVar()
    iters_in = self.getTempIterators(type_in.dim)
    iters_out = self.getTempVars(type_out.dim)

    # Initialize to 0
    cmd1 = [IR.Assn(var, IRUtil.zero) for var in iters_out]

    # Incrementing the first index
    first_iter = iters_out[0]
    cmd4 = IRUtil.incCmd(first_iter)

    # Incrementing other indices using a loop
    cmd5 = [cmd4]
    for i in range(1, type_out.dim):
        curr_iter = iters_out[i]
        curr_size = IR.Int(type_out.shape[i])
        cmd5 = [IRUtil.incCmd(curr_iter),
                IR.If(IRUtil.eq(curr_iter, curr_size),
                      [IRUtil.initVarToZero(curr_iter)] + cmd5)]

    # Outer loop
    loopShape = []
    loopIters = []
    for order in node.order:
        order = order - 1
        loopShape.append(type_in.shape[order])
        loopIters.append(iters_in[order])

    loop2 = IRUtil.loop(loopShape, loopIters, [
        IR.Assn(IRUtil.addIndex(expr_out, iters_out),
                IRUtil.addIndex(expr_in, iters_in))
    ] + cmd5)

    # Finalize
    comment = IR.Comment("reshape(" + expr_in.idf + ", " +
                         ', '.join(str(e) for e in type_out.shape) + ")")
    prog_reshape = IR.Prog([comment] + cmd1 + loop2)

    prog_out = IRUtil.concatPrograms(prog_in, prog_reshape)

    # Update context
    self.decls[expr_out.idf] = type_out
    self.scales[expr_out.idf] = scale_out
    self.intvs[expr_out.idf] = intv_out

    # Update declarations
    self.decls.update(dict((var.idf, Type.Int()) for var in iters_out))

    return (prog_out, expr_out)
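
# Self-contained sketch (not part of the compiler) of the odometer-style
# index update that cmd4/cmd5 emit: the innermost counter is bumped and any
# overflow ripples outward, walking the output tensor in row-major order.
# The function name and list-based counters are illustrative assumptions.
def _sketch_reshape_counters(shape, steps):
    iters = [0] * len(shape)
    for _ in range(steps):
        d = len(shape) - 1
        iters[d] += 1
        while d > 0 and iters[d] == shape[d]:  # carry into the next-outer index
            iters[d] = 0
            iters[d - 1] += 1
            d -= 1
    return iters

# e.g. _sketch_reshape_counters([2, 3], 3) == [1, 0]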
def visitCond(self, node: AST.Cond):
    (prog_in_cond, expr_in_cond) = self.visit(node.expr)

    (prog_in_A, expr_in_A) = self.visit(node.trueBlock)

    (prog_in_B, expr_in_B) = self.visit(node.falseBlock)

    type_in_cond = node.expr.type
    type_in_A = node.trueBlock.type

    if Type.isInt(type_in_cond):
        expr_in_cond_idx = expr_in_cond
    else:
        expr_in_cond_idx = IRUtil.addIndex(
            expr_in_cond, [IRUtil.zero] * type_in_cond.dim)

    # e2, e3 : Int
    if Type.isInt(type_in_A):
        # TODO: Update the scale and intv of expr_out based on in_A and in_B
        prog_out = IRUtil.concatPrograms(prog_in_cond, prog_in_A, prog_in_B)
        expr_out = IRUtil.cond_zero(expr_in_cond_idx, expr_in_A, expr_in_B)
    # e2, e3 : Tensor(), or Tensor(..)
    else:
        expr_out = self.getTempVar()
        iters = self.getTempIterators(type_in_A.dim)

        scale_in_A, scale_in_B = self.scales[expr_in_A.idf], self.scales[expr_in_B.idf]
        intv_in_A, intv_in_B = self.intvs[expr_in_A.idf], self.intvs[expr_in_B.idf]

        m_2, M_2 = intv_in_A
        m_3, M_3 = intv_in_B

        if scale_in_A >= scale_in_B:
            shr_n_2, shr_n_3 = 0, scale_in_A - scale_in_B
        else:
            shr_n_2, shr_n_3 = scale_in_B - scale_in_A, 0

        scale_out = max(scale_in_A, scale_in_B)
        intv_out = (min(m_2 >> shr_n_2, m_3 >> shr_n_3),
                    max(M_2 >> shr_n_2, M_3 >> shr_n_3))

        # prog_assn
        expr_in_A_idx = IRUtil.addIndex(expr_in_A, iters)
        expr_in_B_idx = IRUtil.addIndex(expr_in_B, iters)
        expr_out_idx = IRUtil.addIndex(expr_out, iters)

        rhs = IRUtil.cond_zero(expr_in_cond_idx,
                               IRUtil.shr(expr_in_A_idx, shr_n_2),
                               IRUtil.shr(expr_in_B_idx, shr_n_3))
        cmdl_assn = IRUtil.loop(type_in_A.shape, iters,
                                [IR.Assn(expr_out_idx, rhs)])
        prog_cond = IR.Prog(cmdl_assn)

        prog_out = IRUtil.concatPrograms(prog_in_cond, prog_in_A,
                                         prog_in_B, prog_cond)

        self.decls[expr_out.idf] = type_in_A
        self.scales[expr_out.idf] = scale_out
        self.intvs[expr_out.idf] = intv_out

    return (prog_out, expr_out)
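
# Sketch (not part of the compiler) of the scale alignment applied to the two
# branches: the operand with the smaller scale is right-shifted so that both
# share scale_out = max(scale_a, scale_b) before the conditional select. This
# mirrors shr_n_2/shr_n_3 above; the helper name is hypothetical.
def _sketch_align_scales(a_fixed, scale_a, b_fixed, scale_b):
    scale_out = max(scale_a, scale_b)
    a_aligned = a_fixed >> (scale_out - scale_a)
    b_aligned = b_fixed >> (scale_out - scale_b)
    return a_aligned, b_aligned, scale_out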
def visitTableExp(self, node: AST.Func):
    (prog_in, expr_in) = self.visit(node.expr)

    # TODO: use MAX_VAL_EXP
    type_in = node.expr.type

    scale_in = self.scales[expr_in.idf]
    intv_in = self.intvs[expr_in.idf]

    [m, M] = self.expRange
    [m_scale, M_scale] = [int(np.ldexp(m, -scale_in)),
                          int(np.ldexp(M, -scale_in))]

    max = int(np.ldexp(M - m, -scale_in))
    shl = self.getShl(max)

    input = self.getTempVar()
    [i, j] = self.getTempVars(2)
    expr_out = self.getTempVar()

    '''
    1.  if ((-x) < min) {
    2.      i = 0;
    3.      j = 0;
    4.  }
    5.  else {
    6.      y = ((-x) - min) << shl
    7.      i = (y >> shrI) & (2^b-1)
    8.      j = (y >> shrJ) & (2^b-1)
    9.  }
    10. ans = T[i] * U[j]
    '''

    mask = IR.Int(2 ** self.expB - 1)
    shrI = Common.wordLength - self.expB
    shrJ = Common.wordLength - self.expB * 2
    table = self.getExpTable(scale_in)

    scale1 = self.getScale(1)
    scale2 = self.getScale(abs(np.exp(-m)))

    [shr1, shr2] = self.getShrForMul(scale1, scale2)

    expr_1_elt = IRUtil.addIndex(expr_in, [IRUtil.zero] * type_in.dim)
    expr_2_elt = IRUtil.addIndex(expr_out, [IRUtil.zero] * type_in.dim)

    cond = IRUtil.lt(IRUtil.negate(expr_1_elt), IR.Int(m_scale))

    cmd2 = IR.Assn(i, IR.Int(0))
    cmd3 = IR.Assn(j, IR.Int(0))

    cmd6 = IR.Assn(input, IRUtil.shl(IRUtil.sub(
        IRUtil.negate(expr_1_elt), IR.Int(m_scale)), shl))
    cmd7 = IR.Assn(i, IRUtil.bitAnd(IRUtil.shrUint(input, shrI), mask))
    cmd8 = IR.Assn(j, IRUtil.bitAnd(IRUtil.shrUint(input, shrJ), mask))

    cmd1 = IR.If(cond, [cmd2, cmd3], [cmd6, cmd7, cmd8])

    cmd10 = IR.Assn(expr_2_elt, IRUtil.mul(
        IRUtil.shrUint(IRUtil.addIndex(table[0], [i]), shr1),
        IRUtil.shrUint(IRUtil.addIndex(table[1], [j]), shr2)))

    scale_out = self.getScaleForExp(scale1, shr1, scale2, shr2)
    intv_out = self.getIntervalForExp(scale_out, [-m_scale, -M_scale])

    cmd0 = IR.Comment('exp(' + expr_in.idf + ')')

    prog_exp = IR.Prog([cmd0, cmd1, cmd10])

    prog_out = IRUtil.concatPrograms(prog_in, prog_exp)

    self.decls[expr_out.idf] = type_in
    self.scales[expr_out.idf] = scale_out
    self.intvs[expr_out.idf] = intv_out
    self.decls.update(dict((var.idf, Type.Int()) for var in [input, i, j]))

    return (prog_out, expr_out)
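
# A runnable sketch (not part of the compiler) of the two-table exp lookup
# that the generated code performs, in plain floats for clarity: the top b
# bits of the argument index a coarse table T and the next b bits index a
# fine table U, so that exp(-y) = exp(-i/2^b) * exp(-j/2^(2b)) ≈ T[i] * U[j].
# Table construction and all names below are illustrative assumptions; the
# real tables are precomputed per input scale by getExpTable.
import math

def _sketch_table_exp(y, b=8):
    # Split y (0 <= y < 1 here, for simplicity) into two b-bit digits.
    i = int(y * (1 << b)) & ((1 << b) - 1)
    j = int(y * (1 << (2 * b))) & ((1 << b) - 1)
    T = [math.exp(-(k / (1 << b))) for k in range(1 << b)]        # coarse steps
    U = [math.exp(-(k / (1 << (2 * b)))) for k in range(1 << b)]  # fine steps
    return T[i] * U[j]  # ≈ math.exp(-y)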