def visit_AugAssign(self, node):
    """Lower an augmented assignment, vectorizing it when enabled.

    The right-hand side is always visited first.  With ``self.vectorize``
    off, the target is visited too and the node is returned as is.  With it
    on, the first rule that matches decides the result:

    * the target mentions ``self.loop_var``: return
      ``simd_macros.mm256_store_ps(target, visit(target) <op> value)``;
    * ``+=`` whose value is a multiplication: return
      ``target = _mm256_fmadd_ps(left, right, target)``;
    * ``+=`` whose value is a function call: return
      ``target = _mm256_add_ps(value, target)``;
    * any other array-reference target: raise ``NotImplementedError``.

    Everything else falls back to visiting the target and returning the
    (mutated) node.
    """
    node.value = self.visit(node.value)
    if not self.vectorize:
        node.target = self.visit(node.target)
        return node

    target = node.target
    if util.contains_symbol(target, self.loop_var):
        combined = C.BinaryOp(self.visit(target), node.op, node.value)
        return simd_macros.mm256_store_ps(target, combined)

    rhs = node.value
    accumulating = isinstance(node.op, C.Op.Add)
    if accumulating and isinstance(rhs, C.BinaryOp) and \
            isinstance(rhs.op, C.Op.Mul):
        fused = C.FunctionCall(
            C.SymbolRef("_mm256_fmadd_ps"),
            [rhs.left, rhs.right, target])
        return C.Assign(target, fused)

    if accumulating and isinstance(rhs, C.FunctionCall):
        # TODO: Verify it's a vector intrinsic
        summed = C.FunctionCall(C.SymbolRef("_mm256_add_ps"), [rhs, target])
        return C.Assign(target, summed)

    if isinstance(target, C.BinaryOp) and isinstance(
            target.op, C.Op.ArrayRef):
        raise NotImplementedError()

    node.target = self.visit(target)
    return node
Example #2
0
 def test_recursive_fold(self):
     """Folding ``c = (2 + -2) + b`` must print the same as ``c = b``."""
     cancelling = C.Add(C.Constant(2), C.Constant(-2))
     statement = C.Assign(
         C.SymbolRef("c"),
         C.Add(cancelling, C.SymbolRef("b")))
     folded = ConstantFold().visit(statement)
     expected = C.Assign(C.SymbolRef("c"), C.SymbolRef("b"))
     self.assertEqual(str(folded), str(expected))
Example #3
0
    def transform(self, py_ast, program_cfg):
        """Convert ``py_ast`` into the contents of one generated C file.

        ``program_cfg`` is unpacked as ``(arg_cfg, tune_cfg)``; only
        ``arg_cfg`` is used here.  The first function of the converted tree
        gets an extra ``retval`` parameter typed like the first argument, its
        statements are rewritten by ``MapTransformer`` and wrapped in a single
        ``for (i = 0; i < length; i++)`` loop tagged with the ``ivdep``
        pragma, and the result is passed through ``DeclarationFiller`` and
        ``HwachaVectorize``.

        Returns a one-element list holding a ``CFile`` named ``"generated"``
        made of the fixed include/extern preamble, any definitions collected
        by ``HwachaVectorize``, and the transformed tree.
        """
        arg_cfg, _tune_cfg = program_cfg
        tree = PyBasicConversions().visit(py_ast)
        func = tree.body[0]
        func.params.append(C.SymbolRef("retval", arg_cfg[0]()))

        # Annotate arguments and remember each one's dtype by name.
        param_dict = {}
        for param, arg_type in zip(func.params, arg_cfg):
            param.type = arg_type()
            param_dict[param.name] = arg_type._dtype_

        length = np.prod(arg_cfg[0]._shape_)
        transformer = MapTransformer("i", param_dict, "retval")
        loop_body = [transformer.visit(stmt) for stmt in func.defn]

        element_loop = C.For(
            C.Assign(C.SymbolRef("i", ct.c_int()), C.Constant(0)),
            C.Lt(C.SymbolRef("i"), C.Constant(length)),
            C.PostInc(C.SymbolRef("i")),
            body=loop_body,
            pragma="ivdep"
        )
        func.defn = [element_loop]

        tree = DeclarationFiller().visit(tree)
        defns = []
        tree = HwachaVectorize(param_dict, defns).visit(tree)

        preamble = [
            StringTemplate("#include <stdlib.h>"),
            StringTemplate("#include <stdint.h>"),
            StringTemplate("#include <assert.h>"),
            StringTemplate("extern \"C\" void __hwacha_body(void);"),
        ]
        return [CFile("generated", preamble + list(defns) + [tree])]
Example #4
0
 def gen_loop_nest(self, loopvars, cfg):
     """Create one counting ``for`` loop per entry of ``loopvars``.

     Loop ``k`` runs its variable from 0 up to (excluding) ``cfg.shape[k]``
     with a post-increment.  Each loop after the first is appended to the
     body list of the loop before it.  ``cfg.shape`` is also recorded in
     ``self.loop_shape_map`` under the key ``loopvars``.

     Returns ``(outermost_loop, innermost_loop)``; both are the same node
     when there is only one loop variable.
     """
     def counting_loop(var, extent, loop_body):
         return C.For(
             C.Assign(C.SymbolRef(var, ct.c_int()), C.Constant(0)),
             C.Lt(C.SymbolRef(var), C.Constant(extent)),
             C.PostInc(C.SymbolRef(var)), loop_body)

     enclosing_body = []
     outermost = counting_loop(loopvars[0], cfg.shape[0], enclosing_body)
     innermost = outermost
     for var, extent in zip(loopvars[1:], cfg.shape[1:]):
         innermost = counting_loop(var, extent, [])
         enclosing_body.append(innermost)
         # Descend: the next loop goes inside the one just created.
         enclosing_body = innermost.body
     self.loop_shape_map[loopvars] = cfg.shape
     return outermost, innermost
Example #5
0
 def visit(self, node):
     """Keep vector loads in registers across the statements of a body.

     After the normal visit, any node with a ``body`` attribute is rewritten:

     * every load reported by ``VectorLoadCollector`` gets a fresh register
       (``self._gen_register()``), recorded in ``self.sym`` and initialised
       once by an assignment placed before the first statement that uses it;
     * a ``_mm..._store...`` call whose first argument's generated code
       matches a known load becomes a plain assignment to that register,
       and the real store is remembered;
     * every occurrence of a known load inside a statement is replaced by
       its register via ``VectorLoadReplacer``;
     * the remembered stores are emitted once at the end of the body through
       ``store_epi32`` or ``store_ps`` depending on the intrinsic name.

     Raises:
         AssertionError: if a deferred store is neither an ``epi32`` nor a
             ``ps`` intrinsic.
     """
     node = super().visit(node)
     if not hasattr(node, 'body'):
         return node

     new_body = []
     seen = {}      # load key -> (register name, load argument, intrinsic)
     stores = []    # (store target, register name, store intrinsic name)
     collector = VectorLoadCollector()
     for s in node.body:
         collector.visit(s)
         for stmt, entry in collector.loads.items():
             if stmt in seen:
                 continue
             load_node, _number, func = entry
             reg = self._gen_register()
             seen[stmt] = (reg, load_node, func)
             self.sym[reg] = get_simd_type()()
             new_body.append(
                 C.Assign(
                     C.SymbolRef(reg, get_simd_type()()),
                     C.FunctionCall(C.SymbolRef(func), [load_node])))

         if isinstance(s, C.FunctionCall) and "_mm" in s.func.name \
                 and "_store" in s.func.name:
             store_key = s.args[0].codegen()
             if store_key in seen:
                 reg = seen[store_key][0]
                 stores.append((s.args[0], reg, s.func.name))
                 # Write to the register now; the memory store is deferred.
                 s = C.Assign(C.SymbolRef(reg), s.args[1])

         for reg, load_node, func in seen.values():
             replacer = VectorLoadReplacer(
                 C.FunctionCall(C.SymbolRef(func), [load_node]).codegen(),
                 C.SymbolRef(reg))
             s = replacer.visit(s)
         new_body.append(s)

     for target, value, name in stores:
         if "epi32" in name:
             new_body.append(store_epi32(target, C.SymbolRef(value)))
         elif "ps" in name:
             new_body.append(store_ps(target, C.SymbolRef(value)))
         else:
             # The original ``assert (false)`` raised NameError and vanished
             # under -O; fail explicitly instead of dropping the store.
             raise AssertionError(
                 "unsupported store intrinsic: {}".format(name))
     node.body = util.flatten(new_body)
     return node
Example #6
0
    def visit_For(self, node):
        """Insert software prefetches after matching vector loads.

        The loop body is visited and flattened first.  Only the loop whose
        variable equals ``self.enclosing_loop_var`` is processed further: for
        up to ``self.prefetch_count`` statements of the form
        ``x = _mm..._load_/_set1/_broadcast...(arg, ...)`` whose first
        argument passes ``check_name(arg, self.prefetch_field)``, a prefetch
        call (looked up in ``prefetch_symbol_table`` by
        ``self.cacheline_hint``) on the rewritten argument plus
        ``prefetch_offset_var`` is emitted, followed by an increment of
        ``prefetch_offset_var`` by ``self.prefetch_offset``.

        The new statements go right after the load when this loop is
        ``self.prefetch_dest_loop``; otherwise they are appended to
        ``HoistPrefetch.escape_body``.  If anything was emitted, a
        zero-initialised declaration of ``prefetch_offset_var`` is appended
        to ``InitPrefetcher.init_body``.

        Returns the (mutated) ``node``.
        """
        node.body = util.flatten([self.visit(s) for s in node.body])
        if node.init.left.name != self.enclosing_loop_var:
            return node

        new_body = []
        added_code = False
        prefetch_count = self.prefetch_count
        for stmt in node.body:
            new_body.append(stmt)
            if not (prefetch_count > 0 and isinstance(stmt, C.BinaryOp)
                    and isinstance(stmt.op, C.Op.Assign)
                    and isinstance(stmt.right, C.FunctionCall)):
                continue
            intrinsic = stmt.right.func.name
            if "_mm" not in intrinsic:
                continue
            if not ("_load_" in intrinsic or "_set1" in intrinsic
                    or "_broadcast" in intrinsic):
                continue
            if not check_name(stmt.right.args[0], self.prefetch_field):
                continue

            # Work on a copy so the original load argument is left intact.
            new_array_ref = self.rewrite_arg(deepcopy(stmt.right.args[0]))
            prefetch_count -= 1
            added_code = True
            if node.init.left.name != self.prefetch_dest_loop:
                where_to_add = HoistPrefetch.escape_body
            else:
                where_to_add = new_body
            where_to_add.append(
                C.FunctionCall(
                    C.SymbolRef(prefetch_symbol_table[self.cacheline_hint]),
                    [
                        C.Add(new_array_ref,
                              C.SymbolRef("prefetch_offset_var"))
                    ]))
            where_to_add.append(
                C.Assign(
                    C.SymbolRef("prefetch_offset_var"),
                    C.Add(C.SymbolRef("prefetch_offset_var"),
                          C.Constant(self.prefetch_offset))))

        if added_code:
            InitPrefetcher.init_body.append(
                C.Assign(
                    C.SymbolRef("prefetch_offset_var", ctypes.c_int()),
                    C.Constant(0)))
        node.body = new_body
        return node
Example #7
0
 def visit_AugAssign(self, node):
     """Rewrite an augmented assignment for vectorized code generation.

     The value is visited first.  Then:

     * If the target mentions ``self.loop_var``, the statement becomes a
       ``store_ps`` of ``<target> <op> <value>``.  When the loop variable is
       not in the outermost index (``node.target.right``), the target is
       additionally rewritten in place so that the loop variable becomes the
       last index and the underlying buffer is renamed with a
       ``_transposed`` suffix (tracked in ``self.transposed_buffers``).
     * Otherwise, ``+=`` of a function call becomes
       ``target = _mm256_add_ps(value, target)``.
     * Otherwise, an array-reference target raises ``NotImplementedError``.
     * Anything else: the target is visited and the node returned.

     Raises:
         NotImplementedError: for an unsupported array-reference target, or
             when a buffer was already recorded as transposed at a different
             index position.
     """
     node.value = self.visit(node.value)
     if util.contains_symbol(node.target, self.loop_var):
         if not util.contains_symbol(node.target.right, self.loop_var):
             # Visit a copy for the read side *before* the original target
             # is restructured and its buffer renamed below.
             target = self.visit(deepcopy(node.target))
             curr_node = node.target
             idx = 1
             # Walk down the chain of `.left` links until the child's index
             # is the loop variable; idx counts the levels descended (+1).
             while curr_node.left.right.name != self.loop_var:
                 curr_node = curr_node.left
                 idx += 1
             # Splice the loop-variable index out of the chain ...
             curr_node.left = curr_node.left.left
             # ... and re-attach it as the new outermost (last) index.
             node.target = C.ArrayRef(node.target,
                                      C.SymbolRef(self.loop_var))
             # Continue down to the base symbol of the array expression.
             while not isinstance(curr_node, C.SymbolRef):
                 curr_node = curr_node.left
             # A buffer may only be transposed at one index position.
             if curr_node.name in self.transposed_buffers and self.transposed_buffers[
                     curr_node.name] != idx:
                 raise NotImplementedError()
             self.transposed_buffers[curr_node.name] = idx
             curr_node.name += "_transposed"
             # NOTE(review): `node.target.value` is read on the array
             # reference itself right after testing `node.target.right`;
             # this looks like it was meant to be `node.target.right.value`
             # -- confirm.  Here `node.target.right` was just set to a
             # SymbolRef, so the isinstance test cannot pass in this branch.
             if isinstance(node.target.right,
                           C.Constant) and node.target.value == 0.0:
                 return store_ps(node.target.left,
                                 C.BinaryOp(target, node.op, node.value))
             else:
                 return store_ps(C.Ref(node.target),
                                 C.BinaryOp(target, node.op, node.value))
         else:
             # Loop variable already appears in the outermost index: store
             # through the target as it is.
             # NOTE(review): same `node.target.value` question as above.
             if isinstance(node.target.right,
                           C.Constant) and node.target.value == 0.0:
                 return store_ps(
                     node.target.left,
                     C.BinaryOp(self.visit(node.target), node.op,
                                node.value))
             else:
                 return store_ps(
                     C.Ref(node.target),
                     C.BinaryOp(self.visit(node.target), node.op,
                                node.value))
     elif isinstance(node.op, C.Op.Add) and isinstance(
             node.value, C.FunctionCall):
         # TODO: Verify it's a vector intrinsic
         return C.Assign(
             node.target,
             C.FunctionCall(C.SymbolRef("_mm256_add_ps"),
                            [node.value, node.target]))
     elif isinstance(node.target, C.BinaryOp) and isinstance(
             node.target.op, C.Op.ArrayRef):
         raise NotImplementedError(node)
     node.target = self.visit(node.target)
     return node
Example #8
0
    def visit_For(self, node):
        """Forward stored vector registers to later loads in a loop body.

        Walks the flattened body once and builds a new statement list:

        * A ``_mm..._store...`` call whose destination is accepted by
          ``inReplaceMapSource`` is recorded through ``store_in_du_map``.
          If the stored value is a function call it is first assigned to a
          fresh register so that the store always writes a named register.
          The register name is added to ``self.seen``.
        * An assignment ``sym = _mm..._load...(...)`` whose source is
          accepted by ``inReplaceMapSink`` is dropped when its aliased
          reference is in the def-use map and that register is in
          ``self.seen``; the loaded symbol is then mapped to the register in
          ``sym_map``.  In every other case the load is kept.
        * All other statements are kept unchanged.

        Finally every kept statement is visited and the result flattened
        into ``node.body``.  Returns the (mutated) ``node``.
        """
        node.body = util.flatten([s for s in node.body])
        new_body = []
        for stmt in node.body:
          # --- vector store into a tracked source buffer ---
          if isinstance(stmt, C.FunctionCall) and "_mm" in stmt.func.name \
             and "_store" in stmt.func.name and inReplaceMapSource(stmt.args[0], self.replace_map):
                  
                  # Stored value is already a named register: just record it.
                  if isinstance(stmt.args[1], C.SymbolRef):
                    sym_arr_ref = extract_reference(stmt.args)  
                    store_in_du_map(sym_arr_ref)  
                    reg = stmt.args[1]
                    self.seen[reg.name] = None
                    new_body.append(stmt)

                  # NOTE(review): the "_mm" test below inspects the store's
                  # own name (stmt.func.name), which the enclosing `if`
                  # already established; possibly stmt.args[1].func.name
                  # was intended -- confirm.
                  # NOTE(review): a store whose value is neither a SymbolRef
                  # nor a FunctionCall matches no branch and is not appended
                  # to new_body -- confirm this is intended.
                  elif isinstance(stmt.args[1], C.FunctionCall) and "_mm" in stmt.func.name:
                      # Materialise the value in a fresh register, then store
                      # that register instead of the call expression.
                      tmp = self._gen_register()
                      new_body.append(C.Assign(C.SymbolRef(tmp, get_simd_type()()), deepcopy(stmt.args[1])))
                      new_body.append(C.FunctionCall(C.SymbolRef(stmt.func.name),  [stmt.args[0],C.SymbolRef(tmp, None)]))
                      sym_arr_ref = extract_reference(C.FunctionCall(C.SymbolRef(stmt.func.name),  [stmt.args[0],C.SymbolRef(tmp, None)]).args)  
                      store_in_du_map(sym_arr_ref)
                  # if stmt.args[0].type:
                  #    self.seen[reg.name] = stmt.args[0].type     
                  #else:
                      self.seen[tmp] = None

          # --- vector load from a tracked sink buffer ---
          elif isinstance(stmt, C.BinaryOp) and \
             isinstance(stmt.op, C.Op.Assign) and \
             isinstance(stmt.left, C.SymbolRef) and \
             isinstance(stmt.right, C.FunctionCall) and "_mm" in stmt.right.func.name and "_load" in stmt.right.func.name and inReplaceMapSink(stmt.right.args[0], self.replace_map): 
                  #print(stmt.right.args[0])                         
                  source = get_alias(stmt.right.args, self.replace_map)
                  #print(source)      
                  if (source is not None):
                    sym_arr_ref = construct_arr_reference(source, deepcopy(stmt.right.args))
                    if in_du_map(sym_arr_ref):
                       reg = get_register(sym_arr_ref)
                       #print(reg.name)   
                       if str(reg.name) in self.seen: 
                          #print(reg.name)  
                          # Load is redundant: drop it and remember which
                          # register now stands for the loaded symbol.
                          sym_map[stmt.left.name] = reg
                       else:
                          new_body.append(stmt) 
                    else:
                       new_body.append(stmt)    
                  else:
                      new_body.append(stmt)
                            
          else:
              new_body.append(stmt)  
        node.body = util.flatten([self.visit(s) for s in new_body])
        return node
Example #9
0
    def visit(self, node):
        """Broadcast tracked scalar definitions into vector registers.

        After the normal visit, the statements of any node with a ``body``
        are rewritten:

        * An assignment to a typed symbol that is listed in
          ``self.variables`` and not yet in ``self.defs`` is kept, and is
          followed by a new assignment of ``broadcast_ss(symbol, type)`` to
          a fresh register.  The register is recorded in ``self.defs`` (by
          the scalar's name) and in ``self.symbol_table``.
        * In any other assignment, every name in ``self.defs`` is replaced
          by its register; if the destination has a SIMD int/float type
          (taken from the node or from ``self.symbol_table``) and the
          right-hand side is a bare symbol, that symbol is wrapped in
          ``broadcast_ss``.
        * Non-assignment statements only get the ``self.defs`` replacement.

        Returns the (mutated) ``node``.
        """
        node = super().visit(node)
        if hasattr(node, 'body'):
            # [collector.visit(s) for s in node.body]
            newbody = []
            for s in node.body:
                if isinstance(s, C.BinaryOp) and isinstance(s.op, C.Op.Assign):
                    # Anand - needs more work 27th June 2017
                    # First definition of a tracked scalar: emit the scalar
                    # assignment, then its broadcast into a new register.
                    if isinstance(s.left, C.SymbolRef) and (s.left.type is not None) and s.left.name in self.variables \
                         and s.left.name not in self.defs:
                        y = self._gen_register()

                        new_stmt = C.Assign(
                            C.SymbolRef(y,
                                        get_simd_type(s.left.type)()),
                            broadcast_ss(C.SymbolRef(s.left.name, None),
                                         s.left.type))
                        newbody.append(s)
                        newbody.append(new_stmt)
                        self.defs[s.left.name] = C.SymbolRef(y, None)
                        self.symbol_table[y] = get_simd_type(s.left.type)()
                    else:
                        # Substitute every already-broadcast scalar.
                        for i in self.defs:
                            s = replace_symbol(i, self.defs[i], s)

                        # NOTE(review): `s.left.type` is read here without
                        # checking that s.left is a SymbolRef -- presumably
                        # every assignment target reaching this point has a
                        # `type` attribute; confirm.
                        if (isinstance(s.left.type,
                                       get_simd_type(ctypes.c_int()))
                                or isinstance(
                                    s.left.type, get_simd_type(
                                        ctypes.c_float()))) and isinstance(
                                            s.right, C.SymbolRef):
                            s.right = broadcast_ss(
                                C.SymbolRef(s.right.name, None), s.left.type)

                        # Same check, with the type taken from the symbol
                        # table instead of the target node itself.
                        elif isinstance(s.left, C.SymbolRef) and s.left.name in self.symbol_table and\
                             (isinstance(self.symbol_table[s.left.name], get_simd_type(ctypes.c_int())) or isinstance(self.symbol_table[s.left.name], get_simd_type(ctypes.c_float()))) and isinstance(s.right, C.SymbolRef):
                            s.right = broadcast_ss(
                                C.SymbolRef(s.right.name, None),
                                self.symbol_table[s.left.name])

                        newbody.append(s)

                else:

                    for i in self.defs:
                        s = replace_symbol(i, self.defs[i], s)

                    newbody.append(s)
            node.body = util.flatten(newbody)
        return node
Example #10
0
 def visit_FunctionDecl(self, node):
     """Instrument each top-level for-loop of the function with timers.

     Every ``ast.For`` / ``C.For`` statement in ``node.defn`` is preceded by
     ``times[k] -= omp_get_wtime()`` and followed by
     ``times[k] += omp_get_wtime()``, where ``k`` numbers the loops in
     order.  The new body starts with the declaration of ``*times`` and its
     allocation through ``calloc_doubles(<loop count>)`` and ends with one
     ``printf`` per slot.

     Returns the (mutated) ``node``.
     """
     def times_slot(index):
         return C.ArrayRef(C.SymbolRef('times'), C.Constant(index))

     timed_body = []
     loop_count = 0
     for stmt in node.defn:
         if not isinstance(stmt, (ast.For, C.For)):
             timed_body.append(stmt)
             continue
         start = C.SubAssign(times_slot(loop_count),
                             C.FunctionCall('omp_get_wtime', []))
         stop = C.AddAssign(times_slot(loop_count),
                            C.FunctionCall('omp_get_wtime', []))
         timed_body.extend([start, stmt, stop])
         loop_count += 1

     allocate = C.Assign(
         C.SymbolRef('times'),
         C.FunctionCall(C.SymbolRef('calloc_doubles'),
                        [C.Constant(loop_count)]))
     declare = C.Assign(C.SymbolRef("*times", ctypes.c_double()),
                        C.Constant(0))
     reports = [
         C.FunctionCall(
             C.SymbolRef('printf'),
             [C.String("\ttimes[%d] = %g\\n"), C.Constant(slot),
              times_slot(slot)])
         for slot in range(loop_count)
     ]
     node.defn = [declare, allocate] + timed_body + reports
     return node
Example #11
0
def gen_vector_cmp_instruction(dest, src1, src2, type_map, symbol_map):
    """Build ``dest = <cmp-mask intrinsic>(src1, src2, _MM_CMPINT_GT)``.

    The intrinsic is chosen from the types that ``get_type`` reports for
    both sources, which must be instances of the same ``simd.types`` class:

    ========  ==========================
    type      intrinsic
    ========  ==========================
    m256      ``_mm256_cmp_ps_mask``
    m512      ``_mm512_cmp_ps_mask``
    m256i     ``_mm256_cmp_epi32_mask``
    m512i     ``_mm512_cmp_epi32_mask``
    ========  ==========================

    Both types must be known (asserted); any other combination trips an
    assertion.
    """
    src1_type = get_type(src1, type_map, symbol_map)
    src2_type = get_type(src2, type_map, symbol_map)

    assert (src1_type is not None)
    assert (src2_type is not None)

    # (simd.types attribute, intrinsic name), tried in this order.
    candidates = (
        ("m256", "_mm256_cmp_ps_mask"),
        ("m512", "_mm512_cmp_ps_mask"),
        ("m256i", "_mm256_cmp_epi32_mask"),
        ("m512i", "_mm512_cmp_epi32_mask"),
    )
    for type_name, intrinsic in candidates:
        vector_type = getattr(simd.types, type_name)
        if isinstance(src1_type, vector_type) and isinstance(
                src2_type, vector_type):
            compare = C.FunctionCall(
                C.SymbolRef(intrinsic),
                [src1, src2, C.SymbolRef("_MM_CMPINT_GT", None)])
            return C.Assign(dest, compare)
    assert (False)
    def visit_For(self, node):
        """
        Load the tracked registers at the top of the target loop and, when
        appropriate, store them back at its end.

        Children are visited first.  Only the loop whose variable equals
        ``self.target_loop_var`` is modified: for every ``(var, seen)`` pair
        in ``self.seen`` a float declaration ``var = seen`` is inserted at
        the front of the body.  When ``self.target`` is "value" the matching
        ``seen = var`` write-backs are appended at the end.
        """
        node.body = [self.visit(child) for child in node.body]
        if node.init.left.name != self.target_loop_var:
            return node

        for reg_name, memory_ref in self.seen.values():
            load = C.Assign(C.SymbolRef(reg_name, ctypes.c_float()),
                            memory_ref)
            node.body.insert(0, load)

        # "grad" is only ever read, so only "value" needs writing back
        if self.target == "value":
            node.body.extend(
                C.Assign(memory_ref, C.SymbolRef(reg_name))
                for reg_name, memory_ref in self.seen.values())
        return node
Example #13
0
def gen_mask_move_instruction(dest, src1, selector, src2, type_map,
                              symbol_map):
    """Build ``dest = <mask-move intrinsic>(src1, selector, src2)``.

    The intrinsic is chosen from the types that ``get_type`` reports for
    ``src1`` and ``src2``, which must be instances of the same
    ``simd.types`` class:

    ========  ==========================
    type      intrinsic
    ========  ==========================
    m256      ``_mm256_mask_mov_ps``
    m512      ``_mm512_mask_mov_ps``
    m256i     ``_mm256_mask_mov_epi32``
    m512i     ``_mm512_mask_mov_epi32``
    ========  ==========================

    Both types must be known (asserted); any other combination trips an
    assertion.
    """
    src1_type = get_type(src1, type_map, symbol_map)
    src2_type = get_type(src2, type_map, symbol_map)

    assert (src1_type is not None)
    assert (src2_type is not None)

    # (simd.types attribute, intrinsic name), tried in this order.
    candidates = (
        ("m256", "_mm256_mask_mov_ps"),
        ("m512", "_mm512_mask_mov_ps"),
        ("m256i", "_mm256_mask_mov_epi32"),
        ("m512i", "_mm512_mask_mov_epi32"),
    )
    for type_name, intrinsic in candidates:
        vector_type = getattr(simd.types, type_name)
        if isinstance(src1_type, vector_type) and isinstance(
                src2_type, vector_type):
            move = C.FunctionCall(C.SymbolRef(intrinsic),
                                  [src1, selector, src2])
            return C.Assign(dest, move)
    assert (False)
Example #14
0
 def block_loop(self, node):
     """Block ``node`` by ``self.block_factor``.

     A new loop is inserted at the front of ``self.nest``.  Its variable is
     the original loop variable's name written twice (``i`` -> ``ii``); it
     starts at the original initial value, runs while below the original
     bound, and advances by ``self.block_factor``.  Its body is the
     placeholder list ``[None]``.

     ``node`` itself is changed in place to start at the new variable and to
     stop at ``fmin(new_var + block_factor, original_bound)``.
     """
     inner_var = node.init.left.name
     outer_var = inner_var + inner_var

     outer_loop = C.For(
         C.Assign(C.SymbolRef(outer_var, node.init.left.type),
                  node.init.right),
         C.Lt(C.SymbolRef(outer_var), node.test.right),
         C.AddAssign(C.SymbolRef(outer_var),
                     C.Constant(self.block_factor)), [None])
     self.nest.insert(0, outer_loop)

     node.init.right = C.SymbolRef(outer_var)
     block_end = C.Add(C.SymbolRef(outer_var),
                       C.Constant(self.block_factor))
     node.test.right = C.FunctionCall(C.SymbolRef("fmin"),
                                      [block_end, node.test.right])
Example #15
0
 def visit_FunctionDef(self, node):
     """Type the function's parameters and materialise captured values.

     ``self.decls`` is reset, then every statement of ``node.body`` is
     visited and the results are stored in ``node.defn``.  Afterwards:

     * each parameter except ``self`` becomes a ``C.SymbolRef`` typed from
       its entry in ``self.symbol_table`` (an ndpointer type for ``Array``
       values, ``get_ctype`` otherwise);
     * each entry of ``self.decls`` holding an ``Array`` is appended as an
       extra ndpointer-typed parameter;
     * every other entry of ``self.decls`` becomes a constant declaration
       inserted at the front of ``node.body`` (``True``/``False`` are
       emitted as the ``c_int`` constants 1/0).

     Returns the (mutated) ``node`` with ``node.args.args`` replaced by the
     new parameter list.
     """
     def array_ctype(array):
         # Instance of the ndpointer type matching the array's layout.
         return np.ctypeslib.ndpointer(array.dtype, array.ndim,
                                       array.shape)()

     self.decls = {}
     node.defn = [self.visit(s) for s in node.body]
     new_params = []
     for param in node.args.args:
         # Python 3 names parameters via `.arg`, Python 2 via `.id`.
         _id = param.arg if sys.version_info > (3, 0) else param.id
         if _id == 'self':
             continue
         value = self.symbol_table[_id]
         if isinstance(value, Array):
             _type = array_ctype(value)
         else:
             _type = get_ctype(value)
         new_params.append(C.SymbolRef(_id, _type))

     for name, value in self.decls.items():
         if isinstance(value, Array):
             new_params.append(C.SymbolRef(name, array_ctype(value)))
             continue
         if isinstance(value, bool):
             value = int(value)
             c_type = ct.c_int()
         else:
             c_type = get_ctype(value)
         # NOTE(review): declarations are inserted into node.body while the
         # visited statements were stored in node.defn above -- confirm
         # which list later passes consume.
         node.body.insert(
             0, C.Assign(C.SymbolRef(name, c_type), C.Constant(value)))
     node.args.args = new_params
     return node
Example #16
0
    def visit_RangeDim(self, node):
        """Lower a ``range_dim`` loop into a C ``for`` loop.

        ``node.child_for`` must iterate over a call to ``range_dim``; its
        second argument selects the dimension ``dim`` of ``node.mapping``.
        The loop length is ``len(node.mapping.shape[dim])`` and the step
        comes from ``node.mapping.get_step(dim)``.  The child loop's body is
        visited first, then one of three shapes is produced:

        * LRN ensemble with ``length < SIMDWIDTH``: a plain loop over
          ``length``; input indices are rewritten either in terms of
          ``_input_offset_{dim+1}_inner`` (when ``dim`` is a tiled
          input dimension for the current direction) or scaled by ``step``.
        * ``dim`` is a tiled input dimension for the current direction: an
          ``_outer`` loop of ``length // SIMDWIDTH`` iterations is appended
          to ``self.tiled_loops`` and the ``_inner`` loop is returned.
        * Otherwise: a plain loop over ``length`` with input indices scaled
          by ``step``.

        Raises:
            NotImplementedError: if the child loop does not iterate over a
                ``range_dim`` call.
        """
        iter = node.child_for.iter
        ensemble = node.ensemble  # not used below; self.ensemble is read instead
        ndim = node.mapping.ndim  # not used below
        dim = iter.args[1].n
        offset = node.mapping.get_offset(dim)  # not used below
        step = node.mapping.get_step(dim)
        length = len(node.mapping.shape[dim])
        if isinstance(iter, ast.Call) and iter.func.id == "range_dim":
            loop_var = node.child_for.target.id

            body = []
            body += [self.visit(s) for s in node.child_for.body]
            # FIXME: This check does not cover general cases
            #ANAND-special casing for LRN, needs refactoring
            if isinstance(self.ensemble, latte.ensemble.LRNEnsemble
                          ) and length < latte.config.SIMDWIDTH:
                # `dim` is tiled for the data this direction reads
                # ("inputs" going forward, "grad_inputs" otherwise).
                if (
                        self.direction == "forward"
                        and "inputs" in self.ensemble.tiling_info
                        and any(dim == x[0]
                                for x in self.ensemble.tiling_info["inputs"])
                ) or (self.direction in ["backward", "update_internal"]
                      and "grad_inputs" in self.ensemble.tiling_info and any(
                          dim == x[0]
                          for x in self.ensemble.tiling_info["grad_inputs"])):
                    # Three rewriting passes, applied in this order:
                    # 1) <loop_var>_outer -> (loop_var + offset) / SIMDWIDTH
                    body = [
                        UpdateInputIndices(
                            loop_var + "_outer",
                            C.Div(
                                C.Add(
                                    C.SymbolRef(loop_var),
                                    C.SymbolRef(
                                        "_input_offset_{}_inner".format(dim +
                                                                        1))),
                                C.Constant(latte.config.SIMDWIDTH))).visit(s)
                        for s in body
                    ]
                    # 2) _input_offset_{dim+1}_inner -> 0
                    body = [
                        UpdateInputIndices(
                            "_input_offset_{}_inner".format(dim + 1),
                            C.Constant(0)).visit(s) for s in body
                    ]
                    # 3) <loop_var>_inner -> (loop_var + offset) % SIMDWIDTH
                    body = [
                        UpdateInputIndices(
                            loop_var + "_inner",
                            C.Mod(
                                C.Add(
                                    C.SymbolRef(loop_var),
                                    C.SymbolRef(
                                        "_input_offset_{}_inner".format(dim +
                                                                        1))),
                                C.Constant(latte.config.SIMDWIDTH))).visit(s)
                        for s in body
                    ]
                    return C.For(
                        C.Assign(C.SymbolRef(loop_var, ctypes.c_int()),
                                 C.Constant(0)),
                        C.Lt(C.SymbolRef(loop_var), C.Constant(length)),
                        C.AddAssign(C.SymbolRef(loop_var), C.Constant(1)),
                        body,
                        # "unroll_and_jam({})".format(length)
                        # "unroll"
                    )
                else:
                    # Untiled dimension: scale the loop variable by `step`
                    # wherever it indexes an input.
                    body = [
                        UpdateInputIndices(
                            loop_var,
                            C.Mul(C.SymbolRef(loop_var),
                                  C.Constant(step))).visit(s) for s in body
                    ]
                    return C.For(
                        C.Assign(C.SymbolRef(loop_var, ctypes.c_int()),
                                 C.Constant(0)),
                        C.Lt(C.SymbolRef(loop_var), C.Constant(length)),
                        C.AddAssign(C.SymbolRef(loop_var), C.Constant(1)),
                        body,
                        # "unroll_and_jam({})".format(length)
                        # "unroll"
                    )

            # Same tiling test as above, for non-LRN (or long enough) loops.
            elif (
                    self.direction == "forward"
                    and "inputs" in self.ensemble.tiling_info
                    and any(dim == x[0]
                            for x in self.ensemble.tiling_info["inputs"])
            ) or (self.direction in ["backward", "update_internal"]
                  and "grad_inputs" in self.ensemble.tiling_info
                  and any(dim == x[0]
                          for x in self.ensemble.tiling_info["grad_inputs"])):
                # The outer loop is created with an empty body and only
                # registered in self.tiled_loops; it is not returned.
                outer_loop = C.For(
                    C.Assign(C.SymbolRef(loop_var + "_outer", ctypes.c_int()),
                             C.Constant(0)),
                    C.Lt(C.SymbolRef(loop_var + "_outer"),
                         C.Constant(length // latte.config.SIMDWIDTH)),
                    C.AddAssign(C.SymbolRef(loop_var + "_outer"),
                                C.Constant(1)), [])
                self.tiled_loops.append(outer_loop)
                # Forward pass over a dimension shorter than one vector:
                # the inner loop covers only `length` elements.
                if self.direction == "forward" and length < latte.config.SIMDWIDTH:
                    inner_loop = C.For(
                        C.Assign(
                            C.SymbolRef(loop_var + "_inner", ctypes.c_int()),
                            C.Constant(0)),
                        C.Lt(C.SymbolRef(loop_var + "_inner"),
                             C.Constant(length)),
                        C.AddAssign(C.SymbolRef(loop_var + "_inner"),
                                    C.Constant(1)),
                        body,
                    )
                else:
                    inner_loop = C.For(
                        C.Assign(
                            C.SymbolRef(loop_var + "_inner", ctypes.c_int()),
                            C.Constant(0)),
                        C.Lt(C.SymbolRef(loop_var + "_inner"),
                             C.Constant(latte.config.SIMDWIDTH)),
                        C.AddAssign(C.SymbolRef(loop_var + "_inner"),
                                    C.Constant(1)),
                        body,
                    )

                return inner_loop
            else:
                # Untiled dimension: scale the loop variable by `step`
                # wherever it indexes an input.
                body = [
                    UpdateInputIndices(
                        loop_var, C.Mul(C.SymbolRef(loop_var),
                                        C.Constant(step))).visit(s)
                    for s in body
                ]
                return C.For(
                    C.Assign(C.SymbolRef(loop_var, ctypes.c_int()),
                             C.Constant(0)),
                    C.Lt(C.SymbolRef(loop_var), C.Constant(length)),
                    C.AddAssign(C.SymbolRef(loop_var), C.Constant(1)),
                    body,
                    # "unroll_and_jam({})".format(length)
                    # "unroll"
                )
        raise NotImplementedError()
Example #17
0
    def visit_For(self, node):
        """Turn a parallel loop into OpenCL kernel launches.

        Only loops carrying a truthy ``parallel`` attribute are supported.
        If every statement of the body is itself a parallel ``C.For``, each
        inner loop becomes its own two-dimensional kernel (outer variable
        from ``get_global_id(0)``, inner one from ``get_global_id(1)``);
        otherwise the whole body becomes a single one-dimensional kernel.
        Each kernel is built through ``self.build_kernel`` and replaced by a
        ``StringTemplate`` that enqueues it and waits for completion.  One
        reduction loop per entry of ``node.reduce_vars`` (if any) follows.

        Returns the list of replacement nodes.

        Raises:
            NotImplementedError: if ``node`` is not marked parallel.
        """
        if not (hasattr(node, 'parallel') and node.parallel):
            raise NotImplementedError(node)

        def global_id(var, dimension):
            # `int <var> = get_global_id(<dimension>);`
            return C.Assign(
                C.SymbolRef(var, ctypes.c_int()),
                C.FunctionCall(C.SymbolRef("get_global_id"),
                               [C.Constant(dimension)]))

        # Supports depth one nesting with collapse
        outer_var = node.init.left.name
        outer_len = node.test.right
        launches = []
        collapsible = all(
            isinstance(child, C.For) and hasattr(child, 'parallel')
            and child.parallel
            for child in node.body)
        if collapsible:
            for child in node.body:
                kernel_body = child.body
                kernel_args = set()
                inner_var = child.init.left.name
                inner_len = child.test.right
                kernel_name = self._gen_unique_kernel_name()
                params = self.collect_args_and_insert_casts(
                    kernel_args, kernel_body)
                kernel_body.insert(0, global_id(outer_var, 0))
                kernel_body.insert(0, global_id(inner_var, 1))
                kernel_src = C.FunctionDecl(
                    None, C.SymbolRef(kernel_name), params, kernel_body)
                kernel_src.set_kernel()
                self.build_kernel(kernel_src, kernel_name, kernel_args)
                launches.append(StringTemplate(
                    """
                        size_t global_size_{kernel_name}[2] = {{{looplen1}, {looplen2}}};
                        clEnqueueNDRangeKernel(queue, {kernel_name}, 2, NULL, global_size_{kernel_name}, NULL, 0, NULL, NULL);
                        clFinish(queue);
                        """.format(
                        kernel_name=kernel_name,
                        looplen1=outer_len,
                        looplen2=inner_len)
                ))
        else:
            kernel_args = set()
            kernel_body = node.body
            kernel_name = self._gen_unique_kernel_name()
            params = self.collect_args_and_insert_casts(
                kernel_args, kernel_body)
            kernel_body.insert(0, global_id(outer_var, 0))
            kernel_src = C.FunctionDecl(
                None, C.SymbolRef(kernel_name), params, kernel_body)
            kernel_src.set_kernel()
            self.build_kernel(kernel_src, kernel_name, kernel_args)
            launches.append(StringTemplate(
                """
                    size_t global_size_{kernel_name}[1] = {{{looplen1}}};
                    clEnqueueNDRangeKernel(queue, {kernel_name}, 1, NULL, global_size_{kernel_name}, NULL, 0, NULL, NULL);
                    clFinish(queue);
                    """.format(
                    kernel_name=kernel_name,
                    looplen1=outer_len)
            ))

        if hasattr(node, 'reduce_vars') and len(node.reduce_vars) > 0:
            for var in node.reduce_vars:
                size = np.prod(self.buffers[var].shape[1:])
                launches.append(self._gen_reduce_for_loop(node, var, size))
        return launches
Example #18
0
 def visit_Return(self, node):
     """Turn ``return <expr>`` into a store into the return-value array.

     The returned expression is visited (and written back to ``node.value``),
     then assigned to ``retval_name[loopvar]``.
     """
     node.value = self.visit(node.value)
     slot = C.ArrayRef(C.SymbolRef(self.retval_name),
                       C.SymbolRef(self.loopvar))
     return C.Assign(slot, node.value)
 def visit_Return(self, node):
     """Turn ``return <expr>`` into an assignment to the target element.

     The left-hand side is a ``StringTemplate`` wrapping whatever
     ``self.target.get_element()`` returns; the right-hand side is the
     visited return expression.
     """
     rhs = self.visit(node.value)
     lhs = StringTemplate(self.target.get_element())
     return C.Assign(lhs, rhs)
Example #20
0
    def visit_For(self, node):
        """Visit a loop body and stage the extra unrolled copies of it.

        Every statement of ``node.body`` is visited and the visited result is
        kept in the body.  For each copy index ``i`` in
        ``1 .. self.factor - 1`` a rewritten copy is staged in the class-level
        dict ``UnrollStatementsNoJam.new_body[i]``:

        * a non-loop statement is deep-copied and ``self.target_var`` is
          replaced by ``target_var + i`` (``unroll_type == 0``) or by
          ``factor * target_var + i`` (``unroll_type == 1``);
        * a nested ``C.For`` is rebuilt as a fresh loop that starts at 0 and
          reuses the original constant bound and step, wrapping the copies
          that are staged under index ``i`` once the nested loop has been
          visited.

        Returns:
            ``node`` itself, with ``node.body`` replaced by the visited
            statements.

        Raises:
            ValueError: if a non-loop statement has to be copied and
                ``self.unroll_type`` is neither 0 nor 1.
        """
        for j in range(1, self.factor):
            UnrollStatementsNoJam.new_body[j] = []

        def shifted_index(offset):
            # Index expression substituted for target_var in one copy:
            # ``var + offset`` for unroll_type 0, otherwise
            # ``factor * var + offset``.
            index = C.SymbolRef(self.target_var)
            if self.unroll_type != 0:
                index = C.Mul(C.Constant(self.factor), index)
            return C.Add(index, C.Constant(offset))

        newbody = []

        for s in node.body:
            # Snapshot the staged copies before visiting ``s``: this method
            # resets the staging lists on entry, so visiting a nested loop
            # (presumably dispatched back here) would otherwise lose them.
            temp = deepcopy(UnrollStatementsNoJam.new_body)

            t = self.visit(s)

            # NOTE(review): this offset-0 rewrite runs on a throwaway copy and
            # its result is never used; the unmodified ``t`` is what goes into
            # the body.  Kept only to preserve the original call sequence --
            # confirm whether the rewritten copy was meant to be appended.
            util.replace_symbol(self.target_var, shifted_index(0), deepcopy(t))

            newbody.append(t)

            if not isinstance(t, C.For):
                for i in range(1, self.factor):
                    if self.unroll_type not in (0, 1):
                        # Was ``assert (false)``: a NameError in Python, and
                        # stripped entirely under ``-O``.
                        raise ValueError(
                            "unsupported unroll_type: %r"
                            % (self.unroll_type,))
                    shifted = util.replace_symbol(
                        self.target_var, shifted_index(i), deepcopy(t))
                    UnrollStatementsNoJam.new_body.setdefault(
                        i, []).append(shifted)
            else:
                var = t.init.left.name

                # Wrap what is staged per copy index in a loop with the same
                # bound and step, and append it to the pre-visit snapshot.
                for j in range(1, self.factor):
                    temp[j].append(
                        C.For(
                            C.Assign(C.SymbolRef(var, ctypes.c_int()),
                                     C.Constant(0)),
                            C.Lt(C.SymbolRef(var),
                                 C.Constant(t.test.right.value)),
                            C.AddAssign(C.SymbolRef(var),
                                        C.Constant(t.incr.value.value)),
                            UnrollStatementsNoJam.new_body[j]))

                UnrollStatementsNoJam.new_body = deepcopy(temp)

        node.body = newbody
        return node
Example #21
0
def gen_for(loopvar, start, end, body, pragma=""):
    """Build a ``C.For`` node counting ``loopvar`` from ``start`` up to ``end``.

    The init clause assigns ``start`` to a ``c_int``-typed ``loopvar``, the
    test is ``loopvar < end`` and the increment is a post-increment of
    ``loopvar``.  ``body`` and ``pragma`` are passed through to ``C.For``.
    """
    init = C.Assign(C.SymbolRef(loopvar, ctypes.c_int()), C.Constant(start))
    test = C.Lt(C.SymbolRef(loopvar), C.Constant(end))
    incr = C.PostInc(C.SymbolRef(loopvar))
    return C.For(init, test, incr, body, pragma)
Example #22
0
    def visit_Call(self, node):
        """Translate a Python call node.

        * ``len(x)`` becomes a ``C.Constant`` holding the length of whatever
          ``x`` evaluates to in the table.
        * A callee found in the table is run through its specializer
          (``get_program_config`` / ``get_transform_result``) and the
          generated function body is returned as a ``C.Block``, preceded by
          assignments binding the function's parameters to the call
          arguments.  The remaining statements of the generated file are
          added to ``self.files``.
        * Any other call is returned unchanged apart from having its
          arguments visited.
        """
        func = node.func
        if isinstance(func, ast.Name) and func.id == 'len':
            return C.Constant(len(self.eval_in_table(node.args[0])))

        if not self.table_contains(func):
            node.args = [self.visit(a) for a in node.args]
            return node

        callee = self.eval_in_table(func)
        params, args = [], []
        for arg in node.args:
            if isinstance(arg, ast.Subscript):
                container = self.eval_in_table(arg.value)
                idx_node = arg.slice.value
                if isinstance(idx_node, ast.Tuple):
                    idx_exprs = idx_node.elts
                else:
                    idx_exprs = [idx_node]
                index = self.eval_with_loop(idx_exprs)
                params.append(container[index])
                lowered = self.visit(arg)
                # Array elements are passed by reference.
                if isinstance(container[index], Array):
                    lowered = C.Ref(lowered)
                args.append(lowered)
            elif isinstance(arg, ast.Attribute):
                # Contributes a parameter value only; nothing is added to
                # the argument list for attributes.
                params.append(self.eval_in_table(arg))
            else:
                lowered = self.visit(arg)
                if isinstance(lowered, C.SymbolRef):
                    params.append(self.decls[lowered.name])
                    args.append(lowered)
                elif isinstance(lowered, ast.Tuple):
                    params.append(tuple(
                        self.eval_in_table(elt)
                        if isinstance(elt, C.SymbolRef) else elt
                        for elt in lowered.elts))

        if hasattr(callee, 'specialized_dispatch'):
            # The dispatcher is called with every parameter, but the program
            # config only sees the first ``num_args`` of them (when set).
            trimmed = params[:callee.num_args] if callee.num_args else params
            callee = callee.fn(*params)
            params = trimmed

        specializer = callee._specializer
        cfg = specializer.get_program_config(params, {})
        dir_name = specializer.config_to_dirname(cfg)
        result = specializer.get_transform_result(cfg, dir_name, cache=False)

        inlined = C.Block()
        cfile = result[0]
        func_decl = cfile.find(C.FunctionDecl, name=cfile.name)
        cfile.body = [stmt for stmt in cfile.body if stmt is not func_decl]
        self.files.extend(cfile.body)
        inlined.body = func_decl.defn
        for call_arg, formal in zip(args, func_decl.params):
            inlined.body.insert(0, C.Assign(formal, call_arg))
        return inlined