예제 #1
0
파일: main.py 프로젝트: ucb-sejits/akx
    def transform(self, py_ast, program_config):
        """
        Build the C project containing the generated ``apply_all`` kernel.

        ``program_config`` unpacks into ``(arg_config, tuner_config)``; only
        ``arg_config`` is consulted.  The generated C file holds
        ``py_ast.body[0]`` followed by ``apply_all(A, B)``, whose body is a
        pair of nested loops: ``k`` runs from 1 while ``k < A_powers`` and
        ``i`` runs from 0 while ``i < A_len``, assigning
        ``apply(A[A_len*(k-1) + i], B[i])`` to ``A[A_len*k + i]``.

        Returns a ``(Project, entry_point_typesig)`` pair; the signature is
        read back from the generated ``apply_all`` declaration.
        """
        arg_cfg, _tuner_cfg = program_config

        # Pull the per-argument settings in one pass each ('B_len' is read
        # but plays no part in the generated code).
        a_len, a_dtype, a_ndim, a_shape, a_powers = (
            arg_cfg[key]
            for key in ('A_len', 'A_dtype', 'A_ndim', 'A_shape', 'A_powers'))
        _b_len, b_dtype, b_ndim, b_shape = (
            arg_cfg[key]
            for key in ('B_len', 'B_dtype', 'B_ndim', 'B_shape'))

        elem_a = get_ctree_type(a_dtype)
        ptr_a = NdPointer(a_dtype, a_ndim, a_shape)

        elem_b = get_ctree_type(b_dtype)
        ptr_b = NdPointer(b_dtype, b_ndim, b_shape)

        kernel_sig = FuncType(elem_a, [elem_a, elem_b])

        kernel_def = py_ast.body[0]

        # A[a_len*k + i] = apply(A[a_len*(k-1) + i], B[i])
        dest = ArrayRef(
            SymbolRef("A"),
            Add(Mul(Constant(a_len), SymbolRef("k")), SymbolRef("i")))
        prev = ArrayRef(
            SymbolRef("A"),
            Add(Mul(Constant(a_len), Sub(SymbolRef("k"), Constant(1))),
                SymbolRef("i")))
        b_item = ArrayRef(SymbolRef("B"), SymbolRef("i"))
        update = Assign(dest, FunctionCall(SymbolRef("apply"), [prev, b_item]))

        # for (int i = 0; i < a_len; i++) { update }
        over_items = For(
            Assign(SymbolRef("i", Int()), Constant(0)),
            Lt(SymbolRef("i"), Constant(a_len)),
            PostInc(SymbolRef("i")),
            [update])

        # for (int k = 1; k < a_powers; k++) { over_items }
        over_powers = For(
            Assign(SymbolRef("k", Int()), Constant(1)),
            Lt(SymbolRef("k"), Constant(a_powers)),
            PostInc(SymbolRef("k")),
            [over_items])

        apply_all = FunctionDecl(
            Void(), "apply_all",
            params=[SymbolRef("A", ptr_a), SymbolRef("B", ptr_b)],
            defn=[over_powers])

        c_file = CFile("generated", [kernel_def, apply_all])
        c_file = PyBasicConversions().visit(c_file)

        # The user-supplied kernel gets static/inline qualifiers and its
        # concrete signature once it has been converted.
        kernel = c_file.find(FunctionDecl, name="apply")
        kernel.set_static().set_inline()
        kernel.set_typesig(kernel_sig)

        entry_decl = c_file.find(FunctionDecl, name="apply_all")
        entry_sig = entry_decl.get_type().as_ctype()

        return Project([c_file]), entry_sig
예제 #2
0
    def transform(self, py_ast, program_config):
        """
        Build the C project that applies ``apply`` to every element of ``A``.

        ``program_config`` unpacks into ``(arg_config, tuner_config)``; only
        ``arg_config`` is consulted.  The generated C file holds
        ``py_ast.body[0]`` followed by a string-templated ``apply_all``
        function that loops over ``A_len`` items.  As a side effect the
        result of ``to_dot`` on the converted tree is written to
        ``graph.dot``.

        Returns a ``(Project, entry_point_typesig)`` pair.
        """
        arg_cfg, _tuner_cfg = program_config
        n_items, dtype, ndim, shape = (
            arg_cfg[key] for key in ('A_len', 'A_dtype', 'A_ndim', 'A_shape'))

        elem_type = get_ctree_type(dtype)
        ptr_type = NdPointer(dtype, ndim, shape)
        kernel_sig = FuncType(elem_type, [elem_type])

        substitutions = dict(
            array_decl=SymbolRef("A", ptr_type),
            array_ref=SymbolRef("A"),
            num_items=Constant(n_items),
        )

        kernel_def = py_ast.body[0]
        apply_all = StringTemplate("""\
            void apply_all($array_decl) {
                for (int i = 0; i < $num_items; i++) {
                    $array_ref[i] = apply( $array_ref[i] );
                }
            }
            """, substitutions)

        c_file = CFile("generated", [kernel_def, apply_all])
        c_file = PyBasicConversions().visit(c_file)

        # The user-supplied kernel gets static/inline qualifiers and its
        # concrete signature once it has been converted.
        kernel = c_file.find(FunctionDecl, name="apply")
        kernel.set_static().set_inline()
        kernel.set_typesig(kernel_sig)

        # Dump the tree in DOT form; the file is opened before rendering.
        with open("graph.dot", 'w') as dot_file:
            dot_file.write(to_dot(c_file))

        entry_sig = FuncType(Void(), [ptr_type]).as_ctype()
        return Project([c_file]), entry_sig
예제 #3
0
파일: dgemm.py 프로젝트: i-Zaak/ctree
    def transform(self, py_ast, program_config):
        """
        Build the C project implementing a blocked ``dgemm`` for one
        configuration.

        ``program_config`` is an ``(arg_config, tuner_config)`` pair and is
        also stored on ``self._current_config``.  ``arg_config`` supplies
        ``n`` and ``dtype``; ``tuner_config`` supplies ``rx``, ``ry``, ``cx``
        and ``cy``, each multiplied by 4 before use.  ``py_ast`` is not used
        by this method.

        Returns a ``(Project, ctype)`` pair: a project holding one generated
        C file, and ``as_ctype()`` of the ``dgemm`` signature (three arrays
        of shape ``(n, n)`` plus a pointer to double).
        """
        self._current_config = program_config

        arg_config, tuner_config = program_config
        n, dtype  = arg_config['n'], arg_config['dtype']
        rx, ry = tuner_config['rx']*4, tuner_config['ry']*4
        cx, cy = tuner_config['cx']*4, tuner_config['cy']*4
        # NOTE(review): unroll is read from tuner_config['ry'], the same key
        # used for ry above -- confirm this is intended.
        unroll = tuner_config['ry']*4

        # NOTE(review): elem_type is not referenced again in this method.
        elem_type = get_ctree_type(dtype)
        array_type = NdPointer(dtype, 2, (n,n))

        # Entry-point signature: three arrays plus the double* duration slot.
        dgemm_typesig = FuncType(Void(), [array_type, array_type, array_type, Ptr(Double())])

        A = SymbolRef("A", array_type)
        B = SymbolRef("B", array_type)
        C = SymbolRef("C", array_type)

        N = Constant(n)
        RX, RY = Constant(rx), Constant(ry)
        CX, CY = Constant(cx), Constant(cy)
        UNROLL = Constant(unroll)

        # Substitutions for the $-placeholders in the C templates below.
        template_args = {
            "A_decl": A.copy(declare=True),
            "B_decl": B.copy(declare=True),
            "C_decl": C.copy(declare=True),
            "RX": RX,
            "RY": RY,
            "CX": CX,
            "CY": CY,
            "UNROLL": UNROLL,
            "lda": N,
        }

        # Header include and min() macro used by the dgemm template.
        preamble =  StringTemplate("""
        #include <immintrin.h>
        #define min(x,y) (((x)<(y))?(x):(y))
        """, copy.deepcopy(template_args))

        # The register kernel needs three generated code fragments in
        # addition to a deep copy of the common substitutions.
        reg_template_args = {
            'load_c_block': self._gen_load_c_block(rx, ry, n),
            'store_c_block': self._gen_store_c_block(rx, ry, n),
            'k_rank1_updates': self._gen_k_rank1_updates(rx, ry, cx, cy, unroll, n),
        }
        reg_template_args.update(copy.deepcopy(template_args))

        register_dgemm = StringTemplate("""
        void register_dgemm( $A_decl, $B_decl, $C_decl, int K )  {
            __m256d c[$RY/4][$RX];

            $load_c_block

            while ( K >= $UNROLL ) {
              $k_rank1_updates

              A += $UNROLL*$CY;
              B += $UNROLL;
              K -= $UNROLL;
            }

            $store_c_block
        }
        """, reg_template_args)

        # NOTE(review): this template is handed template_args itself, while
        # the other templates using these substitutions receive deep copies
        # -- confirm the sharing is intended.
        fast_dgemm = StringTemplate("""
        void fast_dgemm( int M, int N, int K, $A_decl, $B_decl, $C_decl ) {
            static double a[$CX*$CY] __attribute__ ((aligned (16)));

            //  make a local aligned copy of A's block
            for( int j = 0; j < K; j++ )
                for( int i = 0; i < M; i++ )
                    a[i+j*$CY] = A[i+j*$lda];

            //  multiply using the copy
            for( int j = 0; j < N; j += $RX )
                for( int i = 0; i < M; i += $RY )
                    register_dgemm( a + i, B + j*$lda, C + i + j*$lda, K );
        }""", template_args)

        # Plain triple-loop fallback; dgemm calls it when the block sizes
        # are not multiples of RY/RX/UNROLL.
        fringe_dgemm = StringTemplate("""
        void fringe_dgemm( int M, int N, int K, $A_decl, $B_decl, $C_decl )
        {
            for( int j = 0; j < N; j++ )
               for( int i = 0; i < M; i++ )
                    for( int k = 0; k < K; k++ )
                         C[i+j*$lda] += A[i+k*$lda] * B[k+j*$lda];
        }
        """, copy.deepcopy(template_args))

        # Timing helper; takes no substitutions.
        wall_time = StringTemplate("""
        #include <sys/time.h>

        double wall_time () {
          struct timeval t;
          gettimeofday (&t, NULL);
          return 1.*t.tv_sec + 1.e-6*t.tv_usec;
        }

        """, {})

        # Entry point: walks the matrices in blocks, dispatches each block to
        # fast_dgemm or fringe_dgemm, and stores the elapsed wall time in
        # *duration.
        dgemm =  StringTemplate("""
        int align( int x, int y ) { return x <= y ? x : (x/y)*y; }

        void dgemm($C_decl, $A_decl, $B_decl, double *duration) {
            double start_time = wall_time();

            for( int i = 0; i < $lda; ) {
                int I = align( min( $lda-i, $CY ), $RY );
                for( int j = 0; j < $lda; ) {
                    int J = align( $lda-j, $RX );
                    for( int k = 0; k < $lda; ) {
                        int K = align( min( $lda-k, $CX ), $UNROLL );
                        if( (I%$RY) == 0 && (J%$RX) == 0 && (K%$UNROLL) == 0 )
                            fast_dgemm ( I, J, K, A + i + k*$lda, B + k + j*$lda, C + i + j*$lda );
                        else
                            fringe_dgemm( I, J, K, A + i + k*$lda, B + k + j*$lda, C + i + j*$lda );
                        k += K;
                    }
                    j += J;
                }
                i += I;
            }

            // report time back for tuner
            *duration = wall_time() - start_time;
        }
        """, copy.deepcopy(template_args))

        # Each callee is listed ahead of the function that calls it.
        tree = CFile("generated", [
            preamble,
            wall_time,
            register_dgemm,
            fast_dgemm,
            fringe_dgemm,
            dgemm,
        ])

        return Project([tree]), dgemm_typesig.as_ctype()
예제 #4
0
 def args_to_subconfig(self, args):
     """Return a dict whose ``'arg_typesig'`` entry is the tuple of
     ``get_ctree_type`` results for each element of ``args``."""
     typesig = tuple(map(get_ctree_type, args))
     return {'arg_typesig': typesig}
예제 #5
0
    def transform(self, py_ast, program_config):
        """
        Build the C project implementing a blocked ``dgemm`` for one
        configuration.

        ``program_config`` is an ``(arg_config, tuner_config)`` pair and is
        also stored on ``self._current_config``.  ``arg_config`` supplies
        ``n`` and ``dtype``; ``tuner_config`` supplies ``rx``, ``ry``, ``cx``
        and ``cy``, each multiplied by 4 before use.  ``py_ast`` is not used
        by this method.

        Returns a ``(Project, ctype)`` pair: a project holding one generated
        C file, and ``as_ctype()`` of the ``dgemm`` signature (three arrays
        of shape ``(n, n)`` plus a pointer to double).
        """
        self._current_config = program_config

        arg_config, tuner_config = program_config
        n, dtype = arg_config['n'], arg_config['dtype']
        rx, ry = tuner_config['rx'] * 4, tuner_config['ry'] * 4
        cx, cy = tuner_config['cx'] * 4, tuner_config['cy'] * 4
        # NOTE(review): unroll is read from tuner_config['ry'], the same key
        # used for ry above -- confirm this is intended.
        unroll = tuner_config['ry'] * 4

        # NOTE(review): elem_type is not referenced again in this method.
        elem_type = get_ctree_type(dtype)
        array_type = NdPointer(dtype, 2, (n, n))

        # Entry-point signature: three arrays plus the double* duration slot.
        dgemm_typesig = FuncType(
            Void(), [array_type, array_type, array_type,
                     Ptr(Double())])

        A = SymbolRef("A", array_type)
        B = SymbolRef("B", array_type)
        C = SymbolRef("C", array_type)

        N = Constant(n)
        RX, RY = Constant(rx), Constant(ry)
        CX, CY = Constant(cx), Constant(cy)
        UNROLL = Constant(unroll)

        # Substitutions for the $-placeholders in the C templates below.
        template_args = {
            "A_decl": A.copy(declare=True),
            "B_decl": B.copy(declare=True),
            "C_decl": C.copy(declare=True),
            "RX": RX,
            "RY": RY,
            "CX": CX,
            "CY": CY,
            "UNROLL": UNROLL,
            "lda": N,
        }

        # Header include and min() macro used by the dgemm template.
        preamble = StringTemplate(
            """
        #include <immintrin.h>
        #define min(x,y) (((x)<(y))?(x):(y))
        """, copy.deepcopy(template_args))

        # The register kernel needs three generated code fragments in
        # addition to a deep copy of the common substitutions.
        reg_template_args = {
            'load_c_block':
            self._gen_load_c_block(rx, ry, n),
            'store_c_block':
            self._gen_store_c_block(rx, ry, n),
            'k_rank1_updates':
            self._gen_k_rank1_updates(rx, ry, cx, cy, unroll, n),
        }
        reg_template_args.update(copy.deepcopy(template_args))

        register_dgemm = StringTemplate(
            """
        void register_dgemm( $A_decl, $B_decl, $C_decl, int K )  {
            __m256d c[$RY/4][$RX];

            $load_c_block

            while ( K >= $UNROLL ) {
              $k_rank1_updates

              A += $UNROLL*$CY;
              B += $UNROLL;
              K -= $UNROLL;
            }

            $store_c_block
        }
        """, reg_template_args)

        # NOTE(review): this template is handed template_args itself, while
        # the other templates using these substitutions receive deep copies
        # -- confirm the sharing is intended.
        fast_dgemm = StringTemplate(
            """
        void fast_dgemm( int M, int N, int K, $A_decl, $B_decl, $C_decl ) {
            static double a[$CX*$CY] __attribute__ ((aligned (16)));

            //  make a local aligned copy of A's block
            for( int j = 0; j < K; j++ )
                for( int i = 0; i < M; i++ )
                    a[i+j*$CY] = A[i+j*$lda];

            //  multiply using the copy
            for( int j = 0; j < N; j += $RX )
                for( int i = 0; i < M; i += $RY )
                    register_dgemm( a + i, B + j*$lda, C + i + j*$lda, K );
        }""", template_args)

        # Plain triple-loop fallback; dgemm calls it when the block sizes
        # are not multiples of RY/RX/UNROLL.
        fringe_dgemm = StringTemplate(
            """
        void fringe_dgemm( int M, int N, int K, $A_decl, $B_decl, $C_decl )
        {
            for( int j = 0; j < N; j++ )
               for( int i = 0; i < M; i++ )
                    for( int k = 0; k < K; k++ )
                         C[i+j*$lda] += A[i+k*$lda] * B[k+j*$lda];
        }
        """, copy.deepcopy(template_args))

        # Timing helper; takes no substitutions.
        wall_time = StringTemplate(
            """
        #include <sys/time.h>

        double wall_time () {
          struct timeval t;
          gettimeofday (&t, NULL);
          return 1.*t.tv_sec + 1.e-6*t.tv_usec;
        }

        """, {})

        # Entry point: walks the matrices in blocks, dispatches each block to
        # fast_dgemm or fringe_dgemm, and stores the elapsed wall time in
        # *duration.
        dgemm = StringTemplate(
            """
        int align( int x, int y ) { return x <= y ? x : (x/y)*y; }

        void dgemm($C_decl, $A_decl, $B_decl, double *duration) {
            double start_time = wall_time();

            for( int i = 0; i < $lda; ) {
                int I = align( min( $lda-i, $CY ), $RY );
                for( int j = 0; j < $lda; ) {
                    int J = align( $lda-j, $RX );
                    for( int k = 0; k < $lda; ) {
                        int K = align( min( $lda-k, $CX ), $UNROLL );
                        if( (I%$RY) == 0 && (J%$RX) == 0 && (K%$UNROLL) == 0 )
                            fast_dgemm ( I, J, K, A + i + k*$lda, B + k + j*$lda, C + i + j*$lda );
                        else
                            fringe_dgemm( I, J, K, A + i + k*$lda, B + k + j*$lda, C + i + j*$lda );
                        k += K;
                    }
                    j += J;
                }
                i += I;
            }

            // report time back for tuner
            *duration = wall_time() - start_time;
        }
        """, copy.deepcopy(template_args))

        # Each callee is listed ahead of the function that calls it.
        tree = CFile("generated", [
            preamble,
            wall_time,
            register_dgemm,
            fast_dgemm,
            fringe_dgemm,
            dgemm,
        ])

        return Project([tree]), dgemm_typesig.as_ctype()
예제 #6
0
파일: types.py 프로젝트: lowks/ctree
 def get_base_type(self):
     """Return ``get_ctree_type`` applied to ``self.ptr._dtype_``."""
     dtype = self.ptr._dtype_
     return get_ctree_type(dtype)
예제 #7
0
파일: types.py 프로젝트: lowks/ctree
 def visit_Constant(self, node):
     """Return what ``get_ctree_type`` reports for ``node.value``."""
     value = node.value
     return get_ctree_type(value)
예제 #8
0
 def args_to_subconfig(self, args):
     """Return a dict whose ``'arg_type'`` entry is ``get_ctree_type`` of
     the first element of ``args``."""
     first_arg = args[0]
     return dict(arg_type=get_ctree_type(first_arg))
예제 #9
0
파일: types.py 프로젝트: leonardt/ctree
 def get_base_type(self):
     """Look up the ctree type for the dtype stored on ``self.ptr._dtype_``
     by passing it to ``get_ctree_type``."""
     pointee_dtype = self.ptr._dtype_
     return get_ctree_type(pointee_dtype)
예제 #10
0
파일: types.py 프로젝트: leonardt/ctree
 def visit_Constant(self, node):
     """Pass the constant node's ``value`` to ``get_ctree_type`` and return
     the result."""
     literal = node.value
     return get_ctree_type(literal)