Example 1: GPU backward pass for cross-channel LRN (launches the LRNComputeDiff kernel)
    def get_native_compile_info(self, input_types, devtype):
        assert devtype == "gpu"
        d = dict(cdtype=core.np2c[input_types[0].dtype])
        cuda_code = r"""
            #include "cgt_cuda.h"
            #include "lrn.cuh"
            void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata,
                %(cdtype)s* topdata, %(cdtype)s* scalingdata, %(cdtype)s* topdiffdata, %(cdtype)s* bottomdiffdata) {
                int nblocks, nthreads;
                int size = num_img * width * height;
                cgt_get_bt(size, nblocks, nthreads);
                LRNComputeDiff<%(cdtype)s><<<nblocks, nthreads, 0>>>(size, (%(cdtype)s*)Xdata, (%(cdtype)s*)topdata,
                    (%(cdtype)s*)scalingdata, (%(cdtype)s*)topdiffdata, num_img, channels, height, width, localsize,
                    -beta, 2. * alpha * beta / localsize, (%(cdtype)s*)bottomdiffdata);
                CUDA_CHECK_ERROR("CrossChannelLRNBackward");
            }
        """%d
        code = """
            void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata,
                %(cdtype)s* topdata, %(cdtype)s* scaledata, %(cdtype)s* topdiffdata, %(cdtype)s* bottomdiffdata);
            CGT_EXPORT_C void $function($closure* cldata, cgtArray** reads, cgtArray* bottom_diff) {
                cgtArray *X=reads[0], *top=reads[1], *scaling=reads[2], *top_diff=reads[3];
                int num_img = X->shape()[0],
                    channels = X->shape()[1],
                    height = X->shape()[2],
                    width = X->shape()[3];
                launchker_$function(num_img, channels, height, width, cldata->localsize, cldata->alpha, cldata->beta, (%(cdtype)s*)X->data(),
                    (%(cdtype)s*)top->data(), (%(cdtype)s*)scaling->data(), (%(cdtype)s*)top_diff->data(), (%(cdtype)s*)bottom_diff->data());
            }"""%d
        return core.NativeCompileInfo(code, closure_triples = make_closure(self.info),
            link_flags="-lcudart", gpu_deref_mask=(True,True,True,True),
            extra_srcs=[core.SrcFile("cuda",cuda_code)])
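All of these snippets use the same two-stage templating: the Python dict d is substituted into the C/CUDA source with the ordinary % operator when the op is compiled, while the $function and $closure placeholders are presumably filled in later by cgt's native-compilation machinery (that second stage is not shown in these examples). A minimal sketch of the first stage, using a made-up template string:

    # Sketch of the %-substitution stage only. The template below is hypothetical;
    # resolving $function/$closure is assumed to happen later inside cgt itself.
    d = dict(cdtype="float")  # core.np2c[...] maps a NumPy dtype to such a C type name
    template = r"""
    CGT_EXPORT_C void $function(void* cldata, cgtArray** reads, cgtArray* write) {
        %(cdtype)s* x = (%(cdtype)s*)reads[0]->data();
    }"""
    print(template % d)  # every %(cdtype)s becomes float; $function is left untouched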
Example 2: GPU forward pass for cross-channel LRN (LRNFillScale + LRNComputeOutput), writing a (top, scale) tuple
    def get_native_compile_info(self, input_types, devtype):
        assert devtype == "gpu"
        d = dict(cdtype=core.np2c[input_types[0].dtype])
        cuda_code = r"""
            #include "cgt_cuda.h"
            #include "lrn.cuh"
            void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, %(cdtype)s* topdata, %(cdtype)s* scaledata) {
                int size = num_img * height * width;
                int nblocks, nthreads;
                cgt_get_bt(size, nblocks, nthreads);
                LRNFillScale<%(cdtype)s><<<nblocks, nthreads, 0>>>(
                    size, Xdata, num_img, channels, height, width, localsize, alpha / localsize, scaledata);
                CUDA_CHECK_ERROR("LRNFillScale");

                size = num_img * channels * width * height;
                cgt_get_bt(size, nblocks, nthreads);
                LRNComputeOutput<%(cdtype)s><<<nblocks, nthreads, 0>>>(size, Xdata, scaledata, -beta, topdata);
                CUDA_CHECK_ERROR("LRNComputeOutput");
            }"""%d
        code = r"""
            extern void launchker_$function(int num_img, int channels, int height, int width, int localsize, double alpha, double beta, %(cdtype)s* Xdata, %(cdtype)s* topdata, %(cdtype)s* scaledata);
            CGT_EXPORT_C void $function($closure* cldata, cgtArray** reads, cgtTuple* write) {
                cgtArray* X = reads[0];
                int num_img = X->shape()[0],
                    channels = X->shape()[1],
                    height = X->shape()[2],
                    width = X->shape()[3];
                cgtArray* top = (cgtArray*)write->getitem(0);                
                cgtArray* scale = (cgtArray*)write->getitem(1);
                launchker_$function(num_img, channels, height, width, cldata->localsize, cldata->alpha, cldata->beta, (%(cdtype)s*)X->data(), (%(cdtype)s*)top->data(), (%(cdtype)s*)scale->data());

            }"""%d
        return core.NativeCompileInfo(code, closure_triples = make_closure(self.info),
            link_flags="-lcudart", gpu_deref_mask=(True,), 
            extra_srcs=[core.SrcFile("cuda",cuda_code)])
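For intuition, here is a rough NumPy reference of what the two kernels above presumably compute. The kernel names match Caffe's cross-channel LRN, so this sketch assumes the same formulation; the additive constant of 1 in the scale term and the exact channel-window alignment are assumptions, not read off the snippet.

    import numpy as np

    def lrn_forward_reference(X, localsize, alpha, beta):
        # X: (num_img, channels, height, width)
        # scale ~ 1 + (alpha/localsize) * sum of X**2 over a window of localsize channels
        N, C, H, W = X.shape
        scale = np.ones_like(X)
        half = (localsize - 1) // 2
        for c in range(C):
            lo, hi = max(0, c - half), min(C, c - half + localsize)
            scale[:, c] += (alpha / localsize) * np.sum(X[:, lo:hi] ** 2, axis=1)
        top = X * scale ** (-beta)  # matches the -beta passed to LRNComputeOutput
        return top, scale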
Example 3: max-pooling forward, delegating to the max_pool template in pooling.h
    def get_native_compile_info(self, input_types, devtype):
        code = r"""
CGT_EXPORT_C void $function(conv_closure* cl, cgtArray** reads, cgtTuple* write) {
    max_pool<%(cdtype)s>(cl, reads[0], static_cast<cgtArray*>(write->getitem(0)), static_cast<cgtArray*>(write->getitem(1)));
}""" % dict(cdtype=core.np2c[input_types[0].dtype])
        return core.NativeCompileInfo(code,
                                      closure_triples=info2closure(self.info),
                                      includes=["pooling.h"])
Example 4: max-pooling pullback (gradient), delegating to the max_pool_pullback template in pooling.h
    def get_native_compile_info(self, input_types, devtype):
        code = r"""
CGT_EXPORT_C void $function(conv_closure* cl, cgtArray** reads, cgtArray* write) {
    max_pool_pullback<%(cdtype)s>(reads[0], reads[1], reads[2], reads[3], write);
}""" % dict(cdtype=core.np2c[input_types[0].dtype])
        return core.NativeCompileInfo(code,
                                      closure_triples=info2closure(self.info),
                                      includes=["pooling.h"])
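Examples 3 and 4 show the other common pattern: when the real work already lives in a header (here the max_pool and max_pool_pullback templates in pooling.h), the generated $function is only a thin stub that casts the read and write arrays and forwards them, and the NativeCompileInfo just needs the closure triples and the include.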
Example 5: cuDNN convolution backward-data pass, with $setup/$teardown entry points
    def get_native_compile_info(self, input_types, devtype):
        assert devtype == "gpu"
        code = """
            CGT_EXPORT_C void $setup(conv_closure* closure) {setup_cudnn(closure);}
            CGT_EXPORT_C void $teardown(conv_closure* closure) {teardown_cudnn(closure);}
            CGT_EXPORT_C void $function(conv_closure* closure, cgtArray** reads, cgtArray* write) {
                if (!closure->handle) setup_cudnn(closure);
                performConvBackwardData(closure, reads[1], reads[2], write);
            }"""
        return core.NativeCompileInfo(code,
                                      closure_triples=make_closure(
                                          self.ph, self.pw, self.sv, self.sh),
                                      includes=["cudnn_support.h"],
                                      link_flags="-lcudnn -lcudart")
Example 6: cuDNN pooling forward pass, with the same $setup/$teardown pattern
    def get_native_compile_info(self, _input_types, devtype):
        assert devtype == "gpu"
        code = """
            CGT_EXPORT_C void $setup(pooling_closure* closure) {setup_cudnn(closure);}
            CGT_EXPORT_C void $teardown(pooling_closure* closure) {teardown_cudnn(closure);}
            CGT_EXPORT_C void $function(pooling_closure* closure, cgtArray** reads, cgtArray* write) {
                if (!closure->handle) setup_cudnn(closure);
                performPoolingForward(closure, reads[0], write);
            }"""
        return core.NativeCompileInfo(code,
                                      closure_triples=poolinfo2closure(
                                          self.info),
                                      includes=["cudnn_support.h"],
                                      link_flags="-lcudnn -lcudart")
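The two cuDNN-backed examples (5 and 6) additionally emit $setup and $teardown entry points, and guard the $function body with an if (!closure->handle) setup_cudnn(closure); check so the cuDNN handle is created lazily even if $setup was never run; both link against -lcudnn and -lcudart.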
Example 7: CPU elementwise op computing sin and cos of the input in a single pass
    def get_native_compile_info(self, input_types, devtype):
        assert devtype == "cpu"
        code = """
CGT_EXPORT_C void $function(void* cldata, cgtArray** reads, cgtTuple* write) {
    float* x = static_cast<float*>(reads[0]->data());
    float* y = static_cast<float*>(static_cast<cgtArray*>(write->getitem(0))->data());
    float* z = static_cast<float*>(static_cast<cgtArray*>(write->getitem(1))->data());
    for (int i=0; i < reads[0]->size(); ++i) {
        y[i] = sinf(x[i]);
        z[i] = cosf(x[i]);    
    }
}"""
        return core.NativeCompileInfo(code,
                                      includes=["math.h"],
                                      link_flags="-lm")
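Example 7 is the simplest case: no closure, a plain CPU loop, and two outputs written into the cgtTuple. Its NumPy-level meaning is simply:

    import numpy as np

    def sincos_reference(x):
        # what the generated $function computes, one element at a time
        return np.sin(x), np.cos(x)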
Example 8: CPU col2im, calling the col2im_cpu template from im2col.h once per image
    def get_native_compile_info(self, input_types, devtype):
        d = dict(cdtype=core.np2c[input_types[0].dtype])
        d.update(self.info._asdict())
        code = r"""
            CGT_EXPORT_C void $function($closure* cl, cgtArray** reads, cgtArray* write) {
                cgtArray* col = reads[0];
                size_t batchsize = reads[1]->at<size_t>(0),
                       channels  = reads[2]->at<size_t>(0),
                       height    = reads[3]->at<size_t>(0),
                       width     = reads[4]->at<size_t>(0);
                for (int i=0; i < batchsize; ++i) {
                    col2im_cpu<%(cdtype)s, %(kernel_h)s,%(kernel_w)s,%(pad_h)s,%(pad_w)s,%(stride_h)s,%(stride_w)s>
                    ((%(cdtype)s*)col->data() + col->stride(0)*i, channels, height, width, (%(cdtype)s*)write->data() + write->stride(0)*i);
                }
            }""" % d
        return core.NativeCompileInfo(code,
                                      includes=["im2col.h"],
                                      closure_triples=info2closure(self.info))
Example 9: CPU im2col, the counterpart of Example 8, calling im2col_cpu once per image
    def get_native_compile_info(self, input_types, devtype):
        assert devtype == "cpu"
        d = dict(cdtype=core.np2c[input_types[0].dtype])
        d.update(self.info._asdict())
        code = r"""
            CGT_EXPORT_C void $function($closure* cl, cgtArray** reads, cgtArray* write) {
                cgtArray* im = reads[0];
                const size_t* imshape = im->shape();
                int batchsize = imshape[0],
                    channels = imshape[1],
                    height = imshape[2],
                    width = imshape[3];
                for (int i=0; i < batchsize; ++i) {
                    im2col_cpu<%(cdtype)s, %(kernel_h)s,%(kernel_w)s,%(pad_h)s,%(pad_w)s,%(stride_h)s,%(stride_w)s>
                    ((%(cdtype)s*)im->data() + im->stride(0)*i, channels, height, width, (%(cdtype)s*)write->data() + write->stride(0)*i);
                }
            }""" % d
        return core.NativeCompileInfo(code,
                                      includes=["im2col.h"],
                                      closure_triples=info2closure(self.info))
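Examples 8 and 9 wrap the per-image col2im_cpu and im2col_cpu templates from im2col.h, with the kernel, padding, and stride sizes baked in as compile-time template arguments via the closure dict. For reference, a rough NumPy sketch of a single-image im2col in the usual Caffe-style column layout; the exact layout produced by im2col.h is an assumption here.

    import numpy as np

    def im2col_reference(im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w):
        # im: (channels, height, width); returns a (channels*kernel_h*kernel_w, out_h*out_w)
        # matrix whose columns are the flattened receptive fields.
        C, H, W = im.shape
        out_h = (H + 2 * pad_h - kernel_h) // stride_h + 1
        out_w = (W + 2 * pad_w - kernel_w) // stride_w + 1
        padded = np.pad(im, ((0, 0), (pad_h, pad_h), (pad_w, pad_w)))
        cols = np.empty((C * kernel_h * kernel_w, out_h * out_w), dtype=im.dtype)
        row = 0
        for c in range(C):
            for i in range(kernel_h):
                for j in range(kernel_w):
                    patch = padded[c,
                                   i:i + stride_h * out_h:stride_h,
                                   j:j + stride_w * out_w:stride_w]
                    cols[row] = patch.reshape(-1)
                    row += 1
        return cols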