def _ctc_check_compile(ctc_lib_path):
    """Check whether a small program using warp-ctc can be compiled.

    The program is only compiled and linked (``try_run=False``); it is
    never executed.

    Parameters
    ----------
    ctc_lib_path
        Directory handed to the linker through ``-L``.  When it is
        ``None``, neither that flag nor the ``config.ctc.root`` include
        directory is added to the command line.

    Returns
    -------
    tuple
        ``(True, None)`` when the compilation succeeds, otherwise
        ``(False, message)`` where ``message`` embeds the reported error.
    """
    c_header = """
#include <string.h>
#include "ctc.h"
"""

    c_body = """
ctcOptions options;
memset(&options, 0, sizeof(ctcOptions));
options.loc = CTC_CPU;
options.num_threads = 1;
"""

    # The directory of this file is always on the include path.
    flags = ['-I%s' % (os.path.dirname(__file__))]
    if ctc_lib_path is not None:
        flags += [
            "-I%s" % (os.path.join(config.ctc.root, "include")),
            "-L%s" % (ctc_lib_path),
        ]
    flags += ["-l", "warpctc"]

    result = GCC_compiler.try_flags(
        flags, preambule=c_header, body=c_body,
        try_run=False, output=True)

    # Tolerate a non-tuple result: treat it as the bare status with no
    # captured output.
    if isinstance(result, tuple):
        ok, _out, err_text = result
    else:
        ok, err_text = result, None

    if ok:
        return True, None
    return False, ("cannot compile with warp-ctc. "
                   "We got this error:\n" + str(err_text))
def _dnn_check_compile():
    """Check whether a small program using cuDNN can be compiled.

    Returns
    -------
    tuple
        ``(True, None)`` when the compilation succeeds, otherwise
        ``(False, message)`` where ``message`` embeds the reported error.
    """
    c_header = """
#include <stdio.h>
#include <cudnn.h>
#include <cudnn_helper.h>
"""

    # No need for the context in here since we won't execute that code
    c_body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  fprintf(stderr, "could not create cuDNN handle: %s",
          cudnnGetErrorString(err));
  return 1;
}
"""

    flags = ["-l", "cudnn", "-I" + os.path.dirname(__file__)]
    # Only add the configured directories when they are set.
    optional_dirs = (
        ("-I", config.dnn.include_path),
        ("-L", config.dnn.library_path),
    )
    for switch, directory in optional_dirs:
        if directory:
            flags.append(switch + directory)

    # Do not run here the test program. It would run on the
    # default gpu, not the one selected by the user. If mixed
    # GPU are installed or if the GPUs are configured in
    # exclusive mode, this cause bad detection.
    ok, _out, err_text = GCC_compiler.try_flags(
        flags, preambule=c_header, body=c_body,
        try_run=False, output=True)

    if ok:
        return True, None
    return False, ("Theano cannot compile with cuDNN. "
                   "We got this error:\n" + str(err_text))
def check_force_gemv_init():
    """Tell whether the gemv output must be zero-filled before calling BLAS.

    Test issue 1569.
    Namely when evaluating

        beta*y + alpha*dot(A, x)

    where we set y * beta = zeros of the correct dimensions we
    do not actually set y = zeros and instead let the BLAS
    perform beta*y with uninitialized memory for
    speed. Occasionally the memory contains values that are
    equivalent to NaN in which case the product beta*y contains
    NaN's for correctly implemented BLAS libraries. In this
    situation, since we are introducing the NaN's, we need to test
    whether the BLAS performs correctly. If it *does*, i.e. it
    actually performs the multiplication beta*y which will result
    in NaN's in the result, then we need initialize the memory to
    zeros.

    The answer is computed once and cached on
    ``check_force_gemv_init._force_init_beta``; a value of ``None`` there
    means "not computed yet".

    Returns
    -------
    bool
        ``True`` when the probe program compiled and reported NaN
        propagation, ``False`` otherwise (including when the probe could
        not be compiled).
    """
    if check_force_gemv_init._force_init_beta is None:
        from theano.gof.cmodule import GCC_compiler

        # The probe exits with status 1 when NaN leaked into y and 0
        # otherwise.  The matrix and the output vector are passed as plain
        # element pointers so the call type-checks when built as C++.
        test_code = """
#include <math.h>
extern "C" void dgemv_(const char*, const int*, const int*, const double *, const double *, const int*, const double *, const int*, const double *, double *, const int *);
int main() {
  double A[2][2] = {{1., 1.}, {1., 1.}};
  double x[2] = {1., 1.};
  double y[2] = {NAN, NAN};
  const int s = 2;
  const int inc = 1;
  const double alpha = 1.0;
  const double beta = 0.0;

  dgemv_("T", &s, &s, &alpha, &A[0][0], &s, x, &inc, &beta, y, &inc);

  return (isnan(y[0]) || isnan(y[1])) ? 1 : 0;
}
"""
        res = GCC_compiler.try_compile_tmp(
            test_code,
            tmp_prefix='check_beta_',
            flags=ldflags(libs=True, flags=True, libs_dir=True),
            try_run=True)

        # NOTE(review): assumes that with try_run=True the result is
        # (compiled_ok, run_ok) and that run_ok is true only for exit
        # status 0 -- confirm against try_compile_tmp.  Under that reading
        # a failed run means NaN propagated, so zero-filling is required.
        if res and res[0]:
            check_force_gemv_init._force_init_beta = not res[1]
        else:
            check_force_gemv_init._force_init_beta = False

    return check_force_gemv_init._force_init_beta
def check_force_gemv_init():
    """Tell whether the gemv output must be zero-filled before calling BLAS.

    Test issue 1569.
    Namely when evaluating

        beta*y + alpha*dot(A, x)

    where we set y * beta = zeros of the correct dimensions we
    do not actually set y = zeros and instead let the BLAS
    perform beta*y with uninitialized memory for
    speed. Occasionally the memory contains values that are
    equivalent to NaN in which case the product beta*y contains
    NaN's for correctly implemented BLAS libraries. In this
    situation, since we are introducing the NaN's, we need to test
    whether the BLAS performs correctly. If it *does*, i.e. it
    actually performs the multiplication beta*y which will result
    in NaN's in the result, then we need initialize the memory to
    zeros.

    The answer is computed once and cached on
    ``check_force_gemv_init._force_init_beta``; a value of ``None`` there
    means "not computed yet".

    Returns
    -------
    bool
        ``True`` when the probe program compiled and reported NaN
        propagation, ``False`` otherwise (including when the probe could
        not be compiled).
    """
    if check_force_gemv_init._force_init_beta is None:
        from theano.gof.cmodule import GCC_compiler

        # The probe exits with status 1 when NaN leaked into y and 0
        # otherwise.  The matrix and the output vector are passed as plain
        # element pointers so the call type-checks when built as C++.
        test_code = """
#include <math.h>
extern "C" void dgemv_(const char*, const int*, const int*, const double *, const double *, const int*, const double *, const int*, const double *, double *, const int *);
int main() {
  double A[2][2] = {{1., 1.}, {1., 1.}};
  double x[2] = {1., 1.};
  double y[2] = {NAN, NAN};
  const int s = 2;
  const int inc = 1;
  const double alpha = 1.0;
  const double beta = 0.0;

  dgemv_("T", &s, &s, &alpha, &A[0][0], &s, x, &inc, &beta, y, &inc);

  return (isnan(y[0]) || isnan(y[1])) ? 1 : 0;
}
"""
        res = GCC_compiler.try_compile_tmp(
            test_code,
            tmp_prefix='check_beta_',
            flags=ldflags(libs=True, flags=True, libs_dir=True),
            try_run=True)

        # NOTE(review): assumes that with try_run=True the result is
        # (compiled_ok, run_ok) and that run_ok is true only for exit
        # status 0 -- confirm against try_compile_tmp.  Under that reading
        # a failed run means NaN propagated, so zero-filling is required.
        if res and res[0]:
            check_force_gemv_init._force_init_beta = not res[1]
        else:
            check_force_gemv_init._force_init_beta = False

    return check_force_gemv_init._force_init_beta
def test_gxx_support():
    """Try to compile, without running, a small program with ``-fopenmp``.

    Returns
    -------
    The value reported by ``GCC_compiler.try_compile_tmp`` for that
    compilation attempt.
    """
    omp_probe = """
#include <omp.h>
int main( int argc, const char* argv[] )
{
    int res[10];

    for(int i=0; i < 10; i++){
        res[i] = i;
    }
}
"""
    return GCC_compiler.try_compile_tmp(
        src_code=omp_probe,
        tmp_prefix="test_omp_",
        flags=["-fopenmp"],
        try_run=False,
    )
def test_gxx_support():
    """Try to compile, without running, a small program with ``-fopenmp``.

    Returns
    -------
    The value reported by ``GCC_compiler.try_compile_tmp`` for that
    compilation attempt.
    """
    omp_probe = """
#include <omp.h>
int main( int argc, const char* argv[] )
{
    int res[10];

    for(int i=0; i < 10; i++){
        res[i] = i;
    }
}
"""
    compile_options = dict(
        src_code=omp_probe,
        tmp_prefix='test_omp_',
        flags=['-fopenmp'],
        try_run=False)
    return GCC_compiler.try_compile_tmp(**compile_options)
def test_flag_detection():
    """Check that the code detecting blas flags does not raise.

    It used to happen on python 3 because of improper string handling,
    but was not detected because that path is not usually taken,
    so we test it here directly.
    """
    blas_flags = ["-lblas"]
    GCC_compiler.try_flags(blas_flags)
def dnn_available():
    """Return whether cuDNN can be used, caching the answer.

    The result is stored on ``dnn_available.avail``; a value of ``None``
    there means "not computed yet".  When cuDNN is not usable the reason
    is stored on ``dnn_available.msg``.

    Returns
    -------
    bool
        ``True`` when the probe program compiles and the cuDNN version is
        accepted, ``False`` otherwise.

    Raises
    ------
    RuntimeError
        If the probe compiles but ``version()`` reports a release below
        2000, or one in the range [3000, 3007).
    """
    if dnn_available.avail is not None:
        return dnn_available.avail

    if pygpu is None:
        dnn_available.msg = "PyGPU not available"
        dnn_available.avail = False
        return False

    if not init_dev.device.startswith('cuda'):
        dnn_available.msg = "Not on a CUDA device. Got %s." % init_dev.device
        dnn_available.avail = False
        return False

    # This is a hack because bin_id is in the form of
    # "sm_<major><minor>" for cuda devices: the last two characters are
    # the compute capability digits.
    if int(pygpu.get_default_context().bin_id[-2:]) < 30:
        dnn_available.msg = "Device not supported by cuDNN"
        dnn_available.avail = False
        # Stop here so the compile probe below cannot overwrite the verdict.
        return False

    preambule = """
#include <stdio.h>
#include <cuda.h>
#include <cudnn.h>
#include <cudnn_helper.h>
"""

    body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  fprintf(stderr, "could not create cuDNN handle: %s",
          cudnnGetErrorString(err));
  return 1;
}
"""

    # Skip unset directories: a bare "-I" or "-L" would swallow the
    # argument that follows it on the command line.
    params = ["-l", "cudnn", "-I" + os.path.dirname(__file__)]
    if config.dnn.include_path:
        params.append("-I" + config.dnn.include_path)
    if config.dnn.library_path:
        params.append("-L" + config.dnn.library_path)

    # Do not run here the test program. It would run on the
    # default gpu, not the one selected by the user. If mixed
    # GPU are installed or if the GPUs are configured in
    # exclusive mode, this cause bad detection.
    comp, out, err = GCC_compiler.try_flags(
        params, preambule=preambule, body=body,
        try_run=False, output=True)

    dnn_available.avail = comp
    if not dnn_available.avail:
        dnn_available.msg = (
            "Theano cannot compile with cuDNN. We got this error:\n" +
            str(err))
    else:
        # If we can compile, check that we can import and run.
        v = version()
        if v < 2000:
            dnn_available.avail = False
            dnn_available.msg = (
                "You have an old release of CuDNN (or a release candidate) "
                "that isn't supported.  Please update to at least v2 final "
                "version.")
            raise RuntimeError(dnn_available.msg)
        if 3000 <= v < 3007:
            dnn_available.avail = False
            dnn_available.msg = (
                "You have installed a release candidate of CuDNN v3. This "
                "isn't supported. Please update to v3 final version.")
            raise RuntimeError(dnn_available.msg)

    return dnn_available.avail
def dnn_available():
    """Return whether cuDNN can be used, caching the answer.

    The result is stored on ``dnn_available.avail``; a value of ``None``
    there means "not computed yet".  When cuDNN is not usable the reason
    is stored on ``dnn_available.msg``.

    Returns
    -------
    bool
        ``True`` when the probe program compiles and the cuDNN version is
        accepted, ``False`` otherwise.

    Raises
    ------
    RuntimeError
        If the probe compiles but ``version()`` reports a release below
        2000, or one in the range [3000, 3007).
    """
    if dnn_available.avail is not None:
        return dnn_available.avail

    if pygpu is None:
        dnn_available.msg = "PyGPU not available"
        dnn_available.avail = False
        return False

    if not init_dev.device.startswith('cuda'):
        dnn_available.msg = "Not on a CUDA device. Got %s." % init_dev.device
        dnn_available.avail = False
        return False

    # This is a hack because bin_id is in the form of
    # "sm_<major><minor>" for cuda devices: the last two characters are
    # the compute capability digits.
    if int(pygpu.get_default_context().bin_id[-2:]) < 30:
        dnn_available.msg = "Device not supported by cuDNN"
        dnn_available.avail = False
        # Stop here so the compile probe below cannot overwrite the verdict.
        return False

    preambule = """
#include <stdio.h>
#include <cudnn.h>
#include <cudnn_helper.h>
"""

    body = """
cudnnHandle_t _handle = NULL;
cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
  fprintf(stderr, "could not create cuDNN handle: %s",
          cudnnGetErrorString(err));
  return 1;
}
"""

    # Only pass the configured directories when they are set.
    params = ["-l", "cudnn", "-I" + os.path.dirname(__file__)]
    if config.dnn.include_path:
        params.append("-I" + config.dnn.include_path)
    if config.dnn.library_path:
        params.append("-L" + config.dnn.library_path)

    # Do not run here the test program. It would run on the
    # default gpu, not the one selected by the user. If mixed
    # GPU are installed or if the GPUs are configured in
    # exclusive mode, this cause bad detection.
    comp, out, err = GCC_compiler.try_flags(
        params, preambule=preambule, body=body,
        try_run=False, output=True)

    dnn_available.avail = comp
    if not dnn_available.avail:
        dnn_available.msg = (
            "Theano cannot compile with cuDNN. We got this error:\n" +
            str(err))
    else:
        # If we can compile, check that we can import and run.
        v = version()
        if v < 2000:
            dnn_available.avail = False
            dnn_available.msg = (
                "You have an old release of CuDNN (or a release candidate) "
                "that isn't supported.  Please update to at least v2 final "
                "version.")
            raise RuntimeError(dnn_available.msg)
        if 3000 <= v < 3007:
            dnn_available.avail = False
            dnn_available.msg = (
                "You have installed a release candidate of CuDNN v3. This "
                "isn't supported. Please update to v3 final version.")
            raise RuntimeError(dnn_available.msg)

    return dnn_available.avail