Esempio n. 1
0
    def compile_args():
        """
        Re-implementation of compile_args that does not create an
        additional context on the GPU.

        The returned flags are built, in order, from:

        * ``config.nvcc.flags`` split on single spaces (empty pieces
          dropped);
        * ``-use_fast_math`` when ``config.nvcc.fastmath`` is set;
        * ``-DCUDA_NDARRAY_CUH=<hash>`` where ``<hash>`` is what
          ``hash_from_file`` returns for ``cuda_ndarray.cuh`` located
          next to ``theano.sandbox.cuda``;
        * ``-DNPY_ARRAY_*=NPY_*`` compatibility defines when numpy is
          older than 1.7;
        * ``-arch=sm_<major><minor>`` for the device recorded in
          ``theano.sandbox.gpuarray.init_dev.device`` when no flag
          containing ``-arch=sm_`` is already present.

        Returns
        -------
        list of str
            The nvcc flags.

        Raises
        ------
        Exception
            If an architecture flag must be computed but no device has
            been initialised, or the device is an OpenCL one.
        AssertionError
            If the device name does not start with ``cuda``.

        """
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')
        cuda_dir = os.path.split(theano.sandbox.cuda.__file__)[0]
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(cuda_dir, 'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)

        # numpy 1.7 deprecated the old NPY_* macros, but the replacement
        # NPY_ARRAY_* names did not exist before that release: for older
        # numpy, define each new name as its old spelling.
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if numpy_ver < [1, 7]:
            for macro in ('ENSURECOPY', 'ALIGNED', 'WRITEABLE',
                          'UPDATE_ALL', 'C_CONTIGUOUS', 'F_CONTIGUOUS'):
                flags.append('-DNPY_ARRAY_%s=NPY_%s' % (macro, macro))

        # If the user didn't specify architecture flags add them
        if not any('-arch=sm_' in f for f in flags):
            dev = theano.sandbox.gpuarray.init_dev.device
            if dev is None:
                raise Exception("Trying to compile GPU code without a context")
            if dev.startswith("opencl"):
                raise Exception("Trying to call nvcc with an OpenCL context")
            # Raised explicitly (instead of using ``assert``) so the check
            # still runs under ``python -O``; the exception type is kept
            # so existing handlers are unaffected.
            if not dev.startswith('cuda'):
                raise AssertionError(
                    "Unexpected device name %r: expected it to start "
                    "with 'cuda'" % (dev,))
            if dev == 'cuda':
                # Bare 'cuda' carries no index; use the device number
                # stored on theano.sandbox.cuda.use.
                n = theano.sandbox.cuda.use.device_number
            else:
                # Everything after the 'cuda' prefix is the device index.
                n = int(dev[4:])
            p = theano.sandbox.cuda.device_properties(n)
            flags.append('-arch=sm_' + str(p['major']) + str(p['minor']))

        return flags
Esempio n. 2
0
    def compile_args():
        """
        Re-implementation of compile_args that does not create an
        additional context on the GPU.

        The returned flags are built, in order, from:

        * ``config.nvcc.flags`` split on single spaces (empty pieces
          dropped);
        * ``-use_fast_math`` when ``config.nvcc.fastmath`` is set;
        * ``-DCUDA_NDARRAY_CUH=<hash>`` where ``<hash>`` is what
          ``hash_from_file`` returns for ``cuda_ndarray.cuh`` located
          next to ``theano.sandbox.cuda``;
        * ``-DNPY_ARRAY_*=NPY_*`` compatibility defines when numpy is
          older than 1.7;
        * ``-arch=sm_<major><minor>`` for the device recorded in
          ``theano.sandbox.gpuarray.init_dev.device`` when no flag
          containing ``-arch=sm_`` is already present.

        Returns
        -------
        list of str
            The nvcc flags.

        Raises
        ------
        Exception
            If an architecture flag must be computed but no device has
            been initialised, or the device is an OpenCL one.
        AssertionError
            If the device name does not start with ``cuda``.

        """
        flags = [flag for flag in config.nvcc.flags.split(" ") if flag]
        if config.nvcc.fastmath:
            flags.append("-use_fast_math")
        cuda_dir = os.path.split(theano.sandbox.cuda.__file__)[0]
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(cuda_dir, "cuda_ndarray.cuh")
        )
        flags.append("-DCUDA_NDARRAY_CUH=" + cuda_ndarray_cuh_hash)

        # numpy 1.7 deprecated the old NPY_* macros, but the replacement
        # NPY_ARRAY_* names did not exist before that release: for older
        # numpy, define each new name as its old spelling.
        numpy_ver = [int(n) for n in numpy.__version__.split(".")[:2]]
        if numpy_ver < [1, 7]:
            legacy_macros = (
                "ENSURECOPY",
                "ALIGNED",
                "WRITEABLE",
                "UPDATE_ALL",
                "C_CONTIGUOUS",
                "F_CONTIGUOUS",
            )
            for macro in legacy_macros:
                flags.append("-DNPY_ARRAY_%s=NPY_%s" % (macro, macro))

        # If the user didn't specify architecture flags add them
        if not any("-arch=sm_" in f for f in flags):
            dev = theano.sandbox.gpuarray.init_dev.device
            if dev is None:
                raise Exception("Trying to compile GPU code without a context")
            if dev.startswith("opencl"):
                raise Exception("Trying to call nvcc with an OpenCL context")
            # Raised explicitly (instead of using ``assert``) so the check
            # still runs under ``python -O``; the exception type is kept
            # so existing handlers are unaffected.
            if not dev.startswith("cuda"):
                raise AssertionError(
                    "Unexpected device name %r: expected it to start "
                    "with 'cuda'" % (dev,)
                )
            if dev == "cuda":
                # Bare "cuda" carries no index; use the device number
                # stored on theano.sandbox.cuda.use.
                n = theano.sandbox.cuda.use.device_number
            else:
                # Everything after the "cuda" prefix is the device index.
                n = int(dev[4:])
            p = theano.sandbox.cuda.device_properties(n)
            flags.append("-arch=sm_" + str(p["major"]) + str(p["minor"]))

        return flags