예제 #1
0
    def compile_args():
        """
        Build the list of command-line flags handed to the CUDA compiler.

        These args will be received by compile_str() in the preargs
        parameter. They will also be included in the "hard" part of the
        key module.

        The returned list contains, in order: the non-empty entries of
        ``config.nvcc.flags`` (split on single spaces), ``-use_fast_math``
        when ``config.nvcc.fastmath`` is set, a ``-DCUDA_NDARRAY_CUH=``
        define carrying the hash of ``cuda_ndarray.cuh``, the NumPy API
        macros, and an ``-arch=sm_XY`` flag when the user gave none.

        Note that when no architecture flag was supplied and no GPU device
        has been selected yet, this function initializes the GPU as a side
        effect so that it can query the device properties.

        """
        flags = [flag for flag in config.nvcc.flags.split(" ") if flag]
        if config.nvcc.fastmath:
            flags.append("-use_fast_math")

        # The header lives next to this module; its hash is passed as a
        # define so that it ends up in the compilation arguments.
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.dirname(__file__), "cuda_ndarray.cuh")
        )
        flags.append("-DCUDA_NDARRAY_CUH=" + cuda_ndarray_cuh_hash)

        # NumPy 1.7 deprecated the old API. This macro asserts that we
        # don't bring in new code that uses the old API.
        flags.append("-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")

        # NumPy 1.7 deprecated the old macro names, but the new names did
        # not exist before 1.7: map the new names onto the old ones there.
        numpy_ver = [int(n) for n in numpy.__version__.split(".")[:2]]
        if numpy_ver < [1, 7]:
            flags.extend(
                [
                    "-DNPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY",
                    "-DNPY_ARRAY_ALIGNED=NPY_ALIGNED",
                    "-DNPY_ARRAY_WRITEABLE=NPY_WRITEABLE",
                    "-DNPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL",
                    "-DNPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS",
                    "-DNPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS",
                ]
            )

        # If the user didn't specify architecture flags, add them.
        if not any("-arch=sm_" in f for f in flags):
            # We compile cuda_ndarray.cu during import.
            # We should not add device properties at that time.
            # As the device is not selected yet!
            # TODO: re-compile cuda_ndarray when we bind to a GPU?
            import theano.sandbox.cuda

            # NOTE(review): presumably this guards against the package
            # being only partially imported at this point -- confirm.
            if hasattr(theano.sandbox, "cuda"):
                n = theano.sandbox.cuda.use.device_number
                if n is None:
                    _logger.warning(
                        "We try to get compilation arguments for CUDA"
                        " code, but the GPU device is not initialized."
                        " This is probably caused by an Op that work on"
                        " the GPU that don't inherit from GpuOp."
                        " We Initialize the GPU now."
                    )
                    theano.sandbox.cuda.use(
                        "gpu",
                        force=True,
                        default_to_move_computation_to_gpu=False,
                        move_shared_float32_to_gpu=False,
                        enable_cuda=False,
                    )
                    # Re-read the device number now that a GPU is bound.
                    n = theano.sandbox.cuda.use.device_number
                p = theano.sandbox.cuda.device_properties(n)
                flags.append("-arch=sm_" + str(p["major"]) + str(p["minor"]))

        return flags
예제 #2
0
    def compile_args():
        """
        Build the list of command-line flags handed to the CUDA compiler.

        These args will be received by compile_str() in the preargs
        parameter. They will also be included in the "hard" part of the
        key module.

        The returned list contains, in order: the non-empty entries of
        ``config.nvcc.flags`` (split on single spaces), ``-use_fast_math``
        when ``config.nvcc.fastmath`` is set, a ``-DCUDA_NDARRAY_CUH=``
        define carrying the hash of ``cuda_ndarray.cuh``, the NumPy API
        macros, and an ``-arch=sm_XY`` flag when the user gave none.

        Note that when no architecture flag was supplied and no GPU device
        has been selected yet, this function initializes the GPU as a side
        effect so that it can query the device properties.

        """
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')

        # The header lives next to this module; its hash is passed as a
        # define so that it ends up in the compilation arguments.
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.dirname(__file__), 'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)

        # NumPy 1.7 deprecated the old API. This macro asserts that we
        # don't bring in new code that uses the old API.
        flags.append("-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")

        # NumPy 1.7 deprecated the old macro names, but the new names did
        # not exist before 1.7: map the new names onto the old ones there.
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if numpy_ver < [1, 7]:
            flags.extend([
                "-DNPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY",
                "-DNPY_ARRAY_ALIGNED=NPY_ALIGNED",
                "-DNPY_ARRAY_WRITEABLE=NPY_WRITEABLE",
                "-DNPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL",
                "-DNPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS",
                "-DNPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS",
            ])

        # If the user didn't specify architecture flags, add them.
        if not any('-arch=sm_' in f for f in flags):
            # We compile cuda_ndarray.cu during import.
            # We should not add device properties at that time.
            # As the device is not selected yet!
            # TODO: re-compile cuda_ndarray when we bind to a GPU?
            import theano.sandbox.cuda
            # NOTE(review): presumably this guards against the package
            # being only partially imported at this point -- confirm.
            if hasattr(theano.sandbox, 'cuda'):
                n = theano.sandbox.cuda.use.device_number
                if n is None:
                    _logger.warning(
                        "We try to get compilation arguments for CUDA"
                        " code, but the GPU device is not initialized."
                        " This is probably caused by an Op that work on"
                        " the GPU that don't inherit from GpuOp."
                        " We Initialize the GPU now.")
                    theano.sandbox.cuda.use(
                        "gpu",
                        force=True,
                        default_to_move_computation_to_gpu=False,
                        move_shared_float32_to_gpu=False,
                        enable_cuda=False)
                    # Re-read the device number now that a GPU is bound.
                    n = theano.sandbox.cuda.use.device_number
                p = theano.sandbox.cuda.device_properties(n)
                flags.append('-arch=sm_' + str(p['major']) +
                             str(p['minor']))

        return flags
예제 #3
0
    def compile_args():
        """
        Build the list of command-line flags handed to the CUDA compiler.

        These args will be received by compile_str() in the preargs
        parameter. They will also be included in the "hard" part of the
        key module.

        The returned list contains, in order: the non-empty entries of
        ``config.nvcc.flags`` (split on single spaces), ``-use_fast_math``
        when ``config.nvcc.fastmath`` is set, a ``-DCUDA_NDARRAY_CUH=``
        define carrying the hash of ``cuda_ndarray.cuh``, the
        ``NPY_NO_DEPRECATED_API`` define, and an ``-arch=sm_XY`` flag when
        the user gave none.

        Note that when no architecture flag was supplied and no GPU device
        has been selected yet, this function initializes the GPU as a side
        effect so that it can query the device properties.

        """
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')

        # The header lives next to this module; its hash is passed as a
        # define so that it ends up in the compilation arguments.
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.dirname(__file__), 'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)

        # NumPy 1.7 deprecated the old API.
        # The following macro asserts that we don't bring in new code
        # that uses the old API.
        flags.append("-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")

        # If the user didn't specify architecture flags, add them.
        if not any('-arch=sm_' in f for f in flags):
            # We compile cuda_ndarray.cu during import.
            # We should not add device properties at that time.
            # As the device is not selected yet!
            # TODO: re-compile cuda_ndarray when we bind to a GPU?
            import theano.sandbox.cuda
            # NOTE(review): presumably this guards against the package
            # being only partially imported at this point -- confirm.
            if hasattr(theano.sandbox, 'cuda'):
                n = theano.sandbox.cuda.use.device_number
                if n is None:
                    _logger.warning(
                        "We try to get compilation arguments for CUDA"
                        " code, but the GPU device is not initialized."
                        " This is probably caused by an Op that work on"
                        " the GPU that don't inherit from GpuOp."
                        " We Initialize the GPU now.")
                    theano.sandbox.cuda.use(
                        "gpu",
                        force=True,
                        default_to_move_computation_to_gpu=False,
                        move_shared_float32_to_gpu=False,
                        enable_cuda=False)
                    # Re-read the device number now that a GPU is bound.
                    n = theano.sandbox.cuda.use.device_number
                p = theano.sandbox.cuda.device_properties(n)
                flags.append('-arch=sm_' + str(p['major']) +
                             str(p['minor']))

        return flags