Beispiel #1
0
def test_caching_kernel_using_same_context(filter_str):
    """Check that a specialized kernel is served from the cache when the
    kernel is submitted through different SYCL queues that all share one
    SYCL context.

    Args:
        filter_str: SYCL filter selector string
    """
    global_size = 10

    def data_parallel_sum(a, b, c):
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    # Random input vectors and an output vector of the same shape/dtype.
    a = np.array(np.random.random(global_size), dtype=np.float32)
    b = np.array(np.random.random(global_size), dtype=np.float32)
    c = np.ones_like(a)

    # Specialize once against the default queue of the selected device so
    # the cache holds an entry for that device's context.
    dpctl.set_global_queue(filter_str)
    func = dppy.kernel(data_parallel_sum)
    default_queue = dpctl.get_current_queue()
    argtypes = func._get_argtypes(a, b, c)
    cached_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
        argtypes, default_queue)

    for _ in range(10):
        # A fresh queue per iteration; all of them share the same context,
        # so specialization must return the cached kernel object.
        with dpctl.device_context(filter_str) as fresh_queue:
            kernel_again = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
                func._get_argtypes(a, b, c), fresh_queue)
            assert kernel_again == cached_kernel
Beispiel #2
0
def test_get_current_backend():
    """Smoke-test backend/device-type queries and global-queue switching."""
    # Query calls: exercised only for absence of errors.
    dpctl.get_current_backend()
    dpctl.get_current_device_type()

    # Install a freshly constructed queue as the global queue, then switch
    # to a device-type selector if such a device is available.
    dpctl.set_global_queue(dpctl.SyclQueue())
    if has_gpu():
        dpctl.set_global_queue("gpu")
    elif has_cpu():
        dpctl.set_global_queue("cpu")
Beispiel #3
0
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dpctl
import syclbuffer as sb
import numpy as np

# Input matrix: 10^4 x 4098 doubles, every element 1e-4.
X = np.full((10**4, 4098), 1e-4, dtype="d")

# warm-up
print("=" * 10 + " Executing warm-up " + "=" * 10)
print("NumPy result: ", X.sum(axis=0))

# Column-wise total on the OpenCL CPU device.
dpctl.set_global_queue("opencl:cpu")
print("SYCL({}) result: {}".format(
    dpctl.get_current_queue().get_sycl_device().get_device_name(),
    sb.columnwise_total(X),
))

# Column-wise total on the OpenCL GPU device.  Use set_global_queue here as
# well: set_default_queue is the older spelling of the same operation, and
# every other queue switch in these examples uses set_global_queue.
dpctl.set_global_queue("opencl:gpu")
print("SYCL({}) result: {}".format(
    dpctl.get_current_queue().get_sycl_device().get_device_name(),
    sb.columnwise_total(X),
))

import timeit

print("Times for 'opencl:cpu'")
print(
Beispiel #4
0
# GPU computation
Xgpu = bs.black_scholes_price(opts)

# Reference prices computed in pure CPython, one option at a time.
X_ref = np.array(
    [ref_python_black_scholes(*opt) for opt in opts], dtype="d"
)

# Report whether the SYCL result agrees with the reference within tolerance.
print(np.allclose(Xgpu, X_ref, atol=1e-5))

n_opts = 3 * 10**6

# compute on CPU sycl device
import timeit

for _ in range(3):

    dpctl.set_global_queue("opencl:cpu:0")
    print("Using : {}".format(
        dpctl.get_current_queue().get_sycl_device().get_device_name()))

    t0 = timeit.default_timer()
    opts1 = gen_option_params(n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01,
                              0.05, 0.01, 0.05, "d")
    X1 = bs.black_scholes_price(opts1)
    t1 = timeit.default_timer()

    print("Elapsed: {}".format(t1 - t0))

    # compute on GPU sycl device
    dpctl.set_global_queue("level_zero:gpu:0")
    print("Using : {}".format(
        dpctl.get_current_queue().get_sycl_device().get_device_name()))