def testCopyHostToDevicePerformance(self, bits):
    """Log the "copyToDevice_time" metric of a Cuda run for ``bits``.

    Builds two elements from a ``GF2nStub`` factory created for the "Cuda"
    backend, runs the "measureKernelLaunchOverhead" operation on them, then
    records the "copyToDevice_time" entry of the "bn_a" metrics under the
    label "copy host -> device".
    """
    backend = "Cuda"
    # NOTE(review): the meaning of the trailing -1 argument is not visible
    # from this block -- confirm against GF2nStub.
    make_element = GF2nStub.GF2nStub(backend, bits, -1)
    lhs = make_element()
    rhs = make_element()

    # do cuda arithmetic
    GF2nStub.run("measureKernelLaunchOverhead", lhs, rhs)

    timings = GF2nStub.getMetrics("bn_a")
    perf_log = PerformanceDataLogger()
    perf_log.addPerfResult(
        "copy host -> device",
        bits,
        backend,
        timings["copyToDevice_time"],
    )
def testCopyDeviceToHostPerformance(self, bits):
    """Log the "copyToHost_time" metric of a Cuda "parAdd" run for ``bits``.

    Builds two elements from a ``GF2nStub`` factory created for the "Cuda"
    backend, runs "parAdd" on them, reads the result's ``_value`` attribute,
    then records the "copyToHost_time" entry of the "res" metrics under the
    label "copy device -> host".
    """
    backend = "Cuda"
    # NOTE(review): the meaning of the trailing -1 argument is not visible
    # from this block -- confirm against GF2nStub.
    make_element = GF2nStub.GF2nStub(backend, bits, -1)
    lhs = make_element()
    rhs = make_element()

    # do cuda arithmetic
    total = GF2nStub.run("parAdd", lhs, rhs)
    # The value itself is unused; the attribute read is kept because it
    # presumably triggers the device -> host transfer being timed -- confirm.
    _fetched = total._value

    timings = GF2nStub.getMetrics("res")
    perf_log = PerformanceDataLogger()
    perf_log.addPerfResult(
        "copy device -> host",
        bits,
        backend,
        timings["copyToHost_time"],
    )