def testCopyHostToDevicePerformance(self, bits):
        """Log the host -> device copy time reported for Cuda operands.

        Builds a "Cuda" GF2nStub from ``bits``, creates two operands from it,
        runs the "measureKernelLaunchOverhead" operation on them and then
        records the "copyToDevice_time" entry of the "bn_a" metrics via
        PerformanceDataLogger under the label "copy host -> device".

        bits -- forwarded to the GF2nStub constructor and to addPerfResult.
        """
        backend = "Cuda"
        # NOTE(review): the meaning of the trailing -1 is not visible from
        # this method -- confirm against GF2nStub.
        field = GF2nStub.GF2nStub(backend, bits, -1)

        # Operands are created strictly one after the other (left to right).
        lhs, rhs = field(), field()
        GF2nStub.run("measureKernelLaunchOverhead", lhs, rhs)

        timings = GF2nStub.getMetrics("bn_a")
        perf_log = PerformanceDataLogger()
        perf_log.addPerfResult(
            "copy host -> device",
            bits,
            backend,
            timings["copyToDevice_time"],
        )
    def testCopyDeviceToHostPerformance(self, bits):
        """Log the device -> host copy time reported for a Cuda "parAdd" result.

        Builds a "Cuda" GF2nStub from ``bits``, creates two operands from it,
        runs "parAdd" on them, reads the result's ``_value`` and then records
        the "copyToHost_time" entry of the "res" metrics via
        PerformanceDataLogger under the label "copy device -> host".

        bits -- forwarded to the GF2nStub constructor and to addPerfResult.
        """
        backend = "Cuda"
        # NOTE(review): the meaning of the trailing -1 is not visible from
        # this method -- confirm against GF2nStub.
        field = GF2nStub.GF2nStub(backend, bits, -1)

        # Operands are created strictly one after the other (left to right).
        lhs, rhs = field(), field()
        total = GF2nStub.run("parAdd", lhs, rhs)

        # The value itself is not used afterwards; presumably reading it is
        # what triggers the device -> host transfer being timed -- TODO confirm.
        fetched = total._value

        timings = GF2nStub.getMetrics("res")
        perf_log = PerformanceDataLogger()
        perf_log.addPerfResult(
            "copy device -> host",
            bits,
            backend,
            timings["copyToHost_time"],
        )