def testBigKernelPerformance(self, bits, framework):
    """Benchmark "parAdd" once, then sweep launch settings for "parAddLoop".

    The "parAdd" kernel is run with flags 0 and every time returned by
    GF2nStub.getEllapsedTime_ms() is logged under "parAdd small".

    Afterwards, for every num_grids in 1, 2, 4, 8, 16 and every num_threads
    in 1024, 512, 256, 128, the "num_threads" and "num_blocks" properties of
    "bn_a" are set, "parAddLoop" is run with flags 1, and every returned time
    is logged under "parAdd big <num_threads> <num_grids>".

    Args:
        bits: size argument passed to the GF2nStub.GF2nStub constructor; also
            used to derive num_blocks and recorded with every result.
        framework: backend name passed to the stub constructor and recorded
            with every result.
    """
    runs = 10

    # do cuda arithmetic
    # NOTE(review): the meaning of the third constructor argument (-1) is not
    # visible from this method - confirm against GF2nStub.
    f_gpu = GF2nStub.GF2nStub(framework, bits, -1)
    a_gpu = f_gpu()
    b_gpu = f_gpu()

    GF2nStub.run("parAdd", a_gpu, b_gpu, 0, runs)
    for elapsed_ms in GF2nStub.getEllapsedTime_ms():
        PerformanceDataLogger().addPerfResult(
            "parAdd small", bits, framework, elapsed_ms)

    flags = 1
    # Chunk width is 32 unless the stub reports a register size of 64.
    chunk_size = 64 if GF2nStub.getRegisterSize() == 64 else 32

    for num_grids in (1, 2, 4, 8, 16):
        for num_threads in (1024, 512, 256, 128):
            # Floor division keeps num_blocks an integer on Python 3 as well;
            # true division would hand a float string such as "0.5" to
            # setProperty. For small bits this can evaluate to 0, exactly as
            # integer division always did.
            num_blocks = (bits + 1) // chunk_size // num_grids // num_threads
            GF2nStub.setProperty("bn_a", "num_threads", str(num_threads))
            GF2nStub.setProperty("bn_a", "num_blocks", str(num_blocks))

            GF2nStub.run("parAddLoop", a_gpu, b_gpu, flags, runs)

            label = "parAdd big " + str(num_threads) + " " + str(num_grids)
            for elapsed_ms in GF2nStub.getEllapsedTime_ms():
                PerformanceDataLogger().addPerfResult(
                    label, bits, framework, elapsed_ms)
def testAddPerformance(self, bits, framework, func):
    """Time an addition kernel and log every measured sample under func.

    On the "Cuda" framework the kernel named by func is run; on any other
    framework the "add" operation is run instead, but results are still
    logged under func. The flag value 2 is passed only when the framework is
    "Cuda" and func is one of the own-stream kernel names listed below;
    otherwise flags is 0.

    Args:
        bits: size argument passed to the GF2nStub.GF2nStub constructor and
            recorded with every result.
        framework: backend name; "Cuda" selects the kernel given by func.
        func: kernel name to run on Cuda and the label used for logging.
    """
    own_stream_kernels = (
        "parAddOwnStream",
        "parAdd2OwnStream",
        "parAdd4OwnStream",
        "parAdd8OwnStream",
        "parAddOwnStream1024Threads",
        "parAddOwnStream512Threads",
        "parAddOwnStream256Threads",
        "parAddOwnStream128Threads",
    )
    repetitions = 100

    # do cuda arithmetic
    field = GF2nStub.GF2nStub(framework, bits, -1)
    lhs = field()
    rhs = field()

    on_cuda = framework == "Cuda"
    mode = 2 if (func in own_stream_kernels and on_cuda) else 0
    kernel = func if on_cuda else "add"
    GF2nStub.run(kernel, lhs, rhs, mode, repetitions)

    for sample in GF2nStub.getEllapsedTime_ms():
        PerformanceDataLogger().addPerfResult(func, bits, framework, sample)
def testInvWithExpElementPerformance(self, bits, function, framework):
    """Time a single inversion run and log each sample under function.

    For the "OpenSSL" framework the "inverse" operation is run; for every
    other framework the operation named by function is run. In both cases
    the results are logged with function as the label.

    Args:
        bits: size argument passed to the GF2nStub.GF2nStub constructor and
            recorded with every result.
        function: operation name for non-OpenSSL frameworks and the label
            used for logging.
        framework: backend name passed to the stub constructor.
    """
    repetitions = 1
    field = GF2nStub.GF2nStub(framework, bits)
    element = field()

    operation = "inverse" if framework == "OpenSSL" else function
    # The value returned by run() is not used; timings are read back
    # separately below.
    GF2nStub.run(operation, element, 0, 0, repetitions)

    for sample in GF2nStub.getEllapsedTime_ms():
        PerformanceDataLogger().addPerfResult(function, bits, framework, sample)
def testExpPerformance(self, bits, k, function, framework):
    """Time exponentiation with exponent k and log each sample.

    For the "OpenSSL" framework the "exp" operation is run; for every other
    framework the operation named by function is run. Results are logged
    under the label "<function>, k=<k>".

    Args:
        bits: size argument passed to the GF2nStub.GF2nStub constructor and
            recorded with every result.
        k: value passed as the second operand of the run.
        function: operation name for non-OpenSSL frameworks and the prefix
            of the logging label.
        framework: backend name passed to the stub constructor.
    """
    repetitions = 10

    # Random value from the stub with its lowest bit forced to 1.
    # NOTE(review): presumably this serves as the field's reduction
    # polynomial, and 23 looks like a seed - confirm against GF2nStub.
    modulus = GF2nStub.getRandomNumber(bits + 1, 23) | 1
    field = GF2nStub.GF2nStub(framework, bits, modulus)
    base = field()

    operation = "exp" if framework == "OpenSSL" else function
    # The value returned by run() is not used; timings are read back
    # separately below.
    GF2nStub.run(operation, base, k, 0, repetitions)

    for sample in GF2nStub.getEllapsedTime_ms():
        label = function + ", k=" + str(k)
        PerformanceDataLogger().addPerfResult(label, bits, framework, sample)
def testFrameworkOverhead(self, bits):
    """Log wall-clock time versus stub-reported time for one Cuda run.

    Runs "measureKernelLaunchOverhead" once on the "Cuda" framework and logs
    two results, both in milliseconds:

    * "FrameworkOverhead - All": wall-clock duration of the GF2nStub.run()
      call as seen from Python.
    * "FrameworkOverhead - OnlyFunc": the first entry returned by
      GF2nStub.getEllapsedTime_ms().

    Args:
        bits: size argument passed to the GF2nStub.GF2nStub constructor and
            recorded with both results.
    """
    # do cuda arithmetic
    f_gpu = GF2nStub.GF2nStub("Cuda", bits, -1)
    a_gpu = f_gpu()
    b_gpu = f_gpu()

    # measure test run with python framework and prng
    start_stub_run = timeit.default_timer()
    GF2nStub.run("measureKernelLaunchOverhead", a_gpu, b_gpu)
    # Stop the clock before the logger is constructed so that only the run
    # itself is measured. default_timer() reports seconds, so convert to
    # milliseconds to match the unit of getEllapsedTime_ms() below.
    total_ms = (timeit.default_timer() - start_stub_run) * 1000.0
    PerformanceDataLogger().addPerfResult(
        "FrameworkOverhead - All", bits, "Cuda", total_ms)

    # measure test run without python framework and without prng
    times = GF2nStub.getEllapsedTime_ms()
    PerformanceDataLogger().addPerfResult(
        "FrameworkOverhead - OnlyFunc", bits, "Cuda", times[0])