예제 #1
0
	def testBigKernelPerformance( self, bits, framework ):
		"""Time the "parAdd" and "parAddLoop" operations and log every result.

		"parAdd" is run once with flags=0 and its timings are logged under
		"parAdd small".  "parAddLoop" is then run with flags=1 for every
		combination of num_grids in (1, 2, 4, 8, 16) and num_threads in
		(1024, 512, 256, 128); those timings are logged under
		"parAdd big <num_threads> <num_grids>".

		Args:
			bits: passed to the GF2nStub.GF2nStub constructor, used to derive
				num_blocks, and recorded with every logged result.
			framework: passed to the GF2nStub.GF2nStub constructor and
				recorded with every logged result.
		"""
		runs = 10

		# NOTE(review): the third constructor argument (-1) is passed as-is;
		# its meaning is defined by GF2nStub -- confirm there.
		f_gpu = GF2nStub.GF2nStub(framework, bits, -1)

		a_gpu = f_gpu()
		b_gpu = f_gpu()

		flags = 0
		GF2nStub.run("parAdd", a_gpu, b_gpu, flags, runs)

		for elapsed in GF2nStub.getEllapsedTime_ms():
			PerformanceDataLogger().addPerfResult("parAdd small", bits, framework, elapsed)

		flags = 1
		# chunk size follows the register size reported by the stub (64, else 32)
		chunk_size = 32
		if GF2nStub.getRegisterSize() == 64:
			chunk_size = 64

		for num_grids in [2**n for n in range(0,5)]:
			for num_threads in [1024, 512, 256, 128]:
				# Explicit floor division: identical to the former "/" for
				# ints under Python 2, and still an integer under Python 3
				# or "from __future__ import division".
				# num_grids is only used here and in the log label; it is
				# not handed to GF2nStub directly.
				# NOTE(review): for small bits this evaluates to 0 -- confirm
				# that "num_blocks" == "0" is acceptable to the stub.
				num_blocks = (bits+1)//chunk_size//num_grids//num_threads

				GF2nStub.setProperty("bn_a", "num_threads", str(num_threads))
				GF2nStub.setProperty("bn_a", "num_blocks", str(num_blocks))

				GF2nStub.run("parAddLoop", a_gpu, b_gpu, flags, runs)

				label = "parAdd big " + str(num_threads) + " " + str(num_grids)
				for elapsed in GF2nStub.getEllapsedTime_ms():
					PerformanceDataLogger().addPerfResult(label, bits, framework, elapsed)
예제 #2
0
    def testAddPerformance(self, bits, framework, func):
        """Time an addition operation and log every measured run.

        On the "Cuda" framework the operation named by ``func`` is run; on
        any other framework the operation "add" is run instead.  In both
        cases the timings are logged under the name ``func``.

        Args:
            bits: passed to the GF2nStub.GF2nStub constructor and recorded
                with every logged result.
            framework: backend name; selects which operation is run.
            func: operation name used on "Cuda" and as the log label.
        """
        own_stream_funcs = (
            "parAddOwnStream",
            "parAdd2OwnStream",
            "parAdd4OwnStream",
            "parAdd8OwnStream",
            'parAddOwnStream1024Threads',
            'parAddOwnStream512Threads',
            'parAddOwnStream256Threads',
            'parAddOwnStream128Threads',
        )
        repetitions = 100
        on_cuda = framework == "Cuda"

        field = GF2nStub.GF2nStub(framework, bits, -1)
        lhs = field()
        rhs = field()

        # flag value 2 is set only for the own-stream variants on Cuda
        run_flags = 2 if (func in own_stream_funcs and on_cuda) else 0
        operation = func if on_cuda else "add"
        GF2nStub.run(operation, lhs, rhs, run_flags, repetitions)

        for elapsed in GF2nStub.getEllapsedTime_ms():
            PerformanceDataLogger().addPerfResult(
                func, bits, framework, elapsed)
예제 #3
0
    def testCopyHostToDevicePerformance(self, bits):
        """Log the "copyToDevice_time" metric of "bn_a" for a Cuda run.

        Runs "measureKernelLaunchOverhead" on two stub elements, then reads
        the metrics for "bn_a" and logs the entry "copyToDevice_time" under
        the label "copy host -> device".

        Args:
            bits: passed to the GF2nStub.GF2nStub constructor and recorded
                with the logged result.
        """
        field = GF2nStub.GF2nStub("Cuda", bits, -1)
        lhs, rhs = field(), field()

        GF2nStub.run("measureKernelLaunchOverhead", lhs, rhs)

        # NOTE(review): "bn_a" presumably names the first operand inside the
        # stub -- confirm against GF2nStub.
        bn_a_metrics = GF2nStub.getMetrics("bn_a")
        PerformanceDataLogger().addPerfResult(
            "copy host -> device", bits, "Cuda",
            bn_a_metrics["copyToDevice_time"])
예제 #4
0
    def testMySecondMul(self, bits, func):

        # do cuda arithmetic
        f_gpu = GF2nStub.GF2nStub("Cuda", bits)

        a_gpu = f_gpu()
        b_gpu = f_gpu()

        res_gpu = GF2nStub.run(func, a_gpu, b_gpu)

        rand_a = GF2nStub.getRandomNumber(bits, 42)
        rand_b = GF2nStub.getRandomNumber(bits, 84)

        print "a = ", rand_a
        print "b = ", rand_b

        f_ref = GF2n.GF2n(bits)

        a_ref = f_ref(rand_a)
        b_ref = f_ref(rand_b)

        res_ref = a_ref * b_ref

        print "res_ref = ", res_ref

        # compare results
        self.assertEqual(res_gpu, res_ref)
예제 #5
0
    def testInvWithExpElementPerformance(self, bits, function, framework):
        """Time a single inversion run and log the measured times.

        For the "OpenSSL" framework the operation "inverse" is run; for any
        other framework the operation named by ``function`` is run.  The
        timings are always logged under ``function``.

        Args:
            bits: passed to the GF2nStub.GF2nStub constructor and recorded
                with every logged result.
            function: operation name for non-OpenSSL frameworks and the
                log label.
            framework: backend name; selects which operation is run.
        """
        repetitions = 1

        field = GF2nStub.GF2nStub(framework, bits)
        element = field()

        operation = "inverse" if framework == "OpenSSL" else function
        # the returned value is not inspected; only the timings are logged
        outcome = GF2nStub.run(operation, element, 0, 0, repetitions)

        for elapsed in GF2nStub.getEllapsedTime_ms():
            PerformanceDataLogger().addPerfResult(
                function, bits, framework, elapsed)
예제 #6
0
    def testExpPerformance(self, bits, k, function, framework):
        """Time an exponentiation with exponent ``k`` and log every run.

        For the "OpenSSL" framework the operation "exp" is run; for any other
        framework the operation named by ``function`` is run.  Timings are
        logged under "<function>, k=<k>".

        Args:
            bits: field size; also recorded with every logged result.
            k: second operand passed to GF2nStub.run (the exponent).
            function: operation name for non-OpenSSL frameworks and part of
                the log label.
            framework: backend name; selects which operation is run.
        """
        repetitions = 10

        # A (bits + 1)-bit random number with its lowest bit forced to 1 is
        # passed as the third constructor argument.
        # NOTE(review): the value is random; nothing here checks that it is
        # actually irreducible -- confirm that is acceptable for timing.
        modulus = GF2nStub.getRandomNumber(bits + 1, 23) | 1
        field = GF2nStub.GF2nStub(framework, bits, modulus)
        base = field()

        operation = "exp" if framework == "OpenSSL" else function
        # the returned value is not inspected; only the timings are logged
        outcome = GF2nStub.run(operation, base, k, 0, repetitions)

        label = function + ", k=" + str(k)
        for elapsed in GF2nStub.getEllapsedTime_ms():
            PerformanceDataLogger().addPerfResult(
                label, bits, framework, elapsed)
예제 #7
0
    def testFrameworkOverhead(self, bits):
        """Log the wall-clock time of a stub run and the stub's own timing.

        Two results are logged for "Cuda":
          * "FrameworkOverhead - All": wall-clock duration of the
            GF2nStub.run("measureKernelLaunchOverhead", ...) call, measured
            with timeit.default_timer().
          * "FrameworkOverhead - OnlyFunc": the first entry returned by
            GF2nStub.getEllapsedTime_ms() after that run.

        Args:
            bits: passed to the GF2nStub.GF2nStub constructor and recorded
                with both logged results.
        """
        f_gpu = GF2nStub.GF2nStub("Cuda", bits, -1)

        a_gpu = f_gpu()
        b_gpu = f_gpu()

        # measure the complete call as seen from Python
        start_stub_run = timeit.default_timer()
        GF2nStub.run("measureKernelLaunchOverhead", a_gpu, b_gpu)
        # Read the clock immediately after the run returns.  Taking the end
        # timestamp inside the addPerfResult(...) argument list would also
        # count the time needed to evaluate PerformanceDataLogger().
        total_elapsed = timeit.default_timer() - start_stub_run

        # NOTE(review): timeit.default_timer() yields seconds, while the
        # second value comes from a getter whose name suggests milliseconds.
        # Confirm whether the logger expects a common unit.
        PerformanceDataLogger().addPerfResult(
            "FrameworkOverhead - All", bits, "Cuda", total_elapsed)

        # timing reported by the stub itself for the same run
        times = GF2nStub.getEllapsedTime_ms()
        PerformanceDataLogger().addPerfResult("FrameworkOverhead - OnlyFunc",
                                              bits, "Cuda", times[0])
예제 #8
0
    def testCopyDeviceToHostPerformance(self, bits):
        """Log the "copyToHost_time" metric of "res" for a Cuda "parAdd" run.

        Runs "parAdd" on two stub elements, reads the result's ``_value``
        attribute, then logs the "copyToHost_time" entry of the "res"
        metrics under the label "copy device -> host".

        Args:
            bits: passed to the GF2nStub.GF2nStub constructor and recorded
                with the logged result.
        """
        field = GF2nStub.GF2nStub("Cuda", bits, -1)
        lhs, rhs = field(), field()

        outcome = GF2nStub.run("parAdd", lhs, rhs)
        # The attribute is read before the metrics are queried and the value
        # itself is unused.
        # NOTE(review): presumably this access is what triggers the
        # device -> host copy being measured -- confirm in GF2nStub.
        fetched_value = outcome._value

        res_metrics = GF2nStub.getMetrics("res")
        PerformanceDataLogger().addPerfResult(
            "copy device -> host", bits, "Cuda",
            res_metrics["copyToHost_time"])
예제 #9
0
    def testOpenSSLExponentiation(self, bits, exp):
        """Check the OpenSSL "exp" operation against the GF2n reference.

        Args:
            bits: field size handed to both the stub and the reference field.
            exp: exponent passed to GF2nStub.run and used for ``**`` on the
                reference element.
        """
        # run the exponentiation through the OpenSSL stub
        ssl_field = GF2nStub.GF2nStub("OpenSSL", bits)
        base_stub = ssl_field()
        actual = GF2nStub.run("exp", base_stub, exp)

        # calculate the reference value
        # NOTE(review): presumably getRandomNumber(bits, 42) reproduces the
        # value the stub element was created with -- confirm in GF2nStub.
        base_value = GF2nStub.getRandomNumber(bits, 42)
        expected = GF2n.GF2n(bits)(base_value) ** exp

        self.assertEqual(actual, expected)
예제 #10
0
    def testOpenSSLInverseElement(self, bits):
        """Check the OpenSSL "inverse" operation against the GF2n reference.

        Args:
            bits: field size handed to both the stub and the reference field.
        """
        # run the inversion through the OpenSSL stub
        ssl_field = GF2nStub.GF2nStub("OpenSSL", bits)
        element_stub = ssl_field()
        actual = GF2nStub.run("inverse", element_stub, 0)

        # calculate the reference value
        # NOTE(review): presumably getRandomNumber(bits, 42) reproduces the
        # value the stub element was created with -- confirm in GF2nStub.
        element_value = GF2nStub.getRandomNumber(bits, 42)
        expected = GF2n.GF2n(bits)(element_value).inverse()

        self.assertEqual(actual, expected)
예제 #11
0
    def testCudaExponentiation(self, bits, k, func):
        """Check a Cuda exponentiation operation against the GF2n reference.

        Args:
            bits: field size handed to both the stub and the reference field.
            k: exponent passed to GF2nStub.run and used for ``**`` on the
                reference element.
            func: name of the Cuda operation to run.
        """
        # run the exponentiation through the Cuda stub
        cuda_field = GF2nStub.GF2nStub("Cuda", bits)
        base_stub = cuda_field()
        actual = GF2nStub.run(func, base_stub, k)

        # calculate the reference value
        # NOTE(review): presumably getRandomNumber(bits, 42) reproduces the
        # value the stub element was created with -- confirm in GF2nStub.
        base_value = GF2nStub.getRandomNumber(bits, 42)
        expected = GF2n.GF2n(bits)(base_value) ** k

        self.assertEqual(actual, expected)
예제 #12
0
    def testCudaMultiplication(self, bits, func):
        """Check a Cuda multiplication operation against the GF2n reference.

        Args:
            bits: field size handed to both the stub and the reference field.
            func: name of the Cuda operation to run.
        """
        # run the multiplication through the Cuda stub
        cuda_field = GF2nStub.GF2nStub("Cuda", bits)
        lhs_stub, rhs_stub = cuda_field(), cuda_field()
        actual = GF2nStub.run(func, lhs_stub, rhs_stub)

        # calculate the reference value
        # NOTE(review): presumably the arguments 42 and 84 reproduce the
        # values the stub elements were created with -- confirm in GF2nStub.
        lhs_value = GF2nStub.getRandomNumber(bits, 42)
        rhs_value = GF2nStub.getRandomNumber(bits, 84)

        ref_field = GF2n.GF2n(bits)
        expected = ref_field(lhs_value) * ref_field(rhs_value)

        self.assertEqual(actual, expected)