Beispiel #1
0
	def testBigKernelPerformance( self, bits, framework ):
		"""Time the plain parAdd kernel and parAddLoop across grid/thread layouts.

		Runs parAdd once as a baseline ("parAdd small"), then sweeps
		num_grids in {1,2,4,8,16} x num_threads in {1024,512,256,128},
		deriving num_blocks from the operand size, and logs every measured
		run via PerformanceDataLogger.
		"""

		runs = 10

		# do cuda arithmetic
		f_gpu = GF2nStub.GF2nStub(framework, bits, -1)

		a_gpu = f_gpu()
		b_gpu = f_gpu()

		flags = 0

		GF2nStub.run("parAdd", a_gpu, b_gpu, flags, runs)

		times = GF2nStub.getEllapsedTime_ms()
		for time in times:
			PerformanceDataLogger().addPerfResult("parAdd small", bits, framework, time)

		flags = 1
		# chunk size follows the stub's register width (32- or 64-bit words)
		chunk_size = 32
		if GF2nStub.getRegisterSize() == 64:
			chunk_size = 64

		for num_grids in [2**n for n in range(0, 5)]:
			for num_threads in [1024, 512, 256, 128]:
				# Use floor division so num_blocks stays an integer: plain "/"
				# only truncates under Python 2 and would yield a float (and a
				# bogus "num_blocks" property) under Python 3.
				num_blocks = (bits + 1) // chunk_size // num_grids // num_threads

				GF2nStub.setProperty("bn_a", "num_threads", str(num_threads))
				GF2nStub.setProperty("bn_a", "num_blocks", str(num_blocks))

				GF2nStub.run("parAddLoop", a_gpu, b_gpu, flags, runs)

				times = GF2nStub.getEllapsedTime_ms()
				for time in times:
					PerformanceDataLogger().addPerfResult("parAdd big " + str(num_threads) + " " + str(num_grids), bits, framework, time)
Beispiel #2
0
    def testAddMetrics(self, bits, func):
        """Profile *func* with nvprof and log its hardware metrics and events.

        Builds an nvprof command line around ./runFunction, parses the
        profiler's text output, and stores one row per metric/event via
        PerformanceDataLogger.
        """

        runs = 10

        # Streamed kernel variants need the async flag on the command line.
        # (Renamed from "async", which is a reserved keyword in Python 3.)
        use_async = 0

        stream_funcs = (
            "parAddOwnStream", "parAdd2OwnStream",
            "parAdd4OwnStream", "parAdd8OwnStream",
            "parAddOwnStream1024Threads", "parAddOwnStream512Threads",
            "parAddOwnStream256Threads", "parAddOwnStream128Threads",
        )
        if func in stream_funcs:
            use_async = 1

        cmd = "nvprof --normalized-time-unit ms"

        metrics = [
            "achieved_occupancy", "gld_transactions", "gst_transactions",
            "inst_per_warp", "gst_throughput", "gld_throughput",
            "gld_efficiency", "gst_efficiency", "sm_efficiency"
        ]
        events = [
            "active_warps", "warps_launched", "threads_launched",
            "gld_request", "gst_request"
        ]

        if metrics:
            cmd = cmd + " --metrics " + ",".join(metrics)

        if events:
            cmd = cmd + " --events " + ",".join(events)

        cmd = cmd + " ./runFunction " + func + " " + str(bits) + " " + str(
            runs) + " " + str(use_async)

        res = commands.getoutput(cmd).split("\n")

        def _columns(name):
            # First profiler output line mentioning *name*, split into its
            # non-empty whitespace-separated columns. (Replaces nested
            # filter(...)[0], which is not subscriptable under Python 3.)
            matching = [x for x in res if x.find(name) >= 0]
            return [c for c in matching[0].strip().split(" ") if c != ""]

        # store metrics -- the last three columns carry the reported values
        for m in metrics:
            line = _columns(m)
            PerformanceDataLogger().addMetricResult(func, m, bits, runs,
                                                    convertUnit(line[-3]),
                                                    convertUnit(line[-2]),
                                                    convertUnit(line[-1]))

        # store events -- same column layout as the metrics lines
        for e in events:
            line = _columns(e)
            PerformanceDataLogger().addMetricResult(func, e, bits, runs,
                                                    convertUnit(line[-3]),
                                                    convertUnit(line[-2]),
                                                    convertUnit(line[-1]))
Beispiel #3
0
    def testAddTimeDeviation(self, bits, func):
        """Profile *func* with nvprof and log the kernel's runtime spread.

        Finds the profiler summary line for "<func>Kernel" and stores its
        timing columns (reordered min/max/avg -> avg/min/max) via
        PerformanceDataLogger.
        """

        runs = 10

        # Streamed kernel variants need the async flag on the command line.
        # (Renamed from "async", which is a reserved keyword in Python 3.)
        use_async = 0

        stream_funcs = (
            "parAddOwnStream", "parAdd2OwnStream",
            "parAdd4OwnStream", "parAdd8OwnStream",
            "parAddOwnStream1024Threads", "parAddOwnStream512Threads",
            "parAddOwnStream256Threads", "parAddOwnStream128Threads",
        )
        if func in stream_funcs:
            use_async = 1

        profiler_cmd = "nvprof --normalized-time-unit ms"

        cmd = profiler_cmd + " ./runFunction " + func + " " + str(
            bits) + " " + str(runs) + " " + str(use_async)

        res = commands.getoutput(cmd).split(
            "\n")  ## min max avg -> avg min max

        # Store netto runtime: locate the profiler line for "<func>Kernel"
        # (case-insensitive) and split it into non-empty columns.
        # (Replaces nested filter(...)[0], not subscriptable in Python 3.)
        kernel_name = (func + "kernel").lower()
        matching = [x for x in res if x.lower().find(kernel_name) >= 0]
        line = [c for c in matching[0].strip().split(" ") if c != ""]
        PerformanceDataLogger().addMetricResult(func, "Kernel runtime", bits,
                                                runs, convertUnit(line[4]),
                                                convertUnit(line[5]),
                                                convertUnit(line[3]))
    def testAddPerformance(self, bits, framework, func):
        """Time an addition kernel and log every measured run.

        For the Cuda backend the named *func* is executed directly; other
        frameworks fall back to their generic "add" entry point. Streamed
        Cuda variants additionally get bit 1 set in the flags word.
        """

        runs = 100

        # do cuda arithmetic
        f_gpu = GF2nStub.GF2nStub(framework, bits, -1)

        a_gpu = f_gpu()
        b_gpu = f_gpu()

        flags = 0

        # Own-stream (async) kernel variants -- replaces the original
        # eight-way "or" chain with a membership test.
        stream_funcs = (
            "parAddOwnStream", "parAdd2OwnStream",
            "parAdd4OwnStream", "parAdd8OwnStream",
            "parAddOwnStream1024Threads", "parAddOwnStream512Threads",
            "parAddOwnStream256Threads", "parAddOwnStream128Threads",
        )
        if func in stream_funcs and framework == "Cuda":
            flags = flags | 2

        if framework == "Cuda":
            GF2nStub.run(func, a_gpu, b_gpu, flags, runs)
        else:
            GF2nStub.run("add", a_gpu, b_gpu, flags, runs)

        times = GF2nStub.getEllapsedTime_ms()
        for time in times:
            PerformanceDataLogger().addPerfResult(
              func, bits, framework, time)
Beispiel #5
0
    def testFrameworkOverhead(self, bits):
        """Compare the full Python-side call time against the stub's own timing."""

        # set up cuda operands
        field = GF2nStub.GF2nStub("Cuda", bits, -1)
        op_a = field()
        op_b = field()

        # wall-clock the entire stub invocation (Python framework + prng)
        t_start = timeit.default_timer()
        GF2nStub.run("measureKernelLaunchOverhead", op_a, op_b)
        elapsed = timeit.default_timer() - t_start
        PerformanceDataLogger().addPerfResult(
            "FrameworkOverhead - All", bits, "Cuda", elapsed)

        # function-only time as reported by the stub itself
        stub_times = GF2nStub.getEllapsedTime_ms()
        PerformanceDataLogger().addPerfResult("FrameworkOverhead - OnlyFunc",
                                              bits, "Cuda", stub_times[0])
    def testCopyHostToDevicePerformance(self, bits):
        """Log the host-to-device copy time reported for operand "bn_a"."""

        # set up cuda operands
        field = GF2nStub.GF2nStub("Cuda", bits, -1)
        op_a = field()
        op_b = field()

        GF2nStub.run("measureKernelLaunchOverhead", op_a, op_b)

        copy_time = GF2nStub.getMetrics("bn_a")["copyToDevice_time"]
        PerformanceDataLogger().addPerfResult("copy host -> device", bits,
                                              "Cuda", copy_time)
    def testCopyDeviceToHostPerformance(self, bits):
        """Log the device-to-host copy time reported for the "res" operand."""

        # set up cuda operands
        field = GF2nStub.GF2nStub("Cuda", bits, -1)
        op_a = field()
        op_b = field()

        result = GF2nStub.run("parAdd", op_a, op_b)
        # NOTE(review): reading ._value appears to be what triggers the
        # device -> host transfer measured below -- keep this access in place.
        result_value = result._value

        copy_time = GF2nStub.getMetrics("res")["copyToHost_time"]
        PerformanceDataLogger().addPerfResult("copy device -> host", bits,
                                              "Cuda", copy_time)
Beispiel #8
0
    def testInvWithExpElementPerformance(self, bits, function, framework):
        """Time an inversion kernel; OpenSSL uses its fixed "inverse" entry point."""

        runs = 1

        field = GF2nStub.GF2nStub(framework, bits)
        elem = field()

        # OpenSSL exposes a single inversion routine; other frameworks run
        # the requested kernel by name.
        kernel = "inverse" if framework == "OpenSSL" else function
        res = GF2nStub.run(kernel, elem, 0, 0, runs)

        for elapsed in GF2nStub.getEllapsedTime_ms():
            PerformanceDataLogger().addPerfResult(function, bits, framework,
                                                  elapsed)
Beispiel #9
0
    def testExpPerformance(self, bits, k, function, framework):
        """Time exponentiation by *k* over a randomly chosen field polynomial."""

        runs = 10

        # random polynomial of bits+1 bits; "| 1" forces the constant term
        rand_irred_poly = GF2nStub.getRandomNumber(bits + 1, 23)
        field = GF2nStub.GF2nStub(framework, bits, rand_irred_poly | 1)

        elem = field()

        # OpenSSL exposes a single "exp" routine; other frameworks run the
        # requested kernel by name.
        kernel = "exp" if framework == "OpenSSL" else function
        res = GF2nStub.run(kernel, elem, k, 0, runs)

        label = function + ", k=" + str(k)
        for elapsed in GF2nStub.getEllapsedTime_ms():
            PerformanceDataLogger().addPerfResult(label, bits, framework,
                                                  elapsed)
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import getopt

sys.path.append("../01_Testbench/")
sys.path.append("../01_Testbench/pyGF2n/")

from GF2nTest import GF2nTestSuite
from PerformanceDataLogger import PerformanceDataLogger

if __name__ == '__main__':
    # Command line: -h/--help prints usage, -g/--testgroups restricts which
    # test groups the suite executes.
    opts, args = getopt.getopt(sys.argv[1:], "hg:", ["help", "testgroups="])

    testgroup_filter = None

    for o, a in opts:
        if o in ["-h", "--help"]:
            # The original branch was "pass", silently ignoring the help
            # flag the option string advertises -- print usage and exit.
            print("usage: %s [-h|--help] [-g|--testgroups <filter>]" % sys.argv[0])
            sys.exit(0)
        elif o in ["-g", "--testgroups"]:
            testgroup_filter = a

    suite = GF2nTestSuite(ts_name="libcumffa Performance Tests")
    suite.run(testgroup_filter)

    PerformanceDataLogger().analyse_and_print_perf_results()
    PerformanceDataLogger().analyse_and_print_metric_results()