B1 = np.random.randint(0x0, 0x7fff, size=dim2).astype(np.int64) A2 = fp.array(A1.astype(np.int16), iwlA) B2 = fp.array(B1.astype(np.int16), iwlB) # pick a reasonable output integer word length iwlC = ((struct.unpack('I', struct.pack('f', float( 0x7fff * 0x7fff * k / 2)))[0] & 0x7f800000) >> 23) - 126 C2 = fp.empty((m, n), iwlC) start = drv.Event() end = drv.Event() start.record() for r in range(repeat): if op == 'nt': fp.dot(A2.T, B2, C2) elif op == 'nn': fp.dot(A2, B2, C2) elif op == 'tn': fp.dot(A2, B2.T, C2) end.record() end.synchronize() msecs = end.time_since(start) / repeat gflops = (m * n * k * 2.0) / (msecs * 1000000.0) print 'GFLOPS: ', gflops
A1 = np.random.randint(0x0, 0x7fff, size=dim1).astype(np.int64) B1 = np.random.randint(0x0, 0x7fff, size=dim2).astype(np.int64) A2 = fp.array(A1.astype(np.int16), iwlA) B2 = fp.array(B1.astype(np.int16), iwlB) # pick a reasonable output integer word length iwlC = ((struct.unpack('I',struct.pack('f',float(0x7fff * 0x7fff * k / 2)))[0] & 0x7f800000) >> 23)-126 C2 = fp.empty((m,n), iwlC) start = drv.Event() end = drv.Event() start.record() for r in range(repeat): if op == 'nt': fp.dot(A2.T, B2, C2) elif op == 'nn': fp.dot(A2, B2, C2) elif op == 'tn': fp.dot(A2, B2.T, C2) end.record() end.synchronize() msecs = end.time_since(start) / repeat gflops = (m * n * k * 2.0) / (msecs * 1000000.0) print 'GFLOPS: ', gflops