def test_occupancy_32_9_21_threads(self):
        """Check thread and multiplication counts after generating one GEMM.

        Three strided, non-transposed matrices are handed to the generator,
        which is then expected to report 32 active threads and 2
        multiplications per block.
        """
        shared = dict(addressing="strided", transpose=False)

        lhs = DenseMatrix(num_rows=32, num_cols=56, bbox=[0, 0, 31, 20], **shared)
        rhs = DenseMatrix(num_rows=32, num_cols=56, bbox=[0, 0, 20, 8], **shared)
        out = DenseMatrix(num_rows=32, num_cols=9, bbox=[0, 0, 31, 8], **shared)

        self.gen.generate(lhs, rhs, out, alpha=1.1, beta=1.1)

        # NOTE(review): this test previously carried a comment stating "one
        # multiplication per block because the kernel requires too much of
        # shared memory", yet the assertion expects 2 -- confirm which one
        # reflects the intended behavior.
        generator = self.gen
        self.assertEqual(generator.num_active_threads, 32)
        self.assertEqual(generator.num_mult_per_block, 2)
  def test_exact_transpose_loader(self):
    """Expect ExactTransposePatchLoader for transposed, 'none'-addressed input.

    Both cases ask the factory for a load-and-transpose loader with 32
    active threads; only the row count and the bounding box differ.
    """
    cases = (
      (33, [0, 0, 15, 20]),  # load a column in one go
      (65, [0, 0, 34, 20]),  # multiple hops to load a column
    )

    for rows, patch in cases:
      loader = shm_mem_factory(vm=self._vm,
                               matrix=DenseMatrix(num_rows=rows,
                                                  num_cols=56,
                                                  addressing='none',
                                                  bbox=patch,
                                                  transpose=True),
                               num_active_threads=32,
                               load_and_transpose=True)
      self.assertIsInstance(loader, ExactTransposePatchLoader)
    def test_occupancy_56_9_9_threads(self):
        """Check thread and multiplication counts after generating one GEMM.

        A 56x9 matrix, a 9x9 matrix and a 56x9 result (all strided and
        non-transposed) are handed to the generator, which is then expected
        to report 64 active threads and 1 multiplication per block.
        """
        shared = dict(addressing="strided", transpose=False)

        lhs = DenseMatrix(num_rows=56, num_cols=9, bbox=[0, 0, 55, 8], **shared)
        rhs = DenseMatrix(num_rows=9, num_cols=9, bbox=[0, 0, 8, 8], **shared)
        out = DenseMatrix(num_rows=56, num_cols=9, bbox=[0, 0, 55, 8], **shared)

        self.gen.generate(lhs, rhs, out, alpha=1.1, beta=1.1)

        generator = self.gen
        self.assertEqual(generator.num_active_threads, 64)
        self.assertEqual(generator.num_mult_per_block, 1)
Example #4
0
def produce_matrix(spec):
    """Build a DenseMatrix from a specification mapping.

    The mapping must provide the keys "num_rows", "num_cols", "addressing",
    "bbox" and "trans"; the value under "trans" is passed on as the
    ``transpose`` argument. A missing key raises ``KeyError``.
    """
    passthrough = ("num_rows", "num_cols", "addressing", "bbox")
    kwargs = {field: spec[field] for field in passthrough}
    # The spec abbreviates the flag name, the constructor spells it out.
    kwargs["transpose"] = spec["trans"]
    return DenseMatrix(**kwargs)
Example #5
0
 def _produce_matrix(self, matrix_spec):
     """Build a DenseMatrix from a specification mapping.

     Reads the keys "rows", "cols", "addressing", "bbox" and "trans" from
     ``matrix_spec`` and forwards them to the DenseMatrix constructor.
     A missing key raises ``KeyError``.
     """
     spec_keys = ("rows", "cols", "addressing", "bbox", "trans")
     rows, cols, addressing, bbox, transposed = (
         matrix_spec[key] for key in spec_keys
     )
     return DenseMatrix(num_rows=rows,
                        num_cols=cols,
                        addressing=addressing,
                        bbox=bbox,
                        transpose=transposed)
Example #6
0
    "--manufacturer",
    action="store",
    help="Name of the Manufacturer, currently nvidia and amd are supported",
    default="nvidia")
# Optional GPU sub-architecture selector; falls back to "sm_60" when omitted.
parser.add_argument("-s",
                    "--sub_arch",
                    default="sm_60",
                    action="store",
                    help="Sub_arch of the GPU, e.g sm_60 for Nvidia or gfx906 for AMD")

args = parser.parse_args()

# Two separate matrix objects built from the same 9x9 strided description;
# each one receives its own freshly created bbox list.
mat_a, mat_b = (DenseMatrix(num_rows=9,
                            num_cols=9,
                            addressing="strided",
                            bbox=[0, 0, 8, 8],
                            transpose=False)
                for _ in range(2))

try:
    vm = vm_factory(name=args.manufacturer,
                    sub_name=args.sub_arch,
                    fp_type="float")

    gen = CsaGenerator(vm)