def _gen_partition_full(self, wlkey='BASE', dnkey='BASE'):
    '''
    Generate every PartitionScheme, without merging equivalent ones.

    `wlkey` picks the layer out of `self.layers`; `dnkey` picks the node
    dimensions out of `self.dim_nodes`. For each pairing of an h-factorization
    and a w-factorization of the node dimensions, one PhyDim2 is built per
    parallelism type. Candidates failing any of the filters below are dropped,
    and one PartitionScheme is yielded for every accepted ordering.
    '''
    layer = self.layers[wlkey]
    dim_nodes = self.dim_nodes[dnkey]

    h_factors = util.factorize(dim_nodes.h, pe.NUM)
    w_factors = util.factorize(dim_nodes.w, pe.NUM)

    for fact_h, fact_w in itertools.product(h_factors, w_factors):

        pdims = [PhyDim2(h, w) for h, w in zip(fact_h, fact_w)]

        # BATP: the batch size must be an exact multiple of the BATP size.
        if self.batch_size % pdims[pe.BATP].size() != 0:
            continue

        # OUTP: nofm must be approximately dividable by the OUTP size.
        outp_ok = util.approx_dividable(layer.nofm, pdims[pe.OUTP].size())
        if not outp_ok:
            continue

        # OFMP: checked per dimension, hofm against h and wofm against w.
        ofmp = pdims[pe.OFMP]
        if not (util.approx_dividable(layer.hofm, ofmp.h)
                and util.approx_dividable(layer.wofm, ofmp.w)):
            continue

        # INPP: the rule depends on the layer type; other layer types are
        # not constrained here.
        if isinstance(layer, ConvLayer):
            inpp_ok = util.approx_dividable(layer.nifm,
                                            pdims[pe.INPP].size())
        elif isinstance(layer, LocalRegionLayer):
            inpp_ok = pdims[pe.INPP].size() <= 1
        else:
            inpp_ok = True
        if not inpp_ok:
            continue

        # Fully utilize one dimension: every partitioning other than OFMP
        # must have h or w equal to either 1 or the full node dimension.
        non_ofmp = pdims[:pe.OFMP] + pdims[pe.OFMP + 1:]
        if not all(pd.h == 1 or pd.h == dim_nodes.h
                   or pd.w == 1 or pd.w == dim_nodes.w
                   for pd in non_ofmp):
            continue

        for order in itertools.permutations(range(pe.NUM)):

            # Batch parallelism should be at the top: among the types that
            # are actually used (size > 1), BATP must come first if present.
            used = [pae for pae in order if pdims[pae].size() > 1]
            if pe.BATP not in used or used[0] == pe.BATP:
                yield PartitionScheme(order=order, pdims=pdims)
def test_part_apprdiv(self):
    ''' OUTP, OFMP, and INPP approximately dividable. '''
    approx = util.approx_dividable

    for wlkey, layer in self.layers.items():
        for dnkey in self.dim_nodes:
            for scheme in self._gen_partition(wlkey=wlkey, dnkey=dnkey):
                ofmp_dim = scheme.dim(pe.OFMP)
                outp_size = scheme.size(pe.OUTP)
                inpp_size = scheme.size(pe.INPP)

                # OFMP is checked per dimension against the ofmap size.
                self.assertTrue(approx(layer.hofm, ofmp_dim.h))
                self.assertTrue(approx(layer.wofm, ofmp_dim.w))
                # OUTP and INPP are checked by total size against the
                # layer's nofm and nifm respectively.
                self.assertTrue(approx(layer.nofm, outp_size))
                self.assertTrue(approx(layer.nifm, inpp_size))
def test_int(self):
    ''' Int. '''
    # NOTE(review): another `test_int` appears later in this file view and
    # uses different keyword arguments; if both live in the same class the
    # later definition shadows this one -- confirm.

    # Exact divisors of 24 pass with zero overhead allowed.
    for divisor in (2, 3, 4):
        self.assertTrue(util.approx_dividable(24, divisor, overhead=0))

    # Default overhead.
    self.assertTrue(util.approx_dividable(7, 2))
    self.assertTrue(util.approx_dividable(19, 7))
    self.assertFalse(util.approx_dividable(22, 7))

    # Overhead of 19 rounded up through util.idivc(19, 7) * 7, relative
    # to 19. The result must flip just below and just above this value.
    rounded = util.idivc(19, 7) * 7
    ovhd = rounded / 19. - 1
    below = ovhd - 0.01
    above = ovhd + 0.01
    self.assertFalse(util.approx_dividable(19, 7, overhead=below))
    self.assertTrue(util.approx_dividable(19, 7, overhead=above))
def test_float(self):
    ''' Float. '''
    # Non-integer totals, both checked against divisor 3 with the default
    # overhead settings.
    for total in (18.4, 21.4):
        self.assertTrue(util.approx_dividable(total, 3))
def test_int(self):
    ''' Int. '''
    approx = util.approx_dividable

    # Exact divisors of 24 pass with no relative or absolute overhead.
    for divisor in (2, 3, 4):
        self.assertTrue(
            approx(24, divisor, rel_overhead=0, abs_overhead=0))

    # Default overhead settings.
    self.assertTrue(approx(11, 2))
    self.assertFalse(approx(8, 5))
    self.assertTrue(approx(19, 5))

    # Only one kind of overhead allowed at a time:
    # (total, divisor, rel_overhead, abs_overhead).
    accepted = [
        (7, 2, 0.2, 0),
        (7, 2, 0, 1),
        (19, 7, 0.2, 0),
        (19, 7, 0, 2),
    ]
    for total, divisor, rel_ovhd, abs_ovhd in accepted:
        self.assertTrue(
            approx(total, divisor,
                   rel_overhead=rel_ovhd, abs_overhead=abs_ovhd))

    rejected = [
        (22, 7, 0.2, 0),
        (23, 7, 0, 1),
    ]
    for total, divisor, rel_ovhd, abs_ovhd in rejected:
        self.assertFalse(
            approx(total, divisor,
                   rel_overhead=rel_ovhd, abs_overhead=abs_ovhd))

    # Relative threshold for 19 against 7: the gap between 21 and 19 over
    # the larger of the two. The result must flip just below and just
    # above this value.
    gap = 21 - 19
    ovhd = gap / max(21., 19.)
    self.assertFalse(
        approx(19, 7, rel_overhead=ovhd - 0.01, abs_overhead=0))
    self.assertTrue(
        approx(19, 7, rel_overhead=ovhd + 0.01, abs_overhead=0))