def test_coarsen_chunks(self):
    """Basic sanity checks for coarsen_chunks.

    Verifies that coarsening 4 original chunks down to 2 produces the
    expected grouping, both when operating on a dataset in-place and when
    operating on a plain chunks sequence, and that the two agree.
    """
    chunks = [1, 1, 2, 2, 3, 3, 4, 4]
    ds = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8, chunks=chunks)
    # coarsen in-place on the dataset ...
    coarsen_chunks(ds, nchunks=2)
    # ... and on the raw chunks list; both paths must agree
    chunks1 = coarsen_chunks(chunks, nchunks=2)
    self.failUnless((chunks1 == ds.chunks).all())
    # first half goes to group 0, second half to group 1
    self.failUnless((chunks1 == np.asarray([0, 0, 0, 0, 1, 1, 1, 1])).all())

    # same scenario, but starting from one sample per chunk
    ds2 = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8, chunks=range(len(chunks)))
    coarsen_chunks(ds2, nchunks=2)
    # BUG FIX: previously compared against ds.chunks (already checked
    # above), so ds2's coarsening was never actually tested
    self.failUnless((chunks1 == ds2.chunks).all())
def test_coarsen_chunks(self):
    """Basic sanity checks for coarsen_chunks.

    Coarsens 4 chunks into 2 groups via both the dataset and the plain
    sequence interface, and checks the results match the expected
    half/half grouping.
    """
    chunks = [1, 1, 2, 2, 3, 3, 4, 4]
    ds = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8, chunks=chunks)
    # dataset interface mutates ds.chunks in place
    coarsen_chunks(ds, nchunks=2)
    # sequence interface returns the coarsened assignment
    chunks1 = coarsen_chunks(chunks, nchunks=2)
    self.failUnless((chunks1 == ds.chunks).all())
    self.failUnless((chunks1 == np.asarray([0, 0, 0, 0, 1, 1, 1, 1])).all())

    # repeat with one unique chunk per sample
    ds2 = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8, chunks=range(len(chunks)))
    coarsen_chunks(ds2, nchunks=2)
    # BUG FIX: was comparing against ds.chunks, leaving ds2 unchecked
    self.failUnless((chunks1 == ds2.chunks).all())
def _get_split_config(self, uniqueattrs):
    """Compute the split configuration over unique attribute values.

    Parameters
    ----------
    uniqueattrs : array
      Unique attribute values to distribute across the groups.

    Returns
    -------
    list of tuples (None, list of int)
      Indices for splitting, one tuple per requested group.

    Raises
    ------
    ValueError
      If fewer unique attributes than requested groups are available.
    """
    # make sure there are at least as many attributes as desired groups
    if len(uniqueattrs) < self.__ngroups:
        # BUG FIX: was Python 2 statement-form `raise ValueError, "..."`,
        # a SyntaxError under Python 3; use call form as elsewhere in file
        raise ValueError(
            "Number of groups (%d) " % (self.__ngroups)
            + "must be less than "
            + "or equal to the number of unique attributes (%d)"
            % (len(uniqueattrs)))
    # use coarsen_chunks to get the split indices
    split_ind = coarsen_chunks(uniqueattrs, nchunks=self.__ngroups)
    split_ind = np.asarray(split_ind)

    # one (filter, attribute-subset) pair per coarse group
    split_list = [(None, uniqueattrs[split_ind == i])
                  for i in range(self.__ngroups)]
    return split_list
def _get_partition_specs(self, uniqueattrs):
    """Build the partition specs over unique attribute values.

    Returns
    -------
    list of tuples (None, list of int)
      Indices for splitting, one tuple per requested group.

    Raises
    ------
    ValueError
      If fewer unique attributes than requested groups are available.
    """
    nattrs = len(uniqueattrs)
    # refuse configurations with fewer attributes than requested groups
    if nattrs < self.__ngroups:
        raise ValueError("Number of groups (%d) " % (self.__ngroups) +
                         "must be less than " +
                         "or equal to the number of unique attributes (%d)" %
                         (nattrs))

    # delegate the actual grouping to coarsen_chunks
    group_ids = np.asarray(
        coarsen_chunks(uniqueattrs, nchunks=self.__ngroups))

    # collect the attribute subset assigned to each coarse group
    specs = []
    for group in range(self.__ngroups):
        specs.append((None, uniqueattrs[group_ids == group]))
    return specs