def test_finitedataset_source_check():
    """
    Verify the error raised when an iterator is requested for a source
    name that the dataset does not provide.

    A small DenseDesignMatrix is built, then an iterator is requested
    with the source name 'featuresX'. The request must raise ValueError,
    and when it does, the message must mention the offending source name.
    """
    # NOTE(review): this file defines test_finitedataset_source_check
    # twice with identical bodies; the later definition rebinds the name.
    float_type = theano.config.floatX
    features = np.random.rand(20, 15).astype(float_type)
    targets = np.random.rand(20, 5).astype(float_type)
    dataset = DenseDesignMatrix(X=features, y=targets)

    def bad_request():
        # Fresh keyword arguments for every call, as in a literal call.
        return dict(mode='sequential',
                    batch_size=5,
                    data_specs=(VectorSpace(15), 'featuresX'))

    # The request itself has to fail with ValueError...
    assert_raises(ValueError, dataset.iterator, **bad_request())

    # ...and the failure message has to name the unknown source.
    try:
        dataset.iterator(**bad_request())
    except ValueError as err:
        assert 'featuresX' in str(err)
def test_finitedataset_source_check():
    """
    Check that asking a dataset iterator for a missing source produces a
    sensible error.

    The dataset is a DenseDesignMatrix with random X and y; the requested
    source 'featuresX' is expected to be rejected with a ValueError whose
    text contains the requested name.
    """
    # NOTE(review): an identical definition of this test appears earlier
    # in this file; this later definition is the one the name ends up
    # bound to.
    dtype = theano.config.floatX
    x_data = np.random.rand(20, 15).astype(dtype)
    y_data = np.random.rand(20, 5).astype(dtype)
    dataset = DenseDesignMatrix(X=x_data, y=y_data)

    # First check: the call must raise ValueError.
    assert_raises(
        ValueError,
        dataset.iterator,
        mode='sequential',
        batch_size=5,
        data_specs=(VectorSpace(15), 'featuresX'),
    )

    # Second check: inspect the message of the raised error.
    message = None
    try:
        dataset.iterator(
            mode='sequential',
            batch_size=5,
            data_specs=(VectorSpace(15), 'featuresX'),
        )
    except ValueError as exc:
        message = str(exc)
    if message is not None:
        assert 'featuresX' in message
class BilliardVideo(Dataset):
    '''Simulated ball bouncing around a rectangular chamber with
    (optional) noise added each time step.

    This dataset is generated on the fly: the constructor runs a
    ``Chamber`` simulation, renders every frame into an array of shape
    (frames_simulate, height, width, num_channels) and wraps that array in
    a ``DenseDesignMatrix``. Iteration draws contiguous runs of
    ``num_frames`` frames from that matrix.

    Parameters
    ----------
    which_set : str
        One of 'train', 'valid' or 'test'. Selects the fixed RNG seed
        used for the simulation, so each split is reproducible.
    config : object
        Must expose the attributes ``axes``, ``num_frames``, ``height``,
        ``width``, ``num_channels``, ``num_balls``, ``ball_diameter``,
        ``vel_noise``, ``vel_decay``, ``frames_burnin``,
        ``frames_simulate``, ``min_val``, ``max_val`` and ``dtype``.
        Each is copied onto the instance under the same name.
    '''

    # NOTE(review): not referenced anywhere in this class; the seeds
    # actually used come from the per-split map in __init__.
    _default_seed = (17, 2, 946)

    def __init__(self, which_set, config):
        '''Create a BilliardVideo instance and simulate its video.'''
        assert which_set in ('train', 'valid', 'test')
        self.which_set = which_set

        # Copy main config from provided config
        self.axes = config.axes
        self.num_frames = config.num_frames
        self.height = config.height
        self.width = config.width
        self.num_channels = config.num_channels
        self.num_balls = config.num_balls
        self.ball_diameter = config.ball_diameter
        self.vel_noise = config.vel_noise
        self.vel_decay = config.vel_decay
        self.frames_burnin = config.frames_burnin
        self.frames_simulate = config.frames_simulate
        self.min_val = config.min_val
        self.max_val = config.max_val
        self.dtype = config.dtype

        # One fixed seed per split keeps train/valid/test reproducible
        # and distinct from one another.
        seedmap = {
            'train': 123,
            'valid': 456,
            'test': 789,
        }

        # Generate dataset here
        # TODO: cache to disk and reload
        video = self._simulate(frames_simulate=self.frames_simulate,
                               rng_seed=seedmap[self.which_set],
                               burn_in=self.frames_burnin)

        self._dense_design_matrix = DenseDesignMatrix(topo_view=video)

    def _simulate(self, frames_simulate, rng_seed=None, burn_in=0):
        '''Run the chamber simulation and return the rendered frames.

        Parameters
        ----------
        frames_simulate : int
            Number of frames to render.
        rng_seed : int or None
            Seed handed to ``np.random.RandomState``.
        burn_in : int
            Number of simulation steps taken (and discarded) before the
            first rendered frame. Must be non-negative.

        Returns
        -------
        result : ndarray
            Array of dtype ``self.dtype`` and shape
            (frames_simulate, height, width, num_channels). Cells left
            False by the renderer hold ``min_val`` and cells set True hold
            ``max_val``. Only channel 0 is rendered into; any further
            channels stay at ``min_val``.
        '''
        assert burn_in >= 0, 'burn_in must be non-negative'

        rng = np.random.RandomState(rng_seed)

        chamber = Chamber(height=self.height,
                          width=self.width,
                          num_balls=self.num_balls,
                          ball_diameter=self.ball_diameter,
                          vel_noise=self.vel_noise,
                          vel_decay=self.vel_decay,
                          rng=rng)

        res_shape = (frames_simulate, self.height, self.width,
                     self.num_channels)
        buf = np.zeros(res_shape, dtype=bool)

        # Advance the simulation without recording anything.
        for _ in xrange(burn_in):
            chamber.step()

        # Render the current state, then advance; the first recorded frame
        # is therefore the state right after burn-in.
        for frame_idx in xrange(frames_simulate):
            chamber.render_to(buf[frame_idx, :, :, 0])
            chamber.step()

        # Convert from bool to appropriate float type and scale
        result = np.array(buf, dtype=self.dtype)
        result *= self.max_val - self.min_val
        result += self.min_val

        return result

    # No docstring on purpose: functools.wraps replaces this method's
    # __doc__ with Dataset.iterator's, so the notes live in comments.
    #
    # Behaviour specific to this dataset:
    #   * batch_size and num_batches both default to 100.
    #   * topo and targets must be None.
    #   * mode may be 'sequential' or 'shuffled_sequential' (the default),
    #     but either way the underlying design matrix is iterated in
    #     'random_slice' mode; a warning is issued for 'sequential'.
    #   * Unless ignore_data_specs is true, data_specs must be a pair
    #     (CompositeSpace with one component, sources) whose first source
    #     is 'features'.
    #   * The underlying iterator is kept on self._underlying_iterator.
    @functools.wraps(Dataset.iterator)
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None, data_specs=None,
                 return_tuple=False, ignore_data_specs=False):
        if batch_size is None:
            batch_size = 100
        if num_batches is None:
            num_batches = 100
        assert batch_size > 0
        assert num_batches > 0

        assert topo is None
        assert targets is None

        if mode is None:
            mode = 'shuffled_sequential'
        assert mode in ('sequential', 'shuffled_sequential'), (
            'Mode must be one of: sequential, shuffled_sequential'
        )
        if mode != 'shuffled_sequential':
            warnings.warn('billiard dataset returning its only supported iterator type -- shuffled -- despite the request to the contrary')

        if not ignore_data_specs:
            assert data_specs is not None, 'Must provide data_specs'
            assert len(data_specs) == 2, 'data_specs must include only one tuple for "features"'
            assert isinstance(data_specs[0], CompositeSpace), 'must be composite space...??'
            assert data_specs[0].num_components == 1, 'must only have one component, features'
            assert data_specs[1][0] == 'features', (
                'data_specs must include only one tuple for "features"'
            )

        # The design matrix is always asked for plain Conv2DSpace
        # 'features', independent of the data_specs supplied by the caller.
        underlying_space = Conv2DSpace((self.height, self.width),
                                       num_channels=self.num_channels)
        underlying_dataspecs = (underlying_space, 'features')

        # Each underlying "batch" is one clip of num_frames frames, so
        # num_batches * batch_size clips are requested in total.
        self._underlying_iterator = self._dense_design_matrix.iterator(
            mode='random_slice',  # IMPORTANT: to return contiguous slices representing chunks of time!
            batch_size=self.num_frames,
            num_batches=num_batches * batch_size,
            rng=rng,
            data_specs=underlying_dataspecs,
            return_tuple=False
        )

        # Presumably groups batch_size clips into each returned batch;
        # confirm against CopyingConcatenatingIterator.
        return CopyingConcatenatingIterator(
            self._underlying_iterator,
            num_concat=batch_size,
            return_tuple=return_tuple
        )