예제 #1
0
def test_finitedataset_source_check():
    """
    Requesting a source name the dataset does not provide must raise a
    ValueError whose message mentions the offending source name.
    """
    float_type = theano.config.floatX
    features = np.random.rand(20, 15).astype(float_type)
    targets = np.random.rand(20, 5).astype(float_type)
    dataset = DenseDesignMatrix(X=features, y=targets)

    # 'featuresX' is not a source of the dataset built above.
    bad_request = dict(mode='sequential',
                       batch_size=5,
                       data_specs=(VectorSpace(15), 'featuresX'))

    # First make sure the request is rejected at all ...
    assert_raises(ValueError, dataset.iterator, **bad_request)

    # ... then check that the error message names the unknown source.
    try:
        dataset.iterator(**bad_request)
    except ValueError as err:
        assert 'featuresX' in str(err)
예제 #2
0
def test_finitedataset_source_check():
    """
    Verify that asking a dataset iterator for an unknown source fails
    with a ValueError that names the unknown source.
    """
    dtype = theano.config.floatX
    design = np.random.rand(20, 15).astype(dtype)
    labels = np.random.rand(20, 5).astype(dtype)
    dataset = DenseDesignMatrix(X=design, y=labels)

    # The source name 'featuresX' does not exist in this dataset.
    iterator_kwargs = {
        'mode': 'sequential',
        'batch_size': 5,
        'data_specs': (VectorSpace(15), 'featuresX'),
    }

    # The call has to raise ValueError ...
    assert_raises(ValueError, dataset.iterator, **iterator_kwargs)

    # ... and the message has to mention the bad source name.
    try:
        dataset.iterator(**iterator_kwargs)
    except ValueError as exc:
        assert 'featuresX' in str(exc)
예제 #3
0
파일: billiard.py 프로젝트: vd114/galatea
class BilliardVideo(Dataset):
    """Simulated video of ball(s) bouncing around a rectangular chamber.

    The simulation parameters (frame size, number of balls, velocity
    noise/decay, ...) are read from the ``config`` object given to the
    constructor.  The whole video is simulated once, at construction
    time, and stored in a ``DenseDesignMatrix``; ``iterator`` then draws
    random contiguous runs of frames from it.
    """

    # NOTE(review): not referenced anywhere in the visible part of this
    # class; the per-split seeds used by __init__ are hard-coded there.
    _default_seed = (17, 2, 946)

    def __init__(self, which_set, config):
        """Create a BilliardVideo instance.

        Parameters
        ----------
        which_set : str
            One of 'train', 'valid' or 'test'.  Selects the RNG seed used
            for the simulation, so each split gets a different video.
        config : object
            Object exposing the attributes copied below (``axes``,
            ``num_frames``, ``height``, ``width``, ``num_channels``,
            ``num_balls``, ``ball_diameter``, ``vel_noise``,
            ``vel_decay``, ``frames_burnin``, ``frames_simulate``,
            ``min_val``, ``max_val``, ``dtype``).
        """
        assert which_set in ('train', 'valid', 'test')
        self.which_set = which_set

        # Copy main config from provided config
        self.axes            = config.axes
        self.num_frames      = config.num_frames
        self.height          = config.height
        self.width           = config.width
        self.num_channels    = config.num_channels
        self.num_balls       = config.num_balls
        self.ball_diameter   = config.ball_diameter
        self.vel_noise       = config.vel_noise
        self.vel_decay       = config.vel_decay
        self.frames_burnin   = config.frames_burnin
        self.frames_simulate = config.frames_simulate
        self.min_val         = config.min_val
        self.max_val         = config.max_val
        self.dtype           = config.dtype

        # Fixed seed per split: each split is reproducible and differs
        # from the other two.
        seedmap = {
            'train': 123,
            'valid': 456,
            'test': 789,
        }

        # Generate the dataset.
        # TODO: cache to disk and reload
        video = self._simulate(frames_simulate=self.frames_simulate,
                               rng_seed=seedmap[self.which_set],
                               burn_in=self.frames_burnin)

        # Every simulated frame becomes one example of the design matrix.
        self._dense_design_matrix = DenseDesignMatrix(topo_view=video)

    def _simulate(self, frames_simulate, rng_seed=None, burn_in=0):
        """Run the chamber simulation and return the rendered frames.

        Parameters
        ----------
        frames_simulate : int
            Number of frames to render.
        rng_seed : int or None
            Seed handed to ``np.random.RandomState``.
        burn_in : int
            Number of simulation steps to run (and discard) before the
            first frame is rendered.  Must be non-negative.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(frames_simulate, height, width,
            num_channels)`` and dtype ``self.dtype``.  Entries rendered
            as True are mapped to ``max_val``, all others to ``min_val``.
        """
        # Validate before building the RNG and the chamber so that a bad
        # argument fails fast.
        assert burn_in >= 0, 'burn_in must be non-negative'

        rng = np.random.RandomState(rng_seed)
        chamber = Chamber(height=self.height,
                          width=self.width,
                          num_balls=self.num_balls,
                          ball_diameter=self.ball_diameter,
                          vel_noise=self.vel_noise,
                          vel_decay=self.vel_decay,
                          rng=rng)

        res_shape = (frames_simulate, self.height, self.width,
                     self.num_channels)
        buf = np.zeros(res_shape, dtype=bool)

        # Advance the simulation without rendering.
        for _ in range(burn_in):
            chamber.step()

        # Only channel 0 is handed to the renderer; any further channels
        # keep their initial False value (i.e. min_val after scaling).
        for frame_idx in range(frames_simulate):
            chamber.render_to(buf[frame_idx, :, :, 0])
            chamber.step()

        # Convert from bool to appropriate float type and scale
        # {False, True} to {min_val, max_val}.
        result = np.array(buf, dtype=self.dtype)
        result *= self.max_val - self.min_val
        result += self.min_val

        return result

    @functools.wraps(Dataset.iterator)
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None, data_specs=None,
                 return_tuple=False, ignore_data_specs=False):
        # NOTE: functools.wraps replaces this method's __doc__ with the
        # one of Dataset.iterator, so the notes live in comments.
        #
        # batch_size and num_batches both default to 100.  `topo` and
        # `targets` are not supported and must be left as None.  Unless
        # `ignore_data_specs` is set, `data_specs` must be a
        # (CompositeSpace with one component, ('features', ...)) pair; it
        # is only validated here, the underlying iterator always gets a
        # Conv2DSpace 'features' spec built from this dataset's geometry.

        if batch_size is None:
            batch_size = 100
        if num_batches is None:
            num_batches = 100
        assert batch_size > 0
        assert num_batches > 0
        assert topo is None
        assert targets is None

        if mode is None:
            mode = 'shuffled_sequential'
        assert mode in ('sequential', 'shuffled_sequential'), (
            'Mode must be one of: sequential, shuffled_sequential'
        )
        if mode != 'shuffled_sequential':
            warnings.warn(
                'billiard dataset returning its only supported iterator type '
                '-- shuffled -- despite the request to the contrary'
            )
        if not ignore_data_specs:
            assert data_specs is not None, 'Must provide data_specs'
            assert len(data_specs) == 2, 'data_specs must include only one tuple for "features"'
            # Exact-type check: subclasses of CompositeSpace are rejected.
            assert type(data_specs[0]) is CompositeSpace, 'must be composite space...??'
            assert data_specs[0].num_components == 1, 'must only have one component, features'
            assert data_specs[1][0] == 'features', (
                'data_specs must include only one tuple for "features"'
            )

        underlying_space = Conv2DSpace((self.height, self.width),
                                       num_channels=self.num_channels)
        underlying_dataspecs = (underlying_space, 'features')

        # One underlying "batch" is a clip of num_frames frames, and
        # num_batches * batch_size clips are requested so that batch_size
        # of them can be grouped into each batch handed to the caller.
        self._underlying_iterator = self._dense_design_matrix.iterator(
            mode='random_slice',     # IMPORTANT: to return contiguous slices representing chunks of time!
            batch_size=self.num_frames,
            num_batches=num_batches * batch_size,
            rng=rng,
            data_specs=underlying_dataspecs,
            return_tuple=False
        )

        # presumably concatenates `num_concat` consecutive clips into one
        # batch -- CopyingConcatenatingIterator is defined elsewhere.
        return CopyingConcatenatingIterator(
            self._underlying_iterator,
            num_concat=batch_size,
            return_tuple=return_tuple
        )