def test_dtype_errors():
    """Check that OneHotFormatter rejects floating-point label inputs.

    Both the symbolic entry point (``theano_expr``) and the numeric entry
    point (``format``) are expected to raise ``TypeError`` when the labels
    do not have an integer dtype.
    """
    formatter = OneHotFormatter(max_labels=50)

    # Symbolic path: hand theano_expr a floatX vector as the label variable.
    try:
        formatter.theano_expr(
            theano.tensor.vector(dtype=theano.config.floatX))
    except TypeError:
        rejected = True
    else:
        rejected = False
    assert rejected

    # Numeric path: hand format a float64 array as the label batch.
    try:
        formatter.format(numpy.zeros(10, dtype='float64'))
    except TypeError:
        rejected = True
    else:
        rejected = False
    assert rejected
def test_dtype_errors():
    """Verify that a bad label dtype raises ``TypeError``.

    The check is made once for ``theano_expr`` (symbolic floatX vector) and
    once for ``format`` (float64 numpy array).
    """
    fmt = OneHotFormatter(max_labels=50)

    # Each entry is deferred so that building the argument and making the
    # call both happen inside the guarded region below.
    failing_calls = (
        # Try to call theano_expr with a bad label dtype.
        lambda: fmt.theano_expr(
            theano.tensor.vector(dtype=theano.config.floatX)),
        # Try to call format with a bad label dtype.
        lambda: fmt.format(numpy.zeros(10, dtype='float64')),
    )
    for call in failing_calls:
        raised = False
        try:
            call()
        except TypeError:
            raised = True
        assert raised
def check_one_hot_formatter_symbolic(seed, max_labels, dtype, ncases):
    """Check the symbolic one-hot encoding on randomly drawn labels.

    Draws ``ncases`` integer labels, compiles ``OneHotFormatter.theano_expr``
    into a Theano function, and asserts that the output has exactly one
    non-zero entry per case, located at the column given by the label.

    Parameters
    ----------
    seed : int
        Seed handed to ``numpy.random.RandomState``.
    max_labels : int
        Passed to ``OneHotFormatter``; labels are drawn from the closed
        range ``[0, max_labels - 1]``.
    dtype : str
        dtype passed to ``OneHotFormatter``.
    ncases : int
        Number of labels to draw.

    Raises
    ------
    AssertionError
        If the number of non-zero entries differs from ``ncases`` or an
        expected position does not hold a 1.
    """
    rng = numpy.random.RandomState(seed)
    fmt = OneHotFormatter(max_labels=max_labels, dtype=dtype)
    integer_labels = rng.random_integers(0, max_labels - 1, size=ncases)
    x = theano.tensor.vector(dtype='int64')
    y = fmt.theano_expr(x)
    f = theano.function([x], y)
    one_hot_labels = f(integer_labels)
    # zip() returns a lazy iterator on Python 3, which has no len();
    # materialise it so the count works on both Python 2 and Python 3.
    assert len(list(zip(*one_hot_labels.nonzero()))) == ncases
    for case, label in enumerate(integer_labels):
        assert one_hot_labels[case, label] == 1
def test_bad_arguments():
    """Check that OneHotFormatter rejects malformed arguments.

    Covers invalid ``max_labels`` values, an unknown dtype identifier, and
    3-dimensional label inputs for both ``format()`` and ``theano_expr()``.
    """
    # Constructor keyword sets paired with the exception each must trigger:
    # two invalid max_labels values, then an invalid dtype identifier.
    bad_constructor_calls = (
        (dict(max_labels=-10), ValueError),
        (dict(max_labels='10'), ValueError),
        (dict(max_labels=10, dtype='invalid'), TypeError),
    )
    for kwargs, expected_error in bad_constructor_calls:
        try:
            OneHotFormatter(**kwargs)
        except expected_error:
            rejected = True
        else:
            rejected = False
        assert rejected

    formatter = OneHotFormatter(max_labels=10)

    # Make sure an invalid ndim raises an error for format().
    try:
        formatter.format(numpy.zeros((2, 3, 4), dtype='int32'))
    except ValueError:
        rejected = True
    else:
        rejected = False
    assert rejected

    # Make sure an invalid ndim raises an error for theano_expr().
    try:
        formatter.theano_expr(theano.tensor.itensor3())
    except ValueError:
        rejected = True
    else:
        rejected = False
    assert rejected
def test_bad_arguments():
    """Verify that invalid arguments to OneHotFormatter raise errors.

    Exercises the constructor (bad ``max_labels``, bad ``dtype``) and the
    ``format()`` / ``theano_expr()`` methods with 2-dimensional (matrix)
    label inputs, which this test expects to be rejected.
    """
    # A negative max_labels must be refused.
    try:
        OneHotFormatter(max_labels=-10)
    except ValueError:
        got_error = True
    else:
        got_error = False
    assert got_error

    # A string max_labels must be refused.
    try:
        OneHotFormatter(max_labels='10')
    except ValueError:
        got_error = True
    else:
        got_error = False
    assert got_error

    # An unknown dtype identifier must be refused.
    try:
        OneHotFormatter(max_labels=10, dtype='invalid')
    except TypeError:
        got_error = True
    else:
        got_error = False
    assert got_error

    fmt = OneHotFormatter(max_labels=10)

    # format() must refuse a matrix of labels.
    try:
        fmt.format(numpy.zeros((2, 3), dtype='int32'))
    except ValueError:
        got_error = True
    else:
        got_error = False
    assert got_error

    # theano_expr() must refuse a symbolic integer matrix.
    try:
        fmt.theano_expr(theano.tensor.imatrix())
    except ValueError:
        got_error = True
    else:
        got_error = False
    assert got_error
class OneHotDistribution(Distribution):
    """Randomly samples from a distribution of one-hot vectors."""

    def __init__(self, space, rng=None):
        """Set up the formatter and random stream for ``space``.

        Parameters
        ----------
        space : object
            Forwarded to the ``Distribution`` constructor; its
            ``get_total_dimension()`` and ``dtype`` configure the
            one-hot formatter.
        rng : object, optional
            Random stream to sample from. A fresh ``RandomStreams()`` is
            created when omitted.
        """
        super(OneHotDistribution, self).__init__(space)
        self.dim = space.get_total_dimension()
        self.formatter = OneHotFormatter(self.dim, dtype=space.dtype)
        if rng is None:
            rng = RandomStreams()
        self.rng = rng

    def sample(self, n):
        """Return a symbolic expression for ``n`` one-hot samples.

        Draws an ``(n, 1)`` batch of integer indices in ``[0, dim - 1]``
        from ``self.rng`` and encodes it with the formatter in
        ``'concatenate'`` mode.
        """
        highest_index = self.dim - 1
        indices = self.rng.random_integers((n, 1), low=0,
                                           high=highest_index)
        return self.formatter.theano_expr(indices, mode='concatenate')
class OneHotDistribution(Distribution):
    """Randomly samples from a distribution of one-hot vectors."""

    def __init__(self, space, rng=None):
        """Store the dimension, a one-hot formatter and a random stream.

        ``space`` is passed on to the base class and supplies the total
        dimension and dtype used by the formatter. ``rng`` defaults to a
        newly created ``RandomStreams()`` when it is ``None``.
        """
        super(OneHotDistribution, self).__init__(space)
        total_dim = space.get_total_dimension()
        self.dim = total_dim
        self.formatter = OneHotFormatter(total_dim, dtype=space.dtype)
        self.rng = rng if rng is not None else RandomStreams()

    def sample(self, n):
        """Build the symbolic expression for a batch of ``n`` samples.

        Integer indices of shape ``(n, 1)`` are drawn between 0 and
        ``self.dim - 1`` and formatted in ``'concatenate'`` mode.
        """
        batch_shape = (n, 1)
        drawn = self.rng.random_integers(batch_shape, low=0,
                                         high=self.dim - 1)
        return self.formatter.theano_expr(drawn, mode='concatenate')
class IndexSpace(Space):
    """
    A space representing indices, for example MNIST labels (0-9) or the
    indices of words in a dictionary for NLP tasks. A single space can
    contain multiple indices, for example the word indices of an n-gram.

    IndexSpaces can be converted to VectorSpaces in two ways: Either the
    labels are converted into one-hot vectors which are then concatenated,
    or they are converted into a single vector where 1s indicate labels
    present i.e. for 4 possible labels we have [0, 2] -> [1 0 1 0] or
    [0, 2] -> [1 0 0 0 0 0 1 0].
    """

    def __init__(self, max_labels, dim, **kwargs):
        """
        Initialize an IndexSpace.

        Parameters
        ----------
        max_labels : int
            The number of possible classes/labels. This means that
            all labels should be < max_labels. Example: For MNIST
            there are 10 numbers and hence max_labels = 10.
        dim : int
            The number of indices in one space e.g. for MNIST there is
            one target label and hence dim = 1. If we have an n-gram
            of word indices as input to a neural net language model,
            dim = n.
        kwargs : dict
            Passed on to the superclass constructor.
        """
        super(IndexSpace, self).__init__(**kwargs)
        self.max_labels = max_labels
        self.dim = dim
        self.formatter = OneHotFormatter(self.max_labels)

    def __str__(self):
        """
        Return a string representation.
        """
        # The closing parenthesis was missing from the original template.
        return '%(classname)s(dim=%(dim)s, max_labels=%(max_labels)s)' % \
               dict(classname=self.__class__.__name__,
                    dim=self.dim,
                    max_labels=self.max_labels)

    @functools.wraps(Space.get_total_dimension)
    def get_total_dimension(self):
        return self.dim

    @functools.wraps(Space.np_format_as)
    def np_format_as(self, batch, space):
        # Only VectorSpace targets are supported. The target's dim selects
        # the encoding: max_labels -> 'merge', dim * max_labels ->
        # 'concatenate'.
        if isinstance(space, VectorSpace):
            if self.max_labels == space.dim:
                rval = self.formatter.format(batch, sparse=space.sparse,
                                             mode='merge')
            elif self.dim * self.max_labels == space.dim:
                rval = self.formatter.format(batch, sparse=space.sparse,
                                             mode='concatenate')
            else:
                raise ValueError("Can't convert IndexSpace to "
                                 "VectorSpace (%d labels to %d dimensions)"
                                 % (self.dim, space.dim))
            return rval
        else:
            # A named placeholder (%(space)s) needs a mapping operand; the
            # original passed a plain string and so raised TypeError
            # instead of the intended ValueError.
            raise ValueError("Can't convert IndexSpace to %s"
                             % space.__class__.__name__)

    @functools.wraps(Space._format_as)
    def _format_as(self, batch, space):
        """
        Supports formatting to a VectorSpace where indices are represented
        by ones in a binary vector.
        """
        if isinstance(space, VectorSpace):
            if self.max_labels == space.dim:
                rval = self.formatter.theano_expr(batch, sparse=space.sparse,
                                                  mode='merge')
            elif self.dim * self.max_labels == space.dim:
                rval = self.formatter.theano_expr(batch, sparse=space.sparse,
                                                  mode='concatenate')
            else:
                raise ValueError("Can't convert IndexSpace to "
                                 "VectorSpace (%d labels to %d dimensions)"
                                 % (self.dim, space.dim))
            return rval
        else:
            # See np_format_as: positional %s avoids the TypeError that the
            # mapping-style placeholder caused.
            raise ValueError("Can't convert IndexSpace to %s"
                             % space.__class__.__name__)

    @functools.wraps(Space.make_theano_batch)
    def make_theano_batch(self, name=None, dtype=None, batch_size=None):
        # dtype is accepted but not used here: the batch is always built
        # with T.lrow (batch_size == 1) or T.lmatrix (otherwise).
        if batch_size == 1:
            rval = T.lrow(name=name)
        else:
            rval = T.lmatrix(name=name)
        return rval

    @functools.wraps(Space.batch_size)
    def batch_size(self, batch):
        self.validate(batch)
        return batch.shape[0]

    @functools.wraps(Space.np_batch_size)
    def np_batch_size(self, batch):
        self.np_validate(batch)
        return batch.shape[0]

    @functools.wraps(Space._validate)
    def _validate(self, batch):
        """
        Check that a symbolic batch is a 2D theano tensor variable, then
        run np_validate on any debug values attached to it.
        """
        if not isinstance(batch, theano.gof.Variable):
            raise TypeError("IndexSpace batch should be a theano Variable, "
                            "got " + str(type(batch)))
        if not isinstance(batch.type, (theano.tensor.TensorType,
                                       CudaNdarrayType)):
            # Message previously named VectorSpace (copy-paste slip).
            raise TypeError("IndexSpace batch should be TensorType or "
                            "CudaNdarrayType, got "+str(batch.type))
        if batch.ndim != 2:
            raise ValueError('IndexSpace batches must be 2D, got %d '
                             'dimensions' % batch.ndim)
        for val in get_debug_values(batch):
            self.np_validate(val)

    @functools.wraps(Space._np_validate)
    def _np_validate(self, batch):
        # Use the 'CudaNdarray' string to avoid importing theano.sandbox.cuda
        # when it is not available
        if (not isinstance(batch, np.ndarray)
                and str(type(batch)) != "<type 'CudaNdarray'>"):
            raise TypeError("The value of a IndexSpace batch should be a "
                            "numpy.ndarray, or CudaNdarray, but is %s."
                            % str(type(batch)))
        if batch.ndim != 2:
            raise ValueError("The value of a IndexSpace batch must be "
                             "2D, got %d dimensions for %s." % (batch.ndim,
                                                                batch))
        if batch.shape[1] != self.dim:
            raise ValueError("The width of a IndexSpace batch must match "
                             "with the space's dimension, but batch has shape "
                             "%s and dim = %d." % (str(batch.shape), self.dim))
class IndexSpace(Space):
    """
    A space representing indices, for example MNIST labels (0-9) or the
    indices of words in a dictionary for NLP tasks. A single space can
    contain multiple indices, for example the word indices of an n-gram.

    IndexSpaces can be converted to VectorSpaces in two ways: Either the
    labels are converted into one-hot vectors which are then concatenated,
    or they are converted into a single vector where 1s indicate labels
    present i.e. for 4 possible labels we have [0, 2] -> [1 0 1 0] or
    [0, 2] -> [1 0 0 0 0 0 1 0].
    """

    def __init__(self, max_labels, dim, **kwargs):
        """
        Initialize an IndexSpace.

        Parameters
        ----------
        max_labels : int
            The number of possible classes/labels. This means that
            all labels should be < max_labels. Example: For MNIST
            there are 10 numbers and hence max_labels = 10.
        dim : int
            The number of indices in one space e.g. for MNIST there is
            one target label and hence dim = 1. If we have an n-gram
            of word indices as input to a neural net language model,
            dim = n.
        kwargs : dict
            Passed on to the superclass constructor.
        """
        super(IndexSpace, self).__init__(**kwargs)
        self.max_labels = max_labels
        self.dim = dim
        self.formatter = OneHotFormatter(self.max_labels)

    def __str__(self):
        """
        Return a string representation.
        """
        return '%(classname)s(dim=%(dim)s, max_labels=%(max_labels)s)' % \
               dict(classname=self.__class__.__name__,
                    dim=self.dim,
                    max_labels=self.max_labels)

    def __eq__(self, other):
        """Two IndexSpaces are equal when type, max_labels and dim match."""
        return (type(self) == type(other) and
                self.max_labels == other.max_labels and
                self.dim == other.dim)

    def __ne__(self, other):
        return (not self == other)

    def __hash__(self):
        # Defining __eq__ without __hash__ makes instances unhashable on
        # Python 3 and lets equal instances hash differently on Python 2.
        # Hash the same fields that __eq__ compares.
        return hash((type(self), self.max_labels, self.dim))

    @functools.wraps(Space.get_total_dimension)
    def get_total_dimension(self):
        return self.dim

    @functools.wraps(Space.np_format_as)
    def np_format_as(self, batch, space):
        # Only VectorSpace targets are supported. The target's dim selects
        # the encoding: max_labels -> 'merge', dim * max_labels ->
        # 'concatenate'.
        if isinstance(space, VectorSpace):
            if self.max_labels == space.dim:
                rval = self.formatter.format(batch, sparse=space.sparse,
                                             mode='merge')
            elif self.dim * self.max_labels == space.dim:
                rval = self.formatter.format(batch, sparse=space.sparse,
                                             mode='concatenate')
            else:
                raise ValueError("Can't convert %s to %s" % (self, space))
            return rval
        else:
            raise ValueError("Can't convert %s to %s" % (self, space))

    @functools.wraps(Space._format_as)
    def _format_as(self, batch, space):
        """
        Supports formatting to a VectorSpace where indices are represented
        by ones in a binary vector.
        """
        if isinstance(space, VectorSpace):
            if self.max_labels == space.dim:
                rval = self.formatter.theano_expr(batch, sparse=space.sparse,
                                                  mode='merge')
            elif self.dim * self.max_labels == space.dim:
                rval = self.formatter.theano_expr(batch, sparse=space.sparse,
                                                  mode='concatenate')
            else:
                raise ValueError("Can't convert %s to %s" % (self, space))
            return rval
        else:
            raise ValueError("Can't convert %s to %s" % (self, space))

    @functools.wraps(Space.make_theano_batch)
    def make_theano_batch(self, name=None, dtype=None, batch_size=None):
        # dtype is accepted but not used here: the batch is always built
        # with T.lrow (batch_size == 1) or T.lmatrix (otherwise).
        if batch_size == 1:
            rval = T.lrow(name=name)
        else:
            rval = T.lmatrix(name=name)
        return rval

    @functools.wraps(Space.batch_size)
    def batch_size(self, batch):
        self.validate(batch)
        return batch.shape[0]

    @functools.wraps(Space.np_batch_size)
    def np_batch_size(self, batch):
        self.np_validate(batch)
        return batch.shape[0]

    @functools.wraps(Space._validate)
    def _validate(self, batch):
        """
        Check that a symbolic batch is a 2D theano tensor variable, then
        run np_validate on any debug values attached to it.
        """
        if not isinstance(batch, theano.gof.Variable):
            raise TypeError("IndexSpace batch should be a theano Variable, "
                            "got " + str(type(batch)))
        if not isinstance(batch.type, (theano.tensor.TensorType,
                                       CudaNdarrayType)):
            # Message previously named VectorSpace (copy-paste slip).
            raise TypeError("IndexSpace batch should be TensorType or "
                            "CudaNdarrayType, got "+str(batch.type))
        if batch.ndim != 2:
            raise ValueError('IndexSpace batches must be 2D, got %d '
                             'dimensions' % batch.ndim)
        for val in get_debug_values(batch):
            self.np_validate(val)

    @functools.wraps(Space._np_validate)
    def _np_validate(self, batch):
        # Use the 'CudaNdarray' string to avoid importing theano.sandbox.cuda
        # when it is not available
        if (not isinstance(batch, np.ndarray)
                and str(type(batch)) != "<type 'CudaNdarray'>"):
            raise TypeError("The value of a IndexSpace batch should be a "
                            "numpy.ndarray, or CudaNdarray, but is %s."
                            % str(type(batch)))
        if batch.ndim != 2:
            raise ValueError("The value of a IndexSpace batch must be "
                             "2D, got %d dimensions for %s." % (batch.ndim,
                                                                batch))
        if batch.shape[1] != self.dim:
            raise ValueError("The width of a IndexSpace batch must match "
                             "with the space's dimension, but batch has shape "
                             "%s and dim = %d." % (str(batch.shape), self.dim))
# Script fragment: for every possible one-hot condition value, draw
# `sample_cols` samples from the model's generator and convert them to a
# topological view of the dataset.

# Load the dataset from the YAML source stored on the model.
# NOTE(review): yaml_parse.load presumably instantiates the objects the YAML
# describes -- only run this on models from a trusted source; confirm.
dataset = yaml_parse.load(model.dataset_yaml_src)

# NOTE(review): grid_shape is not read in the visible part of this script;
# presumably it is used further down -- confirm.
grid_shape = None

# Number of choices for one-hot values
rows = model.generator.condition_space.get_total_dimension()

# Samples per condition
sample_cols = 5

# Generate conditional information: a symbolic batch from the condition
# space, one-hot encoded in 'concatenate' mode.
conditional_batch = model.generator.condition_space.make_theano_batch()
formatter = OneHotFormatter(rows,
                            dtype=model.generator.condition_space.dtype)
conditional = formatter.theano_expr(conditional_batch, mode='concatenate')

# Now sample from generator
# For some reason format_as from VectorSpace is not working right
topo_samples_batch = model.generator.sample(conditional)
# The compiled function takes the one-hot expression itself as its input.
topo_sample_f = theano.function([conditional], topo_samples_batch)

# Build the numeric conditions: each index 0..rows-1 repeated sample_cols
# times, reshaped to a (rows * sample_cols, 1) column, then one-hot encoded
# with the same formatter and mode as the symbolic expression above.
conditional_data = formatter.format(np.concatenate([np.repeat(i, sample_cols) for i in range(rows)])
                                    .reshape((rows * sample_cols, 1)),
                                    mode='concatenate')
topo_samples = topo_sample_f(conditional_data)

# Convert the samples to a design matrix, set the axis order on both the
# dataset and its view converter, then rebuild the topological view
# (which overwrites topo_samples).
samples = dataset.get_design_matrix(topo_samples)
dataset.axes = ['b', 0, 1, 'c']
dataset.view_converter.axes = ['b', 0, 1, 'c']
topo_samples = dataset.get_topological_view(samples)