def test_dtype_errors():
    """
    Check that OneHotFormatter rejects labels with a floating-point dtype.

    Both the symbolic entry point (``theano_expr``) and the numeric entry
    point (``format``) are expected to raise ``TypeError``.
    """
    def raises_type_error(thunk):
        # Run ``thunk`` and report whether it raised a TypeError.
        try:
            thunk()
        except TypeError:
            return True
        return False

    fmt = OneHotFormatter(max_labels=50)

    # Try to call theano_expr with a bad label dtype.
    assert raises_type_error(
        lambda: fmt.theano_expr(
            theano.tensor.vector(dtype=theano.config.floatX)))

    # Try to call format with a bad label dtype.
    assert raises_type_error(
        lambda: fmt.format(numpy.zeros(10, dtype='float64')))
Exemple #2
0
def test_dtype_errors():
    """
    Verify that float-typed labels are refused by OneHotFormatter.

    A ``TypeError`` is expected from ``theano_expr`` (symbolic input) as
    well as from ``format`` (numpy input).
    """
    fmt = OneHotFormatter(max_labels=50)

    # Symbolic path: a floatX vector is not a valid label variable.
    try:
        fmt.theano_expr(theano.tensor.vector(dtype=theano.config.floatX))
    except TypeError:
        symbolic_rejected = True
    else:
        symbolic_rejected = False
    assert symbolic_rejected

    # Numeric path: a float64 array is not a valid label array.
    try:
        fmt.format(numpy.zeros(10, dtype='float64'))
    except TypeError:
        numeric_rejected = True
    else:
        numeric_rejected = False
    assert numeric_rejected
Exemple #3
0
 def check_one_hot_formatter_symbolic(seed, max_labels, dtype, ncases):
     """
     Check the symbolic one-hot encoding produced by OneHotFormatter.

     Random integer labels are pushed through a compiled
     ``fmt.theano_expr`` graph; the result must contain exactly one
     non-zero entry per case, located at the column given by the label.

     Parameters
     ----------
     seed : int
         Seed for the numpy ``RandomState`` used to draw the labels.
     max_labels : int
         Number of labels handed to ``OneHotFormatter``; labels are drawn
         from ``0`` to ``max_labels - 1`` inclusive.
     dtype : str
         dtype handed to ``OneHotFormatter``.
     ncases : int
         Number of labels to draw and encode.
     """
     rng = numpy.random.RandomState(seed)
     fmt = OneHotFormatter(max_labels=max_labels, dtype=dtype)
     # randint's upper bound is exclusive, so this covers the same closed
     # range [0, max_labels - 1] as the deprecated random_integers call.
     integer_labels = rng.randint(0, max_labels, size=ncases)
     x = theano.tensor.vector(dtype='int64')
     y = fmt.theano_expr(x)
     f = theano.function([x], y)
     one_hot_labels = f(integer_labels)
     # Counting non-zeros directly avoids len(zip(...)), which fails on
     # Python 3 where zip returns an iterator.
     assert numpy.count_nonzero(one_hot_labels) == ncases
     for case, label in enumerate(integer_labels):
         assert one_hot_labels[case, label] == 1
Exemple #4
0
def test_bad_arguments():
    """
    Check that OneHotFormatter rejects invalid constructor and call inputs.

    Covers a negative and a string ``max_labels``, an unknown dtype name,
    and 3-dimensional inputs to both ``format`` and ``theano_expr``.
    """
    def raised(exc_type, thunk):
        # Run ``thunk`` and report whether it raised ``exc_type``.
        try:
            thunk()
        except exc_type:
            return True
        return False

    # Make sure an invalid max_labels raises an error.
    assert raised(ValueError, lambda: OneHotFormatter(max_labels=-10))
    assert raised(ValueError, lambda: OneHotFormatter(max_labels='10'))

    # Make sure an invalid dtype identifier raises an error.
    assert raised(TypeError,
                  lambda: OneHotFormatter(max_labels=10, dtype='invalid'))

    fmt = OneHotFormatter(max_labels=10)

    # Make sure an invalid ndim raises an error for format().
    assert raised(ValueError,
                  lambda: fmt.format(numpy.zeros((2, 3, 4), dtype='int32')))

    # Make sure an invalid ndim raises an error for theano_expr().
    assert raised(ValueError,
                  lambda: fmt.theano_expr(theano.tensor.itensor3()))
def test_bad_arguments():
    """
    Verify that OneHotFormatter refuses invalid arguments.

    Exercises a negative and a string ``max_labels``, an unknown dtype
    name, and 2-dimensional inputs to ``format`` and ``theano_expr``.
    """
    # A negative max_labels must be rejected.
    try:
        OneHotFormatter(max_labels=-10)
    except ValueError:
        negative_rejected = True
    else:
        negative_rejected = False
    assert negative_rejected

    # A string max_labels must be rejected.
    try:
        OneHotFormatter(max_labels='10')
    except ValueError:
        string_rejected = True
    else:
        string_rejected = False
    assert string_rejected

    # An invalid dtype identifier must be rejected.
    try:
        OneHotFormatter(max_labels=10, dtype='invalid')
    except TypeError:
        dtype_rejected = True
    else:
        dtype_rejected = False
    assert dtype_rejected

    fmt = OneHotFormatter(max_labels=10)

    # An invalid ndim must be rejected by format().
    try:
        fmt.format(numpy.zeros((2, 3), dtype='int32'))
    except ValueError:
        array_rejected = True
    else:
        array_rejected = False
    assert array_rejected

    # An invalid ndim must be rejected by theano_expr().
    try:
        fmt.theano_expr(theano.tensor.imatrix())
    except ValueError:
        symbol_rejected = True
    else:
        symbol_rejected = False
    assert symbol_rejected
Exemple #6
0
class OneHotDistribution(Distribution):
    """Distribution whose samples are randomly chosen one-hot vectors."""

    def __init__(self, space, rng=None):
        """
        Set up the formatter and random stream used for sampling.

        Parameters
        ----------
        space : Space
            Forwarded to the ``Distribution`` constructor. Its total
            dimension is used as the number of one-hot labels and its
            dtype as the formatter dtype.
        rng : object, optional
            Random stream providing ``random_integers``. A fresh
            ``RandomStreams()`` is created when this is None.
        """
        super(OneHotDistribution, self).__init__(space)

        self.dim = space.get_total_dimension()
        self.formatter = OneHotFormatter(self.dim, dtype=space.dtype)

        if rng is None:
            rng = RandomStreams()
        self.rng = rng

    def sample(self, n):
        """
        Build an expression for ``n`` random one-hot vectors.

        Draws an ``(n, 1)`` batch of indices between 0 and ``self.dim - 1``
        and passes it to the formatter in 'concatenate' mode.
        """
        highest_index = self.dim - 1
        indices = self.rng.random_integers((n, 1), low=0,
                                           high=highest_index)
        return self.formatter.theano_expr(indices, mode='concatenate')
class OneHotDistribution(Distribution):
    """Draws random one-hot vectors over the dimensions of a space."""

    def __init__(self, space, rng=None):
        """
        Parameters
        ----------
        space : Space
            Passed on to ``Distribution.__init__``; supplies the total
            dimension (number of one-hot positions) and the dtype given
            to the ``OneHotFormatter``.
        rng : object, optional
            Source of ``random_integers``; defaults to a new
            ``RandomStreams()`` when None is given.
        """
        super(OneHotDistribution, self).__init__(space)

        self.dim = space.get_total_dimension()
        self.formatter = OneHotFormatter(self.dim, dtype=space.dtype)

        # Only build a default random stream when the caller gave none.
        if rng is not None:
            self.rng = rng
        else:
            self.rng = RandomStreams()

    def sample(self, n):
        """
        Return the formatter's 'concatenate'-mode expression for an
        ``(n, 1)`` batch of random indices in ``[0, self.dim - 1]``.
        """
        batch_shape = (n, 1)
        drawn = self.rng.random_integers(batch_shape, low=0,
                                         high=self.dim - 1)
        one_hot = self.formatter.theano_expr(drawn, mode='concatenate')
        return one_hot
Exemple #8
0
class IndexSpace(Space):
    """
    A space representing indices, for example MNIST labels (0-9) or the
    indices of words in a dictionary for NLP tasks. A single space can
    contain multiple indices, for example the word indices of an n-gram.

    IndexSpaces can be converted to VectorSpaces in two ways: Either the
    labels are converted into one-hot vectors which are then concatenated,
    or they are converted into a single vector where 1s indicate labels
    present i.e. for 4 possible labels we have [0, 2] -> [1 0 1 0] or
    [0, 2] -> [1 0 0 0 0 0 1 0].
    """
    def __init__(self, max_labels, dim, **kwargs):
        """
        Initialize an IndexSpace.

        Parameters
        ----------
        max_labels : int
            The number of possible classes/labels. This means that
            all labels should be < max_labels. Example: For MNIST
            there are 10 numbers and hence max_labels = 10.
        dim : int
            The number of indices in one space e.g. for MNIST there is
            one target label and hence dim = 1. If we have an n-gram
            of word indices as input to a neural net language model, dim = n.
        kwargs: passes on to superclass constructor
        """

        super(IndexSpace, self).__init__(**kwargs)

        self.max_labels = max_labels
        self.dim = dim
        self.formatter = OneHotFormatter(self.max_labels)

    def __str__(self):
        """
        Return a string representation.
        """
        return '%(classname)s(dim=%(dim)s, max_labels=%(max_labels)s)' % \
               dict(classname=self.__class__.__name__,
                    dim=self.dim,
                    max_labels=self.max_labels)

    def _one_hot_mode(self, space):
        """
        Choose the OneHotFormatter mode needed to convert to `space`.

        Returns 'merge' when the target VectorSpace has `max_labels`
        dimensions and 'concatenate' when it has `dim * max_labels`
        dimensions.

        Raises
        ------
        ValueError
            If `space` is not a VectorSpace or its dimension matches
            neither layout.
        """
        if not isinstance(space, VectorSpace):
            raise ValueError("Can't convert IndexSpace to %s"
                             % space.__class__.__name__)
        if self.max_labels == space.dim:
            return 'merge'
        if self.dim * self.max_labels == space.dim:
            return 'concatenate'
        raise ValueError("Can't convert IndexSpace to "
                         "VectorSpace (%d labels to %d dimensions)"
                         % (self.dim, space.dim))

    @functools.wraps(Space.get_total_dimension)
    def get_total_dimension(self):
        return self.dim

    @functools.wraps(Space.np_format_as)
    def np_format_as(self, batch, space):
        mode = self._one_hot_mode(space)
        return self.formatter.format(batch, sparse=space.sparse, mode=mode)

    @functools.wraps(Space._format_as)
    def _format_as(self, batch, space):
        """
        Supports formatting to a VectorSpace where indices are represented
        by ones in a binary vector.
        """
        mode = self._one_hot_mode(space)
        return self.formatter.theano_expr(batch, sparse=space.sparse,
                                          mode=mode)

    @functools.wraps(Space.make_theano_batch)
    def make_theano_batch(self, name=None, dtype=None, batch_size=None):
        # `dtype` is accepted for interface compatibility but not used:
        # the batch is always an int64 row (batch_size == 1) or matrix.
        if batch_size == 1:
            rval = T.lrow(name=name)
        else:
            rval = T.lmatrix(name=name)
        return rval

    @functools.wraps(Space.batch_size)
    def batch_size(self, batch):
        self.validate(batch)
        return batch.shape[0]

    @functools.wraps(Space.np_batch_size)
    def np_batch_size(self, batch):
        self.np_validate(batch)
        return batch.shape[0]

    @functools.wraps(Space._validate)
    def _validate(self, batch):
        """
        Check that a symbolic batch is a 2D theano tensor variable, then
        run `np_validate` on any debug values attached to it.
        """
        if not isinstance(batch, theano.gof.Variable):
            raise TypeError("IndexSpace batch should be a theano Variable, "
                            "got " + str(type(batch)))
        if not isinstance(batch.type, (theano.tensor.TensorType,
                                       CudaNdarrayType)):
            raise TypeError("IndexSpace batch should be TensorType or "
                            "CudaNdarrayType, got "+str(batch.type))
        if batch.ndim != 2:
            raise ValueError('IndexSpace batches must be 2D, got %d '
                             'dimensions' % batch.ndim)
        for val in get_debug_values(batch):
            self.np_validate(val)

    @functools.wraps(Space._np_validate)
    def _np_validate(self, batch):
        # Compare the type's name to avoid importing theano.sandbox.cuda
        # when it is not available. Using __name__ rather than the repr
        # works on both Python 2 ("<type ...>") and Python 3 ("<class ...>").
        if (not isinstance(batch, np.ndarray)
                and type(batch).__name__ != 'CudaNdarray'):
            raise TypeError("The value of a IndexSpace batch should be a "
                            "numpy.ndarray, or CudaNdarray, but is %s."
                            % str(type(batch)))
        if batch.ndim != 2:
            raise ValueError("The value of a IndexSpace batch must be "
                             "2D, got %d dimensions for %s." % (batch.ndim,
                                                                batch))
        if batch.shape[1] != self.dim:
            raise ValueError("The width of a IndexSpace batch must match "
                             "with the space's dimension, but batch has shape "
                             "%s and dim = %d." % (str(batch.shape), self.dim))
Exemple #9
0
class IndexSpace(Space):
    """
    A space representing indices, for example MNIST labels (0-9) or the
    indices of words in a dictionary for NLP tasks. A single space can
    contain multiple indices, for example the word indices of an n-gram.

    IndexSpaces can be converted to VectorSpaces in two ways: Either the
    labels are converted into one-hot vectors which are then concatenated,
    or they are converted into a single vector where 1s indicate labels
    present i.e. for 4 possible labels we have [0, 2] -> [1 0 1 0] or
    [0, 2] -> [1 0 0 0 0 0 1 0].
    """
    def __init__(self, max_labels, dim, **kwargs):
        """
        Initialize an IndexSpace.

        Parameters
        ----------
        max_labels : int
            The number of possible classes/labels. This means that
            all labels should be < max_labels. Example: For MNIST
            there are 10 numbers and hence max_labels = 10.
        dim : int
            The number of indices in one space e.g. for MNIST there is
            one target label and hence dim = 1. If we have an n-gram
            of word indices as input to a neural net language model, dim = n.
        kwargs: passes on to superclass constructor
        """

        super(IndexSpace, self).__init__(**kwargs)

        self.max_labels = max_labels
        self.dim = dim
        self.formatter = OneHotFormatter(self.max_labels)

    def __str__(self):
        """
        Return a string representation.
        """
        return '%(classname)s(dim=%(dim)s, max_labels=%(max_labels)s)' % \
               dict(classname=self.__class__.__name__,
                    dim=self.dim,
                    max_labels=self.max_labels)

    def __eq__(self, other):
        """
        Two IndexSpaces are equal when they have exactly the same type,
        the same `max_labels` and the same `dim`.
        """
        return (type(self) == type(other) and
                self.max_labels == other.max_labels and
                self.dim == other.dim)

    def __ne__(self, other):
        return (not self == other)

    def __hash__(self):
        # Defining __eq__ without __hash__ makes instances unhashable on
        # Python 3; hash exactly the fields that __eq__ compares.
        return hash((type(self), self.max_labels, self.dim))

    @functools.wraps(Space.get_total_dimension)
    def get_total_dimension(self):
        return self.dim

    @functools.wraps(Space.np_format_as)
    def np_format_as(self, batch, space):
        if isinstance(space, VectorSpace):
            # 'merge' when the target has max_labels dimensions,
            # 'concatenate' when it has dim * max_labels dimensions.
            if self.max_labels == space.dim:
                rval = self.formatter.format(batch, sparse=space.sparse,
                                             mode='merge')
            elif self.dim * self.max_labels == space.dim:
                rval = self.formatter.format(batch, sparse=space.sparse,
                                             mode='concatenate')
            else:
                raise ValueError("Can't convert %s to %s"
                                 % (self, space))
            return rval
        else:
            raise ValueError("Can't convert %s to %s"
                             % (self, space))

    @functools.wraps(Space._format_as)
    def _format_as(self, batch, space):
        """
        Supports formatting to a VectorSpace where indices are represented
        by ones in a binary vector.
        """
        if isinstance(space, VectorSpace):
            # Same layout choice as np_format_as, but symbolic.
            if self.max_labels == space.dim:
                rval = self.formatter.theano_expr(batch, sparse=space.sparse,
                                                  mode='merge')
            elif self.dim * self.max_labels == space.dim:
                rval = self.formatter.theano_expr(batch, sparse=space.sparse,
                                                  mode='concatenate')
            else:
                raise ValueError("Can't convert %s to %s"
                                 % (self, space))
            return rval
        else:
            raise ValueError("Can't convert %s to %s"
                             % (self, space))

    @functools.wraps(Space.make_theano_batch)
    def make_theano_batch(self, name=None, dtype=None, batch_size=None):
        # `dtype` is accepted for interface compatibility but not used:
        # the batch is always an int64 row (batch_size == 1) or matrix.
        if batch_size == 1:
            rval = T.lrow(name=name)
        else:
            rval = T.lmatrix(name=name)
        return rval

    @functools.wraps(Space.batch_size)
    def batch_size(self, batch):
        self.validate(batch)
        return batch.shape[0]

    @functools.wraps(Space.np_batch_size)
    def np_batch_size(self, batch):
        self.np_validate(batch)
        return batch.shape[0]

    @functools.wraps(Space._validate)
    def _validate(self, batch):
        """
        Check that a symbolic batch is a 2D theano tensor variable, then
        run `np_validate` on any debug values attached to it.
        """
        if not isinstance(batch, theano.gof.Variable):
            raise TypeError("IndexSpace batch should be a theano Variable, "
                            "got " + str(type(batch)))
        if not isinstance(batch.type, (theano.tensor.TensorType,
                                       CudaNdarrayType)):
            raise TypeError("IndexSpace batch should be TensorType or "
                            "CudaNdarrayType, got "+str(batch.type))
        if batch.ndim != 2:
            raise ValueError('IndexSpace batches must be 2D, got %d '
                             'dimensions' % batch.ndim)
        for val in get_debug_values(batch):
            self.np_validate(val)

    @functools.wraps(Space._np_validate)
    def _np_validate(self, batch):
        # Compare the type's name to avoid importing theano.sandbox.cuda
        # when it is not available. Using __name__ rather than the repr
        # works on both Python 2 ("<type ...>") and Python 3 ("<class ...>").
        if (not isinstance(batch, np.ndarray)
                and type(batch).__name__ != 'CudaNdarray'):
            raise TypeError("The value of a IndexSpace batch should be a "
                            "numpy.ndarray, or CudaNdarray, but is %s."
                            % str(type(batch)))
        if batch.ndim != 2:
            raise ValueError("The value of a IndexSpace batch must be "
                             "2D, got %d dimensions for %s." % (batch.ndim,
                                                                batch))
        if batch.shape[1] != self.dim:
            raise ValueError("The width of a IndexSpace batch must match "
                             "with the space's dimension, but batch has shape "
                             "%s and dim = %d." % (str(batch.shape), self.dim))
# Load the dataset described by the YAML source stored on the model.
dataset = yaml_parse.load(model.dataset_yaml_src)

grid_shape = None

# Number of choices for one-hot values
rows = model.generator.condition_space.get_total_dimension()

# Samples per condition
sample_cols = 5

# Generate conditional information: a symbolic batch from the generator's
# condition space, one-hot encoded in 'concatenate' mode.
conditional_batch = model.generator.condition_space.make_theano_batch()
formatter = OneHotFormatter(rows,
                            dtype=model.generator.condition_space.dtype)
conditional = formatter.theano_expr(conditional_batch, mode='concatenate')

# Now sample from generator
# For some reason format_as from VectorSpace is not working right
topo_samples_batch = model.generator.sample(conditional)
# NOTE(review): the compiled function takes the one-hot expression
# `conditional` itself as its input, not `conditional_batch`.
topo_sample_f = theano.function([conditional], topo_samples_batch)
# Numeric conditions: each index in range(rows) repeated sample_cols times,
# reshaped to a (rows * sample_cols, 1) column and one-hot encoded with the
# same formatter so it can be fed directly to topo_sample_f.
conditional_data = formatter.format(np.concatenate([np.repeat(i, sample_cols) for i in range(rows)])
                                      .reshape((rows * sample_cols, 1)),
                                    mode='concatenate')
topo_samples = topo_sample_f(conditional_data)

# Convert the samples to a design matrix, switch the dataset and its view
# converter to ['b', 0, 1, 'c'] axes, then convert back to a topological view.
samples = dataset.get_design_matrix(topo_samples)
dataset.axes = ['b', 0, 1, 'c']
dataset.view_converter.axes = ['b', 0, 1, 'c']
topo_samples = dataset.get_topological_view(samples)