Example #1
        def getFeatures(indexes):
            """
            Return the source words for the examples at ``indexes`` and
            store the corresponding target words in ``self.lastY``.
            """
            if self._load_to_memory:
                sequences = [self.samples_sequences[i] for i in indexes]
            else:
                sequences = [self.node[i] for i in indexes]
            # Get a random source word index for the "ngram" in each sequence
            source_i = [numpy.random.randint(self.frame_length // 2 + 1,
                                             len(s) - self.frame_length // 2,
                                             1)[0]
                        for s in sequences]
            # Target index: normal around the source index, clipped to bounds
            target_i = [min(abs(int(numpy.random.normal(s_i,
                                                        self.frame_length / 3.0))),
                            len(s) - 1)
                        for s_i, s in safe_izip(source_i, sequences)]

            # Words mapped to integers greater than the input max are set to
            # 1 (unknown). Convert to arrays first: boolean indexing does not
            # work on Python lists.
            X = numpy.asarray([numpy.asarray([s[i]])
                               for i, s in safe_izip(source_i, sequences)])
            X[X >= self.X_labels] = 1
            y = numpy.asarray([numpy.asarray([s[i]])
                               for i, s in safe_izip(target_i, sequences)])
            y[y >= self.X_labels] = 1
            # Store the targets generated by these indices.
            self.lastY = (y, indexes)
            return X
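The sampling scheme above (a uniform source position with a half-frame margin, and a normally distributed target around it) can be checked in isolation. A minimal sketch with plain numpy and a toy sequence; all names and values here are illustrative:

import numpy

rng = numpy.random.RandomState(0)
frame_length = 4
sequence = list(range(100, 120))  # toy sequence of word ids

# Source index: uniform over positions, keeping a half-frame margin
source_i = rng.randint(frame_length // 2 + 1,
                       len(sequence) - frame_length // 2)
# Target index: normal around the source index, clipped into the sequence
target_i = min(abs(int(rng.normal(source_i, frame_length / 3.0))),
               len(sequence) - 1)
print(sequence[source_i], sequence[target_i])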
Example #2
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 rng=None, data_specs=None, return_tuple=False):
        allowed_modes = ('sequential', 'random_slice', 'even_sequential',
                         'batchwise_shuffled_sequential',
                         'even_batchwise_shuffled_sequential')
        if mode is not None and mode not in allowed_modes:
            raise ValueError("Due to HDF5 limitations on advanced indexing, " +
                             "the '" + mode + "' iteration mode is not " +
                             "supported")

        if data_specs is None:
            data_specs = self._iter_data_specs

        space, source = data_specs
        sub_spaces, sub_sources = (
            (space.components, source) if isinstance(space, CompositeSpace)
            else ((space,), (source,)))
        convert = [None for sp, src in safe_izip(sub_spaces, sub_sources)]

        mode = (self._iter_subset_class if mode is None
                else resolve_iterator_class(mode))

        if batch_size is None:
            batch_size = getattr(self, '_iter_batch_size', None)
        if num_batches is None:
            num_batches = getattr(self, '_iter_num_batches', None)
        if rng is None and mode.stochastic:
            rng = self.rng
        return VariableImageDatasetIterator(
            dataset=self,
            subset_iterator=mode(
                self.num_examples, batch_size, num_batches, rng),
            data_specs=data_specs,
            return_tuple=return_tuple,
            convert=convert)
Example #3
    def next(self):
        next_index = self._subset_iterator.next()
        rvals = []
        if hasattr(self._dataset, 'get'):
            raw_data = self._next(next_index)
        else:
            raw_data = self._fallback_next(next_index)
        for (space, source, data, fn) in safe_izip(self._space, self._source,
                                                   raw_data, self._convert):
            rval = data
            if isinstance(space, SequenceDataSpace):
                # Add padding
                max_sequence_length = max(len(sample) for sample
                                          in data)
                batch = np.zeros((len(rval), max_sequence_length,
                                  space.dim), dtype=space.dtype)
                for i, sample in enumerate(rval):
                    batch[i, :len(sample)] = sample
                rvals.append(np.transpose(batch, (1, 0, 2)))

                # Create mask
                rvals.append(self._create_mask(rval))
            else:
                rvals.append(rval)

        # Reorder according to given data specs

        if not self._return_tuple and len(rvals) == 1:
            rvals, = rvals
        return tuple(rvals)
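The padding step above is easier to see on a toy batch. A numpy sketch of the same zero-pad-then-transpose logic; the ones-over-valid-steps mask convention is an assumption about what _create_mask returns:

import numpy as np

dim = 3
samples = [np.ones((5, dim)), np.ones((2, dim))]  # variable-length sequences

max_len = max(len(s) for s in samples)
batch = np.zeros((len(samples), max_len, dim))
mask = np.zeros((len(samples), max_len))
for i, sample in enumerate(samples):
    batch[i, :len(sample)] = sample   # zero-pad the short sequences
    mask[i, :len(sample)] = 1.        # mark the valid time steps

# (batch, time, dim) -> (time, batch, dim), as the iterator returns it
print(np.transpose(batch, (1, 0, 2)).shape, mask.shape)  # (5, 2, 3) (2, 5)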
Example #4
    def __call__(self, model, X, Y=None, **kwargs):
        def wrapped_layer_cost(layer, coeff):
            try:
                return layer.get_weight_decay(coeff)
            except NotImplementedError:
                if coeff == 0.:
                    return 0.
                else:
                    raise NotImplementedError(
                        str(type(layer)) +
                        " does not implement get_weight_decay.")

        layer_costs = [
            wrapped_layer_cost(layer, coeff)
            for layer, coeff in safe_izip(model.layers, self.coeffs)
        ]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_weight_decay'
            return rval
        else:
            total_cost = reduce(lambda x, y: x + y, layer_costs)
        total_cost.name = 'MLP_WeightDecay'

        assert total_cost.ndim == 0

        total_cost.name = 'weight_decay'

        return total_cost
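Stripped of theano, the cost assembled above is just a coefficient-weighted sum of squared weights per layer, with exact zeros dropped. A numpy sketch of the same arithmetic:

import numpy as np

weights = [np.ones((3, 4)), np.ones((4, 2))]  # stand-in per-layer weights
coeffs = [0.01, 0.]                           # per-layer decay coefficients

layer_costs = [coeff * np.sum(W ** 2)
               for W, coeff in zip(weights, coeffs)]
layer_costs = [cost for cost in layer_costs if cost != 0.]
total_cost = sum(layer_costs) if layer_costs else 0.
print(total_cost)  # 0.01 * 12 = 0.12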
Example #5
    def next(self):
        """
        Retrieves the next batch of examples.

        Returns
        -------
        next_batch : object
            An object representing a mini-batch of data, conforming
            to the space specified in the `data_specs` constructor
            argument to this iterator. Will be a tuple if more
            than one data source was specified or if the constructor
            parameter `return_tuple` was `True`.

        Raises
        ------
        StopIteration
            When there are no more batches to return.
        """
        next_index = self._subset_iterator.next()
        # TODO: handle fancy-index copies by allocating a buffer and
        # using np.take()

        rval = tuple(
            fn(data[next_index]) if fn else data[next_index]
            for data, fn in safe_izip(self._raw_data, self._convert))
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
Example #7
    def next(self):
        next_index = self._subset_iterator.next()
        rvals = []
        for space, source, data, fn in safe_izip(self._space, self._source,
                                                 self._raw_data,
                                                 self._convert):
            rval = data[next_index]
            if isinstance(space, SequenceDataSpace):
                # Add padding
                max_sequence_length = max(len(sample) for sample in rval)
                batch = np.zeros(
                    (len(rval), max_sequence_length) + data[0].shape[1:],
                    dtype=data[0].dtype)
                for i, sample in enumerate(rval):
                    batch[i, :len(sample)] = sample
                rval = np.transpose(batch, (1, 0, 2))
                if fn:
                    rval = fn(rval)
                rvals.append(rval)

                # Create mask
                if source in self.mask_needed:
                    rvals.append(self._create_mask(rval))
            else:
                if fn:
                    rval = fn(rval)
                rvals.append(rval)

        # Reorder according to given data specs

        if not self._return_tuple and len(rvals) == 1:
            rvals, = rvals
        return tuple(rvals)
Example #8
    def expr(self, model, data, **kwargs):
        """
        Returns a theano expression for the weight decay cost.
        """
        self.get_data_specs(model)[0].validate(data)
        layer_costs = [
            layer.get_weight_decay(coeff)
            for layer, coeff in safe_izip(model.hidden_layers, self.coeffs)
        ]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_weight_decay'
            return rval
        else:
            total_cost = reduce(lambda x, y: x + y, layer_costs)
        total_cost.name = 'DBM_WeightDecay'

        assert total_cost.ndim == 0

        total_cost.name = 'weight_decay'

        return total_cost
Example #9
    def __call__(self, model, X, Y=None, **kwargs):

        def wrapped_layer_cost(layer, coeff):
            try:
                return layer.get_weight_decay(coeff)
            except NotImplementedError:
                if coeff == 0.:
                    return 0.
                else:
                    raise NotImplementedError(
                        str(type(layer)) +
                        " does not implement get_weight_decay.")

        layer_costs = [wrapped_layer_cost(layer, coeff)
                       for layer, coeff in safe_izip(model.layers,
                                                     self.coeffs)]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_weight_decay'
            return rval
        else:
            total_cost = reduce(lambda x, y: x + y, layer_costs)
        total_cost.name = 'MLP_WeightDecay'

        assert total_cost.ndim == 0

        total_cost.name = 'weight_decay'

        return total_cost
Example #10
    def _fallback_next(self, next_index):
        # TODO: handle fancy-index copies by allocating a buffer and
        # using np.take()
        return tuple(
            fn(data[next_index]) if fn else data[next_index]
            for data, fn in safe_izip(self._raw_data, self._convert)
        )
Example #11
    def get_monitoring_channels(self, X=None, Y=None):
        """
        Note: X and Y may both be None, in the case when this is
              a layer of a bigger MLP.
        """

        state = X
        rval = OrderedDict()

        for layer, scale in safe_izip(self.mlp.layers, self._params):
            state = self.scale(state, layer, scale)
            ch = layer.get_monitoring_channels()
            for key in ch:
                rval[layer.layer_name+'_'+key] = ch[key]
            state = layer.fprop(state)
            args = [state]
            if layer is self.mlp.layers[-1]:
                args.append(Y)
            ch = layer.get_monitoring_channels_from_state(*args)
            for key in ch:
                rval[layer.layer_name+'_'+key]  = ch[key]

        for i in xrange(len(self._params)):
            name = 'scale_input_to_' + self.mlp.layers[i].layer_name
            rval[name + '_min'] = self._params[i].min()
            rval[name + '_mean'] = self._params[i].mean()
            rval[name + '_max'] = self._params[i].max()

        return rval
Example #12
    def next(self):

        # next numerical index
        next_file_index = self._subset_iterator.next()

        # associate numerical index with file from the dataset
        next_file = self._dataset.file_list[next_file_index][
            0]  # !!! added line to iterate over different index set !!!

        # lookup file's position in the hdf5 array
        offset, nframes, key, target = self._dataset.file_index[next_file]

        thop = 1.  # hardcoded and must match prepare_dataset.py!!!
        sup = np.arange(0, nframes - self._dataset.tframes,
                        np.int(self._dataset.tframes / thop))
        next_index = offset + sup

        spaces, sources = self._data_specs
        output = []

        for data, fn, source, space in safe_izip(self._raw_data, self._convert,
                                                 sources, spaces.components):
            if source == 'targets':
                output.append(target)
            else:
                design_mat = []
                for index in next_index:
                    # To return phase too (complex64 data), slice without
                    # taking np.abs.
                    X = np.abs(data[index:index +
                                    self._dataset.tframes, :])
                    design_mat.append(X.reshape((np.prod(X.shape), )))

                design_mat = np.vstack(design_mat)

                if self._dataset.tframes > 1:
                    # ideally we'd standardize in a preprocessing layer
                    # (so that standardization is built-in to the model rather
                    # than the dataset) but i haven't quite figured out how to do
                    # this yet for images, due to a memory error associated with
                    # a really big diagonal scaling matrix
                    # (however, it works fine for vectors)
                    design_mat = self._dataset.standardize(design_mat)

                if fn:
                    output.append(fn(design_mat))
                else:
                    output.append(design_mat)

        output.append(next_file)
        rval = tuple(output)
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
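The index arithmetic above (a file offset plus a strided support of frame windows) is easy to verify standalone. A toy sketch; with thop = 1. the windows do not overlap:

import numpy as np

tframes = 10   # frames per example
nframes = 45   # frames in this file
offset = 1000  # file's first row in the hdf5 array
thop = 1.      # hop size in units of tframes (1. = no overlap)

sup = np.arange(0, nframes - tframes, int(tframes / thop))
next_index = offset + sup
print(next_index)  # [1000 1010 1020 1030]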
Example #13
    def expr(self, model, data, **kwargs):
        """
        Returns a theano expression for the weight decay cost.
        """
        self.get_data_specs(model)[0].validate(data)
        layer_costs = [layer.get_weight_decay(coeff)
                       for layer, coeff in safe_izip(model.layers,
                                                     self.coeffs)]

        assert T.scalar() != 0. # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_weight_decay'
            return rval
        else:
            total_cost = reduce(lambda x, y: x + y, layer_costs)
        total_cost.name = 'RNN_WeightDecay'

        assert total_cost.ndim == 0

        total_cost.name = 'weight_decay'

        return total_cost
Example #14
    def add_dataset(self, dataset, mode="sequential", batch_size=None, num_batches=None, seed=None):
        """
        Determines the data used to calculate the values of each channel.

        Parameters
        ----------
        dataset : object
            A `pylearn2.datasets.Dataset` object.
        mode : str or object, optional
            Iteration mode; see the docstring of the `iterator` method
            on `pylearn2.datasets.Dataset` for details.
        batch_size : int, optional
            The size of an individual batch. Optional if `mode` is
            'sequential' and `num_batches` is specified (batch size
            will be calculated based on full dataset size).
        num_batches : int, optional
            The total number of batches. Unnecessary if `mode` is
            'sequential' and `batch_size` is specified (number of
            batches will be calculated based on full dataset size).
        seed : int, optional
            Seed for the random number generator. Required (per dataset)
            when a stochastic iteration mode is used.
        """
        # The user can omit the lists if only one dataset is set
        if not isinstance(dataset, list):
            dataset = [dataset]
        if not isinstance(mode, list):
            mode = [mode]
        if not isinstance(batch_size, list):
            batch_size = [batch_size]
        if not isinstance(num_batches, list):
            num_batches = [num_batches]
        if seed is None:
            seed = [None] * len(dataset)
        if not isinstance(seed, list):
            seed = [seed]
        if any([len(l) != len(dataset)
                for l in [mode, batch_size, num_batches, seed]]):
            raise ValueError("make sure each dataset has its iteration "
                             "mode, batch size and number of batches.")
        for (d, m, b, n, sd) in safe_izip(dataset, mode, batch_size,
                                          num_batches, seed):
            try:
                it = d.iterator(mode=m, batch_size=b, num_batches=n,
                                topo=self.topo, targets=self.require_label,
                                rng=sd)
            except ValueError as exc:
                raise ValueError("invalid iteration parameters in "
                                 "Monitor.add_dataset: " + str(exc))
            if it.stochastic:
                # must be a seed, not a random number generator
                # if it were a random number generator, different iterators using
                # it would update its state, so we would not get the same iterator
                # each time
                # Also, must not be None, because this makes the iterator pick
                # a seed based on the clock
                if not isinstance(sd, (list, tuple, int)):
                    raise TypeError(
                        "Monitor requires a seed (not a random number generator) when using stochastic iteration modes."
                    )
            else:
                assert sd is None  # the iterator should catch this, but let's double-check

            if d not in self._datasets:
                self._datasets.append(d)
                self._iteration_mode.append(m)
                self._batch_size.append(b)
                self._num_batches.append(n)
                self._rng_seed.append(sd)
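The normalization at the top of add_dataset broadcasts scalar arguments into per-dataset lists. A self-contained sketch of just that step (the string 'train' stands in for a dataset object):

def broadcast_args(dataset, mode, batch_size, num_batches, seed):
    # Scalars become one-element lists; a missing seed becomes one
    # None per dataset.
    if not isinstance(dataset, list):
        dataset = [dataset]
    if not isinstance(mode, list):
        mode = [mode]
    if not isinstance(batch_size, list):
        batch_size = [batch_size]
    if not isinstance(num_batches, list):
        num_batches = [num_batches]
    if seed is None:
        seed = [None] * len(dataset)
    if not isinstance(seed, list):
        seed = [seed]
    return dataset, mode, batch_size, num_batches, seed

print(broadcast_args('train', 'sequential', 100, None, None))
# (['train'], ['sequential'], [100], [None], [None])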
Example #15
    def next(self):
        next_index = self._subset_iterator.next()
        rvals = []
        for space, source, data, fn in safe_izip(self._space, self._source,
                                                 self._raw_data,
                                                 self._convert):
            rval = data[next_index]
            if isinstance(space, SequenceDataSpace):
                # Add padding
                max_sequence_length = max(len(sample) for sample
                                          in rval)
                batch = np.zeros((len(rval), max_sequence_length) +
                                 data[0].shape[1:], dtype=data[0].dtype)
                for i, sample in enumerate(rval):
                    batch[i, :len(sample)] = sample

                # Create mask
                if source in self.mask_needed:
                    mask = self._create_mask(rval)
                rval = np.swapaxes(batch, 0, 1)
                if fn:
                    rval = fn(rval)
                rvals.append(rval)
                if source in self.mask_needed:
                    rvals.append(mask)
            else:
                if fn:
                    rval = fn(rval)
                rvals.append(rval)

        # Reorder according to given data specs

        if not self._return_tuple and len(rvals) == 1:
            rvals, = rvals
        return tuple(rvals)
Example #16
    def next(self):
        next_index = self._subset_iterator.next()
        rval = tuple(
            fn(batch) if fn else batch for batch, fn in safe_izip(
                self._dataset.get(self._source, next_index), self._convert))

        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
Example #17
        def getFeatures(indexes):
            """
            Return character-level source sequences for the examples at
            ``indexes`` and store the corresponding target words in
            ``self.lastY``.
            """
            if self._load_to_memory:
                sequences = [self.samples_sequences[i] for i in indexes]
            else:
                sequences = [self.node[i] for i in indexes]
            # Get a random source word index for the "ngram" in each sequence
            source_i = [numpy.random.randint(self.frame_length // 2 + 1,
                                             len(s) - self.frame_length // 2,
                                             1)[0]
                        for s in sequences]
            # Target index: normal around the source index, clipped to bounds
            target_i = [min(abs(int(numpy.random.normal(s_i,
                                                        self.frame_length / 3.0))),
                            len(s) - 1)
                        for s_i, s in safe_izip(source_i, sequences)]
            preX = [s[i] for i, s in safe_izip(source_i, sequences)]

            def make_sequence(word):
                # Map each character of the word to its integer label
                # (0 for unknown characters) and append the end-of-word
                # marker.
                seq = [[self._char_labels.get(c, 0)]
                       for c in self._inv_words[word]]
                seq.append([self._eow])
                return numpy.asarray(seq)

            X = numpy.asarray([make_sequence(word) for word in preX])
            y = numpy.asarray([numpy.asarray([s[i]])
                               for i, s in safe_izip(target_i, sequences)])
            # Target words mapped to integers greater than the input max are
            # set to 1 (unknown)
            y[y >= 30000] = 1
            # Store the targets generated by these indices.
            self.lastY = (y, indexes)
            if self._use_words:
                self.lastPreX = preX
            return X
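A self-contained toy version of make_sequence above, with illustrative stand-ins for the dataset's _inv_words, _char_labels and _eow attributes:

import numpy

inv_words = {7: 'cat'}                  # word id -> string
char_labels = {'a': 2, 'c': 3, 't': 4}  # char -> integer label, 0 = unknown
eow = 1                                 # end-of-word marker

def make_sequence(word):
    seq = [[char_labels.get(c, 0)] for c in inv_words[word]]
    seq.append([eow])
    return numpy.asarray(seq)

print(make_sequence(7).ravel())  # [3 2 4 1]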
Example #18
    def next(self):
        """
        Retrieves the next batch of examples.

        Returns
        -------
        next_batch : object
            An object representing a mini-batch of data, conforming
            to the space specified in the `data_specs` constructor
            argument to this iterator. Will be a tuple if more
            than one data source was specified or if the constructor
            parameter `return_tuple` was `True`.

        Raises
        ------
        StopIteration
            When there are no more batches to return.
        """
        next_index = self._subset_iterator.next()
        # !!! added line to iterate over different index set !!!
        next_index = self._dataset.support[next_index]

        spaces, sources = self._data_specs
        output = []

        for data, fn, source in safe_izip(self._raw_data, self._convert,
                                          sources):
            if source == 'targets':
                if fn:
                    output.append(fn(data[next_index, :]))
                else:
                    output.append(data[next_index, :])
            else:
                design_mat = []
                for index in next_index:
                    X = np.abs(data[index:index + self._dataset.tframes, :])
                    design_mat.append(X.reshape((np.prod(X.shape),)))

                design_mat = np.vstack(design_mat)

                if self._dataset.tframes > 1:
                    # ideally we'd standardize in a preprocessing layer
                    # (so that standardization is built-in to the model rather
                    # than the dataset) but i haven't quite figured out how to
                    # do this yet for images, due to a memory error associated
                    # with a really big diagonal scaling matrix
                    # (however, it works fine for vectors)
                    design_mat = self._dataset.standardize(design_mat)

                if fn:
                    output.append(fn(design_mat))
                else:
                    output.append(design_mat)

        rval = tuple(output)
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
Example #19
    def next(self):

        # next numerical index
        next_file_index = self._subset_iterator.next()

        # associate numerical index with file from the dataset
        # !!! added line to iterate over different index set !!!
        next_file = self._dataset.file_list[next_file_index][0]

        # lookup file's position in the hdf5 array
        offset, nframes, key, target = self._dataset.file_index[next_file]

        thop = 1.  # hardcoded and must match prepare_dataset.py!!!
        sup = np.arange(0, nframes - self._dataset.tframes,
                        np.int(self._dataset.tframes / thop))
        next_index = offset + sup

        spaces, sources = self._data_specs
        output = []

        for data, fn, source, space in safe_izip(self._raw_data,
                                                 self._convert, sources,
                                                 spaces.components):
            if source == 'targets':
                output.append(target)
            else:
                design_mat = []
                for index in next_index:
                    # To return phase too (complex64 data), slice without
                    # taking np.abs.
                    X = np.abs(data[index:index + self._dataset.tframes, :])
                    design_mat.append(X.reshape((np.prod(X.shape),)))

                design_mat = np.vstack(design_mat)
                if self._dataset.tframes > 1:
                    # ideally we'd standardize in a preprocessing layer
                    # (so that standardization is built-in to the model rather
                    # than the dataset) but i haven't quite figured out how to
                    # do this yet for images, due to a memory error associated
                    # with a really big diagonal scaling matrix
                    # (however, it works fine for vectors)
                    design_mat = self._dataset.standardize(design_mat)

                if fn:
                    output.append(fn(design_mat))
                else:
                    output.append(design_mat)

        output.append(next_file)
        rval = tuple(output)
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
Example #20
    def next(self):
        next_index = self._subset_iterator.next()
        rval = tuple(
            fn(batch) if fn else batch for batch, fn in
            safe_izip(self._dataset.get(self._source, next_index),
                      self._convert)
        )

        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
Example #21
def get_objs():
    n = 0.
    aves = [0. for model in models]
    m = 0
    for X, Y in train.iterator(batch_size=5000, mode='sequential',
                               targets=True):
        objs = [func(X, Y) for func in funcs]
        n += 1.
        aves = [ave + (obj - ave) / n for ave, obj in safe_izip(aves, objs)]
        m += X.shape[0]
    if m != 10000:
        raise AssertionError(str(m))
    return aves
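The `ave + (obj - ave) / n` update above is the standard incremental mean, which avoids keeping all per-batch objectives in memory. A quick check that it matches a direct average:

import numpy as np

objs = [3., 5., 10.]
ave, n = 0., 0.
for obj in objs:
    n += 1.
    ave = ave + (obj - ave) / n
print(ave, np.mean(objs))  # both 6.0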
Example #22
    def expr(self, model, data, **kwargs):
        """Returns a theano expression for the cost function.

        Parameters
        ----------
        model : MLP
        data : tuple
            Should be a valid occupant of
            CompositeSpace(model.get_input_space(),
            model.get_output_space())

        Returns
        -------
        total_cost : theano.gof.Variable
            coeff * sum(abs(weights))
            added up for each set of weights.
        """

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        self.get_data_specs(model)[0].validate(data)
        if isinstance(self.coeffs, list):
            warnings.warn("Coefficients should be given as a dictionary "
                          "with layer names as keys. Support for "
                          "coefficients given as a list will be removed "
                          "on or after 03/06/2015.")
            layer_costs = [
                layer.get_l1_weight_decay(coeff)
                for layer, coeff in safe_izip(model.layers, self.coeffs)
            ]
            layer_costs = [cost for cost in layer_costs if cost != 0.]

        else:
            layer_costs = []
            for layer in model.layers:
                layer_name = layer.layer_name
                if layer_name in self.coeffs:
                    cost = layer.get_l1_weight_decay(self.coeffs[layer_name])
                    if cost != 0.:
                        layer_costs.append(cost)

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_l1_penalty'
            return rval
        else:
            total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_L1Penalty'

        assert total_cost.ndim == 0

        total_cost.name = 'l1_penalty'

        return total_cost
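The dictionary branch above only charges layers whose names appear in coeffs. A plain-python sketch of that lookup; the numbers stand in for what get_l1_weight_decay would return per unit coefficient:

coeffs = {'h0': 1e-4, 'y': 1e-5}              # layer_name -> L1 coefficient
layer_l1 = {'h0': 12.0, 'h1': 7.0, 'y': 3.0}  # stand-in sum(abs(W)) per layer

layer_costs = []
for layer_name, l1 in layer_l1.items():
    if layer_name in coeffs:
        cost = coeffs[layer_name] * l1
        if cost != 0.:
            layer_costs.append(cost)
print(sum(layer_costs))  # h1 has no coefficient and contributes nothing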
Example #23
    def expr(self, model, data, ** kwargs):
        """Returns a theano expression for the cost function.

        Parameters
        ----------
        model : MLP
        data : tuple
            Should be a valid occupant of
            CompositeSpace(model.get_input_space(),
            model.get_output_space())

        Returns
        -------
        total_cost : theano.gof.Variable
            coeff * sum(abs(weights))
            added up for each set of weights.
        """

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        self.get_data_specs(model)[0].validate(data)
        if isinstance(self.coeffs, list):
            warnings.warn("Coefficients should be given as a dictionary "
                          "with layer names as keys. Support for "
                          "coefficients given as a list will be removed "
                          "on or after 03/06/2015.")
            layer_costs = [layer.get_l1_weight_decay(coeff)
                           for layer, coeff in safe_izip(model.layers,
                                                         self.coeffs)]
            layer_costs = [cost for cost in layer_costs if cost != 0.]

        else:
            layer_costs = []
            for layer in model.layers:
                layer_name = layer.layer_name
                if layer_name in self.coeffs:
                    cost = layer.get_l1_weight_decay(self.coeffs[layer_name])
                    if cost != 0.:
                        layer_costs.append(cost)

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_l1_penalty'
            return rval
        else:
            total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_L1Penalty'

        assert total_cost.ndim == 0

        total_cost.name = 'l1_penalty'

        return total_cost
Example #24
    def expr(self, model, data, **kwargs):
        """Returns a theano expression for the cost function.

        Parameters
        ----------
        model : MLP
        data : tuple
            Should be a valid occupant of
            CompositeSpace(model.get_input_space(),
            model.get_output_space())

        Returns
        -------
        total_cost : theano.gof.Variable
            coeff * sum(sqr(weights))
            added up for each set of weights.
        """
        self.get_data_specs(model)[0].validate(data)

        def wrapped_layer_cost(layer, coeff):
            try:
                return layer.get_weight_decay(coeff)
            except NotImplementedError:
                if coeff == 0.:
                    return 0.
                else:
                    reraise_as(
                        NotImplementedError(
                            str(type(layer)) +
                            " does not implement get_weight_decay."))

        layer_costs = [
            wrapped_layer_cost(layer, coeff)
            for layer, coeff in safe_izip(model.layers, self.coeffs)
        ]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_weight_decay'
            return rval
        else:
            total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_WeightDecay'

        assert total_cost.ndim == 0

        total_cost.name = 'weight_decay'

        return total_cost
Example #25
    def get(self, source, indexes):
        """
        Returns required examples for the required data sources, e.g. the first
        ten features and targets pairs or the last five targets

        Parameters
        ----------
        source : tuple of str
            Tuple of source names
        indexes : slice
            Examples to fetch
        """
        assert type(indexes) is slice
        # Translate indexes to stay in the [start, stop] range
        indexes = slice(indexes.start + self.start, indexes.stop + self.start,
                        indexes.step)
        # Make sure that requested sources are provided by the dataset
        self._validate_source(source)

        rval = []
        # Axes for a single example
        single_axes = [a for a in self.axes if a != 'b']
        for so in source:
            if so == 'features':
                images = self.X[indexes]
                shapes = self.s[indexes]
                space = self.data_specs[0].components[0]
                # If batch size has changed, reallocate a buffer
                if self.X_buffer is None or len(self.X_buffer) != len(images):
                    self.X_buffer = space.get_origin_batch(len(images))
                for i, (img, s) in enumerate(safe_izip(images, shapes)):
                    # Transpose image in 'b01c' format to comply with
                    # transformer interface
                    b01c = img.reshape(s).transpose(
                        [single_axes.index(a) for a in (0, 1, 'c')])
                    # Assign i'th example in the batch with the preprocessed
                    # image
                    self.X_buffer.transpose(
                        [('b', 0, 1, 'c').index(a) for a in self.axes]
                    )[i] = self.transformer(b01c)
                if self.rescale is not None:
                    self.X_buffer /= self.rescale
                rval.append(self.X_buffer)
            elif so == 'targets':
                targets = self.y[indexes]
                space = self.data_specs[0].components[1]
                # If batch size has changed, reallocate a buffer
                if self.y_buffer is None or len(self.y_buffer) != len(targets):
                    self.y_buffer = space.get_origin_batch(len(targets))
                rval.append(self.y[indexes])
        return tuple(rval)
Example #26
    def get(self, source, indexes):
        """
        Returns required examples for the required data sources, e.g. the first
        ten features and targets pairs or the last five targets

        Parameters
        ----------
        source : tuple of str
            Tuple of source names
        indexes : slice
            Examples to fetch
        """
        assert type(indexes) is slice
        # Translate indexes to stay in the [start, stop] range
        indexes = slice(indexes.start + self.start, indexes.stop + self.start,
                        indexes.step)
        # Make sure that requested sources are provided by the dataset
        self._validate_source(source)

        rval = []
        # Axes for a single example
        single_axes = [a for a in self.axes if a != 'b']
        for so in source:
            if so == 'features':
                images = self.X[indexes]
                shapes = self.s[indexes]
                space = self.data_specs[0].components[0]
                # If batch size has changed, reallocate a buffer
                if self.X_buffer is None or len(self.X_buffer) != len(images):
                    self.X_buffer = space.get_origin_batch(len(images))
                for i, (img, s) in enumerate(safe_izip(images, shapes)):
                    # Transpose image in 'b01c' format to comply with
                    # transformer interface
                    b01c = img.reshape(s).transpose(
                        [single_axes.index(a) for a in (0, 1, 'c')])
                    # Assign i'th example in the batch with the preprocessed
                    # image
                    self.X_buffer.transpose([
                        ('b', 0, 1, 'c').index(a) for a in self.axes
                    ])[i] = self.transformer(b01c)
                if self.rescale is not None:
                    self.X_buffer /= self.rescale
                rval.append(self.X_buffer)
            elif so == 'targets':
                targets = self.y[indexes]
                space = self.data_specs[0].components[1]
                # If batch size has changed, reallocate a buffer
                if self.y_buffer is None or len(self.y_buffer) != len(targets):
                    self.y_buffer = space.get_origin_batch(len(targets))
                rval.append(self.y[indexes])
        return tuple(rval)
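The axis bookkeeping in get is the subtle part: the buffer is stored in self.axes order but written through a view in ('b', 0, 1, 'c') order. A numpy sketch with an illustrative axes order:

import numpy as np

axes = ('c', 'b', 0, 1)           # storage order of the batch buffer
b01c = np.zeros((32, 32, 3))      # one image in (0, 1, 'c') layout

batch = np.zeros((3, 5, 32, 32))  # buffer in `axes` order, batch size 5
# View the buffer in ('b', 0, 1, 'c') order, then assign example i
view = batch.transpose([axes.index(a) for a in ('b', 0, 1, 'c')])
view[2] = b01c                    # writes through to `batch`
print(view.shape)                 # (5, 32, 32, 3)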
Example #27
def get_objs():
    n = 0.
    aves = [0. for model in models]
    m = 0
    for X, Y in train.iterator(batch_size=5000,
                               mode='sequential',
                               targets=True):
        objs = [func(X, Y) for func in funcs]
        n += 1.
        aves = [ave + (obj - ave) / n for ave, obj in safe_izip(aves, objs)]
        m += X.shape[0]
    if m != 60000:
        raise AssertionError(str(m))
    return aves
Example #28
    def __init__(self,
                 dataset,
                 data_specs,
                 subset_iterator,
                 return_tuple=False,
                 convert=None):
        # Unpack the data specs into two tuples
        space, source = data_specs
        if not isinstance(source, tuple):
            source = (source, )

        # Remove the requested mask from the data specs before calling
        # the parent constructor
        self._original_source = source
        mask_seen, sequence_seen = False, False
        self.mask_needed = []
        retain = []
        for i, (subspace,
                subsource) in enumerate(safe_izip(space.components, source)):
            if isinstance(subspace, SequenceMaskSpace):
                if not subsource.endswith('_mask') or \
                        subsource[:-5] not in source:
                    raise ValueError("SequenceDatasetIterator received "
                                     "data_specs containing a "
                                     "SequenceMaskSpace with corresponding "
                                     "source %s, but the source should end "
                                     "with `_mask` in order to match it to "
                                     "the correct SequenceDataSpace"
                                     % subsource)
                mask_seen = True
                self.mask_needed.append(subsource[:-5])
            else:
                retain.append(i)
                if isinstance(subspace, SequenceDataSpace):
                    sequence_seen = True
        if mask_seen != sequence_seen and i + 1 != len(retain):
            raise ValueError("SequenceDatasetIterator was asked to iterate "
                             "over a sequence mask without data or vice versa")
        space = space.restrict(retain)
        source = tuple(source[i] for i in retain)
        super(SequenceDatasetIterator,
              self).__init__(dataset,
                             subset_iterator, (space, source),
                             return_tuple=return_tuple,
                             convert=convert)
        if not isinstance(space, CompositeSpace):
            space = (space, )
        else:
            space = space.components
        assert len(space) == len(source)
        self._original_space = space
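The constructor's mask bookkeeping reduces to string checks on the source names: every '*_mask' source must name an existing data source, and only non-mask sources are retained. A sketch with illustrative source names:

source = ('features', 'features_mask', 'targets')

mask_needed, retain = [], []
for i, subsource in enumerate(source):
    if subsource.endswith('_mask'):
        assert subsource[:-5] in source  # must match a real data source
        mask_needed.append(subsource[:-5])
    else:
        retain.append(i)

print(mask_needed)                       # ['features']
print(tuple(source[i] for i in retain))  # ('features', 'targets')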
Example #29
    def expr(self, model, data, ** kwargs):
        """Returns a theano expression for the cost function.

        Parameters
        ----------
        model : MLP
        data : tuple
            Should be a valid occupant of
            CompositeSpace(model.get_input_space(),
            model.get_output_space())

        Returns
        -------
        total_cost : theano.gof.Variable
            coeff * sum(sqr(weights))
            added up for each set of weights.
        """
        self.get_data_specs(model)[0].validate(data)

        def wrapped_layer_cost(layer, coeff):
            try:
                return layer.get_weight_decay(coeff)
            except NotImplementedError:
                if coeff == 0.:
                    return 0.
                else:
                    reraise_as(NotImplementedError(str(type(layer)) +
                               " does not implement get_weight_decay."))

        layer_costs = [wrapped_layer_cost(layer, coeff)
                       for layer, coeff
                       in safe_izip(model.layers, self.coeffs)]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_weight_decay'
            return rval
        else:
            total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_WeightDecay'

        assert total_cost.ndim == 0

        total_cost.name = 'weight_decay'

        return total_cost
Example #30
    def __init__(self, dataset, data_specs, subset_iterator,
                 return_tuple=False, convert=None):
        # Unpack the data specs into two tuples
        space, source = data_specs
        if not isinstance(source, tuple):
            source = (source,)

        # Remove the requested mask from the data specs before calling
        # the parent constructor
        self._original_source = source
        mask_seen, sequence_seen = False, False
        self.mask_needed = []
        retain = []
        for i, (subspace, subsource) in enumerate(safe_izip(space.components,
                                                            source)):
            if isinstance(subspace, SequenceMaskSpace):
                if not subsource.endswith('_mask') or \
                        subsource[:-5] not in source:
                    raise ValueError("SequenceDatasetIterator received "
                                     "data_specs containing a "
                                     "SequenceMaskSpace with corresponding "
                                     "source %s, but the source should end "
                                     "with `_mask` in order to match it to "
                                     "the correct SequenceDataSpace"
                                     % subsource)
                mask_seen = True
                self.mask_needed.append(subsource[:-5])
            else:
                retain.append(i)
                if isinstance(subspace, SequenceDataSpace):
                    sequence_seen = True
        if mask_seen != sequence_seen and i + 1 != len(retain):
            raise ValueError("SequenceDatasetIterator was asked to iterate "
                             "over a sequence mask without data or vice versa")
        space = space.restrict(retain)
        source = tuple(source[i] for i in retain)
        super(SequenceDatasetIterator, self).__init__(
            dataset, subset_iterator, (space, source),
            return_tuple=return_tuple, convert=convert
        )
        if not isinstance(space, CompositeSpace):
            space = (space,)
        else:
            space = space.components
        assert len(space) == len(source)
        self._original_space = space
Example #31
    def fprop(self, state_below, apply_dropout=False):

        if apply_dropout:
            warnings.warn("dropout should be implemented with "
                          "fixed_var_descr to make sure it works with BGD; "
                          "this is just a hack to get it working with SGD")
            theano_rng = MRG_RandomStreams(self.rng.randint(2 ** 15))
            state_below = self.apply_dropout(
                state=state_below,
                include_prob=self.dropout_input_include_prob,
                theano_rng=theano_rng)

        rval = self.layers[0].fprop(state_below)

        if apply_dropout:
            dropout = self.dropout_include_probs[0]
            rval = self.apply_dropout(state=rval, include_prob=dropout,
                                      theano_rng=theano_rng)

        for layer, dropout in safe_izip(self.layers[1:],
                                        self.dropout_include_probs[1:]):
            rval = layer.fprop(rval)
            if apply_dropout:
                rval = self.apply_dropout(state=rval, include_prob=dropout,
                                          theano_rng=theano_rng)

        return rval
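The dropout applied between layers above amounts to multiplying each state by a Bernoulli mask. A numpy sketch, shown without the 1/include_prob rescaling some implementations apply:

import numpy as np

rng = np.random.RandomState(42)

def apply_dropout(state, include_prob):
    # Keep each unit with probability include_prob, zero it otherwise.
    mask = rng.binomial(1, include_prob, size=state.shape)
    return state * mask

h = np.ones((2, 4))
print(apply_dropout(h, 0.5))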
Example #32
    def __call__(self, model, X, Y=None, **kwargs):

        layer_costs = [layer.get_weight_decay(coeff)
                       for layer, coeff in safe_izip(model.hidden_layers,
                                                     self.coeffs)]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_weight_decay'
            return rval
        else:
            total_cost = reduce(lambda x, y: x + y, layer_costs)
        total_cost.name = 'DBM_WeightDecay'

        assert total_cost.ndim == 0

        total_cost.name = 'weight_decay'

        return total_cost
Example #33
    def get_monitoring_channels(self, model, X, Y = None, drop_mask = None):

        rval = OrderedDict()

        scratch = self(model, X, drop_mask, return_locals = True)

        history = scratch['history']
        X_tilde = scratch['X_tilde']


        for ii, state in enumerate(history):
            rval['obj_after_' + str(ii)] = self.cost_from_state(state,
                    model, X, X_tilde)

            if ii > 0:
                prev_state = history[ii-1]
                V_hat = state['V_hat']
                prev_V_hat = prev_state['V_hat']
                assert V_hat is not prev_V_hat
                rval['max_pixel_diff[%d]'%ii] = abs(V_hat-prev_V_hat).max()
                h0 = state['H_hat'][0]
                prev_h0 = prev_state['H_hat'][0]
                assert h0 is not prev_h0
                rval['max_h0_diff[%d]' % ii] = abs(h0[0] - prev_h0[0]).max()

        final_state = history[-1]



        layers = [ model.visible_layer ] + model.hidden_layers
        states = [ final_state['V_hat'] ] + final_state['H_hat']

        for layer, state in safe_izip(layers, states):
            d = layer.get_monitoring_channels_from_state(state)
            for key in d:
                mod_key = 'final_denoise_' + layer.layer_name + '_' + key
                assert mod_key not in rval
                rval[mod_key] = d[key]

        return rval
Example #34
    def cost_from_state(self, state, dbm, X, X_tilde):

        V_hat = state['V_hat']

        beta = dbm.visible_layer.beta

        model_term = beta * (X_tilde-V_hat)
        noise_term = self.noise_precision * (X_tilde-X)
        diff = model_term - noise_term
        assert diff.ndim == 4
        smd_cost = T.sqr(diff).sum(axis=(1,2,3)).mean()
        assert smd_cost.ndim == 0

        if not hasattr(self, 'both_directions'):
            self.both_directions = False

        total_cost = smd_cost

        if self.l1_act_targets is not None:
            for mf_state, targets, coeffs, eps, layer in safe_izip(state['H_hat'] ,
                    self.l1_act_targets, self.l1_act_coeffs, self.l1_act_eps, dbm.hidden_layers):
                assert not isinstance(targets, str)
                if not isinstance(targets, (list, tuple)):
                    assert not isinstance(mf_state, (list, tuple))
                    mf_state = [ mf_state ]
                    targets = [ targets ]
                    coeffs = [ coeffs ]
                    eps = [ eps ]
                total_cost += layer.get_l1_activation_cost(
                        state = mf_state,
                        targets = targets,
                        coeffs = coeffs,
                        eps = eps)
                # end for substates
            # end for layers
        # end if act penalty

        total_cost.name = 'total_cost(V_hat = %s)' % V_hat.name

        return total_cost
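The smd_cost expression above reduces squared differences over everything but the batch axis, then averages over the batch. The same arithmetic in numpy, on illustrative toy shapes:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(2, 3, 4, 4)  # (batch, channels, rows, cols), illustrative
X_tilde = X + 0.1 * rng.randn(2, 3, 4, 4)
V_hat = X_tilde.copy()     # stand-in for the mean-field reconstruction
beta = noise_precision = 1.0

model_term = beta * (X_tilde - V_hat)
noise_term = noise_precision * (X_tilde - X)
diff = model_term - noise_term
smd_cost = np.square(diff).sum(axis=(1, 2, 3)).mean()
print(smd_cost)  # one number per example, averaged over the batch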
Example #35
    def cost_from_state(self, state, dbm, X, X_tilde):

        V_hat = state['V_hat']

        beta = dbm.visible_layer.beta

        model_term = beta * (X_tilde - V_hat)
        noise_term = self.noise_precision * (X_tilde - X)
        diff = model_term - noise_term
        assert diff.ndim == 4
        smd_cost = T.sqr(diff).sum(axis=(1, 2, 3)).mean()
        assert smd_cost.ndim == 0

        if not hasattr(self, 'both_directions'):
            self.both_directions = False

        total_cost = smd_cost

        if self.l1_act_targets is not None:
            for mf_state, targets, coeffs, eps, layer in safe_izip(
                    state['H_hat'], self.l1_act_targets, self.l1_act_coeffs,
                    self.l1_act_eps, dbm.hidden_layers):
                assert not isinstance(targets, str)
                if not isinstance(targets, (list, tuple)):
                    assert not isinstance(mf_state, (list, tuple))
                    mf_state = [mf_state]
                    targets = [targets]
                    coeffs = [coeffs]
                    eps = [eps]
                total_cost += layer.get_l1_activation_cost(state=mf_state,
                                                           targets=targets,
                                                           coeffs=coeffs,
                                                           eps=eps)
                # end for substates
            # end for layers
        # end if act penalty

        total_cost.name = 'total_cost(V_hat = %s)' % V_hat.name

        return total_cost
Example #36
    def expr(self, model, data, **kwargs):
        """Returns a theano expression for the cost function.

        Parameters
        ----------
        model : MLP
        data : tuple
            Should be a valid occupant of
            CompositeSpace(model.get_input_space(),
            model.get_output_space())

        Returns
        -------
        total_cost : theano.gof.Variable
            coeff * sum(abs(weights))
            added up for each set of weights.
        """
        self.get_data_specs(model)[0].validate(data)
        layer_costs = [
            layer.get_l1_weight_decay(coeff)
            for layer, coeff in safe_izip(model.layers, self.coeffs)
        ]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_l1_penalty'
            return rval
        else:
            total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_L1Penalty'

        assert total_cost.ndim == 0

        total_cost.name = 'l1_penalty'

        return total_cost
Example #37
    def iterator(self,
                 mode=None,
                 batch_size=None,
                 num_batches=None,
                 rng=None,
                 data_specs=None,
                 return_tuple=False):
        allowed_modes = ('sequential', 'random_slice', 'even_sequential',
                         'batchwise_shuffled_sequential',
                         'even_batchwise_shuffled_sequential')
        if mode is not None and mode not in allowed_modes:
            raise ValueError("Due to HDF5 limitations on advanced indexing, " +
                             "the '" + mode + "' iteration mode is not " +
                             "supported")

        if data_specs is None:
            data_specs = self._iter_data_specs

        space, source = data_specs
        sub_spaces, sub_sources = ((space.components, source) if isinstance(
            space, CompositeSpace) else ((space, ), (source, )))
        convert = [None for sp, src in safe_izip(sub_spaces, sub_sources)]

        mode = (self._iter_subset_class
                if mode is None else resolve_iterator_class(mode))

        if batch_size is None:
            batch_size = getattr(self, '_iter_batch_size', None)
        if num_batches is None:
            num_batches = getattr(self, '_iter_num_batches', None)
        if rng is None and mode.stochastic:
            rng = self.rng
        return VariableImageDatasetIterator(dataset=self,
                                            subset_iterator=mode(
                                                self.num_examples, batch_size,
                                                num_batches, rng),
                                            data_specs=data_specs,
                                            return_tuple=return_tuple,
                                            convert=convert)
Example #38
    def expr(self, model, data, ** kwargs):
        """Returns a theano expression for the cost function.

        Parameters
        ----------
        model : MLP
        data : tuple
            Should be a valid occupant of
            CompositeSpace(model.get_input_space(),
            model.get_output_space())

        Returns
        -------
        total_cost : theano.gof.Variable
            coeff * sum(abs(weights))
            added up for each set of weights.
        """
        self.get_data_specs(model)[0].validate(data)
        layer_costs = [layer.get_l1_weight_decay(coeff)
                       for layer, coeff
                       in safe_izip(model.layers, self.coeffs)]

        assert T.scalar() != 0.  # make sure theano semantics do what I want
        layer_costs = [cost for cost in layer_costs if cost != 0.]

        if len(layer_costs) == 0:
            rval = T.as_tensor_variable(0.)
            rval.name = '0_l1_penalty'
            return rval
        else:
            total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_L1Penalty'

        assert total_cost.ndim == 0

        total_cost.name = 'l1_penalty'

        return total_cost
Example #39
    def get_monitoring_channels(self, model, X, Y=None, drop_mask=None):

        rval = OrderedDict()

        scratch = self(model, X, drop_mask, return_locals=True)

        history = scratch['history']
        X_tilde = scratch['X_tilde']

        for ii, state in enumerate(history):
            rval['obj_after_' + str(ii)] = self.cost_from_state(
                state, model, X, X_tilde)

            if ii > 0:
                prev_state = history[ii - 1]
                V_hat = state['V_hat']
                prev_V_hat = prev_state['V_hat']
                assert V_hat is not prev_V_hat
                rval['max_pixel_diff[%d]' % ii] = abs(V_hat - prev_V_hat).max()
                h0 = state['H_hat'][0]
                prev_h0 = prev_state['H_hat'][0]
                assert h0 is not prev_h0
                rval['max_h0_diff[%d]' % ii] = abs(h0[0] - prev_h0[0]).max()

        final_state = history[-1]

        layers = [model.visible_layer] + model.hidden_layers
        states = [final_state['V_hat']] + final_state['H_hat']

        for layer, state in safe_izip(layers, states):
            d = layer.get_monitoring_channels_from_state(state)
            for key in d:
                mod_key = 'final_denoise_' + layer.layer_name + '_' + key
                assert mod_key not in rval
                rval[mod_key] = d[key]

        return rval
Example #40
    def cost_from_states(self,
                         state,
                         new_state,
                         dbm,
                         X,
                         Y,
                         drop_mask,
                         drop_mask_Y,
                         new_drop_mask,
                         new_drop_mask_Y,
                         return_locals=False):
        """
        .. todo::

            WRITEME
        """

        if not self.supervised:
            assert drop_mask_Y is None
            assert new_drop_mask_Y is None
        if self.supervised:
            assert drop_mask_Y is not None
            if self.both_directions:
                assert new_drop_mask_Y is not None
            assert Y is not None

        V_hat_unmasked = state['V_hat_unmasked']
        assert V_hat_unmasked.ndim == X.ndim

        if not hasattr(self, 'use_sum'):
            self.use_sum = False

        inpaint_cost = self.get_inpaint_cost(dbm, X, V_hat_unmasked, drop_mask,
                                             state, Y, drop_mask_Y)

        if not hasattr(self, 'both_directions'):
            self.both_directions = False

        assert self.both_directions == (new_state is not None)

        if new_state is not None:

            new_V_hat_unmasked = new_state['V_hat_unmasked']

            new_inpaint_cost = dbm.visible_layer.recons_cost(
                X, new_V_hat_unmasked, new_drop_mask)
            if self.supervised:
                new_Y_hat_unmasked = new_state['Y_hat_unmasked']
                scale = None
                raise NotImplementedError(
                    "This branch appears to be broken, needs to define scale.")
                # Unreachable until `scale` is defined above:
                new_inpaint_cost = new_inpaint_cost + \
                    dbm.hidden_layers[-1].recons_cost(
                        Y, new_Y_hat_unmasked, new_drop_mask_Y, scale)
            # end if include_Y
            inpaint_cost = 0.5 * inpaint_cost + 0.5 * new_inpaint_cost
        # end if both directions

        total_cost = inpaint_cost

        if not hasattr(self, 'range_rewards'):
            self.range_rewards = None
        if self.range_rewards is not None:
            for layer, mf_state, coeffs in safe_izip(dbm.hidden_layers,
                                                     state['H_hat'],
                                                     self.range_rewards):
                try:
                    layer_cost = layer.get_range_rewards(mf_state, coeffs)
                except NotImplementedError:
                    if coeffs == 0.:
                        layer_cost = 0.
                    else:
                        raise
                if layer_cost != 0.:
                    total_cost += layer_cost

        if not hasattr(self, 'stdev_rewards'):
            self.stdev_rewards = None
        if self.stdev_rewards is not None:
            assert False  # not monitored yet
            for layer, mf_state, coeffs in safe_izip(dbm.hidden_layers,
                                                     state['H_hat'],
                                                     self.stdev_rewards):
                try:
                    layer_cost = layer.get_stdev_rewards(mf_state, coeffs)
                except NotImplementedError:
                    if coeffs == 0.:
                        layer_cost = 0.
                    else:
                        raise
                if layer_cost != 0.:
                    total_cost += layer_cost

        l1_act_cost = None
        if self.l1_act_targets is not None:
            l1_act_cost = 0.
            if self.l1_act_eps is None:
                self.l1_act_eps = [None] * len(self.l1_act_targets)
            for layer, mf_state, targets, coeffs, eps in \
                    safe_izip(dbm.hidden_layers, state['H_hat'],
                              self.l1_act_targets, self.l1_act_coeffs,
                              self.l1_act_eps):

                assert not isinstance(targets, str)

                try:
                    layer_cost = layer.get_l1_act_cost(mf_state, targets,
                                                       coeffs, eps)
                except NotImplementedError:
                    if coeffs == 0.:
                        layer_cost = 0.
                    else:
                        raise
                if layer_cost != 0.:
                    l1_act_cost += layer_cost
                # end for substates
            # end for layers
            total_cost += l1_act_cost
        # end if act penalty

        if not hasattr(self, 'hid_presynaptic_cost'):
            self.hid_presynaptic_cost = None
        if self.hid_presynaptic_cost is not None:
            assert False  # not monitored yet
            for c, s in safe_izip(self.hid_presynaptic_cost, state['H_hat']):
                if c == 0.:
                    continue
                s = s[1]
                assert hasattr(s, 'owner')
                owner = s.owner
                assert owner is not None
                op = owner.op

                if not hasattr(op, 'scalar_op'):
                    raise ValueError(
                        "Expected the presynaptic hidden state to be "
                        "generated by an Elemwise op, got " + str(op) +
                        " of type " + str(type(op)))
                assert isinstance(op.scalar_op, T.nnet.sigm.ScalarSigmoid)
                z, = owner.inputs

                total_cost += c * T.sqr(z).mean()

        if not hasattr(self, 'reweighted_act_targets'):
            self.reweighted_act_targets = None
        reweighted_act_cost = None
        if self.reweighted_act_targets is not None:
            reweighted_act_cost = 0.
            warnings.warn(
                "reweighted_act_cost is hardcoded for sigmoid layers and "
                "doesn't check that this is what we get.")
            for c, t, s in safe_izip(self.reweighted_act_coeffs,
                                     self.reweighted_act_targets,
                                     state['H_hat']):
                if c == 0:
                    continue
                s, _ = s
                m = s.mean(axis=0)
                d = T.sqr(m - t)
                weight = 1. / (1e-7 + s * (1 - s))
                reweighted_act_cost += c * (weight * d).mean()
            total_cost += reweighted_act_cost

        total_cost.name = 'total_cost(V_hat_unmasked = %s)' % V_hat_unmasked.name

        if return_locals:
            return total_cost, locals()

        return total_cost
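
Several blocks in cost_from_states share one pattern: zip a per-layer coefficient list against the layer states, tolerate NotImplementedError only when the coefficient is zero, and skip zero terms. A runnable sketch of that pattern in plain Python (DummyLayer and the get_range_rewards call stand in for real pylearn2 layers):

def add_layer_penalties(total_cost, layers, states, coeffs):
    # Mirror the coeff == 0. escape hatch used above for the range,
    # stdev, and L1 activation penalties.
    for layer, state, coeff in zip(layers, states, coeffs):
        try:
            layer_cost = layer.get_range_rewards(state, coeff)
        except NotImplementedError:
            if coeff == 0.:
                layer_cost = 0.  # layer opted out and we asked for nothing
            else:
                raise
        if layer_cost != 0.:
            total_cost += layer_cost
    return total_cost

class DummyLayer(object):
    def get_range_rewards(self, state, coeff):
        raise NotImplementedError()

print(add_layer_penalties(1.0, [DummyLayer()], [None], [0.]))  # 1.0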
Beispiel #41
0
    def __call__(self):
        """
        Runs the model on the monitoring dataset in order to add one
        data point to each of the channels.
        """

        # If the channels have changed at all, we need to recompile the theano
        # functions used to compute them
        if self._dirty:
            self.redo_theano()

        model = self.model
        datasets = self._datasets

        # Set all channels' val_shared to 0
        self.begin_record_entry()

        for d, i, b, n, a, sd in safe_izip(datasets, self._iteration_mode,
                                           self._batch_size,
                                           self._num_batches, self.accum,
                                           self._rng_seed):
            if isinstance(d, basestring):
                d = yaml_parse.load(d)
                raise NotImplementedError()
                # need to put d back into self._datasets
            myiterator = d.iterator(mode=i,
                                    batch_size=b,
                                    num_batches=n,
                                    topo=self.topo,
                                    targets=self.require_label,
                                    rng=sd)

            for X in myiterator:
                if self.require_label:
                    X, y = X
                    self.run_prereqs(X, y, d)
                    a(X, y)
                else:
                    self.run_prereqs(X, None, d)
                    a(X)
            # end for X
        # end for d


        log.info("Monitoring step:")
        log.info("\tEpochs seen: %d" % self._epochs_seen)
        log.info("\tBatches seen: %d" % self._num_batches_seen)
        log.info("\tExamples seen: %d" % self._examples_seen)
        t = time.time() - self.t0
        for channel_name in sorted(self.channels.keys(), key=number_aware_alphabetical_key):
            channel = self.channels[channel_name]
            channel.time_record.append(t)
            channel.batch_record.append(self._num_batches_seen)
            channel.example_record.append(self._examples_seen)
            channel.epoch_record.append(self._epochs_seen)
            val = channel.val_shared.get_value()
            channel.val_record.append(val)
            # TODO: use logging infrastructure so that user can configure
            # formatting
            if abs(val) < 1e4:
                val_str = str(val)
            else:
                val_str = '%.3e' % val

            log.info("\t%s: %s" % (channel_name, val_str))
Beispiel #42
0
    def redo_theano(self):
        """
        Recompiles Theano functions used by this monitor.

        This is needed so that if new channels are added, Theano's
        optimizations make sure (to the extent that they can) that the new
        channels and old channels don't have any redundant calculations.

        It is also needed to regenerate Theano functions after pickling and
        unpickling, since Theano functions should not be pickled.
        """
        self._dirty = False

        init_names = dir(self)
        self.prereqs = OrderedDict()
        for channel in self.channels.values():
            if channel.prereqs is not None:
                dataset = channel.dataset
                if dataset not in self.prereqs:
                    self.prereqs[dataset] = []
                prereqs = self.prereqs[dataset]
                for prereq in channel.prereqs:
                    if prereq not in prereqs:
                        prereqs.append(prereq)

        updates = OrderedDict()
        for channel in self.channels.values():
            updates[channel.val_shared] = np.cast[config.floatX](0.0)
        with log_timing(log, "compiling begin_record_entry"):
            self.begin_record_entry = function(
                inputs=[], updates=updates, mode=self.theano_function_mode,
                name='Monitor.begin_record_entry')
        updates = OrderedDict()
        givens = OrderedDict()
        # Get the appropriate kind of theano variable to represent the data
        # the model acts on
        X = self.model.get_input_space().make_theano_batch(name="monitoring_X")
        if config.compute_test_value != 'off':
            m = self.model.get_test_batch_size()
            test_value = self.model.get_input_space().get_origin_batch(m)
            X.tag.test_value = np.cast[X.type.dtype](test_value)
        if self.require_label:
            Y = self.model.get_output_space().make_theano_batch(
                name="monitoring_Y")

        log.info('Monitored channels: ')
        for key in sorted(self.channels.keys()):
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line('compiling monitor including ' +
                                        'channel ' + key + '\n')
            log.info('\t%s' % key)
        it = [d.iterator(mode=i, num_batches=n, batch_size=b, topo=self.topo)
              for d, i, n, b in safe_izip(self._datasets, self._iteration_mode,
                                          self._num_batches, self._batch_size)]
        num_examples = [np.cast[config.floatX](float(i.num_examples)) for i in it]
        givens = [OrderedDict() for d in self._datasets]
        updates = [OrderedDict() for d in self._datasets]
        for channel in self.channels.values():
            index = self._datasets.index(channel.dataset)
            d = self._datasets[index]
            g = givens[index]
            n = num_examples[index]
            u = updates[index]
            if isinstance(channel.graph_input, (list, tuple)):
                g[channel.graph_input[0]] = X
                g[channel.graph_input[1]] = Y
            else:
                g[channel.graph_input] = X
            if n == 0:
                raise ValueError("Iterating over 0 examples results in divide by 0")
            if self.topo:
                batch_index = d.get_topo_batch_axis()
            else:
                batch_index = 0
            val = channel.val * T.cast(X.shape[batch_index], config.floatX) / n
            u[channel.val_shared] = channel.val_shared + val

        with log_timing(log, "Compiling accum"):
            # Check type of update expressions
            for up in updates:
                for key in up:
                    if key.dtype != up[key].dtype:
                        raise TypeError('Monitoring channel shared variable ' +
                                        key.name + ' has dtype ' + key.dtype +
                                        ' but is driven by an expression ' +
                                        'with type ' + up[key].dtype)

            self.accum = []
            for idx, packed in enumerate(safe_izip(givens, updates)):
                g, u = packed
                mode = self.theano_function_mode
                if mode is not None and hasattr(mode, 'record'):
                    for elem in g:
                        mode.record.handle_line('g key '+var_descriptor(elem)+'\n')
                        mode.record.handle_line('g val '+var_descriptor(g[elem])+'\n')
                    for elem in u:
                        mode.record.handle_line('u key '+var_descriptor(elem)+'\n')
                        mode.record.handle_line('u val '+var_descriptor(u[elem])+'\n')
                function_name = 'Monitor.accum[%d]' % idx
                if self.require_label:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line('compiling supervised accum\n')
                    # Some channels may not depend on the data, ie, they
                    # might just monitor the model parameters or some shared
                    # variable updated by the training algorithm, so we need
                    # to ignore the unused input error
                    self.accum.append(function([X, Y], givens=g, updates=u,
                                               mode=self.theano_function_mode,
                                               name=function_name))
                else:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line(
                            'compiling unsupervised accum\n')
                    self.accum.append(function([X], givens=g, updates=u,
                                               mode=self.theano_function_mode,
                                               name=function_name))
            for a in self.accum:
                if mode is not None and hasattr(mode, 'record'):
                    for elem in a.maker.fgraph.outputs:
                        mode.record.handle_line('accum output '+var_descriptor(elem)+'\n')
                log.info("graph size: %d" % len(a.maker.fgraph.toposort()))
        final_names = dir(self)
        self.register_names_to_del([name for name in final_names
                                    if name not in init_names])
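
The "Compiling accum" block refuses any update whose expression dtype differs from its shared variable's. A tiny sketch of that guard, using NumPy scalars in place of Theano variables (the pair list is a hypothetical stand-in for the updates dict):

import numpy as np

def check_update_dtypes(updates):
    # updates: list of (shared, expression) pairs
    for shared, expr in updates:
        if shared.dtype != expr.dtype:
            raise TypeError('shared variable has dtype %s but is driven by '
                            'an expression with dtype %s'
                            % (shared.dtype, expr.dtype))

check_update_dtypes([(np.float32(0.), np.float32(1.))])    # passes
# check_update_dtypes([(np.float32(0.), np.float64(1.))])  # TypeError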
Beispiel #43
0
 def add_updates(old, new):
     if isinstance(old, (list, tuple)):
         for old_elem, new_elem in safe_izip(old, new):
             add_updates(old_elem, new_elem)
     else:
         rval[old] = new
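
add_updates walks two parallel (possibly nested) structures and fills the enclosing rval dict. A standalone, runnable variant with rval passed explicitly instead of closed over:

def add_updates(rval, old, new):
    if isinstance(old, (list, tuple)):
        assert len(old) == len(new)  # the length check safe_izip performs
        for old_elem, new_elem in zip(old, new):
            add_updates(rval, old_elem, new_elem)
    else:
        rval[old] = new

rval = {}
add_updates(rval, ('a', ('b', 'c')), (1, (2, 3)))
print(rval)  # {'a': 1, 'b': 2, 'c': 3}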
Beispiel #44
0
 def mlp_pred(non_linearity):
     Z = [T.dot(X, W) for W in model.W1]
     H = [non_linearity(z) for z in Z]
     Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
     pred = sum(Z)
     return pred
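
mlp_pred sums the outputs of a committee of one-hidden-layer MLPs whose weights live in the parallel lists model.W1 and model.W2. The same computation in plain NumPy (X, W1, and W2 are hypothetical arguments here rather than closed-over variables):

import numpy as np

def mlp_pred(X, W1, W2, non_linearity=np.tanh):
    Z = [X.dot(W) for W in W1]             # one pre-activation per member
    H = [non_linearity(z) for z in Z]
    Z = [h.dot(W) for h, W in zip(H, W2)]  # one output per member
    return sum(Z)                          # committee sum

rng = np.random.RandomState(0)
X = rng.randn(5, 3)
W1 = [rng.randn(3, 4), rng.randn(3, 4)]
W2 = [rng.randn(4, 1), rng.randn(4, 1)]
print(mlp_pred(X, W1, W2).shape)  # (5, 1)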
Beispiel #45
0
 def _next(self, next_index):
     return tuple(
         fn(batch) if fn else batch for batch, fn in safe_izip(
             self._dataset.get(self._source, next_index), self._convert))
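
_next pairs each fetched batch with the optional converter registered for its source. The pattern reduces to this (sum and the sample data are arbitrary placeholders):

def convert_batches(batches, converters):
    # None means "pass the batch through unchanged"
    return tuple(fn(b) if fn else b for b, fn in zip(batches, converters))

print(convert_batches(([1, 2], [3, 4]), (sum, None)))  # (3, [3, 4])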
Beispiel #46
0
    def redo_theano(self):
        """
        Recompiles Theano functions used by this monitor.

        This is called any time we need to evaluate the channels and the
        channel definitions have changed since last we called it, or if the
        theano functions are unavailable for any other reason (first time they
        are needed after construction or deserialization, etc.)

        All channels are compiled as part of the same theano function so that
        the theano optimizations can eliminate subexpressions that are shared
        between multiple channels.
        """
        self._dirty = False

        # Recompute the data specs, since the channels may have changed.
        self._build_data_specs()

        init_names = dir(self)
        self.prereqs = OrderedDict()
        for channel in self.channels.values():
            if channel.prereqs is not None:
                dataset = channel.dataset
                if dataset not in self.prereqs:
                    self.prereqs[dataset] = []
                prereqs = self.prereqs[dataset]
                for prereq in channel.prereqs:
                    if prereq not in prereqs:
                        prereqs.append(prereq)

        updates = OrderedDict()
        for channel in self.channels.values():
            updates[channel.val_shared] = np.cast[config.floatX](0.0)
        with log_timing(log, "compiling begin_record_entry"):
            self.begin_record_entry = function(
                inputs=[],
                updates=updates,
                mode=self.theano_function_mode,
                name='Monitor.begin_record_entry')
        updates = OrderedDict()
        givens = OrderedDict()
        # Get the appropriate kind of theano variable to represent the data
        # the model acts on
        batch_names = ['monitoring_%s' % s for s in self._flat_data_specs[1]]
        theano_args = self._flat_data_specs[0].make_theano_batch(batch_names)

        # Get a symbolic expression of the batch size
        # We do it here, rather than for each channel, because channels with an
        # empty data_specs do not use data, and are unable to extract the batch
        # size. The case where the whole data specs is empty is not supported.
        batch_size = self._flat_data_specs[0].batch_size(theano_args)

        # Also get a nested representation, for joint iteration
        # with each of channel.graph_input
        nested_theano_args = self._data_specs_mapping.nest(theano_args)
        if not isinstance(nested_theano_args, tuple):
            nested_theano_args = (nested_theano_args, )
        assert len(nested_theano_args) == (len(self.channels) + 1)

        log.info('Monitored channels: ')
        for key in sorted(self.channels.keys()):
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line('compiling monitor including ' +
                                        'channel ' + key + '\n')
            log.info('\t%s' % key)
        it = [
            d.iterator(mode=i,
                       num_batches=n,
                       batch_size=b,
                       data_specs=self._flat_data_specs,
                       return_tuple=True)
            for d, i, n, b in safe_izip(self._datasets, self._iteration_mode,
                                        self._num_batches, self._batch_size)
        ]
        self.num_examples = [
            np.cast[config.floatX](float(i.num_examples)) for i in it
        ]
        givens = [OrderedDict() for d in self._datasets]
        updates = [OrderedDict() for d in self._datasets]
        for i, channel in enumerate(self.channels.values()):
            index = self._datasets.index(channel.dataset)
            d = self._datasets[index]
            g = givens[index]
            cur_num_examples = self.num_examples[index]
            u = updates[index]

            # Flatten channel.graph_input and the appropriate part of
            # nested_theano_args, to iterate jointly over them.
            c_mapping = DataSpecsMapping(channel.data_specs)
            channel_inputs = c_mapping.flatten(channel.graph_input,
                                               return_tuple=True)
            inputs = c_mapping.flatten(nested_theano_args[i + 1],
                                       return_tuple=True)

            for (channel_X, X) in safe_izip(channel_inputs, inputs):
                assert channel_X not in g or g[channel_X] is X
                assert channel_X.type == X.type, (channel_X.type, X.type)
                g[channel_X] = X

            if batch_size == 0:
                # No channel needs any data, so there is no need to average
                # results; we will call the accum functions only once.
                # TODO: better handling of channels not needing data when
                # some other channels need data.
                assert len(self._flat_data_specs[1]) == 0
                val = channel.val
            else:
                if cur_num_examples == 0:
                    raise ValueError("Iterating over 0 examples results in " +
                                     "divide by 0")
                val = (channel.val * T.cast(batch_size, config.floatX) /
                       cur_num_examples)
            u[channel.val_shared] = channel.val_shared + val

        with log_timing(log, "Compiling accum"):
            # Check type of update expressions
            for up in updates:
                for key in up:
                    if key.dtype != up[key].dtype:
                        raise TypeError('Monitoring channel shared variable ' +
                                        key.name + ' has dtype ' + key.dtype +
                                        ' but is driven by an expression ' +
                                        'with type ' + up[key].dtype)

            self.accum = []
            for idx, packed in enumerate(safe_izip(givens, updates)):
                g, u = packed
                mode = self.theano_function_mode
                if mode is not None and hasattr(mode, 'record'):
                    for elem in g:
                        mode.record.handle_line('g key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('g val ' +
                                                var_descriptor(g[elem]) + '\n')
                    for elem in u:
                        mode.record.handle_line('u key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('u val ' +
                                                var_descriptor(u[elem]) + '\n')
                function_name = 'Monitor.accum[%d]' % idx
                if mode is not None and hasattr(mode, 'record'):
                    mode.record.handle_line('compiling accum\n')
                # Some channels may not depend on the data, ie, they might just
                # monitor the model parameters, or some shared variable updated
                # by the training algorithm, so we need to ignore the unused
                # input error
                self.accum.append(
                    function(theano_args,
                             givens=g,
                             updates=u,
                             mode=self.theano_function_mode,
                             name=function_name))
            for a in self.accum:
                if mode is not None and hasattr(mode, 'record'):
                    for elem in a.maker.fgraph.outputs:
                        mode.record.handle_line('accum output ' +
                                                var_descriptor(elem) + '\n')
                log.info("graph size: %d" % len(a.maker.fgraph.toposort()))
        final_names = dir(self)
        self.register_names_to_del(
            [name for name in final_names if name not in init_names])
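
This version drives everything through data_specs: nested spaces are flattened for compilation and nested again for per-channel wiring. A sketch of just the flatten half (DataSpecsMapping also provides the inverse nest operation; this stand-in assumes plain tuples):

def flatten(nested):
    # Depth-first flatten of arbitrarily nested tuples.
    if isinstance(nested, tuple):
        return sum((flatten(e) for e in nested), ())
    return (nested,)

print(flatten((('X',), ('y', ('mask',)))))  # ('X', 'y', 'mask')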
Beispiel #47
0
 def _fallback_next(self, next_index):
     # TODO: handle fancy-index copies by allocating a buffer and
     # using np.take()
     return tuple(
         fn(data[next_index]) if fn else data[next_index]
         for data, fn in safe_izip(self._raw_data, self._convert))
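
The TODO above suggests replacing per-call fancy-index copies with np.take into a reusable buffer; a sketch of what that could look like (the shapes and indices are illustrative):

import numpy as np

data = np.arange(20.).reshape(10, 2)
index = [7, 1, 3]
# Allocate the buffer once, then reuse it across calls of the same shape.
buf = np.empty((len(index),) + data.shape[1:], dtype=data.dtype)
np.take(data, index, axis=0, out=buf)
print(buf[:, 0])  # [14.  2.  6.]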
Beispiel #48
0
    def __call__(self,
                 model,
                 X,
                 Y=None,
                 drop_mask=None,
                 drop_mask_Y=None,
                 return_locals=False,
                 include_toronto=True,
                 **kwargs):
        """
        .. todo::

            WRITEME
        """

        if not self.supervised:
            assert drop_mask_Y is None
            # Ignore Y if some other cost is supervised and caused it to be
            # passed in
            Y = None
        if self.supervised:
            assert Y is not None
            if drop_mask is not None:
                assert drop_mask_Y is not None

        if not hasattr(model, 'cost'):
            model.cost = self
        if not hasattr(model, 'mask_gen'):
            model.mask_gen = self.mask_gen

        dbm = model

        X_space = model.get_input_space()

        if drop_mask is None:
            if self.supervised:
                drop_mask, drop_mask_Y = self.mask_gen(X, Y, X_space=X_space)
            else:
                drop_mask = self.mask_gen(X, X_space=X_space)

        if drop_mask_Y is not None:
            assert drop_mask_Y.ndim == 1

        if drop_mask.ndim < X.ndim:
            if self.mask_gen is not None:
                assert self.mask_gen.sync_channels
            if X.ndim != 4:
                raise NotImplementedError()
            drop_mask = drop_mask.dimshuffle(0, 1, 2, 'x')

        if not hasattr(self, 'noise'):
            self.noise = False

        history = dbm.do_inpainting(X,
                                    Y=Y,
                                    drop_mask=drop_mask,
                                    drop_mask_Y=drop_mask_Y,
                                    return_history=True,
                                    noise=self.noise,
                                    niter=self.niter,
                                    block_grad=self.block_grad)
        final_state = history[-1]

        new_drop_mask = None
        new_drop_mask_Y = None
        new_history = [None for state in history]

        if not hasattr(self, 'both_directions'):
            self.both_directions = False
        if self.both_directions:
            new_drop_mask = 1. - drop_mask
            if self.supervised:
                new_drop_mask_Y = 1. - drop_mask_Y
            new_history = dbm.do_inpainting(X,
                                            Y=Y,
                                            drop_mask=new_drop_mask,
                                            drop_mask_Y=new_drop_mask_Y,
                                            return_history=True,
                                            noise=self.noise,
                                            niter=self.niter,
                                            block_grad=self.block_grad)

        new_final_state = new_history[-1]

        total_cost, sublocals = self.cost_from_states(final_state,
                                                      new_final_state,
                                                      dbm,
                                                      X,
                                                      Y,
                                                      drop_mask,
                                                      drop_mask_Y,
                                                      new_drop_mask,
                                                      new_drop_mask_Y,
                                                      return_locals=True)
        l1_act_cost = sublocals['l1_act_cost']
        inpaint_cost = sublocals['inpaint_cost']
        reweighted_act_cost = sublocals['reweighted_act_cost']

        if not hasattr(self, 'robustness'):
            self.robustness = None
        if self.robustness is not None:
            inpainting_H_hat = history[-1]['H_hat']
            mf_H_hat = dbm.mf(X, Y=Y)
            if self.supervised:
                inpainting_H_hat = inpainting_H_hat[:-1]
                mf_H_hat = mf_H_hat[:-1]
            for ihh, mhh in safe_izip(flatten(inpainting_H_hat),
                                      flatten(mf_H_hat)):
                total_cost += self.robustness * T.sqr(mhh - ihh).sum()

        if not hasattr(self, 'toronto_act_targets'):
            self.toronto_act_targets = None
        toronto_act_cost = None
        if self.toronto_act_targets is not None and include_toronto:
            toronto_act_cost = 0.
            H_hat = history[-1]['H_hat']
            for s, c, t in zip(H_hat, self.toronto_act_coeffs,
                               self.toronto_act_targets):
                if c == 0.:
                    continue
                s, _ = s
                m = s.mean(axis=0)
                toronto_act_cost += c * T.sqr(m - t).mean()
            total_cost += toronto_act_cost

        if return_locals:
            return locals()

        total_cost.name = 'total_inpaint_cost'

        return total_cost
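
The cost driving all of this is a masked reconstruction error: the model only pays for the entries that drop_mask hid from it. A NumPy sketch of the idea (plain squared error stands in for the visible layer's recons_cost):

import numpy as np

def inpaint_cost(X, V_hat, drop_mask):
    # Only positions with drop_mask == 1 were hidden, so only they count.
    return (np.square(X - V_hat) * drop_mask).sum()

X = np.array([1., 2., 3., 4.])
drop_mask = np.array([0., 1., 1., 0.])
V_hat = np.array([1., 1.5, 3.5, 4.])      # hypothetical reconstruction
print(inpaint_cost(X, V_hat, drop_mask))  # 0.5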
Beispiel #49
0
    def __call__(self):
        """
        Runs the model on the monitoring dataset in order to add one
        data point to each of the channels.
        """

        # If the channels have changed at all, we need to recompile the theano
        # functions used to compute them
        if self._dirty:
            self.redo_theano()

        model = self.model
        datasets = self._datasets

        # Set all channels' val_shared to 0
        self.begin_record_entry()

        for d, i, b, n, a, sd, ne in safe_izip(datasets, self._iteration_mode,
                                               self._batch_size,
                                               self._num_batches, self.accum,
                                               self._rng_seed,
                                               self.num_examples):
            if isinstance(d, basestring):
                d = yaml_parse.load(d)
                raise NotImplementedError()
                # need to put d back into self._datasets
            myiterator = d.iterator(mode=i,
                                    batch_size=b,
                                    num_batches=n,
                                    topo=self.topo,
                                    targets=self.require_label,
                                    rng=sd)

            actual_ne = 0
            for X in myiterator:
                if self.require_label:
                    X, y = X
                    self.run_prereqs(X, y, d)
                    a(X, y)
                else:
                    self.run_prereqs(X, None, d)
                    a(X)
                if X.ndim == 2:
                    actual_batch_size = X.shape[0]
                else:
                    actual_batch_size = X.shape[d.get_topo_batch_axis()]
                actual_ne += actual_batch_size
            # end for X
            if actual_ne != ne:
                raise RuntimeError(
                    "At compile time, your iterator said it had " + str(ne) +
                    " examples total, but at runtime it gave us " +
                    str(actual_ne) + ".")
        # end for d

        log.info("Monitoring step:")
        log.info("\tEpochs seen: %d" % self._epochs_seen)
        log.info("\tBatches seen: %d" % self._num_batches_seen)
        log.info("\tExamples seen: %d" % self._examples_seen)
        t = time.time() - self.t0
        for channel_name in sorted(self.channels.keys(),
                                   key=number_aware_alphabetical_key):
            channel = self.channels[channel_name]
            channel.time_record.append(t)
            channel.batch_record.append(self._num_batches_seen)
            channel.example_record.append(self._examples_seen)
            channel.epoch_record.append(self._epochs_seen)
            val = channel.val_shared.get_value()
            channel.val_record.append(val)
            # TODO: use logging infrastructure so that user can configure
            # formatting
            if abs(val) < 1e4:
                val_str = str(val)
            else:
                val_str = '%.3e' % val

            log.info("\t%s: %s" % (channel_name, val_str))
Beispiel #50
0
 def _next(self, next_index):
     return tuple(
         fn(batch) if fn else batch for batch, fn in
         safe_izip(self._dataset.get(self._source, next_index),
                   self._convert)
     )
Beispiel #51
0
    def redo_theano(self):
        """
        Recompiles Theano functions used by this monitor.

        This is needed so that if new channels are added, Theano's
        optimizations make sure (to the extent that they can) that the new
        channels and old channels don't have any redundant calculations.

        It is also needed to regenerate Theano functions after pickling and
        unpickling, since Theano functions should not be pickled.
        """
        self._dirty = False

        init_names = dir(self)
        self.prereqs = OrderedDict()
        for channel in self.channels.values():
            if channel.prereqs is not None:
                dataset = channel.dataset
                if dataset not in self.prereqs:
                    self.prereqs[dataset] = []
                prereqs = self.prereqs[dataset]
                for prereq in channel.prereqs:
                    if prereq not in prereqs:
                        prereqs.append(prereq)

        updates = OrderedDict()
        for channel in self.channels.values():
            updates[channel.val_shared] = np.cast[config.floatX](0.0)
        with log_timing(log, "compiling begin_record_entry"):
            self.begin_record_entry = function(
                inputs=[],
                updates=updates,
                mode=self.theano_function_mode,
                name='Monitor.begin_record_entry')
        updates = OrderedDict()
        givens = OrderedDict()
        # Get the appropriate kind of theano variable to represent the data
        # the model acts on
        X = self.model.get_input_space().make_theano_batch(name="monitoring_X")
        if config.compute_test_value != 'off':
            m = self.model.get_test_batch_size()
            test_value = self.model.get_input_space().get_origin_batch(m)
            X.tag.test_value = np.cast[X.type.dtype](test_value)
        if self.require_label:
            Y = self.model.get_output_space().make_theano_batch(
                name="monitoring_Y")

        log.info('Monitored channels: ')
        for key in sorted(self.channels.keys()):
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line(
                    'compiling monitor including channel ' + key + '\n')
            log.info('\t%s' % key)
        it = [d.iterator(mode=i, num_batches=n, batch_size=b, topo=self.topo)
              for d, i, n, b in safe_izip(self._datasets, self._iteration_mode,
                                          self._num_batches, self._batch_size)]
        self.num_examples = [
            np.cast[config.floatX](float(i.num_examples)) for i in it
        ]
        givens = [OrderedDict() for d in self._datasets]
        updates = [OrderedDict() for d in self._datasets]
        for channel in self.channels.values():
            index = self._datasets.index(channel.dataset)
            d = self._datasets[index]
            g = givens[index]
            cur_num_examples = self.num_examples[index]
            u = updates[index]
            if isinstance(channel.graph_input, (list, tuple)):
                channel_X, channel_Y = channel.graph_input
                assert channel_X not in g or g[channel_X] is X
                assert channel_Y not in g or g[channel_Y] is Y
                g[channel_X] = X
                g[channel_Y] = Y
            else:
                channel_X = channel.graph_input
                assert channel_X not in g or g[channel_X] is X
                g[channel_X] = X
            if cur_num_examples == 0:
                raise ValueError(
                    "Iterating over 0 examples results in divide by 0")
            if self.topo:
                batch_index = d.get_topo_batch_axis()
            else:
                batch_index = 0
            val = channel.val * T.cast(X.shape[batch_index],
                                       config.floatX) / cur_num_examples
            u[channel.val_shared] = channel.val_shared + val

        with log_timing(log, "Compiling accum"):
            # Check type of update expressions
            for up in updates:
                for key in up:
                    if key.dtype != up[key].dtype:
                        raise TypeError('Monitoring channel shared variable ' +
                                        key.name + ' has dtype ' + key.dtype +
                                        ' but is driven by an expression ' +
                                        'with type ' + up[key].dtype)

            self.accum = []
            for idx, packed in enumerate(safe_izip(givens, updates)):
                g, u = packed
                mode = self.theano_function_mode
                if mode is not None and hasattr(mode, 'record'):
                    for elem in g:
                        mode.record.handle_line('g key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('g val ' +
                                                var_descriptor(g[elem]) + '\n')
                    for elem in u:
                        mode.record.handle_line('u key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('u val ' +
                                                var_descriptor(u[elem]) + '\n')
                function_name = 'Monitor.accum[%d]' % idx
                if self.require_label:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line('compiling supervised accum\n')
                    # Some channels may not depend on the data, ie, they
                    # might just monitor the model parameters or some shared
                    # variable updated by the training algorithm, so we need
                    # to ignore the unused input error
                    self.accum.append(
                        function([X, Y],
                                 givens=g,
                                 updates=u,
                                 mode=self.theano_function_mode,
                                 name=function_name))
                else:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line(
                            'compiling unsupervised accum\n')
                    self.accum.append(
                        function([X],
                                 givens=g,
                                 updates=u,
                                 mode=self.theano_function_mode,
                                 name=function_name))
            for a in self.accum:
                if mode is not None and hasattr(mode, 'record'):
                    for elem in a.maker.fgraph.outputs:
                        mode.record.handle_line('accum output ' +
                                                var_descriptor(elem) + '\n')
                log.info("graph size: %d" % len(a.maker.fgraph.toposort()))
        final_names = dir(self)
        self.register_names_to_del(
            [name for name in final_names if name not in init_names])
Beispiel #52
0
    def get_monitoring_channels(self,
                                model,
                                X,
                                Y=None,
                                drop_mask=None,
                                drop_mask_Y=None,
                                **kwargs):
        """
        .. todo::

            WRITEME
        """

        if self.supervised:
            assert Y is not None

        rval = OrderedDict()

        # TODO: shouldn't self() handle this?
        if drop_mask is not None and drop_mask.ndim < X.ndim:
            if self.mask_gen is not None:
                assert self.mask_gen.sync_channels
            if X.ndim != 4:
                raise NotImplementedError()
            drop_mask = drop_mask.dimshuffle(0, 1, 2, 'x')

        scratch = self(model,
                       X,
                       Y,
                       drop_mask=drop_mask,
                       drop_mask_Y=drop_mask_Y,
                       return_locals=True)

        history = scratch['history']
        new_history = scratch['new_history']
        new_drop_mask = scratch['new_drop_mask']
        new_drop_mask_Y = None
        drop_mask = scratch['drop_mask']
        if self.supervised:
            drop_mask_Y = scratch['drop_mask_Y']
            new_drop_mask_Y = scratch['new_drop_mask_Y']

        ii = 0
        for name in [
                'inpaint_cost', 'l1_act_cost', 'toronto_act_cost',
                'reweighted_act_cost'
        ]:
            var = scratch[name]
            if var is not None:
                rval['total_inpaint_cost_term_' + str(ii) + '_' + name] = var
                ii = ii + 1

        if self.monitor_each_step:
            for ii, packed in enumerate(safe_izip(history, new_history)):
                state, new_state = packed
                rval['all_inpaint_costs_after_' +
                     str(ii)] = self.cost_from_states(state, new_state, model,
                                                      X, Y, drop_mask,
                                                      drop_mask_Y,
                                                      new_drop_mask,
                                                      new_drop_mask_Y)

                if ii > 0:
                    prev_state = history[ii - 1]
                    V_hat = state['V_hat']
                    prev_V_hat = prev_state['V_hat']
                    rval['max_pixel_diff[%d]' % ii] = abs(V_hat -
                                                          prev_V_hat).max()

        final_state = history[-1]

        # Empirical beta code; should be moved to the Gaussian visible layer
        # and should support topological data:
        # V_hat = final_state['V_hat']
        # err = X - V_hat
        # masked_err = err * drop_mask
        # sum_sqr_err = T.sqr(masked_err).sum(axis=0)
        # recons_count = T.cast(drop_mask.sum(axis=0), 'float32')
        # empirical_beta = recons_count / sum_sqr_err
        # assert empirical_beta.ndim == 1
        # rval['empirical_beta_min'] = empirical_beta.min()
        # rval['empirical_beta_mean'] = empirical_beta.mean()
        # rval['empirical_beta_max'] = empirical_beta.max()

        layers = model.get_all_layers()
        states = [final_state['V_hat']] + final_state['H_hat']

        for layer, state in safe_izip(layers, states):
            d = layer.get_monitoring_channels_from_state(state)
            for key in d:
                mod_key = 'final_inpaint_' + layer.layer_name + '_' + key
                assert mod_key not in rval
                rval[mod_key] = d[key]

        if self.supervised:
            inpaint_Y_hat = history[-1]['H_hat'][-1]
            err = T.neq(T.argmax(inpaint_Y_hat, axis=1), T.argmax(Y, axis=1))
            assert err.ndim == 1
            assert drop_mask_Y.ndim == 1
            err = T.dot(err, drop_mask_Y) / drop_mask_Y.sum()
            if err.dtype != inpaint_Y_hat.dtype:
                err = T.cast(err, inpaint_Y_hat.dtype)

            rval['inpaint_err'] = err

            Y_hat = model.mf(X)[-1]

            Y = T.argmax(Y, axis=1)
            Y = T.cast(Y, Y_hat.dtype)

            argmax = T.argmax(Y_hat, axis=1)
            if argmax.dtype != Y_hat.dtype:
                argmax = T.cast(argmax, Y_hat.dtype)
            err = T.neq(Y, argmax).mean()
            if err.dtype != Y_hat.dtype:
                err = T.cast(err, Y_hat.dtype)

            rval['err'] = err

            if self.monitor_multi_inference:
                Y_hat = model.inference_procedure.multi_infer(X)

                argmax = T.argmax(Y_hat, axis=1)
                if argmax.dtype != Y_hat.dtype:
                    argmax = T.cast(argmax, Y_hat.dtype)
                err = T.neq(Y, argmax).mean()
                if err.dtype != Y_hat.dtype:
                    err = T.cast(err, Y_hat.dtype)

                rval['multi_err'] = err

        return rval
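
The inpaint_err channel weights per-example mistakes by drop_mask_Y, so only the labels the model had to fill in are scored. In NumPy (the one-hot Y and sample values are illustrative):

import numpy as np

def inpaint_err(Y_hat, Y, drop_mask_Y):
    err = (np.argmax(Y_hat, axis=1) != np.argmax(Y, axis=1)).astype(float)
    # average only over the masked (inpainted) labels
    return err.dot(drop_mask_Y) / drop_mask_Y.sum()

Y = np.eye(4)                    # true one-hot labels
Y_hat = np.eye(4)[[0, 2, 2, 3]]  # one mistake, on row 1
drop_mask_Y = np.array([1., 1., 0., 0.])
print(inpaint_err(Y_hat, Y, drop_mask_Y))  # 0.5: 1 error among 2 masked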
Beispiel #53
0
 def add_updates(old, new):
     if isinstance(old, (list, tuple)):
         for old_elem, new_elem in safe_izip(old, new):
             add_updates(old_elem, new_elem)
     else:
         rval[old] = new
Beispiel #54
0
 def mlp_pred(non_linearity):
     Z = [T.dot(X, W) for W in model.W1]
     H = [non_linearity(z) for z in Z]
     Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
     pred = sum(Z)
     return pred
Beispiel #55
0
    def add_dataset(self,
                    dataset,
                    mode='sequential',
                    batch_size=None,
                    num_batches=None,
                    seed=None):
        """
        Determines the data used to calculate the values of each channel.

        Parameters
        ----------
        dataset : object
            A `pylearn2.datasets.Dataset` object.
        mode : str or object, optional
            Iteration mode; see the docstring of the `iterator` method \
            on `pylearn2.datasets.Dataset` for details.
        batch_size : int, optional
            The size of an individual batch. Optional if `mode` is \
            'sequential' and `num_batches` is specified (batch size \
            will be calculated based on full dataset size).
        num_batches : int, optional
            The total number of batches. Unnecessary if `mode` is \
            'sequential' and `batch_size` is specified (number of \
            batches will be calculated based on full dataset size).
        seed : int, optional
            Optional. The seed to be used for random iteration modes.
        """
        # The user can omit the lists if only one dataset is set
        if not isinstance(dataset, list):
            dataset = [dataset]
        if not isinstance(mode, list):
            mode = [mode]
        if not isinstance(batch_size, list):
            batch_size = [batch_size]
        if not isinstance(num_batches, list):
            num_batches = [num_batches]
        if seed is None:
            seed = [None] * len(dataset)
        if not isinstance(seed, list):
            seed = [seed]
        if len(mode) != len(dataset):
            raise ValueError("Received " + str(len(dataset)) +
                             " datasets but " + str(len(mode)) + " modes.")
        if any([len(l) != len(dataset)
                for l in [batch_size, num_batches, seed]]):
            raise ValueError("Make sure each dataset has its own batch " +
                             "size, number of batches, and seed.")
        for (d, m, b, n, sd) in safe_izip(dataset, mode, batch_size,
                                          num_batches, seed):
            try:
                it = d.iterator(mode=m,
                                batch_size=b,
                                num_batches=n,
                                data_specs=self._flat_data_specs,
                                return_tuple=True,
                                rng=sd)
            except ValueError as exc:
                raise ValueError("invalid iteration parameters in " +
                                 "Monitor.add_dataset: " + str(exc))
            if it.stochastic:
                # Must be a seed, not a random number generator. If it were a
                # random number generator, different iterators using it would
                # update its state, so we would not get the same iterator
                # each time. Also, must not be None, because this makes the
                # iterator pick a seed based on the clock
                if sd is None:
                    raise TypeError("Monitor requires a seed when using " +
                                    "stochastic iteration modes.")
                if not isinstance(sd, (list, tuple, int)):
                    raise TypeError("Monitor requires a seed (not a random " +
                                    "number generator) when using " +
                                    "stochastic iteration modes.")
            else:
                # The iterator should catch this, but let's double-check
                assert sd is None

            if d not in self._datasets:
                self._datasets.append(d)
                self._iteration_mode.append(m)
                self._batch_size.append(b)
                self._num_batches.append(n)
                self._rng_seed.append(sd)
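
add_dataset first normalizes every scalar argument into a list, then insists all lists line up with the dataset list. A slightly more forgiving sketch that also broadcasts scalars across several datasets (as_lists is a hypothetical helper, not pylearn2 API):

def as_lists(dataset, mode, batch_size):
    if not isinstance(dataset, list):
        dataset = [dataset]
    # Broadcast scalars instead of wrapping them in singleton lists.
    mode = mode if isinstance(mode, list) else [mode] * len(dataset)
    batch_size = (batch_size if isinstance(batch_size, list)
                  else [batch_size] * len(dataset))
    for name, l in [('mode', mode), ('batch_size', batch_size)]:
        if len(l) != len(dataset):
            raise ValueError("Received %d datasets but %d %ss."
                             % (len(dataset), len(l), name))
    return dataset, mode, batch_size

print(as_lists(['train', 'valid'], 'sequential', 100))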
Beispiel #56
0
 def mlp_pred(non_linearity):
     Z = [T.dot(X, W) for W in model.W1]
     H = [non_linearity(z) for z in Z]
     Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
     pred = sum(Z)
     return pred
Beispiel #57
0
    def __call__(self):
        """
        Runs the model on the monitoring dataset in order to add one data point
        to each of the channels.
        """

        # If the channels have changed at all, we need to recompile the theano
        # functions used to compute them
        if self._dirty:
            self.redo_theano()

        datasets = self._datasets

        # Set all channels' val_shared to 0
        self.begin_record_entry()
        for d, i, b, n, a, sd, ne in safe_izip(datasets, self._iteration_mode,
                                               self._batch_size,
                                               self._num_batches, self.accum,
                                               self._rng_seed,
                                               self.num_examples):
            if isinstance(d, basestring):
                d = yaml_parse.load(d)
                raise NotImplementedError()
                # need to put d back into self._datasets

            myiterator = d.iterator(mode=i,
                                    batch_size=b,
                                    num_batches=n,
                                    data_specs=self._flat_data_specs,
                                    return_tuple=True,
                                    rng=sd)

            # If self._flat_data_specs is empty, no channel needs data,
            # so we do not need to iterate over batches to average the
            # monitored values; we only have to call the accum functions
            # once.
            if len(self._flat_data_specs[1]) == 0:
                X = ()
                self.run_prereqs(X, d)
                a(*X)

            else:
                actual_ne = 0
                for X in myiterator:
                    # X is a flat (not nested) tuple
                    self.run_prereqs(X, d)
                    a(*X)
                    actual_ne += self._flat_data_specs[0].np_batch_size(X)
                # end for X
                if actual_ne != ne:
                    raise RuntimeError("At compile time, your iterator said "
                                       "it had %d examples total, but at "
                                       "runtime it gave us %d." %
                                       (ne, actual_ne))
        # end for d

        log.info("Monitoring step:")
        log.info("\tEpochs seen: %d" % self._epochs_seen)
        log.info("\tBatches seen: %d" % self._num_batches_seen)
        log.info("\tExamples seen: %d" % self._examples_seen)
        t = time.time() - self.t0
        for channel_name in sorted(self.channels.keys(),
                                   key=number_aware_alphabetical_key):
            channel = self.channels[channel_name]
            channel.time_record.append(t)
            channel.batch_record.append(self._num_batches_seen)
            channel.example_record.append(self._examples_seen)
            channel.epoch_record.append(self._epochs_seen)
            val = channel.val_shared.get_value()
            channel.val_record.append(val)
            # TODO: use logging infrastructure so that user can configure
            # formatting
            if abs(val) < 1e4:
                val_str = str(val)
            else:
                val_str = '%.3e' % val

            log.info("\t%s: %s" % (channel_name, val_str))