Example #1
    def get_gradients(self, model, X, Y=None, **kwargs):
        """
        .. todo::

            WRITEME
        """

        scratch = self(model,
                       X,
                       Y,
                       include_toronto=False,
                       return_locals=True,
                       **kwargs)

        total_cost = scratch['total_cost']

        params = list(model.get_params())
        grads = dict(
            safe_zip(params,
                     T.grad(total_cost, params, disconnected_inputs='ignore')))

        if self.toronto_act_targets is not None:
            H_hat = scratch['history'][-1]['H_hat']
            for i, packed in enumerate(
                    safe_zip(H_hat, self.toronto_act_coeffs,
                             self.toronto_act_targets)):
                s, c, t = packed
                if c == 0.:
                    continue
                s, _ = s
                m = s.mean(axis=0)
                m_cost = c * T.sqr(m - t).mean()
                real_grads = T.grad(m_cost, s)
                if i == 0:
                    below = X
                else:
                    below = H_hat[i - 1][0]
                W, = model.hidden_layers[i].transformer.get_params()
                assert W in grads
                b = model.hidden_layers[i].b

                ancestor = T.scalar()
                hack_W = W + ancestor
                hack_b = b + ancestor

                fake_s = T.dot(below, hack_W) + hack_b
                if fake_s.ndim != real_grads.ndim:
                    print fake_s.ndim
                    print real_grads.ndim
                    assert False
                sources = [(fake_s, real_grads)]

                fake_grads = T.grad(cost=None,
                                    known_grads=dict(sources),
                                    wrt=[below, ancestor, hack_W, hack_b])

                grads[W] = grads[W] + fake_grads[2]
                grads[b] = grads[b] + fake_grads[3]

        return grads, OrderedDict()
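All of these examples lean on pylearn2's safe_zip helper, which behaves like zip() but refuses to silently truncate when its arguments have different lengths. Below is a minimal sketch of such a helper, written here for illustration only; the real pylearn2.utils.safe_zip may differ in details such as the exact error message.

def safe_zip(*args):
    """Like zip(), but raise if the sequences have different lengths."""
    base = len(args[0])
    for i, arg in enumerate(args[1:]):
        if len(arg) != base:
            raise ValueError("Argument %d has length %d, but argument 0 "
                             "has length %d" % (i + 1, len(arg), base))
    return zip(*args)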
Example #2
    def expr(self, model, data, ** kwargs):
        """
        Returns the sum of the costs the SumOfCosts instance was given at
        initialization.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            the model for which we want to calculate the sum of costs
        data : flat tuple of tensor_like variables.
            data has to follow the format defined by self.get_data_specs(),
            but this format will always be a flat tuple.
        """
        self.get_data_specs(model)[0].validate(data)
        composite_specs, mapping = self.get_composite_specs_and_mapping(model)
        nested_data = mapping.nest(data)
        costs = []
        for cost, cost_data in safe_zip(self.costs, nested_data):
            costs.append(cost.expr(model, cost_data, **kwargs))
        assert len(costs) > 0

        if any([cost is None for cost in costs]):
            sum_of_costs = None
        else:
            costs = [coeff * cost
                     for coeff, cost in safe_zip(self.coeffs, costs)]
            assert len(costs) > 0
            sum_of_costs = reduce(lambda x, y: x + y, costs)

        return sum_of_costs
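The weighted sum at the end of expr is a plain scale-and-accumulate over the per-cost expressions. The following plain-Python analogue uses made-up numbers instead of Theano expressions, purely to illustrate what the reduce call computes.

coeffs = [1.0, 0.1]
costs = [0.35, 2.0]
weighted = [coeff * cost for coeff, cost in zip(coeffs, costs)]
sum_of_costs = reduce(lambda x, y: x + y, weighted)  # 0.35 + 0.2 = 0.55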
Example #3
    def _read_hdf5(self, sources, aliases, load_all=False, use_h5py=True):
        """
        Loads elements from an HDF5 dataset using either h5py or tables. It
        can load either the whole object into memory or a reference to the
        object on disk, depending on the load_all parameter. Returns a
        dictionary mapping each source (and its alias) to the loaded object.

        Parameters
        ----------
        sources : list of str
            List of HDF5 keys corresponding to the data to be loaded.
        aliases : list of str
            Aliases under which the loaded objects can also be accessed.
        load_all : bool, optional (default False)
            If True, load the dataset into memory.
        use_h5py : bool, optional (default True)
            If True, use h5py; otherwise use tables.
        """
        data = alias_dict()
        if use_h5py:
            for s, a in safe_zip(sources, aliases):
                if load_all:
                    data[s, a] = self._fhandler[s][:]
                else:
                    data[s, a] = self._fhandler[s]
                    # hdf5 handle has no ndim
                    data[s].ndim = len(data[s].shape)
        else:
            for s, a in safe_zip(sources, aliases):
                if load_all:
                    data[s, a] = self._fhandler.getNode('/', s)[:]
                else:
                    data[s, a] = self._fhandler.getNode('/', s)
        return data
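The data[s, a] = ... assignments store each loaded object under both its HDF5 key and a user-supplied alias. The class below is not the actual pylearn2 alias_dict; it is only a rough sketch of the aliasing behavior the snippet assumes.

class AliasDict(dict):
    """Toy mapping where d[key, alias] = value exposes value under both names."""
    def __setitem__(self, key, value):
        if isinstance(key, tuple):
            key, alias = key
            if alias is not None:
                dict.__setitem__(self, alias, value)
        dict.__setitem__(self, key, value)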
Example #4
    def expr(self, model, data, ** kwargs):
        """
        Returns the sum of the costs the SumOfCosts instance was given at
        initialization.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            the model for which we want to calculate the sum of costs
        data : flat tuple of tensor_like variables.
            data has to follow the format defined by self.get_data_specs(),
            but this format will always be a flat tuple.
        """
        self.get_data_specs(model)[0].validate(data)
        composite_specs, mapping = self.get_composite_specs_and_mapping(model)
        nested_data = mapping.nest(data)
        costs = []
        for cost, cost_data in safe_zip(self.costs, nested_data):
            costs.append(cost.expr(model, cost_data, **kwargs))
        assert len(costs) > 0

        if any([cost is None for cost in costs]):
            sum_of_costs = None
        else:
            costs = [coeff * cost
                     for coeff, cost in safe_zip(self.coeffs, costs)]
            assert len(costs) > 0
            sum_of_costs = reduce(lambda x, y: x + y, costs)

        return sum_of_costs
Example #5
    def _read_hdf5(self, sources, aliases, load_all=False, use_h5py=True):
        """
        Loads elements from an HDF5 dataset using either h5py or tables. It
        can load either the whole object into memory or a reference to the
        object on disk, depending on the load_all parameter. Returns a
        dictionary mapping each source (and its alias) to the loaded object.

        Parameters
        ----------
        sources : list of str
            List of HDF5 keys corresponding to the data to be loaded.
        aliases : list of str
            Aliases under which the loaded objects can also be accessed.
        load_all : bool, optional (default False)
            If True, load the dataset into memory.
        use_h5py : bool, optional (default True)
            If True, use h5py; otherwise use tables.
        """
        data = alias_dict()
        if use_h5py:
            for s, a in safe_zip(sources, aliases):
                if load_all:
                    data[s, a] = self._fhandler[s][:]
                else:
                    data[s, a] = self._fhandler[s]
                    # hdf5 handle has no ndim
                    data[s].ndim = len(data[s].shape)
        else:
            for s, a in safe_zip(sources, aliases):
                if load_all:
                    data[s, a] = self._fhandler.getNode("/", s)[:]
                else:
                    data[s, a] = self._fhandler.getNode("/", s)
        return data
Example #6
    def get_gradients(self, model, X, Y = None, **kwargs):
        """
        .. todo::

            WRITEME
        """

        if Y is None:
            data = X
        else:
            data = (X, Y)

        scratch = self.expr(model, data, include_toronto = False,
                return_locals=True, **kwargs)

        total_cost = scratch['total_cost']

        params = list(model.get_params())
        grads = dict(safe_zip(params, T.grad(total_cost, params,
            disconnected_inputs='ignore')))

        if self.toronto_act_targets is not None:
            H_hat = scratch['history'][-1]['H_hat']
            for i, packed in enumerate(safe_zip(H_hat,
                self.toronto_act_coeffs, self.toronto_act_targets)):
                s, c, t = packed
                if c == 0.:
                    continue
                s, _ = s
                m = s.mean(axis=0)
                m_cost = c * T.sqr(m-t).mean()
                real_grads = T.grad(m_cost, s)
                if i == 0:
                    below = X
                else:
                    below = H_hat[i-1][0]
                W, = model.hidden_layers[i].transformer.get_params()
                assert W in grads
                b = model.hidden_layers[i].b

                ancestor = T.scalar()
                hack_W = W + ancestor
                hack_b = b + ancestor

                fake_s = T.dot(below, hack_W) + hack_b
                if fake_s.ndim != real_grads.ndim:
                    print fake_s.ndim
                    print real_grads.ndim
                    assert False
                sources = [ (fake_s, real_grads) ]

                fake_grads = T.grad(cost=None, known_grads=dict(sources),
                        wrt=[below, ancestor, hack_W, hack_b])

                grads[W] = grads[W] + fake_grads[2]
                grads[b] = grads[b] + fake_grads[3]


        return grads, OrderedDict()
Example #7
    def get_gradients(self, model, data, **kwargs):
        self.get_data_specs(model)[0].validate(data)
        obj, scratch = self.base_cost(model, data, return_locals=True, **kwargs)
        if self.supervised:
            assert isinstance(data, (list, tuple))
            assert len(data) == 2
            (X, Y) = data
        else:
            X, = data
        interm_grads = OrderedDict()


        H_hat = scratch['H_hat']
        terms = scratch['terms']
        hidden_layers = scratch['hidden_layers']

        grads = OrderedDict()

        assert len(H_hat) == len(terms)
        assert len(terms) == len(hidden_layers)
        num_layers = len(hidden_layers)
        for i in xrange(num_layers):
            state = H_hat[i]
            layer = model.hidden_layers[i]
            term = terms[i]

            if term == 0.:
                continue
            else:
                print 'term is ',term

            if i == 0:
                state_below = X
                layer_below = model.visible_layer
            else:
                layer_below = model.hidden_layers[i-1]
                state_below = H_hat[i-1]
            state_below = layer_below.upward_state(state_below)

            components = flatten(state)

            real_grads = T.grad(term, components)

            fake_state = layer.linear_feed_forward_approximation(state_below)

            fake_components = flatten(fake_state)
            real_grads = OrderedDict(safe_zip(fake_components, real_grads))

            params = list(layer.get_params())
            fake_grads = T.grad(cost=None, consider_constant=flatten(state_below),
                    wrt=params, known_grads = real_grads)

            for param, grad in safe_zip(params, fake_grads):
                if param in grads:
                    grads[param] = grads[param] + grad
                else:
                    grads[param] = grad

        return grads, OrderedDict()
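The trick shared by these get_gradients variants is Theano's known_grads argument: instead of differentiating a scalar cost, T.grad is handed the gradient of an intermediate expression and propagates it down to the requested wrt variables. A self-contained sketch of that mechanism, with illustrative variable names:

import theano
import theano.tensor as T

x = T.vector('x')
h = 2. * x                     # stand-in for a layer's pre-activation
g_h = T.ones_like(h)           # pretend this is dCost/dh computed elsewhere
g_x, = T.grad(cost=None, known_grads={h: g_h}, wrt=[x])
f = theano.function([x], g_x)  # f([1., 1.]) -> [2., 2.], i.e. dh/dx * dCost/dh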
Example #8
    def get_gradients(self, model, X, Y=None, **kwargs):
        obj, scratch = self.base_cost(model,
                                      X,
                                      Y,
                                      return_locals=True,
                                      **kwargs)

        interm_grads = OrderedDict()

        H_hat = scratch['H_hat']
        terms = scratch['terms']
        hidden_layers = scratch['hidden_layers']

        grads = OrderedDict()

        assert len(H_hat) == len(terms)
        assert len(terms) == len(hidden_layers)
        num_layers = len(hidden_layers)
        for i in xrange(num_layers):
            state = H_hat[i]
            layer = model.hidden_layers[i]
            term = terms[i]

            if term == 0.:
                continue
            else:
                print 'term is ', term

            if i == 0:
                state_below = X
                layer_below = model.visible_layer
            else:
                layer_below = model.hidden_layers[i - 1]
                state_below = H_hat[i - 1]
            state_below = layer_below.upward_state(state_below)

            components = flatten(state)

            real_grads = T.grad(term, components)

            fake_state = layer.linear_feed_forward_approximation(state_below)

            fake_components = flatten(fake_state)
            real_grads = OrderedDict(safe_zip(fake_components, real_grads))

            params = list(layer.get_params())
            fake_grads = T.grad(cost=None,
                                consider_constant=flatten(state_below),
                                wrt=params,
                                known_grads=real_grads)

            for param, grad in safe_zip(params, fake_grads):
                if param in grads:
                    grads[param] = grads[param] + grad
                else:
                    grads[param] = grad

        return grads, OrderedDict()
Example #9
    def get_monitoring_channels(self, data):
        """
        .. todo::

            WRITEME
        """
        space, source = self.get_monitoring_data_specs()
        space.validate(data)
        X = data
        history = self.mf(X, return_history=True)
        q = history[-1]

        rval = OrderedDict()

        ch = self.visible_layer.get_monitoring_channels()
        for key in ch:
            rval['vis_' + key] = ch[key]

        for state, layer in safe_zip(q, self.hidden_layers):
            ch = layer.get_monitoring_channels()
            for key in ch:
                rval[layer.layer_name + '_' + key] = ch[key]
            ch = layer.get_monitoring_channels_from_state(state)
            for key in ch:
                rval['mf_' + layer.layer_name + '_' + key] = ch[key]

        if len(history) > 1:
            prev_q = history[-2]

            flat_q = flatten(q)
            flat_prev_q = flatten(prev_q)

            mx = None
            for new, old in safe_zip(flat_q, flat_prev_q):
                cur_mx = abs(new - old).max()
                if new is old:
                    logger.error('{0} is {1}'.format(new, old))
                    assert False
                if mx is None:
                    mx = cur_mx
                else:
                    mx = T.maximum(mx, cur_mx)

            rval['max_var_param_diff'] = mx

            for layer, new, old in safe_zip(self.hidden_layers,
                                            q, prev_q):
                sum_diff = 0.
                for sub_new, sub_old in safe_zip(flatten(new), flatten(old)):
                    sum_diff += abs(sub_new - sub_old).sum()
                denom = self.batch_size * \
                    layer.get_total_state_space().get_total_dimension()
                denom = np.cast[config.floatX](denom)
                rval['mean_'+layer.layer_name+'_var_param_diff'] = \
                    sum_diff / denom

        return rval
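The max_var_param_diff channel is simply the largest absolute change of any mean-field value between the last two inference iterations. A plain numpy analogue of that bookkeeping, with illustrative values:

import numpy as np

flat_q = [np.array([0.20, 0.80]), np.array([0.50])]       # current iteration
flat_prev_q = [np.array([0.25, 0.70]), np.array([0.50])]  # previous iteration
mx = max(np.abs(new - old).max()
         for new, old in zip(flat_q, flat_prev_q))         # -> 0.1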
Example #10
    def get_monitoring_channels(self, data):
        """
        .. todo::

            WRITEME
        """
        space, source = self.get_monitoring_data_specs()
        space.validate(data)
        X = data
        history = self.mf(X, return_history=True)
        q = history[-1]

        rval = OrderedDict()

        ch = self.visible_layer.get_monitoring_channels()
        for key in ch:
            rval['vis_' + key] = ch[key]

        for state, layer in safe_zip(q, self.hidden_layers):
            ch = layer.get_monitoring_channels()
            for key in ch:
                rval[layer.layer_name + '_' + key] = ch[key]
            ch = layer.get_monitoring_channels_from_state(state)
            for key in ch:
                rval['mf_' + layer.layer_name + '_' + key] = ch[key]

        if len(history) > 1:
            prev_q = history[-2]

            flat_q = flatten(q)
            flat_prev_q = flatten(prev_q)

            mx = None
            for new, old in safe_zip(flat_q, flat_prev_q):
                cur_mx = abs(new - old).max()
                if new is old:
                    logger.error('{0} is {1}'.format(new, old))
                    assert False
                if mx is None:
                    mx = cur_mx
                else:
                    mx = T.maximum(mx, cur_mx)

            rval['max_var_param_diff'] = mx

            for layer, new, old in safe_zip(self.hidden_layers,
                                            q, prev_q):
                sum_diff = 0.
                for sub_new, sub_old in safe_zip(flatten(new), flatten(old)):
                    sum_diff += abs(sub_new - sub_old).sum()
                denom = self.batch_size * \
                    layer.get_total_state_space().get_total_dimension()
                denom = np.cast[config.floatX](denom)
                rval['mean_'+layer.layer_name+'_var_param_diff'] = \
                    sum_diff / denom

        return rval
Example #11
    def model(self, large=None, last_layer=None, seed=None):
        """
        Creates the MLP model based on internal attributes.

        Parameters
        ----------
        large : bool, optional
            The variant - large or small; by default, the value stored in
            the instance is used.
        last_layer : optional
            Last layer in the network
        seed : optional
            Seed for random number generator

        Returns
        -------
        model : pylearn2.models.mlp.MLP
            The model
        """
        laylist = self.layers()
        model = MLP(layers=laylist,
                    input_space=Conv2DSpace(
                        shape=self.shape,
                        num_channels=3,
                        axes=['b', 0, 1, 'c']),
                    seed=seed)

        last_layer_std = None
        index = 0
        for lay in laylist[:last_layer_std]:
            if not isinstance(lay, (ZeroPad, Softmax)):
                # we simulate a get_weights method here as
                # the class does not provide one
                # It does provide a get_weights_topo() but that is useless
                # as the shape is changed
                # example:
                #    get_weights => (96, 3, 7, 7)
                #    get_weights_topo => (96, 7, 7, 3)
                crt_w = lay.transformer.get_params()[0].get_value()
                #crt_w = lay.get_weights_topo()
                crt_b = lay.get_biases()
                assert all([crt == new for crt, new in safe_zip(
                    crt_w.shape, self.weights[index].shape)])
                assert all([crt == new for crt, new in safe_zip(
                    crt_b.shape, self.biases[index].shape)])
                lay.set_weights(self.weights[index])
                lay.set_biases(self.biases[index])
                index = index + 1

        return model
Example #12
        def get_expected_warning(from_space, from_batch, to_space):

            # composite -> composite
            if isinstance(from_space, CompositeSpace) and \
               isinstance(to_space, CompositeSpace):
                for fs, fb, ts in safe_zip(from_space.components,
                                           from_batch,
                                           to_space.components):
                    warning, message = get_expected_warning(fs, fb, ts)
                    if warning is not None:
                        return warning, message

                return None, None

            # composite -> simple
            if isinstance(from_space, CompositeSpace):
                for fs, fb in safe_zip(from_space.components, from_batch):
                    warning, message = get_expected_warning(fs, fb, to_space)
                    if warning is not None:
                        return warning, message

                return None, None

            # simple -> composite
            if isinstance(to_space, CompositeSpace):
                if isinstance(from_space, VectorSpace) and \
                   isinstance(from_batch, theano.sparse.SparseVariable):
                    assert from_space.sparse
                    return (UserWarning,
                            'Formatting from a sparse VectorSpace to a '
                            'CompositeSpace is currently (2 Jan 2014) a '
                            'non-differentiable action. This is because it '
                            'calls slicing operations on a sparse batch '
                            '(e.g. "my_matrix[r:R, c:C]", which Theano does '
                            'not yet have a gradient operator for. If '
                            'autodifferentiation is reporting an error, '
                            'this may be why.')

                for ts in to_space.components:
                    warning, message = get_expected_warning(from_space,
                                                            from_batch,
                                                            ts)
                    if warning is not None:
                        return warning, message

                return None, None

            # simple -> simple
            return None, None
Example #13
    def make_layer_to_state(self, num_examples, rng=None):
        """
        Makes and returns a dictionary mapping layers to states.

        By states, we mean here a real assignment, not a mean field
        state. For example, for a layer containing binary random
        variables, the state will be a shared variable containing
        values in {0,1}, not [0,1]. The visible layer will be included.

        Uses a dictionary so it is easy to unambiguously index a layer
        without needing to remember rules like vis layer = 0, hiddens
        start at 1, etc.

        Parameters
        ----------
        num_examples : int
            Number of examples to make up the state
        rng : MRG_RandomStreams
            Random number generator, if None then use model's rng
        """

        # Make a list of all layers
        layers = [self.visible_layer] + self.hidden_layers

        if rng is None:
            rng = self.rng

        states = [layer.make_state(num_examples, rng) for layer in layers]

        def recurse_check(layer, state):
            if isinstance(state, (list, tuple)):
                for elem in state:
                    recurse_check(layer, elem)
            else:
                val = state.get_value()
                m = val.shape[0]
                if m != num_examples:
                    raise ValueError(
                        layer.layer_name + " gave state with " + str(m) + " examples in some component."
                        "We requested " + str(num_examples)
                    )

        for layer, state in safe_zip(layers, states):
            recurse_check(layer, state)

        rval = OrderedDict(safe_zip(layers, states))

        return rval
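A typical use of the returned dictionary looks like the sketch below (names assumed, shown only to illustrate the layer-keyed indexing the docstring describes):

layer_to_state = model.make_layer_to_state(num_examples=100)
vis_chains = layer_to_state[model.visible_layer]
hid_chains = [layer_to_state[layer] for layer in model.hidden_layers]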
Example #14
    def get_gradients(self, model, data, **kwargs):
        space, sources = self.get_data_specs(model)
        space.validate(data)
        assert isinstance(model, CompressAdversaryPair)
        g = model.compressor
        d = model.discriminator

        #get raw gradients for d and g objectives...
        d_obj, g_obj = self.get_objectives(model, data)
        g_params = g.get_params()
        d_params = d.get_params()
        for param in g_params:
            assert param not in d_params
        for param in d_params:
            assert param not in g_params
        
        d_grads = T.grad(d_obj, d_params)
        g_grads = T.grad(g_obj, g_params)

        # if self.scale_grads:
        #     S_grad = T.grad(g_obj, S)
        #     scale = T.maximum(1., self.target_scale / T.sqrt(T.sqr(S_grad).sum()))
        #     g_grads = [g_grad * scale for g_grad in g_grads]

        #adjust raw gradients with control signals
        rval = OrderedDict()
        zeros = itertools.repeat(theano.tensor.constant(0., dtype='float32'))

        if self.ever_train_discriminator:
            rval.update(OrderedDict(safe_zip(d_params, [self.now_train_discriminator * dg for dg in d_grads])))
        else:
            rval.update(OrderedDict(zip(d_params, zeros)))

        if self.ever_train_compressor:
            rval.update(OrderedDict(safe_zip(g_params, [self.now_train_compressor * gg for gg in g_grads])))
        else:
            rval.update(OrderedDict(zip(g_params, zeros)))

        # update control signals using the updates return functionality
        updates = OrderedDict()
        # first, the clock
        self.future_train_clock = T.switch(
            T.ge(self.train_clock,
                 self.discriminator_steps + self.joint_steps +
                 self.compressor_steps),
            1., self.train_clock + 1.)
        updates[self.train_clock] = self.future_train_clock
        # then the control signals
        updates[self.now_train_discriminator] = T.switch(
            T.le(self.future_train_clock,
                 self.discriminator_steps + self.joint_steps),
            1., 0.)
        updates[self.now_train_compressor] = T.switch(
            T.gt(self.future_train_clock, self.discriminator_steps), 1., 0.)

        return rval, updates
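The three T.switch expressions at the end implement a simple schedule: the clock cycles through discriminator-only, joint, and compressor-only phases, and the two indicator signals gate the corresponding gradients. The same logic in plain Python, written here only as an illustration of the schedule:

def control_signals(clock, d_steps, joint_steps, c_steps):
    period = d_steps + joint_steps + c_steps
    next_clock = 1. if clock >= period else clock + 1.
    train_discriminator = 1. if next_clock <= d_steps + joint_steps else 0.
    train_compressor = 1. if next_clock > d_steps else 0.
    return next_clock, train_discriminator, train_compressor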
Example #15
    def get_monitoring_channels(self, data):
        """
        data is a flat tuple, and can contain features, targets, or both
        """
        rval = super(PieceChangeMonitoringMLP,
                     self).get_monitoring_channels(data)
        X, Y = data
        state = X

        theano_rng = MRG_RandomStreams(self.rng.randint(2**15))

        assert not isinstance(state, tuple)
        piece_ids_0 = self.piece_id(state, theano_rng)
        # piece_ids_0 = Print('piece_ids_0[0]')(piece_ids_0[0])
        piece_ids_1 = self.piece_id(state, theano_rng)
        assert len(piece_ids_0) == 2  # rm

        piece_changes = T.cast(
            sum([
                T.neq(ids_0, ids_1).sum()
                for ids_0, ids_1 in safe_zip(piece_ids_0, piece_ids_1)
            ]), 'float32')
        possible_changes = T.cast(sum([ids_0.size for ids_0 in piece_ids_0]),
                                  'float32')
        rval['piece_change_rate'] = piece_changes / possible_changes

        return rval
Example #16
    def load_model(self, model):
        """
        Slot that loads a model object (not file).

        Parameters
        ----------
        model : Model
            The model to load.
        """
        try:
            logger.debug('Loading model %s', str(model))
            pras_list = model.get_params()
            parv_list = model.get_param_values()

            for par, parv in safe_zip(pras_list, parv_list):
                tvi = QtGui.QTreeWidgetItem()
                tvi.setText(0, par.name)
                tvi.par = par
                tvi.parv = parv
                self.lv_top.addTopLevelItem(tvi)

            logger.debug('Model loaded')
        except Exception, exc:
            logger.error('Loading model failed', exc_info=True)
            QtGui.QMessageBox.warning(self, 'Exception', str(exc))
Example #17
    def setup(self, model, dataset):
        """
        Allows the training algorithm to do some preliminary configuration
        *before* we actually start training the model. The dataset is provided
        in case other derived training algorithms need to modify the model
        based on the dataset.

        Parameters
        ----------
        model : Model
            A Python object representing the model to train, loosely
            implementing the interface of models.model.Model.
        dataset : pylearn2.datasets.dataset.Dataset
            The dataset used to draw training data.
        """
        self.model = model

        self.monitor = Monitor.get_monitor(model)

        if self.monitoring_dataset is not None:
            # Get the data specifications needed by the model
            space, source = model.get_monitoring_data_specs()

            # Create Theano variables for each of the individual components
            # of that data. Usually, it will be X for inputs and Y for targets.
            # First, we need to find these components, and put them in a tuple
            mapping = DataSpecsMapping((space, source))
            space_tuple = mapping.flatten(space, return_tuple=True)
            source_tuple = mapping.flatten(source, return_tuple=True)
            # Then, build a flat tuple of these Theano variables
            ipt = tuple(sp.make_theano_batch(name='monitor_%s' % src)
                    for (sp, src) in safe_zip(space_tuple, source_tuple))
            # Finally, organize them back into a structure expected by the
            # monitoring channels of the model
            nested_ipt = mapping.nest(ipt)

            self.monitor.add_dataset(dataset=self.monitoring_dataset,
                                mode="sequential",
                                batch_size=self.batch_size,
                                num_batches=self.monitoring_batches)

            channels = model.get_monitoring_channels(nested_ipt)
            if not isinstance(channels, dict):
                raise TypeError("model.get_monitoring_channels must return a "
                                "dictionary, but it returned " + str(channels))
            for name in channels:
                J = channels[name]
                if isinstance(J, tuple):
                    assert len(J) == 2
                    J, prereqs = J
                else:
                    prereqs = None

                self.monitor.add_channel(name=name,
                                         ipt=nested_ipt,
                                         val=J,
                                         prereqs=prereqs,
                                         data_specs=(space, source))
        self.first = True
        self.bSetup = True
Example #18
def get_gradients(model):
    cost = model.get_default_cost()

    data_specs = cost.get_data_specs(model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s[%s]' % (SGD.__class__.__name__, source)
        arg = space.make_theano_batch(name=name, batch_size=model.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    nested_args = mapping.nest(theano_args)

    fixed_var_descr = cost.get_fixed_var_descr(model, nested_args)
    grads, updates = cost.get_gradients(model, nested_args,
                                        **fixed_var_descr.fixed_vars)

    params = list(model.get_params())
    for param in params:
        some = grads[param]
        print("ok")

    return grads
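Both this helper and the setup method above rely on DataSpecsMapping to move between the nested (space, source) structure a cost declares and the flat tuple of Theano batches that actually gets built. A small round-trip sketch, assuming pylearn2 is importable and using illustrative spaces:

from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

space = CompositeSpace([VectorSpace(dim=784), VectorSpace(dim=10)])
source = ('features', 'targets')
mapping = DataSpecsMapping((space, source))

flat_space = mapping.flatten(space, return_tuple=True)
flat_source = mapping.flatten(source, return_tuple=True)
batches = tuple(sp.make_theano_batch(name=src)
                for sp, src in safe_zip(flat_space, flat_source))
nested = mapping.nest(batches)  # back to the structure the cost expects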
Example #19
    def _shared_inputs(self, inputs):
        """
        .. todo::

            WRITEME
        """
        return [elem for elem, shared
                in safe_zip(inputs, self._shared_mask) if shared]
Example #20
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 rng=None, data_specs=None, return_tuple=False):
        """
        Method inherited from `pylearn2.datasets.dataset.Dataset`.
        """
        self.mode = mode
        self.batch_size = batch_size
        self._return_tuple = return_tuple

        # TODO: If there is a view_converter, we have to use it to convert
        # the stored data for "features" into one that the iterator can return.
        space, source = data_specs or (self.X_space, 'features')
        assert isinstance(space, CompositeSpace),\
            "Unexpected input space for the data."
        sub_spaces = space.components
        sub_sources = source

        conv_fn = lambda x: x.todense().astype(theano.config.floatX)
        convert = []
        for sp, src in safe_zip(sub_spaces, sub_sources):
            convert.append(conv_fn if src in ('features', 'targets') else None)

        assert mode is not None,\
                "Iteration mode not provided for %s" % str(self)
        mode = resolve_iterator_class(mode)
        subset_iterator = mode(self.X.shape[0], batch_size, num_batches, rng)

        return FiniteDatasetIterator(self,
                                     subset_iterator,
                                     data_specs=data_specs,
                                     return_tuple=return_tuple,
                                     convert=convert)
Example #21
    def redraw():
        '''
        Draws the currently selected convolutional kernel.
        '''

        axes_list = all_axes.flatten()
        layer = conv_layers[layer_index]
        unit_index = unit_indices[layer_index, ...]
        weights = _get_conv_weights_bc01(layer)[unit_index, ...]

        active_axes = axes_list[:weights.shape[0]]

        for axes, weights in safe_zip(active_axes, weights):
            axes.set_visible(True)
            axes.imshow(weights, cmap='gray', interpolation='nearest')

        assert len(frozenset(active_axes)) == len(active_axes)

        unused_axes = axes_list[len(active_axes):]
        assert len(frozenset(unused_axes)) == len(unused_axes)
        assert len(axes_list) == len(active_axes) + len(unused_axes)

        for axes in unused_axes:
            axes.set_visible(False)

        title_text.set_text("Layer %s, unit %d" %
                            (layer.layer_name,
                             unit_indices[layer_index]))

        figure.canvas.draw()
Example #22
        def draw(batch_pair):
            for axis, image_batch in safe_zip(axes, batch_pair):
                assert image_batch.shape[0] == 1
                grayscale_image = image_batch[0, :, :, 0]
                axis.imshow(grayscale_image, cmap='gray')

            figure.canvas.draw()
Example #23
    def _fill_mapping(self, space, source):
        """Builds a nested tuple of integers representing the mapping"""
        if isinstance(space, NullSpace):
            # This Space does not contain any data, and should not
            # be mapped to anything
            assert source == ''
            return None

        elif not isinstance(space, CompositeSpace):
            # Space is a simple Space, source should be a simple source
            if isinstance(source, tuple):
                source, = source

            # If (space, source) has not already been seen, insert it.
            # We need both the space and the source to match.
            if (space, source) in self.specs_to_index:
                spec_index = self.specs_to_index[(space, source)]
            else:
                spec_index = self.n_unique_specs
                self.specs_to_index[(space, source)] = spec_index
                self.n_unique_specs += 1

            return spec_index

        else:
            # Recursively fill the mapping, and return it
            spec_mapping = tuple(
                    self._fill_mapping(sub_space, sub_source)
                    for sub_space, sub_source in safe_zip(
                        space.components, source))

            return spec_mapping
Example #24
    def __call__(self, *batches):
        """
        .. todo::

            WRITEME
        """
        for batch in batches:
            if not isinstance(batch, list):
                raise TypeError("Expected each argument to be a list,"
                                " but one argument is " +
                                str(batch) + " of type "+str(type(batch)))
        total_examples = np.cast[config.floatX](
            sum([batch[0].shape[0] for batch in batches]))
        if self.has_updates:
            self._clear()
        augmented = self._true_inputs(batches[0]) + [total_examples]
        self._set_shared(batches[0])
        rval = self._func(*augmented)
        for batch in batches[1:]:
            augmented = self._true_inputs(batch) + [total_examples]
            self._set_shared(batch)
            # This works if there is no output,
            # because the output is an empty list
            cur_out = self._func(*augmented)
            rval = [x + y for x, y in safe_zip(rval, cur_out)]
        if len(rval) == 1:
            return rval[0]
        return rval
Example #25
    def _get_standard_neg(self, model, layer_to_chains):
        params = list(model.get_params())

        warnings.warn("""TODO: reduce variance of negative phase by
                         integrating out the even-numbered layers. The
                         Rao-Blackwellize method can do this for you when
                         expected gradient = gradient of expectation, but
                         doing this in general is trickier.""")
        #layer_to_chains = model.rao_blackwellize(layer_to_chains)
        expected_energy_p = model.energy(
            layer_to_chains[model.visible_layer],
            [layer_to_chains[layer] for layer in model.hidden_layers]
        ).mean()

        samples = flatten(layer_to_chains.values())
        for i, sample in enumerate(samples):
            if sample.name is None:
                sample.name = 'sample_'+str(i)

        neg_phase_grads = OrderedDict(
            safe_zip(params, T.grad(-expected_energy_p, params,
                                    consider_constant=samples,
                                    disconnected_inputs='ignore'))
        )
        return neg_phase_grads
Example #26
    def inv_prop(self, state_above):
        if not isinstance(state_above, tuple):
            expected_space = VectorSpace(
                self.output_space.get_total_dimension())
            state_above = expected_space.format_as(state_above,
                                                   self.output_space)

        self.output_space.validate(state_above)
        return tuple(layer.inv_prop(state)
                     for layer, state in safe_zip(self.layers, state_above))
Example #27
    def topo_view_to_design_mat(self, topo_array):
        """
        Returns a design matrix view/copy of topological matrix.

        Parameters
        ----------
        topo_array: numpy.ndarray
          An N-D array with axis order given by self.axes. Non-batch axes'
          dimension sizes must agree with corresponding sizes in self.shape.

        returns: numpy.ndarray
          A design matrix with data in rows. Data is laid out in memory
          according to the default axis order ('b', 'c', 0, 1). This will
          try to return a view into topo_array if possible; otherwise it will
          allocate a new ndarray.
        """
        for shape_elem, axis in safe_zip(self.shape, (0, 1, 2, 'c')):
            if topo_array.shape[self.axes.index(axis)] != shape_elem:
                raise ValueError(
                    "topo_array's %s axis has a different size "
                    "(%d) from the corresponding size (%d) in "
                    "self.shape.\n"
                    "  self.shape:       %s (uses standard axis order: 0, 1, "
                    "'c')\n"
                    "  self.axes:        %s\n"
                    "  topo_array.shape: %s (should be in self.axes' order)"
                    % (axis, topo_array.shape[self.axes.index(axis)],
                       shape_elem, self.shape, self.axes, topo_array.shape))

        topo_array_bc01 = topo_array.transpose([self.axes.index(ax)
                                                for ax in ('b', 'c', 0, 1, 2)])

        return topo_array_bc01.reshape((topo_array_bc01.shape[0],
                                        np.prod(topo_array_bc01.shape[1:])))
Example #28
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 rng=None, data_specs=None,
                 return_tuple=False):
        """
        Copied from dense_design_matrix, in order to fix uneven problem.
        """

        if data_specs is None:
            data_specs = self._iter_data_specs

        # If there is a view_converter, we have to use it to convert
        # the stored data for "features" into one that the iterator
        # can return.
        space, source = data_specs
        if isinstance(space, CompositeSpace):
            sub_spaces = space.components
            sub_sources = source
        else:
            sub_spaces = (space,)
            sub_sources = (source,)

        convert = []
        for sp, src in safe_zip(sub_spaces, sub_sources):
            if src == 'features' and \
               getattr(self, 'view_converter', None) is not None:
                conv_fn = (lambda batch, self=self, space=sp:
                           self.view_converter.get_formatted_batch(batch,
                                                                   space))
            else:
                conv_fn = None

            convert.append(conv_fn)

        # TODO: Refactor
        if mode is None:
            if hasattr(self, '_iter_subset_class'):
                mode = self._iter_subset_class
            else:
                raise ValueError('iteration mode not provided and no default '
                                 'mode set for %s' % str(self))
        else:
            mode = resolve_iterator_class(mode)

        if batch_size is None:
            batch_size = getattr(self, '_iter_batch_size', None)
        if num_batches is None:
            num_batches = getattr(self, '_iter_num_batches', None)
        if rng is None and mode.stochastic:
            rng = self.rng
        # hack to make the online augmentations run
        FiniteDatasetIterator.uneven = False
        iterator = FiniteDatasetIterator(self,
                                 mode(self.X.shape[0],
                                      batch_size,
                                      num_batches,
                                      rng),
                                 data_specs=data_specs,
                                 return_tuple=return_tuple,
                                 convert=convert)
        return iterator
Example #29
    def next(self):
        next_index = self._subset_iterator.next()
        # TODO: handle fancy-index copies by allocating a buffer and
        # using numpy.take()

        # This saves us some memory (and time spent allocating it)
        # when the dataset dtype matches floatX and next_index is not a
        # fancy-index.
        if self._deprecated_interface:
            if self._needs_cast:
                features = numpy.cast[config.floatX](self._raw_data[next_index])
            else:
                features = self._raw_data[next_index]
            if self._topo:
                features = self._dataset.get_topological_view(features)
            if self._targets:
                targets = self._raw_targets[next_index]
                if self._targets_need_cast:
                    targets = np.cast[config.floatX](targets)
                return features, targets
            else:
                return features
        else:
            rval = tuple(
                    fn(data[next_index]) if fn else data[next_index]
                    for data, fn in safe_zip(self._raw_data, self._convert))
            if not self._return_tuple and len(rval) == 1:
                rval, = rval
            return rval
Example #30
    def next(self):
        warnings.warn("This class is obselete with the new interface change, "
                    "and will be removed around November 7th",
                    stacklevel=2)

        next_index = self._subset_iterator.next()

        if self._deprecated_interface:
            if isinstance(next_index, np.ndarray) and len(next_index) == 1:
                next_index = next_index[0]
            if self._needs_cast:
                features = numpy.cast[config.floatX](self._raw_data[next_index])
            else:
                features = self._raw_data[next_index,:]
            if self._topo:
                if len(features.shape) != 2:
                    features = features.reshape((1, features.shape[0]))
                features = self._dataset.get_topological_view(features)
            if self._targets:
                targets = self._raw_targets[next_index,:]
                if len(targets.shape) != 2:
                    targets = targets.reshape((1, targets.shape[0]))
                if self._targets_need_cast:
                    targets = np.cast[config.floatX](targets)
                return features, targets
            else:
                return features
        else:
            rval = tuple(
                    fn(data[next_index]) if fn else data[next_index]
                    for data, fn in safe_zip(self._raw_data, self._convert))
            if not self._return_tuple and len(rval) == 1:
                rval, = rval
            return rval
Example #31
    def next(self):
        warnings.warn("This class is obselete with the new interface change, "
                    "and will be removed around November 7th",
                    stacklevel=2)

        next_index = self._subset_iterator.next()

        if self._deprecated_interface:
            if isinstance(next_index, np.ndarray) and len(next_index) == 1:
                next_index = next_index[0]
            if self._needs_cast:
                features = numpy.cast[config.floatX](self._raw_data[next_index])
            else:
                features = self._raw_data[next_index,:]
            if self._topo:
                if len(features.shape) != 2:
                    features = features.reshape((1, features.shape[0]))
                features = self._dataset.get_topological_view(features)
            if self._targets:
                targets = self._raw_targets[next_index,:]
                if len(targets.shape) != 2:
                    targets = targets.reshape((1, targets.shape[0]))
                if self._targets_need_cast:
                    targets = np.cast[config.floatX](targets)
                return features, targets
            else:
                return features
        else:
            rval = tuple(
                    fn(data[next_index]) if fn else data[next_index]
                    for data, fn in safe_zip(self._raw_data, self._convert))
            if not self._return_tuple and len(rval) == 1:
                rval, = rval
            return rval
Example #32
def test_variational_cd():

    # Verifies that VariationalCD works well with make_layer_to_symbolic_state
    visible_layer = BinaryVector(nvis=100)
    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=500,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.05,
                                       init_bias=-2.0)
    model = DBM(visible_layer=visible_layer,
                hidden_layers=[hidden_layer],
                batch_size=100,
                niter=1)

    cost = VariationalCD(num_chains=100, num_gibbs_steps=2)

    data_specs = cost.get_data_specs(model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s' % (source)
        arg = space.make_theano_batch(name=name)
        theano_args.append(arg)
    theano_args = tuple(theano_args)
    nested_args = mapping.nest(theano_args)

    grads, updates = cost.get_gradients(model, nested_args)
Example #33
    def next(self):
        next_index = self._subset_iterator.next()
        # TODO: handle fancy-index copies by allocating a buffer and
        # using numpy.take()

        # This saves us some memory (and time spent allocating it)
        # when the dataset dtype matches floatX and next_index is not a
        # fancy-index.
        if self._deprecated_interface:
            if self._needs_cast:
                features = numpy.cast[config.floatX](self._raw_data[next_index])
            else:
                features = self._raw_data[next_index]
            if self._topo:
                features = self._dataset.get_topological_view(features)
            if self._targets:
                targets = self._raw_targets[next_index]
                if self._targets_need_cast:
                    targets = np.cast[config.floatX](targets)
                return features, targets
            else:
                return features
        else:
            rval = tuple(
                    fn(data[next_index]) if fn else data[next_index]
                    for data, fn in safe_zip(self._raw_data, self._convert))
            if not self._return_tuple and len(rval) == 1:
                rval, = rval
            return rval
Example #34
    def redraw():
        '''
        Draws the currently selected convolutional kernel.
        '''

        axes_list = all_axes.flatten()
        layer = conv_layers[layer_index]
        unit_index = unit_indices[layer_index, ...]
        weights = _get_conv_weights_bc01(layer)[unit_index, ...]

        active_axes = axes_list[:weights.shape[0]]

        for axes, weights in safe_zip(active_axes, weights):
            axes.set_visible(True)
            axes.imshow(weights, cmap='gray', interpolation='nearest')

        assert len(frozenset(active_axes)) == len(active_axes)

        unused_axes = axes_list[len(active_axes):]
        assert len(frozenset(unused_axes)) == len(unused_axes)
        assert len(axes_list) == len(active_axes) + len(unused_axes)

        for axes in unused_axes:
            axes.set_visible(False)

        title_text.set_text("Layer %s, unit %d" %
                            (layer.layer_name, unit_indices[layer_index]))

        figure.canvas.draw()
Example #35
        def draw(batch_pair):
            for axis, image_batch in safe_zip(axes, batch_pair):
                assert image_batch.shape[0] == 1
                grayscale_image = image_batch[0, :, :, 0]
                axis.imshow(grayscale_image, cmap='gray')

            figure.canvas.draw()
Example #36
    def _fill_flat(self, nested, mapping, rval):
        """Auxiliary recursive function used by self.flatten"""
        if isinstance(nested, CompositeSpace):
            nested = tuple(nested.components)

        if mapping is None:
            # The corresponding Space was a NullSpace, which does
            # not correspond to actual data, so nested should evaluate
            # to False, and should not be included in the flattened version
            if not isinstance(nested, NullSpace):
                assert not nested, ("The following element is mapped to "
                    "NullSpace, so it should evaluate to False (for instance, "
                    "None, an empty string or an empty tuple), but is %s"
                    % nested)
            return

        if isinstance(mapping, int):
            # "nested" should actually be a single element
            idx = mapping
            if isinstance(nested, tuple):
                nested, = nested

            if rval[idx] is None:
                rval[idx] = nested
            else:
                assert rval[idx] == nested, ("This mapping was built "
                        "with the same element occurring more than once "
                        "in the nested representation, but current nested "
                        "sequence has different values (%s and %s) at "
                        "these positions." % (rval[idx], nested))
        else:
            for sub_nested, sub_mapping in safe_zip(nested, mapping):
                self._fill_flat(sub_nested, sub_mapping, rval)
Example #37
def test_variational_cd():

    # Verifies that VariationalCD works well with make_layer_to_symbolic_state
    visible_layer = BinaryVector(nvis=100)
    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=500,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.05,
                                       init_bias=-2.0)
    model = DBM(visible_layer=visible_layer,
                hidden_layers=[hidden_layer],
                batch_size=100,
                niter=1)

    cost = VariationalCD(num_chains=100, num_gibbs_steps=2)

    data_specs = cost.get_data_specs(model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s' % (source)
        arg = space.make_theano_batch(name=name)
        theano_args.append(arg)
    theano_args = tuple(theano_args)
    nested_args = mapping.nest(theano_args)

    grads, updates = cost.get_gradients(model, nested_args)
Example #38
    def expr(self, model, data):
        if hasattr(model, 'autoencoders'):
            assert len(model.autoencoders) == len(self.coeffs)
        self.get_data_specs(model)[0].validate(data)
        X = data
        if hasattr(model, 'autoencoders'):
            layers = model.autoencoders
        else:
            layers = [model]
        layer_costs = []
        current = data
        for layer, coeff in safe_zip(layers, self.coeffs):
            current = layer.encode(current)
            cost = theano.tensor.abs_(current).sum(axis=1).mean()
            layer_costs.append(coeff * cost)

        assert theano.tensor.scalar() != 0.
        layer_costs = [cost_ for cost_ in layer_costs if cost_ != 0.]
        if len(layer_costs) == 0:
            return theano.tensor.as_tensor_variable(0.)
        else:
            total_cost = reduce(lambda x, y: x + y, layer_costs)
        total_cost.name = 'L1_ActCost'
        assert total_cost.ndim == 0
        return total_cost
Example #39
    def topo_view_to_design_mat(self, topo_array):
        """
        .. todo::

            WRITEME
        """

        for shape_elem, axis in safe_zip(self.shape, (0, 1, "c")):
            if topo_array.shape[self.axes.index(axis)] != shape_elem:
                raise ValueError(
                    "topo_array's %s axis has a different size "
                    "(%d) from the corresponding size (%d) in "
                    "self.shape.\n"
                    "  self.shape:       %s (uses standard axis order: 0, 1, "
                    "'c')\n"
                    "  self.axes:        %s\n"
                    "  topo_array.shape: %s (should be in self.axes' order)"
                    % (axis, topo_array.shape[self.axes.index(axis)],
                       shape_elem, self.shape, self.axes, topo_array.shape))

        if self.mask is not None:
            m = topo_array.shape[0]
            mask_idx = np.where(self.mask.transpose(
                [self.axes.index(ax) - 1
                 for ax in ("c", 0, 1)]).flatten() == 1)[0].tolist()
            design_matrix = np.zeros((m, len(mask_idx)), dtype=topo_array.dtype)
            for i in range(m):
                topo_array_c01 = topo_array[i].transpose([self.axes.index(ax) - 1
                                                          for ax in ("c", 0, 1)])
                design_matrix[i] = topo_array_c01.flatten()[mask_idx]
        else:
            topo_array_bc01 = topo_array.transpose([self.axes.index(ax)
                                                    for ax in ("b", "c", 0, 1)])
            design_matrix = topo_array_bc01.reshape((topo_array.shape[0],
                                                     np.prod(topo_array.shape[1:])))

        return design_matrix
Example #40
    def __call__(self, *batches):
        """
        .. todo::

            WRITEME
        """
        for batch in batches:
            if not isinstance(batch, list):
                raise TypeError("Expected each argument to be a list,"
                                " but one argument is " + str(batch) +
                                " of type " + str(type(batch)))
        total_examples = np.cast[config.floatX](sum(
            [batch[0].shape[0] for batch in batches]))
        if self.has_updates:
            self._clear()
        augmented = self._true_inputs(batches[0]) + [total_examples]
        self._set_shared(batches[0])
        rval = self._func(*augmented)
        for batch in batches[1:]:
            augmented = self._true_inputs(batch) + [total_examples]
            self._set_shared(batch)
            # This works if there is no output,
            # because the output is an empty list
            cur_out = self._func(*augmented)
            rval = [x + y for x, y in safe_zip(rval, cur_out)]
        if len(rval) == 1:
            return rval[0]
        return rval
Example #41
    def get_gradients(self, model, data, ** kwargs):
        indiv_results = []
        composite_specs, mapping = self.get_composite_specs_and_mapping(model)
        nested_data = mapping.nest(data)
        for cost, cost_data in safe_zip(self.costs, nested_data):
            result = cost.get_gradients(model, cost_data, ** kwargs)
            indiv_results.append(result)

        grads = OrderedDict()
        updates = OrderedDict()
        params = model.get_params()

        for coeff, packed in zip(self.coeffs, indiv_results):
            g, u = packed
            for param in g:
                if param not in params:
                    raise ValueError("A shared variable (" +
                                     str(param) +
                                     ") that is not a parameter appeared "
                                     "a cost gradient dictionary.")
            for param in g:
                assert param.ndim == g[param].ndim
                v = coeff * g[param]
                if param not in grads:
                    grads[param] = v
                else:
                    grads[param] = grads[param] + v
                assert grads[param].ndim == param.ndim
            assert not any([state in updates for state in u])
            assert not any([state in params for state in u])
            updates.update(u)

        return grads, updates
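In plain Python, the combination step above is a coefficient-weighted sum of per-cost gradient dictionaries; the parameter names and gradient values below are made up for illustration.

from collections import OrderedDict
import numpy as np

coeffs = [1.0, 0.1]
indiv_results = [
    ({'W': np.ones((2, 2)), 'b': np.ones(2)}, {}),   # (gradients, updates) from cost 0
    ({'W': np.full((2, 2), 2.0)}, {}),               # (gradients, updates) from cost 1
]

grads = OrderedDict()
updates = OrderedDict()
for coeff, (g, u) in zip(coeffs, indiv_results):
    for param, grad in g.items():
        v = coeff * grad
        if param in grads:
            grads[param] = grads[param] + v
        else:
            grads[param] = v
    updates.update(u)
print(grads['W'])   # 1.0 * ones + 0.1 * twos = 1.2 everywhere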
Example #42
    def _get_standard_neg(self, model, layer_to_chains):
        """
        .. todo::

            WRITEME
        """
        params = list(model.get_params())

        warnings.warn("""TODO: reduce variance of negative phase by
                         integrating out the even-numbered layers. The
                         Rao-Blackwellize method can do this for you when
                         expected gradient = gradient of expectation, but
                         doing this in general is trickier.""")
        #layer_to_chains = model.rao_blackwellize(layer_to_chains)
        expected_energy_p = model.energy(
            layer_to_chains[model.visible_layer],
            [layer_to_chains[layer] for layer in model.hidden_layers]).mean()

        samples = flatten(layer_to_chains.values())
        for i, sample in enumerate(samples):
            if sample.name is None:
                sample.name = 'sample_' + str(i)

        neg_phase_grads = OrderedDict(
            safe_zip(
                params,
                T.grad(-expected_energy_p,
                       params,
                       consider_constant=samples,
                       disconnected_inputs='ignore')))
        return neg_phase_grads
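A toy NumPy sketch of the same idea for a bias-free binary RBM (the sampled chains below are illustrative assumptions): the negative-phase gradient is the gradient of minus the mean energy, with the samples held constant.

import numpy as np

rng = np.random.RandomState(0)
v_neg = rng.randint(0, 2, size=(10, 4))   # sampled visible chains (negative phase)
h_neg = rng.randint(0, 2, size=(10, 3))   # sampled hidden chains

# With energy E(v, h) = -v . W . h, the gradient of mean(-E) with respect to W,
# treating the samples as constants, is the average outer product of the chains.
neg_phase_grad_W = v_neg.T.dot(h_neg) / float(v_neg.shape[0])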
Example #44
    def make_layer_to_symbolic_state(self, num_examples, rng=None):
        """
        .. todo::

            Explain the difference with `make_layer_to_state`

        Makes and returns a dictionary mapping layers to states. By states, we
        mean here a real assignment, not a mean field state. For example, for a
        layer containing binary random variables, the state will be a shared
        variable containing values in {0,1}, not [0,1]. The visible layer will
        be included.

        Uses a dictionary so it is easy to unambiguously index a layer without
        needing to remember rules like vis layer = 0, hiddens start at 1, etc.

        Parameters
        ----------
        num_examples : int
            WRITEME
        rng : WRITEME
        """

        # Make a list of all layers
        layers = [self.visible_layer] + self.hidden_layers

        assert rng is not None

        states = [layer.make_symbolic_state(num_examples, rng) for layer in layers]

        zipped = safe_zip(layers, states)

        rval = OrderedDict(zipped)

        return rval
Example #45
        def compile_f_step():
            prev = T.matrices(self.nlayers)
            if clamped:
                _initial = T.matrices(len(indices))
                _clamps = T.matrices(len(indices))

                z = self._update(copy.copy(prev),
                                 clamped=safe_zip(indices, _initial, _clamps),
                                 return_activations=True)
                f = theano.function(prev + _initial + _clamps,
                                    z,
                                    on_unused_input='ignore',
                                    allow_input_downcast=True)
            else:
                z = self._update(copy.copy(prev), return_activations=True)
                f = theano.function(prev,
                                    z,
                                    on_unused_input='ignore',
                                    allow_input_downcast=True)

            def wrapped(*args):
                data = f(*args)
                length = len(data) / 2
                return data[:length], data[length:]

            return wrapped
Example #46
    def __call__(self, model, X, Y=None, return_locals=False, **kwargs):
        """
        If return_locals is True, returns (objective, locals()).
        Note that this means adding / removing / changing the value of
        local variables is an interface change.
        In particular, TorontoSparsity depends on "terms" and "H_hat".
        """

        assert (Y is None) == (not self.supervised)

        H_hat = model.mf(X, Y=Y)

        terms = []

        hidden_layers = model.hidden_layers
        #if self.supervised:
        #    hidden_layers = hidden_layers[:-1]

        for layer, mf_state, targets, coeffs in \
                safe_zip(hidden_layers, H_hat, self.targets, self.coeffs):
            try:
                cost = layer.get_l2_act_cost(mf_state, targets, coeffs)
            except NotImplementedError:
                assert isinstance(coeffs, float) and coeffs == 0.
                cost = 0.
            terms.append(cost)


        objective = sum(terms)

        if return_locals:
            return objective, locals()
        return objective
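The per-layer penalty being summed above is an L2 distance between mean activations and their targets; a NumPy sketch of that idea (the activation values, targets, and coefficients are illustrative assumptions, and the exact reduction inside get_l2_act_cost is layer-specific):

import numpy as np

rng = np.random.RandomState(0)
H_hat = [rng.rand(32, 50), rng.rand(32, 20)]   # hypothetical mean-field activations per layer
targets = [0.1, 0.2]
coeffs = [0.01, 0.0]

terms = []
for h, target, coeff in zip(H_hat, targets, coeffs):
    if coeff == 0.:
        terms.append(0.)
        continue
    mean_act = h.mean(axis=0)                  # per-unit mean activation over the batch
    terms.append(coeff * np.square(mean_act - target).mean())
objective = sum(terms)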
Example #47
        def get_expected_warning(from_space, from_batch, to_space):

            # composite -> composite
            if isinstance(from_space, CompositeSpace) and \
               isinstance(to_space, CompositeSpace):
                for fs, fb, ts in safe_zip(from_space.components, from_batch,
                                           to_space.components):
                    warning, message = get_expected_warning(fs, fb, ts)
                    if warning is not None:
                        return warning, message

                return None, None

            # composite -> simple
            if isinstance(from_space, CompositeSpace):
                for fs, fb in safe_zip(from_space.components, from_batch):
                    warning, message = get_expected_warning(fs, fb, to_space)
                    if warning is not None:
                        return warning, message

                return None, None

            # simple -> composite
            if isinstance(to_space, CompositeSpace):
                if isinstance(from_space, VectorSpace) and \
                   isinstance(from_batch, theano.sparse.SparseVariable):
                    assert from_space.sparse
                    return (UserWarning,
                            'Formatting from a sparse VectorSpace to a '
                            'CompositeSpace is currently (2 Jan 2014) a '
                            'non-differentiable action. This is because it '
                            'calls slicing operations on a sparse batch '
                            '(e.g. "my_matrix[r:R, c:C]", which Theano does '
                            'not yet have a gradient operator for. If '
                            'autodifferentiation is reporting an error, '
                            'this may be why.')

                for ts in to_space.components:
                    warning, message = get_expected_warning(
                        from_space, from_batch, ts)
                    if warning is not None:
                        return warning, message

                return None, None

            # simple -> simple
            return None, None
Example #48
    def make_layer_to_state(self, num_examples, rng=None):
        """
        Makes and returns a dictionary mapping layers to states.

        By states, we mean here a real assignment, not a mean field
        state. For example, for a layer containing binary random
        variables, the state will be a shared variable containing
        values in {0,1}, not [0,1]. The visible layer will be included.

        Uses a dictionary so it is easy to unambiguously index a layer
        without needing to remember rules like vis layer = 0, hiddens
        start at 1, etc.

        Parameters
        ----------
        num_examples : int
            Number of examples to make up the state
        rng : MRG_RandomStreams
            Random number generator, if None then use model's rng
        """

        # Make a list of all layers
        layers = [self.visible_layer] + self.hidden_layers

        if rng is None:
            rng = self.rng

        states = [layer.make_state(num_examples, rng) for layer in layers]

        def recurse_check(layer, state):
            if isinstance(state, (list, tuple)):
                for elem in state:
                    recurse_check(layer, elem)
            else:
                val = state.get_value()
                m = val.shape[0]
                if m != num_examples:
                    raise ValueError(layer.layer_name + " gave state with " +
                                     str(m) + " examples in some component. "
                                     "We requested " + str(num_examples))

        for layer, state in safe_zip(layers, states):
            recurse_check(layer, state)

        rval = OrderedDict(safe_zip(layers, states))

        return rval
Example #49
    def on_monitor(self, model, dataset, algorithm):
        """
        .. todo::

            WRITEME
        """
        monitor = model.monitor

        if self.first:
            self.first = False
            self.monitor_channel = sharedX(algorithm.scale_step)
            # TODO: make monitor accept channels not associated with any
            # dataset,
            # so this hack won't be necessary
            hack = monitor.channels.values()[0]
            monitor.add_channel('scale_step',
                                hack.graph_input,
                                self.monitor_channel,
                                dataset=hack.dataset)
        channel = monitor.channels[self.channel]
        v = channel.val_record
        if len(v) == 1:
            return
        latest = v[-1]
        logger.info("Latest {0}: {1}".format(self.channel, latest))
        # Only compare to the previous step, not the best step so far
        # Another extension can be in charge of saving the best parameters
        # ever seen. We want to keep learning as long as we're making
        # progress. We don't want to give up on a step size just because it
        # failed to undo the damage of the bigger one that preceded it in a
        # single epoch.
        logger.info("Previous is {0}".format(self.prev))
        cur = algorithm.scale_step
        if latest >= self.prev:
            logger.info("Looks like using {0} "
                        "isn't working out so great for us.".format(cur))
            cur *= self.scale
            if cur < self.giveup_after:
                logger.info("Guess we just have to give up.")
                self.continue_learning = False
                cur = self.giveup_after
            logger.info("Let's see how {0} does.".format(cur))
            logger.info("Reloading saved params from last call")
            for p, v in safe_zip(model.get_params(), self.stored_values):
                p.set_value(v)
            latest = self.prev
        elif latest <= self.prev and self.scale_up != 1.:
            logger.info("Looks like we're making progress "
                        "on the validation set, let's try speeding up")
            cur *= self.scale_up
            if cur > self.max_scale:
                cur = self.max_scale
            logger.info("New scale is {0}".format(cur))
        algorithm.scale_step = cur
        self.monitor_channel.set_value(np.cast[config.floatX](cur))
        self.prev = latest
        self.stored_values = [
            param.get_value() for param in model.get_params()
        ]
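Stripped of the monitor plumbing, the decision logic above is a multiplicative step-size schedule with a parameter rollback on regression; a sketch in plain Python (the function name and constants are illustrative assumptions):

def adapt_scale(latest, prev, cur, params, stored,
                scale=0.5, scale_up=1.1, max_scale=1.0, giveup_after=1e-3):
    # Returns (new_scale_step, continue_learning).
    continue_learning = True
    if latest >= prev:
        # The monitored channel got worse: shrink the step and restore params.
        cur *= scale
        if cur < giveup_after:
            continue_learning = False
            cur = giveup_after
        params[:] = list(stored)
    elif scale_up != 1.:
        # The channel improved: cautiously grow the step, capped at max_scale.
        cur = min(cur * scale_up, max_scale)
    return cur, continue_learning

new_scale, keep_going = adapt_scale(latest=0.42, prev=0.40, cur=0.5,
                                    params=[1.0], stored=[0.0])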
Example #50
def load_model(model_paths, costs, batch_size=100):
    if type(costs) is not list:
        costs = len(model_paths) * [costs]
    model = {}
    model['layers'] = []
    model['costs'] = []
    model['comparative_costs'] = []
    model['weights'] = []
    model['encoders'] = []
    model['decoders'] = []
    for i, path in enumerate(model_paths):
        if os.path.isfile(path):
            model['layers'].append(serial.load(path))
            I = model['layers'][i].get_input_space().make_theano_batch(
                batch_size=batch_size)
            E = model['layers'][i].encode(I)
            model['encoders'].append(theano.function([I], E))

            H = model['layers'][i].get_output_space().make_theano_batch(
                batch_size=batch_size)
            D = model['layers'][i].decode(H)
            model['decoders'].append(theano.function([H], D))
            model['weights'].append(model['layers'][i].get_weights())

            data_specs = costs[i].get_data_specs(model['layers'][i])
            mapping = DataSpecsMapping(data_specs)
            space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
            source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
            # Build a flat tuple of Theano Variables, one for each space.
            # We want that so that if the same space/source is specified
            # more than once in data_specs, only one Theano Variable
            # is generated for it, and the corresponding value is passed
            # only once to the compiled Theano function.
            theano_args = []
            for space, source in safe_zip(space_tuple, source_tuple):
                arg = space.make_theano_batch(batch_size=batch_size)
                theano_args.append(arg)
            theano_args = tuple(theano_args)

            # Methods of `self.cost` need args to be passed in a format compatible
            # with data_specs
            nested_args = mapping.nest(theano_args)
            fixed_var_descr = costs[i].get_fixed_var_descr(
                model['layers'][i], nested_args)

            model['costs'].append(
                theano.function([nested_args],
                                costs[i].expr(model['layers'][i], nested_args,
                                              **fixed_var_descr.fixed_vars)))
            I2 = model['layers'][i].get_input_space().make_theano_batch(
                batch_size=batch_size)

            model['comparative_costs'].append(
                theano.function([I, I2], costs[i].costs[0].cost(I, I2)))
        else:
            sys.exit("Whoa. " + path + " isn't a thing I know about!")

    return model
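A hedged usage sketch for the loader above; the pickle paths are placeholders, and the cost object needs to expose both expr() and a costs[0].cost(...) sub-cost (as in a SumOfCosts-style wrapper), since load_model indexes costs[i].costs[0]:

# Hypothetical usage of load_model; paths and cost are placeholders.
# cost = ...  # a SumOfCosts-style object whose first sub-cost is a reconstruction cost
# model = load_model(['layer0.pkl', 'layer1.pkl'], cost, batch_size=100)
# code0 = model['encoders'][0](X_batch)      # encode a batch with the first layer
# recon = model['decoders'][0](code0)        # decode it back to input space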
Example #51
    def _set_shared(self, inputs):
        """
        .. todo::

            WRITEME
        """
        for elem, mask, shared in safe_zip(inputs, self._shared_mask, self._shared):
            if mask:
                shared.set_value(elem)
Example #52
    def test_image_dtype(self):
        expected_dtypes = ('uint8', 'float32')
        norbs = (NORB(which_set='train', which_norb='small'),
                 NORB(which_set='train',
                      which_norb='small',
                      image_dtype='float32'))

        for norb, expected_dtype in safe_zip(norbs, expected_dtypes):
            assert str(norb.X.dtype) == expected_dtype
Example #53
    def iterator(self,
                 mode=None,
                 batch_size=None,
                 num_batches=None,
                 topo=None,
                 targets=None,
                 rng=None,
                 data_specs=None,
                 return_tuple=False):
        """
        method inherited from Dataset
        """
        self.mode = mode
        self.batch_size = batch_size
        self._targets = targets
        self._return_tuple = return_tuple
        if data_specs is None:
            data_specs = self._iter_data_specs

        # If there is a view_converter, we have to use it to convert
        # the stored data for "features" into one that the iterator
        # can return.
        self.conv_fn = lambda x: x.todense()
        space, source = data_specs
        if isinstance(space, CompositeSpace):
            sub_spaces = space.components
            sub_sources = source
        else:
            sub_spaces = (space, )
            sub_sources = (source, )

        convert = []
        for sp, src in safe_zip(sub_spaces, sub_sources):
            if src in ('features', 'targets'):
                conv_fn = self.conv_fn
            else:
                conv_fn = None

            convert.append(conv_fn)

        if mode is None:
            if hasattr(self, '_iter_subset_class'):
                mode = self._iter_subset_class
            else:
                raise ValueError('iteration mode not provided and no default '
                                 'mode set for %s' % str(self))
        else:
            mode = resolve_iterator_class(mode)

        return FiniteDatasetIterator(self,
                                     mode(self.X.shape[0], batch_size,
                                          num_batches, rng),
                                     data_specs=data_specs,
                                     return_tuple=return_tuple,
                                     convert=convert)
Example #54
    def after_step(self, model):
        """
        .. todo::

            WRITEME
        """
        if self.scale_step != 1:
            for param, value in safe_zip(self.params, self.value):
                value = (1.-self.scale_step) * value + self.scale_step * param.get_value()
                param.set_value(value)
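The update above blends each parameter back toward a stored reference value, i.e. it keeps only a fraction scale_step of the step that was just applied; a NumPy sketch assuming self.value holds the pre-step parameter values:

import numpy as np

scale_step = 0.25
stored = [np.zeros(3)]      # reference values held by the object (assumed pre-step)
params = [np.ones(3)]       # parameter values after the full step

if scale_step != 1:
    # Keep only a fraction scale_step of the update that was just applied.
    params = [(1. - scale_step) * old + scale_step * new
              for old, new in zip(stored, params)]
print(params[0])            # [0.25 0.25 0.25]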