Example #1
    def add_channels_to_monitor(self, monitor, monitoring_dataset):
        monitor.add_channel(
                name='momentum',
                ipt=None,
                val=self.momentum,
                data_specs=(NullSpace(), ''),
                dataset=monitoring_dataset)
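The channel above needs no input data because `self.momentum` is a Theano shared variable. A minimal sketch of where that variable comes from (abridged and paraphrased, not the full pylearn2 `learning_rule.Momentum` class; `init_momentum` is just the constructor argument such a rule would take):

from pylearn2.utils import sharedX


class MomentumSketch(object):
    """Abridged stand-in for a momentum learning rule."""

    def __init__(self, init_momentum):
        # A shared variable holds the current momentum coefficient, so the
        # monitor can read it with ipt=None and data_specs=(NullSpace(), '').
        self.momentum = sharedX(init_momentum, 'momentum')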
Example #2
File: sgd.py  Project: sebastien-j/pylearn2
    def _setup_monitor(self):
        """
        Set up monitor to model the objective value, learning rate,
        momentum (if applicable), and extra channels defined by
        the cost.

        This method must be called after `learning_rule.get_updates`,
        since it may have an effect on `learning_rule.add_channels_to_monitor`
        (that is currently the case for `learning_rule.RMSProp`).
        """
        if self.monitoring_dataset is not None:
            if (self.monitoring_batch_size is None and
                    self.monitoring_batches is None):
                self.monitoring_batch_size = self.batch_size
                self.monitoring_batches = self.batches_per_iter
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.monitoring_batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs,
                               mode=self.monitor_iteration_mode)
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=self.learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                        self.monitor,
                        monitoring_dataset)
Example #3
File: cost.py  Project: lpigou/chalearn2014
    def get_data_specs(self, model):
        """
        .. todo::

            WRITEME
        """
        return (NullSpace(), '')
Example #4
File: cost.py  Project: xuanhan863/pylearn2
    def __init__(self):
        """
        fixed_vars: maps string names to shared variables or some sort of data structure
                    surrounding shared variables.
                    Any learning algorithm that does multiple updates on the same minibatch
                    should pass fixed_vars to the cost's expr and get_gradient methods
                    as keyword arguments.
        """
        self.fixed_vars = {}
        """
        A list of callable objects that the learning algorithm should
        call with input data (formatted as self.data_specs) as appropriate
        whenever a new batch of data is loaded.
        This will update the shared variables mapped to by fixed_vars.

        TODO: figure out why on_load_batch uses _no_op instead of an
            empty list--either there is a reason and it should be
            documented, or there is no reason and it should just be
            an empty list.
        """
        self.on_load_batch = [_no_op]
        """
        A (space, source) pair describing the inputs of every function
        in self.on_load_batch.
        """
        self.data_specs = (NullSpace(), '')
Example #5
    def add_channel(name, val):
        model.monitor.add_channel(
            name=self.channel_prefix + name,
            ipt=None,  # no input
            data_specs=(NullSpace(), ''),  # -> no input specs
            val=val,
            dataset=self.dataset,
        )
Example #6
    def main_loop(self):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.
        """
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.setup_extensions()
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                        " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets)>0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.model.monitor.add_channel(name="monitor_seconds_per_epoch",
                                               ipt=None,
                                               val=self.monitor_time,
                                               data_specs=(NullSpace(), ''),
                                               dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                with log_timing(log, None, final_msg='Time this epoch:',
                                callbacks=[self.monitor_time.set_value]):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.algorithm.continue_learning(self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #7
    def get_monitoring_data_specs(self):
        """
        Get the data_specs describing the data for get_monitoring_channels.

        This implementation returns an empty data_specs, appropriate for
        when no monitoring channels are defined, or when none of the channels
        actually need data (for instance, if they only monitor functions
        of the model's parameters).
        """
        return (NullSpace(), '')
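A minimal sketch of the pairing this docstring describes: a model whose monitoring channels are functions of its parameters only, so that `get_monitoring_data_specs` can legitimately return an empty spec. The class and channel names are illustrative, and it assumes the `get_monitoring_channels(self, data)` signature:

from collections import OrderedDict

import numpy
import theano.tensor as T

from pylearn2.models.model import Model
from pylearn2.space import NullSpace
from pylearn2.utils import sharedX


class ToyModel(Model):
    """Illustrative model whose monitoring needs no data."""

    def __init__(self, dim):
        super(ToyModel, self).__init__()
        self.W = sharedX(numpy.zeros((dim, dim)), 'W')
        self._params = [self.W]

    def get_monitoring_data_specs(self):
        # None of the channels below consume data.
        return (NullSpace(), '')

    def get_monitoring_channels(self, data):
        # `data` is ignored; every channel is a function of the parameters.
        return OrderedDict([('W_norm', T.sqrt(T.sqr(self.W).sum()))])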
Example #8
    def get_data_specs(self, model):
        """
        Provides an implementation of `Cost.get_data_specs`.

        Returns data specifications corresponding to not using any
        data at all.

        Parameters
        ----------
        model : pylearn2.models.Model
        """
        return (NullSpace(), '')
Example #9
    def add_channels_to_monitor(self, monitor, monitoring_dataset):
        """
        The channels added are the min, mean, and max of the
        mean_square_grad of each parameter.
        """

        channel_mapping = {'_min': T.min, '_max': T.max, '_mean': T.mean}

        for mean_square_grad in self.mean_square_grads.values():
            for suffix, op in channel_mapping.items():
                monitor.add_channel(name=(mean_square_grad.name + suffix),
                                    ipt=None,
                                    val=op(mean_square_grad),
                                    data_specs=(NullSpace(), ''),
                                    dataset=monitoring_dataset)
        return
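Example #2's docstring notes that `get_updates` must run before `add_channels_to_monitor`, and this method shows why: `self.mean_square_grads` is only populated while the updates are built. An abridged, paraphrased sketch of that part of RMSProp (not the complete pylearn2 implementation; the parameter update itself is omitted):

from collections import OrderedDict

import theano.tensor as T

from pylearn2.utils import sharedX


class RMSPropSketch(object):
    """Abridged stand-in for learning_rule.RMSProp."""

    def __init__(self, decay=0.9):
        self.decay = decay
        self.mean_square_grads = OrderedDict()

    def get_updates(self, learning_rate, grads, lr_scalers=None):
        updates = OrderedDict()
        for param in grads:
            # One shared accumulator per parameter; these are what the
            # min/mean/max channels above report on.
            mean_square_grad = sharedX(param.get_value() * 0.)
            mean_square_grad.name = 'mean_square_grad_' + str(param.name)
            self.mean_square_grads[param] = mean_square_grad
            updates[mean_square_grad] = (self.decay * mean_square_grad +
                                         (1 - self.decay) *
                                         T.sqr(grads[param]))
            # ... the actual parameter update is omitted in this sketch ...
        return updates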
Example #10
    def __init__(self, dim):
        """
        Parameters
        ----------
        dim : int
            Dimension of the random binary vector
        """
        self.dim = dim

        # Parameter initialization
        b_value = numpy.zeros(self.dim)
        self.b = sharedX(b_value, 'b')
        self.p = T.nnet.sigmoid(self.b)

        # Space initialization
        self.input_space = NullSpace()
        self.output_space = VectorSpace(dim=self.dim)
Example #11
    def add_channels_to_monitor(self, monitor, monitoring_dataset):
        """
        Activates monitoring of the momentum.

        Parameters
        ----------
        monitor : pylearn2.monitor.Monitor
            Monitor object, to which the rule should register additional
            monitoring channels.
        monitoring_dataset : pylearn2.datasets.dataset.Dataset or dict
            Dataset instance or dictionary whose values are Dataset objects.
        """
        monitor.add_channel(name='momentum',
                            ipt=None,
                            val=self.momentum,
                            data_specs=(NullSpace(), ''),
                            dataset=monitoring_dataset)
Example #12
        def get_expected_error(space, dtype):
            """
            If calling space.dtype = dtype is expected to throw an exception,
            this returns (exception_class, exception_message).

            If no exception is to be expected, this returns (None, None).
            """
            if isinstance(space, CompositeSpace):
                if isinstance(dtype, tuple):
                    if len(space.components) != len(dtype):
                        return ValueError, "Argument 0 has length "

                    for s, d in safe_zip(space.components, dtype):
                        error, message = get_expected_error(s, d)
                        if error is not None:
                            return error, message
                else:
                    for s in space.components:
                        error, message = get_expected_error(s, dtype)
                        if error is not None:
                            return error, message

                return None, None

            if isinstance(space, SimplyTypedSpace):
                if not any((dtype is None, dtype == 'floatX', dtype
                            in all_scalar_dtypes)):
                    return (TypeError,
                            'Unrecognized value "%s" (type %s) for dtype arg' %
                            (dtype, type(dtype)))

                return None, None

            if isinstance(space, NullSpace):
                nullspace_dtype = NullSpace().dtype
                if dtype != nullspace_dtype:
                    return (TypeError,
                            'NullSpace can only take the bogus dtype "%s"' %
                            nullspace_dtype)

                return None, None

            raise NotImplementedError("%s not yet supported by this test" %
                                      type(space))
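A short sketch of how the enclosing test might consume this helper for the `NullSpace` branch (only `get_expected_error` and `NullSpace` come from the code above; the assertions simply restate its logic):

# Inside the same test, after get_expected_error has been defined:
space = NullSpace()

# Any ordinary dtype is expected to be rejected for a NullSpace...
error, message = get_expected_error(space, 'float32')
assert error is TypeError

# ...while the NullSpace's own placeholder dtype is accepted.
error, message = get_expected_error(space, NullSpace().dtype)
assert error is None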
Example #13
File: cost.py  Project: xuanhan863/pylearn2
 def get_data_specs(self, model):
     # This cost does not use any data
     return (NullSpace(), '')
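A minimal sketch of a complete cost built around this pattern: it penalizes the squared L2 norm of the model parameters, so it needs no data at all. The class name is illustrative, and `expr` follows the usual `Cost.expr(model, data, **kwargs)` signature:

import theano.tensor as T

from pylearn2.costs.cost import Cost
from pylearn2.space import NullSpace


class ParamL2Penalty(Cost):
    """Illustrative cost that is a function of model parameters only."""

    def expr(self, model, data, **kwargs):
        # The data_specs below declare that no data is consumed, so `data`
        # is expected to be None here.
        self.get_data_specs(model)[0].validate(data)
        return sum(T.sqr(param).sum() for param in model.get_params())

    def get_data_specs(self, model):
        # This cost does not use any data.
        return (NullSpace(), '')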
Example #14
    def add_channel(self,
                    name,
                    ipt,
                    val,
                    dataset=None,
                    prereqs=None,
                    data_specs=None):
        """
        Asks the monitor to start tracking a new value.  Can be called even
        after the monitor is already in use.

        Parameters
        ----------
        name : str
            The display name in the monitor.
        ipt : tensor_like
            The symbolic tensor which should be clamped to the data. \
            (or a list/tuple containing symbolic tensors, following the \
            data_specs)
        val : tensor_like
            The value (function of `ipt`) to be tracked.
        dataset : pylearn2.datasets.Dataset
            Which dataset to compute this channel on
        prereqs : list of callables that take a list of numpy tensors
            Each prereq must be called exactly once per each new batch of \
            data drawn *from dataset* before the channel value is computed \
            if two channels provide a prereq with exactly the same id, that \
            prereq will only be called once
        data_specs : (space, source) pair
            Identifies the order, format and semantics of ipt
        """
        if isinstance(val, (float, int, long)):
            val = np.cast[theano.config.floatX](val)

        val = T.as_tensor_variable(val)

        if data_specs is None:
            warnings.warn("parameter 'data_specs' should be provided when " +
                          "calling add_channel. We will build a default one.",
                          stacklevel=2)
            if isinstance(ipt, list):
                ipt = tuple(ipt)
            if ipt is not None and not isinstance(ipt, tuple):
                ipt = (ipt, )

            if ipt is None:
                data_specs = (NullSpace(), '')
            elif len(ipt) == 0:
                data_specs = (CompositeSpace([]), ())
            elif hasattr(dataset, 'get_data_specs'):
                dataset_space, dataset_source = dataset.get_data_specs()
                if (len(ipt) == 1 and dataset_source is not None
                        and (not isinstance(dataset_source, tuple)
                             or len(dataset_source) == 1)
                        and 'features' in dataset_source):
                    data_specs = (dataset_space, dataset_source)
                elif (len(ipt) == 2
                      and dataset_source == ('features', 'targets')):
                    data_specs = (dataset_space, dataset_source)
                else:
                    raise ValueError("Cannot infer default data_specs for " +
                                     "the following input points and " +
                                     "dataset: ipt = %s, dataset = %s" %
                                     (ipt, dataset))

        data_specs[0].validate(ipt)

        mapping = DataSpecsMapping(data_specs)
        flat_ipt = mapping.flatten(ipt)
        if not isinstance(flat_ipt, tuple):
            flat_ipt = (flat_ipt, )
        inputs = theano.gof.graph.inputs([val])
        for elem in inputs:
            if not hasattr(elem, 'get_value') and \
               not isinstance(elem, theano.gof.graph.Constant):
                if elem not in flat_ipt:
                    raise ValueError("Unspecified input: " + str(elem) +
                                     ". This may be due to an incorrect " +
                                     "implementation of a cost's " +
                                     "get_data_specs() method, or of a " +
                                     "model's get_monitoring_data_specs() " +
                                     "method.")

        mode = self.theano_function_mode
        if mode is not None and hasattr(mode, 'record'):
            mode.record.handle_line('Adding monitor channel ' + name + '\n')
            assert isinstance(flat_ipt, tuple)
            if len(flat_ipt) != 1:
                for elem in flat_ipt:
                    mode.record.handle_line('Includes input var ' +
                                            var_descriptor(elem) + '\n')
            else:
                mode.record.handle_line(name + ' input var is ' +
                                        var_descriptor(flat_ipt[0]) + '\n')
            mode.record.handle_line('channel ' + name + ' is ' +
                                    var_descriptor(val) + '\n')

        if dataset is None:
            if len(self._datasets) == 1:
                dataset = self._datasets[0]
            elif len(self._datasets) == 0:
                raise ValueError(_err_no_data)
            else:
                raise ValueError(_err_ambig_data)

        try:
            self._datasets.index(dataset)
        except ValueError:
            raise ValueError("The dataset specified is not one of the " +
                             "monitor's datasets")

        if name in self.channels:
            raise ValueError("Tried to create the same channel twice (%s)" %
                             name)

        self.channels[name] = MonitorChannel(ipt, val, name, data_specs,
                                             dataset, prereqs)
        self._dirty = True
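A short usage sketch of the method above for the common case seen throughout these examples: tracking a shared variable. Passing `ipt=None` together with `data_specs=(NullSpace(), '')` declares up front that the channel needs no input, so the warning branch that builds a default `data_specs` is never taken. `model` and `monitoring_dataset` are assumed to already exist, and the channel name is illustrative:

from pylearn2.monitor import Monitor
from pylearn2.space import NullSpace
from pylearn2.utils import sharedX

monitor = Monitor.get_monitor(model)          # model: a pylearn2 Model
effective_lr = sharedX(0.01, 'effective_lr')  # illustrative shared variable

monitor.add_channel(name='effective_lr',
                    ipt=None,
                    val=effective_lr,
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)  # an already-registered Dataset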
Example #15
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [
            param for param in model.get_params()
            if np.any(np.isinf(param.get_value()))
        ]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: " + str(inf_params))
        if any([
                np.any(np.isnan(param.get_value()))
                for param in model.get_params()
        ]):
            nan_params = [
                param for param in model.get_params()
                if np.any(np.isnan(param.get_value()))
            ]
            raise ValueError("These params are NaN: " + str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        # test if force batch size and batch size
        if getattr(model, "force_batch_size", False) and \
           any(dataset.get_design_matrix().shape[0] % self.batch_size != 0 for
               dataset in self.monitoring_dataset.values()) and \
           not has_uniform_batch_size(self.monitor_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set monitor_iteration_mode to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    **fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            if (self.monitoring_batch_size is None
                    and self.monitoring_batches is None):
                self.monitoring_batch_size = self.batch_size
                self.monitoring_batches = self.batches_per_iter
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.monitoring_batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs,
                               mode=self.monitor_iteration_mode)
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                    self.monitor, monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i
        self.params = params

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 **fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(
                str(type(self.cost)) + ".get_gradients returned " +
                "something with" + str(type(grads)) + "as its " +
                "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        assert len(updates.keys()) == 0

        def get_func(learn_discriminator,
                     learn_generator,
                     dont_you_fucking_dare_touch_the_generator=False):

            updates = OrderedDict()

            assert (learn_discriminator or learn_generator
                    ) and not (learn_discriminator and learn_generator)

            if learn_discriminator:
                cur_params = model.discriminator.get_params()
            else:
                cur_params = model.generator.get_params()

            def check():
                for param in params:
                    if param not in cur_params:
                        assert param not in updates

            cur_grads = OrderedDict()
            for param in cur_params:
                cur_grads[param] = grads[param]

            for param in grads:
                if grads[param].name is None and cost_value is not None:
                    grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                         {
                                             'costname': cost_value.name,
                                             'paramname': param.name
                                         })
                assert grads[param].dtype == param.dtype

            cur_lr_scalers = OrderedDict()
            for param in cur_params:
                if param in lr_scalers:
                    lr_scaler = lr_scalers[param]
                    cur_lr_scalers[param] = lr_scaler

            log.info('Parameter and initial learning rate summary:')
            for param in cur_params:
                param_name = param.name
                if param_name is None:
                    param_name = 'anon_param'
                lr = learning_rate.get_value() * cur_lr_scalers.get(param, 1.)
                log.info('\t' + param_name + ': ' + str(lr))

            updates.update(
                self.learning_rule.get_updates(learning_rate, cur_grads,
                                               cur_lr_scalers))
            check()

            for param in cur_params:
                if updates[param].name is None:
                    updates[param].name = 'sgd_update(' + param.name + ')'
            check()
            model.modify_updates(updates)
            check()
            for param in cur_params:
                update = updates[param]
                if update.name is None:
                    update.name = 'censor(sgd_update(' + param.name + '))'
                for update_val in get_debug_values(update):
                    if np.any(np.isinf(update_val)):
                        raise ValueError("debug value of %s contains infs" %
                                         update.name)
                    if np.any(np.isnan(update_val)):
                        raise ValueError("debug value of %s contains nans" %
                                         update.name)

            check()

            if dont_you_fucking_dare_touch_the_generator:
                for param in model.generator.get_params():
                    assert param not in updates

            with log_timing(log, 'Compiling sgd_update'):
                return function(theano_args,
                                updates=updates,
                                name='sgd_update',
                                on_unused_input='ignore',
                                mode=self.theano_function_mode)

        self.d_func = get_func(1,
                               0,
                               dont_you_fucking_dare_touch_the_generator=True)
        self.g_func = get_func(0, 1)
Example #16
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if np.any(np.isinf(param.get_value()))]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([np.any(np.isnan(param.get_value()))
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if np.any(np.isnan(param.get_value()))]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs,
                               mode=self.monitor_iteration_mode)
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                        self.monitor,
                        monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 ** fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                            "something with" + str(type(grads)) + "as its " +
                            "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update( dict(safe_zip(params, [param - learning_rate * \
                lr_scalers.get(param, 1.) * grads[param]
                                    for param in params])))

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" %
                            update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" %
                            update.name)


        with log_timing(log, 'Compiling sgd_update'):
            self.sgd_update = function(theano_args,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
Example #17
    def setup(self, model, dataset):
        """
        Allows the training algorithm to do some preliminary configuration
        *before* we actually start training the model. The dataset is provided
        in case other derived training algorithms need to modify model based on
        the dataset.

        Parameters
        ----------
        model : object
            A Python object representing the model to train loosely \
            implementing the interface of models.model.Model.
        dataset : pylearn2.datasets.dataset.Dataset
            Dataset object used to draw training data
        """
        self.model = model

        if self.cost is None:
            self.cost = model.get_default_cost()

        if self.batch_size is None:
            self.batch_size = model.force_batch_size
        else:
            batch_size = self.batch_size
            if self.set_batch_size:
                model.set_batch_size(batch_size)
            elif hasattr(model, 'force_batch_size'):
                if not (model.force_batch_size <= 0
                        or batch_size == model.force_batch_size):
                    raise ValueError("batch_size is %d but " +
                                     "model.force_batch_size is %d" %
                                     (batch_size, model.force_batch_size))

        self.monitor = Monitor.get_monitor(model)
        self.monitor.set_theano_function_mode(self.theano_function_mode)

        data_specs = self.cost.get_data_specs(model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space,
        # named according to the sources.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = 'BGD_[%s]' % source
            arg = space.make_theano_batch(name=name)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with their data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    **fixed_var_descr.fixed_vars)
        grads, grad_updates = self.cost.get_gradients(
            model, nested_args, **fixed_var_descr.fixed_vars)

        assert isinstance(grads, OrderedDict)
        assert isinstance(grad_updates, OrderedDict)

        if cost_value is None:
            raise ValueError("BGD is incompatible with " + str(self.cost) +
                             " because it is intractable, but BGD uses the " +
                             "cost function value to do line searches.")

        # obj_prereqs has to be a list of function f called with f(*data),
        # where data is a data tuple coming from the iterator.
        # this function enables capturing "mapping" and "f", while
        # enabling the "*data" syntax
        def capture(f, mapping=mapping):
            new_f = lambda *args: f(mapping.flatten(args, return_tuple=True))
            return new_f

        obj_prereqs = [capture(f) for f in fixed_var_descr.on_load_batch]

        if self.monitoring_dataset is not None:
            if (self.monitoring_batch_size is None
                    and self.monitoring_batches is None):
                self.monitoring_batch_size = self.batch_size
                self.monitoring_batches = self.batches_per_iter
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.monitoring_batch_size,
                               num_batches=self.monitoring_batches,
                               obj_prereqs=obj_prereqs,
                               cost_monitoring_args=fixed_var_descr.fixed_vars)

        params = model.get_params()

        self.optimizer = BatchGradientDescent(
            objective=cost_value,
            gradients=grads,
            gradient_updates=grad_updates,
            params=params,
            param_constrainers=[model.censor_updates],
            lr_scalers=model.get_lr_scalers(),
            inputs=theano_args,
            verbose=self.verbose_optimization,
            max_iter=self.updates_per_batch,
            reset_alpha=self.reset_alpha,
            conjugate=self.conjugate,
            reset_conjugate=self.reset_conjugate,
            min_init_alpha=self.min_init_alpha,
            line_search_mode=self.line_search_mode,
            theano_function_mode=self.theano_function_mode,
            init_alpha=self.init_alpha)

        # These monitoring channels keep track of shared variables,
        # which do not need inputs nor data.
        if self.monitoring_dataset is not None:
            self.monitor.add_channel(
                name='ave_step_size',
                ipt=None,
                val=self.optimizer.ave_step_size,
                data_specs=(NullSpace(), ''),
                dataset=self.monitoring_dataset.values()[0])
            self.monitor.add_channel(
                name='ave_grad_size',
                ipt=None,
                val=self.optimizer.ave_grad_size,
                data_specs=(NullSpace(), ''),
                dataset=self.monitoring_dataset.values()[0])
            self.monitor.add_channel(
                name='ave_grad_mult',
                ipt=None,
                val=self.optimizer.ave_grad_mult,
                data_specs=(NullSpace(), ''),
                dataset=self.monitoring_dataset.values()[0])

        self.first = True
        self.bSetup = True
Example #18
    def main_loop(self, time_budget=None):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.

        Parameters
        ----------
        time_budget : int, optional
            The maximum number of seconds before interrupting
            training. Default is `None`, no time limit.
        """
        t0 = datetime.now()
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.model.monitor.time_budget_exceeded = False
            self.setup_extensions()
            # Model.censor_updates is used by the training algorithm to
            # enforce constraints after each step of learning. Here we
            # make sure the constraints are enforced from the start.
            self.model.enforce_constraints()
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return " +
                                     "anything. Use Model.continue_learning " +
                                     "to control whether learning continues.")
                self.model.monitor.report_epoch()
                extension_continue = self.run_callbacks_and_monitoring()
                freq = self.save_freq
                if freq > 0 and self.model.monitor.epochs_seen % freq == 0:
                    self.save()
                continue_learning = (self.model.continue_learning()
                                     and extension_continue)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            # Model.censor_updates is used by the training algorithm to
            # enforce constraints after each step of learning. Here we
            # make sure the constraints are enforced from the start.
            self.model.enforce_constraints()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                                   " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets) > 0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.training_seconds.__doc__ = """\
The number of seconds that were spent in actual training during the most
recent epoch. This excludes seconds that were spent running callbacks for
the extensions, computing monitoring channels, etc."""
                self.model.monitor.add_channel(
                    name="training_seconds_this_epoch",
                    ipt=None,
                    val=self.training_seconds,
                    data_specs=(NullSpace(), ''),
                    dataset=self.model.monitor._datasets[0])
                self.total_seconds.__doc__ = """\
The number of seconds that were spent on the entirety of processing for the
previous epoch. This includes not only training but also the computation of
the monitoring channels, running TrainExtension callbacks, etc. This value
is reported for the *previous* epoch because the amount of time spent on
monitoring for this epoch is not known until the monitoring channels have
already been reported."""
                self.model.monitor.add_channel(
                    name="total_seconds_last_epoch",
                    ipt=None,
                    val=self.total_seconds,
                    data_specs=(NullSpace(), ''),
                    dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                with log_timing(log,
                                None,
                                level=logging.DEBUG,
                                callbacks=[self.total_seconds.set_value]):
                    with log_timing(
                            log,
                            None,
                            final_msg='Time this epoch:',
                            callbacks=[self.training_seconds.set_value]):
                        rval = self.algorithm.train(dataset=self.dataset)
                    if rval is not None:
                        raise ValueError("TrainingAlgorithm.train should not "
                                         "return anything. Use "
                                         "TrainingAlgorithm.continue_learning "
                                         "to control whether learning "
                                         "continues.")
                    self.model.monitor.report_epoch()
                    extension_continue = self.run_callbacks_and_monitoring()
                    if self.save_freq > 0 and \
                       self.model.monitor._epochs_seen % self.save_freq == 0:
                        self.save()
                continue_learning = (self.algorithm.continue_learning(
                    self.model) and extension_continue)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #19
    def add_channels_to_monitor(self, monitor, monitoring_dataset):

        channel_mapping = {
            '_min': T.min,
            '_max': T.max,
            '_mean': T.mean
        }

        for mean_update in self.mean_updates.values():
            if mean_update.ndim == 4:
                # rank-4 tensor (assuming stack of rank-3 convolutional kernels)
                knl_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=(1, 2, 3)))
                for suffix, op in channel_mapping.items():
                    monitor.add_channel(
                        name=(mean_update.name + "_kernel_norm" + suffix),
                        ipt=None,
                        val=op(knl_norm_vals),
                        data_specs=(NullSpace(), ''),
                        dataset=monitoring_dataset)
            elif mean_update.ndim == 3:
                # rank-3 tensor (assuming stack of rank-2 conv layer biases)
                knl_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=(1, 2)))
                for suffix, op in channel_mapping.items():
                    monitor.add_channel(
                        name=(mean_update.name + "_norm" + suffix),
                        ipt=None,
                        val=op(knl_norm_vals),
                        data_specs=(NullSpace(), ''),
                        dataset=monitoring_dataset)
            elif mean_update.ndim == 2:
                # rank-2 tensor (matrix)
                col_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=0))
                row_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=1))
                mtx_norm_val = T.sqrt(T.sum(T.sqr(mean_update)))
                for suffix, op in channel_mapping.items():
                    monitor.add_channel(
                        name=(mean_update.name + "_col_norm" + suffix),
                        ipt=None,
                        val=op(col_norm_vals),
                        data_specs=(NullSpace(), ''),
                        dataset=monitoring_dataset)
                    monitor.add_channel(
                        name=(mean_update.name + "_row_norm" + suffix),
                        ipt=None,
                        val=op(row_norm_vals),
                        data_specs=(NullSpace(), ''),
                        dataset=monitoring_dataset)
                monitor.add_channel(
                    name=(mean_update.name + "_norm"),
                    ipt=None,
                    val=mtx_norm_val,
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
            elif mean_update.ndim == 1:
                # rank-1 tensor (vector)
                norm_val = T.sqrt(T.sum(T.sqr(mean_update), axis=0))
                monitor.add_channel(
                    name=(mean_update.name + "_norm"),
                    ipt=None,
                    val=norm_val,
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
            elif mean_update.ndim == 0:
                # rank-0 tensor (scalar)
                monitor.add_channel(
                    name=(mean_update.name + "_norm"),
                    ipt=None,
                    val=mean_update,
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
            else:
                # not sure which axes to sum over in this case
                raise ValueError(
                    'Mean update {0} has unexpected number of dimensions {1} ({2})'
                    .format(mean_update, mean_update.ndim, mean_update.shape))

        self.base.add_channels_to_monitor(monitor, monitoring_dataset)

        return