Example #1
    def __init__(self, inputs=None, outputs=None, function=None, **kwargs):
        """
        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            List of [tuple(shape, `Theano.TensorType`)] or None describing the inputs to use for this layer.
            `shape` will be a monad tuple representing known sizes for each dimension in the `Theano.TensorType`.
            The length of `shape` should be equal to the number of dimensions in `Theano.TensorType`, where the shape
            element is an integer representing the size for its dimension, or None if the shape isn't known.
            For example, if you have a matrix with unknown batch size but fixed feature size of 784, `shape` would
            be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : List of [int or shape tuple]
            The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
        function : theano expression, optional
            If you want to pass a custom Theano expression to run on the inputs, you can do that here. This is
            mostly to enable a wrapper for processing data when adding layers to a :class:`opendeep.models.container`
            object, such as a :class:`opendeep.models.Prototype`.
        """
        self._classname = self.__class__.__name__
        self.inputs = raise_to_list(inputs)
        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.function = function
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)
        self.args['inputs'] = self.inputs
        if self.output_size is not None:
            self.args['output_size'] = self.output_size
        if self.function is not None:
            self.args['function'] = self.function
        # Don't know the position of switches!
        self.switches_on = None

        log.debug("Creating a new ModifyLayer: %s with args: %s" % (self._classname, str(self.args)))
Example #2
    def __init__(self, inputs=None, outputs=None, function=None, **kwargs):
        """
        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            List of [tuple(shape, `Theano.TensorType`)] or None describing the inputs to use for this layer.
            `shape` will be a monad tuple representing known sizes for each dimension in the `Theano.TensorType`.
            The length of `shape` should be equal to the number of dimensions in `Theano.TensorType`, where the shape
            element is an integer representing the size for its dimension, or None if the shape isn't known.
            For example, if you have a matrix with unknown batch size but fixed feature size of 784, `shape` would
            be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : List of [int or shape tuple]
            The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
        function : theano expression, optional
            If you want to pass a custom Theano expression to run on the inputs, you can do that here. This is
            mostly to enable a wrapper for processing data when adding layers to a :class:`opendeep.models.container`
            object, such as a :class:`opendeep.models.Prototype`.
        """
        self._classname = self.__class__.__name__
        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            ins = []
            # deal with Models or ModifyLayers being passed as an input.
            for input in self.inputs:
                if hasattr(input, 'output_size') and hasattr(input, 'get_outputs'):
                    sizes = raise_to_list(input.output_size)
                    outs = raise_to_list(input.get_outputs())
                    if len(sizes) == 1 and len(sizes) < len(outs):
                        sizes = sizes * len(outs)
                    input = raise_to_list(zip(sizes, outs))
                    for i in input:
                        ins.append(i)
                else:
                    ins.append(input)
            # replace self.inputs
            self.inputs = ins

        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.function = function
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)
        self.args['inputs'] = self.inputs
        if self.output_size is not None:
            self.args['output_size'] = self.output_size
        if self.function is not None:
            self.args['function'] = self.function
        # Don't know the position of switches!
        self.switches_on = None

        log.debug("Creating a new ModifyLayer: %s with args: %s" % (self._classname, str(self.args)))
Example #3
    def __init__(self, dataset, loss=None, model=None,
                 epochs=1000, batch_size=100, min_batch_size=1,
                 save_freq=10, stop_threshold=None, stop_patience=50,
                 learning_rate=1e-3, lr_decay=None, lr_decay_factor=None,
                 grad_clip=None, hard_clip=False,
                 **kwargs):
        """
        Initialize the Optimizer.

        Parameters
        ----------
        dataset : Dataset
            The :class:`opendeep.data.Dataset` to use when training the Model.
        loss : Loss
            The :class:`opendeep.optimization.loss.Loss` function to compare the model to a 'target' result.
        model : Model
            The :class:`opendeep.models.Model` to train. Needed if the Optimizer isn't being passed to a
            Model's .train() method.
        epochs : int
            How many training passes (epochs) to make over the dataset.
        batch_size : int
            How many examples from the training dataset to use in parallel.
        min_batch_size : int
            The minimum number of examples required at a time (for things like time series, this would be > 1).
        save_freq : int, optional
            How many epochs to train between each new save of the Model's parameters.
        stop_threshold : float, optional
            The factor by which the best validation score during training must improve to avoid triggering early stopping.
        stop_patience : int, optional
            The patience or number of epochs to wait after the stop_threshold has been reached before stopping.
        learning_rate : float
            The multiplicative amount to adjust parameters based on their gradient values.
        lr_decay : str
            The decay function to use for changing the learning rate over epochs. See
            `opendeep.utils.decay` for classes of decay and documentation.
        lr_decay_factor : float
            The amount of decay to use for the ``lr_decay`` type of decay.
        grad_clip : float, optional
            If set, clip the gradients at this value, using either a hard cutoff or rescaling of their norm (see ``hard_clip``).
        hard_clip : bool
            Whether to use a hard cutoff or rescaling for clipping gradients.
        """
        log.info("Initializing optimizer %s", str(self.__class__.__name__))

        # Deal with early stopping None initializations (no early stopping).
        if not stop_threshold:
            stop_threshold = numpy.inf
        if not save_freq:
            save_freq = 1000000
        if not stop_patience:
            stop_patience = 1

        # Put all init parameters in self.args so we can log the initial configuration.
        self.args = locals().copy()
        self.args.pop('self')
        kwargs = self.args.pop('kwargs')
        self.args = add_kwargs_to_dict(kwargs, self.args)
        # log the arguments
        log.info("Optimizer config args: %s", str(self.args))
        # if the optimizer wasn't initialized with a Model (train() being called from the model class itself),
        # just return. (This seems kinda hacky but hey, people wanted .train() to happen from Model and there
        # wasn't really a better way unless the epoch looping logic was in that method for Model. That wasn't
        # the best option because other methods besides stochastic ones can exist for optimizers in the future.
        # TODO: fix this up - feels like a hack just to make model.train() work...
        if not model:
            return
        # Otherwise, things are proceeding as normal. Carry on...

        assert isinstance(model, Model), "Optimizer input model needs to be a Model class! " \
                                         "Found %s" % str(model.__class__.__name__)
        assert isinstance(dataset, Dataset), "Optimizer input dataset needs to be a Dataset class! " \
                                             "Found %s" % str(dataset.__class__.__name__)
        # deal with loss expression/targets
        if loss is not None:
            assert isinstance(loss, Loss), "Optimizer input loss needs to be a Loss class! " \
                                           "Found %s" % str(loss.__class__.__name__)
        if isinstance(loss, Loss):
            self.loss_targets = loss.get_targets()
            self.loss_expression = loss.get_loss()
        else:
            assert model.get_loss() is not None, "No Loss specified, and the model does not have one implemented."
            if isinstance(model.get_loss(), tuple):
                self.loss_targets = raise_to_list(model.get_loss()[0])
                self.loss_expression = model.get_loss()[1]
            else:
                self.loss_targets = None
                self.loss_expression = model.get_loss()

        model_inputs = raise_to_list(model.get_inputs())
        n_model_inputs = len(model_inputs)

        model_targets = self.loss_targets or []
        for input in model_inputs:
            if input in model_targets:
                model_targets.remove(input)

        n_model_targets = len(model_targets)
        self.unsupervised = (n_model_targets == 0)
        # make sure the number of inputs/targets matches up with the dataset properties
        # train
        assert n_model_inputs == len(raise_to_list(dataset.train_inputs)), \
            "Dataset has %d train inputs, while model expects %d" % \
            (len(raise_to_list(dataset.train_inputs)), n_model_inputs)
        if not self.unsupervised:
            assert n_model_targets == len(raise_to_list(dataset.train_targets) or []), \
                "Dataset has %d train targets, while model expects %d" % \
                (len(raise_to_list(dataset.train_targets) or []), n_model_targets)
        # valid
        if dataset.valid_inputs is not None:
            assert n_model_inputs == len(raise_to_list(dataset.valid_inputs)), \
                "Dataset has %d valid inputs, while model expects %d" % \
                (len(raise_to_list(dataset.valid_inputs)), n_model_inputs)
            if not self.unsupervised:
                assert n_model_targets == len(raise_to_list(dataset.valid_targets) or []), \
                    "Dataset has %d valid targets, while model expects %d" % \
                    (len(raise_to_list(dataset.valid_targets) or []), n_model_targets)
        # test
        if dataset.test_inputs is not None:
            assert n_model_inputs == len(raise_to_list(dataset.test_inputs)), \
                "Dataset has %d test inputs, while model expects %d" % \
                (len(raise_to_list(dataset.test_inputs)), n_model_inputs)
            if not self.unsupervised:
                assert n_model_targets == len(raise_to_list(dataset.test_targets) or []), \
                    "Dataset has %d test targets, while model expects %d" % \
                    (len(raise_to_list(dataset.test_targets) or []), n_model_targets)

        # now we are happy, we can add them to `self`
        self.model = model
        self.dataset = dataset
        self.loss = loss

        # Learning rate - how drastic of a step do the parameters change
        self.learning_rate = sharedX(learning_rate, 'learning_rate')
        # whether to scale individual model parameters' learning rates.
        self.lr_scalers = self.model.get_lr_scalers()
        # whether to decay
        if lr_decay:
            self.learning_rate_decay = get_decay_function(lr_decay,
                                                          self.learning_rate,
                                                          learning_rate,
                                                          lr_decay_factor)
        else:
            self.learning_rate_decay = False

        # rest of initial parameters needed for training.
        self.batch_size = batch_size
        self.min_batch_size = min_batch_size
        self.n_epoch = epochs
        self.save_frequency = save_freq
        self.early_stop_threshold = stop_threshold
        self.early_stop_length = stop_patience
        self.grad_clip = grad_clip
        self.hard_clip = hard_clip
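
The `stop_threshold` and `stop_patience` parameters are only described loosely in the docstring. The standalone sketch below illustrates the usual early-stopping rule they suggest; it is an illustration of the idea, not OpenDeep's actual training loop:

def should_stop(valid_scores, stop_threshold=0.9995, stop_patience=50):
    """Return True once the best validation score (lower is better) has failed to
    improve by a factor of `stop_threshold` for `stop_patience` epochs in a row."""
    best = float('inf')
    patience = stop_patience
    for score in valid_scores:
        if score < best * stop_threshold:   # improved "enough"
            best = score
            patience = stop_patience        # reset the patience counter
        else:
            patience -= 1
            if patience <= 0:
                return True
    return False

# A plateauing validation curve triggers stopping once patience runs out:
print(should_stop([1.0, 0.8, 0.79, 0.79, 0.79, 0.79], stop_patience=3))  # True
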
Example #4
    def __init__(self, inputs=None, hiddens=None, outputs=None,
                 params=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs and hiddens Theano symbolic expressions
        or variables (if applicable) to set your inputs and hidden representation in a modular fashion,
        allowing models to link together. `inputs` can have a tuple of (shape, variable) that should replace
        the default model inputs. hiddens can have a tuple of (shape, variable) that should replace the
        default model hidden representation (which means you need to adapt creating your computation graph
        to not care about the inputs and to instead run outputs directly from the hidden variable provided).
        You can also accept a `params` dictionary to share model parameters rather than instantiating a new set of parameters.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`) or Model] or None
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to the number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)]. If a :class:`Model` is given as the input, it replaces
            the tuple with zip(Model.output_size, Model.get_outputs()).
        hiddens : List of [tuple(shape, `Theano.TensorType`) or shape] or None, optional
            The dimensionality of the hidden representation for this model, and/or the routing information for
            the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the hiddens i.e. n_hidden). This shape
            information is the same format as the monad for `inputs`.
        outputs : List of [int or shape tuple], optional
            The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str, optional
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict, optional
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and outputs.
        """
        self._classname = self.__class__.__name__
        log.info("Creating a new instance of %s", self._classname)

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            ins = []
            # deal with Models or ModifyLayers being passed as an input.
            for input in self.inputs:
                if hasattr(input, 'output_size') and hasattr(input, 'get_outputs'):
                    sizes = raise_to_list(input.output_size)
                    outs = raise_to_list(input.get_outputs())
                    if len(sizes) == 1 and len(sizes) < len(outs):
                        sizes = sizes * len(outs)
                    input = raise_to_list(zip(sizes, outs))
                    for i in input:
                        ins.append(i)
                else:
                    ins.append(input)
            # replace self.inputs
            self.inputs = ins

        self.hiddens = raise_to_list(hiddens)
        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.params = params or {}
        self.outdir = outdir

        # make the directory to output configuration and parameters from the model
        if self.outdir:
            self.outdir = os.path.realpath(self.outdir)
            mkdir_p(self.outdir)

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['inputs'] = self.inputs
        self.args['hiddens'] = self.hiddens
        if self.output_size is not None:
            self.args['output_size'] = self.output_size
        self.args['params'] = self.params
        self.args['outdir'] = self.outdir

        # log the arguments.
        log.info("%s self.args: %s", self._classname, str(self.args))
        # save the arguments.
        self.save_args()
        # Boom! Hyperparameters are now dealt with. Take that!

        # Don't know the position of switches!
        self.switches_on = None
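
The loop above converts a Model passed in `inputs` into (size, output) pairs by zipping the previous model's `output_size` with its `get_outputs()`. A duck-typed sketch of that conversion, using a stand-in object rather than a real OpenDeep Model:

class FakePrevModel(object):
    """Stand-in exposing the two attributes the loop above checks for."""
    output_size = [(None, 500)]               # one output of hidden size 500
    def get_outputs(self):
        return ['<symbolic hidden output>']   # a Theano variable in the real library

prev = FakePrevModel()
sizes = list(prev.output_size)
outs = list(prev.get_outputs())
if len(sizes) == 1 and len(sizes) < len(outs):
    sizes = sizes * len(outs)                 # broadcast a single size over all outputs
pairs = list(zip(sizes, outs))
print(pairs)   # [((None, 500), '<symbolic hidden output>')] -- the (shape, variable) form
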
Example #5
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None,
                 input_size=None, output_size=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs_hook and hiddens_hook (if applicable)
        to set your inputs and hidden representation in a modular fashion, allowing models to link together.
        inputs_hook is a tuple of (shape, variable) that should replace the default model inputs.
        hiddens_hook is a tuple of (shape, variable) that should replace the default model hidden representation
        (which means you need to adapt creating your computation graph to not care about the inputs and to instead
        run outputs directly from the hidden variable provided).
        You can also accept a params_hook to share model parameters rather than instantiate a new set of parameters.

        Parameters
        ----------
        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        input_size : int or shape tuple
            The dimensionality of the input for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer.
        output_size : int or shape tuple
            The dimensionality of the output for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer. Currently, we cannot
            infer the size from Theano's graph, so it needs to be explicit.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and output_size.
        """
        log.info("Creating a new instance of %s", str(type(self)))

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs_hook  = inputs_hook
        self.hiddens_hook = hiddens_hook
        self.params_hook  = params_hook
        self.input_size   = input_size
        self.output_size  = output_size
        self.outdir       = outdir

        # make sure outdir ends in a directory separator
        if self.outdir and self.outdir[-1] != os.sep:
            self.outdir += os.sep

        # Combine arguments that could specify input_size -> overwrite input_size with inputs_hook[0] if it exists.
        if self.inputs_hook and self.inputs_hook[0] is not None:
            self.input_size = self.inputs_hook[0]

        # Check if the input_size wasn't provided - if this is the case, it could either be a programmer's error
        # or it could be during the automatic stacking in a Container. Since that is a common use case, set
        # the input_size to 1 to avoid errors when instantiating the model.
        if not self.input_size:
            # Could be error, or more commonly, when adding models to a Container
            log.warning("No input_size or inputs_hook! Make sure this is done in a Container. Setting input_size"
                        "=1 for the Container now...")
            self.input_size = 1

        # Also, check if no output_size was given - this could be the case for generative models. Copy input_size
        # in that case.
        if not self.output_size:
            # Could be an error (hopefully not), so give the warning.
            log.warning("No output_size given! Make sure this is from a generative model (where output_size is the "
                        "same as input_size. Setting output_size=input_size now...")
            self.output_size = self.input_size

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['input_size']  = self.input_size
        self.args['output_size'] = self.output_size

        # Now create the directory for outputs of the model
        # set up base path for the outputs of the model during training, etc.
        self.args['outdir'] = self.outdir
        if self.args['outdir']:
            mkdir_p(self.args['outdir'])

        # log the arguments.
        log.info("%s self.args: %s", str(type(self)), str(self.args))
        # save the arguments.
        self.save_args()
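
In this hook-style API, `inputs_hook` is a single (shape, variable) tuple rather than a list, and its shape element takes precedence over an explicit `input_size`. A small sketch of that precedence, mirroring the check in the constructor above (the variable name `dae_hidden` is only illustrative):

import theano.tensor as T

hidden = T.matrix('dae_hidden')   # pretend this is the output of an earlier layer
inputs_hook = (256, hidden)       # (shape, variable): route 256 hidden units in

input_size = 784                  # whatever was passed explicitly
if inputs_hook and inputs_hook[0] is not None:
    input_size = inputs_hook[0]   # -> 256, taken from the hook

print(input_size)                 # 256
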
Example #6
    def __init__(self,
                 inputs=None,
                 hiddens=None,
                 outputs=None,
                 params=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs and hiddens Theano symbolic expressions
        or variables (if applicable) to set your inputs and hidden representation in a modular fashion,
        allowing models to link together. `inputs` can have a tuple of (shape, variable) that should replace
        the default model inputs. hiddens can have a tuple of (shape, variable) that should replace the
        default model hidden representation (which means you need to adapt creating your computation graph
        to not care about the inputs and to instead run outputs directly from the hidden variable provided).
        You can also accept a `params` dictionary to share model parameters rather than instantiating a new set of parameters.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`) or Model] or None
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to the number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)]. If a :class:`Model` is given as the input, it replaces
            the tuple with zip(Model.output_size, Model.get_outputs()).
        hiddens : List of [tuple(shape, `Theano.TensorType`) or shape] or None, optional
            The dimensionality of the hidden representation for this model, and/or the routing information for
            the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the hiddens i.e. n_hidden). This shape
            information is the same format as the monad for `inputs`.
        outputs : List of [int or shape tuple], optional
            The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str, optional
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict, optional
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and outputs.
        """
        self._classname = self.__class__.__name__
        log.info("Creating a new instance of %s", self._classname)

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            ins = []
            # deal with Models or ModifyLayers being passed as an input.
            for input in self.inputs:
                if hasattr(input, 'output_size') and hasattr(
                        input, 'get_outputs'):
                    sizes = raise_to_list(input.output_size)
                    outs = raise_to_list(input.get_outputs())
                    if len(sizes) == 1 and len(sizes) < len(outs):
                        sizes = sizes * len(outs)
                    input = raise_to_list(zip(sizes, outs))
                    for i in input:
                        ins.append(i)
                else:
                    ins.append(input)
            # replace self.inputs
            self.inputs = ins

        self.hiddens = raise_to_list(hiddens)
        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.params = params or {}
        self.outdir = outdir

        # make the directory to output configuration and parameters from the model
        if self.outdir:
            self.outdir = os.path.realpath(self.outdir)
            mkdir_p(self.outdir)

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['inputs'] = self.inputs
        self.args['hiddens'] = self.hiddens
        if self.output_size is not None:
            self.args['output_size'] = self.output_size
        self.args['params'] = self.params
        self.args['outdir'] = self.outdir

        # log the arguments.
        log.info("%s self.args: %s", self._classname, str(self.args))
        # save the arguments.
        self.save_args()
        # Boom! Hyperparameters are now dealt with. Take that!

        # Don't know the position of switches!
        self.switches_on = None
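
Every constructor in these examples leans on two small helpers, `raise_to_list` and `add_kwargs_to_dict`, whose implementations are not shown on this page. The sketch below captures only the behavior the surrounding code appears to assume (wrap non-lists in a list, fold leftover keyword arguments into the config dict); it is not the library's actual source:

def raise_to_list(value):
    # Assumed behavior: None stays None, lists pass through, anything else gets wrapped.
    if value is None:
        return None
    if isinstance(value, list):
        return value
    return [value]

def add_kwargs_to_dict(kwargs, args):
    # Assumed behavior: copy leftover keyword arguments into the args/config dictionary.
    for key, val in kwargs.items():
        args[key] = val
    return args

print(raise_to_list(784))                                    # [784]
print(raise_to_list([(None, 784)]))                          # [(None, 784)]
print(add_kwargs_to_dict({'noise': 0.2}, {'inputs': None}))  # {'inputs': None, 'noise': 0.2}
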
Example #7
    def __init__(self, dataset, loss=None, model=None,
                 epochs=1000, batch_size=100, min_batch_size=1,
                 save_freq=10, stop_threshold=None, stop_patience=50,
                 learning_rate=1e-3, lr_decay=None, lr_decay_factor=None,
                 grad_clip=None, hard_clip=False,
                 **kwargs):
        """
        Initialize the Optimizer.

        Parameters
        ----------
        dataset : Dataset
            The :class:`opendeep.data.Dataset` to use when training the Model.
        loss : Loss
            The :class:`opendeep.optimization.loss.Loss` function to compare the model to a 'target' result.
        model : Model
            The :class:`opendeep.models.Model` to train. Needed if the Optimizer isn't being passed to a
            Model's .train() method.
        epochs : int
            How many training passes (epochs) to make over the dataset.
        batch_size : int
            How many examples from the training dataset to use in parallel.
        min_batch_size : int
            The minimum number of examples required at a time (for things like time series, this would be > 1).
        save_freq : int, optional
            How many epochs to train between each new save of the Model's parameters.
        stop_threshold : float, optional
            The factor by which the best validation score during training must improve to avoid triggering early stopping.
        stop_patience : int, optional
            The patience or number of epochs to wait after the stop_threshold has been reached before stopping.
        learning_rate : float
            The multiplicative amount to adjust parameters based on their gradient values.
        lr_decay : str
            The decay function to use for changing the learning rate over epochs. See
            `opendeep.utils.decay` for classes of decay and documentation.
        lr_decay_factor : float
            The amount of decay to use for the ``lr_decay`` type of decay.
        grad_clip : float, optional
            If set, clip the gradients at this value, using either a hard cutoff or rescaling of their norm (see ``hard_clip``).
        hard_clip : bool
            Whether to use a hard cutoff or rescaling for clipping gradients.
        """
        log.info("Initializing optimizer %s", str(self.__class__.__name__))

        # Deal with early stopping None initializations (no early stopping).
        if not stop_threshold:
            stop_threshold = numpy.inf
        if not save_freq:
            save_freq = 1000000
        if not stop_patience:
            stop_patience = 1

        # Put all init parameters in self.args so we can log the initial configuration.
        self.args = locals().copy()
        self.args.pop('self')
        kwargs = self.args.pop('kwargs')
        self.args = add_kwargs_to_dict(kwargs, self.args)
        # log the arguments
        log.info("Optimizer config args: %s", str(self.args))
        # if the optimizer wasn't initialized with a Model (train() being called from the model class itself),
        # just return. (This seems kinda hacky but hey, people wanted .train() to happen from Model and there
        # wasn't really a better way unless the epoch looping logic was in that method for Model. That wasn't
        # the best option because other methods besides stochastic ones can exist for optimizers in the future.
        # TODO: fix this up - feels like a hack just to make model.train() work...
        if not model:
            return
        # Otherwise, things are proceeding as normal. Carry on...

        assert isinstance(model, Model), "Optimizer input model needs to be a Model class! " \
                                         "Found %s" % str(model.__class__.__name__)
        assert isinstance(dataset, Dataset), "Optimizer input dataset needs to be a Dataset class! " \
                                             "Found %s" % str(dataset.__class__.__name__)
        # deal with loss expression/targets
        if loss is not None:
            assert isinstance(loss, Loss), "Optimizer input loss needs to be a Loss class! " \
                                           "Found %s" % str(loss.__class__.__name__)
        if isinstance(loss, Loss):
            self.loss_targets = loss.get_targets()
            self.loss_expression = loss.get_loss()
        else:
            assert model.get_loss() is not None, "No Loss specified, and the model does not have one implemented."
            if isinstance(model.get_loss(), tuple):
                self.loss_targets = raise_to_list(model.get_loss()[0])
                self.loss_expression = model.get_loss()[1]
            else:
                self.loss_targets = None
                self.loss_expression = model.get_loss()

        model_inputs = raise_to_list(model.get_inputs())
        n_model_inputs = len(model_inputs)

        model_targets = self.loss_targets or []
        for input in model_inputs:
            if input in model_targets:
                model_targets.remove(input)

        n_model_targets = len(model_targets)
        self.unsupervised = (n_model_targets == 0)
        # make sure the number of inputs/targets matches up with the dataset properties
        # train
        assert n_model_inputs == len(raise_to_list(dataset.train_inputs)), \
            "Dataset has %d train inputs, while model expects %d" % \
            (len(raise_to_list(dataset.train_inputs)), n_model_inputs)
        if not self.unsupervised:
            assert n_model_targets == len(raise_to_list(dataset.train_targets) or []), \
                "Dataset has %d train targets, while model expects %d" % \
                (len(raise_to_list(dataset.train_targets) or []), n_model_targets)
        # valid
        if dataset.valid_inputs is not None:
            assert n_model_inputs == len(raise_to_list(dataset.valid_inputs)), \
                "Dataset has %d valid inputs, while model expects %d" % \
                (len(raise_to_list(dataset.valid_inputs)), n_model_inputs)
            if not self.unsupervised:
                assert n_model_targets == len(raise_to_list(dataset.valid_targets) or []), \
                    "Dataset has %d valid targets, while model expects %d" % \
                    (len(raise_to_list(dataset.valid_targets) or []), n_model_targets)
        # test
        if dataset.test_inputs is not None:
            assert n_model_inputs == len(raise_to_list(dataset.test_inputs)), \
                "Dataset has %d test inputs, while model expects %d" % \
                (len(raise_to_list(dataset.test_inputs)), n_model_inputs)
            if not self.unsupervised:
                assert n_model_targets == len(raise_to_list(dataset.test_targets) or []), \
                    "Dataset has %d test targets, while model expects %d" % \
                    (len(raise_to_list(dataset.test_targets) or []), n_model_targets)

        # now we are happy, we can add them to `self`
        self.model = model
        self.dataset = dataset
        self.loss = loss

        # Learning rate - how drastic of a step do the parameters change
        self.learning_rate = sharedX(learning_rate, 'learning_rate')
        # whether to scale individual model parameters' learning rates.
        self.lr_scalers = self.model.get_lr_scalers()
        # whether to decay
        if lr_decay:
            self.learning_rate_decay = get_decay_function(lr_decay,
                                                          self.learning_rate,
                                                          learning_rate,
                                                          lr_decay_factor)
        else:
            self.learning_rate_decay = False

        # rest of initial parameters needed for training.
        self.batch_size = batch_size
        self.min_batch_size = min_batch_size
        self.n_epoch = epochs
        self.save_frequency = save_freq
        self.early_stop_threshold = stop_threshold
        self.early_stop_length = stop_patience
        self.grad_clip = grad_clip
        self.hard_clip = hard_clip
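
The `grad_clip` and `hard_clip` options distinguish a hard cutoff from norm rescaling. The numpy sketch below shows one common reading of that distinction; it only illustrates the idea and is not OpenDeep's Theano implementation:

import numpy as np

def clip_gradient(grad, grad_clip, hard_clip):
    if hard_clip:
        # Hard cutoff: clamp each element into [-grad_clip, grad_clip].
        return np.clip(grad, -grad_clip, grad_clip)
    # Rescaling: shrink the whole gradient so its norm is at most grad_clip.
    norm = np.sqrt(np.sum(grad ** 2))
    if norm > grad_clip:
        return grad * (grad_clip / norm)
    return grad

g = np.array([3.0, 4.0])                       # norm 5
print(clip_gradient(g, 1.0, hard_clip=False))  # [0.6 0.8] -- rescaled to norm 1
print(clip_gradient(g, 1.0, hard_clip=True))   # [1. 1.]   -- element-wise cutoff
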
Example #8
    def __init__(self,
                 inputs_hook=None,
                 hiddens_hook=None,
                 params_hook=None,
                 input_size=None,
                 output_size=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs_hook and hiddens_hook (if applicable)
        to set your inputs and hidden representation in a modular fashion, allowing models to link together.
        inputs_hook is a tuple of (shape, variable) that should replace the default model inputs.
        hiddens_hook is a tuple of (shape, variable) that should replace the default model hidden representation
        (which means you need to adapt creating your computation graph to not care about the inputs and to instead
        run outputs directly from the hidden variable provided).
        You can also accept a params_hook to share model parameters rather than instantiate a new set of parameters.

        Parameters
        ----------
        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        input_size : int or shape tuple
            The dimensionality of the input for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer.
        output_size : int or shape tuple
            The dimensionality of the output for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer. Currently, we cannot
            infer the size from Theano's graph, so it needs to be explicit.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and output_size.
        """
        log.info("Creating a new instance of %s", str(type(self)))

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs_hook = inputs_hook
        self.hiddens_hook = hiddens_hook
        self.params_hook = params_hook
        self.input_size = input_size
        self.output_size = output_size
        self.outdir = outdir

        # make sure outdir ends in a directory separator
        if self.outdir and self.outdir[-1] != os.sep:
            self.outdir += os.sep

        # Combine arguments that could specify input_size -> overwrite input_size with inputs_hook[0] if it exists.
        if self.inputs_hook and self.inputs_hook[0] is not None:
            self.input_size = self.inputs_hook[0]

        # Check if the input_size wasn't provided - if this is the case, it could either be a programmer's error
        # or it could be during the automatic stacking in a Container. Since that is a common use case, set
        # the input_size to 1 to avoid errors when instantiating the model.
        if not self.input_size:
            # Could be error, or more commonly, when adding models to a Container
            log.warning(
                "No input_size or inputs_hook! Make sure this is done in a Container. Setting input_size"
                "=1 for the Container now...")
            self.input_size = 1

        # Also, check if no output_size was given - this could be the case for generative models. Copy input_size
        # in that case.
        if not self.output_size:
            # Could be an error (hopefully not), so give the warning.
            log.warning(
                "No output_size given! Make sure this is from a generative model (where output_size is the "
                "same as input_size). Setting output_size=input_size now...")
            self.output_size = self.input_size

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['output_size'] = self.output_size

        # Now create the directory for outputs of the model
        # set up base path for the outputs of the model during training, etc.
        self.args['outdir'] = self.outdir
        if self.args['outdir']:
            mkdir_p(self.args['outdir'])

        # log the arguments.
        log.info("%s self.args: %s", str(type(self)), str(self.args))
        # save the arguments.
        self.save_args()
Example #9
    def __init__(self, inputs=None, hiddens=None, outputs=None,
                 params=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs and hiddens SharedVariables (if applicable)
        to set your inputs and hidden representation in a modular fashion, allowing models to link together.
        inputs can have a tuple of (shape, variable) that should replace the default model inputs.
        hiddens can have a tuple of (shape, variable) that should replace the default model hidden representation
        (which means you need to adapt creating your computation graph to not care about the inputs and to instead
        run outputs directly from the hidden variable provided).
        You can also accept a `params` dictionary to share model parameters rather than instantiating a new set of parameters.

        Parameters
        ----------
        inputs : List of [int or shape_tuple or Tuple of (shape, SharedVariable) or None]
            The dimensionality of the inputs for this model, and/or the routing information for the model
            to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the inputs i.e. n_in).
        hiddens : List of [int or shape_tuple or Tuple of (shape, SharedVariable) or None], optional
            The dimensionality of the hidden representation for this model, and/or the routing information for
            the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the hiddens i.e. n_hidden).
        outputs : List of [int or shape tuple], optional
            The dimensionality of the output(s) for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer. Currently, we cannot
            infer the size from Theano's graph, so it needs to be explicit.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str, optional
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict, optional
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and outputs.
        """
        self._classname = self.__class__.__name__
        log.info("Creating a new instance of %s", self._classname)

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs = raise_to_list(inputs)
        self.hiddens = raise_to_list(hiddens)
        self.output_size = raise_to_list(outputs)
        self.params = params
        self.outdir = outdir

        # make the directory to output configuration and parameters from the model
        if self.outdir:
            self.outdir = os.path.realpath(self.outdir)
            mkdir_p(self.outdir)

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['inputs'] = self.inputs
        self.args['hiddens'] = self.hiddens
        self.args['output_size'] = self.output_size
        self.args['params'] = self.params
        self.args['outdir'] = self.outdir

        # log the arguments.
        log.info("%s self.args: %s", self._classname, str(self.args))
        # save the arguments.
        self.save_args()
        # Boom! Hyperparameters are now dealt with. Take that!

        # Don't know the position of switches!
        self.switches_on = None
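
The `params` dictionary exists so that two model instances can reuse the same Theano shared variables, as in the siamese-network case mentioned in the docstring. A minimal sketch of the sharing pattern with raw Theano shared variables and a hypothetical model class (MyDenseModel, x_left and x_right are assumed names for illustration):

import numpy as np
import theano

# One weight matrix, created once...
W = theano.shared(np.zeros((784, 256), dtype=theano.config.floatX), name='W')
shared_params = {'W': W}

# ...and passed to two hypothetical model instances so both branches reuse it:
# left  = MyDenseModel(inputs=[((None, 784), x_left)],  outputs=256, params=shared_params)
# right = MyDenseModel(inputs=[((None, 784), x_right)], outputs=256, params=shared_params)
# Updating W during training then affects both branches, which is the point of sharing.
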
Example #10
    def __init__(self, model, dataset,
                 n_epoch=1000, batch_size=100, minimum_batch_size=1,
                 save_frequency=10, early_stop_threshold=.9995, early_stop_length=30,
                 learning_rate=1e-3, lr_decay='exponential', lr_factor=1,
                 **kwargs):
        """
        Initialize the Optimizer.

        Parameters
        ----------
        model : Model
            The Model to train.
        dataset : Dataset
            The Dataset to use when training the Model.
        n_epoch : int
            How many training passes (epochs) to make over the dataset.
        batch_size : int
            How many examples from the training dataset to use in parallel.
        minimum_batch_size : int
            The minimum number of examples required at a time (for things like time series, this would be > 1).
        save_frequency : int
            How many epochs to train between each new save of the Model's parameters.
        early_stop_threshold : float
            The factor by which the best validation score during training must improve to avoid triggering early stopping.
        early_stop_length : int
            The patience or number of epochs to wait after the early_stop_threshold has been reached before stopping.
        learning_rate : float
            The multiplicative amount to adjust parameters based on their gradient values.
        lr_decay : str
            The type of decay function to use for changing the learning rate over epochs. See
            `opendeep.utils.decay` for options.
        lr_factor : float
            The amount to use for the decay function when changing the learning rate over epochs. See
            `opendeep.utils.decay` for its effect for given decay functions.
        """
        log.info("Initializing optimizer %s", str(type(self)))

        if early_stop_threshold is None:
            early_stop_threshold = 1.
        if save_frequency is None:
            save_frequency = 1000000
        if early_stop_length is None:
            early_stop_length = 100

        self.args = locals().copy()
        self.args.pop('self')
        kwargs = self.args.pop('kwargs')
        self.args = add_kwargs_to_dict(kwargs, self.args)
        # log the arguments
        log.info("optimizer config args: %s", str(self.args))

        assert isinstance(model, Model), "Optimizer input model needs to be an opendeep Model class!"
        assert isinstance(dataset, Dataset), "Optimizer input dataset needs to be an opendeep Dataset class!"
        self.model = model
        self.dataset = dataset

        # Learning rate - how drastic of a step do the parameters change
        self.learning_rate = sharedX(learning_rate, 'learning_rate')
        self.lr_scalers = self.model.get_lr_scalers()
        if lr_decay:
            self.learning_rate_decay = get_decay_function(lr_decay,
                                                          self.learning_rate,
                                                          self.learning_rate.get_value(),
                                                          lr_factor)
        else:
            self.learning_rate_decay = False

        self.noise_switches = raise_to_list(self.model.get_noise_switch())
        self.batch_size = batch_size
        self.minimum_batch_size = minimum_batch_size
        self.n_epoch = n_epoch
        self.save_frequency = save_frequency
        self.early_stop_threshold = early_stop_threshold
        self.early_stop_length = early_stop_length
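
Here `lr_decay='exponential'` with `lr_factor` controls how the shared learning rate shrinks each epoch. The generator below sketches the usual exponential schedule; the exact formula lives in `opendeep.utils.decay`, so treat this only as an illustration of the assumed form lr_t = initial_lr * factor ** t:

def exponential_decay(initial_lr, factor, epochs):
    lr = initial_lr
    for _ in range(epochs):
        yield lr
        lr *= factor

# With lr_factor=1 (the default in this example) the rate never actually changes:
print(list(exponential_decay(1e-3, 1.0, 3)))   # [0.001, 0.001, 0.001]
# A factor below 1 shrinks it every epoch:
print(list(exponential_decay(1e-3, 0.95, 3)))  # approximately [0.001, 0.00095, 0.0009025]
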