def __init__(self, inputs=None, outputs=None, function=None, **kwargs):
    """
    Parameters
    ----------
    inputs : List of [tuple(shape, `Theano.TensorType`)]
        List of [tuple(shape, `Theano.TensorType`)] or None describing the inputs to use for this layer.
        `shape` will be a monad tuple representing known sizes for each dimension in the `Theano.TensorType`.
        The length of `shape` should be equal to the number of dimensions in `Theano.TensorType`, where each
        shape element is an integer representing the size for its dimension, or None if the size isn't known.
        For example, if you have a matrix with unknown batch size but fixed feature size of 784, `shape` would
        be: (None, 784). The full form of `inputs` would be: [((None, 784), <TensorType(float32, matrix)>)].
    outputs : List of [int or shape tuple]
        The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
    function : theano expression, optional
        If you want to pass a custom Theano expression to run on the inputs, you can do that here. This is
        mostly to enable a wrapper for processing data when adding layers to a
        :class:`opendeep.models.container` object, such as a :class:`opendeep.models.Prototype`.
    """
    self._classname = self.__class__.__name__
    self.inputs = raise_to_list(inputs)
    self.output_size = raise_to_list(kwargs.get('output_size', outputs))
    self.function = function

    self.args = {}
    self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

    self.args['inputs'] = self.inputs
    if self.output_size is not None:
        self.args['output_size'] = self.output_size
    if self.function is not None:
        self.args['function'] = self.function

    # Don't know the position of switches!
    self.switches_on = None

    log.debug("Creating a new ModifyLayer: %s with args: %s" % (self._classname, str(self.args)))
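# Usage sketch (illustrative, not part of the library source): how to build the
# `inputs` list documented above -- a list of (shape, TensorType) tuples where the
# shape tuple has one entry per tensor dimension and None marks an unknown size.
# Only Theano itself is used; the ModifyLayer call in the comment is an assumed
# invocation of the __init__ above.
import theano.tensor as T

x = T.matrix('x')                # a 2D TensorVariable: (batch, features)
inputs = [((None, 784), x)]      # unknown batch size, 784 features per example
# This list is what would be passed as ModifyLayer(inputs=inputs, outputs=784, ...)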
def __init__(self, inputs=None, outputs=None, function=None, **kwargs):
    """
    Parameters
    ----------
    inputs : List of [tuple(shape, `Theano.TensorType`)]
        List of [tuple(shape, `Theano.TensorType`)] or None describing the inputs to use for this layer.
        `shape` will be a monad tuple representing known sizes for each dimension in the `Theano.TensorType`.
        The length of `shape` should be equal to the number of dimensions in `Theano.TensorType`, where each
        shape element is an integer representing the size for its dimension, or None if the size isn't known.
        For example, if you have a matrix with unknown batch size but fixed feature size of 784, `shape` would
        be: (None, 784). The full form of `inputs` would be: [((None, 784), <TensorType(float32, matrix)>)].
    outputs : List of [int or shape tuple]
        The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
    function : theano expression, optional
        If you want to pass a custom Theano expression to run on the inputs, you can do that here. This is
        mostly to enable a wrapper for processing data when adding layers to a
        :class:`opendeep.models.container` object, such as a :class:`opendeep.models.Prototype`.
    """
    self._classname = self.__class__.__name__
    self.inputs = raise_to_list(inputs)
    if self.inputs is not None:
        ins = []
        # deal with Models or ModifyLayers being passed as an input.
        for input in self.inputs:
            if hasattr(input, 'output_size') and hasattr(input, 'get_outputs'):
                sizes = raise_to_list(input.output_size)
                outs = raise_to_list(input.get_outputs())
                if len(sizes) == 1 and len(sizes) < len(outs):
                    sizes = sizes * len(outs)
                input = raise_to_list(zip(sizes, outs))
                for i in input:
                    ins.append(i)
            else:
                ins.append(input)
        # replace self.inputs
        self.inputs = ins

    self.output_size = raise_to_list(kwargs.get('output_size', outputs))
    self.function = function

    self.args = {}
    self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

    self.args['inputs'] = self.inputs
    if self.output_size is not None:
        self.args['output_size'] = self.output_size
    if self.function is not None:
        self.args['function'] = self.function

    # Don't know the position of switches!
    self.switches_on = None

    log.debug("Creating a new ModifyLayer: %s with args: %s" % (self._classname, str(self.args)))
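# Sketch (illustrative, not library code): the loop above duck-types on
# `output_size` and `get_outputs()` so an upstream Model/ModifyLayer can be passed
# directly in `inputs` and gets expanded into (size, output) pairs. The stand-in
# class below is hypothetical and only mimics those two attributes; the string
# outputs are placeholders for Theano expressions.
class _FakeUpstream(object):
    output_size = [500]                  # one known size...
    def get_outputs(self):
        return ['h1_expr', 'h2_expr']    # ...but two output expressions

up = _FakeUpstream()
sizes = list(up.output_size)
outs = up.get_outputs()
if len(sizes) == 1 and len(sizes) < len(outs):
    sizes = sizes * len(outs)            # broadcast the single size to every output
expanded = list(zip(sizes, outs))        # [(500, 'h1_expr'), (500, 'h2_expr')]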
def __init__(self, dataset, loss=None, model=None,
             epochs=1000, batch_size=100, min_batch_size=1,
             save_freq=10, stop_threshold=None, stop_patience=50,
             learning_rate=1e-3, lr_decay=None, lr_decay_factor=None,
             grad_clip=None, hard_clip=False,
             **kwargs):
    """
    Initialize the Optimizer.

    Parameters
    ----------
    dataset : Dataset
        The :class:`opendeep.data.Dataset` to use when training the Model.
    loss : Loss
        The :class:`opendeep.optimization.loss.Loss` function to compare the model to a 'target' result.
    model : Model
        The :class:`opendeep.models.Model` to train. Needed if the Optimizer isn't being passed to a
        Model's .train() method.
    epochs : int
        How many training passes (epochs) over the dataset to perform.
    batch_size : int
        How many examples from the training dataset to use in parallel.
    min_batch_size : int
        The minimum number of examples required at a time (for things like time series, this would be > 1).
    save_freq : int, optional
        How many epochs to train between each new save of the Model's parameters.
    stop_threshold : float, optional
        The factor by which the best validation score needs to improve to avoid early stopping.
    stop_patience : int, optional
        The patience or number of epochs to wait after the stop_threshold has been reached before stopping.
    learning_rate : float
        The multiplicative amount to adjust parameters based on their gradient values.
    lr_decay : str
        The decay function to use for changing the learning rate over epochs. See `opendeep.utils.decay`
        for classes of decay and documentation.
    lr_decay_factor : float
        The amount of decay to use for the ``lr_decay`` type of decay.
    grad_clip : float, optional
        If set, gradients will be clipped. The norm of the gradients is clipped either with a hard cutoff
        or by rescaling.
    hard_clip : bool
        Whether to use a hard cutoff or rescaling for clipping gradients.
    """
    log.info("Initializing optimizer %s", str(self.__class__.__name__))

    # Deal with early stopping None initializations (no early stopping).
    if not stop_threshold:
        stop_threshold = numpy.inf
    if not save_freq:
        save_freq = 1000000
    if not stop_patience:
        stop_patience = 1

    # Put all init parameters in self.args so we can log the initial configuration.
    self.args = locals().copy()
    self.args.pop('self')
    kwargs = self.args.pop('kwargs')
    self.args = add_kwargs_to_dict(kwargs, self.args)
    # log the arguments
    log.info("Optimizer config args: %s", str(self.args))

    # if the optimizer wasn't initialized with a Model (train() being called from the model class itself),
    # just return. (This seems kinda hacky but hey, people wanted .train() to happen from Model and there
    # wasn't really a better way unless the epoch looping logic was in that method for Model. That wasn't
    # the best option because other methods besides stochastic ones can exist for optimizers in the future.)
    # TODO: fix this up - feels like a hack just to make model.train() work...
    if not model:
        return
    # Otherwise, things are proceeding as normal. Carry on...

    assert isinstance(model, Model), "Optimizer input model needs to be a Model class! " \
                                     "Found %s" % str(model.__class__.__name__)
    assert isinstance(dataset, Dataset), "Optimizer input dataset needs to be a Dataset class! " \
                                         "Found %s" % str(dataset.__class__.__name__)

    # deal with loss expression/targets
    if loss is not None:
        assert isinstance(loss, Loss), "Optimizer input loss needs to be a Loss class! " \
                                       "Found %s" % str(loss.__class__.__name__)
    if isinstance(loss, Loss):
        self.loss_targets = loss.get_targets()
        self.loss_expression = loss.get_loss()
    else:
        assert model.get_loss() is not None, "No Loss specified, and the model does not have one implemented."
        if isinstance(model.get_loss(), tuple):
            self.loss_targets = raise_to_list(model.get_loss()[0])
            self.loss_expression = model.get_loss()[1]
        else:
            self.loss_targets = None
            self.loss_expression = model.get_loss()

    model_inputs = raise_to_list(model.get_inputs())
    n_model_inputs = len(model_inputs)

    model_targets = self.loss_targets or []
    for input in model_inputs:
        if input in model_targets:
            model_targets.remove(input)

    n_model_targets = len(model_targets)
    self.unsupervised = (n_model_targets == 0)

    # make sure the number of inputs/targets matches up with the dataset properties
    # train
    assert n_model_inputs == len(raise_to_list(dataset.train_inputs)), \
        "Dataset has %d train inputs, while model expects %d" % \
        (len(raise_to_list(dataset.train_inputs)), n_model_inputs)
    if not self.unsupervised:
        assert n_model_targets == len(raise_to_list(dataset.train_targets) or []), \
            "Dataset has %d train targets, while model expects %d" % \
            (len(raise_to_list(dataset.train_targets) or []), n_model_targets)
    # valid
    if dataset.valid_inputs is not None:
        assert n_model_inputs == len(raise_to_list(dataset.valid_inputs)), \
            "Dataset has %d valid inputs, while model expects %d" % \
            (len(raise_to_list(dataset.valid_inputs)), n_model_inputs)
        if not self.unsupervised:
            assert n_model_targets == len(raise_to_list(dataset.valid_targets) or []), \
                "Dataset has %d valid targets, while model expects %d" % \
                (len(raise_to_list(dataset.valid_targets) or []), n_model_targets)
    # test
    if dataset.test_inputs is not None:
        assert n_model_inputs == len(raise_to_list(dataset.test_inputs)), \
            "Dataset has %d test inputs, while model expects %d" % \
            (len(raise_to_list(dataset.test_inputs)), n_model_inputs)
        if not self.unsupervised:
            assert n_model_targets == len(raise_to_list(dataset.test_targets) or []), \
                "Dataset has %d test targets, while model expects %d" % \
                (len(raise_to_list(dataset.test_targets) or []), n_model_targets)

    # now we are happy, we can add them to `self`
    self.model = model
    self.dataset = dataset
    self.loss = loss

    # Learning rate - how drastic of a step do the parameters change
    self.learning_rate = sharedX(learning_rate, 'learning_rate')
    # whether to scale individual model parameters' learning rates.
    self.lr_scalers = self.model.get_lr_scalers()
    # whether to decay
    if lr_decay:
        self.learning_rate_decay = get_decay_function(lr_decay,
                                                      self.learning_rate,
                                                      learning_rate,
                                                      lr_decay_factor)
    else:
        self.learning_rate_decay = False

    # rest of initial parameters needed for training.
    self.batch_size = batch_size
    self.min_batch_size = min_batch_size
    self.n_epoch = epochs
    self.save_frequency = save_freq
    self.early_stop_threshold = stop_threshold
    self.early_stop_length = stop_patience
    self.grad_clip = grad_clip
    self.hard_clip = hard_clip
def __init__(self, inputs=None, hiddens=None, outputs=None,
             params=None,
             outdir=None,
             **kwargs):
    """
    Initialize a new Model.

    Your model implementations should accept optional `inputs` and `hiddens` Theano symbolic expressions
    or variables (if applicable) to set your inputs and hidden representation in a modular fashion,
    allowing models to link together. `inputs` can have a tuple of (shape, variable) that should replace
    the default model inputs. `hiddens` can have a tuple of (shape, variable) that should replace the
    default model hidden representation (which means you need to adapt creating your computation graph to
    not care about the inputs and to instead run outputs directly from the hidden variable provided).
    You can also accept `params` to share model parameters rather than instantiate a new set of parameters.

    Parameters
    ----------
    inputs : List of [tuple(shape, `Theano.TensorType`) or Model] or None
        The dimensionality of the inputs for this model, and the routing information for the model
        to accept inputs from elsewhere. This is used for linking different models together (e.g. setting
        the Softmax model's input layer to the DAE's hidden layer gives a newly supervised classification
        model). `shape` will be a monad tuple representing known sizes for each dimension in the
        `Theano.TensorType`. The length of `shape` should be equal to the number of dimensions in
        `Theano.TensorType`, where each shape element is an integer representing the size for its dimension,
        or None if the size isn't known. For example, if you have a matrix with unknown batch size but fixed
        feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
        [((None, 784), <TensorType(float32, matrix)>)]. If a :class:`Model` is given as the input, it
        replaces the tuple with zip(Model.output_size, Model.get_outputs()).
    hiddens : List of [tuple(shape, `Theano.TensorType`) or shape] or None, optional
        The dimensionality of the hidden representation for this model, and/or the routing information for
        the model to accept its hidden representation from elsewhere. This is used for linking different
        models together (e.g. setting the GSN model's hidden layers to the RNN's output layer gives the
        RNN-GSN model, a deep recurrent model). For now, variable hook tuples need to include the shape
        information (normally the dimensionality of the hiddens i.e. n_hidden). This shape information is
        the same format as the monad for `inputs`.
    outputs : List of [int or shape tuple], optional
        The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
    params : Dict(string_name: theano SharedVariable), optional
        A dictionary of model parameters (shared theano variables) that you should use when constructing
        this model (instead of initializing your own shared variables). This parameter is useful when you
        want to have two versions of the model that use the same parameters - such as siamese networks or
        pretraining some weights.
    outdir : str, optional
        The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will be saved.
    kwargs : dict, optional
        This will be all the other left-over keyword parameters passed to the class as a dictionary of
        {param: value}. These get created into `self.args` along with outdir and outputs.
    """
    self._classname = self.__class__.__name__
    log.info("Creating a new instance of %s", self._classname)

    # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
    self.inputs = raise_to_list(inputs)
    if self.inputs is not None:
        ins = []
        # deal with Models or ModifyLayers being passed as an input.
        for input in self.inputs:
            if hasattr(input, 'output_size') and hasattr(input, 'get_outputs'):
                sizes = raise_to_list(input.output_size)
                outs = raise_to_list(input.get_outputs())
                if len(sizes) == 1 and len(sizes) < len(outs):
                    sizes = sizes * len(outs)
                input = raise_to_list(zip(sizes, outs))
                for i in input:
                    ins.append(i)
            else:
                ins.append(input)
        # replace self.inputs
        self.inputs = ins

    self.hiddens = raise_to_list(hiddens)
    self.output_size = raise_to_list(kwargs.get('output_size', outputs))
    self.params = params or {}
    self.outdir = outdir

    # make the directory to output configuration and parameters from the model
    if self.outdir:
        self.outdir = os.path.realpath(self.outdir)
        mkdir_p(self.outdir)

    # copy all of the parameters from the class into an args (configuration) dictionary
    self.args = {}
    self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

    self.args['inputs'] = self.inputs
    self.args['hiddens'] = self.hiddens
    if self.output_size is not None:
        self.args['output_size'] = self.output_size

    self.args['params'] = self.params
    self.args['outdir'] = self.outdir

    # log the arguments.
    log.info("%s self.args: %s", self._classname, str(self.args))
    # save the arguments.
    self.save_args()
    # Boom! Hyperparameters are now dealt with. Take that!

    # Don't know the position of switches!
    self.switches_on = None
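# Sketch (illustrative, not library code): the `hiddens` hook format documented
# above. A (shape, variable) tuple lets an expression from another model stand in
# for this model's hidden representation (e.g. wiring an RNN's output into a GSN's
# hiddens for the RNN-GSN). Only Theano is used here; the GSN call in the comment
# is an assumed invocation, not taken from this source.
import theano.tensor as T

rnn_out = T.matrix('rnn_out')        # expression produced by an upstream model
hiddens = [((None, 500), rnn_out)]   # shape monad: unknown batch size, 500 hidden units
# e.g. GSN(inputs=[((None, 784), T.matrix('x'))], hiddens=hiddens, outputs=784)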
def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None,
             input_size=None, output_size=None,
             outdir=None,
             **kwargs):
    """
    Initialize a new Model.

    Your model implementations should accept optional inputs_hook and hiddens_hook (if applicable)
    to set your inputs and hidden representation in a modular fashion, allowing models to link together.
    inputs_hook is a tuple of (shape, variable) that should replace the default model inputs.
    hiddens_hook is a tuple of (shape, variable) that should replace the default model hidden representation
    (which means you need to adapt creating your computation graph to not care about the inputs and to
    instead run outputs directly from the hidden variable provided).
    You can also accept a params_hook to share model parameters rather than instantiate a new set of parameters.

    Parameters
    ----------
    inputs_hook : Tuple of (shape, variable)
        Routing information for the model to accept inputs from elsewhere. This is used for linking
        different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer
        gives a newly supervised classification model). For now, it needs to include the shape information
        (normally the dimensionality of the input i.e. n_in).
    hiddens_hook : Tuple of (shape, variable)
        Routing information for the model to accept its hidden representation from elsewhere. This is used
        for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's output
        layer gives the RNN-GSN model, a deep recurrent model). For now, it needs to include the shape
        information (normally the dimensionality of the hiddens i.e. n_hidden).
    params_hook : List(theano shared variable)
        A list of model parameters (shared theano variables) that you should use when constructing
        this model (instead of initializing your own shared variables). This parameter is useful when you
        want to have two versions of the model that use the same parameters - such as a training model with
        dropout applied to layers and one without for testing, where the parameters are shared between the two.
    input_size : int or shape tuple
        The dimensionality of the input for this model. This is required for stacking models
        automatically - where the input to one layer is the output of the previous layer.
    output_size : int or shape tuple
        The dimensionality of the output for this model. This is required for stacking models
        automatically - where the input to one layer is the output of the previous layer. Currently, we
        cannot infer the size from Theano's graph, so it needs to be explicit.
    outdir : str
        The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will be saved.
    kwargs : dict
        This will be all the other left-over keyword parameters passed to the class as a dictionary of
        {param: value}. These get created into `self.args` along with outdir and output_size.
    """
    log.info("Creating a new instance of %s", str(type(self)))

    # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
    self.inputs_hook = inputs_hook
    self.hiddens_hook = hiddens_hook
    self.params_hook = params_hook
    self.input_size = input_size
    self.output_size = output_size
    self.outdir = outdir

    # make sure outdir ends in a directory separator
    if self.outdir and self.outdir[-1] != os.sep:
        self.outdir += os.sep

    # Combine arguments that could specify input_size -> overwrite input_size with inputs_hook[0] if it exists.
    if self.inputs_hook and self.inputs_hook[0] is not None:
        self.input_size = self.inputs_hook[0]

    # Check if the input_size wasn't provided - if this is the case, it could either be a programmer's error
    # or it could be during the automatic stacking in a Container. Since that is a common use case, set
    # the input_size to 1 to avoid errors when instantiating the model.
    if not self.input_size:
        # Could be error, or more commonly, when adding models to a Container
        log.warning("No input_size or inputs_hook! Make sure this is done in a Container. "
                    "Setting input_size=1 for the Container now...")
        self.input_size = 1

    # Also, check if no output_size was given - this could be the case for generative models. Copy input_size
    # in that case.
    if not self.output_size:
        # Could be an error (hopefully not), so give the warning.
        log.warning("No output_size given! Make sure this is from a generative model (where output_size is "
                    "the same as input_size). Setting output_size=input_size now...")
        self.output_size = self.input_size

    # copy all of the parameters from the class into an args (configuration) dictionary
    self.args = {}
    self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

    self.args['input_size'] = self.input_size
    self.args['output_size'] = self.output_size

    # Now create the directory for outputs of the model
    # set up base path for the outputs of the model during training, etc.
    self.args['outdir'] = self.outdir
    if self.args['outdir']:
        mkdir_p(self.args['outdir'])

    # log the arguments.
    log.info("%s self.args: %s", str(type(self)), str(self.args))
    # save the arguments.
    self.save_args()
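# Sketch (illustrative, not library code): the older hook format documented above.
# inputs_hook is a single (shape, variable) tuple, and the __init__ above then
# overwrites input_size with inputs_hook[0]. Only Theano is used; the SomeModel
# name in the comment is hypothetical.
import theano.tensor as T

x = T.matrix('x')
inputs_hook = (28 * 28, x)    # shape information (n_in) plus the input variable
# e.g. SomeModel(inputs_hook=inputs_hook, output_size=10)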
def __init__(self, inputs=None, hiddens=None, outputs=None,
             params=None,
             outdir=None,
             **kwargs):
    """
    Initialize a new Model.

    Your model implementations should accept optional inputs and hiddens SharedVariables (if applicable)
    to set your inputs and hidden representation in a modular fashion, allowing models to link together.
    inputs can have a tuple of (shape, variable) that should replace the default model inputs.
    hiddens can have a tuple of (shape, variable) that should replace the default model hidden representation
    (which means you need to adapt creating your computation graph to not care about the inputs and to
    instead run outputs directly from the hidden variable provided).
    You can also accept a params to share model parameters rather than instantiate a new set of parameters.

    Parameters
    ----------
    inputs : List of [int or shape_tuple or Tuple of (shape, SharedVariable) or None]
        The dimensionality of the inputs for this model, and/or the routing information for the model
        to accept inputs from elsewhere. This is used for linking different models together (e.g. setting
        the Softmax model's input layer to the DAE's hidden layer gives a newly supervised classification
        model). For now, variable hook tuples need to include the shape information (normally the
        dimensionality of the inputs i.e. n_in).
    hiddens : List of [int or shape_tuple or Tuple of (shape, SharedVariable) or None], optional
        The dimensionality of the hidden representation for this model, and/or the routing information for
        the model to accept its hidden representation from elsewhere. This is used for linking different
        models together (e.g. setting the GSN model's hidden layers to the RNN's output layer gives the
        RNN-GSN model, a deep recurrent model.) For now, variable hook tuples need to include the shape
        information (normally the dimensionality of the hiddens i.e. n_hidden).
    outputs : List of [int or shape tuple], optional
        The dimensionality of the output(s) for this model. This is required for stacking models
        automatically - where the input to one layer is the output of the previous layer. Currently, we
        cannot run the size from Theano's graph, so it needs to be explicit.
    params : Dict(string_name: theano SharedVariable), optional
        A dictionary of model parameters (shared theano variables) that you should use when constructing
        this model (instead of initializing your own shared variables). This parameter is useful when you
        want to have two versions of the model that use the same parameters - such as siamese networks or
        pretraining some weights.
    outdir : str, optional
        The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will be saved.
    kwargs : dict, optional
        This will be all the other left-over keyword parameters passed to the class as a dictionary of
        {param: value}. These get created into `self.args` along with outdir and outputs.
    """
    self._classname = self.__class__.__name__
    log.info("Creating a new instance of %s", self._classname)

    # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
    self.inputs = raise_to_list(inputs)
    self.hiddens = raise_to_list(hiddens)
    self.output_size = raise_to_list(outputs)
    self.params = params
    self.outdir = outdir

    # make the directory to output configuration and parameters from the model
    if self.outdir:
        self.outdir = os.path.realpath(self.outdir)
        mkdir_p(self.outdir)

    # copy all of the parameters from the class into an args (configuration) dictionary
    self.args = {}
    self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

    self.args['inputs'] = self.inputs
    self.args['hiddens'] = self.hiddens
    self.args['output_size'] = self.output_size
    self.args['params'] = self.params
    self.args['outdir'] = self.outdir

    # log the arguments.
    log.info("%s self.args: %s", self._classname, str(self.args))
    # save the arguments.
    self.save_args()
    # Boom! Hyperparameters are now dealt with. Take that!

    # Don't know the position of switches!
    self.switches_on = None
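# Sketch (illustrative, not library code): sharing one `params` dictionary between
# two model instances, as described above for siamese networks or pretraining.
# Only the shared-variable dictionary is real Theano code; the Dense class name
# and the calls in the comments are assumptions.
import numpy
import theano

W = theano.shared(numpy.zeros((784, 500), dtype=theano.config.floatX), name='W')
b = theano.shared(numpy.zeros((500,), dtype=theano.config.floatX), name='b')
shared_params = {'W': W, 'b': b}
# branch_a = Dense(inputs=[((None, 784), x_a)], outputs=500, params=shared_params)
# branch_b = Dense(inputs=[((None, 784), x_b)], outputs=500, params=shared_params)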
def __init__(self, model, dataset,
             n_epoch=1000, batch_size=100, minimum_batch_size=1,
             save_frequency=10, early_stop_threshold=.9995, early_stop_length=30,
             learning_rate=1e-3, lr_decay='exponential', lr_factor=1,
             **kwargs):
    """
    Initialize the Optimizer.

    Parameters
    ----------
    model : Model
        The Model to train.
    dataset : Dataset
        The Dataset to use when training the Model.
    n_epoch : int
        How many training passes (epochs) over the dataset to perform.
    batch_size : int
        How many examples from the training dataset to use in parallel.
    minimum_batch_size : int
        The minimum number of examples required at a time (for things like time series, this would be > 1).
    save_frequency : int
        How many epochs to train between each new save of the Model's parameters.
    early_stop_threshold : float
        The factor by which the best validation score needs to improve to avoid early stopping.
    early_stop_length : int
        The patience or number of epochs to wait after the early_stop_threshold has been reached before
        stopping.
    learning_rate : float
        The multiplicative amount to adjust parameters based on their gradient values.
    lr_decay : str
        The type of decay function to use for changing the learning rate over epochs. See
        `opendeep.utils.decay` for options.
    lr_factor : float
        The amount to use for the decay function when changing the learning rate over epochs. See
        `opendeep.utils.decay` for its effect for given decay functions.
    """
    log.info("Initializing optimizer %s", str(type(self)))

    if early_stop_threshold is None:
        early_stop_threshold = 1.
    if save_frequency is None:
        save_frequency = 1000000
    if early_stop_length is None:
        early_stop_length = 100

    self.args = locals().copy()
    self.args.pop('self')
    kwargs = self.args.pop('kwargs')
    self.args = add_kwargs_to_dict(kwargs, self.args)
    # log the arguments
    log.info("optimizer config args: %s", str(self.args))

    assert isinstance(model, Model), "Optimizer input model needs to be an opendeep Model class!"
    assert isinstance(dataset, Dataset), "Optimizer input dataset needs to be an opendeep Dataset class!"
    self.model = model
    self.dataset = dataset

    # Learning rate - how drastic of a step do the parameters change
    self.learning_rate = sharedX(learning_rate, 'learning_rate')
    self.lr_scalers = self.model.get_lr_scalers()
    if lr_decay:
        self.learning_rate_decay = get_decay_function(lr_decay,
                                                      self.learning_rate,
                                                      self.learning_rate.get_value(),
                                                      lr_factor)
    else:
        self.learning_rate_decay = False

    self.noise_switches = raise_to_list(self.model.get_noise_switch())
    self.batch_size = batch_size
    self.minimum_batch_size = minimum_batch_size
    self.n_epoch = n_epoch
    self.save_frequency = save_frequency
    self.early_stop_threshold = early_stop_threshold
    self.early_stop_length = early_stop_length
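# Sketch of the early-stopping parameters above (assumed interpretation, not the
# library's implementation): a new validation cost only counts as an improvement
# when it beats best * early_stop_threshold, and training stops after
# early_stop_length epochs without such an improvement.
def _would_stop(valid_costs, early_stop_threshold=.9995, early_stop_length=30):
    best = float('inf')
    patience = 0
    for cost in valid_costs:
        if cost < best * early_stop_threshold:
            best = cost        # big enough improvement: reset the patience counter
            patience = 0
        else:
            patience += 1
            if patience >= early_stop_length:
                return True    # ran out of patience: signal early stop
    return False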