def __init__(self, optimization_options, network, *args, **kwargs):
    """Creates an RMSProp SGD optimizer.

    :type optimization_options: dict
    :param optimization_options: a dictionary of optimization options

    :type network: Network
    :param network: the neural network object
    """

    self._params = Parameters()
    for path, param in network.get_variables().items():
        self._params.add(path + '_gradient',
                         numpy.zeros_like(param.get_value()))
        # Initialize the mean squared gradient to ones, otherwise the
        # first update will be divided by a value close to zero.
        self._params.add(path + '_mean_sqr_gradient',
                         numpy.ones_like(param.get_value()))

    # geometric rate for averaging gradients
    if 'gradient_decay_rate' not in optimization_options:
        raise ValueError("Gradient decay rate is not given in "
                         "optimization options.")
    self._gamma = optimization_options['gradient_decay_rate']

    super().__init__(optimization_options, network, *args, **kwargs)
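# A minimal, self-contained NumPy sketch of the RMSProp update that the
# state arrays above support. This is an illustration only, not this
# class's actual Theano update function; `learning_rate` and `epsilon`
# are hypothetical names that do not appear in the constructor.
import numpy

def rmsprop_update_sketch(param, gradient, mean_sqr_gradient,
                          gamma=0.9, learning_rate=0.001, epsilon=1e-8):
    """Returns updated (param, mean_sqr_gradient) arrays."""
    # Exponentially decaying average of squared gradients.
    mean_sqr_gradient = gamma * mean_sqr_gradient \
                        + (1.0 - gamma) * numpy.square(gradient)
    # Scale the step down where the root mean squared gradient is large.
    param = param - learning_rate * gradient \
                    / (numpy.sqrt(mean_sqr_gradient) + epsilon)
    return param, mean_sqr_gradient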
def __init__(self, optimization_options, network, *args, **kwargs):
    """Creates an Adadelta optimizer.

    :type optimization_options: dict
    :param optimization_options: a dictionary of optimization options

    :type network: Network
    :param network: the neural network object
    """

    self._params = Parameters()
    for path, param in network.get_variables().items():
        self._params.add(path + '_gradient',
                         numpy.zeros_like(param.get_value()))
        self._params.add(path + '_mean_sqr_gradient',
                         numpy.zeros_like(param.get_value()))
        self._params.add(path + '_mean_sqr_velocity',
                         numpy.zeros_like(param.get_value()))

    # geometric rate for averaging gradients
    if 'gradient_decay_rate' not in optimization_options:
        raise ValueError("Gradient decay rate is not given in "
                         "optimization options.")
    self._gamma = optimization_options['gradient_decay_rate']

    super().__init__(optimization_options, network, *args, **kwargs)
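# A minimal NumPy sketch of the Adadelta rule (Zeiler, 2012) that the
# three state arrays above support. Illustration only, not this class's
# actual Theano update function; `epsilon` is a hypothetical name.
import numpy

def adadelta_update_sketch(param, gradient, mean_sqr_gradient,
                           mean_sqr_velocity, gamma=0.95, epsilon=1e-6):
    """Returns updated (param, mean_sqr_gradient, mean_sqr_velocity)."""
    mean_sqr_gradient = gamma * mean_sqr_gradient \
                        + (1.0 - gamma) * numpy.square(gradient)
    # The step size adapts from the ratio of accumulated velocities to
    # accumulated gradients, so no global learning rate is needed.
    velocity = -numpy.sqrt((mean_sqr_velocity + epsilon)
                           / (mean_sqr_gradient + epsilon)) * gradient
    mean_sqr_velocity = gamma * mean_sqr_velocity \
                        + (1.0 - gamma) * numpy.square(velocity)
    return param + velocity, mean_sqr_gradient, mean_sqr_velocity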
def __init__(self, optimization_options, network, *args, **kwargs):
    """Creates a Stochastic Gradient Descent optimizer.

    :type optimization_options: dict
    :param optimization_options: a dictionary of optimization options

    :type network: Network
    :param network: the neural network object
    """

    self._params = Parameters()
    for path, param in network.get_variables().items():
        self._params.add(path + '_gradient',
                         numpy.zeros_like(param.get_value()))

    super().__init__(optimization_options, network, *args, **kwargs)
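# A minimal sketch of the plain SGD step, which only needs the gradient
# state added above. Illustration only, not this class's actual Theano
# update function; `learning_rate` is a hypothetical name here.
def sgd_update_sketch(param, gradient, learning_rate=0.1):
    """Returns the parameter array after one plain SGD step."""
    return param - learning_rate * gradient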
def __init__(self, layer_options, network, profile=False):
    """Saves some attributes that are common to all layers.

    :type layer_options: dict
    :param layer_options: dictionary of layer options

    :type network: Network
    :param network: the network object creating this layer

    :type profile: bool
    :param profile: if set to True, creates a Theano profile object
    """

    self.name = layer_options['name']
    self._input_layers = layer_options['input_layers']
    self._params = Parameters()
    self._devices = layer_options['devices']

    if 'size' in layer_options:
        self.output_size = int(layer_options['size'])
    else:
        self.output_size = \
            sum([x.output_size for x in self._input_layers])

    # Convolutional layers may produce two-dimensional output. In that
    # case, the state matrix is four-dimensional and the size of the
    # last dimension is self.output_depth.
    if 'depth' in layer_options:
        self.output_depth = int(layer_options['depth'])
    else:
        self.output_depth = 1

    if 'reverse_time' in layer_options:
        self._reverse_time = bool(layer_options['reverse_time'])
    else:
        self._reverse_time = False

    logging.debug(
        "- %s name=%s inputs=[%s] size=%d depth=%d%s devices=[%s]",
        self.__class__.__name__,
        self.name,
        ', '.join([x.name for x in self._input_layers]),
        self.output_size,
        self.output_depth,
        ' reverse,' if self._reverse_time else '',
        ', '.join([str(x) for x in self._devices]))

    self._network = network
    self._profile = profile
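# A hypothetical layer_options dictionary, sketched to show which keys
# this constructor reads and which defaults apply when a key is absent.
# The concrete values and the layer name are made up for illustration.
example_layer_options = {
    'name': 'hidden_layer_1',
    'input_layers': [],     # list of already-created layer objects
    'devices': [None],      # one entry per device the layer is split over
    'size': 300,            # omit to default to the sum of input sizes
    'depth': 1,             # omit to default to 1; > 1 for 2D output
    'reverse_time': False,  # omit to default to False
}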
def __init__(self, optimization_options, network, *args, **kwargs):
    """Creates an Adam optimizer.

    :type optimization_options: dict
    :param optimization_options: a dictionary of optimization options

    :type network: Network
    :param network: the neural network object
    """

    self._params = Parameters()
    float_type = numpy.dtype(theano.config.floatX).type
    self._params.add('optimizer/timestep', float_type(0.0))
    for path, param in network.get_variables().items():
        self._params.add(path + '_gradient',
                         numpy.zeros_like(param.get_value()))
        self._params.add(path + '_mean_gradient',
                         numpy.zeros_like(param.get_value()))
        self._params.add(path + '_mean_sqr_gradient',
                         numpy.zeros_like(param.get_value()))

    # geometric rate for averaging gradients
    if 'gradient_decay_rate' not in optimization_options:
        raise ValueError("Gradient decay rate is not given in "
                         "optimization options.")
    self._gamma_m = optimization_options['gradient_decay_rate']

    # geometric rate for averaging squared gradients
    if 'sqr_gradient_decay_rate' not in optimization_options:
        raise ValueError("Squared gradient decay rate is not given in "
                         "optimization options.")
    self._gamma_ms = optimization_options['sqr_gradient_decay_rate']

    # momentum
    if 'momentum' not in optimization_options:
        raise ValueError("Momentum is not given in optimization options.")
    self._momentum = optimization_options['momentum']

    super().__init__(optimization_options, network, *args, **kwargs)
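# A minimal NumPy sketch of the Adam rule (Kingma and Ba, 2015) that the
# timestep and per-parameter state above support. Illustration only, not
# this class's actual Theano update function; `learning_rate` and
# `epsilon` are hypothetical names, and the separate `momentum` option
# read above is not part of this core rule.
import numpy

def adam_update_sketch(param, gradient, mean_gradient, mean_sqr_gradient,
                       timestep, gamma_m=0.9, gamma_ms=0.999,
                       learning_rate=0.001, epsilon=1e-8):
    """Returns updated (param, mean_gradient, mean_sqr_gradient, timestep)."""
    timestep += 1.0
    mean_gradient = gamma_m * mean_gradient + (1.0 - gamma_m) * gradient
    mean_sqr_gradient = gamma_ms * mean_sqr_gradient \
                        + (1.0 - gamma_ms) * numpy.square(gradient)
    # Correct the bias toward zero that the moving averages have while
    # the timestep is small.
    m_hat = mean_gradient / (1.0 - gamma_m ** timestep)
    ms_hat = mean_sqr_gradient / (1.0 - gamma_ms ** timestep)
    param = param - learning_rate * m_hat / (numpy.sqrt(ms_hat) + epsilon)
    return param, mean_gradient, mean_sqr_gradient, timestep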
def __init__(self, optimization_options, network, *args, **kwargs):
    """Creates a Nesterov momentum optimizer.

    :type optimization_options: dict
    :param optimization_options: a dictionary of optimization options

    :type network: Network
    :param network: the neural network object
    """

    self._params = Parameters()
    for path, param in network.get_variables().items():
        self._params.add(path + '_gradient',
                         numpy.zeros_like(param.get_value()))
        self._params.add(path + '_velocity',
                         numpy.zeros_like(param.get_value()))

    # momentum
    if 'momentum' not in optimization_options:
        raise ValueError("Momentum is not given in optimization options.")
    self._momentum = optimization_options['momentum']

    super().__init__(optimization_options, network, *args, **kwargs)
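# A minimal sketch of a Nesterov momentum step that the velocity state
# above supports, in the common look-ahead (Sutskever) formulation.
# Illustration only, not this class's actual Theano update function;
# `learning_rate` is a hypothetical name, and the exact formulation used
# by the class may differ.
def nesterov_update_sketch(param, gradient, velocity,
                           momentum=0.9, learning_rate=0.1):
    """Returns updated (param, velocity) after one Nesterov step.

    `gradient` is assumed to be evaluated at the look-ahead point
    param + momentum * velocity.
    """
    velocity = momentum * velocity - learning_rate * gradient
    return param + velocity, velocity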