def __init__(self, shared_value, final_value, epochs_to_saturation):
    assert_is_instance(shared_value,
                       theano.tensor.sharedvar.SharedVariable)
    assert_is_subdtype(shared_value.dtype, numpy.floating)

    assert_equal(shared_value.ndim == 0, numpy.isscalar(final_value))

    if numpy.isscalar(final_value):
        assert_floating(final_value)
    else:
        assert_is_subdtype(final_value.dtype, numpy.floating)
        assert_equal(final_value.shape, shared_value.get_value().shape)

    assert_integer(epochs_to_saturation)
    assert_greater(epochs_to_saturation, 0)

    self.shared_value = shared_value

    cast = numpy.cast[shared_value.dtype]
    self._final_value = cast(final_value)

    self._epochs_to_saturation = epochs_to_saturation

    # initialized in on_start_training()
    self._num_epochs_seen = None
    self._initial_value = None
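
# A minimal sketch of the linear schedule these attributes imply: the shared
# value presumably moves from its initial value to _final_value over
# _epochs_to_saturation epochs, then stays there. This standalone helper and
# its name are illustrative assumptions, not part of the class.
def _linear_interpolation_example(initial_value, final_value,
                                  epochs_seen, epochs_to_saturation):
    # Fraction of the way to saturation, clipped to [0.0, 1.0].
    alpha = min(float(epochs_seen) / epochs_to_saturation, 1.0)
    return initial_value * (1.0 - alpha) + final_value * alpha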
def limit_param_norms(parameter_updater, param, max_norm, input_axes):
    '''
    Modifies the update of an SgdParameterUpdater to limit param L2 norms.

    Parameter norms are computed by summing over the provided input_axes.
    These are so named because you typically want to sum over the axes
    that get dotted with the input to the node (e.g. input_axes=[0] for
    Linear, input_axes=[1, 2, 3] for Conv2D).

    Parameters
    ----------
    parameter_updater: simplelearn.training.ParameterUpdater
      The parameter updater whose updates this will modify.

    param: theano shared variable
      The parameter being updated by parameter_updater.

      (No way to get this from SgdParameterUpdater at present; it updates
      the parameter and its velocity, and there's no way to safely
      distinguish them in parameter_updater.update_pairs.)

    max_norm: floating-point scalar
      The maximum L2 norm to be permitted for the parameters.

    input_axes: Sequence
      A Sequence of ints. The indices to sum over when computing the
      L2 norm of the updated params.
    '''
    assert_is_instance(parameter_updater, ParameterUpdater)
    assert_in(param, parameter_updater.update_pairs)
    assert_floating(max_norm)
    assert_greater(max_norm, 0.0)
    assert_greater(len(input_axes), 0)
    assert_all_integer(input_axes)
    assert_all_greater_equal(input_axes, 0)
    assert_all_less(input_axes, param.ndim)

    input_axes = numpy.asarray(input_axes)
    updated_param = parameter_updater.update_pairs[param]

    norms = T.sqrt(T.sum(T.sqr(updated_param),
                         axis=input_axes,
                         keepdims=True))
    desired_norms = T.clip(norms, 0, max_norm)

    broadcast_mask = numpy.zeros(param.ndim, dtype=bool)
    broadcast_mask[input_axes] = True
    scales = T.patternbroadcast(desired_norms / (1e-7 + norms),
                                broadcast_mask)

    parameter_updater.update_pairs[param] = updated_param * scales
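
# A minimal numpy sketch of the rescaling that the theano graph above builds:
# compute the L2 norm over the given input axes, clip it to max_norm, and
# scale the parameter so no norm exceeds max_norm. The function name is an
# illustrative assumption, not part of the library.
def _clip_norms_example(updated_param, max_norm, input_axes):
    norms = numpy.sqrt(numpy.sum(numpy.square(updated_param),
                                 axis=tuple(input_axes),
                                 keepdims=True))
    desired_norms = numpy.clip(norms, 0, max_norm)
    return updated_param * (desired_norms / (1e-7 + norms))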
def normal_distribution_init(rng, params, stddev):
    '''
    Fills params with values sampled from a zero-mean normal distribution
    with standard deviation stddev.
    '''
    assert_floating(stddev)
    assert_greater_equal(stddev, 0)

    values = params.get_value()
    values[...] = rng.standard_normal(values.shape) * stddev
    params.set_value(values)
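
# Usage sketch, assuming a numpy RandomState and a theano shared weight
# matrix; the shape, seed, and stddev below are arbitrary illustrations.
def _normal_init_example():
    rng = numpy.random.RandomState(1234)
    weights = theano.shared(numpy.zeros((784, 500), dtype='float32'))
    normal_distribution_init(rng, weights, stddev=0.01)
    return weights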
def uniform_init(rng, params, init_range):
    """
    Fills params with values uniformly sampled from
    [-init_range, init_range].
    """
    assert_floating(init_range)
    assert_greater_equal(init_range, 0)

    values = params.get_value()
    values[...] = rng.uniform(low=-init_range,
                              high=init_range,
                              size=values.shape)
    params.set_value(values)
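
# Usage sketch: a common (but not required) choice is init_range = 1/sqrt(fan_in),
# where fan_in is the number of inputs to the layer. The names and shapes below
# are illustrative assumptions.
def _uniform_init_example():
    rng = numpy.random.RandomState(1234)
    fan_in = 784
    weights = theano.shared(numpy.zeros((fan_in, 500), dtype='float32'))
    uniform_init(rng, weights, init_range=float(1.0 / numpy.sqrt(fan_in)))
    return weights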
def __init__(self, shared_value, final_scale, epochs_to_saturation):
    assert_is_instance(shared_value,
                       theano.tensor.sharedvar.SharedVariable)
    assert_floating(final_scale)
    assert_greater_equal(final_scale, 0.0)
    assert_less_equal(final_scale, 1.0)
    assert_integer(epochs_to_saturation)
    assert_greater(epochs_to_saturation, 0)

    self.shared_value = shared_value
    self._final_scale = final_scale
    self._epochs_to_saturation = epochs_to_saturation

    # initialized in on_start_training()
    self._initial_value = None
    self._num_epochs_seen = None
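
# By analogy with the value-interpolating __init__ above, a minimal sketch of
# the implied scale schedule: the scale presumably ramps linearly from 1.0 to
# _final_scale over _epochs_to_saturation epochs (e.g. to decay a learning
# rate). Illustrative assumption, not the class's actual method.
def _linear_scale_example(initial_value, final_scale,
                          epochs_seen, epochs_to_saturation):
    alpha = min(float(epochs_seen) / epochs_to_saturation, 1.0)
    return initial_value * ((1.0 - alpha) + final_scale * alpha)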