Example #1
    def __init__(self,
                 shared_value,
                 final_value,
                 epochs_to_saturation):
        '''
        Checks the arguments, then stores them for use during training.
        '''
        assert_is_instance(shared_value,
                           theano.tensor.sharedvar.SharedVariable)
        assert_is_subdtype(shared_value.dtype, numpy.floating)

        # final_value must be a scalar iff shared_value is 0-dimensional.
        assert_equal(shared_value.ndim == 0, numpy.isscalar(final_value))

        if numpy.isscalar(final_value):
            assert_floating(final_value)
        else:
            assert_is_subdtype(final_value.dtype, numpy.floating)
            assert_equal(final_value.shape,
                         shared_value.get_value().shape)

        assert_integer(epochs_to_saturation)
        assert_greater(epochs_to_saturation, 0)

        self.shared_value = shared_value

        # Store final_value cast to the shared variable's dtype.
        cast = numpy.cast[shared_value.dtype]
        self._final_value = cast(final_value)

        self._epochs_to_saturation = epochs_to_saturation

        # Initialized later, when training starts.
        self._num_epochs_seen = None
        self._initial_value = None
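
The per-epoch update itself is not shown in this snippet. Below is a plausible sketch of it, assuming _initial_value and _num_epochs_seen get set when training starts, and that the value moves linearly toward _final_value; the method name on_epoch is hypothetical:

    # Hypothetical epoch callback; the class's real method is not shown above.
    def on_epoch(self):
        self._num_epochs_seen += 1
        # Fraction of the way to saturation, clipped to [0.0, 1.0].
        alpha = min(float(self._num_epochs_seen) / self._epochs_to_saturation,
                    1.0)
        cast = numpy.cast[self.shared_value.dtype]
        new_value = ((1.0 - alpha) * self._initial_value +
                     alpha * self._final_value)
        self.shared_value.set_value(cast(new_value))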
Example #2
def limit_param_norms(parameter_updater, param, max_norm, input_axes):
    '''
    Modifies the update of an SgdParameterUpdater to limit param L2 norms.

    Parameter norms are computed by summing over the provided input_axes.
    These are so named because you typically want to sum over the axes
    that get dotted with the node's input (e.g. input_axes=[0] for Linear,
    input_axes=[1, 2, 3] for Conv2D).

    Parameters
    ----------

    parameter_updater: simplelearn.training.ParameterUpdater
      The parameter updater whose updates this will modify.

    param: theano shared variable

      The parameter being updated by parameter_updater.

      (There is currently no way to get this from SgdParameterUpdater itself;
      it updates both the parameter and its velocity, and there's no way to
      safely distinguish them in parameter_updater.update_pairs.)

    max_norm: floating-point scalar
      The maximum L2 norm to be permitted for the parameters.

    input_axes: Sequence
      A Sequence of ints. The indices to sum over when computing the
      L2 norm of the updated params.
    '''

    assert_is_instance(parameter_updater, ParameterUpdater)
    assert_in(param, parameter_updater.update_pairs)

    assert_floating(max_norm)
    assert_greater(max_norm, 0.0)

    assert_greater(len(input_axes), 0)
    assert_all_integer(input_axes)
    assert_all_greater_equal(input_axes, 0)
    assert_all_less(input_axes, param.ndim)

    input_axes = numpy.asarray(input_axes)
    updated_param = parameter_updater.update_pairs[param]

    # L2 norm of each parameter, summed over the input axes.
    norms = T.sqrt(T.sum(T.sqr(updated_param),
                         axis=input_axes,
                         keepdims=True))
    desired_norms = T.clip(norms, 0, max_norm)

    # Rescale by desired_norm / norm (the 1e-7 guards against division by
    # zero), marking the summed-out axes as broadcastable.
    broadcast_mask = numpy.zeros(param.ndim, dtype=bool)
    broadcast_mask[input_axes] = True
    scales = T.patternbroadcast(desired_norms / (1e-7 + norms),
                                broadcast_mask)

    parameter_updater.update_pairs[param] = updated_param * scales
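
In effect, each update is rescaled by clip(norm, 0, max_norm) / (norm + 1e-7), so any parameter vector whose updated norm exceeds max_norm is pulled back onto the max_norm sphere. A hedged usage sketch, where updater and weights are illustrative names, not from the source:

# Illustrative usage: `updater` is assumed to be a ParameterUpdater whose
# update_pairs contains `weights`, a shared matrix of shape
# (num_inputs, num_outputs); summing over axis 0 caps each output unit's
# incoming weight norm.
limit_param_norms(parameter_updater=updater,
                  param=weights,
                  max_norm=1.9,
                  input_axes=[0])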
Example #3
    def normal_distribution_init(rng, params, stddev):
        '''
        Fills params with values sampled from a zero-mean normal
        distribution with standard deviation stddev.
        '''

        assert_floating(stddev)
        assert_greater_equal(stddev, 0)

        values = params.get_value()
        values[...] = rng.standard_normal(values.shape) * stddev
        params.set_value(values)
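
A minimal usage sketch, assuming weights is a Theano shared variable holding a float array (the name is illustrative):

rng = numpy.random.RandomState(1234)
# Fill hypothetical shared `weights` with N(0, 0.01 ** 2) samples.
normal_distribution_init(rng, weights, stddev=0.01)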
Example #4
    def uniform_init(rng, params, init_range):
        """
        Fills params with values uniformly sampled from
        [-init_range, init_range]
        """

        assert_floating(init_range)
        assert_greater_equal(init_range, 0)

        values = params.get_value()
        values[...] = rng.uniform(low=-init_range,
                                  high=init_range,
                                  size=values.shape)
        params.set_value(values)
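
The matching usage sketch, again with an illustrative weights shared variable:

rng = numpy.random.RandomState(1234)
# Fill hypothetical shared `weights` uniformly from [-0.05, 0.05].
uniform_init(rng, weights, init_range=0.05)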
Example #5
    def __init__(self,
                 shared_value,
                 final_scale,
                 epochs_to_saturation):
        '''
        Checks the arguments, then stores them for use during training.
        '''
        assert_is_instance(shared_value,
                           theano.tensor.sharedvar.SharedVariable)
        assert_floating(final_scale)
        assert_greater_equal(final_scale, 0.0)
        assert_less_equal(final_scale, 1.0)
        assert_integer(epochs_to_saturation)
        assert_greater(epochs_to_saturation, 0)

        self.shared_value = shared_value
        self._final_scale = final_scale
        self._epochs_to_saturation = epochs_to_saturation

        # initialized in on_start_training()
        self._initial_value = None
        self._num_epochs_seen = None
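
As in Example #1, only the constructor is shown. Below is a plausible sketch of the per-epoch behavior, assuming the scale is interpolated linearly from 1.0 down to final_scale and applied to the value captured when training starts; the method name is hypothetical:

    # Hypothetical epoch callback; the class's real method is not shown above.
    def on_epoch(self):
        self._num_epochs_seen += 1
        # Fraction of the way to saturation, clipped to [0.0, 1.0].
        alpha = min(float(self._num_epochs_seen) / self._epochs_to_saturation,
                    1.0)
        scale = (1.0 - alpha) * 1.0 + alpha * self._final_scale
        cast = numpy.cast[self.shared_value.dtype]
        self.shared_value.set_value(cast(self._initial_value * scale))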