def test_evaluate_initializer_with_fallback_calls_fallback():
    """The fallback initializer is invoked when the primary one fails."""
    failing_init = mock.create_autospec(Initializer())
    failing_init.side_effect = InitializationError
    backup = mock.create_autospec(Initializer())
    backup.side_effect = lambda shape: np.array(1)
    evaluate_initializer(failing_init, (7, 5), backup)
    backup.assert_called_once_with((7, 5))
def test_evaluate_initializer_with_fallback_calls_fallback():
    """When the primary initializer raises, the fallback receives the shape."""
    primary = mock.create_autospec(Initializer())
    substitute = mock.create_autospec(Initializer())
    primary.side_effect = InitializationError
    substitute.side_effect = lambda shape: np.array(1)

    evaluate_initializer(primary, (7, 5), substitute)

    substitute.assert_called_once_with((7, 5))
def test_evaluate_initializer_without_fallback_propagates_error():
    """Without a fallback, the InitializationError bubbles up to the caller."""
    broken = mock.create_autospec(Initializer())
    broken.side_effect = InitializationError
    with pytest.raises(InitializationError):
        evaluate_initializer(broken, (7, 5))
def test_evaluate_initializer_calls_initializer():
    """The initializer is called exactly once with the requested shape."""
    spy = mock.create_autospec(Initializer())
    spy.side_effect = lambda shape: np.array(1)
    evaluate_initializer(spy, (7, 5))
    spy.assert_called_once_with((7, 5))
def test_evaluate_initializer_with_number():
    """A plain scalar is a valid initializer and fills every entry."""
    result = evaluate_initializer(1.4, (7, 5))
    assert np.all(result == 1.4)
def initialize(self, default_or_init_dict=None, seed=None, **kwargs):
    """Initialize the weights of the network.

    Initialization can be specified in three equivalent ways:

        1. just a default initializer:

            >>> net.initialize(Gaussian())

           Note that this is equivalent to:

            >>> net.initialize(default=Gaussian())

        2. by passing a dictionary:

            >>> net.initialize({'RegularLayer': Uniform(),
            ...                 'LstmLayer': Gaussian()})

        3. by using keyword arguments:

            >>> net.initialize(RegularLayer=Uniform(),
            ...                LstmLayer=Uniform())

    All following explanations will be with regards to the dictionary
    style of initialization, because it is the most general one.

    Note:
        It is not recommended to combine 2. and 3. but if they are,
        then keyword arguments take precedence.

    Each initialization consists of a layer-pattern that maps to an
    initializer or a weight-pattern dictionary.

    Layer patterns can take the following forms:

        1. ``{'layer_name': INIT_OR_SUBDICT}``
           Matches all the weights of the layer named layer_name

        2. ``{'layer_*': INIT_OR_SUBDICT}``
           Matches all layers with a name that starts with ``layer_``.
           The wild-card ``*`` can appear at arbitrary positions and
           even multiple times in one path.

    There are two special layer patterns:

        3. ``{'default': INIT}``
           Matches all weights that are not matched by any other
           path-pattern

        4. ``{'fallback': INIT}``
           Set a fallback initializer for every weight. It will only be
           evaluated for the weights for which the regular initializer
           failed with an InitializationError. This is useful for
           initializers that require a certain shape of weights and
           will not work otherwise. The fallback will then be used for
           all cases when that initializer failed.

    The weight-pattern sub-dictionary follows the same form as the
    layer-pattern:

        1) ``{'layer_pattern': {'a': INIT_A, 'b': INIT_B}}``
        2) ``{'layer_pattern': {'a*': INIT}}``
        3) ``{'layer_pattern': {'default': INIT}}``
        4) ``{'layer_pattern': {'fallback': INIT}}``

    An initializer can either be a scalar, something that converts to a
    numpy array of the correct shape or an :class:`Initializer` object.
    So for example:

        >>> net.initialize(default=0,
        ...                RnnLayer={'b': [1, 2, 3, 4, 5]},
        ...                ForwardLayer=bs.Gaussian())

    Note:
        Each view must match exactly one initialization and up to one
        fallback to be unambiguous. Otherwise the initialization will
        fail.

    You can specify a seed to make the initialization reproducible:

        >>> net.initialize({'default': bs.Gaussian()}, seed=1234)
    """
    # Merge dict-style and keyword-style specifications (kwargs win).
    init_refs = _update_references_with_dict(default_or_init_dict, kwargs)
    # Keep a description of the initialization for later introspection.
    self.initializers = get_description(init_refs)
    # Gather the parameter views of every layer that has parameters.
    all_parameters = {
        k: v.parameters
        for k, v in self.buffer.items()
        if isinstance(v, BufferView) and 'parameters' in v
    }
    _replace_lists_with_array_initializers(init_refs)
    # Resolve the patterns: per view, the set of matching initializers
    # and the set of matching fallbacks.
    initializers, fallback = resolve_references(all_parameters, init_refs)
    init_rnd = self.rnd.create_random_state(seed)
    # Iterate in sorted order so that seeded runs are reproducible.
    for layer_name, views in sorted(all_parameters.items()):
        if views is None:
            continue
        for view_name, view in sorted(views.items()):
            init = initializers[layer_name][view_name]
            fb = fallback[layer_name][view_name]
            # Each view must match exactly one initializer ...
            if len(init) > 1:
                raise NetworkValidationError(
                    "Multiple initializers for {}.{}: {}".format(
                        layer_name, view_name, init))
            if len(init) == 0:
                raise NetworkValidationError(
                    "No initializer for {}.{}".format(
                        layer_name, view_name))
            # ... and at most one fallback.
            if len(fb) > 1:
                raise NetworkValidationError(
                    "Multiple fallbacks for {}.{}: {}".format(
                        layer_name, view_name, fb))
            fb = fb.pop() if len(fb) else None
            self.handler.set_from_numpy(
                view,
                evaluate_initializer(init.pop(), view.shape, fb,
                                     seed=init_rnd.generate_seed()))
def test_evaluate_initializer_calls_initializer():
    """evaluate_initializer delegates to the given initializer once."""
    initializer = mock.create_autospec(Initializer())
    initializer.side_effect = lambda s: np.array(1)

    evaluate_initializer(initializer, (7, 5))

    initializer.assert_called_once_with((7, 5))
def test_evaluate_initializer_with_number():
    """Scalars broadcast to every entry of the requested shape."""
    filled = evaluate_initializer(1.4, (7, 5))
    assert np.all(filled == 1.4)
def test_evaluate_initializer_without_fallback_propagates_error():
    """An InitializationError is re-raised when no fallback is supplied."""
    failing = mock.create_autospec(Initializer())
    failing.side_effect = InitializationError

    with pytest.raises(InitializationError):
        evaluate_initializer(failing, (7, 5))
def initialize(self, default_or_init_dict=None, seed=None, **kwargs):
    """Initialize the weights of the network.

    Initialization can be specified in three equivalent ways:

        1. just a default initializer:

            >>> net.initialize(Gaussian())

           Note that this is equivalent to:

            >>> net.initialize(default=Gaussian())

        2. by passing a dictionary:

            >>> net.initialize({'RegularLayer': Uniform(),
            ...                 'LstmLayer': Gaussian()})

        3. by using keyword arguments:

            >>> net.initialize(RegularLayer=Uniform(),
            ...                LstmLayer=Uniform())

    All following explanations will be with regards to the dictionary
    style of initialization, because it is the most general one.

    Note:
        It is not recommended to combine 2. and 3. but if they are,
        then keyword arguments take precedence.

    Each initialization consists of a layer-pattern that maps to an
    initializer or a weight-pattern dictionary.

    Layer patterns can take the following forms:

        1. ``{'layer_name': INIT_OR_SUBDICT}``
           Matches all the weights of the layer named layer_name

        2. ``{'layer_*': INIT_OR_SUBDICT}``
           Matches all layers with a name that starts with ``layer_``.
           The wild-card ``*`` can appear at arbitrary positions and
           even multiple times in one path.

    There are two special layer patterns:

        3. ``{'default': INIT}``
           Matches all weights that are not matched by any other
           path-pattern

        4. ``{'fallback': INIT}``
           Set a fallback initializer for every weight. It will only be
           evaluated for the weights for which the regular initializer
           failed with an InitializationError. This is useful for
           initializers that require a certain shape of weights and
           will not work otherwise. The fallback will then be used for
           all cases when that initializer failed.

    The weight-pattern sub-dictionary follows the same form as the
    layer-pattern:

        1) ``{'layer_pattern': {'a': INIT_A, 'b': INIT_B}}``
        2) ``{'layer_pattern': {'a*': INIT}}``
        3) ``{'layer_pattern': {'default': INIT}}``
        4) ``{'layer_pattern': {'fallback': INIT}}``

    An initializer can either be a scalar, something that converts to a
    numpy array of the correct shape or an :class:`Initializer` object.
    So for example:

        >>> net.initialize(default=0,
        ...                RnnLayer={'b': [1, 2, 3, 4, 5]},
        ...                ForwardLayer=bs.Gaussian())

    Note:
        Each view must match exactly one initialization and up to one
        fallback to be unambiguous. Otherwise the initialization will
        fail.

    You can specify a seed to make the initialization reproducible:

        >>> net.initialize({'default': bs.Gaussian()}, seed=1234)
    """
    # Merge dict-style and keyword-style specifications (kwargs win).
    init_refs = _update_references_with_dict(default_or_init_dict, kwargs)
    # Keep a description of the initialization for later introspection.
    self.initializers = get_description(init_refs)
    # Gather the parameter views of every layer that has parameters.
    all_parameters = {
        k: v.parameters
        for k, v in self.buffer.items()
        if isinstance(v, BufferView) and 'parameters' in v
    }
    _replace_lists_with_array_initializers(init_refs)
    # Resolve the patterns: per view, the set of matching initializers
    # and the set of matching fallbacks.
    initializers, fallback = resolve_references(all_parameters, init_refs)
    init_rnd = self.rnd.create_random_state(seed)
    # Iterate in sorted order so that seeded runs are reproducible.
    for layer_name, views in sorted(all_parameters.items()):
        if views is None:
            continue
        for view_name, view in sorted(views.items()):
            init = initializers[layer_name][view_name]
            fb = fallback[layer_name][view_name]
            # Each view must match exactly one initializer ...
            if len(init) > 1:
                raise NetworkValidationError(
                    "Multiple initializers for {}.{}: {}".format(
                        layer_name, view_name, init))
            if len(init) == 0:
                raise NetworkValidationError(
                    "No initializer for {}.{}".format(
                        layer_name, view_name))
            # ... and at most one fallback.
            if len(fb) > 1:
                raise NetworkValidationError(
                    "Multiple fallbacks for {}.{}: {}".format(
                        layer_name, view_name, fb))
            fb = fb.pop() if len(fb) else None
            self.handler.set_from_numpy(
                view,
                evaluate_initializer(init.pop(), view.shape, fb,
                                     seed=init_rnd.generate_seed()))