def test_gated_average_layer_exceptions(self):
    """Invalid input configurations must raise ``LayerConnectionError``."""
    # Case 1: gating input has extra spatial dimensions instead of
    # being a flat 2D (batch, features) tensor.
    bad_gate_shape = layers.parallel(
        layers.Input((10, 3, 3)),
        layers.Input(20) >> layers.Relu(8),
        layers.Input(20) >> layers.Relu(8),
    )
    with self.assertRaisesRegexp(
            LayerConnectionError, "should be 2-dimensional"):
        layers.join(bad_gate_shape, layers.GatedAverage())

    # Case 2: gate predicts weights for 3 networks, but only
    # 2 gated networks are available.
    wrong_gate_size = layers.parallel(
        layers.Input(10) >> layers.Softmax(3),
        layers.Input(20) >> layers.Relu(8),
        layers.Input(20) >> layers.Relu(8),
    )
    with self.assertRaisesRegexp(
            LayerConnectionError, "only 3 networks, got 2 networks"):
        layers.join(wrong_gate_size, layers.GatedAverage())

    # Case 3: gated networks produce different output shapes (8 vs 10),
    # so their outputs cannot be averaged.
    mismatched_outputs = layers.parallel(
        layers.Input(10) >> layers.Softmax(2),
        layers.Input(20) >> layers.Relu(8),
        layers.Input(20) >> layers.Relu(10),
    )
    with self.assertRaisesRegexp(
            LayerConnectionError, "expect to have the same shapes"):
        layers.join(mismatched_outputs, layers.GatedAverage())
def test_gated_average_layer_exceptions(self):
    """Invalid input configurations must raise ``LayerConnectionError``."""
    # Case 1: gating input has extra spatial dimensions instead of
    # being a flat vector.
    with self.assertRaisesRegexp(LayerConnectionError, "should be vector"):
        layers.join(
            [
                layers.Input((10, 3, 3)),  # shape not 1d
                layers.Input(20) > layers.Relu(8),
                layers.Input(20) > layers.Relu(8),
            ], layers.GatedAverage())

    # Case 2: gate predicts weights for 3 networks, but only
    # 2 gated networks are available.
    with self.assertRaisesRegexp(
            LayerConnectionError, "only 3 networks, got 2 networks"):
        layers.join([
            layers.Input(10) > layers.Softmax(3),
            layers.Input(20) > layers.Relu(8),
            layers.Input(20) > layers.Relu(8),
        ], layers.GatedAverage())

    # Case 3: gated networks produce different output shapes (8 vs 10),
    # so their outputs cannot be averaged.
    with self.assertRaisesRegexp(
            LayerConnectionError, "expect to have the same shapes"):
        layers.join([
            layers.Input(10) > layers.Softmax(2),
            layers.Input(20) > layers.Relu(8),
            layers.Input(20) > layers.Relu(10),
        ], layers.GatedAverage())
def test_gated_average_layer_exceptions_index_position(self):
    """Out-of-range ``gate_index`` values must be rejected."""
    three_networks = layers.parallel(
        layers.Input(10) >> layers.Softmax(2),
        layers.Input(20) >> layers.Relu(8),
        layers.Input(20) >> layers.Relu(8),
    )

    # Positive index just past the last of the three networks.
    with self.assertRaisesRegexp(LayerConnectionError, "Invalid index"):
        layers.join(three_networks, layers.GatedAverage(gate_index=3))

    # Negative index before the first network.
    with self.assertRaisesRegexp(LayerConnectionError, "Invalid index"):
        layers.join(three_networks, layers.GatedAverage(gate_index=-4))
def test_gated_average_layer_negative_index(self):
    """Negative ``gate_index`` counts from the end of the network list."""
    # Gating network is last in the list: index -1.
    gate_last = layers.parallel(
        layers.Input(20) >> layers.Relu(8),
        layers.Input(20) >> layers.Relu(8),
        layers.Input(10) >> layers.Softmax(2),
    )
    network = layers.join(
        gate_last,
        layers.GatedAverage(gate_index=-1, name='gate'))
    self.assertShapesEqual(network.output_shape, (None, 8))

    # Gating network is first in the list of three: index -3.
    gate_first = layers.parallel(
        layers.Input(10) >> layers.Softmax(2),
        layers.Input(20) >> layers.Relu(8),
        layers.Input(20) >> layers.Relu(8),
    )
    network = layers.join(
        gate_first,
        layers.GatedAverage(gate_index=-3, name='gate'))
    self.assertShapesEqual(network.output_shape, (None, 8))
def test_gated_average_layer_exceptions_index_position(self):
    """Out-of-range ``gating_layer_index`` values must be rejected."""
    # Positive index just past the last of the three networks.
    with self.assertRaisesRegexp(LayerConnectionError, "Invalid index"):
        layers.join([
            layers.Input(20) > layers.Relu(8),
            layers.Input(10) > layers.Softmax(2),
            layers.Input(20) > layers.Relu(8),
        ], layers.GatedAverage(gating_layer_index=3))

    # Negative index before the first network.
    with self.assertRaisesRegexp(LayerConnectionError, "Invalid index"):
        layers.join([
            layers.Input(10) > layers.Softmax(2),
            layers.Input(20) > layers.Relu(8),
            layers.Input(20) > layers.Relu(8),
        ], layers.GatedAverage(gating_layer_index=-4))
def test_gated_average_layer_output_shape(self):
    """Output shape matches the gated networks' shared output shape."""
    gated_networks = layers.parallel(
        layers.Input(10) >> layers.Softmax(2),  # gating branch
        layers.Input(20) >> layers.Relu(8),
        layers.Input(20) >> layers.Relu(8),
    )
    network = layers.join(gated_networks, layers.GatedAverage())
    self.assertShapesEqual(network.output_shape, (None, 8))
def test_gated_average_layer_non_default_index(self):
    """Gating network can sit at a non-zero position in the input list."""
    gate = layers.GatedAverage(gating_layer_index=1)
    layers.join([
        layers.Input(20) > layers.Relu(8),
        layers.Input(10) > layers.Softmax(2),  # gating network
        layers.Input(20) > layers.Relu(8),
    ], gate)

    self.assertEqual(gate.output_shape, (8,))
    self.assertEqual(gate.input_shape, [(8,), (2,), (8,)])
def test_gated_average_layer_output_shape(self):
    """Shapes are unknown before the layer is connected, defined after."""
    gate = layers.GatedAverage()
    # Disconnected layer has no output shape yet.
    self.assertIsNone(gate.output_shape)

    layers.join([
        layers.Input(10) > layers.Softmax(2),  # gating network
        layers.Input(20) > layers.Relu(8),
        layers.Input(20) > layers.Relu(8),
    ], gate)

    self.assertEqual(gate.output_shape, (8,))
    self.assertEqual(gate.input_shape, [(2,), (8,), (8,)])
def test_gated_average_layer_output(self):
    """Forward pass produces one averaged output vector per sample."""
    shared_input = layers.Input(10)
    network = layers.join([
        shared_input > layers.Softmax(2),  # gating branch
        shared_input > layers.Relu(8),
        shared_input > layers.Relu(8),
    ], layers.GatedAverage())

    batch = asfloat(np.random.random((20, 10)))
    output = self.eval(network.output(batch))
    self.assertEqual(output.shape, (20, 8))
def test_gated_average_layer_output(self):
    """Forward pass produces one averaged output vector per sample."""
    network = layers.join(
        layers.Input(10),
        layers.parallel(
            layers.Softmax(2),  # gating branch
            layers.Relu(8),
            layers.Relu(8),
        ),
        layers.GatedAverage(),
    )

    batch = asfloat(np.random.random((20, 10)))
    output = self.eval(network.output(batch))
    self.assertShapesEqual(output.shape, (20, 8))
def test_gated_average_layer_multi_dimensional_inputs(self):
    """Gated averaging also works for convolutional (4D) branches."""
    images = layers.Input((5, 5, 1))
    network = layers.join([
        # Gate flattens the image before producing softmax weights.
        images > layers.Reshape() > layers.Softmax(2),
        images > layers.Convolution((2, 2, 3)),
        images > layers.Convolution((2, 2, 3)),
    ], layers.GatedAverage())

    self.assertEqual(network.input_shape, (5, 5, 1))
    self.assertEqual(network.output_shape, (4, 4, 3))

    batch = asfloat(np.random.random((8, 5, 5, 1)))
    output = self.eval(network.output(batch))
    self.assertEqual(output.shape, (8, 4, 4, 3))
def test_gated_average_layer_multi_dimensional_inputs(self):
    """Gated averaging also works for convolutional (4D) branches."""
    images = layers.Input((1, 5, 5))
    network = layers.join([
        # Gate flattens the image before producing softmax weights.
        images > layers.Reshape() > layers.Softmax(2),
        images > layers.Convolution((3, 2, 2)),
        images > layers.Convolution((3, 2, 2)),
    ], layers.GatedAverage())

    self.assertEqual(network.input_shape, (1, 5, 5))
    self.assertEqual(network.output_shape, (3, 4, 4))

    predict = network.compile()
    batch = asfloat(np.random.random((8, 1, 5, 5)))
    output = predict(batch)
    self.assertEqual(output.shape, (8, 3, 4, 4))
def test_gated_average_layer_multi_dimensional_inputs(self):
    """Gated averaging also works for convolutional (4D) branches."""
    network = layers.join(
        layers.Input((5, 5, 1)),
        layers.parallel(
            # Gate flattens the image before producing softmax weights.
            layers.Reshape() >> layers.Softmax(2),
            layers.Convolution((2, 2, 3)),
            layers.Convolution((2, 2, 3)),
        ),
        layers.GatedAverage(),
    )

    self.assertShapesEqual(network.input_shape, (None, 5, 5, 1))
    self.assertShapesEqual(network.output_shape, (None, 4, 4, 3))

    batch = asfloat(np.random.random((8, 5, 5, 1)))
    output = self.eval(network.output(batch))
    self.assertEqual(output.shape, (8, 4, 4, 3))
def mixture_of_experts(networks, gating_layer=None):
    """
    Generates mixture of experts architecture from the set of
    networks that has the same input and output shapes.

    Mixture of experts learns how to mix results from different
    networks in order to get better performance. It adds a gating
    layer that uses input data to figure out which of the networks
    will make a better contribution to the final result. The final
    result mixes outputs from all networks using different weights.
    The higher the weight the larger the contribution from the
    individual network.

    Parameters
    ----------
    networks : list of connections or networks
        These networks will be combined into a mixture of experts.
        Every network should have a single 1D input layer and a
        single output layer. Another restriction is that all
        networks should expect the same input and output layers.

    gating_layer : None or layer
        In case the value is equal to `None`, the following
        layer will be created.

        .. code-block:: python

            gating_layer = layers.Softmax(len(networks))

        Output from the gating layer should be 1D and equal to
        the number of networks.

    Raises
    ------
    ValueError
        In case there is some problem with the input networks
        or the custom gating layer.

    Returns
    -------
    connection
        Mixture of experts network that combines all networks into
        a single one and adds a gating layer to it.

    Examples
    --------
    >>> from neupy import layers, algorithms, architectures
    >>>
    >>> network = architectures.mixture_of_experts([
    ...     layers.join(
    ...         layers.Input(10),
    ...         layers.Relu(5),
    ...     ),
    ...     layers.join(
    ...         layers.Input(10),
    ...         layers.Relu(33),
    ...         layers.Relu(5),
    ...     ),
    ...     layers.join(
    ...         layers.Input(10),
    ...         layers.Relu(12),
    ...         layers.Relu(25),
    ...         layers.Relu(5),
    ...     ),
    ... ])
    >>> network
    10 -> [... 12 layers ...] -> 5
    >>>
    >>> gdnet = algorithms.Momentum(network, step=0.1)
    """
    if not isinstance(networks, (list, tuple)):
        raise ValueError("Networks should be specified as a list")

    # Validate every expert network individually before combining them.
    connections = []
    for index, network in enumerate(networks):
        connection = extract_connection(network)
        check_if_connection_is_valid(connection, index)
        connections.append(connection)

    # All experts must share compatible input/output shapes.
    check_if_connections_compatible(connections)

    first_connection = connections[0]
    # Assumes a 1D input layer, so the first shape entry is the
    # number of input features (validated by the checks above).
    n_features = first_connection.input_shape[0]
    n_layers_to_combine = len(connections)

    if gating_layer is None:
        # Default gate: softmax producing one weight per expert network.
        gating_layer = layers.Softmax(n_layers_to_combine)

    check_if_gating_layer_valid(gating_layer, n_layers_to_combine)

    return layers.join(
        layers.Input(n_features),
        # Note: Gating network should be specified
        # as a first parameter.
        [gating_layer] + connections,
        layers.GatedAverage(),
    )
def mixture_of_experts(networks, gating_layer=None):
    """
    Generates mixture of experts architecture from the set of
    networks that has the same input and output shapes.

    Mixture of experts learns how to mix results from different
    networks in order to get better performance. It adds a gating
    layer that uses input data to figure out which of the networks
    will make a better contribution to the final result. The final
    result mixes outputs from all networks using different weights.
    The higher the weight the larger the contribution from the
    individual network.

    Parameters
    ----------
    networks : list of networks/layers

    gating_layer : None or layer
        In case the value is equal to `None`, the following
        layer will be created.

        .. code-block:: python

            gating_layer = layers.Softmax(len(networks))

        Output from the gating layer should be 1D and equal to
        the number of networks.

    Raises
    ------
    ValueError
        In case there is some problem with the input networks
        or the custom gating layer.

    Returns
    -------
    network
        Mixture of experts network that combines all networks into
        a single one and adds a gating layer to it.

    Examples
    --------
    >>> from neupy import algorithms, architectures
    >>> from neupy.layers import *
    >>>
    >>> network = architectures.mixture_of_experts([
    ...     join(
    ...         Input(10),
    ...         Relu(5),
    ...     ),
    ...     join(
    ...         Input(10),
    ...         Relu(33),
    ...         Relu(5),
    ...     ),
    ...     join(
    ...         Input(10),
    ...         Relu(12),
    ...         Relu(25),
    ...         Relu(5),
    ...     ),
    ... ])
    >>> network
    (?, 10) -> [... 12 layers ...] -> (?, 5)
    >>>
    >>> optimizer = algorithms.Momentum(network, step=0.1)
    """
    if not isinstance(networks, (list, tuple)):
        raise ValueError("Networks should be specified as a list")

    # Validate every expert network individually before combining them.
    for index, network in enumerate(networks):
        check_if_network_is_valid(network, index)

    check_if_networks_compatible(networks)

    # Merge input shapes from all networks; merge_with fails if
    # the shapes are incompatible with each other.
    input_shape = tf.TensorShape(None)
    for network in networks:
        input_shape = input_shape.merge_with(network.input_shape)

    n_layers_to_combine = len(networks)
    # Shape is (batch, features); dimension 1 holds the feature count.
    n_features = input_shape[1].value

    if n_features is None:
        raise ValueError(
            "Cannot create mixture of experts model, because "
            "number of input features is unknown")

    if gating_layer is None:
        # Default gate: softmax producing one weight per expert network.
        gating_layer = layers.Softmax(n_layers_to_combine)

    if not isinstance(gating_layer, layers.BaseLayer):
        raise ValueError(
            "Invalid type for gating layer. Type: {}"
            "".format(type(gating_layer)))

    return layers.join(
        layers.Input(n_features),
        # Note: Gating network should be specified
        # as a first parameter.
        layers.parallel(*as_tuple(gating_layer, networks)),
        layers.GatedAverage(),
    )