Example #1
0
    def testCompositeTypeSpecArgWithoutDtype(self):
        """Input(type_spec=...) must work for a composite spec whose two
        component tensors have different dtypes, with and without
        variant-dtype assignment, including serialization round-trips.
        """
        for assign_variant_dtype in [False, True]:
            # Create a Keras Input from a composite TypeSpec with a float32
            # component and an int64 component.
            spec = TwoTensorsSpecNoOneDtype(
                (1, 2, 3),
                tf.float32,
                (1, 2, 3),
                tf.int64,
                assign_variant_dtype=assign_variant_dtype,
            )
            x = input_layer_lib.Input(type_spec=spec)

            def lambda_fn(tensors):
                # Cast both components to a common dtype before adding.
                return tf.cast(tensors.x, tf.float64) + tf.cast(
                    tensors.y, tf.float64)

            # Verify you can construct and use a model w/ this input
            model = functional.Functional(x, core.Lambda(lambda_fn)(x))

            # And that the model works. BUG FIX: the second tensor was built
            # with `tf.ones(1, 2, 3)`, which passes 2 as `dtype` and 3 as
            # `name`; the shape must be a single tuple, and the dtype must
            # match the int64 declared by the spec above.
            two_tensors = TwoTensors(
                tf.ones((1, 2, 3)) * 2.0,
                tf.ones((1, 2, 3), dtype=tf.int64))
            self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))

            # Test serialization / deserialization
            model = functional.Functional.from_config(model.get_config())
            self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))
            model = model_config.model_from_json(model.to_json())
            self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))
Example #2
0
def convnet_simple_lion_keras(image_dims):
    """Build and compile a small sequential convnet classifier.

    Args:
        image_dims: input image shape tuple fed to the first layer.

    Returns:
        A compiled ``keras.models.Sequential`` ending in a 6-way softmax.
    """
    # Pixel scaling to [-0.5, 0.5], three conv/pool stages, then an MLP head.
    layer_stack = [
        core.Lambda(lambda x: (x / 255.0) - 0.5, input_shape=image_dims),
        convolutional.Conv2D(32, (3, 3), activation='relu', padding='same'),
        convolutional.MaxPooling2D(pool_size=(2, 2)),
        convolutional.Conv2D(64, (3, 3), activation='relu', padding='same'),
        convolutional.MaxPooling2D(pool_size=(2, 2)),
        convolutional.Conv2D(128, (3, 3), activation='relu', padding='same'),
        convolutional.MaxPooling2D(pool_size=(2, 2)),
        core.Flatten(),
        core.Dense(512, activation='relu'),
        core.Dropout(0.5),
        core.Dense(1024, activation='relu'),
        core.Dropout(0.5),
        core.Dense(6, activation='softmax'),
    ]

    model = keras.models.Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    return model
Example #3
0
    def test_Bidirectional_ragged_input(self, merge_mode):
        """A Bidirectional LSTM over ragged input must produce the same
        result as manually merging the forward output with the
        time-reversed backward output via `merge_mode`."""
        np.random.seed(100)
        rnn = keras.layers.LSTM
        units = 3
        ragged_data = tf.ragged.constant(
            [
                [[1, 1, 1], [1, 1, 1]],
                [[1, 1, 1]],
                [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
                [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
            ],
            ragged_rank=1,
        )
        x = tf.cast(ragged_data, "float32")

        with self.cached_session():
            # Reference merge function matching the layer's merge_mode.
            if merge_mode == "ave":
                def merge_func(y, y_rev):
                    return (y + y_rev) / 2
            elif merge_mode == "concat":
                def merge_func(y, y_rev):
                    return tf.concat((y, y_rev), axis=-1)
            elif merge_mode == "mul":
                def merge_func(y, y_rev):
                    return y * y_rev

            inputs = keras.Input(shape=(None, 3),
                                 batch_size=4,
                                 dtype="float32",
                                 ragged=True)
            bidi = keras.layers.Bidirectional(
                rnn(units, return_sequences=True), merge_mode=merge_mode)
            merged_fn = keras.backend.function([inputs], bidi(inputs))
            forward_fn = keras.backend.function(
                [inputs], bidi.forward_layer(inputs))

            # TODO(kaftan): after KerasTensor refactor TF op layers should
            # work with many composite tensors, and this shouldn't need to
            # be a lambda layer.
            reverse_layer = core.Lambda(tf.reverse, arguments=dict(axis=[1]))
            backward_fn = keras.backend.function(
                [inputs], reverse_layer(bidi.backward_layer(inputs)))

            y_merged = merged_fn(x)
            y_expected = merge_func(
                convert_ragged_tensor_value(forward_fn(x)),
                convert_ragged_tensor_value(backward_fn(x)),
            )

            y_merged = convert_ragged_tensor_value(y_merged)
            self.assertAllClose(y_merged.flat_values, y_expected.flat_values)
Example #4
0
    def test_fixed_loss_scaling(self, strategy_fn):
        """Fixed (non-dynamic) loss scaling must leave the *applied*
        gradient unchanged: the scale-up of the loss is undone at apply
        time.

        Note: mixed precision itself is not tested here, only loss scaling.
        """
        fixed_scale = 8.0
        batch_size = 4
        with strategy_fn().scope():
            inp = layers.Input(shape=(1,), batch_size=batch_size)
            layer = mp_test_util.MultiplyLayer()
            y = layer(inp)

            # The gradient of 'y' at this point is 1; with loss scaling it
            # becomes 'fixed_scale'. Divide by the batch size because the
            # loss is averaged across batch elements.
            grad_check_fn = mp_test_util.create_identity_with_grad_check_fn(
                [fixed_scale / batch_size])
            y = core.Lambda(grad_check_fn)(y)
            model = models.Model(inputs=inp, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            opt = loss_scale_optimizer.LossScaleOptimizer(
                gradient_descent.SGD(1.0),
                dynamic=False,
                initial_scale=fixed_scale,
            )
            model.compile(
                opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly()
            )

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and, with SGD(lr=1) and a unit gradient,
        # should have exactly 1 subtracted after one step.
        self.assertEqual(backend.eval(layer.v), 0)
    def test_adapt_preprocessing_stage_with_dict_input(self):
        """adapt() on a FunctionalPreprocessingStage with named inputs must
        route data by input name (dict keys), by position (lists/tuples),
        and reject data it cannot consume.
        """
        x0 = Input(shape=(3, ), name='x0')
        x1 = Input(shape=(4, ), name='x1')
        x2 = Input(shape=(3, 5), name='x2')

        # dimension will mismatch if x1 incorrectly placed.
        x1_sum = core.Lambda(
            lambda x: tf.reduce_sum(x, axis=-1, keepdims=True))(x1)
        x2_sum = core.Lambda(lambda x: tf.reduce_sum(x, axis=-1))(x2)

        l0 = PLMerge()
        y = l0([x0, x1_sum])

        l1 = PLMerge()
        y = l1([y, x2_sum])

        l2 = PLSplit()
        z, y = l2(y)
        # Inputs are deliberately passed out of order to prove matching is
        # by name, not by dict iteration order.
        stage = preprocessing_stage.FunctionalPreprocessingStage(
            {
                'x2': x2,
                'x0': x0,
                'x1': x1
            }, [y, z])
        stage.compile()

        # Test with dict of NumPy array
        one_array0 = np.ones((4, 3), dtype='float32')
        one_array1 = np.ones((4, 4), dtype='float32')
        one_array2 = np.ones((4, 3, 5), dtype='float32')
        adapt_data = {'x1': one_array1, 'x0': one_array0, 'x2': one_array2}
        stage.adapt(adapt_data)
        self.assertEqual(l0.adapt_count, 1)
        self.assertEqual(l1.adapt_count, 1)
        self.assertEqual(l2.adapt_count, 1)
        # Layers must be adapted in topological order: l0, then l1, then l2.
        self.assertLessEqual(l0.adapt_time, l1.adapt_time)
        self.assertLessEqual(l1.adapt_time, l2.adapt_time)

        # Check call
        y, z = stage({
            'x1': tf.constant(one_array1),
            'x2': tf.constant(one_array2),
            'x0': tf.constant(one_array0)
        })
        self.assertAllClose(y, np.zeros((4, 3), dtype='float32') + 9.)
        self.assertAllClose(z, np.zeros((4, 3), dtype='float32') + 11.)

        # Test with list of NumPy array
        adapt_data = [one_array0, one_array1, one_array2]
        stage.adapt(adapt_data)
        self.assertEqual(l0.adapt_count, 2)
        self.assertEqual(l1.adapt_count, 2)
        self.assertEqual(l2.adapt_count, 2)
        self.assertLessEqual(l0.adapt_time, l1.adapt_time)
        self.assertLessEqual(l1.adapt_time, l2.adapt_time)

        # Test with flattened dataset
        adapt_data = tf.data.Dataset.from_tensor_slices(
            (one_array0, one_array1, one_array2))
        adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples each
        stage.adapt(adapt_data)
        self.assertEqual(l0.adapt_count, 3)
        self.assertEqual(l1.adapt_count, 3)
        self.assertEqual(l2.adapt_count, 3)
        self.assertLessEqual(l0.adapt_time, l1.adapt_time)
        self.assertLessEqual(l1.adapt_time, l2.adapt_time)

        # Test with dataset in dict shape
        adapt_data = tf.data.Dataset.from_tensor_slices({
            'x0': one_array0,
            'x2': one_array2,
            'x1': one_array1
        })
        adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples each
        stage.adapt(adapt_data)
        self.assertEqual(l0.adapt_count, 4)
        self.assertEqual(l1.adapt_count, 4)
        self.assertEqual(l2.adapt_count, 4)
        self.assertLessEqual(l0.adapt_time, l1.adapt_time)
        self.assertLessEqual(l1.adapt_time, l2.adapt_time)

        # Test error with bad data
        with self.assertRaisesRegex(ValueError, 'requires a '):
            stage.adapt(None)
Example #6
0
    def test_dynamic_loss_scaling(self, strategy_fn, get_config=False):
        """Dynamic loss scaling: the scale doubles after
        `dynamic_growth_steps` finite-gradient steps, halves and skips the
        update on NaN gradients, and the applied (unscaled) gradient stays
        correct throughout.
        """
        strategy = strategy_fn()
        initial_loss_scale = 2.0
        batch_size = 4
        # Expected *unscaled* gradient is scale / batch_size because the
        # loss below is a mean over batch elements.
        expected_gradient = backend.variable(
            [initial_loss_scale / batch_size], dtype=tf.float16
        )
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=tf.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2
            )
            with policy.policy_scope("mixed_float16"):
                x = layers.Input(
                    shape=(1,), batch_size=batch_size, dtype=tf.float16
                )
                layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
                y = layer(x)
                # Identity in the forward pass; injects NaN gradients
                # whenever `have_nan_gradients` is True.
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients
                    )
                )
                y = core.Lambda(identity_with_nan_grads)(y)
                # Identity in the forward pass; asserts the incoming
                # gradient equals `expected_gradient`.
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=tf.float16,
                        expected_gradient=expected_gradient,
                    )
                )
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    # Round-trip the model through its config and re-find
                    # the MultiplyLayer instance inside the rebuilt model.
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        model.fit(dataset)
        # The variables starts with 1 and has a gradient of 1, so will go down by 1
        # each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient * 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient / 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
Example #7
0
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        """Tests mixed-precision-related features that would occur in a
        resnet50-style model:

        * multiple layers, some using auto-cast variables and some not,
        * regularization on some variables and not others,
        * a fixed loss scale (when `use_loss_scaling` is True).
        """
        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.0
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy("mixed_float16")):
                x = layers.Input(shape=(1,), batch_size=2)
                # Alternate auto-cast / non-auto-cast layers, with weight
                # regularization on the first and last.
                multiply_layers = [
                    mp_test_util.MultiplyLayer(
                        assert_type=tf.float16,
                        regularizer=mp_test_util.IdentityRegularizer(),
                        use_operator=True,
                    ),
                    mp_test_util.MultiplyLayerWithoutAutoCast(
                        assert_type=tf.float16, use_operator=True
                    ),
                    mp_test_util.MultiplyLayer(
                        assert_type=tf.float16, use_operator=False
                    ),
                    mp_test_util.MultiplyLayerWithoutAutoCast(
                        assert_type=tf.float16,
                        regularizer=mp_test_util.IdentityRegularizer(),
                        use_operator=False,
                    ),
                ]
                y = x
                for mult_layer in multiply_layers:
                    y = mult_layer(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss
                    # scaling, the gradient is 'loss_scale'. We divide by
                    # the batch size of 2 since the loss is averaged across
                    # batch elements.
                    grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=tf.float16,
                            expected_gradient=[loss_scale / 2],
                        )
                    )
                    y = core.Lambda(grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, dynamic=False, initial_scale=loss_scale
                    )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for mult_layer in multiply_layers:
            if mult_layer.losses:
                # Layer has weight regularizer: extra gradient of 1.
                self.assertEqual(
                    backend.eval(mult_layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer.
                self.assertEqual(
                    backend.eval(mult_layer.v), 1 - learning_rate)
Example #8
0
    def build_model_200(self, hyperparameters):
        """Build and compile a two-tower ('ho'/'ao') dense network with five
        outputs: per-side score distributions, a score-difference head
        (via batch cross-correlation of the two score distributions), a
        total-runs head (via batch convolution), and a winner head.

        Args:
            hyperparameters: dict read for 'num_hiddenunits',
                'num_hiddenlayers' (must be odd), 'dropout',
                'non_linearity', 'weight_decay_type', 'weight_decay',
                'num_inputfeatures', 'loss', 'learning_algo',
                'learning_rate', 'momentum', 'loss_weights'.

        Returns:
            A compiled Keras ``Model`` with two inputs and five outputs.
        """

        # NOTE(review): `seed` is unused and np.random.seed(None) re-seeds
        # from OS entropy — presumably intentional non-determinism; confirm.
        seed = None
        np.random.seed(None)

        model = None

        num_hiddenunits = hyperparameters['num_hiddenunits']
        hidden_layers = hyperparameters['num_hiddenlayers']

        drop = hyperparameters['dropout']
        act = hyperparameters['non_linearity']
        #bias_initializer = 'zeros'
        kernel_initializer = VarianceScaling(scale=1.0,
                                             mode='fan_in',
                                             distribution='normal',
                                             seed=None)
        kernel_regularizer = hyperparameters['weight_decay_type'](
            hyperparameters['weight_decay'])
        # NOTE(review): l2(0.0) is a no-op and `activity_regularizer` /
        # `bnorm_kwargs` are never used below (batch norm is commented out).
        activity_regularizer = l2(0.00)
        bnorm_kwargs = {
            'axis': -1,
            'momentum': 0.99,
            'epsilon': 0.001,
            'center': True,
            'scale': True,
            'beta_initializer': 'zeros',
            'gamma_initializer': 'ones',
            'moving_mean_initializer': 'zeros',
            'moving_variance_initializer': 'ones',
            'beta_regularizer': None,
            'gamma_regularizer': None,
            'beta_constraint': None,
            'gamma_constraint': None
        }

        # Shared kwargs for every hidden Dense layer.
        dense_kwargs = {
            'kernel_initializer': kernel_initializer,
            'kernel_regularizer': kernel_regularizer
        }

        # Keras bias-initializer closures. NOTE(review): these capture
        # names (class_weights, dummyscore_distro, ...) that are not
        # defined in this method — they must exist in an enclosing/module
        # scope; the (shape, dtype) signature matches what Keras passes to
        # an initializer and is intentionally ignored.
        def output_bias(shape, dtype=None):
            return class_weights

        def dummyscore_bias(shape, dtype=None):
            return dummyscore_distro

        def dummytots_bias(shape, dtype=None):
            return dummytots_distro

        def dummyhomescore_bias(shape, dtype=None):
            return dummyhomescore_distro

        def dummyawayscore_bias(shape, dtype=None):
            return dummyawayscore_distro

        def scores_bias_f(shape, dtype=None):
            return scores_bias

        def winner_bias(shape, dtype=None):
            return class_weights_money

        # Row-wise correlation of two equal-batch 2-D arrays (NumPy side,
        # executed through py_func below). `correlate` comes from outer
        # scope — presumably numpy/scipy correlate; confirm.
        def batchcorrelate(ia, ib):
            assert ia.shape[0] == ib.shape[0]
            out = []
            for n in range(ia.shape[0]):
                a = ia[n, :]
                b = ib[n, :]
                out.append(correlate(a, b))
            return np.array(out)

        # Row-wise convolution, same structure as batchcorrelate.
        def batchconvolve(ia, ib):
            assert ia.shape[0] == ib.shape[0]
            out = []
            for n in range(ia.shape[0]):
                a = ia[n, :]
                b = ib[n, :]
                out.append(convolve(a, b))
            return np.array(out)

        # NOTE(review): K.tf.py_func is TF1-era API (removed in TF2; the
        # replacement is tf.py_function) — this code targets old Keras/TF.
        def k_batchcorrelate(inp_list):
            out = K.tf.py_func(batchcorrelate,
                               inp_list,
                               K.tf.float32,
                               stateful=False)
            # Full correlation of lengths m and n has length m + n - 1.
            out.set_shape(
                (None, inp_list[0].shape[-1] + inp_list[1].shape[-1] - 1))
            return out

        def k_batchcorrelate_shape(input_shape):
            return (None, input_shape[0][-1] + input_shape[1][-1] - 1)

        def k_batchconvolve(inp_list):
            out = K.tf.py_func(batchconvolve,
                               inp_list,
                               K.tf.float32,
                               stateful=False)
            out.set_shape(
                (None, inp_list[0].shape[-1] + inp_list[1].shape[-1] - 1))
            return out

        def k_batchconvolve_shape(input_shape):
            return (None, input_shape[0][-1] + input_shape[1][-1] - 1)

        sys.setrecursionlimit(10000)

        num_hoao_inputfeatures = hyperparameters['num_inputfeatures']

        # The residual-add wiring below assumes an odd layer count.
        try:
            assert hidden_layers % 2 != 0
        except:
            print('ERROR: number of hidden layers must be odd')
            raise

        ###########################################################

        ### NEURAL ###
        ### NETWORK ###

        # One input per side: 'ho' (home) and 'ao' (away) — inferred from
        # the home/away score heads below.
        VisibleLayer = {}
        for x in ['ho', 'ao']:
            VisibleLayer[x] = Input(shape=(num_hoao_inputfeatures, ),
                                    name=x + '_input')

        # Hidden stack shared by both towers: layers are created once per
        # depth ('rep1', 'rep2', ...) and applied to both 'ho' and 'ao'
        # tensors (siamese weight sharing). Every even depth > 0 gets a
        # residual merge-add layer.
        HiddenCell = {}
        HiddenCell['layers'], HiddenCell['tensors'] = {}, {}
        for n in range(hidden_layers):
            nn = str(n + 1)
            HiddenCell['layers']['rep' + nn] = Dense(num_hiddenunits,
                                                     name='rep' + nn,
                                                     **dense_kwargs)
            #HiddenCell['layers']['rep'+nn+'_norm'] = normalization.BatchNormalization(**bnorm_kwargs)
            HiddenCell['layers']['rep' + nn + '_act'] = Activation(act)
            HiddenCell['layers']['rep' + nn + '_drop'] = Dropout(drop)
            if n > 0 and n % 2 == 0:
                # NOTE(review): `mergeadd` is not defined here — outer-scope
                # factory, presumably keras.layers.Add; confirm.
                HiddenCell['layers']['rep' + nn + '_add'] = mergeadd()

            for x in ['ho', 'ao']:
                # Pick this depth's input: raw input at n == 0, previous
                # dropout output early on / at even depths, otherwise the
                # previous residual-add output.
                if n == 0: inp = VisibleLayer[x]
                elif n < 3 or n % 2 == 0:
                    inp = HiddenCell['tensors'][x + '_rep' + str(n) + '_drop']
                else:
                    inp = HiddenCell['tensors'][x + '_rep' + str(n) + '_add']
                HiddenCell['tensors'][x + '_rep' +
                                      nn] = HiddenCell['layers']['rep' +
                                                                 nn](inp)
                #HiddenCell['tensors'][x+'_rep'+nn+'_norm'] = HiddenCell['layers']['rep'+nn+'_norm'](HiddenCell['tensors'][x+'_rep'+nn])
                #HiddenCell['tensors'][x+'_rep'+nn+'_act'] = HiddenCell['layers']['rep'+nn+'_act'](HiddenCell['tensors'][x+'_rep'+nn+'_norm'])
                HiddenCell['tensors'][
                    x + '_rep' + nn +
                    '_act'] = HiddenCell['layers']['rep' + nn + '_act'](
                        HiddenCell['tensors'][
                            x + '_rep' + nn])  ### USE THIS FOR NO BATCH NORM
                HiddenCell['tensors'][
                    x + '_rep' + nn +
                    '_drop'] = HiddenCell['layers']['rep' + nn + '_drop'](
                        HiddenCell['tensors']
                        [x + '_rep' + nn + '_act'])  ### USE THIS FOR DROPOUT
                # Residual connections: first add joins the two previous
                # dropout outputs; later adds chain off the previous add.
                if n == 2:
                    HiddenCell['tensors'][
                        x + '_rep' + nn +
                        '_add'] = HiddenCell['layers']['rep' + nn + '_add']([
                            HiddenCell['tensors'][x + '_rep' + str(n - 1) +
                                                  '_drop'],
                            HiddenCell['tensors'][x + '_rep' + nn + '_drop']
                        ])
                elif n > 2 and n % 2 == 0:
                    HiddenCell['tensors'][
                        x + '_rep' + nn +
                        '_add'] = HiddenCell['layers']['rep' + nn + '_add']([
                            HiddenCell['tensors'][x + '_rep' + str(n - 1) +
                                                  '_add'],
                            HiddenCell['tensors'][x + '_rep' + nn + '_drop']
                        ])

        # Final representation per tower: dropout output for shallow nets,
        # last residual-add output otherwise.
        if hidden_layers < 4:
            ho_repfin_drop = HiddenCell['tensors']['ho_rep' +
                                                   str(hidden_layers) +
                                                   '_drop']
            ao_repfin_drop = HiddenCell['tensors']['ao_rep' +
                                                   str(hidden_layers) +
                                                   '_drop']
        else:
            ho_repfin_drop = HiddenCell['tensors']['ho_rep' +
                                                   str(hidden_layers) + '_add']
            ao_repfin_drop = HiddenCell['tensors']['ao_rep' +
                                                   str(hidden_layers) + '_add']

        # Shared 26-way softmax score head applied to both towers.
        output_score_layer = Dense(26,
                                   activation='softmax',
                                   bias_initializer='zeros',
                                   name='dummyscore')
        output_dummyhomescore = output_score_layer(ho_repfin_drop)
        output_dummyawayscore = output_score_layer(ao_repfin_drop)

        # Score-difference head: cross-correlate the two 26-way
        # distributions (51-long result), then refine with dense layers.
        batchcorrelate_layer = core.Lambda(k_batchcorrelate,
                                           output_shape=k_batchcorrelate_shape,
                                           name='dummydif_pre')
        output_dummyruns_pre = batchcorrelate_layer(
            [output_dummyhomescore, output_dummyawayscore])
        output_dummyruns_mid = Dense(256, activation=act,
                                     name='dummydif_mid')(output_dummyruns_pre)
        output_dummyruns = Dense(51, activation='softmax',
                                 name='dummydif')(output_dummyruns_mid)

        # Total-runs head: convolve the two score distributions.
        batchconvolve_layer = core.Lambda(k_batchconvolve,
                                          output_shape=k_batchconvolve_shape,
                                          name='dummytots')
        output_dummytots = batchconvolve_layer(
            [output_dummyhomescore, output_dummyawayscore])

        # Winner head: a frozen linear projection of the correlation vector
        # (weights set manually below), then trainable dense layers.
        output_winner_layer = Dense(2,
                                    activation='linear',
                                    name='winner_pre',
                                    trainable=False)
        output_winner_pre = output_winner_layer(output_dummyruns_pre)
        output_winner_mid = Dense(48, activation=act,
                                  name='winner_mid')(output_winner_pre)
        output_winner = Dense(2, activation='softmax',
                              name='winner')(output_winner_mid)

        ###########################################################

        model = Model(inputs=[VisibleLayer['ho'], VisibleLayer['ao']],
                      outputs=[
                          output_dummyhomescore, output_dummyawayscore,
                          output_dummyruns, output_dummytots, output_winner
                      ])

        model.compile(loss=hyperparameters['loss'],
                      optimizer=hyperparameters['learning_algo'](
                          lr=hyperparameters['learning_rate'],
                          decay=0.001,
                          momentum=hyperparameters['momentum']),
                      metrics=['accuracy'],
                      loss_weights=hyperparameters['loss_weights'])

        # Set weights for untrainable layers

        # 51x2 mask: the top 25 rows feed column 1, the bottom 25 rows feed
        # column 0, and the middle (tie) row contributes to neither.
        ones = np.ones(25).reshape(-1, 1)
        zeros = np.zeros(25).reshape(-1, 1)
        bottom = np.concatenate([ones, zeros], axis=1)
        top = np.concatenate([zeros, ones], axis=1)
        middle = np.array([0, 0]).reshape(1, 2)

        winner_weights = np.concatenate([top, middle, bottom], axis=0)
        winner_biases = middle.reshape(2, )
        output_winner_layer.set_weights([winner_weights, winner_biases])

        return model
# NOTE(review): Python 2 syntax (`print` statements) — this fragment
# predates Python 3 and uses the old `_keras_shape` attribute.
# Concatenate the flattened RGB and NIR branch features.
merge_flat = keras.layers.concatenate([before_merge_rgb, before_merge_nir])
print merge_flat._keras_shape

# Flatten the soft weighting tensor and tile it to match the merged
# feature length (repeat count taken from the NIR branch's static shape).
soft_flat = core.Flatten()(soft_dense)
print soft_flat._keras_shape
repeat = core.RepeatVector(before_merge_nir._keras_shape[1])(soft_flat)
print repeat._keras_shape
repeat_flat = core.Flatten()(repeat)
print repeat_flat._keras_shape

# Element-wise gate: multiply the tiled weights into the merged features,
# then reshape into 2 rows — presumably one per modality (RGB / NIR).
reshape_now = keras.layers.multiply([repeat_flat, merge_flat])
reshape_now = core.Reshape((2, -1))(reshape_now)
outshape = reshape_now._keras_shape

# Slice row 0 and row 1 back out of the stacked tensor; output_shape keeps
# the singleton middle axis.
layer1 = core.Lambda(lambda x: x[:, 0:1, :],
                     output_shape=lambda x:
                     (outshape[0], 1, outshape[2]))(reshape_now)
layer2 = core.Lambda(lambda x: x[:, 1:2, :],
                     output_shape=lambda x:
                     (outshape[0], 1, outshape[2]))(reshape_now)

#-------------------------------------------------------------------------------------------------------------------------------
# Combine the ends of RGB & NIR. NOTE(review): despite the original
# "CONACTENATE" comment, this is an element-wise add, not a concat.
merge_rgb_nir = keras.layers.add([layer1, layer2])
print merge_rgb_nir._keras_shape
#merge_rgb_nir = keras.layers.merge([soft_dense,before_merge_rgb,before_merge_nir], mode=scalarmult)
# Flatten and reshape back to the original 4-D image-tensor shape
# (`inshape` comes from earlier in the file, outside this view).
merge_rgb_nir = core.Flatten()(merge_rgb_nir)
merge_rgb_nir = core.Reshape(
    (inshape[1], inshape[2], inshape[3]))(merge_rgb_nir)

# DECONVOLUTION Layers