    def test_cutoffs_no_projection_bind(self):
        input_layer = keras.layers.Input(shape=(None,))
        embed_layer = AdaptiveEmbedding(
            input_dim=30,
            output_dim=8,
            cutoffs=[10, 20, 25],
            div_val=2,
            mask_zero=True,
            force_projection=False,
            return_embeddings=True,
            return_projections=True,
        )(input_layer)
        softmax_layer = AdaptiveSoftmax(
            input_dim=8,
            output_dim=30,
            cutoffs=[10, 20, 25],
            div_val=2,
            force_projection=False,
            bind_embeddings=True,
            bind_projections=True,
        )(embed_layer)
        model = keras.models.Model(input_layer, softmax_layer)
        model_path = os.path.join(
            tempfile.gettempdir(),
            'test_ada_softmax_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path,
                                        custom_objects={
                                            'AdaptiveEmbedding': AdaptiveEmbedding,
                                            'AdaptiveSoftmax': AdaptiveSoftmax,
                                        })
        model.summary()
    def test_force_projection_no_binding(self):
        input_layer = keras.layers.Input(shape=(None,))
        embed_layer = AdaptiveEmbedding(
            input_dim=3,
            output_dim=16,
            force_projection=True,
            return_embeddings=True,
            return_projections=True,
        )(input_layer)
        softmax_layer = AdaptiveSoftmax(
            input_dim=16,
            output_dim=3,
            force_projection=True,
        )(embed_layer)
        model = keras.models.Model(input_layer, softmax_layer)
        model_path = os.path.join(
            tempfile.gettempdir(),
            'test_ada_softmax_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path,
                                        custom_objects={
                                            'AdaptiveEmbedding': AdaptiveEmbedding,
                                            'AdaptiveSoftmax': AdaptiveSoftmax,
                                        })
        model.summary()
    def test_fit(self):
        input_layer = keras.layers.Input(shape=(None, ))
        embed_layer = AdaptiveEmbedding(
            input_dim=30,
            output_dim=32,
            cutoffs=[5, 15, 25],
            div_val=2,
            return_embeddings=True,
            return_projections=True,
            mask_zero=True,
        )(input_layer)
        dense_layer = keras.layers.Dense(
            units=32,
            activation='tanh',
        )(embed_layer[0])
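        # AdaptiveSoftmax below consumes the dense features plus the embedding
        # and projection weights returned by AdaptiveEmbedding; with
        # bind_embeddings/bind_projections it reuses those weights (weight
        # tying) instead of allocating its own output matrices.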
        softmax_layer = AdaptiveSoftmax(
            input_dim=32,
            output_dim=30,
            cutoffs=[5, 15, 25],
            div_val=2,
            bind_embeddings=True,
            bind_projections=True,
        )([dense_layer] + embed_layer[1:])
        model = keras.models.Model(inputs=input_layer, outputs=softmax_layer)
        model.compile('adam', 'sparse_categorical_crossentropy')
        model.summary()

        inputs = np.random.randint(0, 30, (4096, 10))
        outputs = np.expand_dims(inputs, axis=-1)
        model.fit(
            inputs,
            outputs,
            epochs=100,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='loss',
                                              min_delta=1e-4,
                                              patience=2),
            ],
        )

        model = keras.models.Model(input_layer, softmax_layer)
        model_path = os.path.join(
            tempfile.gettempdir(),
            'test_ada_softmax_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path,
                                        custom_objects={
                                            'AdaptiveEmbedding': AdaptiveEmbedding,
                                            'AdaptiveSoftmax': AdaptiveSoftmax,
                                        })

        inputs = np.random.randint(0, 30, (128, 10))
        outputs = model.predict(inputs).argmax(axis=-1)
        outputs *= np.not_equal(inputs, 0).astype('int32')
        diff = np.sum(np.not_equal(inputs, outputs))
        self.assertLess(diff, 5)
Example No. 4
    def test_sample_default(self):
        input_layer = keras.layers.Input(shape=(None,))
        embed_layer = AdaptiveEmbedding(
            input_dim=3,
            output_dim=16,
            return_embeddings=True,
            return_projections=True,
        )(input_layer)
        func = K.function([input_layer], embed_layer)
        outputs = func([np.array([[0, 1, 2]])])
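        # Without embed_dim there is no projection, so the embedded output for
        # tokens [0, 1, 2] should equal the returned weight matrix itself.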
        self.assertTrue(np.allclose(outputs[0], outputs[1]))
Example No. 5
    def test_single_projection(self):
        input_layer = keras.layers.Input(shape=(None,))
        embed_layer = AdaptiveEmbedding(
            input_dim=3,
            output_dim=16,
            embed_dim=5,
            return_embeddings=True,
            return_projections=True,
        )(input_layer)
        model = keras.models.Model(input_layer, embed_layer)
        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_ada_embed_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(
            model_path,
            custom_objects={'AdaptiveEmbedding': AdaptiveEmbedding})
        model.summary()
Example No. 6
def build_albert(token_num,
                 pos_num=512,
                 seq_len=512,
                 embed_dim=128,
                 hidden_dim=768,
                 transformer_num=12,
                 head_num=12,
                 feed_forward_dim=3072,
                 dropout_rate=0.1,
                 attention_activation=None,
                 feed_forward_activation='gelu',
                 training=True,
                 trainable=None,
                 output_layers=None):
    """Get ALBERT model.
    See: https://arxiv.org/pdf/1909.11942.pdf
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param seq_len: Maximum length of the input sequence or None.
    :param embed_dim: Dimensions of embeddings.
    :param hidden_dim: Dimensions of hidden layers.
    :param transformer_num: Number of transformers.
    :param head_num: Number of heads in multi-head attention
                    in each transformer.
    :param feed_forward_dim: Dimension of the feed forward layer
                             in each transformer.
    :param dropout_rate: Dropout rate.
    :param attention_activation: Activation for attention layers.
    :param feed_forward_activation: Activation for feed-forward layers.
    :param training: If `True`, a built model with MLM and SOP
                     (sentence-order prediction) outputs is returned;
                     otherwise the input layers and the last feature
                     extraction layer are returned.
    :param trainable: Whether the model is trainable.
    :param output_layers: A list of indices of output layers.
    """
    if attention_activation == 'gelu':
        attention_activation = gelu
    if feed_forward_activation == 'gelu':
        feed_forward_activation = gelu
    if trainable is None:
        trainable = training

    def _trainable(_layer):
        if isinstance(trainable, (list, tuple, set)):
            for prefix in trainable:
                if _layer.name.startswith(prefix):
                    return True
            return False
        return trainable

    # Build inputs
    input_token = keras.layers.Input(shape=(seq_len, ), name='Input-Token')
    input_segment = keras.layers.Input(shape=(seq_len, ), name='Input-Segment')
    inputs = [input_token, input_segment]

    # Build embeddings
    embed_token, embed_weights, embed_projection = AdaptiveEmbedding(
        input_dim=token_num,
        output_dim=hidden_dim,
        embed_dim=embed_dim,
        mask_zero=True,
        trainable=trainable,
        return_embeddings=True,
        return_projections=True,
        name='Embed-Token',
    )(input_token)
    embed_segment = keras.layers.Embedding(
        input_dim=2,
        output_dim=hidden_dim,
        trainable=trainable,
        name='Embed-Segment',
    )(input_segment)
    embed_layer = keras.layers.Add(name='Embed-Token-Segment')(
        [embed_token, embed_segment])
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=hidden_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)

    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='Embedding-Dropout',
        )(embed_layer)
    else:
        dropout_layer = embed_layer
    embed_layer = LayerNormalization(
        trainable=trainable,
        name='Embedding-Norm',
    )(dropout_layer)

    # Build shared transformer
    attention_layer = MultiHeadAttention(
        head_num=head_num,
        activation=attention_activation,
        name='Attention',
    )
    attention_normal = LayerNormalization(name='Attention-Normal')
    feed_forward_layer = FeedForward(units=feed_forward_dim,
                                     activation=feed_forward_activation,
                                     name='Feed-Forward')
    feed_forward_normal = LayerNormalization(name='Feed-Forward-Normal')
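    # ALBERT shares parameters across layers: the attention, feed-forward and
    # normalization layers created above are reused by every block in the loop
    # below.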

    transformed = embed_layer
    transformed_layers = []
    for i in range(transformer_num):
        attention_input = transformed
        transformed = attention_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Attention-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Attention-Add-{}'.format(i + 1),
        )([attention_input, transformed])
        transformed = attention_normal(transformed)

        feed_forward_input = transformed
        transformed = feed_forward_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Feed-Forward-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Feed-Forward-Add-{}'.format(i + 1),
        )([feed_forward_input, transformed])
        transformed = feed_forward_normal(transformed)
        transformed_layers.append(transformed)

    if training:
        # Build tasks
        mlm_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation=feed_forward_activation,
            name='MLM-Dense',
        )(transformed)
        mlm_norm_layer = LayerNormalization(name='MLM-Norm')(mlm_dense_layer)
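        # The MLM head is an AdaptiveSoftmax tied to the token embedding: it
        # receives the embedding weights and projection produced by
        # 'Embed-Token' and reuses them via bind_embeddings/bind_projections.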
        mlm_pred_layer = AdaptiveSoftmax(
            input_dim=hidden_dim,
            output_dim=token_num,
            embed_dim=embed_dim,
            bind_embeddings=True,
            bind_projections=True,
            name='MLM-Sim',
        )([mlm_norm_layer, embed_weights, embed_projection])
        masked_layer = Masked(name='MLM')([mlm_pred_layer, inputs[-1]])
        extract_layer = Extract(index=0, name='Extract')(transformed)
        nsp_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation='tanh',
            name='SOP-Dense',
        )(extract_layer)
        nsp_pred_layer = keras.layers.Dense(
            units=2,
            activation='softmax',
            name='SOP',
        )(nsp_dense_layer)
        model = keras.models.Model(inputs=inputs,
                                   outputs=[masked_layer, nsp_pred_layer])
        for layer in model.layers:
            layer.trainable = _trainable(layer)
        return model
    if output_layers is not None:
        if isinstance(output_layers, list):
            output_layers = [
                transformed_layers[index] for index in output_layers
            ]
            output = keras.layers.Concatenate(name='Output')(output_layers)
        else:
            output = transformed_layers[output_layers]
        model = keras.models.Model(inputs=inputs, outputs=output)
        return model
    model = keras.models.Model(inputs=inputs, outputs=transformed)
    for layer in model.layers:
        layer.trainable = _trainable(layer)
    return inputs, transformed
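
# Hedged usage sketch (not part of the scraped source): build a small,
# untrained ALBERT encoder and wrap the returned tensors in a model. The
# hyper-parameter values below are illustrative assumptions, not defaults.
albert_inputs, albert_output = build_albert(
    token_num=1000,
    embed_dim=64,
    hidden_dim=128,
    transformer_num=2,
    head_num=4,
    feed_forward_dim=256,
    training=False,  # return (inputs, last feature layer), no MLM/SOP heads
)
albert_encoder = keras.models.Model(inputs=albert_inputs, outputs=albert_output)
albert_encoder.summary()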
Example No. 7
def build_transformer_xl(units,
                         embed_dim,
                         hidden_dim,
                         num_token,
                         num_block,
                         num_head,
                         batch_size,
                         memory_len,
                         target_len,
                         dropout=0.0,
                         attention_dropout=0.0,
                         cutoffs=None,
                         div_val=1,
                         force_projection=None,
                         bind_embeddings=True,
                         bind_projections=True,
                         clamp_len=None,
                         share_biases=True):
    """Build transformer-XL model.

    :param units: Units inside the transformer.
    :param embed_dim: Dimension of embeddings.
    :param hidden_dim: Dimension inside position-wise feed-forward layer.
    :param num_token: Number of distinct input tokens.
    :param num_block: Number of basic encoder blocks.
    :param num_head: Number of heads for attention.
    :param batch_size: Maximum batch size.
    :param memory_len: The maximum length of memories.
    :param target_len: The length of the prediction block.
    :param dropout: General dropout rate.
    :param attention_dropout: Dropout rate inside attention layer.
    :param cutoffs: Cutoffs of adaptive embedding.
    :param div_val: Scale factor of adaptive embedding.
    :param force_projection: Whether to add projections even when `embed_dim`
                             equals `units`.
    :param bind_embeddings: Whether to bind embeddings to adaptive softmax.
    :param bind_projections: Whether to bind projections to adaptive softmax.
    :param clamp_len: The maximum value of relative position.
    :param share_biases: Whether to use the same biases for all layers.
    :return: The built model.
    """
    token_input = keras.layers.Input(shape=(target_len,), name='Input-Token')
    memory_length_input = keras.layers.Input(shape=(1,), name='Input-Memory-Length')
    inputs = [token_input, memory_length_input]

    results = AdaptiveEmbedding(
        input_dim=num_token,
        output_dim=units,
        embed_dim=embed_dim,
        cutoffs=cutoffs,
        div_val=div_val,
        mask_zero=True,
        force_projection=force_projection,
        return_embeddings=True,
        return_projections=True,
        name='Embed-Token',
    )(token_input)
    token_embed, embedding_weights = results[0], results[1:]
    token_embed = Scale(scale=np.sqrt(units), name='Embed-Token-Scaled')(token_embed)
    last_memory = Memory(
        batch_size=batch_size,
        memory_len=memory_len,
        target_len=target_len,
        output_dim=units,
        name='Memory-0',
    )([token_embed, memory_length_input])

    position_embed = PositionalEmbedding(
        output_dim=units,
        clamp_len=clamp_len,
        name='Embed-Position',
    )([token_input, last_memory])

    if 0.0 < dropout < 1.0:
        token_embed = keras.layers.Dropout(rate=dropout, name='Embed-Token-Dropped')(token_embed)
        position_embed = keras.layers.Dropout(rate=dropout, name='Embed-Position-Dropped')(position_embed)

    context_bias, relative_bias = None, None
    if share_biases:
        context_bias, relative_bias = RelativeBias(units=units, name='Biases')(last_memory)

    outputs = [token_embed]
    for i in range(num_block):
        block_input, block_output = outputs[-1], outputs[-1]
        if not share_biases:
            context_bias, relative_bias = RelativeBias(units=units, name='Biases-{}'.format(i + 1))(last_memory)
        block_output = RelativePartialMultiHeadSelfAttention(
            units=units,
            num_head=num_head,
            use_bias=False,
            attention_dropout=attention_dropout,
            name='Attention-{}'.format(i + 1),
        )([block_output, position_embed, last_memory, context_bias, relative_bias])
        if 0.0 < dropout < 1.0:
            block_output = keras.layers.Dropout(rate=dropout, name='Attention-Dropped-{}'.format(i + 1))(block_output)
        block_output = keras.layers.Add(name='Attention-Res-{}'.format(i + 1))([block_input, block_output])
        block_output = LayerNormalization(name='Attention-Norm-{}'.format(i + 1))(block_output)

        block_input = block_output
        block_output = FeedForward(
            units=hidden_dim,
            dropout_rate=dropout,
            name='FeedForward-{}'.format(i + 1),
        )(block_output)
        if 0.0 < dropout < 1.0:
            block_output = keras.layers.Dropout(rate=dropout, name='FeedForward-Dropped-{}'.format(i + 1))(block_output)
        block_output = keras.layers.Add(name='FeedForward-Res-{}'.format(i + 1))([block_input, block_output])
        block_output = LayerNormalization(name='FeedForward-Norm-{}'.format(i + 1))(block_output)

        if i < num_block - 1:
            last_memory = Memory(
                batch_size=batch_size,
                memory_len=memory_len,
                target_len=target_len,
                output_dim=units,
                name='Memory-{}'.format(i + 1),
            )([block_output, memory_length_input])

        outputs.append(block_output)

    if 0.0 < dropout < 1.0:
        outputs[-1] = keras.layers.Dropout(rate=dropout, name='Output-Dropped')(outputs[-1])
    softmax = AdaptiveSoftmax(
        input_dim=units,
        output_dim=num_token,
        embed_dim=embed_dim,
        cutoffs=cutoffs,
        div_val=div_val,
        force_projection=force_projection,
        bind_embeddings=bind_embeddings,
        bind_projections=bind_projections,
        name='Softmax',
    )(outputs[-1:] + embedding_weights)

    model = keras.models.Model(inputs=inputs, outputs=softmax)
    return model
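
# Hedged usage sketch (not part of the scraped source): the hyper-parameter
# values below are illustrative assumptions. The model takes the token ids and
# a second input carrying the current memory length.
xl_model = build_transformer_xl(
    units=64,
    embed_dim=64,
    hidden_dim=128,
    num_token=1000,
    num_block=2,
    num_head=4,
    batch_size=16,
    memory_len=64,
    target_len=32,
)
xl_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
xl_model.summary()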
Example No. 8
    def test_sample_cutoffs(self):
        embed_0 = np.array([
            [
                0.7562694862279867,
                -0.7532437781410828,
                -0.2882295795429552,
                -1.6990371818805843,
                -0.09864164817566004,
                -0.5235034477186453,
                -1.600153091413999,
                0.03441732751250957,
            ],
            [
                -0.3680529905261407,
                1.1673600332887637,
                -0.6914459306809843,
                -0.7645030146906124,
                2.0434827620248606,
                -0.2743642839675437,
                0.04834288951969495,
                -1.0368596183756285,
            ],
            [
                -0.8440324158987662,
                0.05585795322288273,
                -0.5827731797867599,
                1.502853709909658,
                -0.09311037618863122,
                1.366316512453695,
                -0.3834091917878978,
                -1.2647642860801802,
            ],
            [
                1.5212768184170435,
                -0.7854311748221854,
                -0.4674213048014483,
                -1.0460200278367862,
                0.3705555995848165,
                -0.12273261562651422,
                1.8138708310050653,
                -0.26957084415202764,
            ],
            [
                -0.15162771245260723,
                -0.19654664890167275,
                -1.77930041719533,
                -0.6987101769248606,
                0.32681036318004547,
                0.19156716698736181,
                0.8386004334587568,
                -1.8390076172747616,
            ],
            [
                -1.1363779747587972,
                -0.15233131547247872,
                0.158423477487577,
                -0.6984487776859649,
                1.2424950830966563,
                -0.16130616338419873,
                -1.6298737099566283,
                1.7229575808498785,
            ],
            [
                0.613169803410901,
                -1.5391239758406403,
                -1.2476893436624792,
                -0.05514513857644962,
                -0.5537408608863357,
                -0.9965187549427492,
                -0.6842234254089083,
                -1.2420165307152238,
            ],
            [
                -0.4086071455923046,
                -0.7286151488450243,
                1.2938629380821804,
                0.7450912596769113,
                -0.13042129128885002,
                -1.4269400640112133,
                -0.713571658756806,
                -0.5036154349645774,
            ],
            [
                0.7326026846217363,
                0.12752591749386982,
                0.7387086112901271,
                -1.4161019970745967,
                -0.6396944907214142,
                -2.0010110577965783,
                0.5843029435066284,
                -0.4033331631189724,
            ],
            [
                1.22301664512685,
                -0.024541032664251092,
                -0.27128167541306714,
                1.910258142043872,
                -0.9673069099782774,
                0.6614265651081772,
                -1.165650716838653,
                -0.5085143504562967,
            ],
        ])
        embed_1 = np.array([
            [
                0.6593494357199338, -0.06233478795012013, 0.3394579881849406,
                0.05894554241531747
            ],
            [
                1.0015451559801243, 0.7487130375684998, -0.4244371286817957,
                -0.45182923128222996
            ],
            [
                -0.41965070720383035, -0.2875756074838825, 1.8712603426351773,
                2.531083895835167
            ],
            [
                -0.6800689195006436, -0.39454047242128376, 0.5442439581019961,
                -0.21672610899025968
            ],
            [
                -1.3119449289237803, 1.5645034642903253, 1.3203132828621442,
                1.7673879116655695
            ],
            [
                -0.8817194029613362, -0.6655645822150862, 0.2341787847442309,
                -0.7641095447924122
            ],
            [
                -0.47497798682688624, 1.0109350638555383, -0.5514102704837403,
                -0.1450007600387442
            ],
            [
                -0.531267085230172, 0.12862169808408846, 0.18339345878624577,
                1.5279135983387981
            ],
            [
                0.43338928943049837, 0.2660771849859784, 1.4227633495535283,
                -0.5072818940455809
            ],
            [
                0.8704222505796531, 0.9361117741463981, 0.7442665348863866,
                0.91392694614948
            ],
        ])
        embed_2 = np.array([
            [1.2712292341556446, 1.009655780936284],
            [0.4420362222435132, 1.5186087787070979],
            [-0.10018465175352317, -0.09182475290216006],
            [-1.246047485363712, 1.6404603895987184],
            [1.4427767754835976, 1.2102150762070925],
        ])
        embed_3 = np.array([
            [0.8285545743394414],
            [0.7111875779008273],
            [0.35799413043562894],
            [-0.15005629449852656],
            [0.6263946579941496],
        ])
        proj_0 = np.array([
            [0.3409731658714878, 0.032745006392315756, 0.668797744010083],
            [-0.3082491589087075, -1.0028023345331745, 0.2122102239605163],
            [-0.3751562822576601, -0.5825445529201775, 0.43389258576225614],
            [0.26067868083146517, 0.8192897299406429, 0.073726048897453],
            [1.1346146882950412, -2.456072992985481, -0.054474463562940736],
            [-1.0283521269636255, -0.1983876737118115, 1.0132159972212373],
            [2.72334361610427, 0.5683724225575054, 2.403638230905517],
            [-0.2137114185905606, 0.3048293347650425, 1.510425235737199],
        ])
        proj_1 = np.array([
            [0.42186259731067743, 0.6034344571434473, 2.362015513199549],
            [-0.9313583984951119, -0.8242699945665621, 0.2596454482698166],
            [0.8871149648450185, -0.663397984939589, -1.195129355668761],
            [0.8016784490871957, 0.13830808473255815, -0.6580242457235711],
        ])
        proj_2 = np.array([
            [1.4802477891158519, 0.12638370704617574, -0.18503256737397666],
            [-0.3900434531439191, 0.14771223879593204, -0.8863321455068343],
        ])
        proj_3 = np.array(
            [[-0.589729339138385, 2.018799784975004, -0.08431336326635828]])
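
        # With cutoffs [10, 20, 25] and div_val=2, the 30 tokens form four
        # clusters of sizes 10/10/5/5 with embedding dims 8/4/2/1; each cluster
        # is projected up to output_dim=3 by the matching proj_* matrix above.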

        input_layer = keras.layers.Input(shape=(None, ))
        embed_layer = AdaptiveEmbedding(
            input_dim=30,
            output_dim=3,
            embed_dim=8,
            cutoffs=[10, 20, 25],
            div_val=2,
            mask_zero=True,
            return_embeddings=True,
            return_projections=True,
        )
        func = K.function([input_layer], embed_layer(input_layer))
        embed_layer.set_weights([
            embed_0,
            proj_0,
            embed_1,
            proj_1,
            embed_2,
            proj_2,
            embed_3,
            proj_3,
        ])
        outputs = func([np.array([list(range(30))])])
        expected = np.array([
            [-3.783413887023926, -0.9968423843383789, -4.223631381988525],
            [2.528728485107422, -6.659335613250732, -2.194012403488159],
            [-1.9791769981384277, 0.8412808179855347, -2.137157917022705],
            [6.2075581550598145, 0.31576472520828247, 4.379002094268799],
            [3.3448808193206787, -0.268412709236145, -1.552351474761963],
            [-3.813311815261841, -3.9697980880737305, -2.3214385509490967],
            [-0.06424117088317871, 3.0353987216949463, -4.962082862854004],
            [-0.7221541404724121, 0.6183103322982788, -3.726100444793701],
            [2.573601245880127, 0.48284363746643066, -0.4642190933227539],
            [-3.8191750049591064, 3.2147698402404785, -2.0111422538757324],
            [0.6846045255661011, 0.23221178352832794, 1.0967247486114502],
            [-1.013551950454712, 0.20630428194999695, 3.3646368980407715],
            [3.7799394130706787, -0.9075126051902771, -4.967802047729492],
            [0.3896251916885376, -0.4761944115161896, -2.216604709625244],
            [0.5775725841522217, -2.712695360183716, -5.433547496795654],
            [-0.1569119393825531, -0.24449113011360168, -2.0325169563293457],
            [-1.7473266124725342, -0.7741519212722778, -0.10500013828277588],
            [1.04367196559906, -0.33694392442703247, -2.4460482597351074],
            [0.7904950380325317, -0.971816897392273, -0.2738245725631714],
            [0.8882685303688049, -0.6137074828147888, 0.8081271648406982],
            [1.487924575805664, 0.3098011910915375, -1.130109190940857],
            [0.06199967861175537, 0.2801832854747772, -1.4277828931808472],
            [-0.1124824583530426, -0.026225347071886063, 0.09992465376853943],
            [-2.484309673309326, 0.0848359763622284, -1.2234333753585815],
            [1.6636306047439575, 0.3611070513725281, -1.3396131992340088],
            [-0.4886229634284973, 1.6726857423782349, -0.06985822319984436],
            [-0.4194082021713257, 1.435745358467102, -0.059962619096040726],
            [-0.2111196517944336, 0.7227184772491455, -0.030183689668774605],
            [0.08849260210990906, -0.30293360352516174, 0.012651749886572361],
            [-0.36940330266952515, 1.264565348625183, -0.05281343683600426],
        ])
        self.assertTrue(np.allclose(expected, outputs[0][0]))