Code example #1
import tensorflow as tf
from tensorflow.keras import layers


def build_network(train_x, train_y, test_x, test_y, epochs, total, max_length):
    print("total words", total)
    model = tf.keras.Sequential([
        layers.Embedding(total + 1, 64, input_length=max_length),
        layers.Dropout(.1),
        layers.Flatten(),
        layers.Dense(600, activation='relu'),
        layers.Dense(300, activation='relu'),
        layers.Dense(16, activation='softmax')
    ])
    model.compile(
        optimizer='Adam',  # Optimizer
        # Loss function to minimize
        loss="sparse_categorical_crossentropy",
        metrics=['acc'])
    model.summary()
    print('# Fit model on training data')
    print('validation sets', test_x.shape, test_y.shape)
    # print('validation sets', test_x, test_y)
    print('train sets', train_x.shape, train_y.shape)
    history = model.fit(train_x,
                        train_y,
                        batch_size=2,
                        epochs=epochs,
                        validation_data=(test_x, test_y))
    print('\nhistory dict:', history.history)
    return model
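
As a usage illustration only, here is a minimal, hypothetical driver for build_network with randomly generated placeholder data; the vocabulary size, sequence length, and 16-class labels are assumptions chosen to match the layers above, and it presumes build_network from this example is in scope.

import numpy as np

# Hypothetical sizes: a 5,000-word vocabulary, sequences padded to length 50,
# and 16 target classes to match the final softmax layer in build_network.
total_words = 5000
max_length = 50
num_classes = 16

# Placeholder arrays standing in for tokenized, padded text and integer labels.
train_x = np.random.randint(1, total_words + 1, size=(200, max_length))
train_y = np.random.randint(0, num_classes, size=(200,))
test_x = np.random.randint(1, total_words + 1, size=(50, max_length))
test_y = np.random.randint(0, num_classes, size=(50,))

model = build_network(train_x, train_y, test_x, test_y,
                      epochs=3, total=total_words, max_length=max_length)
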
Code example #2
File: transformer.py  Project: alanpurple/transformer
    def encode(self, inputs, attention_bias, training):
        """Generate continuous representation for inputs.

        Args:
            inputs: int tensor with shape [batch_size, input_length].
            attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
            training: boolean, whether in training mode or not.

        Returns:
            float tensor with shape [batch_size, input_length, hidden_size].
        """
        with tf.name_scope('encode'):
            embedded_inputs = self.embedding_softmax_layer(inputs)
            embedded_inputs = tf.cast(embedded_inputs, self.params['dtype'])
            inputs_padding = model_utils.get_padding(inputs)
            attention_bias = tf.cast(attention_bias, self.params['dtype'])

            with tf.name_scope('add_pos_encoding'):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params['hidden_size'])
                pos_encoding = tf.cast(pos_encoding, self.params['dtype'])
                encoder_inputs = embedded_inputs + pos_encoding

            if training:
                encoder_inputs = layers.Dropout(
                    self.params['layer_postprocess_dropout'])(encoder_inputs)

            return self.encoder_stack(encoder_inputs,
                                      attention_bias,
                                      inputs_padding,
                                      training=training)
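
The helper model_utils.get_position_encoding is not shown in this example. As a point of reference, the sketch below is a minimal stand-in that computes the standard sinusoidal position encoding of the Transformer, assuming hidden_size is even; it is an illustration, not the project's actual implementation.

import math

import tensorflow as tf


def sinusoidal_position_encoding(length, hidden_size,
                                 min_timescale=1.0, max_timescale=1.0e4):
    """Return a [length, hidden_size] tensor of sin/cos position signals."""
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = hidden_size // 2
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        (float(num_timescales) - 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    # The first half of the channels holds sines, the second half cosines.
    return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
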
Code example #3
    def call(self, x, training):
        """Return outputs of the feedforward network.

        Args:
            x: tensor with shape [batch_size, length, hidden_size]
            training: boolean, whether in training mode or not.

        Returns:
            Output of the feedforward network.
            tensor with shape [batch_size, length, hidden_size]
        """
        output = self.filter_dense_layer(x)
        if training:
            output = layers.Dropout(self.relu_dropout)(output)
        output = self.output_dense_layer(output)

        return output
Code example #4
File: transformer.py  Project: alanpurple/transformer
    def call(self, x, *args, **kwargs):
        # Preprocessing: apply layer normalization, then call the wrapped layer.
        y = self.layer_norm(x)
        y = self.layer(y, *args, **kwargs)
        # Postprocessing: apply dropout and add a residual connection.
        if kwargs['training']:
            y = layers.Dropout(self.postprocess_dropout)(y)
        return x + y
Code example #5
import numpy as np

print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)
# Move the first 15,000 test examples into the training set so the split is roughly 80% train / 20% test.
x_train = np.concatenate((input_train, input_test[:15000]))
input_test = input_test[15000:]
y_train = np.concatenate((y_train, y_test[:15000]))
y_test = y_test[15000:]

from tensorflow.keras import models
from tensorflow.keras import layers

embedding_size = 128
model = models.Sequential()
model.add(layers.Embedding(max_features, embedding_size, input_length=maxlen))
model.add(layers.Bidirectional(layers.LSTM(128, return_sequences=True)))
model.add(layers.Dropout(0.2))
model.add(layers.Bidirectional(layers.LSTM(128)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
his = model.fit(x_train,
                y_train,
                epochs=4,
                batch_size=64,
                validation_split=0.025)
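
A brief follow-up sketch, assuming the variables above are still in scope: the held-out 20% can then be scored with model.evaluate.

test_loss, test_acc = model.evaluate(input_test, y_test, batch_size=64)
print('held-out test accuracy:', test_acc)
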
Code example #6
    def call(self, query_input, source_input, bias, training, cache=None,
             decode_loop_step=None):
        """Apply attention mechanism to query_input and source_input.

        Args:
            query_input: float tensor with shape [B, len_query, hidden_size]
            source_input: float tensor with shape [B, len_source, hidden_size]
            bias: float tensor with shape [B, 1, len_query, len_source]
            training: boolean, whether in training mode or not.
            cache: (Used during prediction) A dictionary with tensors containing
                results of previous attentions. The dictionary must have the items:
                    {'k': tensor with shape [B,i,heads,dim_per_head],
                     'v': tensor with shape [B,i,heads,dim_per_head]}
                where i is the current decoded length for non-padded decode, or max
                sequence length for padded decode.
            decode_loop_step: An integer, step number of the decoding loop. Used only
                for autoregressive inference on TPU.

        Returns:
            Attention layer output with shape [B,len_query,hidden_size]
        """
        # Linearly project query, key and value using different learned
        # projections. Splitting heads is automatically done during the linear
        # projections --> [B, len, num_heads, dim_per_head]
        query = self.query_dense_layer(query_input)
        key = self.key_dense_layer(source_input)
        value = self.value_dense_layer(source_input)

        if cache is not None:
            # Combine cached keys and values with new keys and values.
            if decode_loop_step is not None:
                cache_k_shape = cache['k'].shape.as_list()
                indices = tf.reshape(
                    tf.one_hot(decode_loop_step, cache_k_shape[1], dtype=key.dtype),
                    [1, cache_k_shape[1], 1, 1])
                key = cache['k'] + key * indices
                cache_v_shape = cache['v'].shape.as_list()
                indices = tf.reshape(
                    tf.one_hot(decode_loop_step, cache_v_shape[1], dtype=value.dtype),
                    [1, cache_v_shape[1], 1, 1])
                value = cache['v'] + value * indices
            else:
                key = layers.concatenate([tf.cast(cache['k'], key.dtype), key], axis=1)
                value = layers.concatenate([tf.cast(cache['v'], value.dtype), value], axis=1)

            # Update cache.
            cache['k'] = key
            cache['v'] = value

        # Scale query to prevent the dot product between query and key from
        # growing too large.
        depth = self.hidden_size // self.num_heads
        query *= depth ** -0.5

        # Calculate dot-product attention.
        logits = tf.einsum('BTNH,BFNH->BNFT', key, query)
        logits += bias
        # Note that softmax internally performs math operations using float32
        # for numeric stability. When training with float16, we keep the input
        # and output in float16 for better performance.
        weights = layers.Softmax(name='attention_weights')(logits)
        if training:
            weights = layers.Dropout(self.attention_dropout)(weights)
        attention_output = tf.einsum('BNFT,BTNH->BFNH', weights, value)

        # Run the outputs through another linear projection layer. Recombining
        # heads is automatically done --> [batch_size, length, hidden_size].
        attention_output = self.output_dense_layer(attention_output)
        return attention_output
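
To make the scaling and einsum pattern above easier to follow in isolation, here is a minimal, self-contained sketch of single-head scaled dot-product attention on random tensors; the chosen shapes, the missing bias and cache handling, and the absence of head splitting are simplifying assumptions for illustration only.

import tensorflow as tf

batch, len_q, len_kv, depth = 2, 5, 7, 64

# Random stand-ins for already-projected queries, keys and values (one head).
query = tf.random.normal([batch, len_q, depth])
key = tf.random.normal([batch, len_kv, depth])
value = tf.random.normal([batch, len_kv, depth])

# Scale queries so the logits do not grow with depth, then take dot products.
query *= depth ** -0.5
logits = tf.einsum('BFH,BTH->BFT', query, key)
weights = tf.nn.softmax(logits, axis=-1)
output = tf.einsum('BFT,BTH->BFH', weights, value)
print(output.shape)  # (2, 5, 64)
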