import tensorflow as tf
from tensorflow.keras import layers


def build_network(train_x, train_y, test_x, test_y, epochs, total, max_length):
    print("total words", total)
    model = tf.keras.Sequential([
        layers.Embedding(total + 1, 64, input_length=max_length),
        layers.Dropout(.1),
        layers.Flatten(),
        layers.Dense(600, activation='relu'),
        layers.Dense(300, activation='relu'),
        layers.Dense(16, activation='softmax')
    ])
    model.compile(
        optimizer='adam',                         # optimizer
        loss='sparse_categorical_crossentropy',   # loss function to minimize
        metrics=['acc'])
    model.summary()

    print('# Fit model on training data')
    print('validation sets', test_x.shape, test_y.shape)
    # print('validation sets', test_x, test_y)
    print('train sets', train_x.shape, train_y.shape)

    # Use the epochs argument rather than a hard-coded value.
    history = model.fit(train_x, train_y,
                        batch_size=2,
                        epochs=epochs,
                        validation_data=(test_x, test_y))
    print('\nhistory dict:', history.history)
    return model
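# A minimal usage sketch for build_network (an assumption, not part of the original):
# it presumes integer-encoded, padded sequences and integer class labels in [0, 16),
# since the last layer is Dense(16, softmax) trained with sparse_categorical_crossentropy.
# The random arrays below are placeholder data only.
import numpy as np

vocab_size = 5000        # assumed vocabulary size
seq_len = 40             # assumed padded sequence length
train_x = np.random.randint(1, vocab_size + 1, size=(200, seq_len))
train_y = np.random.randint(0, 16, size=(200,))
test_x = np.random.randint(1, vocab_size + 1, size=(50, seq_len))
test_y = np.random.randint(0, 16, size=(50,))

model = build_network(train_x, train_y, test_x, test_y,
                      epochs=5, total=vocab_size, max_length=seq_len)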
def encode(self, inputs, attention_bias, training):
    """Generate continuous representation for inputs.

    Args:
        inputs: int tensor with shape [batch_size, input_length].
        attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
        training: boolean, whether in training mode or not.

    Returns:
        float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope('encode'):
        embedded_inputs = self.embedding_softmax_layer(inputs)
        embedded_inputs = tf.cast(embedded_inputs, self.params['dtype'])
        inputs_padding = model_utils.get_padding(inputs)
        attention_bias = tf.cast(attention_bias, self.params['dtype'])

        with tf.name_scope('add_pos_encoding'):
            length = tf.shape(embedded_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, self.params['hidden_size'])
            pos_encoding = tf.cast(pos_encoding, self.params['dtype'])
            encoder_inputs = embedded_inputs + pos_encoding

        if training:
            # Pass the training flag explicitly so dropout is actually applied.
            encoder_inputs = layers.Dropout(
                self.params['layer_postprocess_dropout'])(
                    encoder_inputs, training=training)

        return self.encoder_stack(
            encoder_inputs, attention_bias, inputs_padding, training=training)
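# Hedged sketch of the two model_utils helpers used above; this mirrors the
# standard sinusoidal position encoding from "Attention Is All You Need" and a
# simple padding mask, not necessarily the exact library implementation.
import math
import tensorflow as tf


def get_position_encoding(length, hidden_size, min_timescale=1.0, max_timescale=1.0e4):
    """Return a [length, hidden_size] tensor of sinusoidal position signals."""
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = hidden_size // 2
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        tf.maximum(tf.cast(num_timescales, tf.float32) - 1, 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    # Sines in the first half of the channels, cosines in the second half.
    return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)


def get_padding(x, padding_value=0):
    """Return a float tensor that is 1.0 at padding positions and 0.0 elsewhere."""
    return tf.cast(tf.equal(x, padding_value), tf.float32)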
def call(self, x, training):
    """Return outputs of the feedforward network.

    Args:
        x: tensor with shape [batch_size, length, hidden_size]
        training: boolean, whether in training mode or not.

    Returns:
        Output of the feedforward network.
        tensor with shape [batch_size, length, hidden_size]
    """
    output = self.filter_dense_layer(x)
    if training:
        # Pass the training flag explicitly so dropout is actually applied.
        output = layers.Dropout(self.relu_dropout)(output, training=training)
    output = self.output_dense_layer(output)
    return output
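# Hedged sketch of the layer this call() likely belongs to; the constructor
# argument names (hidden_size, filter_size, relu_dropout) are assumptions that
# match the attribute names used above.
import tensorflow as tf


class FeedForwardNetwork(tf.keras.layers.Layer):
    def __init__(self, hidden_size, filter_size, relu_dropout):
        super().__init__()
        # Expand to the filter size with ReLU, then project back to hidden_size.
        self.filter_dense_layer = tf.keras.layers.Dense(
            filter_size, activation='relu', name='filter_layer')
        self.output_dense_layer = tf.keras.layers.Dense(
            hidden_size, name='output_layer')
        self.relu_dropout = relu_dropout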
def call(self, x, *args, **kwargs):
    """Apply layer norm, the wrapped sublayer, dropout, and a residual connection."""
    training = kwargs['training']
    # Pre-processing: layer normalization, then the wrapped sublayer.
    y = self.layer_norm(x)
    y = self.layer(y, *args, **kwargs)
    # Post-processing: dropout followed by a residual connection.
    if training:
        y = layers.Dropout(self.postprocess_dropout)(y, training=training)
    return x + y
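# Hedged sketch of the wrapper class this call() belongs to; the attribute
# names (layer, layer_norm, postprocess_dropout) are assumptions consistent
# with the usual "layer norm -> sublayer -> dropout -> residual" pattern.
import tensorflow as tf


class PrePostProcessingWrapper(tf.keras.layers.Layer):
    def __init__(self, layer, postprocess_dropout):
        super().__init__()
        self.layer = layer                          # wrapped sublayer (attention or FFN)
        self.postprocess_dropout = postprocess_dropout
        self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)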
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

# Make the training data 80% and testing 20%.
x_train = np.concatenate((input_train, input_test[:15000]))
input_test = input_test[15000:]
y_train = np.concatenate((y_train, y_test[:15000]))
y_test = y_test[15000:]

# Import from the public Keras API rather than the private tensorflow_core path.
from tensorflow.keras import models
from tensorflow.keras import layers

embedding_size = 128

model = models.Sequential()
model.add(layers.Embedding(max_features, embedding_size, input_length=maxlen))
model.add(layers.Bidirectional(layers.LSTM(128, return_sequences=True)))
model.add(layers.Dropout(0.2))
model.add(layers.Bidirectional(layers.LSTM(128)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
his = model.fit(x_train, y_train, epochs=4, batch_size=64, validation_split=0.025)
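# The variables used above (input_train, y_train, max_features, maxlen) are not
# defined in the snippet. This is a hedged sketch of how they might be prepared;
# the 25000/25000-style split and the binary sigmoid output suggest the Keras
# IMDB dataset, and max_features/maxlen are illustrative values.
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_features = 10000   # assumed vocabulary size
maxlen = 500           # assumed padded review length

(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)
input_train = pad_sequences(input_train, maxlen=maxlen)
input_test = pad_sequences(input_test, maxlen=maxlen)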
def call(self, query_input, source_input, bias, training, cache=None,
         decode_loop_step=None):
    """Apply attention mechanism to query_input and source_input.

    Args:
        query_input: [B, len_query, hidden_size]
        source_input: [B, len_source, hidden_size]
        bias: [B, 1, len_query, len_source]
        training: bool
        cache: (Used during prediction) A dictionary with tensors containing
            results of previous attentions. The dictionary must have the items:
                {'k': tensor with shape [B, i, heads, dim_per_head],
                 'v': tensor with shape [B, i, heads, dim_per_head]}
            where i is the current decoded length for non-padded decode, or max
            sequence length for padded decode.
        decode_loop_step: An integer, step number of the decoding loop. Used
            only for autoregressive inference on TPU.

    Returns:
        Attention layer output with shape [B, len_query, hidden_size]
    """
    # Linearly project query, key and value using different learned
    # projections. Splitting heads is automatically done during the linear
    # projections --> [B, len, num_heads, dim_per_head]
    query = self.query_dense_layer(query_input)
    key = self.key_dense_layer(source_input)
    value = self.value_dense_layer(source_input)

    if cache is not None:
        # Combine cached keys and values with new keys and values.
        if decode_loop_step is not None:
            cache_k_shape = cache['k'].shape.as_list()
            indices = tf.reshape(
                tf.one_hot(decode_loop_step, cache_k_shape[1], dtype=key.dtype),
                [1, cache_k_shape[1], 1, 1])
            key = cache['k'] + key * indices
            cache_v_shape = cache['v'].shape.as_list()
            indices = tf.reshape(
                tf.one_hot(decode_loop_step, cache_v_shape[1], dtype=value.dtype),
                [1, cache_v_shape[1], 1, 1])
            value = cache['v'] + value * indices
        else:
            key = layers.concatenate([tf.cast(cache['k'], key.dtype), key], axis=1)
            # Concatenate the new values (not the keys) onto the cached values.
            value = layers.concatenate([tf.cast(cache['v'], value.dtype), value], axis=1)

        # Update cache
        cache['k'] = key
        cache['v'] = value

    # Scale query to prevent the dot product between query and key from
    # growing too large.
    depth = (self.hidden_size // self.num_heads)
    query *= depth ** -0.5

    # Calculate dot product attention
    logits = tf.einsum('BTNH,BFNH->BNFT', key, query)
    logits += bias
    # Note that softmax internally performs math operations using float32
    # for numeric stability. When training with float16, we keep the input
    # and output in float16 for better performance.
    weights = layers.Softmax(name='attention_weights')(logits)
    if training:
        weights = layers.Dropout(self.attention_dropout)(weights, training=training)
    attention_output = tf.einsum('BNFT,BTNH->BFNH', weights, value)

    # Run the outputs through another linear projection layer. Recombining
    # heads is automatically done --> [batch_size, length, hidden_size]
    attention_output = self.output_dense_layer(attention_output)
    return attention_output
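# Hedged sketch of how the incremental decode cache is typically primed for the
# padded-decode path assumed above; the names (attn_layer, num_heads,
# dim_per_head, decoder_self_attention_bias) are illustrative assumptions.
import tensorflow as tf

batch_size, max_decode_length = 2, 16
num_heads, dim_per_head = 8, 64

cache = {
    'k': tf.zeros([batch_size, max_decode_length, num_heads, dim_per_head]),
    'v': tf.zeros([batch_size, max_decode_length, num_heads, dim_per_head]),
}
# At decoding step i, only the newest position's embedding is passed in; the
# layer scatters the new key/value into the cache at index decode_loop_step:
# out = attn_layer(query_input=new_step_embedding, source_input=new_step_embedding,
#                  bias=decoder_self_attention_bias, training=False,
#                  cache=cache, decode_loop_step=i)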