def __init__(self, num_layers, D, d_model, num_heads, dff,
             maximum_position_encoding, rate=0.1):
    super(TransformerDecoder, self).__init__()
    self.rate = rate
    self.d_model = d_model
    self.num_layers = num_layers
    # as in: https://arxiv.org/abs/1711.03905
    self.embedding = tf.keras.layers.Conv1D(d_model, kernel_size=1,
                                            activation='relu',
                                            input_shape=[D, d_model])
    self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
    self.dec_layers = [
        TransformerDecoderLayer(d_model, num_heads, dff, rate)
        for _ in range(num_layers)
    ]
    if self.rate > 0.0:
        self.dropout = tf.keras.layers.Dropout(self.rate)
def __init__(self, vocab_size, d_model):
    super(Embedding, self).__init__()
    self.vocab_size = vocab_size
    self.d_model = d_model
    self.embedding = tf.keras.layers.Embedding(vocab_size, d_model)
    # note: vocab_size doubles as the maximum position here
    self.pos_encoding = positional_encoding(vocab_size, d_model)
def __init__(self, vocab_size, embedding_matrix, config):
    super(DAM, self).__init__()
    self.batch_sz = config.batch_size
    self.enc_units = config.hidden_size
    self.encoder1 = Encoder(enc_units=config.hidden_size)
    self.encoder2 = Encoder(enc_units=config.hidden_size)
    self.embedding = tf.keras.layers.Embedding(
        vocab_size, config.emb_size,
        embeddings_initializer=keras.initializers.constant(embedding_matrix),
        trainable=True)
    self.pos_encoding = positional_encoding(config.max_utterance_len,
                                            config.hidden_size)
    self.match_self = SelfMatch(num_layer=config.num_layer)
    self.match_u_attend_r = AttentiveModule()
    self.match_r_attend_u = AttentiveModule()
    self.aggregat1 = Aggregation(filters=config.filter_size[0],
                                 kernel_size=(3, 3, 3), strides=(1, 1, 1),
                                 pool_kernel_size=(3, 3, 3),
                                 pool_strides=(3, 3, 3))
    self.aggregat2 = Aggregation(filters=config.filter_size[1],
                                 kernel_size=(3, 3, 3), strides=(1, 1, 1),
                                 pool_kernel_size=(3, 3, 3),
                                 pool_strides=(3, 3, 3))
    self.flatten = keras.layers.Flatten()
    self.output_prj = keras.layers.Dense(2)
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
             maximum_position_encoding, rate=0.1):
    """
    The constructor for the Encoder class.

    Parameters:
        num_layers (int): number of stacked encoder layers
        d_model (int): token embedding size
        num_heads (int): number of attention heads
        dff (int): number of units in the feed-forward hidden layer
        input_vocab_size (int): vocabulary size
        maximum_position_encoding (int): maximum sequence length
        rate (float): dropout rate
    """
    super(Encoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding,
                                            self.d_model)
    self.enc_layers = [
        EncoderLayer(d_model, num_heads, dff, rate)
        for _ in range(num_layers)
    ]
    self.dropout = tf.keras.layers.Dropout(rate)
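# The constructors above call positional_encoding(max_position, d_model)
# without defining it. A minimal sketch of the standard sinusoidal helper,
# assuming the TF2 Transformer-tutorial signature (the original repos may
# differ in detail):
import numpy as np
import tensorflow as tf

def get_angles(pos, i, d_model):
    # angle rate shrinks geometrically with the (paired) feature index
    return pos / np.power(10000.0, (2 * (i // 2)) / np.float32(d_model))

def positional_encoding(position, d_model):
    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                            np.arange(d_model)[np.newaxis, :],
                            d_model)
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])  # sine on even indices
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])  # cosine on odd indices
    # shape (1, position, d_model) so it broadcasts over the batch dimension
    return tf.cast(angle_rads[np.newaxis, ...], dtype=tf.float32)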
def __init__(self, model_dim: int, feed_forward_dimension: int,
             num_heads: list, maximum_position_encoding: int,
             dropout_rate: float, dense_blocks: int, conv_filters: int,
             conv_activation: str, conv_padding: str, conv_kernel: int,
             **kwargs):
    super(CrossAttentionBlocks, self).__init__(**kwargs)
    self.model_dim = model_dim
    self.pos_encoding_scalar = tf.Variable(1.)
    self.pos_encoding = positional_encoding(maximum_position_encoding, model_dim)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    self.CADB = [
        CrossAttentionDenseBlock(model_dim=model_dim,
                                 dropout_rate=dropout_rate,
                                 num_heads=n_heads,
                                 dense_hidden_units=feed_forward_dimension,
                                 name=f'{self.name}_CADB_{i}')
        for i, n_heads in enumerate(num_heads[:dense_blocks])]
    self.CACB = [
        CrossAttentionConvBlock(model_dim=model_dim,
                                dropout_rate=dropout_rate,
                                num_heads=n_heads,
                                name=f'{self.name}_CACB_{i}',
                                conv_filters=conv_filters,
                                conv_activation=conv_activation,
                                conv_padding=conv_padding,
                                kernel_size=conv_kernel)
        for i, n_heads in enumerate(num_heads[dense_blocks:])]
def _cluster_selection_component(self):
    """
    Method that creates the cluster selection UI components and their
    logical dependencies.
    """
    # creates a title UI component
    st.sidebar.title("Cluster information")
    # gets all available cluster strategies
    available_cluster_strategies = ClusterFactory.values_list()
    # creates a selection box UI component with the available cluster
    # strategies and stores the selected strategy in a variable
    self.selected_cluster = st.sidebar.selectbox(
        'Select a cluster strategy', available_cluster_strategies)
    # creates the positional encoding checkbox
    self.positional_encoding_checkbox = st.sidebar.checkbox(
        "Use Positional Encoding")
    # creates the auto cluster checkbox
    self.auto_cluster_checkbox = st.sidebar.checkbox(
        "Automatically find the optimal number of clusters")
    # if auto cluster is ticked, just initialize the value with an integer
    if self.auto_cluster_checkbox:
        self.num_cluster_slider = 2
    else:
        # otherwise creates a slider UI component so the user can pick
        # the number of clusters
        self.num_cluster_slider = st.sidebar.slider(
            "Select number of clusters", min_value=2, max_value=8)
    # gets the cluster object from the ClusterFactory
    self.cluster = ClusterFactory.get(
        self.selected_cluster)(n=self.num_cluster_slider)
    # gets the features from the video object
    self.features = self.video.features()
    # if the positional encoding checkbox is ticked, preprocess the
    # features with the positional_encoding function
    if self.positional_encoding_checkbox:
        self.features = positional_encoding(self.features)
    # if the auto cluster checkbox is ticked, use the auto method
    if self.auto_cluster_checkbox:
        self.cluster_labels = self.cluster.auto(self.features)
    else:
        # otherwise use the estimate method
        self.cluster_labels = self.cluster.estimate(self.features)
def _sentence_order_speaker_feature(self, context_sentence_representation, speaker):
    position_value = tf.tile(
        tf.expand_dims(tf.range(tf.shape(speaker)[1]), axis=0),
        multiples=[tf.shape(speaker)[0], 1])
    context_sentence_position = positional_encoding(
        position_value,
        lookup_table=self.position_embeddings,
        num_units=self.hparams.position_embedding_dim,
        zero_pad=False,
        scale=False,
        scope="dialog_sentence_position")
    context_sentence_position = tf.cast(context_sentence_position, tf.float32)
    speaker_one_hot = tf.one_hot(speaker, 2)

    if self.pos_emb_bool and self.user_emb_bool:
        print("position and user")
        context_w_sentence_feature = tf.concat(
            axis=-1,
            values=[context_sentence_representation,
                    context_sentence_position, speaker_one_hot])
    elif self.pos_emb_bool and not self.user_emb_bool:
        print("only position")
        context_w_sentence_feature = tf.concat(
            axis=-1,
            values=[context_sentence_representation, context_sentence_position])
    elif self.user_emb_bool and not self.pos_emb_bool:
        print("user")
        context_w_sentence_feature = tf.concat(
            axis=-1,
            values=[context_sentence_representation, speaker_one_hot])
    else:
        # fall back to the bare representation so the variable is always bound
        context_w_sentence_feature = context_sentence_representation

    projected_context_w_sentence_feature = tf.layers.dense(
        inputs=context_w_sentence_feature,
        units=self.hparams.embedding_dim + self.hparams.sentence_rnn_hidden_dim * 2,
        activation=None,
        kernel_initializer=tf.initializers.variance_scaling(
            scale=2.0, mode="fan_in", distribution="normal"),
        name="context_w_sentence_feature_projection")
    return projected_context_w_sentence_feature
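# The method above calls a lookup-table variant of positional_encoding
# (learned position embeddings with a Kyubyong-transformer-style TF1 API).
# A hedged sketch under that assumption; the original repo's version may
# differ:
import tensorflow as tf

def positional_encoding(inputs, lookup_table, num_units,
                        zero_pad=False, scale=False, scope="positional_encoding"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if zero_pad:
            # reserve row 0 of the table as an all-zero padding embedding
            lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
                                      lookup_table[1:, :]), axis=0)
        # gather one embedding per position id
        outputs = tf.nn.embedding_lookup(lookup_table, inputs)
        if scale:
            outputs = outputs * (num_units ** 0.5)
        return outputs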
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
             rate=0.1):
    super(Encoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
    self.pos_encoding = positional_encoding(input_vocab_size, self.d_model)
    self.enc_layers = [
        EncoderLayer(d_model, num_heads, dff, rate)
        for _ in range(num_layers)
    ]
    self.dropout = tf.keras.layers.Dropout(rate)
def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
             maximum_position_encoding, rate=0.1):
    super(Decoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
    self.dec_layers = [
        DecoderLayer(d_model, num_heads, dff, rate)
        for _ in range(num_layers)
    ]
    self.dropout = tf.keras.layers.Dropout(rate)
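# Hypothetical instantiation sketch for the Encoder/Decoder constructors
# above (hyperparameter values are illustrative, not from the original
# configs):
sample_encoder = Encoder(num_layers=2, d_model=512, num_heads=8, dff=2048,
                         input_vocab_size=8500,
                         maximum_position_encoding=10000)
sample_decoder = Decoder(num_layers=2, d_model=512, num_heads=8, dff=2048,
                         target_vocab_size=8000,
                         maximum_position_encoding=5000)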
learning_rate = 1e-6

# Data parameters
step = 4
lag = 7
d_model = 7

# Date parameters
start_dt = datetime(2014, 1, 1)
end_train = datetime(2015, 7, 31)
end_val = datetime(2015, 9, 30)
end_dt = datetime(2016, 1, 1)

# Model parameters
model = TransformerClassifier(d_model, d_model, lag, 2048, 12, 0.5).to('cuda')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
criterion = nn.CrossEntropyLoss()
positional_encoder = torch.tensor(positional_encoding(lag, d_model)).to('cuda')


def preprocess_data():
    print('Beginning Preprocessing:')
    start_preprocessing = time()
    print('\t(1/5) Loading stock price data...')
    price_data_train, price_data_val, price_data_test = load_stock_price_data(
        '/home/mrkeaton/Documents/Datasets/stocknet-dataset/price/preprocessed',
        start_dt, end_train, end_val, end_dt)
    print('\t(2/5) Loading tweet data...')
    tweet_data_train, tweet_data_val, tweet_data_test = load_tweet_data(
        '/home/mrkeaton/Documents/Datasets/stocknet-dataset/tweet/preprocessed',
        end_train, end_val)
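# The PyTorch script above wraps positional_encoding(lag, d_model) in
# torch.tensor, which suggests a NumPy-returning variant; a minimal sketch
# under that assumption (returns shape (length, d_model)):
import numpy as np

def positional_encoding(length, d_model):
    pos = np.arange(length)[:, np.newaxis]
    i = np.arange(d_model)[np.newaxis, :]
    angle_rads = pos / np.power(10000.0, (2 * (i // 2)) / np.float32(d_model))
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])  # sine on even indices
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])  # cosine on odd indices
    return angle_rads.astype(np.float32)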
def test_positional_encoding(self):
    pos_encoded_feat = positional_encoding(self.features)
    assert np.all(pos_encoded_feat[:, 0::2] == 1.0)
    assert np.all(pos_encoded_feat[:, 1::2] == 2.0)
# baseline = bool(int(args['baseline']))
model_name = args['model_name']
assert model_name in ['baseline', 'avg_we', 'transformer', 'cnn', 'text_only']

if model_name != 'baseline':
    if model_name == 'avg_we':
        tf.logging.info("Average Word Embeddings Model!")
        text_embeddings = tf.math.reduce_mean(embeds, axis=1, keepdims=False)
    elif model_name == 'transformer':
        tf.logging.info("Transformer Encoder Based Model!")
        key_masks = tf.expand_dims(
            tf.sign(tf.reduce_sum(tf.abs(embeds), axis=-1)), -1)
        embeds += utils.positional_encoding(text, T, N,
                                            num_units=hidden_units,
                                            zero_pad=False, scale=False,
                                            scope="enc_pe")
        embeds *= key_masks
        # Dropout
        embeds = tf.nn.dropout(embeds, keep_prob=dropout_keep_prob)
        enc = embeds
        # Blocks
        for i in range(conf.num_blocks):
            with tf.variable_scope("num_blocks_{}".format(i)):
                # Multihead Attention
                enc = utils.multihead_attention(queries=enc,
                                                keys=embeds,
                                                num_units=hidden_units,
def run(self, input_seq, output_seq, is_training=True):
    '''
    Run a single sentence through the transformer.

    Args:
        input_seq: the input vector of shape [1, <num_words>]
        output_seq: the target values for the transformer network, [1, <num_words>]
        is_training: if True, also run the train step and collect the loss
    '''
    # sanity checks
    if len(input_seq) != len(output_seq):
        raise ValueError('Length of input and output should be equal. '
                         'Got in: {0}, out: {1}'.format(len(input_seq),
                                                        len(output_seq)))

    # vars
    transformer_output = []
    seqlen = len(input_seq[0])  # due to the shape of the input

    # making the masking lookup table for this sequence
    masking_matrix = np.array(
        [([1, ] * (i + 1)) + ([LOWEST_VAL, ] * (seqlen - i - 1))
         for i in range(seqlen)], dtype=np.float32)

    # if we need to make our own embedding
    embed_in = []   # input embeddings
    embed_out = []  # output embeddings
    labels = []     # labels

    # get the embeddings
    if self.need_embedding:
        # the input is going to be [1, <num_words>], so use only the first index
        for i in range(len(input_seq[0])):
            val_in = input_seq[0][i]
            val_out = output_seq[0][i]
            # for each value get the embedding
            embed_val_in = self.sess.run(
                tf.nn.embedding_lookup(self.embedding_matrix, np.int32(val_in)))
            embed_val_out = self.sess.run(
                tf.nn.embedding_lookup(self.embedding_matrix, np.int32(val_out)))
            pos_enc_val = utils.positional_encoding(i, self.DIM_MODEL)
            # add the product of embedding and positional encoding to the lists
            embed_in.append(embed_val_in * pos_enc_val)
            embed_out.append(embed_val_out * pos_enc_val)
            # labels (one-hot over the vocabulary)
            zeros = np.zeros([1, self.VOCAB_SIZE])
            zeros[0][val_out] = 1.
            labels.append(zeros)
        # replace the raw sequences with the embedded ones
        input_seq = np.array(embed_in)
        output_seq = np.array(embed_out)
        labels = np.array(labels)

    # variables to make if we are training
    if is_training:
        seq_loss = []

    # run over the sequence
    for i in range(seqlen):
        # make the feed_dict
        feed = {self.input_placeholder: input_seq,
                self.output_placeholder: output_seq,
                self.labels_placeholder: labels[i],
                self.generated_length: [i],
                self.masking_matrix: masking_matrix}
        # run the model for that input
        tt_o = self.sess.run(self.decoder_op, feed_dict=feed)[0]
        transformer_output.append(np.argmax(tt_o))
        # if training is to be done
        if is_training:
            seq_curr_loss, _ = self.sess.run([self.loss, self.train_step],
                                             feed_dict=feed)
            seq_loss.append(seq_curr_loss)

    # return the sequential output of the model, plus the per-step loss
    # when training
    if is_training:
        return transformer_output, seq_loss
    return transformer_output