Example #1
    def __init__(self,
                 num_layers,
                 D,
                 d_model,
                 num_heads,
                 dff,
                 maximum_position_encoding,
                 rate=0.1):
        super(TransformerDecoder, self).__init__()

        self.rate = rate
        self.d_model = d_model
        self.num_layers = num_layers
        # as in: https://arxiv.org/abs/1711.03905
        self.embedding = tf.keras.layers.Conv1D(d_model,
                                                kernel_size=1,
                                                activation='relu',
                                                input_shape=[D, d_model])
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                d_model)
        self.dec_layers = [
            TransformerDecoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        if self.rate > 0.0:
            self.dropout = tf.keras.layers.Dropout(self.rate)
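None of the examples on this page define positional_encoding itself. A minimal sketch of the sinusoidal helper the Keras examples appear to assume, following the standard TensorFlow transformer tutorial (the exact signature is an assumption):

import numpy as np
import tensorflow as tf

def positional_encoding(position, d_model):
    # Sinusoidal positional encoding (Vaswani et al., 2017):
    # even channels use sin, odd channels use cos.
    pos = np.arange(position)[:, np.newaxis]   # (position, 1)
    i = np.arange(d_model)[np.newaxis, :]      # (1, d_model)
    angle_rads = pos / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    # Add a batch axis so the result can be added to (batch, seq_len, d_model) inputs.
    return tf.cast(angle_rads[np.newaxis, ...], dtype=tf.float32)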
Example #2
	def __init__(self, vocab_size, d_model):
		super(Embedding, self).__init__()
		self.vocab_size = vocab_size
		self.d_model = d_model

		self.embedding = tf.keras.layers.Embedding(vocab_size, d_model)
		self.pos_encoding = positional_encoding(vocab_size, d_model)
Example #3
 def __init__(self, vocab_size, embedding_matrix, config):
     super(DAM, self).__init__()
     self.batch_sz = config.batch_size
     self.enc_units = config.hidden_size
     self.encoder1 = Encoder(enc_units=config.hidden_size)
     self.encoder2 = Encoder(enc_units=config.hidden_size)
     self.embedding = tf.keras.layers.Embedding(
         vocab_size,
         config.emb_size,
         embeddings_initializer=keras.initializers.constant(
             embedding_matrix),
         trainable=True)
     self.pos_encoding = positional_encoding(config.max_utterance_len,
                                             config.hidden_size)
     self.match_self = SelfMatch(num_layer=config.num_layer)
     self.match_u_attend_r = AttentiveModule()
     self.match_r_attend_u = AttentiveModule()
     self.aggregat1 = Aggregation(filters=config.filter_size[0],
                                  kernel_size=(3, 3, 3),
                                  strides=(1, 1, 1),
                                  pool_kernel_size=(3, 3, 3),
                                  pool_strides=(3, 3, 3))
     self.aggregat2 = Aggregation(filters=config.filter_size[1],
                                  kernel_size=(3, 3, 3),
                                  strides=(1, 1, 1),
                                  pool_kernel_size=(3, 3, 3),
                                  pool_strides=(3, 3, 3))
     self.flatten = keras.layers.Flatten()
     self.output_prj = keras.layers.Dense(2)
Example #4
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 input_vocab_size,
                 maximum_position_encoding,
                 rate=0.1):
        """
        The constructor for the Encoder class.

        Parameters:
           num_layers (int): number of stacked encoder layers
           d_model (int): token embedding size
           num_heads (int): number of attention heads
           dff (int): number of units in the feed-forward hidden layer
           input_vocab_size (int): vocabulary size
           maximum_position_encoding (int): maximum sequence length
           rate (float): dropout rate
        """
        super(Encoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.d_model)

        self.enc_layers = [
            EncoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(rate)
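A hypothetical instantiation of the Encoder above; the hyperparameter values are illustrative only, not from the original source:

encoder = Encoder(num_layers=4, d_model=128, num_heads=8, dff=512,
                  input_vocab_size=8500, maximum_position_encoding=10000)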
Example #5
 def __init__(self,
              model_dim: int,
              feed_forward_dimension: int,
              num_heads: list,
              maximum_position_encoding: int,
              dropout_rate: float,
              dense_blocks: int,
              conv_filters: int,
              conv_activation: str,
              conv_padding: str,
              conv_kernel: int,
              **kwargs):
     super(CrossAttentionBlocks, self).__init__(**kwargs)
     self.model_dim = model_dim
     self.pos_encoding_scalar = tf.Variable(1.)
     self.pos_encoding = positional_encoding(maximum_position_encoding, model_dim)
     self.dropout = tf.keras.layers.Dropout(dropout_rate)
     self.CADB = [
         CrossAttentionDenseBlock(model_dim=model_dim, dropout_rate=dropout_rate, num_heads=n_heads,
                                  dense_hidden_units=feed_forward_dimension, name=f'{self.name}_CADB_{i}')
         for i, n_heads in enumerate(num_heads[:dense_blocks])]
     self.CACB = [
         CrossAttentionConvBlock(model_dim=model_dim, dropout_rate=dropout_rate, num_heads=n_heads,
                                 name=f'{self.name}_CACB_{i}', conv_filters=conv_filters,
                                 conv_activation=conv_activation, conv_padding=conv_padding, kernel_size=conv_kernel)
         for i, n_heads in enumerate(num_heads[dense_blocks:])]
Example #6
    def _cluster_selection_component(self):
        """
        Method that creates the cluster selection UI components and their logical
        dependencies.
        """
        # creates a title UI component
        st.sidebar.title("Cluster information")

        # gets all available cluster strategies
        available_cluster_strategies = ClusterFactory.values_list()

        # creates a selection box UI component with the available_cluster_strategies
        # and stores the selected cluster strategy in a variable
        self.selected_cluster = st.sidebar.selectbox(
            'Select a cluster strategy', available_cluster_strategies)

        # creates the positional encoding checkbox
        self.positional_encoding_checkbox = st.sidebar.checkbox(
            "Use Positional Encoding")
        # creates the auto cluster checkbox
        self.auto_cluster_checkbox = st.sidebar.checkbox(
            "Automatic find the optimal number of clusters")

        # if auto cluster is ticked, just initialize it with an integer
        if self.auto_cluster_checkbox:
            self.num_cluster_slider = 2
        else:  # otherwise creates a slider UI component so the user can pick the number of clusters
            self.num_cluster_slider = st.sidebar.slider(
                "Select number of clusters", min_value=2, max_value=8)

        # gets the cluster strategy object from the ClusterFactory
        self.cluster = ClusterFactory.get(
            self.selected_cluster)(n=self.num_cluster_slider)

        # gets the features from the video object
        self.features = self.video.features()

        # if the positional encoding checkbox is ticked, preprocess the features
        # with the positional_encoding function
        if self.positional_encoding_checkbox:
            self.features = positional_encoding(self.features)

        # if auto cluster checkbox is ticked, use the auto method
        if self.auto_cluster_checkbox:
            self.cluster_labels = self.cluster.auto(self.features)
        else:  # otherwise use the estimate method
            self.cluster_labels = self.cluster.estimate(self.features)
Example #7
    def _sentence_order_speaker_feature(self, context_sentence_representation,
                                        speaker):

        position_value = tf.tile(tf.expand_dims(tf.range(tf.shape(speaker)[1]),
                                                axis=0),
                                 multiples=[tf.shape(speaker)[0], 1])

        context_sentence_position = positional_encoding(
            position_value,
            lookup_table=self.position_embeddings,
            num_units=self.hparams.position_embedding_dim,
            zero_pad=False,
            scale=False,
            scope="dialog_sentence_position")
        context_sentence_position = tf.cast(context_sentence_position,
                                            tf.float32)

        speaker_one_hot = tf.one_hot(speaker, 2)

        if self.pos_emb_bool and self.user_emb_bool:
            print("position and user")
            context_w_sentence_feature = \
                tf.concat(axis=-1, values=[context_sentence_representation, context_sentence_position, speaker_one_hot])
        elif self.pos_emb_bool and not self.user_emb_bool:
            print("only position")
            context_w_sentence_feature = \
                tf.concat(axis=-1, values=[context_sentence_representation, context_sentence_position])
        elif self.user_emb_bool and not self.pos_emb_bool:
            print("user")
            context_w_sentence_feature = \
                tf.concat(axis=-1, values=[context_sentence_representation, speaker_one_hot])
        else:  # neither flag set: fall back to the raw sentence representation
            context_w_sentence_feature = context_sentence_representation

        projected_context_w_sentence_feature = tf.layers.dense(
            inputs=context_w_sentence_feature,
            units=self.hparams.embedding_dim +
            self.hparams.sentence_rnn_hidden_dim * 2,
            activation=None,
            kernel_initializer=tf.initializers.variance_scaling(
                scale=2.0, mode="fan_in", distribution="normal"),
            name="context_w_sentence_feature_projection")

        return projected_context_w_sentence_feature
Example #8
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 input_vocab_size,
                 rate=0.1):
        super(Encoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(input_vocab_size, self.d_model)

        self.enc_layers = [
            EncoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]

        self.dropout = tf.keras.layers.Dropout(rate)
Example #9
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 target_vocab_size,
                 maximum_position_encoding,
                 rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                d_model)

        self.dec_layers = [
            DecoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(rate)
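The constructors in Examples #8 and #9 only build the layers. In the usual tutorial pattern, the forward pass scales the embedding by sqrt(d_model), adds a slice of the positional encoding, applies dropout, and then runs the stacked layers. A minimal sketch of such a call method for the Decoder above, assuming each DecoderLayer returns only its output tensor:

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        seq_len = tf.shape(x)[1]
        x = self.embedding(x)  # (batch_size, seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))  # scale embeddings
        x += self.pos_encoding[:, :seq_len, :]                # add positional encoding
        x = self.dropout(x, training=training)
        for dec_layer in self.dec_layers:
            x = dec_layer(x, enc_output, training, look_ahead_mask, padding_mask)
        return x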
Example #10
learning_rate = 1e-6
# Data parameters
step = 4
lag = 7
d_model = 7
# Date parameters
start_dt = datetime(2014, 1, 1)
end_train = datetime(2015, 7, 31)
end_val = datetime(2015, 9, 30)
end_dt = datetime(2016, 1, 1)
# Model parameters
model = TransformerClassifier(d_model, d_model, lag, 2048, 12, 0.5).to('cuda')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
criterion = nn.CrossEntropyLoss()
positional_encoder = torch.tensor(positional_encoding(lag, d_model)).to('cuda')


def preprocess_data():
    print('Beginning Preprocessing:')
    start_preprocessing = time()

    print('\t(1/5) Loading stock price data...')
    price_data_train, price_data_val, price_data_test = load_stock_price_data(
        '/home/mrkeaton/Documents/Datasets/stocknet-dataset/price/preprocessed',
        start_dt, end_train, end_val, end_dt)

    print('\t(2/5) Loading tweet data...')
    tweet_data_train, tweet_data_val, tweet_data_test = load_tweet_data(
        '/home/mrkeaton/Documents/Datasets/stocknet-dataset/tweet/preprocessed',
        end_train, end_val)
Example #11
 def test_positional_encoding(self):
     pos_encoded_feat = positional_encoding(self.features)
     assert np.all(pos_encoded_feat[:, 0::2] == 1.0)
     assert np.all(pos_encoded_feat[:, 1::2] == 2.0)
Example #12
#baseline = bool(int(args['baseline']))

model_name = args['model_name']
assert model_name in ['baseline', 'avg_we', 'transformer', 'cnn', 'text_only']
if model_name != 'baseline':
    if model_name == 'avg_we':
        tf.logging.info("Average Word Embeddings Model!")
        text_embeddings = tf.math.reduce_mean(embeds, axis=1, keepdims=False)
    elif model_name == 'transformer':
        tf.logging.info("Transformer Encoder Based Model!")
        key_masks = tf.expand_dims(
            tf.sign(tf.reduce_sum(tf.abs(embeds), axis=-1)), -1)
        embeds += utils.positional_encoding(text,
                                            T,
                                            N,
                                            num_units=hidden_units,
                                            zero_pad=False,
                                            scale=False,
                                            scope="enc_pe")
        embeds *= key_masks

        # Dropout
        embeds = tf.nn.dropout(embeds, keep_prob=dropout_keep_prob)
        enc = embeds
        # Blocks
        for i in range(conf.num_blocks):
            with tf.variable_scope("num_blocks_{}".format(i)):
                # Multihead Attention
                enc = utils.multihead_attention(queries=enc,
                                                keys=embeds,
                                                num_units=hidden_units,
Example #13
	def run(self, input_seq, output_seq, is_training = True):
		'''
		Run a single sentence through the transformer.
		Args:
			input_seq: the input vector of shape [1, <Num_words>]
			output_seq: the output value to the transformer network [1, <Num_words>]
			return_sequences: if True, return the full output sequence
		'''
		# sanity checks
		if len(input_seq) != len(output_seq):
			raise ValueError('Length of input and output should be equal. Got in: {0}, out: {1}'.format(len(input_seq), len(output_seq)))

		# vars
		transformer_output = []
		seqlen = len(input_seq[0]) # due to the shape of input
		# input_seq = None --> # get the input embeddings here

		# making the masking lookup table for this sequence
		masking_matrix = np.array([([1,] * (i+1)) + ([LOWEST_VAL,] * (seqlen - i - 1)) for i in range(seqlen)], dtype = np.float32)

		# if we need to make our own embedding
		embed_in = [] # input embeddings
		embed_out = [] # output embedding
		labels = [] # labels

		# get the embeddings
		if self.need_embedding:
			for i in range(len(input_seq[0])):
				# since the input is going to be [1, <NUM WORDS>] so we need to use only the first index
				val_in = input_seq[0][i]
				val_out = output_seq[0][i]

				# for each value get the embedding
				embed_val_in = self.sess.run(tf.nn.embedding_lookup(self.embedding_matrix, np.int32(val_in)))
				embed_val_out = self.sess.run(tf.nn.embedding_lookup(self.embedding_matrix, np.int32(val_out)))
				pos_enc_val = utils.positional_encoding(i, self.DIM_MODEL)

				# append the product of the embedding and the positional encoding to the running lists
				embed_in.append(embed_val_in * pos_enc_val)
				embed_out.append(embed_val_out * pos_enc_val)

				# labels
				zeros = np.zeros([1, self.VOCAB_SIZE])
				zeros[0][val_out] = 1.
				labels.append(zeros)

			# replace the raw sequences with their embedded versions
			input_seq = np.array(embed_in)
			output_seq = np.array(embed_out)
			labels = np.array(labels)

		# variables to make if we are training
		if is_training:
			seq_loss = []

		# run over the seqlen
		for i in range(seqlen):
			# make the feed_dict
			feed = {self.input_placeholder: input_seq,
				self.output_placeholder: output_seq,
				self.labels_placeholder: labels[i],
				self.generated_length: [i],
				self.masking_matrix: masking_matrix}
			
			# run the model for that input
			tt_o = self.sess.run(self.decoder_op, feed_dict = feed)[0]
			transformer_output.append(np.argmax(tt_o))

			# if training is to be done
			if is_training:
				seq_curr_loss, _ = self.sess.run([self.loss, self.train_step], feed_dict = feed)
				seq_loss.append(seq_curr_loss)

		# return the sequential output of the model
		if is_training:
			return transformer_output, seq_loss

		# otherwise
		return transformer_output
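Example #13 calls utils.positional_encoding(i, self.DIM_MODEL) with a single position index, i.e. a variant that returns one d_model-sized vector per position. A minimal sketch of such a variant (the signature and behavior are assumptions, since utils is not shown):

import numpy as np

def positional_encoding(pos, d_model):
    # Sinusoidal vector for a single position: sin on even channels, cos on odd.
    angles = pos / np.power(10000.0, (2 * (np.arange(d_model) // 2)) / d_model)
    enc = np.empty(d_model, dtype=np.float32)
    enc[0::2] = np.sin(angles[0::2])
    enc[1::2] = np.cos(angles[1::2])
    return enc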