Esempio n. 1
0
    def _create_attention_module(self):
        """Build the audio-conditioned attention over the text encoder outputs.

        The audio model's final encoding is linearly projected to the text
        model's final-encoder width (weight ``attnM`` plus the one-element
        bias ``attnb``).  The projection is then handed to ``luong_attention``
        as ``condition``, with the text encoder outputs as ``target``.

        Sets ``self.attnM``, ``self.attnb``, ``self.attn_audio_final_encoder``,
        ``self.final_encoder`` and ``self.tmp_norm``.
        """
        print('[launch-multi] create attention module')

        audio_dim = self.model_audio.final_encoder_dimension
        text_dim = self.model_text.final_encoder_dimension

        # Trainable weight mapping the audio width onto the text width,
        # initialised uniformly in [-0.25, 0.25).
        projection_init = tf.random_uniform([audio_dim, text_dim],
                                            minval=-0.25,
                                            maxval=0.25,
                                            dtype=tf.float32,
                                            seed=None)
        self.attnM = tf.Variable(projection_init,
                                 trainable=True,
                                 name="attn_projection_helper")

        # One-element trainable bias; it is broadcast over the projection.
        bias_init = tf.zeros([1], dtype=tf.float32)
        self.attnb = tf.Variable(bias_init, trainable=True, name="attn_bias")

        projected_audio = tf.matmul(self.model_audio.final_encoder, self.attnM)
        self.attn_audio_final_encoder = projected_audio + self.attnb

        # The two values returned by luong_attention are stored unchanged.
        attention_args = {
            'batch_size': self.batch_size,
            'target': self.model_text.outputs_en,
            'condition': self.attn_audio_final_encoder,
            'batch_seq': self.encoder_seq_text,
            'max_len': self.model_text.encoder_size,
            'hidden_dim': text_dim,
        }
        self.final_encoder, self.tmp_norm = luong_attention(**attention_args)
Esempio n. 2
0
    def _add_attn(self):
        """Apply ``luong_attention`` over ``self.outputs`` with a learned memory.

        A single trainable memory vector (``attnM``) and bias (``attnB``), both
        of length ``self.final_encoder_dimension``, are created inside the
        ``video_Attn`` name scope.  The memory is repeated for every item in
        the batch and passed to ``luong_attention`` as ``condition``.

        Sets ``self.attnM``, ``self.attnB``, ``self.batch_attnM``,
        ``self.final_encoder`` and ``self.attn_norm``.
        """
        from model_luong_attention import luong_attention
        print('[launch-video] apply Attention')

        with tf.name_scope('video_Attn'):
            memory_dim = self.final_encoder_dimension

            # Attention memory, initialised uniformly in [-0.25, 0.25).
            memory_init = tf.random.uniform([memory_dim],
                                            minval=-0.25,
                                            maxval=0.25,
                                            dtype=tf.float32,
                                            seed=None)
            self.attnM = tf.Variable(memory_init,
                                     trainable=True,
                                     name="attn_memory")

            bias_init = tf.zeros([memory_dim], dtype=tf.float32)
            self.attnB = tf.Variable(bias_init, name="attn_bias")

            # Repeat the same memory once per batch item: multiplying by a
            # [batch_size, 1] tensor of ones broadcasts the vector, after
            # which the bias is added.
            repeated_memory = tf.ones([self.batch_size, 1]) * self.attnM
            self.batch_attnM = repeated_memory + self.attnB

            # The two values returned by luong_attention are stored unchanged.
            attention_result = luong_attention(
                batch_size=self.batch_size,
                target=self.outputs,
                condition=self.batch_attnM,
                batch_seq=self.encoder_seq,
                max_len=self.encoder_size,
                hidden_dim=memory_dim)
            self.final_encoder, self.attn_norm = attention_result
    def _create_attention_layers_type_3(self, name):
        """Attend over modality 2 using modalities 1 and 3 as the query.

        The final encodings of modality 1 and modality 3 are concatenated,
        projected to modality 2's final-encoder width, and passed to
        ``luong_attention`` as ``condition`` over ``self.modality_2_outputs``.
        The result replaces ``self.modality_2_final_encoder``; the three final
        encodings are then concatenated into ``self.final_encoder``.

        Args:
            name: suffix appended (via ``str``) to the ``attention_layer_``
                name scope.

        Sets ``self.modality_2_final_encoder``, ``self.attn_norm_3hop``,
        ``self.final_encoder`` and ``self.final_encoder_dimension``.
        """
        print(
            '[launch-multi-attn] create an attention layer: A2(1)+V2(3) --> T(2)'
        )

        with tf.name_scope('attention_layer_' + str(name)):
            # NOTE(review): the original author's note says that, for the
            # audio case, the [batch, encoder_size, hidden_dim] outputs are
            # the ones to use rather than a width of hidden_dim + prosody_dim
            # -- confirm against the audio encoder.
            query_dim = (self.modality_1_final_encoder_dimension +
                         self.modality_3_final_encoder_dimension)
            attended_dim = self.modality_2_final_encoder_dimension

            # Trainable projection from the joint query width to the width of
            # modality 2, initialised uniformly in [-0.25, 0.25).
            projection_init = tf.random.uniform([query_dim, attended_dim],
                                                minval=-0.25,
                                                maxval=0.25,
                                                dtype=tf.float32,
                                                seed=None)
            projection = tf.Variable(projection_init,
                                     trainable=True,
                                     name="attn_projection_helper")

            bias_init = tf.zeros([attended_dim], dtype=tf.float32)
            bias = tf.Variable(bias_init, trainable=True, name="attn_bias")

            joint_query = tf.concat(
                [self.modality_1_final_encoder, self.modality_3_final_encoder],
                axis=1)
            projected_query = tf.matmul(joint_query, projection) + bias

            # Overwrites modality 2's final encoding with the attention
            # output; the concatenation below uses this new value.
            attention_result = luong_attention(
                batch_size=self.batch_size,
                target=self.modality_2_outputs,
                condition=projected_query,
                batch_seq=self.modality_2_encoder_seq,
                max_len=self.modality_2_encoder_size,
                hidden_dim=attended_dim)
            self.modality_2_final_encoder, self.attn_norm_3hop = attention_result

            # Publish the combined representation and its total width.
            self.final_encoder = tf.concat([
                self.modality_1_final_encoder,
                self.modality_2_final_encoder,
                self.modality_3_final_encoder,
            ],
                                           axis=1)
            self.final_encoder_dimension = (
                self.modality_1_final_encoder_dimension +
                self.modality_2_final_encoder_dimension +
                self.modality_3_final_encoder_dimension)
Esempio n. 4
0
    def _create_attention_layers(self):
        """Apply ``luong_attention`` over ``self.outputs_con``.

        Inside the ``attention_layer`` name scope, ``luong_attention`` is
        called with ``self.outputs_con`` as ``target`` and
        ``self.final_encoderR`` as ``condition``.  Its two return values are
        stored unchanged.

        Sets ``self.final_encoder`` and ``self.attn_norm``.
        """
        # Call form of print: valid syntax on Python 3, and with a single
        # string argument it emits the same text on Python 2.
        print('[launch] create attention layer')
        from model_luong_attention import luong_attention
        with tf.name_scope('attention_layer'):

            self.final_encoder, self.attn_norm = luong_attention(
                batch_size=self.batch_size,
                target=self.outputs_con,
                condition=self.final_encoderR,
                target_encoder_length=self.context_size,
                hidden_dim=self.final_encoder_dimension)