Example #1
    def _att(self, a, b, keep_prob, scope, q_len=None):
        """ b attends to a """
        with tf.variable_scope(scope):
            # kernel trick here?
            with tf.variable_scope("ah"):
                ah = lib.conv(
                    a, [1, 1, a.get_shape().as_list()[-1], g_channel],
                    w_fn=lib.xavieru_conv_w,
                    b_fn=lib.zero_b,
                    act_fn=g_act)
                ah = tf.nn.dropout(ah, keep_prob,
                                   [self.batch_size, 1, 1, g_channel])

            with tf.variable_scope("bh"):
                bh = lib.conv(
                    b, [1, 1, b.get_shape().as_list()[-1], g_channel],
                    w_fn=lib.xavieru_conv_w,
                    b_fn=lib.zero_b,
                    act_fn=g_act)
                bh = tf.nn.dropout(bh, keep_prob,
                                   [self.batch_size, 1, 1, g_channel])

            with tf.variable_scope("alpha"):
                alpha = ah * bh
                with tf.variable_scope("conv1x1"):
                    alpha = lib.conv(alpha, [1, 1, g_channel, 1],
                                     w_fn=lib.xavieru_conv_w,
                                     b_fn=None,
                                     act_fn=g_linear)

            if q_len is not None:
                with tf.variable_scope("separate_softmax_attention"):
                    attended_list = []
                    for i in range(self.batch_size):
                        sliced_alpha = alpha[i][:q_len[i], :, :]
                        sliced_alpha = lib.softmax(sliced_alpha, 0)

                        sliced_a = a[i][:q_len[i], :, :]
                        with tf.variable_scope("attended"):
                            attended_a = tf.reduce_sum(sliced_a * sliced_alpha,
                                                       0, True)
                        attended_list.append(attended_a)
                    attended_a = tf.stack(attended_list, 0)
            else:
                with tf.variable_scope("softmax"):
                    alpha = lib.softmax(alpha, 1)

                with tf.variable_scope("attended"):
                    attended_a = tf.reduce_sum(a * alpha, 1, True)

        return attended_a
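
As a side note, the multiplicative attention pattern above can be sketched with plain tensor ops: per-position features a and a single context vector b are projected into a shared space, their elementwise product is scored down to one channel, softmax-normalised over positions, and used to pool a. The snippet below is illustrative only, not the original code: dense matrices w_a, w_b, w_s stand in for lib.conv's 1x1 convolutions, tf.tanh stands in for g_act, dropout, variable scopes, and the per-example q_len masking are omitted, and TF 2.x eager execution is assumed.

import tensorflow as tf

batch, positions, d_a, d_b, channels = 2, 196, 2048, 1024, 512

a = tf.random.normal([batch, positions, d_a])    # per-position features ("a")
b = tf.random.normal([batch, 1, d_b])            # single context vector ("b")

w_a = tf.random.normal([d_a, channels])          # stands in for the "ah" 1x1 conv
w_b = tf.random.normal([d_b, channels])          # stands in for the "bh" 1x1 conv
w_s = tf.random.normal([channels, 1])            # stands in for the "conv1x1" scorer

ah = tf.tanh(tf.einsum('bpd,dc->bpc', a, w_a))   # (batch, positions, channels)
bh = tf.tanh(tf.einsum('bqd,dc->bqc', b, w_b))   # (batch, 1, channels)

alpha = tf.einsum('bpc,co->bpo', ah * bh, w_s)   # broadcast product, (batch, positions, 1)
alpha = tf.nn.softmax(alpha, axis=1)             # attention weights over positions

attended_a = tf.reduce_sum(a * alpha, axis=1, keepdims=True)   # (batch, 1, d_a)
print(attended_a.shape)                          # (2, 1, 2048)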
Example #2
    def forward_graph(self,
                      qid_input=None,
                      v_input=None,
                      q_input=None,
                      q_len_input=None):
        with tf.variable_scope(self.var_scope_str):
            if qid_input is not None:
                qid = qid_input
            else:
                qid = tf.placeholder(tf.int32, [self.batch_size, 1])

            if v_input is not None:
                v_ph = tf.reshape(v_input, [self.batch_size, 14, 14, 2048])
            else:
                v_ph = tf.placeholder(tf.float32,
                                      [self.batch_size, 14, 14, 2048], "v_ph")

            if q_input is not None:
                q_ph = q_input
            else:
                q_ph = tf.placeholder(tf.int32, [self.batch_size, None],
                                      "q_ph")

            if q_len_input is not None:
                q_len = tf.reshape(q_len_input, [self.batch_size])
            else:
                q_len = tf.placeholder(tf.int32, [self.batch_size])

            keep_prob_ph = tf.placeholder(tf.float32, [], "keep_prob_ph")

            v_model = Visual("visual", self.batch_size)
            v = v_model(v_ph, keep_prob_ph)

            embed = Embed("q_embed", self.q_vocab_size, self.embed_size,
                          self.batch_size)
            q_0 = embed(q_ph)

            q_model = Question("question", self.batch_size)
            q = q_model(q_0, keep_prob_ph, q_len)

            att_model_1 = Attention("attention_1", self.batch_size)
            att_1 = att_model_1(v, q, keep_prob_ph, q_len)

            ans_model = Answer("answer", self.batch_size)
            logit = ans_model(att_1, self.a_vocab_size, keep_prob_ph)
            pred_idx = tf.argmax(logit, 1)
            softmax_logit = lib.softmax(logit)

            #v_model.summarize_tensor()
            #embed.summarize_embed()
            #q_model.summarize_tensor()
            att_model_1.summarize_tensor()

            #ans_model.summarize_tensor()

            regu_vars = []
            regu_vars += embed.weights
            regu_vars += v_model.weights
            regu_vars += q_model.weights
            regu_vars += att_model_1.weights
            regu_vars += ans_model.weights
            regu_vars = [
                w for w in regu_vars if "bias" not in w.name
                and "gamma" not in w.name and "beta" not in w.name
            ]

            self.embed_model = embed
            self.q_model = q_model
            self.v_model = v_model
            self.att_model = att_model_1
            self.ans_model = ans_model

            self.regu_vars = regu_vars
            self.qid = qid
            self.v_ph = v_ph
            self.q_ph = q_ph
            self.q_len = q_len
            self.logit = logit
            self.pred_idx = pred_idx
            self.softmax_logit = softmax_logit
            self.keep_prob_ph = keep_prob_ph
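
When no input tensors are passed in, forward_graph only declares placeholders, so at run time they have to be fed through a feed_dict. The snippet below is a self-contained, heavily simplified sketch of that driving pattern under TF 1.x-style graph execution; the placeholders mirror the shapes used above, but dummy_pred merely stands in for the real pred_idx produced by the Visual/Embed/Question/Attention/Answer sub-models.

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

batch_size = 2
v_ph = tf.placeholder(tf.float32, [batch_size, 14, 14, 2048], "v_ph")
q_ph = tf.placeholder(tf.int32, [batch_size, None], "q_ph")
q_len = tf.placeholder(tf.int32, [batch_size], "q_len")
keep_prob_ph = tf.placeholder(tf.float32, [], "keep_prob_ph")

# Stand-in for the real network; forward_graph wires these placeholders
# through its sub-models to produce pred_idx.
dummy_pred = tf.argmax(tf.reduce_mean(v_ph, [1, 2]), 1)   # (batch_size,)

with tf.Session() as sess:
    pred = sess.run(dummy_pred, feed_dict={
        v_ph: np.zeros([batch_size, 14, 14, 2048], np.float32),
        q_ph: np.zeros([batch_size, 27], np.int32),
        q_len: np.full([batch_size], 27, np.int32),
        keep_prob_ph: 1.0,   # e.g. no dropout at evaluation time
    })
    print(pred.shape)   # (2,)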
Example #3
    def co_att(self, v, q, q_len, dim, var_scope_str):

        # v (batch_size, 196, 1, 2048)
        # q (batch_size, 27, 1, 256)
        v_dim = v.get_shape().as_list()[-1]
        q_dim = q.get_shape().as_list()[-1]

        with tf.variable_scope(var_scope_str):
            with tf.variable_scope("C"):
                b_c_q = lib.zero_b("bias_q", [q_dim])
                b_c_v = lib.zero_b("bias_v", [v_dim])
                with tf.variable_scope("QW"):
                    QW = lib.conv(q + b_c_q, [1, 1, q_dim, v_dim],
                                  b_fn=None,
                                  act_fn=g_linear)
                # (b, 27, 1, 2048)
                QW = tf.squeeze(QW, 2)
                # (b, 27, 2048)
                V = tf.squeeze(v + b_c_v, 2)
                # (b, 196, 2048)
                QWV = tf.matmul(QW, V, transpose_b=True)
                # (b, 27, 196)
                C = g_act(QWV)
                # (b, 27, 196)

            with tf.variable_scope("WQ"):
                WQ = lib.conv(q, [1, 1, q_dim, dim],
                              b_fn=lib.zero_b,
                              act_fn=g_linear)
                # (b, 27, 1, dim)
                WQ = tf.transpose(tf.squeeze(WQ, 2), [0, 2, 1])
                # (b, dim, 27)

            with tf.variable_scope("WV"):
                WV = lib.conv(v, [1, 1, v_dim, dim],
                              b_fn=lib.zero_b,
                              act_fn=g_linear)
                # (b, 196, 1, dim)
                WV = tf.transpose(tf.squeeze(WV, 2), [0, 2, 1])
                # (b, dim, 196)

            with tf.variable_scope("H_v"):
                WQC = tf.matmul(WQ, C)
                # (b, dim, 196)
                WV_WQC = tf.add(WV, WQC)
                # (b, dim, 196)
                H_v = g_act(WV_WQC)
                # (b, dim, 196)

            with tf.variable_scope("v_attention"):
                H_v = tf.expand_dims(tf.transpose(H_v, [0, 2, 1]), 2)
                # (b, 196, 1, dim)
                wH_v = lib.conv(H_v, [1, 1, dim, 1], act_fn=g_linear)
                wH_v = tf.squeeze(wH_v, [2, 3])
                # (b, 196)
                a_v = lib.softmax(wH_v)
                # (b, 196)

                with tf.variable_scope("attented_v"):
                    v_hat = tf.multiply(tf.squeeze(v, 2),
                                        tf.expand_dims(a_v, 2))
                    # (b, 196, v_dim)
                    v_hat = tf.reduce_sum(v_hat, 1)
                    # (b, v_dim)

            with tf.variable_scope("H_q"):
                WVC = tf.matmul(WV, C, transpose_b=True)
                # (b, dim, 27)
                WQ_WVC = tf.add(WQ, WVC)
                # (b, dim, 27)
                H_q = g_act(WQ_WVC)
                # (b, dim, 27)

            with tf.variable_scope("q_attention"):
                H_q = tf.expand_dims(tf.transpose(H_q, [0, 2, 1]), 2)
                # (b, 27, 1, dim)
                wH_q = lib.conv(H_q, [1, 1, dim, 1], act_fn=g_linear)
                # (b, 27, 1, 1)
                wH_q = tf.squeeze(wH_q, [2, 3])
                # (b, 27)
                with tf.variable_scope("split_softmax_attention_concat"):
                    q_hat = []
                    for i in range(self.batch_size):
                        sliced_q = q[i, :q_len[i], :]
                        # (q_len[i], 1, q_dim)

                        sliced_wH_q = lib.softmax(wH_q[i, :q_len[i]])
                        # (q_len[i])

                        attention = tf.multiply(tf.squeeze(sliced_q, 1),
                                                tf.expand_dims(sliced_wH_q, 1))
                        # (q_len[i], q_dim)
                        attention = tf.reduce_sum(attention, 0)
                        # (q_dim)
                        q_hat.append(attention)
                    q_hat = tf.stack(q_hat, 0)
                    # (b, q_dim)

            return v_hat, q_hat
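
For reference, the parallel co-attention computed above can be sketched end to end with plain tensor ops. The snippet below is a simplified illustration, not the original code: random dense matrices W_c, W_q, W_v, w_hv, w_hq stand in for lib.conv and lib.zero_b, tf.tanh stands in for g_act, the per-example split softmax over q_len (which masks padded question positions) is omitted, and TF 2.x eager execution is assumed.

import tensorflow as tf

batch, n_q, n_v, q_dim, v_dim, dim = 2, 27, 196, 256, 2048, 512

q = tf.random.normal([batch, n_q, q_dim])   # question features
v = tf.random.normal([batch, n_v, v_dim])   # image features

W_c = tf.random.normal([q_dim, v_dim])      # affinity projection (the "C" branch)
W_q = tf.random.normal([q_dim, dim])        # question projection ("WQ")
W_v = tf.random.normal([v_dim, dim])        # image projection ("WV")
w_hv = tf.random.normal([dim, 1])           # image attention scorer
w_hq = tf.random.normal([dim, 1])           # question attention scorer

# Affinity matrix coupling every question position with every image position.
C = tf.tanh(tf.einsum('bqd,de,bve->bqv', q, W_c, v))        # (batch, n_q, n_v)

WQ = tf.einsum('bqd,dk->bkq', q, W_q)                       # (batch, dim, n_q)
WV = tf.einsum('bvd,dk->bkv', v, W_v)                       # (batch, dim, n_v)

# Each modality is re-weighted through C before computing its own attention.
H_v = tf.tanh(WV + tf.matmul(WQ, C))                        # (batch, dim, n_v)
H_q = tf.tanh(WQ + tf.matmul(WV, C, transpose_b=True))      # (batch, dim, n_q)

a_v = tf.nn.softmax(tf.einsum('bkv,ko->bv', H_v, w_hv), axis=1)   # (batch, n_v)
a_q = tf.nn.softmax(tf.einsum('bkq,ko->bq', H_q, w_hq), axis=1)   # (batch, n_q)

v_hat = tf.reduce_sum(v * tf.expand_dims(a_v, 2), axis=1)   # (batch, v_dim)
q_hat = tf.reduce_sum(q * tf.expand_dims(a_q, 2), axis=1)   # (batch, q_dim)
print(v_hat.shape, q_hat.shape)                             # (2, 2048) (2, 256)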