Esempio n. 1
0
    def __init__(self, label, image_size, channel_num, kernel_num, z_size,
                 device):
        """Build the "ae_vine" model.

        Every constructor argument is stored on the instance under its own
        name and ``self.vine`` starts out as ``None``. The network parts are
        a three-stage ``_conv`` encoder, a three-stage ``_deconv`` decoder
        followed by a sigmoid, and two linear layers (``project`` and
        ``q_layer``) mapping between the feature volume and a ``z_size`` code.
        """
        super().__init__()

        # configuration
        self.model_name = "ae_vine"
        self.label = label
        self.image_size = image_size
        self.channel_num = channel_num
        self.kernel_num = kernel_num
        self.z_size = z_size
        self.device = device
        self.vine = None

        # intermediate channel widths used by both encoder and decoder
        quarter_width = kernel_num // 4
        half_width = kernel_num // 2

        # encoder: channel_num -> kernel_num//4 -> kernel_num//2 -> kernel_num
        encoder_stages = [
            _conv(channel_num, quarter_width),
            _conv(quarter_width, half_width),
            _conv(half_width, kernel_num),
        ]
        self.encoder = nn.Sequential(*encoder_stages)

        # size and volume of the encoded feature
        # (presumably each _conv stage halves the spatial size, hence // 8 --
        # confirm against _conv)
        self.feature_size = image_size // 8
        feature_area = self.feature_size**2
        self.feature_volume = kernel_num * feature_area

        # decoder: mirror of the encoder, squashed by a sigmoid
        decoder_stages = [
            _deconv(kernel_num, half_width),
            _deconv(half_width, quarter_width),
            _deconv(quarter_width, channel_num),
        ]
        decoder_stages.append(nn.Sigmoid())
        self.decoder = nn.Sequential(*decoder_stages)

        # projection: code -> feature volume, and feature volume -> code
        self.project = _linear(
            z_size,
            self.feature_volume,
            relu=False,
        )
        self.q_layer = _linear(
            self.feature_volume,
            z_size,
            relu=False,
        )
Esempio n. 2
0
    def __init__(self, label, image_size, channel_num, kernel_num, z_size, device):
        """Build the "cvae" model.

        Stores each constructor argument on the instance under its own name,
        then creates, in this order: a three-stage ``_conv`` encoder, three
        linear heads on the feature volume (``q_mean``, ``q_logvar`` and
        ``q_atanhcor``), a linear ``project`` layer from the code back to the
        feature volume, and a three-stage ``_deconv`` decoder ending in a
        sigmoid.
        """
        super().__init__()

        # configurations
        self.model_name = "cvae"
        self.label = label
        self.image_size = image_size
        self.channel_num = channel_num
        self.kernel_num = kernel_num
        self.z_size = z_size
        self.device = device

        # channel widths along the encoder; the decoder walks them backwards
        widths = (channel_num, kernel_num // 4, kernel_num // 2, kernel_num)

        # encoder
        self.encoder = nn.Sequential(
            *[_conv(src, dst) for src, dst in zip(widths[:-1], widths[1:])]
        )

        # size and volume of the encoded feature
        self.feature_size = image_size // 8
        self.feature_volume = kernel_num * (self.feature_size ** 2)

        # q: heads that read the feature volume
        self.q_mean = _linear(self.feature_volume, z_size, relu=False)
        self.q_logvar = _linear(self.feature_volume, z_size, relu=False)
        # one output per unordered pair of code dimensions: z * (z - 1) / 2
        # (presumably the atanh of pairwise correlations -- confirm with the
        # code that consumes q_atanhcor)
        pair_count = int(self.z_size * (self.z_size - 1) / 2)
        self.q_atanhcor = _linear(self.feature_volume, pair_count, relu=False)

        # projection
        self.project = _linear(z_size, self.feature_volume, relu=False)

        # decoder
        back = widths[::-1]
        decoder_stages = [_deconv(src, dst) for src, dst in zip(back[:-1], back[1:])]
        self.decoder = nn.Sequential(*decoder_stages, nn.Sigmoid())
Esempio n. 3
0
    def forward(self):
        """Build the TensorFlow graph of the model.

        The graph is assembled in consecutive variable scopes: input
        embedding (character + word), embedding encoder, context-to-query
        attention, model encoder, an attention decoder over the answer
        tokens, and a start/end pointer output layer.

        Reads attributes prepared elsewhere on ``self`` (``config``, ``c``,
        ``q``, ``ch``, ``qh``, ``a``, ``y1``, ``y2``, the masks, lengths and
        max lengths, ``word_mat``, ``char_mat``, ``dropout``, ``cell``,
        ``loop_function``, ``beam_size``).

        Sets ``c2q``, ``q2c``, ``enc``, ``gen_loss``, ``symbols``,
        ``logits``, ``yp1``, ``yp2`` and ``loss``; when ``config.decay`` is
        not ``None`` it also sets ``var_ema`` and ``assign_vars``.

        Returns:
            None.
        """
        config = self.config
        # Short aliases:
        #   N  batch size (the test batch size when a loop function is set)
        #   PL / QL  maximum passage / question length
        #   CL char limit, d hidden size, dc char embedding dim,
        #   nh number of attention heads, dw word (glove) embedding dim
        N = config.test_batch_size if self.loop_function else config.batch_size
        PL, QL = self.c_maxlen, self.q_maxlen
        CL, d, dc, nh, dw = (config.char_limit, config.hidden,
                             config.char_dim, config.num_heads,
                             config.glove_dim)

        with tf.variable_scope("Input_Embedding_Layer"):
            # Character embeddings, one row of CL characters per token.
            ch_emb = tf.reshape(tf.nn.embedding_lookup(self.char_mat, self.ch),
                                [N * PL, CL, dc])
            qh_emb = tf.reshape(tf.nn.embedding_lookup(self.char_mat, self.qh),
                                [N * QL, CL, dc])
            # Character embeddings get half the dropout of word embeddings.
            ch_emb = tf.nn.dropout(ch_emb, 1.0 - 0.5 * self.dropout)
            qh_emb = tf.nn.dropout(qh_emb, 1.0 - 0.5 * self.dropout)

            # Bidaf style conv-highway encoder
            # The second call reuses the "char_conv" weights of the first.
            ch_emb = conv(ch_emb,
                          d,
                          bias=True,
                          activation=tf.nn.relu,
                          kernel_size=5,
                          name="char_conv",
                          reuse=None)
            qh_emb = conv(qh_emb,
                          d,
                          bias=True,
                          activation=tf.nn.relu,
                          kernel_size=5,
                          name="char_conv",
                          reuse=True)

            # Max over the character axis gives one vector per token.
            ch_emb = tf.reduce_max(ch_emb, axis=1)
            qh_emb = tf.reduce_max(qh_emb, axis=1)

            ch_emb = tf.reshape(ch_emb, [N, PL, ch_emb.shape[-1]])
            # Use the question tensor's own feature width (the original
            # borrowed ch_emb's, which has the same value).
            qh_emb = tf.reshape(qh_emb, [N, QL, qh_emb.shape[-1]])

            c_emb = tf.nn.dropout(
                tf.nn.embedding_lookup(self.word_mat, self.c),
                1.0 - self.dropout)
            q_emb = tf.nn.dropout(
                tf.nn.embedding_lookup(self.word_mat, self.q),
                1.0 - self.dropout)

            # Word and character features side by side along the last axis.
            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

            c_emb = highway(c_emb,
                            size=d,
                            scope="highway",
                            dropout=self.dropout,
                            reuse=None)
            q_emb = highway(q_emb,
                            size=d,
                            scope="highway",
                            dropout=self.dropout,
                            reuse=True)

        with tf.variable_scope("Embedding_Encoder_Layer"):
            c = residual_block(c_emb,
                               num_blocks=1,
                               num_conv_layers=2,
                               kernel_size=7,
                               mask=self.c_mask,
                               num_filters=d,
                               num_heads=nh,
                               seq_len=self.c_len,
                               scope="Encoder_Residual_Block",
                               bias=False,
                               dropout=self.dropout)
            q = residual_block(
                q_emb,
                num_blocks=1,
                num_conv_layers=2,
                kernel_size=7,
                mask=self.q_mask,
                num_filters=d,
                num_heads=nh,
                seq_len=self.q_len,
                scope="Encoder_Residual_Block",
                reuse=True,  # Share the weights between passage and question
                bias=False,
                dropout=self.dropout)

        with tf.variable_scope("Context_to_Query_Attention_Layer"):
            # Similarity between passage and question positions.
            S = optimized_trilinear_for_attention([c, q],
                                                  self.c_maxlen,
                                                  self.q_maxlen,
                                                  input_keep_prob=1.0 -
                                                  self.dropout)
            # Softmax over the question axis (masked by q_mask) ...
            mask_q = tf.expand_dims(self.q_mask, 1)
            S_ = tf.nn.softmax(mask_logits(S, mask=mask_q))
            # ... and over the passage axis (masked by c_mask), transposed.
            mask_c = tf.expand_dims(self.c_mask, 2)
            S_T = tf.transpose(
                tf.nn.softmax(mask_logits(S, mask=mask_c), dim=1), (0, 2, 1))
            self.c2q = tf.matmul(S_, q)
            self.q2c = tf.matmul(tf.matmul(S_, S_T), c)
            attention_outputs = [c, self.c2q, c * self.c2q, c * self.q2c]

        with tf.variable_scope("Model_Encoder_Layer"):
            inputs = tf.concat(attention_outputs, axis=-1)
            # enc[0] is the projected attention output; enc[1..3] are three
            # passes through the same (weight-shared) "Model_Encoder" stack.
            self.enc = [conv(inputs, d, name="input_projection")]
            for i in range(3):
                if i % 2 == 0:  # dropout every 2 blocks
                    self.enc[i] = tf.nn.dropout(self.enc[i],
                                                1.0 - self.dropout)
                self.enc.append(
                    residual_block(self.enc[i],
                                   num_blocks=2,
                                   num_conv_layers=2,
                                   kernel_size=5,
                                   mask=self.c_mask,
                                   num_filters=d,
                                   num_heads=nh,
                                   seq_len=self.c_len,
                                   scope="Model_Encoder",
                                   bias=False,
                                   reuse=True if i > 0 else None,
                                   dropout=self.dropout))

        with tf.variable_scope("Decoder_Layer"):
            # The decoder attends over the three model-encoder outputs.
            memory = tf.concat([self.enc[1], self.enc[2], self.enc[3]],
                               axis=-1)
            # One tensor per decoding step, split along axis 1 of self.a.
            oups = tf.split(self.a, [1] * self.a_maxlen, 1)
            # Initial states derived from the mean of the memory.
            h = tf.tanh(
                _linear(tf.reduce_mean(memory, axis=1),
                        output_size=d,
                        bias=False,
                        scope="h_initial"))
            # NOTE: `c` is rebound here; the encoded passage held in `c`
            # above is not referenced again after this point.
            c = tf.tanh(
                _linear(tf.reduce_mean(memory, axis=1),
                        output_size=d,
                        bias=False,
                        scope="c_initial"))
            state = (c, h)
            outputs = []
            prev = None
            prev_probs = [0.0]
            symbols = []
            for i, inp in enumerate(oups):
                einp = tf.reshape(tf.nn.embedding_lookup(self.word_mat, inp),
                                  [N, dw])
                if i > 0:
                    # Every step after the first shares the decoder weights.
                    tf.get_variable_scope().reuse_variables()

                if self.loop_function is not None and prev is not None:
                    # Feed back the previous output instead of self.a: the
                    # loop function returns the next input, updated
                    # probabilities, the indices to keep and the chosen symbol.
                    with tf.variable_scope("loop_function", reuse=True):
                        einp, prev_probs, index, prev_symbol = self.loop_function(
                            prev, prev_probs, self.beam_size, i)
                        h = tf.gather(h, index)  # update prev state
                        state = tuple(tf.gather(s, index)
                                      for s in state)  # update prev state
                        for j, symbol in enumerate(symbols):
                            symbols[j] = tf.gather(
                                symbol, index)  # update prev symbols
                        for j, output in enumerate(outputs):
                            outputs[j] = tf.gather(
                                output, index)  # update prev outputs
                        symbols.append(prev_symbol)

                # Attend over the memory using the current hidden state.
                attn = tf.reshape(
                    multihead_attention(tf.expand_dims(h, 1),
                                        units=d,
                                        num_heads=nh,
                                        memory=memory,
                                        mask=self.c_mask,
                                        bias=False), [-1, nh * d])

                cinp = tf.concat([einp, attn], 1)
                h, state = self.cell(cinp, state)

                with tf.variable_scope("AttnOutputProjection"):
                    output = _linear([h] + [cinp],
                                     output_size=dw * 2,
                                     bias=False,
                                     scope="output")
                    output = tf.reshape(output, [-1, dw, 2])
                    output = tf.reduce_max(output, 2)  # maxout
                    outputs.append(output)

                if self.loop_function is not None:
                    prev = output

            if self.loop_function is not None:
                # process the last symbol
                einp, prev_probs, index, prev_symbol = self.loop_function(
                    prev, prev_probs, self.beam_size, i + 1)
                for j, symbol in enumerate(symbols):
                    symbols[j] = tf.gather(symbol,
                                           index)  # update prev symbols
                for j, output in enumerate(outputs):
                    outputs[j] = tf.gather(output,
                                           index)  # update prev outputs
                symbols.append(prev_symbol)

                # output the final best result of beam search
                for k, symbol in enumerate(symbols):
                    symbols[k] = tf.gather(symbol, 0)
                for k, output in enumerate(outputs):
                    outputs[k] = tf.expand_dims(tf.gather(output, 0), 0)

            self.gen_loss = self._compute_loss(outputs, oups, N)
            self.symbols = symbols

        with tf.variable_scope("Output_Layer"):
            # Start pointer reads enc[1] and enc[2]; end pointer reads enc[1]
            # and enc[3].
            start_logits = tf.squeeze(
                conv(tf.concat([self.enc[1], self.enc[2]], axis=-1),
                     1,
                     bias=False,
                     name="start_pointer"), -1)
            end_logits = tf.squeeze(
                conv(tf.concat([self.enc[1], self.enc[3]], axis=-1),
                     1,
                     bias=False,
                     name="end_pointer"), -1)
            self.logits = [
                mask_logits(start_logits, mask=self.c_mask),
                mask_logits(end_logits, mask=self.c_mask)
            ]

            logits1, logits2 = [l for l in self.logits]

            # Joint start/end probabilities, restricted to spans with
            # start <= end <= start + config.ans_limit.
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, config.ans_limit)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)

        # NOTE(review): this overwrites the span loss assigned just above, so
        # only the generation loss is optimised -- confirm this is intended.
        self.loss = self.gen_loss

        if config.l2_norm is not None:
            # NOTE(review): `regularizer` is not defined in this method;
            # presumably a module-level name -- verify.
            variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = tf.contrib.layers.apply_regularization(
                regularizer, variables)
            self.loss += l2_loss

        if config.decay is not None:
            self.var_ema = tf.train.ExponentialMovingAverage(config.decay)
            ema_op = self.var_ema.apply(tf.trainable_variables())
            # Evaluating the loss also triggers the moving-average update.
            with tf.control_dependencies([ema_op]):
                self.loss = tf.identity(self.loss)

                # Ops that copy each moving average back into its variable.
                self.assign_vars = []
                for var in tf.global_variables():
                    v = self.var_ema.average(var)
                    # average() returns None for variables without a moving
                    # average; compare against None explicitly rather than
                    # truth-testing a variable object.
                    if v is not None:
                        self.assign_vars.append(tf.assign(var, v))