Example #1
def _ffn_layer(inputs, hidden_size, output_size, keep_prob=None,
               data_format="NHWC", dtype=None, scope=None):
    with tf.variable_scope(scope, default_name="ffn_layer", values=[inputs],
                           dtype=dtype):
        with tf.variable_scope("input_layer"):
            hidden = linear(inputs, hidden_size, True, data_format=data_format)
            hidden = tf.nn.relu(hidden)

        if keep_prob and keep_prob < 1.0:
            hidden = tf.nn.dropout(hidden, keep_prob)

        with tf.variable_scope("output_layer"):
            output = linear(hidden, output_size, True, data_format=data_format)

        return output
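
A minimal usage sketch for the block above (the call site, tensor name, and sizes are assumptions for illustration, not part of the original snippet): the feed-forward layer is applied position-wise to a [batch, length, hidden] tensor, e.g. inside a Transformer layer.

# hypothetical call site: project a [batch, length, 512] tensor through the FFN block
ffn_output = _ffn_layer(encoder_output,
                        hidden_size=2048,
                        output_size=512,
                        keep_prob=0.9,
                        scope="ffn_layer")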
Example #2
    def inference(self, input_):
        conv1 = layers.conv2d_same(input_, self.num_kernel, name='conv1')

        res_block1 = self.res_block(conv1,
                                    self.num_kernel * 2,
                                    is_downsizing=False,
                                    name='res_block1')
        res_block2 = self.res_block(res_block1,
                                    self.num_kernel * 4,
                                    is_downsizing=True,
                                    name='res_block2')
        res_block3 = self.res_block(res_block2,
                                    self.num_kernel * 8,
                                    is_downsizing=True,
                                    name='res_block3')

        act = self.res_act(res_block3)
        pool = layers.avg_pool(act,
                               k_h=self.pool_kernel,
                               k_w=self.pool_kernel,
                               d_h=1,
                               d_w=1,
                               name='pool')
        flat = layers.flatten(pool, 'flat')

        linear = layers.linear(flat, self.num_class, name='linear')

        return linear
Example #3
    def inference(self, input_):
        conv1 = layers.conv2d_same_repeat(input_,
                                          self.kernel_num,
                                          num_repeat=2,
                                          name="down1")
        pool1 = layers.max_pool(conv1, name="pool1")

        conv2 = layers.conv2d_same_repeat(pool1,
                                          self.kernel_num * 2,
                                          num_repeat=2,
                                          name="down2")
        pool2 = layers.max_pool(conv2, name="pool2")

        conv3 = layers.conv2d_same_repeat(pool2,
                                          self.kernel_num * 4,
                                          num_repeat=3,
                                          name="down3")
        pool3 = layers.max_pool(conv3, name="pool3")

        conv4 = layers.conv2d_same_repeat(pool3,
                                          self.kernel_num * 8,
                                          num_repeat=3,
                                          name="down4")
        pool4 = layers.max_pool(conv4, name="pool4")

        conv5 = layers.conv2d_same_repeat(pool4,
                                          self.kernel_num * 8,
                                          num_repeat=3,
                                          name="down5")
        pool5 = layers.max_pool(conv5, name="pool5")

        flat = layers.flatten(pool5, 'flat')

        linear = layers.linear(flat,
                               flat.get_shape().as_list()[-1],
                               name='linear')

        logits = layers.linear(linear, self.num_class, name='logits')

        return logits
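
The `conv2d_same_repeat`, `max_pool`, `flatten`, and `linear` helpers come from the project's own `layers` module and are not shown here. As a rough, assumed sketch of what `conv2d_same_repeat` does (a VGG-style stack of 3x3 SAME-padded convolutions; the real helper may differ in kernel size, activation, or normalization):

import tensorflow as tf  # TF 1.x

def conv2d_same_repeat_sketch(x, num_outputs, num_repeat, name):
    # hypothetical re-implementation: num_repeat 3x3 convolutions,
    # SAME padding, ReLU activation, each in its own variable scope
    with tf.variable_scope(name):
        for i in range(num_repeat):
            x = tf.layers.conv2d(x, num_outputs, 3, padding='same',
                                 activation=tf.nn.relu,
                                 name='conv%d' % (i + 1))
    return x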
Example #4
    def inference(self, input_, reuse=False):
        with tf.variable_scope('ResNet') as scope:
            if reuse:
                scope.reuse_variables()

            conv1 = layers.conv2d_same_act(input_, self.num_kernel, k_h=7, k_w=7, d_h=2, d_w=2,
                                           activation_fn=self.act_fn, name='conv_1')

            pool1 = layers.max_pool(conv1, k_h=self.pool_kernel, k_w=self.pool_kernel,
                                    padding='SAME', name='pool1')

            layer_blocks = self.layer_repeat(pool1, self.layer_def, name='layers')

            pool2 = layers.global_avg_pool(layer_blocks, name='pool2')

            flat = layers.flatten(pool2, 'flat')

            linear = layers.linear(flat, self.num_class, name='linear')

            logit = tf.sigmoid(linear, name='logit')

            return logit
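
A hedged follow-up note: because the head applies tf.sigmoid rather than a softmax, the matching training loss would normally be sigmoid cross-entropy computed on the pre-activation `linear` tensor. The snippet below is an assumed illustration; `labels` is hypothetical and holds 0/1 targets with the same shape as `linear`.

# hypothetical training loss for the sigmoid head above
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=linear))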
Example #5
def deepatt_model(features, mode, params):
    hparams = params
    params = copy.copy(hparams)

    # disable dropout in evaluation/inference mode
    if mode != tf.contrib.learn.ModeKeys.TRAIN:
        params.attention_dropout = 0.0
        params.residual_dropout = 0.0
        params.relu_dropout = 0.0

    vocab_size = len(params.vocabulary["inputs"])
    label_size = len(params.vocabulary["targets"])
    hidden_size = params.hidden_size
    feature_size = params.feature_size

    tok_seq = features["inputs"]
    pred_seq = features["preds"]
    mask = tf.to_float(tf.not_equal(tok_seq, 0))

    # shared embedding and softmax weights
    initializer = None

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        if not params.use_global_initializer:
            initializer = tf.random_normal_initializer(0.0,
                                                       feature_size ** -0.5)

    weights = tf.get_variable("weights", [2, feature_size],
                              initializer=initializer)

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        if params.embedding is not None:
            initializer = lambda shape, dtype, partition_info: params.embedding
    else:
        initializer = None

    embedding = tf.get_variable("embedding", [vocab_size, feature_size],
                                initializer=initializer,
                                trainable=not params.fix_embedding)
    bias = tf.get_variable("bias", [hidden_size])

    # id => embedding
    # src_seq: [batch, max_src_length]
    # tgt_seq: [batch, max_tgt_length]
    inputs = tf.gather(embedding, tok_seq)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        if features.get("mask") is not None:
            keep_mask = features["mask"][:, :, None]
            unk_emb = features["embedding"]
            inputs = inputs * keep_mask + (1.0 - keep_mask) * unk_emb

    preds = tf.gather(weights, pred_seq)
    inputs = tf.concat([inputs, preds], -1)

    if params.multiply_embedding_mode == "sqrt_depth":
        inputs = inputs * (hidden_size ** 0.5)

    inputs = inputs * tf.expand_dims(mask, -1)

    # preparing encoder & decoder input
    encoder_input = tf.nn.bias_add(inputs, bias)

    if params.pos == "timing":
        encoder_input = ops.attention.add_timing_signal(encoder_input)
    elif params.pos == "embedding":
        initializer = tf.random_normal_initializer(0.0, hidden_size ** -0.5)
        embedding = tf.get_variable("position_embedding", [1000, hidden_size],
                                    initializer=initializer)
        indices = tf.range(tf.shape(features["inputs"])[1])[None, :]
        pos_emb = tf.gather(embedding, indices)
        pos_emb = tf.tile(pos_emb, [tf.shape(features["inputs"])[0], 1, 1])
        encoder_input = encoder_input + pos_emb

    if params.residual_dropout:
        keep_prob = 1.0 - params.residual_dropout
        encoder_input = tf.nn.dropout(encoder_input, keep_prob)

    encoder_output = encoder(encoder_input, mask, params)

    initializer = None

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        if not params.use_global_initializer:
            initializer = tf.random_normal_initializer(0.0,
                                                       hidden_size ** -0.5)

    with tf.variable_scope("prediction", initializer=initializer):
        logits = linear(encoder_output, label_size, True, scope="logits")

    if mode == tf.contrib.learn.ModeKeys.INFER:
        outputs = tf.to_int32(tf.argmax(logits, axis=-1))
        return outputs, tf.nn.softmax(logits)

    labels = features["targets"]
    targets = features["targets"]
    logits = tf.reshape(logits, [-1, label_size])
    labels = tf.reshape(labels, [-1])

    # label smoothing
    ce = ops.layers.smoothed_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=labels,
        label_smoothing=params.label_smoothing,
        normalize=True
    )

    ce = tf.reshape(ce, tf.shape(targets))
    cost = tf.reduce_sum(ce * mask) / tf.reduce_sum(mask)

    # greedy decoding
    if mode == tf.contrib.learn.ModeKeys.EVAL:
        outputs = tf.to_int32(tf.argmax(logits, axis=-1))
        return cost, tf.reshape(outputs, tf.shape(targets))

    return cost
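
`ops.layers.smoothed_softmax_cross_entropy_with_logits` is project-specific and not shown here. A minimal sketch of the label-smoothing idea it implements (assumed; the repository version additionally takes the `normalize=True` option used above, which typically rescales the loss by the entropy of the smoothed target distribution):

def smoothed_softmax_cross_entropy_sketch(logits, labels, label_smoothing,
                                          num_classes):
    # spread `label_smoothing` probability mass uniformly over the other classes
    on_value = 1.0 - label_smoothing
    off_value = label_smoothing / (num_classes - 1)
    soft_targets = tf.one_hot(labels, num_classes,
                              on_value=on_value, off_value=off_value)
    return tf.nn.softmax_cross_entropy_with_logits_v2(labels=soft_targets,
                                                      logits=logits)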
Example #6
    def _inference(self, input_):
        conv1 = layers.conv2d_same_act(input_,
                                       16,
                                       activation_fn=self.activation_fn,
                                       name='conv1')
        skip1 = layers.bottleneck_layer(conv1, 32, name='skip1')
        _, conv2 = layers.conv2d_same_repeat(conv1,
                                             32,
                                             num_repeat=2,
                                             activation_fn=self.activation_fn,
                                             with_logit=True,
                                             name='conv2')

        res1 = tf.add(skip1, conv2, name='res1')
        res_act1 = self.res_act(res1)

        _, conv3 = layers.conv2d_same_repeat(res_act1,
                                             32,
                                             num_repeat=2,
                                             activation_fn=self.activation_fn,
                                             with_logit=True,
                                             name='conv3')

        res2 = tf.add(conv3, res1, name='res2')
        res_act2 = self.res_act(res2)

        skip2 = layers.bottleneck_layer(res_act2,
                                        64,
                                        d_h=2,
                                        d_w=2,
                                        name='skip2')
        conv4 = layers.conv2d_same_act(res_act2,
                                       64,
                                       d_h=2,
                                       d_w=2,
                                       activation_fn=self.activation_fn,
                                       name='conv4')
        conv5 = layers.conv2d_same(conv4, 64, name='conv5')

        res3 = tf.add(skip2, conv5, name='res3')
        res_act3 = self.res_act(res3)

        _, conv6 = layers.conv2d_same_repeat(res_act3,
                                             64,
                                             num_repeat=2,
                                             activation_fn=self.activation_fn,
                                             with_logit=True,
                                             name='conv6')

        res4 = tf.add(res3, conv6, name='res4')
        res_act4 = self.res_act(res4)

        skip3 = layers.bottleneck_layer(res_act4,
                                        128,
                                        d_h=2,
                                        d_w=2,
                                        name='skip3')
        conv7 = layers.conv2d_same_act(res_act4,
                                       128,
                                       d_h=2,
                                       d_w=2,
                                       activation_fn=self.activation_fn,
                                       name='conv7')
        conv8 = layers.conv2d_same(conv7, 128, name='conv8')

        res5 = tf.add(skip3, conv8, name='res5')

        res_act5 = self.res_act(res5)
        _, conv9 = layers.conv2d_same_repeat(res_act5,
                                             128,
                                             num_repeat=2,
                                             activation_fn=self.activation_fn,
                                             with_logit=True,
                                             name='conv9')

        res6 = tf.add(res5, conv9, name='res6')
        res_act6 = self.res_act(res6)

        pool = layers.avg_pool(res_act6,
                               k_h=8,
                               k_w=8,
                               d_h=1,
                               d_w=1,
                               name='pool')
        flat = layers.flatten(pool, 'flat')

        linear = layers.linear(flat, self.num_class, name='linear')

        return linear
Example #7
def multi_mask_tensorized_self_attn(rep_tensor,
                                    rep_mask,
                                    final_mask_ft,
                                    hn,
                                    head_num,
                                    keep_prob=None,
                                    scope=None):
    data_format = "NHWC"
    assert hn % head_num == 0, "hn (%d) must be divisible by the number of " \
                               "attention heads (%d)." % (hn, head_num)
    head_dim = int(hn / head_num)

    bs, sl = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1]
    with tf.variable_scope(scope or 'proposed_self_attention'):
        combined = linear(rep_tensor,
                          3 * hn,
                          True,
                          True,
                          data_format=data_format,
                          scope="qkv_transform")
        q, k, v = tf.split(combined, 3, 2)  # bs,sl,hn

        q = split_head(q, head_num)
        k = split_head(k, head_num)
        v = split_head(v, head_num)  # num,bs,sl,dim

        with tf.name_scope("dot_product_attention"):
            dot_logits = tf.matmul(q, k, transpose_b=True) * (head_dim ** -0.5)  # num,bs,sl,sl
            e_dot_logits = tf.exp(new_exp_mask(dot_logits, final_mask_ft))  # num,bs,sl,sl

        with tf.variable_scope("s2t_multi_dim_attention"):
            multi_logits_before = linear(rep_tensor,
                                         hn,
                                         True,
                                         True,
                                         data_format=data_format,
                                         scope="multi_logits_before")
            multi_logits = split_head(multi_logits_before,
                                      head_num)  # num,bs,sl,dim
            e_multi_logits = tf.exp(new_exp_mask(multi_logits, rep_mask,
                                                 multi_head=True,
                                                 high_dim=True))  # num,bs,sl,dim

        with tf.name_scope("hybrid_attn"):
            accum_z_deno = tf.matmul(e_dot_logits,
                                     e_multi_logits)  # num,bs,sl,dim
            accum_z_deno = tf.where(  # in case of nan
                tf.greater(accum_z_deno, tf.zeros_like(accum_z_deno)),
                accum_z_deno, tf.ones_like(accum_z_deno))
            if keep_prob is not None and keep_prob < 1.0:
                real_keep_prob = keep_prob
                e_multi_logits = tf.nn.dropout(e_multi_logits, real_keep_prob)
                e_dot_logits = tf.nn.dropout(e_dot_logits, real_keep_prob)

            rep_mul_score = new_mask(
                v, rep_mask, multi_head=True, high_dim=True) * e_multi_logits
            accum_rep_mul_score = tf.matmul(e_dot_logits, rep_mul_score)
            attn_res = accum_rep_mul_score / accum_z_deno

        with tf.variable_scope("output"):
            attn_output = combine_head(attn_res)  # bs,sl,hn
            final_out = linear(attn_output,
                               hn,
                               True,
                               data_format=data_format,
                               scope="output_transform")

        final_out = new_mask(final_out, rep_mask, high_dim=True)  # bs,sl,hn
        return final_out
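
`split_head`, `combine_head`, `new_mask`, and `new_exp_mask` are helpers from the same codebase and are not shown. For orientation, an assumed sketch of the head split/merge implied by the shape comments above (reshaping between [bs, sl, hn] and [head, bs, sl, hn/head]; the real helpers may differ):

def split_head_sketch(x, head_num):
    # [bs, sl, hn] -> [head_num, bs, sl, hn // head_num]
    bs, sl = tf.shape(x)[0], tf.shape(x)[1]
    hn = x.get_shape().as_list()[-1]
    x = tf.reshape(x, [bs, sl, head_num, hn // head_num])
    return tf.transpose(x, [2, 0, 1, 3])

def combine_head_sketch(x):
    # [head_num, bs, sl, dim] -> [bs, sl, head_num * dim]
    head_num = x.get_shape().as_list()[0]
    dim = x.get_shape().as_list()[-1]
    bs, sl = tf.shape(x)[1], tf.shape(x)[2]
    x = tf.transpose(x, [1, 2, 0, 3])
    return tf.reshape(x, [bs, sl, head_num * dim])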