Example #1
File: model2.py, Project: Yaozeng/MRC
    def _decode(self):

        start_logits = tf.squeeze(
            conv(tf.concat([self.enc[1], self.enc[2]], axis=-1),
                 1,
                 bias=False,
                 name="start_pointer"), -1)
        end_logits = tf.squeeze(
            conv(tf.concat([self.enc[1], self.enc[3]], axis=-1),
                 1,
                 bias=False,
                 name="end_pointer"), -1)

        self.logits = [
            mask_logits(start_logits, mask=self.c_mask),
            mask_logits(end_logits, mask=self.c_mask)
        ]

        self.logits1, self.logits2 = [l for l in self.logits]

        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(self.logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(self.logits2), axis=1))

        outer = tf.matrix_band_part(outer, 0, self.max_a_len)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
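
The last four lines above implement the usual QANet-style span selection: the outer product of the start and end distributions is restricted to spans with start <= end <= start + max_a_len, and the row/column argmax give the predicted boundaries. A minimal NumPy sketch of just that step, with hypothetical toy logits (no TensorFlow needed):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(-1, keepdims=True))
    return e / e.sum(-1, keepdims=True)

# Toy start/end logits for a single passage of length 5 (hypothetical values).
start_logits = np.array([0.1, 2.0, 0.3, 0.2, 0.1])
end_logits   = np.array([0.0, 0.1, 1.5, 2.5, 0.2])
max_a_len = 2

# Joint probability of every (start, end) pair.
outer = np.outer(softmax(start_logits), softmax(end_logits))

# Keep only spans with start <= end <= start + max_a_len
# (the NumPy analogue of tf.matrix_band_part(outer, 0, max_a_len)).
rows, cols = np.indices(outer.shape)
outer = np.where((cols >= rows) & (cols - rows <= max_a_len), outer, 0.0)

yp1 = outer.max(axis=1).argmax()   # predicted start position
yp2 = outer.max(axis=0).argmax()   # predicted end position
print(yp1, yp2)                    # 1 3 for these toy logits
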
Example #2
    def _create_conv(self):
        self._create_input()
        arg_scope = tf.contrib.framework.arg_scope
        with arg_scope([conv], nl=tf.nn.relu,
                       trainable=True, mode=self.mode, graph=self.graph):
            conv1 = conv(self.image, 7, 96, 'conv1')
            mean1, var1 = tf.nn.moments(conv1, [0,1,2])
            conv1_bn = tf.nn.batch_normalization(conv1, mean1, var1, 0, 1, 1e-5)
            pool1 = max_pool(conv1_bn, 'pool1', padding='SAME')

            conv2 = conv(pool1, 5, 256, 'conv2')
            mean2, var2 = tf.nn.moments(conv2, [0,1,2])
            conv2_bn = tf.nn.batch_normalization(conv2, mean2, var2, 0, 1, 1e-5)
            pool2 = max_pool(conv2_bn, 'pool2', padding='SAME')

            conv3 = conv(pool2, 3, 512, 'conv3', stride = 1)

            conv4 = conv(conv3, 3, 512, 'conv4', stride = 1)

            conv5 = conv(conv4, 3, 512, 'conv5', stride = 1)
            pool5 = max_pool(conv5, 'pool5', padding='SAME')

            self.layer['conv1'] = conv1
            self.layer['conv2'] = conv2
            self.layer['conv3'] = conv3
            self.layer['conv4'] = conv4
            self.layer['pool5'] = pool5
            self.layer['conv_out'] = self.layer['conv5'] = conv5

        return pool5
Example #3
    def _decode(self):

        #  self.config.batch_size if not self.demo else 1,
        #  self.max_p_len,
        #  self.max_q_len,
        #  self.config.max_ch_len,
        #  self.config.hidden_size,
        #  self.config.char_embed_size,
        #  self.config.head_size
        N, PL, QL, CL, d, dc, nh = self._params()
        if self.config.use_position_attn:
            logits = tf.squeeze(
                conv(self._attention(self.enc[3], name="attn_logits"),
                     1,
                     bias=True,
                     name="logits",
                     activation=None), -1)
        else:
            logits = tf.squeeze(
                conv(self.enc[3], 1, bias=True, name="logits",
                     activation=None), -1)

        self.logits = tf.layers.dense(
            logits,
            self.max_a_len,
            use_bias=True,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                self.config.weight_decay),
            activation=None,
            name='fully_connected')

        self.yp = tf.argmax(self.logits, axis=-1)
Example #4
    def _decode(self):

        N, PL, QL, CL, d, dc, nh = self._params()

        if self.config.use_position_attn:
            start_logits = tf.squeeze(
                conv(self._attention(tf.concat([self.enc[1], self.enc[2]], axis = -1), name="attn1"), 1, bias = False, name = "start_pointer"), -1)
            end_logits = tf.squeeze(
                conv(self._attention(tf.concat([self.enc[1], self.enc[3]], axis = -1), name="attn2"), 1, bias = False, name = "end_pointer"), -1)
        else:
            start_logits = tf.squeeze(
                conv(tf.concat([self.enc[1], self.enc[2]], axis = -1), 1, bias = False, name = "start_pointer"), -1)
            end_logits = tf.squeeze(
                conv(tf.concat([self.enc[1], self.enc[3]], axis = -1), 1, bias = False, name = "end_pointer"), -1)

        self.logits = [mask_logits(start_logits, mask = tf.reshape(self.c_mask, [N, -1])),
                        mask_logits(end_logits, mask = tf.reshape(self.c_mask, [N, -1]))]

        self.logits1, self.logits2 = [l for l in self.logits]

        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(self.logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(self.logits2), axis=1))

        outer = tf.matrix_band_part(outer, 0, self.max_a_len)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
Example #5
def pixelCNN(latents, num_iteration=5, depth=256, scope='pixel_cnn'):
    cres = latents
    cres_dim = cres.shape[-1]
    num_channel = depth

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        for _ in range(num_iteration):
            c = layers.conv(cres, scope='conv1', filter_dims=[1, 1, num_channel], stride_dims=[1, 1],
                            non_linear_fn=tf.nn.relu, bias=True)
            c = layers.conv(c, scope='conv2', filter_dims=[1, 3, num_channel], stride_dims=[1, 1],
                            non_linear_fn=None, bias=True)

            padding = tf.constant([[0, 0], [1, 0], [0, 0], [0, 0]], name='padding')
            c = tf.pad(c, padding, name='pad')

            c = layers.conv(c, scope='conv3', filter_dims=[2, 1, num_channel], stride_dims=[1, 1], padding='VALID',
                            non_linear_fn=tf.nn.relu, bias=True)

            c = layers.conv(c, scope='conv4', filter_dims=[1, 1, cres_dim], stride_dims=[1, 1],
                            non_linear_fn=None, bias=True)

            cres = cres + c

        cres = tf.nn.relu(cres)

    return cres
Example #6
  def build(self):
    """Create the network graph."""
    # 1st Layer: Conv (w ReLu) -> Lrn -> Pool
    conv1 = conv(self.x, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
    norm1 = lrn(conv1, 2, 1e-05, 0.75, name='norm1')
    pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')

    # 2nd Layer: Conv (w ReLu)  -> Lrn -> Pool with 2 groups
    conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
    norm2 = lrn(conv2, 2, 1e-05, 0.75, name='norm2')
    pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')

    # 3rd Layer: Conv (w ReLu)
    conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

    # 4th Layer: Conv (w ReLu) splitted into two groups
    conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

    # 5th Layer: Conv (w ReLu) -> Pool splitted into two groups
    conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
    pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

    # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
    flattened = tf.reshape(pool5, [-1, 6 * 6 * 256])
    fc6 = fc(flattened, 6 * 6 * 256, 4096, name='fc6')

    # 7th Layer: FC (w ReLu) -> Dropout
    fc7 = fc(fc6, 4096, 4096, name='fc7')

    # 8th Layer: FC and return unscaled activations
    self.fc8 = fc(fc7, 4096, self.num_classes, relu=False, name='fc8')
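
The `conv` helper that this AlexNet-style build relies on is defined elsewhere in its repository. Below is a minimal TF1 sketch of one common implementation of such a wrapper (grouped convolution plus ReLU), with a signature mirroring the calls above; the body is illustrative only, not the repository's actual code:

import tensorflow as tf  # TF 1.x style, matching the examples above

def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x,
         name, padding='SAME', groups=1):
    """Conv + bias + ReLU; with groups > 1 the input channels and filters are
    split into `groups` independent convolutions and re-concatenated."""
    input_channels = int(x.get_shape()[-1])
    convolve = lambda i, k: tf.nn.conv2d(
        i, k, strides=[1, stride_y, stride_x, 1], padding=padding)

    with tf.variable_scope(name) as scope:
        weights = tf.get_variable(
            'weights',
            shape=[filter_height, filter_width, input_channels // groups, num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

        if groups == 1:
            out = convolve(x, weights)
        else:
            input_groups = tf.split(x, num_or_size_splits=groups, axis=3)
            weight_groups = tf.split(weights, num_or_size_splits=groups, axis=3)
            out = tf.concat([convolve(i, k) for i, k in zip(input_groups, weight_groups)],
                            axis=3)

        out = tf.nn.bias_add(out, biases)
        return tf.nn.relu(out, name=scope.name)
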
Example #7
def graph_forward():
    model = graph.Graph()
    model.add(
        layers.conv(layers.xaxier_initilizer, layers.zero_initilizer, 1, 1, 32,
                    4, 3, 3))
    #model.add(layers.Relu())
    model.add(
        layers.conv(layers.xaxier_initilizer, layers.zero_initilizer, 1, 2, 16,
                    32, 3, 3))
    #model.add(layers.Relu())
    model.add(
        layers.max_pooling(layers.xaxier_initilizer, layers.zero_initilizer, 0,
                           2, 2, 2))
    model.add(layers.flatten())
    model.add(
        layers.FullConn(layers.xaxier_initilizer, layers.zero_initilizer,
                        (1024, 10)))
    #model.add(layers.Relu())
    crit = layers.softmax_with_loss()
    y = np.array([1, 2, 3])

    def foo(x):
        logits = model.forward(x)
        prob = crit.forward(logits)
        dy, loss = crit.backward(logits, y)
        dx = model.backward(x, dy)
        return loss, dx

    return foo
Example #8
def clock_shufflenet(x,training, classes, update_3, update_2, last_frame_stage2,last_frame_stage3):
    """The whole network
        Args:
            x: the input batch
            training: whether we are in the process of training or inference
            classes: total number of classes
            update_3: True: update stage 3; False: reuse the stage 3 output from the last frame. Always True during training.
            update_2: True: update stage 2; False: reuse the stage 2 output from the last frame. Always True during training.
            last_frame_stage3: Stage three from the last frame
            last_frame_stage2: Stage two from the last frame
        Returns:
            The output of the third stage
    """

    stage1 = stage_1(x, training)
    # tf.case branches (including default) must be zero-argument callables
    stage2 = tf.case([(update_2, lambda: stage_2(stage1, training))], default=lambda: last_frame_stage2)
    stage3 = tf.case([(update_3, lambda: stage_3(stage2, training, classes))], default=lambda: last_frame_stage3)

    ########DECODER#######
    stage1_conv = layers.conv(stage1,"conv4",1,48,classes,1,training)
    stage2_conv = layers.conv(stage2,"conv3",1,96,classes,1,training)

    stage_2_and_3 = tf.add(stage2_conv,stage3)
    stage_2_and_3 = layers.up_sample_unit(stage_2_and_3,"us2",2,classes)

    stage_123 = tf.add(stage_2_and_3,stage1_conv)

    final_output = layers.up_sample_unit(stage_123,"us3",8,classes)

    return final_output, stage2, stage3
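
Note that `tf.case` (TF1) expects every branch, including `default`, to be a zero-argument callable rather than a pre-built tensor, which is why the stage-2/3 selection above needs lambda-wrapped branches. A tiny self-contained illustration with hypothetical constants:

import tensorflow as tf

# Hypothetical stand-ins for "recompute this frame" vs. "reuse last frame's output".
update = tf.placeholder(tf.bool, shape=[], name='update')
fresh = tf.constant([1.0, 2.0], name='fresh_output')
cached = tf.constant([0.0, 0.0], name='cached_output')

# Each branch is a callable; tf.case evaluates only the matching one.
out = tf.case([(update, lambda: fresh)], default=lambda: cached)

with tf.Session() as sess:
    print(sess.run(out, feed_dict={update: True}))   # [1. 2.]
    print(sess.run(out, feed_dict={update: False}))  # [0. 0.]
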
Example #9
        def context_transform(input_layer, scope='context_transform'):
            l = layers.conv(input_layer, scope=scope + '_1', filter_dims=[1, 1, target_dim], stride_dims=[1, 1],
                        non_linear_fn=tf.nn.relu, bias=True)

            l = layers.conv(l, scope=scope + '_2', filter_dims=[1, 1, target_dim], stride_dims=[1, 1],
                            non_linear_fn=None, bias=True)

            return l
Example #10
def shallow_net_5x5(x):
    net = L.conv(x, name="conv_sn5x5_1", kh=5, kw=5, n_out=24)
    net = L.pool(net, name="pool_sn5x5_1", kh=2, kw=2, dw=2, dh=2)
    net = L.conv(net, name="conv_sn5x5_2", kw=3, kh=3, n_out=48)
    net = L.pool(net, name="pool_sn5x5_2", kh=2, kw=2, dw=2, dh=2)
    net = L.conv(net, name="conv_sn5x5_3", kw=3, kh=3, n_out=24)
    net = L.conv(net, name="conv_sn5x5_4", kw=3, kh=3, n_out=12)
    return net
Example #11
def shallow_net_9x9(x):
    net = L.conv(x, name="conv_sn9x9_1", kh=9, kw=9, n_out=16)
    net = L.pool(net, name="pool_sn9x9_1", kh=2, kw=2, dw=2, dh=2)
    net = L.conv(net, name="conv_sn9x9_2", kw=7, kh=7, n_out=32)
    net = L.pool(net, name="pool_sn9x9_2", kh=2, kw=2, dw=2, dh=2)
    net = L.conv(net, name="conv_sn9x9_3", kw=7, kh=7, n_out=16)
    net = L.conv(net, name="conv_sn9x9_4", kw=7, kh=7, n_out=8)
    return net
Example #12
def shallow_net_7x7(x):
    net = L.conv(x, name="conv_sn7x7_1", kh=7, kw=7, n_out=20)
    net = L.pool(net, name="pool_sn7x7_1", kh=2, kw=2, dw=2, dh=2)
    net = L.conv(net, name="conv_sn7x7_2", kw=5, kh=5, n_out=40)
    net = L.pool(net, name="pool_sn7x7_2", kh=2, kw=2, dw=2, dh=2)
    net = L.conv(net, name="conv_sn7x7_3", kw=5, kh=5, n_out=20)
    net = L.conv(net, name="conv_sn7x7_4", kw=5, kh=5, n_out=10)
    return net
Example #13
 def __build_net(self):
     """
     Introduction
     ------------
         Build the ONet model structure
     """
     with tf.variable_scope('onet'):
         self.input = tf.placeholder(shape=[None, 48, 48, 3],
                                     dtype=tf.float32,
                                     name='input_data')
         layer = conv('conv1',
                      self.input,
                      kernel_size=(3, 3),
                      channels_output=32,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu1', layer)
         layer = max_pool('pool1', layer, kernel_size=(3, 3), stride=(2, 2))
         layer = conv('conv2',
                      layer,
                      kernel_size=(3, 3),
                      channels_output=64,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu2', layer)
         layer = max_pool('pool2',
                          layer,
                          kernel_size=(3, 3),
                          stride=(2, 2),
                          padding='VALID')
         layer = conv('conv3',
                      layer,
                      kernel_size=(3, 3),
                      channels_output=64,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu3', layer)
         layer = max_pool('pool3', layer, kernel_size=(2, 2), stride=(2, 2))
         layer = conv('conv4',
                      layer,
                      kernel_size=(2, 2),
                      channels_output=128,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu4', layer)
         layer = fc('fc1', layer, channels_output=256, relu=False)
         layer = prelu('prelu5', layer)
         fc2 = fc('fc2-1', layer, channels_output=2, relu=False)
         self.prob = tf.nn.softmax(fc2, axis=1, name='prob')
         self.loc = fc('fc2-2', layer, channels_output=4, relu=False)
Example #14
    def forward(self):
        config = self.config
        N, PL, QL, CL, d, dc, nh = config.batch_size if not self.demo else 1, \
                                   self.c_maxlen, \
                                   self.q_maxlen, \
                                   config.char_limit, \
                                   config.hidden, \
                                   config.char_dim, \
                                   config.num_heads

        with tf.variable_scope('Input_Embedding_Layer', regularizer=regularizer):
            # ******************** char embedding *********************
            # [batch_size, seq_len, word_len] -> [batch_size x seq_len, word_len, char_dim]
            ch_emb = tf.reshape(tf.nn.embedding_lookup(self.char_mat, self.ch),
                                shape=[N * PL, CL, dc])
            qh_emb = tf.reshape(tf.nn.embedding_lookup(self.char_mat, self.qh),
                                shape=[N * QL, CL, dc])
            ch_emb = tf.nn.dropout(ch_emb, keep_prob=1.0 - 0.5 * self.dropout)
            qh_emb = tf.nn.dropout(qh_emb, keep_prob=1.0 - 0.5 * self.dropout)

            # BiDAF style conv-highway encoder, share weights
            # [N * PL/QL, CL, d]
            ch_emb = conv(ch_emb, d, bias=True, activation=tf.nn.relu, kernel_size=5, name='char_conv', reuse=None)
            qh_emb = conv(qh_emb, d, bias=True, activation=tf.nn.relu, kernel_size=5, name='char_conv', reuse=True)

            # [N * CL/QL, d], reduce max along CL
            ch_emb = tf.reduce_max(ch_emb, axis=1)
            qh_emb = tf.reduce_max(qh_emb, axis=1)

            # [N, PL/QL, d]
            ch_emb = tf.reshape(ch_emb, shape=[N, PL, ch_emb.shape[-1]])
            qh_emb = tf.reshape(qh_emb, shape=[N, QL, ch_emb.shape[-1]])

            # *********************** Word embedding ************************
            # [N, PL/QL, dw]
            c_emb = tf.nn.dropout(tf.nn.embedding_lookup(self.word_mat, self.c),
                                  keep_prob=1.0 - self.dropout)
            q_emb = tf.nn.dropout(tf.nn.embedding_lookup(self.word_mat, self.q),
                                  keep_prob=1.0 - self.dropout)

            # Concat char embedding and word embedding
            # [N, PL/QL, dw + d]
            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

            # share weights
            c_emb = highway(c_emb, size=d, scope='highway', dropout=self.dropout, reuse=None)
            q_emb = highway(q_emb, size=d, scope='highway', dropout=self.dropout, reuse=True)

            print('highway, q_emb.shape: {}'.format(q_emb.shape))
            print('highway, c_emb.shape: {}'.format(c_emb.shape))

        """ *************************************Encoer ****************************************"""
		with tf.variable_scope('Encoder_Layer', regularizer=regularizer):
Example #15
def encoder(input):
    # Create a conv network with 3 conv layers and 1 FC layer
    # Conv 1: filter: [3, 3, 1], stride: [2, 2], relu
    conv1 = layers.conv(input, name = 'conv1', filter_dims=[3,3,1], stride_dims=[2,2])
    # Conv 2: filter: [3, 3, 8], stride: [2, 2], relu
    conv2 = layers.conv(conv1, name = 'conv2', filter_dims=[3,3,8], stride_dims=[2,2])
    # Conv 3: filter: [3, 3, 8], stride: [2, 2], relu
    conv3 = layers.conv(conv2, name = 'conv3', filter_dims=[3,3,8], stride_dims=[2,2])
    # FC: output_dim: 100, no non-linearity
    fc = layers.fc(conv3, name='fc', out_dim = 100)
    return fc
Example #16
def discriminator(x,
                  y,
                  is_training=True,
                  update_batch_stats=True,
                  act_fn=L.lrelu,
                  bn=FLAGS.dis_bn,
                  reuse=True):
    with tf.variable_scope('discriminator', reuse=reuse):
        if FLAGS.method == 'cgan':
            h = L.fc(y,
                     y_dim,
                     X_dim * X_dim,
                     seed=rng.randint(123456),
                     name='fc_y')
            h = tf.reshape(h, [-1, X_dim, X_dim, 1])
            h = tf.concat((x, h), axis=3)
            h = L.conv(h, 3, 1, num_channels + 1, 32, name="conv1")
        else:
            h = L.conv(x, 3, 1, num_channels, 32, name="conv1")
        h = act_fn(h)

        # 64x64 -> 32x32
        h = L.conv(
            h,
            4,
            2,
            32,
            64,
            name="conv2",
        )
        h = L.bn(h,
                 64,
                 is_training=is_training,
                 update_batch_stats=update_batch_stats,
                 use_gamma=False,
                 name='bn1') if bn else h
        h = act_fn(h)

        # 32x32 -> 16x16
        h = L.conv(h, 4, 2, 64, 128, name="conv3")
        h = L.bn(h,
                 128,
                 is_training=is_training,
                 update_batch_stats=update_batch_stats,
                 use_gamma=False,
                 name='bn2') if bn else h
        h = act_fn(h)
        h = L.conv(h, X_dim // 4, 1, 128, 1, name="conv5", padding="VALID")
        logits = tf.reshape(h, [-1, 1])
        return logits
Example #17
File: model.py, Project: MiHuangLan/reader
    def _decode(self):
        """
        Employs Pointer Network to get the the probs of each position
        to be the start or end of the predicted answer.
        Note that we concat the fuse_p_encodes for the passages in the same document.
        And since the encodes of queries in the same document is same, we select the first one.
        """
        N, PL, QL, CL, d, dc, nh = self._params()
        if self.use_position_attn:
            start_logits = tf.squeeze(
                conv(self._attention(tf.concat([self.enc[1], self.enc[2]],
                                               axis=-1),
                                     name="attn1"),
                     1,
                     bias=False,
                     name="start_pointer"), -1)
            end_logits = tf.squeeze(
                conv(self._attention(tf.concat([self.enc[1], self.enc[3]],
                                               axis=-1),
                                     name="attn2"),
                     1,
                     bias=False,
                     name="end_pointer"), -1)
        else:
            start_logits = tf.squeeze(
                conv(self.mul_anttion_p, 1, bias=False, name="start_pointer"),
                -1)
            end_logits = tf.squeeze(
                conv(self.mul_anttion_p, 1, bias=False, name="end_pointer"),
                -1)

        start_logits = tf.reshape(start_logits, [N, -1])
        self.sl = start_logits
        end_logits = tf.reshape(end_logits, [N, -1])
        self.el = end_logits
        self.logits = [
            mask_logits(start_logits, mask=tf.reshape(self.c_mask, [N, -1])),
            mask_logits(end_logits, mask=tf.reshape(self.c_mask, [N, -1]))
        ]

        self.logits1, self.logits2 = [l for l in self.logits]

        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(self.logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(self.logits2), axis=1))

        outer = tf.matrix_band_part(outer, 0, self.max_a_len)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
Example #18
def latent_discriminator(input_data, activation='swish', scope='ldiscriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        #if reuse:
        #    tf.get_variable_scope().reuse_variables()

        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        l = tf.reshape(input_data, shape=[-1, 4, 4, 8])

        l = layers.conv(l, scope='conv1', filter_dims=[3, 3, g_dense_block_depth/2], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        l = layers.global_avg_pool(l, representation_dim)
        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

    return dc_final_layer, dc_output, tf.sigmoid(dc_output)
Example #19
    def _prediction(self):
        self.batch_size = self.inputs['data'].get_shape().as_list()[0]
        current_layer = self.inputs['data']
        n_filters = 20
        ksize = 8
        depth = 7

        for i in range(depth):
            current_layer = layers.conv(inputs=current_layer,
                                        n_filters=n_filters,
                                        ksize=ksize,
                                        stride=2,
                                        scope='conv{}'.format(i + 1))
            tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, current_layer)
            self.layers['conv{}'.format(i + 1)] = current_layer
            bs, width, _ = current_layer.get_shape().as_list()
            print(bs, width, _)
        bs, width, _ = current_layer.get_shape().as_list()
        print(bs, width, _)
        current_layer = tf.reshape(current_layer, [bs, width * n_filters])
        current_layer = self.fcn(current_layer, 2, scope='logits')
        #current_layer = tf.nn.softmax(current_layer)
        self.layers['logits'] = current_layer
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, current_layer)

        self.layers['class_prob'] = tf.nn.softmax(current_layer,
                                                  name='class_prob')
        self.layers['class_prediction'] = tf.argmax(self.layers['class_prob'],
                                                    1,
                                                    name='class_pred')
        tf.contrib.layers.apply_regularization(
            tf.contrib.layers.l2_regularizer(1e-3),
            weights_list=tf.get_collection(tf.GraphKeys.WEIGHTS))
Example #20
 def test_conv_shape(self):
     with self.test_session() as sess:
         x = tf.zeros((32,227,227,3), dtype=tf.float32)
         expected_conv_out = tf.zeros((32,55,55,96),dtype=tf.float32, name='expectedout')
         actual_conv_out, weights, biases = layers.conv(x, 11, 11, 96, 4, 4, name='convtest', padding='VALID')
         sess.run(tf.initializers.variables([weights, biases]))
         self.assertAllEqual(tf.shape(actual_conv_out), tf.shape(expected_conv_out))
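
The expected 55x55 spatial size in this test follows from the usual VALID-padding formula, out = floor((in - kernel) / stride) + 1. A small helper that reproduces the numbers (purely illustrative, not part of the tested `layers` module):

def valid_conv_out_size(in_size, kernel, stride):
    """Output size of a VALID-padded convolution along one spatial dimension."""
    return (in_size - kernel) // stride + 1

# 227x227 input, 11x11 kernel, stride 4 -> (227 - 11) // 4 + 1 = 55
assert valid_conv_out_size(227, 11, 4) == 55
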
Example #21
def task(x, activation='relu', output_dim=256, scope='task_network', norm='layer', b_train=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        print('Task Layer1: ' + str(x.get_shape().as_list()))

        block_depth = dense_block_depth
        l = x
        l = layers.conv(l, scope='conv1', filter_dims=[3, 3, block_depth], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False, dilation=[1, 1, 1, 1])

        if norm == 'layer':
            l = layers.layer_norm(l, scope='ln1')
        elif norm == 'batch':
            l = layers.batch_norm_conv(l, b_train=b_train, scope='bn1')

        l = act_func(l)

        for i in range(15):
            l = layers.add_residual_block(l,  filter_dims=[3, 3, block_depth], num_layers=2, act_func=act_func,
                                          norm=norm, b_train=b_train, scope='block1_' + str(i))

        latent = layers.global_avg_pool(l, output_length=output_dim)

    return latent
Example #22
 def __build_net(self):
     """
     Introduction
          Build the first stage of the MTCNN cascade
     """
     with tf.variable_scope('pnet'):
         self.input = tf.placeholder(name='input_data',
                                     shape=[None, None, None, 3],
                                     dtype=tf.float32)
         layer = conv('conv1',
                      self.input,
                      kernel_size=(3, 3),
                      channels_output=10,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu1', layer)
         layer = max_pool('pool1', layer, kernel_size=[2, 2], stride=(2, 2))
         layer = conv('conv2',
                      layer,
                      kernel_size=(3, 3),
                      channels_output=16,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu2', layer)
         layer = conv('conv3',
                      layer,
                      kernel_size=(3, 3),
                      channels_output=32,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu3', layer)
         conv4_1 = conv('conv4-1',
                        layer,
                        kernel_size=(1, 1),
                        channels_output=2,
                        stride=(1, 1),
                        relu=False)
         self.prob = tf.nn.softmax(conv4_1, axis=3, name='prob')
         self.loc = conv('conv4-2',
                         layer,
                         kernel_size=(1, 1),
                         channels_output=4,
                         stride=(1, 1),
                         relu=False)
Example #23
    def build_graph(self):
        self.iterator = tf.data.Iterator.from_structure(
            (tf.float32, tf.int32),
            (tf.TensorShape([None, 227, 227, 3]), tf.TensorShape([None]))
        )
        self.inputs, self.labels = self.iterator.get_next()

        self.conv1 = layers.conv(self.inputs, [11, 11], 96, [4, 4],
                                 padding='VALID', name='conv1', mask=True)
        norm1 = layers.lrn(self.conv1, 2, 1e-05, 0.75, name='norm1')
        pool1 = layers.max_pool(norm1, [3, 3], [2, 2], padding='VALID',
                                name='pool1')

        self.conv2 = layers.conv(pool1, [5, 5], 256, [1, 1], groups=2,
                                 name='conv2', mask=True)
        norm2 = layers.lrn(self.conv2, 2, 1e-05, 0.75, name='norm2')
        pool2 = layers.max_pool(norm2, [3, 3], [2, 2], padding='VALID',
                                name='pool2')

        self.conv3 = layers.conv(pool2, [3, 3], 384, [1, 1], name='conv3',
                                 mask=True)

        self.conv4 = layers.conv(self.conv3, [3, 3], 384, [1, 1], groups=2,
                                 name='conv4', mask=True)

        self.conv5 = layers.conv(self.conv4, [3, 3], 256, [1, 1], groups=2,
                                 name='conv5', mask=True)
        pool5 = layers.max_pool(self.conv5, [3, 3], [2, 2], padding='VALID',
                                name='pool5')

        self.keep_prob = tf.get_variable('keep_prob', shape=(),
                                         trainable=False)

        flattened = tf.reshape(pool5, [-1, 6 * 6 * 256])
        fc6 = layers.fc(flattened, 4096, name='fc6')
        dropout6 = layers.dropout(fc6, self.keep_prob)

        fc7 = layers.fc(dropout6, 4096, name='fc7')
        dropout7 = layers.dropout(fc7, self.keep_prob)

        self.logits = layers.fc(dropout7, self.num_classes, relu=False,
                                name='fc8')
        self.probs_op = tf.nn.softmax(self.logits)
        self.pred_op = tf.argmax(input=self.logits, axis=1)
        corrects_op = tf.equal(tf.cast(self.pred_op, tf.int32),
                               self.labels)
        self.acc_op = tf.reduce_mean(tf.cast(corrects_op, tf.float32))
Example #24
def logit(h, is_training=True, update_batch_stats=True, stochastic=True, seed=1234, dropout_mask=None, return_mask=False, h_before_dropout=None):
    rng = np.random.RandomState(seed)
    if h_before_dropout is None:
        h = L.conv(h, ksize=3, stride=1, f_in=3, f_out=128, seed=rng.randint(123456), name='c1')
        h = L.lrelu(L.bn(h, 128, is_training=is_training, update_batch_stats=update_batch_stats, name='b1'), FLAGS.lrelu_a)
        h = L.conv(h, ksize=3, stride=1, f_in=128, f_out=128, seed=rng.randint(123456), name='c2')
        h = L.lrelu(L.bn(h, 128, is_training=is_training, update_batch_stats=update_batch_stats, name='b2'), FLAGS.lrelu_a)
        h = L.conv(h, ksize=3, stride=1, f_in=128, f_out=128, seed=rng.randint(123456), name='c3')
        h = L.lrelu(L.bn(h, 128, is_training=is_training, update_batch_stats=update_batch_stats, name='b3'), FLAGS.lrelu_a)

        h = L.max_pool(h, ksize=2, stride=2)
        if stochastic:
            h = tf.nn.dropout(h, keep_prob=FLAGS.keep_prob_hidden)

        h = L.conv(h, ksize=3, stride=1, f_in=128, f_out=256, seed=rng.randint(123456), name='c4')
        h = L.lrelu(L.bn(h, 256, is_training=is_training, update_batch_stats=update_batch_stats, name='b4'), FLAGS.lrelu_a)
        h = L.conv(h, ksize=3, stride=1, f_in=256, f_out=256, seed=rng.randint(123456), name='c5')
        h = L.lrelu(L.bn(h, 256, is_training=is_training, update_batch_stats=update_batch_stats, name='b5'), FLAGS.lrelu_a)
        h = L.conv(h, ksize=3, stride=1, f_in=256, f_out=256, seed=rng.randint(123456), name='c6')
        h = L.lrelu(L.bn(h, 256, is_training=is_training, update_batch_stats=update_batch_stats, name='b6'), FLAGS.lrelu_a)

        h_before_dropout = L.max_pool(h, ksize=2, stride=2)

    # Making it possible to change or return a dropout mask
    if stochastic:
        if dropout_mask is None:
            dropout_mask = tf.cast(
                tf.greater_equal(tf.random_uniform(tf.shape(h_before_dropout), 0, 1, seed=rng.randint(123456)), 1.0 - FLAGS.keep_prob_hidden),
                tf.float32)
        else:
            dropout_mask = tf.reshape(dropout_mask, tf.shape(h_before_dropout))
        h = tf.multiply(h_before_dropout, dropout_mask)
        h = (1.0 / FLAGS.keep_prob_hidden) * h
    else:
        h = h_before_dropout
    h = L.conv(h, ksize=3, stride=1, f_in=256, f_out=512, seed=rng.randint(123456), padding="VALID", name='c7')
    h = L.lrelu(L.bn(h, 512, is_training=is_training, update_batch_stats=update_batch_stats, name='b7'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=1, stride=1, f_in=512, f_out=256, seed=rng.randint(123456), name='c8')
    h = L.lrelu(L.bn(h, 256, is_training=is_training, update_batch_stats=update_batch_stats, name='b8'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=1, stride=1, f_in=256, f_out=128, seed=rng.randint(123456), name='c9')
    h = L.lrelu(L.bn(h, 128, is_training=is_training, update_batch_stats=update_batch_stats, name='b9'), FLAGS.lrelu_a)

    h = tf.reduce_mean(h, reduction_indices=[1, 2])  # Global average pooling
    h = L.fc(h, 128, 10, seed=rng.randint(123456), name='fc')

    if FLAGS.top_bn:
        h = L.bn(h, 10, is_training=is_training,
                 update_batch_stats=update_batch_stats, name='bfc')
    if return_mask:
        return h, tf.reshape(dropout_mask, [-1, 8*8*256]), h_before_dropout
    else:
        return h
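
The explicit mask above is inverted dropout made reusable: sample a Bernoulli(keep_prob) mask once, multiply by it, and rescale by 1/keep_prob so the expected activation is unchanged; keeping the mask as a tensor lets a second forward pass replay exactly the same dropout pattern. A NumPy sketch of the idea, with hypothetical shapes and keep probability:

import numpy as np

rng = np.random.RandomState(0)
keep_prob = 0.5
h = rng.randn(4, 8, 8, 256).astype(np.float32)   # hypothetical activations

# Keep each unit with probability keep_prob, exactly like
# uniform(0, 1) >= 1 - keep_prob in the TF code above.
dropout_mask = (rng.uniform(size=h.shape) >= 1.0 - keep_prob).astype(np.float32)

# Inverted dropout: rescale at train time so the expectation matches h.
h_dropped = h * dropout_mask / keep_prob

# Because the mask is an explicit array, a second pass can reuse it
# (this is what passing dropout_mask back into logit() achieves).
h_second_pass = h * dropout_mask / keep_prob
assert np.array_equal(h_dropped, h_second_pass)
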
Example #25
def discriminator(input_data, activation='swish', scope='discriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        #if reuse:
        #    tf.get_variable_scope().reuse_variables()

        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        l = layers.conv(input_data, scope='conv1', filter_dims=[3, 3, g_dense_block_depth/2], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')

        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')

        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        #l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_3')

        #l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_4')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_5')

        #l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_6')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_7')

        # dc_final_layer = batch_norm_conv(last_dense_layer, b_train=bn_phaze, scope='last_dense_layer')

        l = layers.global_avg_pool(l, representation_dim)
        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

    return dc_final_layer, dc_output, tf.sigmoid(dc_output)
Example #26
 def _guide(cls, input_tensor, params, is_training):
     n_guide_feats = params['guide_complexity']
     guidemap = conv(input_tensor,
                     n_guide_feats,
                     1,
                     batch_norm=True,
                     is_training=is_training,
                     scope='conv1')
     guidemap = conv(guidemap,
                     1,
                     1,
                     activation_fn=tf.nn.sigmoid,
                     scope='conv2')
     guidemap = tf.squeeze(guidemap, squeeze_dims=[3])
     return guidemap
Example #27
def logit(x, dropout_mask=None, is_training=True, update_batch_stats=True, stochastic=True, seed=1234):

    rng = numpy.random.RandomState(seed)
    
    h = L.gl(x, std=FLAGS.sigma)
    h = L.conv(h, ksize=3, stride=1, f_in=3, f_out=layer_sizes[0], seed=rng.randint(123456), name='c1')
    h = L.lrelu(bn(h, layer_sizes[0], is_training=is_training, update_batch_stats=update_batch_stats, name='b1'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=3, stride=1, f_in=layer_sizes[0], f_out=layer_sizes[0], seed=rng.randint(123456), name='c2')
    h = L.lrelu(bn(h, layer_sizes[0], is_training=is_training, update_batch_stats=update_batch_stats, name='b2'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=3, stride=1, f_in=layer_sizes[0], f_out=layer_sizes[0], seed=rng.randint(123456), name='c3')
    h = L.lrelu(bn(h, layer_sizes[0], is_training=is_training, update_batch_stats=update_batch_stats, name='b3'), FLAGS.lrelu_a)

    h = L.max_pool(h, ksize=2, stride=2)
    
    h = tf.nn.dropout(h, keep_prob=0.5, seed=rng.randint(123456)) if stochastic else h
    
    h = L.conv(h, ksize=3, stride=1, f_in=layer_sizes[0], f_out=layer_sizes[1], seed=rng.randint(123456), name='c4')
    h = L.lrelu(bn(h, layer_sizes[1], is_training=is_training, update_batch_stats=update_batch_stats, name='b4'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=3, stride=1, f_in=layer_sizes[1], f_out=layer_sizes[1], seed=rng.randint(123456), name='c5')
    h = L.lrelu(bn(h, layer_sizes[1], is_training=is_training, update_batch_stats=update_batch_stats, name='b5'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=3, stride=1, f_in=layer_sizes[1], f_out=layer_sizes[1], seed=rng.randint(123456), name='c6')
    h = L.lrelu(bn(h, layer_sizes[1], is_training=is_training, update_batch_stats=update_batch_stats, name='b6'), FLAGS.lrelu_a)

    h = L.max_pool(h, ksize=2, stride=2)
    
    h = tf.nn.dropout(h, keep_prob=0.5, seed=rng.randint(123456)) if stochastic else h
    
    h = L.conv(h, ksize=3, stride=1, f_in=layer_sizes[1], f_out=layer_sizes[2], seed=rng.randint(123456), padding="VALID", name='c7')
    h = L.lrelu(bn(h, layer_sizes[2], is_training=is_training, update_batch_stats=update_batch_stats, name='b7'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=1, stride=1, f_in=layer_sizes[2], f_out=layer_sizes[3], seed=rng.randint(123456), name='c8')
    h = L.lrelu(bn(h, layer_sizes[3], is_training=is_training, update_batch_stats=update_batch_stats, name='b8'), FLAGS.lrelu_a)
    h = L.conv(h, ksize=1, stride=1, f_in=layer_sizes[3], f_out=layer_sizes[4], seed=rng.randint(123456), name='c9')
    h = L.lrelu(bn(h, layer_sizes[4], is_training=is_training, update_batch_stats=update_batch_stats, name='b9'), FLAGS.lrelu_a)

    h = tf.reduce_mean(h, reduction_indices=[1, 2])  # Global average pooling

    # dropout with mask
    if dropout_mask is None:
        # Base dropout mask is 1 (Fully Connected)
        dropout_mask = tf.ones_like(h)

    h = h*dropout_mask 

    h = L.fc(h, layer_sizes[4], 10, seed=rng.randint(123456), name='fc')

    if FLAGS.top_bn:
        h = bn(h, 10, is_training=is_training,
                 update_batch_stats=update_batch_stats, name='bfc')
    
    return h, dropout_mask
Example #28
File: resnet.py, Project: pokaxpoka/cmcl
 def residual(name, l, in_channel, out_channel, stride):
     """Residual function.
     Args:
       name: Scope name of this function.
       l: Output of previous layer.
       in_channel: # of channels of l.
       out_channel: # of channels of each output feature.
       stride: Stride of the first convolution in residual function.
     """
     with tf.variable_scope(name):
         sc = l if stride == 1 else shortcut(l, in_channel, out_channel)
         l = layers.conv('conv_0', l, out_channel, stride=stride)
         l = layers.batchnorm('bn_0', l, is_train)
         l = tf.nn.relu(l)
         l = layers.conv('conv_1', l, out_channel, stride=1)
         l = layers.batchnorm('bn_1', l, is_train)
         l = tf.nn.relu(l + sc)
         return l
Example #29
 def __init__(self):
     self.lr = 0.01
     # conv net
     self.c1 = conv(1, 6, kernel=5, learning_rate=self.lr)
     self.relu1 = relu()
     self.s2 = max_pool(kernel=2, stride=2)
     self.c3 = conv(6, 16, kernel=5, learning_rate=self.lr)
     self.relu3 = relu()
     self.s4 = max_pool(kernel=2, stride=2)
     self.c5 = conv(16, 120, kernel=4, learning_rate=self.lr)
     self.relu5 = relu()
     # fc net
     self.f6 = fc(120, 84, learning_rate=self.lr)
     self.relu6 = relu()
     self.f7 = fc(84, 10)
     self.sig7 = softmax()
     # record the shape between the conv net and fc net
     self.conv_out_shape = None
Example #30
    def getConvLayers(self, input):
        conv = layers.conv(input, reuse=self.reuse, name='conv')
        genr64 = layers.genr(conv, 128, reuse=self.reuse, name='genr64')
        genr128 = layers.genr(genr64, 256, reuse=self.reuse, name='genr128')

        if not self.reuse:
            self.reuse = tf.AUTO_REUSE

        return conv, genr64, genr128
Example #31
File: model.py, Project: txye/QANet
    def forward(self):
        config = self.config
        N, PL, QL, CL, d, dc, nh = (config.batch_size if not self.demo else 1,
                                    self.c_maxlen, self.q_maxlen, config.char_limit,
                                    config.hidden, config.char_dim, config.num_heads)

        with tf.variable_scope("Input_Embedding_Layer"):
            ch_emb = tf.reshape(tf.nn.embedding_lookup(
                self.char_mat, self.ch), [N * PL, CL, dc])
            qh_emb = tf.reshape(tf.nn.embedding_lookup(
                self.char_mat, self.qh), [N * QL, CL, dc])
            ch_emb = tf.nn.dropout(ch_emb, 1.0 - 0.5 * self.dropout)
            qh_emb = tf.nn.dropout(qh_emb, 1.0 - 0.5 * self.dropout)

            # BiDAF-style conv-highway encoder
            ch_emb = conv(ch_emb, d,
                bias = True, activation = tf.nn.relu, kernel_size = 5, name = "char_conv", reuse = None)
            qh_emb = conv(qh_emb, d,
                bias = True, activation = tf.nn.relu, kernel_size = 5, name = "char_conv", reuse = True)

            ch_emb = tf.reduce_max(ch_emb, axis = 1)
            qh_emb = tf.reduce_max(qh_emb, axis = 1)

            ch_emb = tf.reshape(ch_emb, [N, PL, ch_emb.shape[-1]])
            qh_emb = tf.reshape(qh_emb, [N, QL, ch_emb.shape[-1]])

            c_emb = tf.nn.dropout(tf.nn.embedding_lookup(self.word_mat, self.c), 1.0 - self.dropout)
            q_emb = tf.nn.dropout(tf.nn.embedding_lookup(self.word_mat, self.q), 1.0 - self.dropout)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

            c_emb = highway(c_emb, size = d, scope = "highway", dropout = self.dropout, reuse = None)
            q_emb = highway(q_emb, size = d, scope = "highway", dropout = self.dropout, reuse = True)

        with tf.variable_scope("Embedding_Encoder_Layer"):
            c = residual_block(c_emb,
                num_blocks = 1,
                num_conv_layers = 4,
                kernel_size = 7,
                mask = self.c_mask,
                num_filters = d,
                num_heads = nh,
                seq_len = self.c_len,
                scope = "Encoder_Residual_Block",
                bias = False,
                dropout = self.dropout)
            q = residual_block(q_emb,
                num_blocks = 1,
                num_conv_layers = 4,
                kernel_size = 7,
                mask = self.q_mask,
                num_filters = d,
                num_heads = nh,
                seq_len = self.q_len,
                scope = "Encoder_Residual_Block",
                reuse = True, # Share the weights between passage and question
                bias = False,
                dropout = self.dropout)

        with tf.variable_scope("Context_to_Query_Attention_Layer"):
            # C = tf.tile(tf.expand_dims(c,2),[1,1,self.q_maxlen,1])
            # Q = tf.tile(tf.expand_dims(q,1),[1,self.c_maxlen,1,1])
            # S = trilinear([C, Q, C*Q], input_keep_prob = 1.0 - self.dropout)
            S = optimized_trilinear_for_attention([c, q], self.c_maxlen, self.q_maxlen, input_keep_prob = 1.0 - self.dropout)
            mask_q = tf.expand_dims(self.q_mask, 1)
            S_ = tf.nn.softmax(mask_logits(S, mask = mask_q))
            mask_c = tf.expand_dims(self.c_mask, 2)
            S_T = tf.transpose(tf.nn.softmax(mask_logits(S, mask = mask_c), dim = 1),(0,2,1))
            self.c2q = tf.matmul(S_, q)
            self.q2c = tf.matmul(tf.matmul(S_, S_T), c)
            attention_outputs = [c, self.c2q, c * self.c2q, c * self.q2c]

        with tf.variable_scope("Model_Encoder_Layer"):
            inputs = tf.concat(attention_outputs, axis = -1)
            self.enc = [conv(inputs, d, name = "input_projection")]
            for i in range(3):
                if i % 2 == 0: # dropout every 2 blocks
                    self.enc[i] = tf.nn.dropout(self.enc[i], 1.0 - self.dropout)
                self.enc.append(
                    residual_block(self.enc[i],
                        num_blocks = 7,
                        num_conv_layers = 2,
                        kernel_size = 5,
                        mask = self.c_mask,
                        num_filters = d,
                        num_heads = nh,
                        seq_len = self.c_len,
                        scope = "Model_Encoder",
                        bias = False,
                        reuse = True if i > 0 else None,
                        dropout = self.dropout)
                    )

        with tf.variable_scope("Output_Layer"):
            start_logits = tf.squeeze(conv(tf.concat([self.enc[1], self.enc[2]],axis = -1),1, bias = False, name = "start_pointer"),-1)
            end_logits = tf.squeeze(conv(tf.concat([self.enc[1], self.enc[3]],axis = -1),1, bias = False, name = "end_pointer"), -1)
            self.logits = [mask_logits(start_logits, mask = self.c_mask),
                           mask_logits(end_logits, mask = self.c_mask)]

            logits1, logits2 = [l for l in self.logits]

            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, config.ans_limit)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)

        if config.l2_norm is not None:
            variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = tf.contrib.layers.apply_regularization(regularizer, variables)
            self.loss += l2_loss

        if config.decay is not None:
            self.var_ema = tf.train.ExponentialMovingAverage(config.decay)
            ema_op = self.var_ema.apply(tf.trainable_variables())
            with tf.control_dependencies([ema_op]):
                self.loss = tf.identity(self.loss)

                self.assign_vars = []
                for var in tf.global_variables():
                    v = self.var_ema.average(var)
                    if v is not None:
                        self.assign_vars.append(tf.assign(var,v))
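
`mask_logits` is used throughout these examples but never shown; the QANet-style definition simply pushes logits at padded positions toward a very large negative value before the softmax. A minimal sketch, assuming a 0/1 mask that is castable to float:

import tensorflow as tf

def mask_logits(inputs, mask, mask_value=-1e30):
    """Push logits at padded positions (mask == 0) toward -inf so that
    softmax assigns them ~zero probability."""
    return inputs + mask_value * (1.0 - tf.cast(mask, tf.float32))

# Hypothetical usage: a length-5 sequence whose last two positions are padding.
logits = tf.constant([1.0, 2.0, 0.5, 0.0, 0.0])
mask = tf.constant([1, 1, 1, 0, 0])
probs = tf.nn.softmax(mask_logits(logits, mask))  # padded positions get ~0 probability
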