Code example #1
def _classifier(self, net, is_train):
    # Global average pooling over the time axis; for image-shaped input
    # this would be [2, 3] (nchw) or [1, 2] (nhwc) instead.
    axes = [1]
    net = log_1d(tf.reduce_mean(net, axes))
    # Small MLP head, ending in a single logit for binary classification.
    for unit in self['classifier.units']:
        net = log_1d(tf.layers.dense(net, unit, activation=tf.nn.relu))
    net = log_1d(tf.layers.dense(net, 1, activation=None))
    return net
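Every example routes tensors through a `log_1d` helper that is never shown. A minimal sketch of what it presumably does, assuming it only logs the tensor's name and static shape during graph construction and returns the tensor unchanged (the behavior is inferred, not confirmed by the source):

import logging

import tensorflow as tf

logger = logging.getLogger(__name__)


def log_1d(tensor):
    # Hypothetical reconstruction: log name and static shape for
    # graph-construction debugging, then pass the tensor through.
    logger.debug('%s: %s', tensor.name, tensor.shape)
    return tensor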
Code example #2
def __call__(self, x, is_train):
    # Dropout is only active when is_train is fed as True.
    x = log_1d(
        tf.layers.dropout(x,
                          self['dropout'],
                          training=is_train,
                          name='dropout'))
    # Project onto the vocabulary; `dense` is assumed to be
    # tf.layers.dense or a thin wrapper around it.
    x = log_1d(
        dense(x,
              self.vocab_length,
              activation=tf.nn.relu if self['use_activation'] else None,
              name='dense'))
    return x
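A hedged usage sketch for this output layer. Only `__call__` appears in the source, so the class name `ProjectionLayer`, its constructor signature, the config keys, and the concrete sizes below are all illustrative assumptions:

import tensorflow as tf

# Hypothetical construction; name and constructor are assumptions.
layer = ProjectionLayer({'dropout': 0.5, 'use_activation': False},
                        vocab_length=80)

features = tf.placeholder(tf.float32, [None, None, 256])  # [batch, time, depth]
is_train = tf.placeholder_with_default(False, (), name='is_train')
logits = layer(features, is_train)  # -> [batch, time, 80]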
Code example #3
def __call__(self, x, is_train):
    with tf.name_scope('resnet'):
        model = resnet.Model(resnet_size=self['resnet_size'],
                             bottleneck=self['bottleneck'],
                             num_filters=self['num_filters'],
                             kernel_size=self['kernel_size'],
                             conv_stride=self['conv_stride'],
                             first_pool_size=self['first_pool_size'],
                             first_pool_stride=self['first_pool_stride'],
                             block_sizes=self['block_sizes'],
                             block_strides=self['block_strides'],
                             data_format=self._parse_format())
        return log_1d(model(x, is_train))
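`self._parse_format()` is not shown. A plausible reconstruction, assuming the config stores 'nchw'/'nhwc' (as example #5 suggests) and that `resnet.Model` expects the Keras-style layout names; the mapping is an assumption:

def _parse_format(self):
    # Assumed mapping from this project's 'nchw'/'nhwc' config values
    # to the 'channels_first'/'channels_last' names resnet.Model expects.
    return 'channels_first' if self['format'] == 'nchw' else 'channels_last'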
Code example #4
def _rec_block(self, net, index, is_train, scope):
    with tf.variable_scope(scope):
        net = log_1d(
            tf.layers.dropout(net,
                              self['dropout'],
                              training=is_train,
                              name='dropout'))
        # Build one forward and one backward cell; _cell() also returns
        # a deferred summary callback for each.
        with tf.name_scope('fw'):
            cell_fw, sum_fw = self._cell()
        with tf.name_scope('bw'):
            cell_bw, sum_bw = self._cell()

        output, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw,
                                                    cell_bw,
                                                    net,
                                                    dtype=tf.float32)
        # Concatenate forward and backward outputs along the feature axis.
        net = log_1d(tf.concat(output, 2))
        sum_fw()
        sum_bw()
        return net
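`self._cell()` evidently returns a cell together with a callback that is invoked only after the RNN has been built, presumably so summaries can be attached once the cell's variables exist. A minimal sketch under that assumption; the cell type and the `self['units']` config key are guesses:

def _cell(self):
    # Assumed: one LSTM cell plus a deferred summary thunk. The thunk
    # must run after tf.nn.bidirectional_dynamic_rnn has created the
    # cell's variables, which is why it is returned instead of called.
    cell = tf.nn.rnn_cell.LSTMCell(self['units'])

    def summarize():
        for var in cell.trainable_variables:
            tf.summary.histogram(var.name.replace(':', '_'), var)

    return cell, summarize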
Code example #5
    def build_graph(self):
        # CPU kernels do not support NCHW, so force NHWC in that case.
        if self._cpu:
            self._config['format'] = DEFAULTS['format']

        ###################
        # PLACEHOLDER
        ###################
        with tf.name_scope('placeholder'):
            x = log_1d(
                tf.placeholder(tf.float32, [
                    None, self.image_height,
                    None if self['dynamic_width'] else self.image_width,
                    self.channels
                ],
                               name="x"))
            if self["scale"]:
                x = tf.truediv(x, tf.constant(255.0), name='scale')
            y = tf.sparse_placeholder(tf.int32, shape=[None, None], name="y")
            class_y = tf.placeholder(tf.float32,
                                     shape=[None, 1],
                                     name="class_y")
            l = tf.placeholder(tf.int32, shape=[None], name="l")
            is_train = tf.placeholder_with_default(False, (), name='is_train')

            if self['format'] == 'nchw':
                net = log_1d(tf.transpose(x, [0, 3, 1, 2], name='nhwc2nchw'))
            else:
                net = x

        ################
        # PHASE I: Encoding
        ###############
        with tf.name_scope('encoder'):
            net = self._encoder(net, is_train)

            if self['format'] == 'nchw':
                net = log_1d(tf.transpose(net, [0, 2, 3, 1], name='nchw2nhwc'))

            # [batch, height, width, depth] -> [batch, width, height, depth]
            net = log_1d(tf.transpose(net, [0, 2, 1, 3]))

        ################
        # PHASE II: Recurrent Block
        ###############
        with tf.name_scope('recurrent'):
            if self['dynamic_width']:
                # maybe there's a better way to do column-wise stacking
                net = log_1d(
                    tf.reshape(
                        net,
                        [-1, tf.shape(net)[1], net.shape[2] * net.shape[3]]))
            else:
                net = log_1d(
                    tf.reshape(
                        net, [-1, net.shape[1], net.shape[2] * net.shape[3]]))
            encoder_net = net
            net = self._recurrent(net, is_train)

        ################
        # PHASE III: Fully Connected
        ###############
        with tf.name_scope('fc'):
            fc = FullyConnected.FullyConnected(self['fc'], self.vocab_length)
            net = fc(net, is_train)

        ##################
        # PHASE IV: CTC
        #################
        # CTC expects time-major logits: [max_time, batch_size, vocab].
        logits = log_1d(tf.transpose(net, [1, 0, 2]))

        with tf.name_scope('loss'):
            # ctc_loss(labels, inputs, sequence_length) returns per-example loss.
            total_loss = tf.nn.ctc_loss(y, logits, l)
            tf.summary.scalar('loss', tf.reduce_mean(total_loss))

        with tf.name_scope('train'):
            train_step = self._train_step(total_loss, self.learning_rate)

        with tf.name_scope('logits'):
            logits = tf.nn.softmax(logits)

        #################
        # PHASE V: Classifier
        ################
        with tf.name_scope('classifier'):
            # Auxiliary binary classifier trained on the encoder output.
            class_logits = self._classifier(encoder_net, is_train)
            class_pred = tf.nn.sigmoid(class_logits)
            class_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=class_logits, labels=class_y)
            # tf.summary.scalar('class_loss', tf.reduce_mean(class_loss))
            class_train = self._train_step(class_loss,
                                           self.class_learning_rate)
        """
        new_dict = {
            "classifier": {
                "x": x,
                "y": class_y,
                "train": class_train,
                "loss": class_loss,
                "logits": class_logits
            },
            "recognizer": {
                "x": x,
                "y": y,
                "l": l,
                "train": train_step,
                "loss": train_loss,
                "logits": logits
            }
            "is_train": is_train
        }
        """

        return dict(x=x,
                    y=y,
                    class_y=class_y,
                    class_pred=class_pred,
                    class_loss=class_loss,
                    class_train=class_train,
                    l=l,
                    is_train=is_train,
                    logits=logits,
                    total_loss=total_loss,
                    train_step=train_step,
                    viz=self.viz)
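A hedged sketch of driving the returned graph in a TF1 session. `model` and the batch variables below are illustrative assumptions; the only source-confirmed parts are the dict keys returned by build_graph and the fact that `y` is a sparse placeholder, so CTC labels must be fed as a tf.SparseTensorValue:

import tensorflow as tf

graph = model.build_graph()  # `model`: an instance of the class above

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss = sess.run(
        [graph['train_step'], graph['total_loss']],
        feed_dict={
            graph['x']: batch_images,    # float32 [batch, H, W, C]
            graph['y']: sparse_labels,   # tf.SparseTensorValue(indices, values, dense_shape)
            graph['l']: logit_lengths,   # int32 [batch], time steps per example
            graph['is_train']: True,
        })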