Example #1
    def _build_graph(self, input_vars, _):
        x, label = input_vars
        x = x / 256.0

        def quantize(x, name=None):
            # quantize to 2 bit
            return ((x * 3.0 + 0.5) // 1) / 3.0

        bn = lambda x, name: BatchNorm('bn', x, False, epsilon=1e-4)
        bnc = lambda x, name: tf.clip_by_value(
            bn(x, None), 0.0, 1.0, name=name)

        def conv_split(name, x, channel, shape):
            inputs = tf.split(3, 2, x)
            x0 = Conv2D(name + 'a', inputs[0], channel / 2, shape)
            x1 = Conv2D(name + 'b', inputs[1], channel / 2, shape)
            return tf.concat(3, [x0, x1])

        with argscope([Conv2D, FullyConnected], nl=bnc):
            x = Conv2D('conv1_1', x, 96, 12, stride=4, padding='VALID')
            x = quantize(x)
            x = conv_split('conv2_1', x, 256, 5)
            x = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])
            x = MaxPooling('pool1', x, 3, 2)
            x = quantize(x)

            x = Conv2D('conv3_1', x, 384, 3)
            x = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])
            x = MaxPooling('pool2', x, 3, 2)
            x = quantize(x)

            x = conv_split('conv4_1', x, 384, 3)
            x = quantize(x)

            x = conv_split('conv5_1', x, 256, 3)
            x = MaxPooling('pool3', x, 3, 2)
            x = quantize(x)
            x = tf.transpose(x, perm=[0, 3, 1, 2])

            x = tf.nn.dropout(x, keep_prob=1.)
            x = FullyConnected('fc0', x, out_dim=4096)
            x = quantize(x)
            x = tf.nn.dropout(x, keep_prob=1.)
            x = FullyConnected('fc1', x, out_dim=4096)
            logits = FullyConnected('fct', x, out_dim=1000, nl=bn)

        prob = tf.nn.softmax(logits, name='prob')
        nr_wrong = tf.reduce_sum(prediction_incorrect(logits, label),
                                 name='wrong-top1')
        nr_wrong = tf.reduce_sum(prediction_incorrect(logits, label, 5),
                                 name='wrong-top5')
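A small NumPy sketch (the function name is mine) of what the quantize helper above computes: it snaps an activation in [0, 1] onto the four levels {0, 1/3, 2/3, 1}, i.e. a 2-bit code.

import numpy as np

def quantize_2bit(x):
    # same arithmetic as quantize() above: floor(x * 3 + 0.5) rounds x * 3
    # to the nearest integer, and dividing by 3 maps it back into [0, 1]
    return np.floor(x * 3.0 + 0.5) / 3.0

x = np.linspace(0.0, 1.0, 9)
print(quantize_2bit(x))   # only the four levels 0, 1/3, 2/3, 1 appear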
Example #2
    def _build_graph(self, input_vars, _):
        x, label = input_vars
        x = x / 256.0

        def quantize(x, name=None):
            # quantize to 2 bit
            return ((x * 3.0 + 0.5) // 1) / 3.0

        bn = lambda x, name: BatchNorm('bn', x, False, epsilon=1e-4)
        bnc = lambda x, name: tf.clip_by_value(bn(x, None), 0.0, 1.0, name=name)

        def conv_split(name, x, channel, shape):
            inputs = tf.split(3, 2, x)
            x0 = Conv2D(name + 'a', inputs[0], channel/2, shape)
            x1 = Conv2D(name + 'b', inputs[1], channel/2, shape)
            return tf.concat(3, [x0, x1])

        with argscope([Conv2D, FullyConnected], nl=bnc):
            x = Conv2D('conv1_1', x, 96, 12, stride=4, padding='VALID')
            x = quantize(x)
            x = conv_split('conv2_1', x, 256, 5)
            x = tf.pad(x, [[0,0], [1,1], [1,1], [0,0]])
            x = MaxPooling('pool1', x, 3, 2)
            x = quantize(x)

            x = Conv2D('conv3_1', x, 384, 3)
            x = tf.pad(x, [[0,0], [1,1], [1,1], [0,0]])
            x = MaxPooling('pool2', x, 3, 2)
            x = quantize(x)

            x = conv_split('conv4_1', x, 384, 3)
            x = quantize(x)

            x = conv_split('conv5_1', x, 256, 3)
            x = MaxPooling('pool3', x, 3, 2)
            x = quantize(x)
            x = tf.transpose(x, perm=[0,3,1,2])

            x = tf.nn.dropout(x, keep_prob=1.)
            x = FullyConnected('fc0', x, out_dim=4096)
            x = quantize(x)
            x = tf.nn.dropout(x, keep_prob=1.)
            x = FullyConnected('fc1', x, out_dim=4096)
            logits = FullyConnected('fct', x, out_dim=1000, nl=bn)

        prob = tf.nn.softmax(logits, name='prob')
        nr_wrong = tf.reduce_sum(prediction_incorrect(logits, label), name='wrong-top1')
        nr_wrong = tf.reduce_sum(prediction_incorrect(logits, label, 5), name='wrong-top5')
Example #3
    def _build_graph(self, inputs):
        """This function should build the model which takes the input variables
        and define self.cost at the end"""

        # inputs contains a list of input variables defined above
        image, label = inputs

        # In TensorFlow, inputs to a convolution are assumed to be NHWC.
        # Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1   # center the pixel values at zero

        # The context manager `argscope` sets default arguments for all the layers under
        # this context. Here we use 32-channel convolutions with 3x3 kernels.
        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
            logits = (LinearWrap(image)
                      .Conv2D('conv0')
                      .MaxPooling('pool0', 2)
                      .Conv2D('conv1')
                      .Conv2D('conv2')
                      .MaxPooling('pool1', 2)
                      .Conv2D('conv3')
                      .FullyConnected('fc0', 512, nl=tf.nn.relu)
                      .Dropout('dropout', 0.5)
                      .FullyConnected('fc1', out_dim=10, nl=tf.identity)())

        prob = tf.nn.softmax(logits, name='prob')   # a Bx10 tensor of probabilities

        # a vector of length B with the loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        # compute the "incorrect vector", for the callback ClassificationError to use at validation time
        wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
        accuracy = symbf.accuracy(logits, label, name='accuracy')

        # This will monitor training error (in a moving-average fashion):
        # 1. write the value to tensorboard
        # 2. write the value to stat.json
        # 3. print the value after each epoch
        train_error = tf.reduce_mean(wrong, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find the parameters to apply weight decay to.
        # Here we apply weight decay to the W (weight matrix) of all fc layers.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # monitor the histograms of all weights (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
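The regularize_cost('fc.*/W', ...) call above selects parameters by matching their names against a regular expression. A pure-Python sketch of which variable names that pattern picks up, assuming a re.search-style match on the variable name (the exact matching rule of the library may differ):

import re

pattern = 'fc.*/W'
names = ['conv0/W', 'conv1/W', 'fc0/W', 'fc0/b', 'fc1/W', 'fc1/b']

# keep only the names that match the regex (assumed rule: re.search)
selected = [n for n in names if re.search(pattern, n)]
print(selected)   # ['fc0/W', 'fc1/W'] -- only the weight matrices of the fc layers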
Example #4
    def _build_graph(self, input_vars):
        image, label = input_vars
        is_training = get_current_tower_context().is_training
        keep_prob = tf.constant(0.5 if is_training else 1.0)  # dropout keep probability

        if is_training:
            tf.image_summary("train_image", image, 10)

        image = image / 4.0  # just to make range smaller
        with argscope(Conv2D, nl=BNReLU(), use_bias=False, kernel_shape=3):
            logits = LinearWrap(image) \
                    .Conv2D('conv1', out_channel=96, stride=4, kernel_shape=7) \
                    .tf.nn.relu(name='relu1') \
                    .MaxPooling('pool1', 3, stride=2) \
                    .tf.nn.local_response_normalization(depth_radius=5, alpha=0.0001, beta=0.75, name='norm1') \
                    .Conv2D('conv2', out_channel=256, kernel_shape=5) \
                    .tf.nn.relu('relu2') \
                    .MaxPooling('pool2', 3, stride=2) \
                    .tf.nn.local_response_normalization(alpha=0.0001, beta=0.75, name='norm2') \
                    .Conv2D('conv3', out_channel=384, kernel_shape=3) \
                    .tf.nn.relu(name='relu3') \
                    .MaxPooling('pool5', 3, stride=2) \
                    .FullyConnected('fc6', 512) \
                    .tf.nn.relu(name='relu6') \
                    .tf.nn.dropout(keep_prob) \
                    .FullyConnected('fc7', 512) \
                    .tf.nn.relu(name='relu7') \
                    .tf.nn.dropout(keep_prob) \
                    .FullyConnected('fc8', out_dim=8, nl=tf.identity)()
        prob = tf.nn.softmax(logits, name='prob')

        #cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        cost = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        # compute the number of failed samples, for ClassificationError to use at test time
        wrong = symbf.prediction_incorrect(logits, label)
        nr_wrong = tf.reduce_sum(wrong, name='wrong')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W of fc layers
        wd_cost = tf.mul(0.004,
                         regularize_cost('fc.*/W', tf.nn.l2_loss),
                         name='regularize_loss')
        add_moving_summary(cost, wd_cost)

        add_param_summary([('.*/W', ['histogram'])])  # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')
Example #5
    def _build_graph(self, inputs):
        image, label = inputs
        is_training = get_current_tower_context().is_training
        keep_prob = tf.constant(0.5 if is_training else 1.0)

        if is_training:
            tf.summary.image("train_image", image, 10)
        if tf.test.is_gpu_available():
            image = tf.transpose(image, [0, 3, 1, 2])
            data_format = 'NCHW'
        else:
            data_format = 'NHWC'

        image = image / 4.0  # just to make range smaller
        with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3), \
                argscope([Conv2D, MaxPooling, BatchNorm], data_format=data_format):
            logits = LinearWrap(image) \
                .Conv2D('conv1.1', out_channel=64) \
                .Conv2D('conv1.2', out_channel=64) \
                .MaxPooling('pool1', 3, stride=2, padding='SAME') \
                .Conv2D('conv2.1', out_channel=128) \
                .Conv2D('conv2.2', out_channel=128) \
                .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                .Conv2D('conv3.1', out_channel=128, padding='VALID') \
                .Conv2D('conv3.2', out_channel=128, padding='VALID') \
                .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
                .tf.nn.dropout(keep_prob) \
                .FullyConnected('fc1', 512, nl=tf.nn.relu) \
                .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label)
        accuracy = symbf.accuracy(logits, label, name='accuracy')

        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'), accuracy)

        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W',
                                  l2_regularizer(4e-4),
                                  name='regularize_loss')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram']))  # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')
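When a GPU is available, the example above transposes the image with perm=[0, 3, 1, 2] and switches data_format to 'NCHW'. A NumPy sketch of that layout change, which only reorders axes and leaves the values untouched:

import numpy as np

x_nhwc = np.random.rand(8, 30, 30, 3)        # batch, height, width, channels
x_nchw = np.transpose(x_nhwc, (0, 3, 1, 2))  # batch, channels, height, width

print(x_nchw.shape)   # (8, 3, 30, 30)
# same values, different layout: channel 1 of the first image is unchanged
assert np.array_equal(x_nhwc[0, :, :, 1], x_nchw[0, 1, :, :])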
Example #6
    def _build_graph(self, input_vars, is_training):
        image, label = input_vars
        keep_prob = tf.constant(0.5 if is_training else 1.0)

        if is_training:
            tf.image_summary("train_image", image, 10)

        image = image / 4.0  # just to make range smaller
        with argscope(Conv2D,
                      nl=BNReLU(is_training),
                      use_bias=False,
                      kernel_shape=3):
            logits = LinearWrap(image) \
                    .Conv2D('conv1.1', out_channel=64) \
                    .Conv2D('conv1.2', out_channel=64) \
                    .MaxPooling('pool1', 3, stride=2, padding='SAME') \
                    .Conv2D('conv2.1', out_channel=128) \
                    .Conv2D('conv2.2', out_channel=128) \
                    .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                    .Conv2D('conv3.1', out_channel=128, padding='VALID') \
                    .Conv2D('conv3.2', out_channel=128, padding='VALID') \
                    .FullyConnected('fc0', 1024 + 512,
                           b_init=tf.constant_initializer(0.1)) \
                    .tf.nn.dropout(keep_prob) \
                    .FullyConnected('fc1', 512,
                           b_init=tf.constant_initializer(0.1)) \
                    .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)

        # compute the number of failed samples, for ClassificationError to use at test time
        wrong = symbf.prediction_incorrect(logits, label)
        nr_wrong = tf.reduce_sum(wrong, name='wrong')
        # monitor training error
        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY,
                             tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W of fc layers
        wd_cost = tf.mul(0.004,
                         regularize_cost('fc.*/W', tf.nn.l2_loss),
                         name='regularize_loss')
        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)

        add_param_summary([('.*/W', ['histogram'])])  # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')
Example #7
    def _build_graph(self, input_vars, is_training):
        image, label = input_vars
        keep_prob = tf.constant(0.5 if is_training else 1.0)

        if is_training:
            tf.image_summary("train_image", image, 10)

        image = image / 4.0     # just to make range smaller
        with argscope(Conv2D, nl=BNReLU(is_training), use_bias=False, kernel_shape=3):
            l = Conv2D('conv1.1', image, out_channel=64)
            l = Conv2D('conv1.2', l, out_channel=64)
            l = MaxPooling('pool1', l, 3, stride=2, padding='SAME')

            l = Conv2D('conv2.1', l, out_channel=128)
            l = Conv2D('conv2.2', l, out_channel=128)
            l = MaxPooling('pool2', l, 3, stride=2, padding='SAME')

            l = Conv2D('conv3.1', l, out_channel=128, padding='VALID')
            l = Conv2D('conv3.2', l, out_channel=128, padding='VALID')
        l = FullyConnected('fc0', l, 1024 + 512,
                           b_init=tf.constant_initializer(0.1))
        l = tf.nn.dropout(l, keep_prob)
        l = FullyConnected('fc1', l, 512,
                           b_init=tf.constant_initializer(0.1))
        # fc will have activation summary by default. disable for the output layer
        logits = FullyConnected('linear', l, out_dim=self.cifar_classnum, nl=tf.identity)

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)

        # compute the number of failed samples, for ClassificationError to use at test time
        wrong = symbf.prediction_incorrect(logits, label)
        nr_wrong = tf.reduce_sum(wrong, name='wrong')
        # monitor training error
        tf.add_to_collection(
            MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W of fc layers
        wd_cost = tf.mul(0.004,
                         regularize_cost('fc.*/W', tf.nn.l2_loss),
                         name='regularize_loss')
        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)

        add_param_summary([('.*/W', ['histogram'])])   # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')
Example #8
    def _build_graph(self, inputs):

        image, label = inputs
        image = tf.expand_dims(image * 2 - 1, 3)

        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
            c0 = Conv2D('conv0', image)
            p0 = MaxPooling('pool0', c0, 2)
            c1 = Conv2D('conv1', p0)
            c2 = Conv2D('conv2', c1)
            p1 = MaxPooling('pool1', c2, 2)
            c3 = Conv2D('conv3', p1)
            fc1 = FullyConnected('fc0', c3, 512, nl=tf.nn.relu)
            fc1 = Dropout('dropout', fc1, 0.5)
            logits = FullyConnected('fc1', fc1, out_dim=10, nl=tf.identity)

        with tf.name_scope('visualizations'):
            visualize_conv_weights(c0.variables.W, 'conv0')
            visualize_conv_activations(c0, 'conv0')
            visualize_conv_weights(c1.variables.W, 'conv1')
            visualize_conv_activations(c1, 'conv1')
            visualize_conv_weights(c2.variables.W, 'conv2')
            visualize_conv_activations(c2, 'conv2')
            visualize_conv_weights(c3.variables.W, 'conv3')
            visualize_conv_activations(c3, 'conv3')

            tf.summary.image('input', (image + 1.0) * 128., 3)

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
        accuracy = symbf.accuracy(logits, label)

        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost, accuracy)

        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
Example #9
    def _build_graph(self, input_vars):
        image, label = input_vars
        is_training = get_current_tower_context().is_training
        keep_prob = tf.constant(0.5 if is_training else 1.0)

        if is_training:
            tf.image_summary("train_image", image, 10)

        image = image / 4.0     # just to make range smaller
        with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3):
            logits = LinearWrap(image) \
                    .Conv2D('conv1.1', out_channel=64) \
                    .Conv2D('conv1.2', out_channel=64) \
                    .MaxPooling('pool1', 3, stride=2, padding='SAME') \
                    .Conv2D('conv2.1', out_channel=128) \
                    .Conv2D('conv2.2', out_channel=128) \
                    .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                    .Conv2D('conv3.1', out_channel=128, padding='VALID') \
                    .Conv2D('conv3.2', out_channel=128, padding='VALID') \
                    .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
                    .tf.nn.dropout(keep_prob) \
                    .FullyConnected('fc1', 512, nl=tf.nn.relu) \
                    .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label)
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W of fc layers
        wd_cost = tf.mul(0.0004,
                         regularize_cost('fc.*/W', tf.nn.l2_loss),
                         name='regularize_loss')
        add_moving_summary(cost, wd_cost)

        add_param_summary([('.*/W', ['histogram'])])   # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')
Example #10
    def _build_graph(self, inputs):
        image, label = inputs
        is_training = get_current_tower_context().is_training
        keep_prob = tf.constant(0.5 if is_training else 1.0)

        if is_training:
            tf.summary.image("train_image", image, 10)
        image = tf.transpose(image, [0, 3, 1, 2])

        image = image / 4.0     # just to make range smaller
        with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3), \
                argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'):
            logits = LinearWrap(image) \
                .Conv2D('conv1.1', out_channel=64) \
                .Conv2D('conv1.2', out_channel=64) \
                .MaxPooling('pool1', 3, stride=2, padding='SAME') \
                .Conv2D('conv2.1', out_channel=128) \
                .Conv2D('conv2.2', out_channel=128) \
                .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                .Conv2D('conv3.1', out_channel=128, padding='VALID') \
                .Conv2D('conv3.2', out_channel=128, padding='VALID') \
                .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
                .tf.nn.dropout(keep_prob) \
                .FullyConnected('fc1', 512, nl=tf.nn.relu) \
                .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label)
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(4e-4), name='regularize_loss')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram']))   # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')
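A NumPy sketch (names are mine) of the quantity prediction_incorrect computes in these examples: a float vector with 1.0 wherever the true label is not among the top-k logits, so its mean is the error rate and its sum the number of wrong samples. This assumes the usual in-top-k definition; tie handling may differ in the actual library.

import numpy as np

def prediction_incorrect_np(logits, labels, topk=1):
    topk_classes = np.argsort(-logits, axis=1)[:, :topk]      # top-k class ids per sample
    in_topk = (topk_classes == labels[:, None]).any(axis=1)   # True where the label is in the top-k
    return (~in_topk).astype(np.float32)                      # 1.0 for incorrect samples

logits = np.array([[2.0, 1.0, 0.1],
                   [0.2, 0.3, 2.5],
                   [1.5, 0.1, 1.4]])
labels = np.array([0, 0, 2])
wrong = prediction_incorrect_np(logits, labels)
print(wrong, wrong.mean())   # [0. 1. 1.] and an error rate of 2/3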
Example #11
    def _build_graph(self, inputs):
        image, label = inputs

        image = image / 128.0 - 1

        with argscope(Conv2D, nl=BNReLU, use_bias=False):
            logits = (LinearWrap(image)
                      .Conv2D('conv1', 24, 5, padding='VALID')
                      .MaxPooling('pool1', 2, padding='SAME')
                      .Conv2D('conv2', 32, 3, padding='VALID')
                      .Conv2D('conv3', 32, 3, padding='VALID')
                      .MaxPooling('pool2', 2, padding='SAME')
                      .Conv2D('conv4', 64, 3, padding='VALID')
                      .Dropout('drop', 0.5)
                      .FullyConnected('fc0', 512, b_init=tf.constant_initializer(0.1), nl=tf.nn.relu)
                      .FullyConnected('linear', out_dim=10, nl=tf.identity)())
        prob = tf.nn.softmax(logits, name='output')

        # compute the number of failed samples, for ClassificationError to use at test time
        wrong = prediction_incorrect(logits, label)
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wd_cost = regularize_cost('fc.*/W', l2_regularizer(0.00001))
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram', 'rms']))  # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')
Example #12
    def _build_graph(self, inputs):
        image, label = inputs
        is_training = get_current_tower_context().is_training

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        # monkey-patch tf.get_variable to apply fw
        def binarize_weight(v):
            name = v.op.name
            # don't binarize first and last layer
            if not name.endswith('W') or 'conv0' in name or 'fc' in name:
                return v
            else:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)

        def cabs(x):
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        def activate(x):
            return fa(cabs(x))

        image = image / 256.0

        with remap_variables(binarize_weight), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False, nl=tf.identity):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                      .MaxPooling('pool0', 2, padding='SAME')
                      .apply(activate)
                      # 18
                      .Conv2D('conv1', 64, 3, padding='SAME').apply(fg).BatchNorm('bn1').apply(activate)
                      .Conv2D('conv2', 64, 3, padding='SAME').apply(fg).BatchNorm('bn2')
                      .MaxPooling('pool1', 2, padding='SAME').apply(activate)
                      # 9
                      .Conv2D('conv3', 128, 3, padding='VALID').apply(fg).BatchNorm('bn3').apply(activate)
                      # 7
                      .Conv2D('conv4', 128, 3, padding='SAME').apply(fg).BatchNorm('bn4').apply(activate)
                      .Conv2D('conv5', 128, 3, padding='VALID').apply(fg).BatchNorm('bn5').apply(activate)
                      # 5
                      .tf.nn.dropout(0.5 if is_training else 1.0)
                      .Conv2D('conv6', 512, 5, padding='VALID').apply(fg).BatchNorm('bn6').apply(cabs)
                      .FullyConnected('fc1', 10, nl=tf.identity)())
        tf.nn.softmax(logits, name='output')

        # compute the number of failed samples
        wrong = prediction_incorrect(logits, label)
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, self.cost)
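In the DoReFa example above, cabs clips activations into [0, 1] and fa quantizes them to BITA bits. A NumPy sketch of the forward pass of the standard DoReFa-style uniform quantizer, round((2^k - 1) * x) / (2^k - 1); the real fa returned by get_dorefa also defines a straight-through gradient, which this sketch does not model.

import numpy as np

def cabs(x):
    # clip the magnitude into [0, 1], as in the example above
    return np.minimum(1.0, np.abs(x))

def quantize_k(x, k):
    # uniform k-bit quantization of x in [0, 1] (forward pass only)
    n = float(2 ** k - 1)
    return np.round(x * n) / n

x = np.array([-0.7, 0.1, 0.4, 0.9, 1.8])
print(quantize_k(cabs(x), k=2))   # values drawn from {0, 1/3, 2/3, 1}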
Example #13
    def _build_graph(self, inputs):
        image, label = inputs
        image = image / 255.0   # scale pixel values to [0, 1]

        def proj_kk(l, k, ch_r, ch, stride=1):
            l = Conv2D('conv{0}{0}r'.format(k), l, ch_r, 1)
            return Conv2D('conv{0}{0}'.format(k), l, ch, k, stride=stride,
                          padding='VALID' if stride > 1 else 'SAME')

        def proj_233(l, ch_r, ch, stride=1):
            l = Conv2D('conv233r', l, ch_r, 1)
            l = Conv2D('conv233a', l, ch, 3)
            return Conv2D('conv233b', l, ch, 3, stride=stride,
                          padding='VALID' if stride > 1 else 'SAME')

        def pool_proj(l, ch, pool_type):
            if pool_type == 'max':
                l = MaxPooling('maxpool', l, 3, 1)
            else:
                l = AvgPooling('maxpool', l, 3, 1, padding='SAME')
            return Conv2D('poolproj', l, ch, 1)

        def proj_77(l, ch_r, ch):
            return (LinearWrap(l)
                    .Conv2D('conv77r', ch_r, 1)
                    .Conv2D('conv77a', ch_r, [1, 7])
                    .Conv2D('conv77b', ch, [7, 1])())

        def proj_277(l, ch_r, ch):
            return (LinearWrap(l)
                    .Conv2D('conv277r', ch_r, 1)
                    .Conv2D('conv277aa', ch_r, [7, 1])
                    .Conv2D('conv277ab', ch_r, [1, 7])
                    .Conv2D('conv277ba', ch_r, [7, 1])
                    .Conv2D('conv277bb', ch, [1, 7])())

        with argscope(Conv2D, nl=BNReLU, use_bias=False),\
                argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
            l = (LinearWrap(image)
                 .Conv2D('conv0', 32, 3, stride=2, padding='VALID')  # 299
                 .Conv2D('conv1', 32, 3, padding='VALID')  # 149
                 .Conv2D('conv2', 64, 3, padding='SAME')  # 147
                 .MaxPooling('pool2', 3, 2)
                 .Conv2D('conv3', 80, 1, padding='SAME')  # 73
                 .Conv2D('conv4', 192, 3, padding='VALID')  # 71
                 .MaxPooling('pool4', 3, 2)())  # 35

            with tf.variable_scope('incep-35-256a'):
                l = tf.concat([
                    Conv2D('conv11', l, 64, 1),
                    proj_kk(l, 5, 48, 64),
                    proj_233(l, 64, 96),
                    pool_proj(l, 32, 'avg')
                ], 3, name='concat')
            with tf.variable_scope('incep-35-288a'):
                l = tf.concat([
                    Conv2D('conv11', l, 64, 1),
                    proj_kk(l, 5, 48, 64),
                    proj_233(l, 64, 96),
                    pool_proj(l, 64, 'avg')
                ], 3, name='concat')
            with tf.variable_scope('incep-35-288b'):
                l = tf.concat([
                    Conv2D('conv11', l, 64, 1),
                    proj_kk(l, 5, 48, 64),
                    proj_233(l, 64, 96),
                    pool_proj(l, 64, 'avg')
                ], 3, name='concat')
            # 35x35x288
            with tf.variable_scope('incep-17-768a'):
                l = tf.concat([
                    Conv2D('conv3x3', l, 384, 3, stride=2, padding='VALID'),
                    proj_233(l, 64, 96, stride=2),
                    MaxPooling('maxpool', l, 3, 2)
                ], 3, name='concat')
            with tf.variable_scope('incep-17-768b'):
                l = tf.concat([
                    Conv2D('conv11', l, 192, 1),
                    proj_77(l, 128, 192),
                    proj_277(l, 128, 192),
                    pool_proj(l, 192, 'avg')
                ], 3, name='concat')
            for x in ['c', 'd']:
                with tf.variable_scope('incep-17-768{}'.format(x)):
                    l = tf.concat([
                        Conv2D('conv11', l, 192, 1),
                        proj_77(l, 160, 192),
                        proj_277(l, 160, 192),
                        pool_proj(l, 192, 'avg')
                    ], 3, name='concat')
            with tf.variable_scope('incep-17-768e'):
                l = tf.concat([
                    Conv2D('conv11', l, 192, 1),
                    proj_77(l, 192, 192),
                    proj_277(l, 192, 192),
                    pool_proj(l, 192, 'avg')
                ], 3, name='concat')
            # 17x17x768

            with tf.variable_scope('br1'):
                br1 = AvgPooling('avgpool', l, 5, 3, padding='VALID')
                br1 = Conv2D('conv11', br1, 128, 1)
                shape = br1.get_shape().as_list()
                br1 = Conv2D('convout', br1, 768, shape[1:3], padding='VALID')
                br1 = FullyConnected('fc', br1, 1000, nl=tf.identity)

            with tf.variable_scope('incep-17-1280a'):
                l = tf.concat([
                    proj_kk(l, 3, 192, 320, stride=2),
                    Conv2D('conv73', proj_77(l, 192, 192), 192, 3, stride=2, padding='VALID'),
                    MaxPooling('maxpool', l, 3, 2)
                ], 3, name='concat')
            for x in ['a', 'b']:
                with tf.variable_scope('incep-8-2048{}'.format(x)):
                    br11 = Conv2D('conv11', l, 320, 1)
                    br33 = Conv2D('conv133r', l, 384, 1)
                    br33 = tf.concat([
                        Conv2D('conv133a', br33, 384, [1, 3]),
                        Conv2D('conv133b', br33, 384, [3, 1])
                    ], 3, name='conv133')

                    br233 = proj_kk(l, 3, 448, 384)
                    br233 = tf.concat([
                        Conv2D('conv233a', br233, 384, [1, 3]),
                        Conv2D('conv233b', br233, 384, [3, 1]),
                    ], 3, name='conv233')

                    l = tf.concat([
                        br11, br33, br233,
                        pool_proj(l, 192, 'avg')
                    ], 3, name='concat')

            l = GlobalAvgPooling('gap', l)
            # 1x1x2048
            l = Dropout('drop', l, 0.8)
            logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)

        loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=br1, labels=label)
        loss1 = tf.reduce_mean(loss1, name='loss1')

        loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        loss2 = tf.reduce_mean(loss2, name='loss2')

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

        wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

        # weight decay on all W of fc layers
        wd_w = tf.train.exponential_decay(0.00004, get_global_step_var(),
                                          80000, 0.7, True)
        wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')

        self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
        add_moving_summary(loss1, loss2, wd_cost, self.cost)
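The weight-decay coefficient above is scheduled rather than constant: tf.train.exponential_decay(0.00004, step, 80000, 0.7, True) evaluates to 0.00004 * 0.7 ** (step // 80000), the integer division coming from staircase=True. A small Python sketch of that schedule:

def wd_coefficient(step, initial=4e-5, decay_steps=80000, decay_rate=0.7, staircase=True):
    exponent = step // decay_steps if staircase else step / decay_steps
    return initial * decay_rate ** exponent

for step in (0, 80000, 160000, 400000):
    print(step, wd_coefficient(step))
# roughly: 4e-05, 2.8e-05, 1.96e-05, 6.7e-06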
Example #14
    def _build_graph(self, inputs):
        image, label = inputs

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        old_get_variable = tf.get_variable

        def monitor(x, name):
            if MONITOR == 1:
                return tf.Print(x, [x],
                                message='\n\n' + name + ': ',
                                summarize=1000,
                                name=name)
            else:
                return x

        def new_get_variable(v):
            name = v.op.name
            if not name.endswith('W') or 'conv1_1' in name or 'fc8' in name:
                return v
            else:
                logger.info("Quantizing weight {}".format(v.op.name))
                if MONITOR == 1:
                    return tf.Print(fw(v), [fw(v)],
                                    message='\n\n' + v.name +
                                    ', Quantized weights are:',
                                    summarize=100)
                else:
                    return fw(v)

        def bn_activate(name, x):
            x = BatchNorm(name, x)
            x = monitor(x, name + '_noact_out')
            return activate(x)

        def activate(x):
            if BITA == 32:
                return tf.nn.relu(x)
            else:
                return fa(tf.nn.relu(x))

        # VGG 16
        with remap_variables(new_get_variable), \
             argscope(Conv2D, kernel_shape=3, use_bias=False, nl = tf.identity):
            logits = (LinearWrap(image)
                      .apply(monitor, 'image_out')
                      .Conv2D('conv1_1', 64).apply(fg).BatchNorm('bn1_1').apply(activate).apply(monitor, 'conv1_1_out')
                      .Conv2D('conv1_2', 64).apply(fg).BatchNorm('bn1_2').apply(activate).apply(monitor, 'conv1_2_out')
                      .MaxPooling('pool1', 2).apply(monitor, 'pool1_out')
                      # 112
                      .Conv2D('conv2_1', 128).apply(fg).BatchNorm('bn2_1').apply(activate).apply(monitor, 'conv2_1_out')
                      .Conv2D('conv2_2', 128).apply(fg).BatchNorm('bn2_2').apply(activate).apply(monitor, 'conv2_2_out')
                      .MaxPooling('pool2', 2).apply(monitor, 'pool2_out')
                      # 56
                      .Conv2D('conv3_1', 256).apply(fg).BatchNorm('bn3_1').apply(activate).apply(monitor, 'conv3_1_out')
                      .Conv2D('conv3_2', 256).apply(fg).BatchNorm('bn3_2').apply(activate).apply(monitor, 'conv3_2_out')
                      .Conv2D('conv3_3', 256).apply(fg).BatchNorm('bn3_3').apply(activate).apply(monitor, 'conv3_3_out')
                      .MaxPooling('pool3', 2).apply(monitor, 'pool3_out')
                      # 28
                      .Conv2D('conv4_1', 512).apply(fg).BatchNorm('bn4_1').apply(activate).apply(monitor, 'conv4_1_out')
                      .Conv2D('conv4_2', 512).apply(fg).BatchNorm('bn4_2').apply(activate).apply(monitor, 'conv4_2_out')
                      .Conv2D('conv4_3', 512).apply(fg).BatchNorm('bn4_3').apply(activate).apply(monitor, 'conv4_3_out')
                      .MaxPooling('pool4', 2).apply(monitor, 'pool4_out')
                      # 14
                      .Conv2D('conv5_1', 512).apply(fg).BatchNorm('bn5_1').apply(activate).apply(monitor, 'conv5_1_out')
                      .Conv2D('conv5_2', 512).apply(fg).BatchNorm('bn5_2').apply(activate).apply(monitor, 'conv5_2_out')
                      .Conv2D('conv5_3', 512).apply(fg).BatchNorm('bn5_3').apply(activate).apply(monitor, 'conv5_3_out')
                      .MaxPooling('pool5', 2).apply(monitor, 'pool5_out')
                      .FullyConnected('fc6', use_bias=False, out_dim=512).apply(activate).apply(monitor, 'fc6_out')
                      .FullyConnected('fc7', use_bias=False, out_dim=512).apply(activate).apply(monitor, 'fc7_out')
                      .FullyConnected('fc8', use_bias=False, out_dim=self.cifar_classnum, nl=tf.identity).apply(monitor, 'fc8_out')())

        prob = tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
        accuracy = symbf.accuracy(logits, label, name='accuracy')

        train_error = tf.reduce_mean(wrong, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)
Example #15
    def _build_graph(self, inputs):
        image, label = inputs
        image = image / 255.0  # scale pixel values to [0, 1]

        def proj_kk(l, k, ch_r, ch, stride=1):
            l = Conv2D('conv{0}{0}r'.format(k), l, ch_r, 1)
            return Conv2D('conv{0}{0}'.format(k),
                          l,
                          ch,
                          k,
                          stride=stride,
                          padding='VALID' if stride > 1 else 'SAME')

        def proj_233(l, ch_r, ch, stride=1):
            l = Conv2D('conv233r', l, ch_r, 1)
            l = Conv2D('conv233a', l, ch, 3)
            return Conv2D('conv233b',
                          l,
                          ch,
                          3,
                          stride=stride,
                          padding='VALID' if stride > 1 else 'SAME')

        def pool_proj(l, ch, pool_type):
            if pool_type == 'max':
                l = MaxPooling('maxpool', l, 3, 1)
            else:
                l = AvgPooling('maxpool', l, 3, 1, padding='SAME')
            return Conv2D('poolproj', l, ch, 1)

        def proj_77(l, ch_r, ch):
            return (LinearWrap(l).Conv2D('conv77r', ch_r, 1).Conv2D(
                'conv77a', ch_r, [1, 7]).Conv2D('conv77b', ch, [7, 1])())

        def proj_277(l, ch_r, ch):
            return (LinearWrap(l).Conv2D('conv277r', ch_r, 1).Conv2D(
                'conv277aa', ch_r,
                [7, 1]).Conv2D('conv277ab', ch_r, [1, 7]).Conv2D(
                    'conv277ba', ch_r, [7, 1]).Conv2D('conv277bb', ch,
                                                      [1, 7])())

        with argscope(Conv2D, nl=BNReLU, use_bias=False),\
                argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
            l = (
                LinearWrap(image).Conv2D('conv0',
                                         32,
                                         3,
                                         stride=2,
                                         padding='VALID')  # 299
                .Conv2D('conv1', 32, 3, padding='VALID')  # 149
                .Conv2D('conv2', 64, 3, padding='SAME')  # 147
                .MaxPooling('pool2', 3, 2).Conv2D('conv3',
                                                  80,
                                                  1,
                                                  padding='SAME')  # 73
                .Conv2D('conv4', 192, 3, padding='VALID')  # 71
                .MaxPooling('pool4', 3, 2)())  # 35

            with tf.variable_scope('incep-35-256a'):
                l = tf.concat([
                    Conv2D('conv11', l, 64, 1),
                    proj_kk(l, 5, 48, 64),
                    proj_233(l, 64, 96),
                    pool_proj(l, 32, 'avg')
                ],
                              3,
                              name='concat')
            with tf.variable_scope('incep-35-288a'):
                l = tf.concat([
                    Conv2D('conv11', l, 64, 1),
                    proj_kk(l, 5, 48, 64),
                    proj_233(l, 64, 96),
                    pool_proj(l, 64, 'avg')
                ],
                              3,
                              name='concat')
            with tf.variable_scope('incep-35-288b'):
                l = tf.concat([
                    Conv2D('conv11', l, 64, 1),
                    proj_kk(l, 5, 48, 64),
                    proj_233(l, 64, 96),
                    pool_proj(l, 64, 'avg')
                ],
                              3,
                              name='concat')
            # 35x35x288
            with tf.variable_scope('incep-17-768a'):
                l = tf.concat([
                    Conv2D('conv3x3', l, 384, 3, stride=2, padding='VALID'),
                    proj_233(l, 64, 96, stride=2),
                    MaxPooling('maxpool', l, 3, 2)
                ],
                              3,
                              name='concat')
            with tf.variable_scope('incep-17-768b'):
                l = tf.concat([
                    Conv2D('conv11', l, 192, 1),
                    proj_77(l, 128, 192),
                    proj_277(l, 128, 192),
                    pool_proj(l, 192, 'avg')
                ],
                              3,
                              name='concat')
            for x in ['c', 'd']:
                with tf.variable_scope('incep-17-768{}'.format(x)):
                    l = tf.concat([
                        Conv2D('conv11', l, 192, 1),
                        proj_77(l, 160, 192),
                        proj_277(l, 160, 192),
                        pool_proj(l, 192, 'avg')
                    ],
                                  3,
                                  name='concat')
            with tf.variable_scope('incep-17-768e'):
                l = tf.concat([
                    Conv2D('conv11', l, 192, 1),
                    proj_77(l, 192, 192),
                    proj_277(l, 192, 192),
                    pool_proj(l, 192, 'avg')
                ],
                              3,
                              name='concat')
            # 17x17x768

            with tf.variable_scope('br1'):
                br1 = AvgPooling('avgpool', l, 5, 3, padding='VALID')
                br1 = Conv2D('conv11', br1, 128, 1)
                shape = br1.get_shape().as_list()
                br1 = Conv2D('convout', br1, 768, shape[1:3], padding='VALID')
                br1 = FullyConnected('fc', br1, 1000, nl=tf.identity)

            with tf.variable_scope('incep-17-1280a'):
                l = tf.concat([
                    proj_kk(l, 3, 192, 320, stride=2),
                    Conv2D('conv73',
                           proj_77(l, 192, 192),
                           192,
                           3,
                           stride=2,
                           padding='VALID'),
                    MaxPooling('maxpool', l, 3, 2)
                ],
                              3,
                              name='concat')
            for x in ['a', 'b']:
                with tf.variable_scope('incep-8-2048{}'.format(x)):
                    br11 = Conv2D('conv11', l, 320, 1)
                    br33 = Conv2D('conv133r', l, 384, 1)
                    br33 = tf.concat([
                        Conv2D('conv133a', br33, 384, [1, 3]),
                        Conv2D('conv133b', br33, 384, [3, 1])
                    ],
                                     3,
                                     name='conv133')

                    br233 = proj_kk(l, 3, 448, 384)
                    br233 = tf.concat([
                        Conv2D('conv233a', br233, 384, [1, 3]),
                        Conv2D('conv233b', br233, 384, [3, 1]),
                    ],
                                      3,
                                      name='conv233')

                    l = tf.concat(
                        [br11, br33, br233,
                         pool_proj(l, 192, 'avg')],
                        3,
                        name='concat')

            l = GlobalAvgPooling('gap', l)
            # 1x1x2048
            l = Dropout('drop', l, 0.8)
            logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)

        loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=br1,
                                                               labels=label)
        loss1 = tf.reduce_mean(loss1, name='loss1')

        loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                               labels=label)
        loss2 = tf.reduce_mean(loss2, name='loss2')

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

        wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

        # weight decay on all W of fc layers
        wd_w = tf.train.exponential_decay(0.00004, get_global_step_var(),
                                          80000, 0.7, True)
        wd_cost = tf.multiply(wd_w,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='l2_regularize_loss')

        self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
        add_moving_summary(loss1, loss2, wd_cost, self.cost)
Example #16
    def build_graph(self, image, label):
        image = image / 255.0

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        # monkey-patch tf.get_variable to apply fw
        def new_get_variable(v):
            name = v.op.name
            # don't binarize first and last layer
            if not name.endswith('W') or 'conv0' in name or 'fct' in name:
                return v
            else:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)

        def nonlin(x):
            if BITA == 32:
                return tf.nn.relu(x)  # still use relu for 32bit cases
            return tf.clip_by_value(x, 0.0, 1.0)

        def activate(x):
            return fa(nonlin(x))

        with remap_variables(new_get_variable), \
                argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', 96, 12, strides=4, padding='VALID')
                      .apply(activate)
                      .Conv2D('conv1', 256, 5, padding='SAME', split=2)
                      .apply(fg).BatchNorm('bn1')
                      .MaxPooling('pool1', 3, 2, padding='SAME')
                      .apply(activate)
                      .Conv2D('conv2', 384, 3)
                      .apply(fg).BatchNorm('bn2')
                      .MaxPooling('pool2', 3, 2, padding='SAME')
                      .apply(activate)
                      .Conv2D('conv3', 384, 3, split=2)
                      .apply(fg).BatchNorm('bn3')
                      .apply(activate)
                      .Conv2D('conv4', 256, 3, split=2)
                      .apply(fg).BatchNorm('bn4')
                      .MaxPooling('pool4', 3, 2, padding='VALID')
                      .apply(activate)
                      .FullyConnected('fc0', 4096)
                      .apply(fg).BatchNorm('bnfc0')
                      .apply(activate)
                      .FullyConnected('fc1', 4096, use_bias=False)
                      .apply(fg).BatchNorm('bnfc1')
                      .apply(nonlin)
                      .FullyConnected('fct', 1000, use_bias=True)())

        tf.nn.softmax(logits, name='output')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))
        wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W',
                                  l2_regularizer(5e-6),
                                  name='regularize_cost')

        add_param_summary(('.*/W', ['histogram', 'rms']))
        total_cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, total_cost)
        return total_cost
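Both conv_split in Example #1 and Conv2D(..., split=2) above implement a grouped convolution: the input channels are split into two halves, each half gets its own filters, and the results are concatenated along the channel axis. A NumPy sketch of that pattern for the 1x1 case (names are mine), where a convolution reduces to a per-pixel matrix multiply:

import numpy as np

def conv1x1(x, w):
    # x: (N, H, W, Cin), w: (Cin, Cout); a 1x1 convolution is a per-pixel matmul
    return np.tensordot(x, w, axes=([3], [0]))

def grouped_conv1x1(x, w0, w1):
    # split the input channels into two groups, convolve each group with its
    # own weights, then concatenate along the channel axis
    x0, x1 = np.split(x, 2, axis=3)
    return np.concatenate([conv1x1(x0, w0), conv1x1(x1, w1)], axis=3)

x = np.random.rand(1, 4, 4, 8)
w0 = np.random.rand(4, 6)   # first group: 4 input channels -> 6 output channels
w1 = np.random.rand(4, 6)   # second group
print(grouped_conv1x1(x, w0, w1).shape)   # (1, 4, 4, 12)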
Example #17
    def _build_graph(self, inputs):
        inp, label = inputs
        is_training = get_current_tower_context().is_training

        fw, fa = get_dorefa(self.bitw, self.bita)

        def binarize_weight(v):
            name = v.op.name
            if not (name.endswith('W') or name.endswith('b')):
                logger.info("Not quantizing {}".format(name))
                return v
            elif not self.quant_ends and 'conv0' in name:
                logger.info("Not quantizing {}".format(name))
                return v
            elif not self.quant_ends and 'last_linear' in name:
                logger.info("Not quantizing {}".format(name))
                return v
            elif not self.quant_ends and (self.net_fn == fcn1_net or self.net_fn == fcn2_net) and 'linear0' in name:
                logger.info("Not quantizing {}".format(name))
                return v
            else:
                logger.info("Quantizing weight {}".format(name))
                return fw(v)

        def nonlin(x, name="activate"):
            if self.bita == 32:
                return fa(tf.nn.relu(BNWithTrackedMults(x)))
            else:
                return fa(tf.clip_by_value(BNWithTrackedMults(x), 0.0, 1.0))

        with remap_variables(binarize_weight), \
                argscope([FullyConnectedWithTrackedMults], network_complexity=self.network_complexity), \
                argscope([Conv2DWithTrackedMults], network_complexity=self.network_complexity), \
                argscope([BNReLUWithTrackedMults], network_complexity=self.network_complexity), \
                argscope([BNWithTrackedMults], network_complexity=self.network_complexity), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4):
            l = self.net_fn(inp, nonlin, self.n_context)
            logits = FullyConnectedWithTrackedMults('last_linear', l, out_dim=self.n_spks, nl=tf.identity)

        prob = tf.nn.softmax(logits, name='output')

        # used for utterance-level validation accuracy
        identity_guesses = flatten(tf.argmax(prob, axis=1))
        uniq_identities, _, count = tf.unique_with_counts(identity_guesses)
        idx_to_identity_with_most_votes = tf.argmax(count)
        chosen_identity = tf.gather(uniq_identities, idx_to_identity_with_most_votes)
        wrong = tf.expand_dims(tf.not_equal(chosen_identity, tf.cast(label[0], tf.int64)), axis=0, name='utt-wrong')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        add_moving_summary(cost)

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

        with tf.name_scope('original-weight-summaries'):
            add_param_summary(('.*/W', ['rms', 'histogram']))
            add_param_summary(('.*/b', ['rms', 'histogram']))

        with tf.name_scope('activation-summaries'):
            def fn(name):
                return (name.endswith('output') or name.endswith('output:0')) and "Inference" not in name and 'quantized' not in name
            tensors = get_tensors_from_graph(tf.get_default_graph(), fn) 
            logger.info("Adding activation tensors to summary: {}".format(tensors))
            for tensor in tensors:
                add_tensor_summary(tensor, ['rms', 'histogram'])

        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(), 480000, 0.2, True)
        wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
        add_moving_summary(wd_cost)
        self.cost = tf.add_n([cost, wd_cost], name='cost')

        tf.constant([self.network_complexity['mults']], name='TotalMults')
        tf.constant([self.network_complexity['weights']], name='TotalWeights')
        logger.info("Parameter count: {}".format(self.network_complexity))
Example #18
    def _build_graph(self, inputs):
        image, label = inputs
        image = image / 128.0

        def inception(name, x, nr1x1, nr3x3r, nr3x3, nr233r, nr233, nrpool, pooltype):
            stride = 2 if nr1x1 == 0 else 1
            with tf.variable_scope(name):
                outs = []
                if nr1x1 != 0:
                    outs.append(Conv2D('conv1x1', x, nr1x1, 1))
                x2 = Conv2D('conv3x3r', x, nr3x3r, 1)
                outs.append(Conv2D('conv3x3', x2, nr3x3, 3, stride=stride))

                x3 = Conv2D('conv233r', x, nr233r, 1)
                x3 = Conv2D('conv233a', x3, nr233, 3)
                outs.append(Conv2D('conv233b', x3, nr233, 3, stride=stride))

                if pooltype == 'max':
                    x4 = MaxPooling('mpool', x, 3, stride, padding='SAME')
                else:
                    assert pooltype == 'avg'
                    x4 = AvgPooling('apool', x, 3, stride, padding='SAME')
                if nrpool != 0:  # pool + passthrough if nrpool == 0
                    x4 = Conv2D('poolproj', x4, nrpool, 1)
                outs.append(x4)
                return tf.concat(outs, 3, name='concat')

        with argscope(Conv2D, nl=BNReLU, use_bias=False):
            l = (LinearWrap(image)
                 .Conv2D('conv0', 64, 7, stride=2)
                 .MaxPooling('pool0', 3, 2, padding='SAME')
                 .Conv2D('conv1', 64, 1)
                 .Conv2D('conv2', 192, 3)
                 .MaxPooling('pool2', 3, 2, padding='SAME')())
            # 28
            l = inception('incep3a', l, 64, 64, 64, 64, 96, 32, 'avg')
            l = inception('incep3b', l, 64, 64, 96, 64, 96, 64, 'avg')
            l = inception('incep3c', l, 0, 128, 160, 64, 96, 0, 'max')

            br1 = (LinearWrap(l)
                   .Conv2D('loss1conv', 128, 1)
                   .FullyConnected('loss1fc', 1024, nl=tf.nn.relu)
                   .FullyConnected('loss1logit', 1000, nl=tf.identity)())
            loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=br1, labels=label)
            loss1 = tf.reduce_mean(loss1, name='loss1')

            # 14
            l = inception('incep4a', l, 224, 64, 96, 96, 128, 128, 'avg')
            l = inception('incep4b', l, 192, 96, 128, 96, 128, 128, 'avg')
            l = inception('incep4c', l, 160, 128, 160, 128, 160, 128, 'avg')
            l = inception('incep4d', l, 96, 128, 192, 160, 192, 128, 'avg')
            l = inception('incep4e', l, 0, 128, 192, 192, 256, 0, 'max')

            br2 = Conv2D('loss2conv', l, 128, 1)
            br2 = FullyConnected('loss2fc', br2, 1024, nl=tf.nn.relu)
            br2 = FullyConnected('loss2logit', br2, 1000, nl=tf.identity)
            loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=br2, labels=label)
            loss2 = tf.reduce_mean(loss2, name='loss2')

            # 7
            l = inception('incep5a', l, 352, 192, 320, 160, 224, 128, 'avg')
            l = inception('incep5b', l, 352, 192, 320, 192, 224, 128, 'max')
            l = GlobalAvgPooling('gap', l)

            logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)
        tf.nn.softmax(logits, name='output')
        loss3 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        loss3 = tf.reduce_mean(loss3, name='loss3')

        cost = tf.add_n([loss3, 0.3 * loss2, 0.3 * loss1], name='weighted_cost')
        add_moving_summary([cost, loss1, loss2, loss3])

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error_top1'))

        wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error_top5'))

        # weight decay on all W (conv and fc), with an exponentially decayed coefficient
        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                          80000, 0.7, True)
        wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')

        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(wd_cost, self.cost)
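The two auxiliary heads above are down-weighted by 0.3 before being added to the main loss, following GoogLeNet's training recipe. A small sketch of the same combination as a reusable helper (weighted_aux_loss is a hypothetical name, not part of the example):

import tensorflow as tf

def weighted_aux_loss(main_loss, aux_losses, aux_weight=0.3):
    # sum the main-head loss with down-weighted auxiliary-branch losses
    return tf.add_n([main_loss] + [aux_weight * l for l in aux_losses],
                    name='weighted_cost')

# usage: cost = weighted_aux_loss(loss3, [loss1, loss2])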
Example #19
0
    def _build_graph(self, inputs):
        image, label = inputs
        is_training = get_current_tower_context().is_training

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        # monkey-patch tf.get_variable to apply fw
        def binarize_weight(v):
            name = v.op.name
            # don't binarize first and last layer
            if not name.endswith('W') or 'conv0' in name or 'fc' in name:
                return v
            else:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)

        def cabs(x):
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        def activate(x):
            return fa(cabs(x))

        image = image / 256.0

        with remap_variables(binarize_weight), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False, nl=tf.identity):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                      .MaxPooling('pool0', 2, padding='SAME')
                      .apply(activate)
                      # 18
                      .Conv2D('conv1', 64, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn1').apply(activate)

                      .Conv2D('conv2', 64, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn2')
                      .MaxPooling('pool1', 2, padding='SAME')
                      .apply(activate)
                      # 9
                      .Conv2D('conv3', 128, 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn3').apply(activate)
                      # 7

                      .Conv2D('conv4', 128, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn4').apply(activate)

                      .Conv2D('conv5', 128, 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn5').apply(activate)
                      # 5
                      .tf.nn.dropout(0.5 if is_training else 1.0)
                      .Conv2D('conv6', 512, 5, padding='VALID')
                      .apply(fg).BatchNorm('bn6')
                      .apply(cabs)
                      .FullyConnected('fc1', 10, nl=tf.identity)())
        tf.nn.softmax(logits, name='output')

        # compute the number of failed samples
        wrong = prediction_incorrect(logits, label)
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, self.cost)
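binarize_weight above quantizes only kernel weights and deliberately leaves the first conv and the fully-connected layers at full precision. A framework-agnostic sketch of that filtering rule (should_quantize is a hypothetical helper, shown only to make the name test explicit):

def should_quantize(var_name):
    # only kernel weights are candidates; biases and BatchNorm params stay float
    if not var_name.endswith('W'):
        return False
    # keep the first and the last layers at full precision
    if 'conv0' in var_name or 'fc' in var_name:
        return False
    return True

assert should_quantize('conv3/W')
assert not should_quantize('conv0/W')
assert not should_quantize('fc1/W')
assert not should_quantize('bn1/beta')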
Example #21
0
    def _build_graph(self, inputs):
        image, label = inputs
        """Add a single channel here"""
        image = tf.expand_dims(image, 3)

        image = image * 256
        image = tf.round(image)

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        old_get_variable = tf.get_variable

        def monitor(x, name):
            if MONITOR == 1:
                return tf.Print(x, [x],
                                message='\n\n' + name + ': ',
                                summarize=1000,
                                name=name)
            else:
                return x

        def new_get_variable(v):
            name = v.op.name
            if not name.endswith('W') or 'conv0' in name or 'fc1' in name:
                return v
            else:
                logger.info("Quantizing weight {}".format(v.op.name))
                if MONITOR == 1:
                    return tf.Print(fw(v), [fw(v)],
                                    message='\n\n' + v.name +
                                    ', Quantized weights are:',
                                    summarize=100)
                else:
                    return fw(v)

        def activate(x):
            if BITA == 32:
                return tf.nn.relu(x)
            else:
                return fa(tf.nn.relu(x))

        with remap_variables(new_get_variable), \
             argscope(Conv2D, kernel_shape=3, use_bias=False, nl=tf.identity, out_channel=32):
            logits = (LinearWrap(image)
                      .apply(monitor, 'image_out')
                      .Conv2D('conv0')
                      .apply(fg).BatchNorm('bn0').apply(activate)
                      .apply(monitor, 'conv0_out')
                      .MaxPooling('pool0', 2)
                      .apply(monitor, 'pool0_out')
                      .Conv2D('conv1')
                      .apply(fg).BatchNorm('bn1').apply(activate)
                      .apply(monitor, 'conv1_out')
                      .Conv2D('conv2')
                      .apply(fg).BatchNorm('bn2').apply(activate)
                      .apply(monitor, 'conv2_out')
                      .MaxPooling('pool1', 2)
                      .apply(monitor, 'pool1_out')
                      .Conv2D('conv3')
                      .apply(fg).BatchNorm('bn3').apply(activate)
                      .apply(monitor, 'conv3_out')
                      .FullyConnected('fc0', use_bias=False, out_dim=20, nl=tf.identity)
                      .apply(activate)
                      .apply(monitor, 'fc0_out')
                      .FullyConnected('fc1', use_bias=False, out_dim=10, nl=tf.identity)
                      .apply(monitor, 'fc1_out')())

        prob = tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
        accuracy = symbf.accuracy(logits, label, name='accuracy')

        train_error = tf.reduce_mean(wrong, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)
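activate() above only routes through fa when activations are actually being quantized (BITA != 32). As a rough assumption about what the fa returned by get_dorefa does, here is a plain k-bit uniform quantizer on [0, 1]; the real DoReFa version additionally wires in a straight-through gradient, which this sketch omits:

import tensorflow as tf

def uniform_quantize(x, k):
    # map values in [0, 1] onto 2**k - 1 evenly spaced levels (no custom gradient)
    n = float(2 ** k - 1)
    return tf.round(x * n) / n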
Example #22
0
    def _build_graph(self, inputs):
        image, label = inputs
        image = image / 255.0

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        # monkey-patch tf.get_variable to apply fw
        def new_get_variable(v):
            name = v.op.name
            # don't binarize first and last layer
            if not name.endswith('W') or 'conv0' in name or 'fct' in name:
                return v
            else:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)

        def nonlin(x):
            if BITA == 32:
                return tf.nn.relu(x)    # still use relu for 32bit cases
            return tf.clip_by_value(x, 0.0, 1.0)

        def activate(x):
            return fa(nonlin(x))

        with remap_variables(new_get_variable), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope([Conv2D, FullyConnected], use_bias=False, nl=tf.identity):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', 96, 12, stride=4, padding='VALID')
                      .apply(activate)
                      .Conv2D('conv1', 256, 5, padding='SAME', split=2)
                      .apply(fg)
                      .BatchNorm('bn1')
                      .MaxPooling('pool1', 3, 2, padding='SAME')
                      .apply(activate)

                      .Conv2D('conv2', 384, 3)
                      .apply(fg)
                      .BatchNorm('bn2')
                      .MaxPooling('pool2', 3, 2, padding='SAME')
                      .apply(activate)

                      .Conv2D('conv3', 384, 3, split=2)
                      .apply(fg)
                      .BatchNorm('bn3')
                      .apply(activate)

                      .Conv2D('conv4', 256, 3, split=2)
                      .apply(fg)
                      .BatchNorm('bn4')
                      .MaxPooling('pool4', 3, 2, padding='VALID')
                      .apply(activate)

                      .FullyConnected('fc0', 4096)
                      .apply(fg)
                      .BatchNorm('bnfc0')
                      .apply(activate)

                      .FullyConnected('fc1', 4096)
                      .apply(fg)
                      .BatchNorm('bnfc1')
                      .apply(nonlin)
                      .FullyConnected('fct', 1000, use_bias=True)())

        tf.nn.softmax(logits, name='output')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))
        wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6), name='regularize_cost')

        add_param_summary(('.*/W', ['histogram', 'rms']))
        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, self.cost)
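Conv2D(..., split=2) in the example above is a grouped convolution: the input channels are split into two groups, each group is convolved independently, and the outputs are concatenated. A minimal TF1 sketch of the same idea with tf.layers (an illustration, not tensorpack's implementation):

import tensorflow as tf

def grouped_conv(x, out_channels, kernel_size, groups=2):
    # assumes NHWC layout and out_channels divisible by groups
    parts = tf.split(x, groups, axis=3)
    outs = [tf.layers.conv2d(p, out_channels // groups, kernel_size, padding='same')
            for p in parts]
    return tf.concat(outs, axis=3)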
Example #23
0
    def _build_graph(self, inputs):
        xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                        for x in range(WARP_TARGET_SIZE)],
                       dtype='float32')
        xys = tf.constant(xys, dtype=tf.float32, name='xys')  # p x 3

        image, label = inputs

        image = image / 255.0 - 0.5  # bhw2

        def get_stn(image):
            stn = (LinearWrap(image)
                   .AvgPooling('downsample', 2)
                   .Conv2D('conv0', 20, 5, padding='VALID')
                   .MaxPooling('pool0', 2)
                   .Conv2D('conv1', 20, 5, padding='VALID')
                   .FullyConnected('fc1', out_dim=32)
                   .FullyConnected('fct', out_dim=6, nl=tf.identity,
                                   W_init=tf.constant_initializer(),
                                   b_init=tf.constant_initializer(
                                       [1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
            # output 6 parameters for affine transformation
            stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
            stn = tf.reshape(tf.transpose(stn, [2, 0, 1]),
                             [3, -1])  # 3 x (bx2)
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3],
                                'sampled_coords')  # b h w 2
            sampled = ImageSample('warp', [image, coor], borderMode='constant')
            return sampled

        with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
            with tf.variable_scope('STN1'):
                sampled1 = get_stn(image)
            with tf.variable_scope('STN2'):
                sampled2 = get_stn(image)

        # For visualization in tensorboard
        with tf.name_scope('visualization'):
            padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF],
                                        [HALF_DIFF, HALF_DIFF], [0, 0]])
            padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF],
                                        [HALF_DIFF, HALF_DIFF], [0, 0]])
            img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]],
                                 1)  # b x 2h  x w
            transform1 = tf.concat([padded1[:, :, :, 0], padded1[:, :, :, 1]],
                                   1)
            transform2 = tf.concat([padded2[:, :, :, 0], padded2[:, :, :, 1]],
                                   1)
            stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
            tf.summary.image('visualize',
                             tf.expand_dims(stacked, -1),
                             max_outputs=30)

        sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
        logits = (LinearWrap(sampled)
                  .FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
                  .FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
                  .FullyConnected('fct', out_dim=19, nl=tf.identity)())
        prob = tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label)
        summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        summary.add_moving_summary(cost, wd_cost)
        self.cost = tf.add_n([wd_cost, cost], name='cost')
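The sampling grid in the STN above is obtained by multiplying homogeneous target coordinates (y, x, 1) by a 2x3 affine matrix predicted per image; the bias of 'fct' initialises that matrix to an identity transform plus a HALF_DIFF translation. A small numpy sketch of the same arithmetic for a single image (tiny made-up sizes):

import numpy as np

WARP_TARGET_SIZE, HALF_DIFF = 4, 2   # hypothetical sizes, just to show the shapes

# homogeneous target-grid coordinates (y, x, 1), shape p x 3, built as above
xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                for x in range(WARP_TARGET_SIZE)], dtype='float32')

# the 2x3 affine matrix the localisation net is initialised to
theta = np.array([[1, 0, HALF_DIFF],
                  [0, 1, HALF_DIFF]], dtype='float32')

coords = xys @ theta.T           # p x 2 source coordinates
print(coords[0], coords[-1])     # [2. 2.] [5. 5.]: every pixel shifted by HALF_DIFF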