Exemple #1
0
    def build_model(self, inputs, labels, is_training):
        """Assemble the AlexNet-style graph and attach loss/accuracy/optimizer.

        Args:
            inputs: image batch tensor; axes 1 and 2 are padded, so it is
                presumably laid out as NHWC -- TODO confirm against the caller.
            labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
            is_training: forwarded to the layer helpers; when truthy the
                optimizer op is created as well.

        Side effects:
            Sets ``self.network`` (output of the last dense layer, which is
            built with ``activation=None``), ``self.loss`` and
            ``self.accuracy``. ``self.optimizer`` is only set when
            ``is_training`` is truthy.
        """
        # pad inputs to size 224x224x3 - NOTE: may change to bilinear upsampling
        # (the same padding, derived from self.height, is used on both spatial axes)
        pad = int((self.image_size - self.height) / 2)
        inputs = tf.pad(inputs, [[0, 0], [pad, pad], [pad, pad], [0, 0]])

        # convolution with 11x11 kernel and stride 4 (new size: 55x55x96)
        self.network = ops.convolution(inputs, self.channels, 96, 11, 96, stride=4,
                                       padding='VALID', is_training=is_training, scope='conv1')

        # pooling with 3x3 kernel and stride 2 (new size: 27x27x96)
        self.network = ops.pooling(self.network, k_size=3, scope='pool1')

        # convolution with 5x5 kernel and stride 1 (new size: 27x27x256)
        self.network = ops.convolution(self.network, 96, 256, 5, 256,
                                       is_training=is_training, scope='conv2')

        # pooling with 3x3 kernel and stride 2 (new size: 13x13x256)
        self.network = ops.pooling(self.network, k_size=3, scope='pool2')

        # convolution with 3x3 kernel and stride 1 (new size: 13x13x384)
        self.network = ops.convolution(self.network, 256, 384, 3, 384, batch_norm=False,
                                       is_training=is_training, scope='conv3')

        # convolution with 3x3 kernel and stride 1 (new size: 13x13x384)
        self.network = ops.convolution(self.network, 384, 384, 3, 384, batch_norm=False,
                                       is_training=is_training, scope='conv4')

        # convolution with 3x3 kernel and stride 1 (new size: 13x13x256)
        self.network = ops.convolution(self.network, 384, 256, 3, 256, batch_norm=False,
                                       is_training=is_training, scope='conv5')

        # pooling with 3x3 kernel and stride 2 (new size: 6x6x256)
        self.network = ops.pooling(self.network, k_size=3, scope='pool3')

        # flatten (new size: 9216)
        self.network = ops.flatten(self.network, scope='flatten')

        # fully connected layer (new size: 4096)
        self.network = ops.dense(self.network, 9216, 4096, dropout=True, dropout_rate=0.2,
                                 is_training=is_training, scope='fc1')

        # fully connected layer (new size: 1024) -- Original Paper Size: 4096 (for ImageNet)
        self.network = ops.dense(self.network, 4096, 1024, dropout=True, dropout_rate=0.2,
                                 is_training=is_training, scope='fc2')

        # output layer (new size: 10) -- Original Paper Size: 1000 (for ImageNet)
        self.network = ops.dense(self.network, 1024, 10, activation=None,
                                 is_training=is_training, scope='fc3')

        self.loss = ops.loss(self.network, labels, scope='loss')
        # Expose accuracy like the other build_model implementations do, so
        # callers can read `accuracy` regardless of which model was built.
        self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

        if is_training:
            self.optimizer = ops.optimize(self.loss, self.learning_rate, scope='update')
    def build_model(self, inputs, labels, is_training=False):
        """Build a small conv-pool-conv-pool network with three dense layers.

        Args:
            inputs: input tensor handed to the first convolution.
            labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
            is_training: forwarded to the convolutions; when truthy the
                optimizer op is created as well.

        Side effects:
            Sets ``self.network``, ``self.loss`` and ``self.accuracy``;
            ``self.optimizer`` is only set when ``is_training`` is truthy.
        """
        # feature extractor: two 5-wide convolutions, each followed by pooling
        net = ops.convolution(inputs, self.channels, 50, 5, 50,
                              is_training=is_training, scope='conv1')
        net = ops.pooling(net, scope='pool1')
        net = ops.convolution(net, 50, 20, 5, 20,
                              is_training=is_training, scope='conv2')
        net = ops.pooling(net, scope='pool2')
        net = ops.flatten(net, scope='flatten')

        # classifier head; the first dense layer reads its input width from
        # the static shape of the flattened tensor
        flat_width = net.get_shape().as_list()[1]
        net = ops.dense(net, flat_width, 200, scope='fc1')
        net = ops.dense(net, 200, 50, scope='fc2')
        net = ops.dense(net, 50, 10, activation=None, scope='fc3')
        self.network = net

        self.loss = ops.loss(net, labels, scope='loss')
        self.accuracy = ops.accuracy(net, labels, scope='accuracy')

        if not is_training:
            return

        self.optimizer = ops.optimize(self.loss, self.learning_rate,
                                      scope='update')
    def build_model(self, inputs, labels, is_training):
        """Build the GoogLeNet-style graph with two auxiliary classifiers.

        Args:
            inputs: image batch tensor; axes 1 and 2 are padded, so it is
                presumably laid out as NHWC -- TODO confirm against the caller.
            labels: targets forwarded to ``ops.loss``, ``ops.accuracy`` and
                ``self.aux_classifier``.
            is_training: forwarded to the layer helpers. When truthy, the two
                auxiliary classifiers are built, their losses are added to the
                main loss, and the optimizer op is created.

        Side effects:
            Sets ``self.network`` (output of the last dense layer, built with
            ``activation=None``), ``self.loss`` and ``self.accuracy``.
            ``self.optimizer`` is only set when ``is_training`` is truthy.
        """
        # pad inputs up to self.image_size (same padding on both spatial axes)
        pad = int((self.image_size - self.height) / 2)
        inputs = tf.pad(inputs, [[0, 0], [pad, pad], [pad, pad], [0, 0]])

        # convolution with 7x7 kernel and stride 2 (new size: 112x112x64)
        self.network = ops.convolution(inputs,
                                       self.channels,
                                       64,
                                       7,
                                       64,
                                       stride=2,
                                       is_training=is_training,
                                       scope='conv1')

        # pooling with 3x3 kernel and stride 2 (new size: 56x56x64)
        self.network = ops.pooling(self.network, k_size=3, scope='pool1')

        # convolution with 1x1 kernel and stride 1 (new size: 56x56x192)
        self.network = ops.convolution(self.network,
                                       64,
                                       192,
                                       1,
                                       192,
                                       batch_norm=False,
                                       is_training=is_training,
                                       scope='conv2')

        # convolution with 3x3 kernel and stride 1 (new size: 56x56x192)
        self.network = ops.convolution(self.network,
                                       192,
                                       192,
                                       3,
                                       192,
                                       is_training=is_training,
                                       scope='conv3')

        # pooling with 3x3 kernel and stride 2 (new size: 28x28x192)
        self.network = ops.pooling(self.network, k_size=3, scope='pool2')

        # NOTE: every inception module gets its own explicitly numbered scope
        # ('incept1' .. 'incept9'); the scopes were previously derived from an
        # undefined counter variable, which raised NameError at build time.

        # inception module (3a)
        self.network = self.inception_module(self.network,
                                             [[64, 96, 16], [128, 32, 32]],
                                             scope='incept1')

        # inception module (3b)
        self.network = self.inception_module(self.network,
                                             [[128, 128, 32], [192, 96, 64]],
                                             final_pool=True,
                                             scope='incept2')

        # inception module (4a)
        self.network = self.inception_module(self.network,
                                             [[192, 96, 16], [208, 48, 64]],
                                             scope='incept3')

        # auxiliary classifier (only built, and only used, when training)
        if is_training:
            aux_loss1 = self.aux_classifier(self.network,
                                            labels,
                                            512,
                                            is_training,
                                            scope='auxclass1')

        # inception module (4b)
        self.network = self.inception_module(self.network,
                                             [[160, 112, 24], [224, 64, 64]],
                                             scope='incept4')

        # inception module (4c)
        self.network = self.inception_module(self.network,
                                             [[128, 128, 24], [256, 64, 64]],
                                             scope='incept5')

        # inception module (4d)
        self.network = self.inception_module(self.network,
                                             [[112, 144, 32], [288, 64, 64]],
                                             scope='incept6')

        # auxiliary classifier (only built, and only used, when training)
        if is_training:
            aux_loss2 = self.aux_classifier(self.network,
                                            labels,
                                            528,
                                            is_training,
                                            scope='auxclass2')

        # inception module (4e)
        self.network = self.inception_module(self.network,
                                             [[256, 160, 32], [320, 128, 128]],
                                             final_pool=True,
                                             scope='incept7')

        # inception module (5a)
        self.network = self.inception_module(self.network,
                                             [[256, 160, 32], [320, 128, 128]],
                                             scope='incept8')

        # inception module (5b)
        self.network = self.inception_module(self.network,
                                             [[384, 192, 48], [384, 128, 128]],
                                             scope='incept9')

        # pooling with 7x7 kernel and stride 1 (new size: 1x1x1024)
        # 'VALID' padding is required for the window to collapse the spatial
        # axes; with 'SAME' and stride 1 the map would keep its size and the
        # 1024-input dense layer below would not match. This assumes a 7x7
        # feature map at this point -- TODO confirm for non-224 inputs.
        # tf.nn.avg_pool names its op via `name` (it has no `scope` argument).
        with tf.variable_scope('final_pool', reuse=tf.AUTO_REUSE):
            self.network = tf.nn.avg_pool(self.network,
                                          [1, 7, 7, 1],
                                          [1, 1, 1, 1],
                                          'VALID',
                                          name='pool')

        # flatten (new size: 1024)
        self.network = ops.flatten(self.network, scope='flatten')

        # fully connected layer (new size: 1024)
        self.network = ops.dense(self.network,
                                 1024,
                                 1024,
                                 dropout=True,
                                 dropout_rate=0.4,
                                 is_training=is_training,
                                 scope='fc1')

        # output layer (new size: 10) -- Original Paper Size: 1000 (for ImageNet)
        self.network = ops.dense(self.network,
                                 1024,
                                 10,
                                 activation=None,
                                 is_training=is_training,
                                 scope='fc2')

        loss = ops.loss(self.network, labels, scope='loss')
        self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

        if is_training:  # if training use auxiliary classifiers as well
            self.loss = loss + aux_loss1 + aux_loss2
            self.optimizer = ops.optimize(self.loss,
                                          self.learning_rate,
                                          scope='update')
        else:
            self.loss = loss
    def build_model(self, inputs, labels, is_training):
        """Build the ResNet-style bottleneck graph, loss, accuracy and optimizer.

        Args:
            inputs: image batch tensor; axes 1 and 2 are padded, so it is
                presumably laid out as NHWC -- TODO confirm against the caller.
            labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
            is_training: forwarded to the layer helpers; when truthy the
                optimizer op is created as well.

        Side effects:
            Sets ``self.network`` (output of the final dense layer, built with
            ``activation=None``), ``self.loss`` and ``self.accuracy``.
            ``self.optimizer`` is only set when ``is_training`` is truthy.
        """
        def res_block(inputs, in_channels, out_channels, is_training, idx):
            """Identity-shortcut bottleneck: 1x1 -> 3x3 -> 1x1, added to `inputs`.

            `in_channels` / `out_channels` are 3-element sequences, one entry
            per convolution. `idx` only feeds the scope names.
            """
            net = ops.convolution(inputs,
                                  in_channels[0],
                                  out_channels[0],
                                  1,
                                  out_channels[0],
                                  is_training=is_training,
                                  scope='res%s_conv1' % idx)

            net = ops.convolution(net,
                                  in_channels[1],
                                  out_channels[1],
                                  3,
                                  out_channels[1],
                                  is_training=is_training,
                                  scope='res%s_conv2' % idx)

            # no activation here: the ReLU is applied after the residual sum
            net = ops.convolution(net,
                                  in_channels[2],
                                  out_channels[2],
                                  1,
                                  out_channels[2],
                                  activation=None,
                                  is_training=is_training,
                                  scope='res%s_conv3' % idx)

            # tf.nn.relu names its op via `name`; it has no `scope` argument
            return tf.nn.relu(inputs + net, name='res%s_relu' % idx)

        def res_conv_block(inputs, in_channels, out_channels, stride,
                           is_training, idx):
            """Projection-shortcut bottleneck used at the start of each stage.

            The shortcut is a 1x1 convolution to `out_channels[2]` with the
            given `stride`; the main path applies the same `stride` in its
            last 1x1 convolution so both branches can be summed.
            """
            # The parameters are named `in_channels` / `out_channels` to match
            # what the body reads; previously they were declared in the
            # singular, so `in_channels` was an undefined name.
            skip = ops.convolution(inputs,
                                   in_channels[0],
                                   out_channels[2],
                                   1,
                                   out_channels[2],
                                   stride=stride,
                                   activation=None,
                                   is_training=is_training,
                                   scope='res%s_skip' % idx)

            net = ops.convolution(inputs,
                                  in_channels[0],
                                  out_channels[0],
                                  1,
                                  out_channels[0],
                                  is_training=is_training,
                                  scope='res%s_conv1' % idx)

            net = ops.convolution(net,
                                  in_channels[1],
                                  out_channels[1],
                                  3,
                                  out_channels[1],
                                  is_training=is_training,
                                  scope='res%s_conv2' % idx)

            net = ops.convolution(net,
                                  in_channels[2],
                                  out_channels[2],
                                  1,
                                  out_channels[2],
                                  stride=stride,
                                  activation=None,
                                  is_training=is_training,
                                  scope='res%s_conv3' % idx)

            return tf.nn.relu(skip + net, name='res%s_relu' % idx)

        # pad inputs to size 224x224x3 - NOTE: may change to bilinear upsampling
        pad = int((self.image_size - self.height) / 2)
        inputs = tf.pad(inputs, [[0, 0], [pad, pad], [pad, pad], [0, 0]])

        # convolution with 7x7 kernel and stride 2 (new size: 112x112x64)
        self.network = ops.convolution(inputs,
                                       self.channels,
                                       64,
                                       7,
                                       64,
                                       stride=2,
                                       is_training=is_training,
                                       scope='conv1')

        # pooling with 3x3 kernel and stride 2 (new size: 56x56x64)
        self.network = ops.pooling(self.network, k_size=3, scope='pool1')

        # residual stage 1: one projection block + two identity blocks (idx 1-3)
        out_channels = [64, 64, 256]
        self.network = res_conv_block(self.network, [64, 64, 64], out_channels,
                                      1, is_training, 1)
        for idx in range(2, 4):
            self.network = res_block(self.network, [256, 64, 64], out_channels,
                                     is_training, idx)

        # residual stage 2: one projection block + three identity blocks (idx 4-7)
        out_channels = [128, 128, 512]
        self.network = res_conv_block(self.network, [256, 128, 128],
                                      out_channels, 2, is_training, 4)
        for idx in range(5, 8):
            self.network = res_block(self.network, [512, 128, 128], out_channels,
                                     is_training, idx)

        # residual stage 3: one projection block + five identity blocks (idx 8-13)
        out_channels = [256, 256, 1024]
        self.network = res_conv_block(self.network, [512, 256, 256],
                                      out_channels, 2, is_training, 8)
        for idx in range(9, 14):
            self.network = res_block(self.network, [1024, 256, 256], out_channels,
                                     is_training, idx)

        # residual stage 4: one projection block + two identity blocks (idx 14-16)
        out_channels = [512, 512, 2048]
        self.network = res_conv_block(self.network, [1024, 512, 512],
                                      out_channels, 2, is_training, 14)
        for idx in range(15, 17):
            self.network = res_block(self.network, [2048, 512, 512], out_channels,
                                     is_training, idx)

        # average pooling with a 7x7 window and stride 1.
        # 'VALID' padding lets the window collapse the spatial axes so the
        # flattened width equals the 2048 inputs the dense layer expects; with
        # 'SAME' the map would keep its size. This assumes a 7x7 feature map
        # at this point -- TODO confirm for non-224 inputs.
        # tf.nn.avg_pool names its op via `name` (it has no `scope` argument).
        self.network = tf.nn.avg_pool(self.network,
                                      [1, 7, 7, 1],
                                      [1, 1, 1, 1],
                                      'VALID',
                                      name='avg_pool')
        self.network = ops.flatten(self.network, scope='flatten')

        # fully connected output layer (10 units, no activation)
        self.network = ops.dense(self.network,
                                 2048,
                                 10,
                                 activation=None,
                                 is_training=is_training,
                                 scope='fc')

        self.loss = ops.loss(self.network, labels, scope='loss')
        self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

        if is_training:
            self.optimizer = ops.optimize(self.loss,
                                          self.learning_rate,
                                          scope='update')
    def build_model(self, inputs, labels, is_training):
        """Build the VGG-style graph: five conv stages, then three dense layers.

        Args:
            inputs: image batch tensor; axes 1 and 2 are padded, so it is
                presumably laid out as NHWC -- TODO confirm against the caller.
            labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
            is_training: forwarded to the layer helpers; when truthy the
                optimizer op is created as well.

        Side effects:
            Sets ``self.network`` (output of the final dense layer, built with
            ``activation=None``), ``self.loss`` and ``self.accuracy``.
            ``self.optimizer`` is only set when ``is_training`` is truthy.
        """
        # pad inputs to size 224x224x3 - NOTE: may change to bilinear upsampling
        border = int((self.image_size - self.height) / 2)
        net = tf.pad(inputs,
                     [[0, 0], [border, border], [border, border], [0, 0]])

        # One entry per stage; each stage lists the (in, out) channel pair of
        # its 3x3 stride-1 convolutions and ends with a default pooling op.
        # Convolutions are numbered conv1..conv16 across stages, pools pool1..pool5.
        stages = (
            ((self.channels, 64), (64, 64)),
            ((64, 128), (128, 128)),
            ((128, 256), (256, 256), (256, 256), (256, 256)),
            ((256, 512), (512, 512), (512, 512), (512, 512)),
            ((512, 512), (512, 512), (512, 512), (512, 512)),
        )

        conv_number = 0
        for pool_number, stage in enumerate(stages, 1):
            for channels_in, channels_out in stage:
                conv_number += 1
                net = ops.convolution(net, channels_in, channels_out, 3,
                                      channels_out,
                                      is_training=is_training,
                                      scope='conv' + str(conv_number))
            net = ops.pooling(net, scope='pool' + str(pool_number))

        # flatten (new size: 25088)
        net = ops.flatten(net, scope='flatten')

        # two dropout-regularised dense layers: 25088 -> 4096 -> 1024
        # (Original Paper Size for the second one: 4096, for ImageNet)
        hidden_layers = ((25088, 4096), (4096, 1024))
        for fc_number, (width_in, width_out) in enumerate(hidden_layers, 1):
            net = ops.dense(net, width_in, width_out,
                            dropout=True, dropout_rate=0.2,
                            is_training=is_training,
                            scope='fc' + str(fc_number))

        # output layer (new size: 10) -- Original Paper Size: 1000 (for ImageNet)
        net = ops.dense(net, 1024, 10, activation=None,
                        is_training=is_training, scope='fc3')
        self.network = net

        self.loss = ops.loss(net, labels, scope='loss')
        self.accuracy = ops.accuracy(net, labels, scope='accuracy')

        if not is_training:
            return

        self.optimizer = ops.optimize(self.loss, self.learning_rate,
                                      scope='update')