Example #1
def ssd_multibox_layer(net,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    if normalization > 0:
        net = Layers.l2_normalization(net, scaling=True)

    # Number of anchors
    num_anchors = len(sizes) + len(ratios)

    # Location
    num_loc_pred = num_anchors * 4
    loc_pred = Layers.conv2d(net,
                             net.get_shape()[-1],
                             num_loc_pred,
                             3,
                             1,
                             'SAME',
                             'conv_loc',
                             activation_fn=False)
    loc_pred = Layers.channel_to_last(loc_pred)
    loc_pred = tf.reshape(loc_pred,
                          tensor_shape(loc_pred, 4)[:-1] + [num_anchors, 4])

    # Class prediction: a sketch completing the truncated original, mirroring
    # the location branch with per-anchor class scores instead of 4 box offsets.
    num_cls_pred = num_anchors * num_classes
    cls_pred = Layers.conv2d(net,
                             net.get_shape()[-1],
                             num_cls_pred,
                             3,
                             1,
                             'SAME',
                             'conv_cls',
                             activation_fn=False)
    cls_pred = Layers.channel_to_last(cls_pred)
    cls_pred = tf.reshape(cls_pred,
                          tensor_shape(cls_pred, 4)[:-1] +
                          [num_anchors, num_classes])
    return cls_pred, loc_pred
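A hypothetical call, to show the expected shapes; the feature-map size, class count, and anchor settings below are illustrative, not taken from the original project:

feat = tf.placeholder(tf.float32, [None, 38, 38, 512])
cls_pred, loc_pred = ssd_multibox_layer(feat,
                                        num_classes=21,
                                        sizes=(21., 45.),
                                        ratios=[2, .5],
                                        normalization=20)
# num_anchors = len(sizes) + len(ratios) = 4, so:
# cls_pred: [batch, 38, 38, 4, 21], loc_pred: [batch, 38, 38, 4, 4]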
Example #2
    def subsampled(inputs, reuse=False):
        # Less border effect
        inputs = Layers.pad(inputs)

        with tf.variable_scope('subsampled', reuse=reuse):
            conv1 = Layers.conv2d(inputs, 3, 32, 9, 1, 'SAME', 'conv1')
            norm1 = Layers.instance_norm(conv1)
            relu1 = Layers.relu(norm1)

            conv2 = Layers.conv2d(relu1, 32, 64, 3, 2, 'SAME', 'conv2')
            norm2 = Layers.instance_norm(conv2)
            relu2 = Layers.relu(norm2)

            conv3 = Layers.conv2d(relu2, 64, 128, 3, 2, 'SAME', 'conv3')
            norm3 = Layers.instance_norm(conv3)
            relu3 = Layers.relu(norm3)

        return relu3
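A hypothetical call, assuming the Layers wrapper used above; passing reuse=True lets a second call share the variables created under the 'subsampled' scope:

images = tf.placeholder(tf.float32, [None, 256, 256, 3])
feats_a = subsampled(images)              # first call creates the variables
feats_b = subsampled(images, reuse=True)  # second call reuses them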
Example #3
File: PI.py Project: whgusdn321/PI
class PI(object):
    def __init__(self, d, lr, lambda_pi_usl, use_pi):
        """ flags for each regularizor """
        self.use_pi = use_pi
        """ data and external toolkits """
        self.d = d  # dataset manager
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, d, self.encoder)
        """ placeholders defined outside"""
        self.lr = lr
        self.lambda_pi_usl = lambda_pi_usl

    def encoder(self, x, is_train=True, do_update_bn=True):
        """ https://arxiv.org/pdf/1610.02242.pdf """

        if is_train:
            h = self.distort(x)
            h = self.ls.get_corrupted(h, 0.15)  # corrupt the distorted input
            # (the original corrupted x here, silently discarding the distortion)
        else:
            h = x

        scope = '1'
        h = self.ls.conv2d(scope + '_1', h, 128, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2', h, 128, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_3', h, 128, activation=self.ls.lrelu)
        h = self.ls.max_pool(h)
        if is_train: h = tf.nn.dropout(h, 0.5)

        scope = '2'
        h = self.ls.conv2d(scope + '_1', h, 256, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2', h, 256, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_3', h, 256, activation=self.ls.lrelu)
        h = self.ls.max_pool(h)
        if is_train: h = tf.nn.dropout(h, 0.5)

        scope = '3'
        h = self.ls.conv2d(scope + '_1', h, 512, activation=self.ls.lrelu)
        h = self.ls.conv2d(scope + '_2',
                           h,
                           256,
                           activation=self.ls.lrelu,
                           filter_size=(1, 1))
        h = self.ls.conv2d(scope + '_3',
                           h,
                           128,
                           activation=self.ls.lrelu,
                           filter_size=(1, 1))
        h = tf.reduce_mean(h, axis=[1, 2])  # global average pooling
        h = self.ls.dense(scope, h, self.d.l)

        return h

    def build_graph_train(self, x_l, y_l, x, is_supervised=True):

        o = dict()  # output
        loss = 0

        logit = self.encoder(x)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            logit_l = self.encoder(
                x_l, is_train=True,
                do_update_bn=False)  # for pyx and vat loss computation
        """ Classification Loss """
        o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
        loss += o['Ly']
        """ PI Model Loss """
        if self.use_pi:
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                _, _, o['Lp'] = self.lf.get_loss_pi(x, logit, is_train=True)
                loss += self.lambda_pi_usl * o['Lp']
        else:
            o['Lp'] = tf.constant(0)
        """ set losses """
        o['loss'] = loss
        self.o_train = o
        """ set optimizer """
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
        #self.op = optimizer.minimize(loss)
        grads = optimizer.compute_gradients(loss)
        for i, (g, v) in enumerate(grads):
            if g is not None:
                #g = tf.Print(g, [g], "g %s = "%(v))
                grads[i] = (tf.clip_by_norm(g, 5), v)  # clip gradients
            else:
                print('g is None:', v)
                v = tf.Print(v, [v], "v = ", summarize=10000)
        self.op = optimizer.apply_gradients(grads)  # return train_op

    def build_graph_test(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        logit_l = self.encoder(
            x_l, is_train=False,
            do_update_bn=False)  # for pyx and vat loss computation
        """ classification loss """
        o['Ly'], o['accur'] = self.lf.get_loss_pyx(logit_l, y_l)
        loss += o['Ly']
        """ set losses """
        o['loss'] = loss
        self.o_test = o

    def distort(self, x):

        _d = self.d

        def _distort(a_image):
            """
            bounding_boxes: A Tensor of type float32.
                3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. 
            Bounding boxes are supplied and returned as [y_min, x_min, y_max, x_max]
            """
            # shape: [1, 1, 4]
            bounding_boxes = tf.constant([[[1 / 10, 1 / 10, 9 / 10, 9 / 10]]],
                                         dtype=tf.float32)

            begin, size, _ = tf.image.sample_distorted_bounding_box(
                (_d.h, _d.w, _d.c),
                bounding_boxes,
                min_object_covered=(8.5 / 10.0),
                aspect_ratio_range=[7.0 / 10.0, 10.0 / 7.0])

            a_image = tf.slice(a_image, begin, size)
            """ for the purpose of distorting not use tf.image.resize_image_with_crop_or_pad under """
            a_image = tf.image.resize_images(a_image, [_d.h, _d.w])
            """ due to the size of channel returned from tf.image.resize_images is not being given,
                specify it manually. """
            a_image = tf.reshape(a_image, [_d.h, _d.w, _d.c])
            return a_image

        """ process batch times in parallel """
        return tf.map_fn(_distort, x)
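For reference, the objective assembled in build_graph_train follows the Pi model of Laine & Aila (the paper linked in the encoder docstring): supervised cross-entropy on labeled data plus a weighted consistency penalty between two stochastic forward passes of the same input. Schematically, with names matching the code:

# loss = Ly + lambda_pi_usl * Lp
#   Ly = cross_entropy(f(x_l), y_l)   # labeled branch
#   Lp = || f(x) - f'(x) ||^2         # f, f' = two stochastic passes of the
#                                     # encoder (distort + dropout noise)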
Example #4
class ImageInterface(object):

    def __init__(self, is_3d, is_read_attention, is_write_attention, read_n, write_n, h, w, c):
    
        """ to manage do_share flag inside Layers object, ImageInterface has Layers as its own property """
        self.do_share = False
        self.ls       = Layers()
        self.is_3d    = is_3d
        self.read_n   = read_n
        self.write_n  = write_n
        self.h = h
        self.w = w
        self.c = c

        if is_read_attention:
            self.read = self._read_attention
        else:
            self.read = self._read_no_attention
    
        if is_write_attention:
            self.write = self._write_attention
        else:
            self.write = self._write_no_attention
     
    def set_do_share(self, flag):
        self.do_share    = flag
        self.ls.set_do_share(flag)
        
    ###########################
    """       READER        """
    ###########################
    def _read_no_attention(self, x, x_hat, h_dec):
        _h, _w, _c = self.h, self.w, self.c
        if self.is_3d:
            # x is the raw image and x_hat the error image; each is handled as a
            # separate channel group, so r and the return value have shape [-1, _h, _w, _c*2]

            USE_CONV_READ = False # 170720
            if USE_CONV_READ:
                scope = 'read_1'
                x = self.ls.conv2d(scope+'_1', x, 64, activation=tf.nn.elu)
                x = self.ls.max_pool(x)
                x = self.ls.conv2d(scope+'_2', x, 64, activation=tf.nn.elu)
                x = self.ls.max_pool(x)
                x = self.ls.conv2d(scope+'_3', x, 64, activation=tf.nn.elu)

                scope = 'read_hat_1'
                x_hat = self.ls.conv2d(scope+'_1', x_hat, 16, activation=tf.nn.elu)
                x_hat = self.ls.max_pool(x_hat)
                x_hat = self.ls.conv2d(scope+'_2', x_hat, 16, activation=tf.nn.elu)
                x_hat = self.ls.max_pool(x_hat)
                x_hat = self.ls.conv2d(scope+'_3', x_hat, 16, activation=tf.nn.elu)

                r = tf.concat([x,x_hat], 3)
                h_dec = tf.reshape( self.ls.dense(scope, h_dec, _h*_w*_c), [-1, int(_h/4), int(_w/4),_c*4*4])
                return tf.concat([r,h_dec], 3)
            elif False:

                scope = 'read_1'
                x = self.ls.conv2d(scope+'_1', x, 128, activation=tf.nn.elu)
                x = self.ls.conv2d(scope+'_2', x, 128, activation=tf.nn.elu)
                x = self.ls.conv2d(scope+'_3', x, 128, activation=tf.nn.elu)
                x = self.ls.max_pool(x)
                scope = 'read_2'
                x = self.ls.conv2d(scope+'_1', x, 256, activation=tf.nn.elu)
                x = self.ls.conv2d(scope+'_2', x, 256, activation=tf.nn.elu)
                x = self.ls.conv2d(scope+'_3', x, 256, activation=tf.nn.elu)
                x = self.ls.max_pool(x)
                scope = 'read_3'
                x = self.ls.conv2d(scope+'_1', x, 512, activation=tf.nn.elu)
                x = self.ls.conv2d(scope+'_2', x, 256, activation=tf.nn.elu, filter_size=(1,1))
                x = self.ls.conv2d(scope+'_3', x, 128, activation=tf.nn.elu, filter_size=(1,1))
                x = self.ls.conv2d(scope+'_4', x, 64, activation=tf.nn.elu, filter_size=(1,1))

                scope = 'read_hat_1'
                x_hat = self.ls.conv2d(scope+'_1', x_hat, 128, activation=tf.nn.elu)
                x_hat = self.ls.max_pool(x_hat)
                scope = 'read_hat_2'
                x_hat = self.ls.conv2d(scope+'_1', x_hat, 256, activation=tf.nn.elu)
                x_hat = self.ls.max_pool(x_hat)
                scope = 'read_hat_3'
                x_hat = self.ls.conv2d(scope+'_4', x_hat, 16, activation=tf.nn.elu, filter_size=(1,1))
                r = tf.concat([x,x_hat], 3)
                h_dec = tf.reshape( self.ls.dense(scope, h_dec, _h*_w*_c), [-1, int(_h/4), int(_w/4),_c*4*4])
                return tf.concat([r,h_dec], 3)
            else:
                r = tf.concat([x,x_hat], 3)
            USE_DEC_LOWEST_PREV = True
            if USE_DEC_LOWEST_PREV:
                # use decoder feedback as element-wise adding   
                # Eq.(21) in [Gregor, 2016]
                scope = 'read'
                USE_CONV = True
                if USE_CONV:
                    h_dec = tf.reshape( self.ls.dense(scope, h_dec, _h*_w*_c), [-1, _h,_w,_c])
                    h_dec = self.ls.conv2d("conv", h_dec, _c*2, activation=tf.nn.elu)
                    return r + h_dec
                else:
                    h_dec = tf.reshape( self.ls.dense(scope, h_dec, _h*_w*_c*2), [-1, _h,_w,_c*2])
                    return r + h_dec
            else:
                return r
        else:
            return tf.concat([x,x_hat], 1)
    
    def _read_attention( self, x, x_hat, h_dec ):
        _h,_w,_c = self.h, self.w, self.c
        N = self.read_n
        if self.is_3d:
            Fx,Fy,gamma = self._set_window("read", h_dec,N)
            # Fx is (?, 5, 32, 3)
            # gamma is (?, 3)
            def filter_img(img,Fx,Fy,gamma, N):
                # Fx and Fy are (?, 5, 32, 3)
                Fxt = tf.transpose(Fx,perm=[0,3,2,1])
                Fy  = tf.transpose(Fy,perm=[0,3,2,1])
                
                # img.get_shape() has already been (?, 32, 32, 3)
                img  = tf.transpose(img, perm=[0,3,2,1])
                # tf.matmul(img,Fxt) is (?, 3, 32, 5)
                img_Fxt = tf.matmul(img,Fxt)
                img_Fxt = tf.transpose(img_Fxt, perm=[0,1,3,2])
                # Fy: (?, 3, 32, 5)
                Fy  = tf.transpose(Fy,perm=[0,1,3,2])
                glimpse = tf.matmul(Fy, img_Fxt, transpose_b=True)
                # glimpse.get_shape() is (?, _c, N, N), e.g. (?, 3, 5, 5) for N=5
                glimpse = tf.transpose(glimpse, perm=[0,2,3,1])
                glimpse = tf.reshape(glimpse,[-1,N*N, _c])
    
                glimpse = tf.transpose(glimpse, perm=[0,2,1])
                gamma   = tf.reshape(gamma,[-1,1, _c])
                gamma   = tf.transpose(gamma,   perm=[0,2,1])
                o = glimpse*gamma
                o = tf.transpose(o, perm=[0,2,1])
                return o
            x = filter_img( x, Fx, Fy, gamma, N) # batch x (read_n*read_n)
            x_hat = filter_img( x_hat, Fx, Fy, gamma, N)
            x = tf.reshape(x, [-1, N,N,_c])
            x_hat = tf.reshape(x_hat, [-1, N,N,_c])
            return tf.concat([x,x_hat], 3)
        else:
            Fx,Fy,gamma = self._set_window("read", h_dec,N)
            # Fx: (?, 5, 32), gamma: (?, 1)
            def filter_img(img,Fx,Fy,gamma,N):
                #print('filter_img in is_image == False')
                Fxt = tf.transpose(Fx,perm=[0,2,1])
                img = tf.reshape(img,[-1,_w,_h])
                # Fxt : (?, 32, 5)
                # img : (?, 32, 32)
                glimpse = tf.matmul(Fy,tf.matmul(img,Fxt))
                glimpse = tf.reshape(glimpse,[-1,N*N])
                return glimpse*tf.reshape(gamma,[-1,1])
            x = filter_img( x, Fx, Fy, gamma, N) # batch x (read_n*read_n)
            x_hat = filter_img( x_hat, Fx, Fy, gamma, N)
            return tf.concat([x,x_hat], 1) # concat along feature axis
    
    
    ###########################
    """       WRITER        """
    ###########################
    def _write_no_attention(self, h):
        scope = "write"
        _h,_w,_c = self.h, self.w, self.c
        if self.is_3d:
            IS_SIMPLE_WRITE = True
            if IS_SIMPLE_WRITE :
                print('IS_SIMPLE_WRITE:', IS_SIMPLE_WRITE)  
                return tf.reshape( self.ls.dense(scope, h, _h*_w*_c, tf.nn.elu), [-1, _h, _w, _c])
            else:
                IS_CONV_LSTM = True
                if IS_CONV_LSTM :
                    raise NotImplementedError
                else:
                    activation = tf.nn.elu
                    print('h in write:', h) # h.shape is (_b, RNN_SIZES[0])
                    L = 1
                    h = tf.reshape( h, (-1, 2,2,64*3)) # should match to RNN_SIZES[0]
                    h = self.ls.deconv2d(scope+'_1', h, 64*2) # 4
                    h = activation(h)
                    L = 2
                    h = self.ls.deconv2d(scope+'_2', h, 16*3) # 8
                    h = activation(h)
                    h = PS(h, 4, color=True)
                    print('h in write:', h)
                return tf.reshape( h, [-1, _h, _w, _c])
        else:
            return self.ls.dense( scope,h, _h*_w*_c )
    
    def _write_attention(self, h_dec):
        scope = "writeW"
        N          = self.write_n
        write_size = N*N
        _h,_w,_c = self.h, self.w, self.c
        Fx, Fy, gamma = self._set_window("write", h_dec, N)
        if self.is_3d:
            # Fx and Fy are (?, 5, 32, 3), gamma is (?, 3)
            w = self.ls.dense( scope, h_dec, write_size*_c) # batch x (write_n*write_n) [ToDo] replace self.ls.dense with deconv
            w = tf.reshape(w,[tf.shape(h_dec)[0],N,N,_c])
            w = tf.transpose(w, perm=[0,3,1,2])
            Fyt = tf.transpose(Fy, perm=[0,3,2,1])  # fixed: the original transposed Fx here, leaving Fy unused
            Fx  = tf.transpose(Fx, perm=[0,3,1,2])
    
            w_Fx = tf.matmul(w, Fx)
            # w_Fx.get_shape() is (?, 3, 5, 32)
            w_Fx = tf.transpose(w_Fx, perm=[0,1,3,2])
    
            wr = tf.matmul(Fyt, w_Fx, transpose_b=True)
            wr = tf.reshape(wr,[tf.shape(h_dec)[0],_w*_h, _c])
            wr = tf.transpose(wr, perm=[0,2,1])
            inv_gamma   = tf.reshape(1.0/gamma,[-1,1, _c])
            inv_gamma   = tf.transpose(inv_gamma, perm=[0,2,1])
            o = wr*inv_gamma
            o = tf.transpose(o, perm=[0,2,1])
            o = tf.reshape(o, [tf.shape(h_dec)[0], _w, _h, _c])
            return o
        else:
            w = self.ls.dense( scope, h_dec,write_size) # batch x (write_n*write_n)
            w = tf.reshape(w,[tf.shape(h_dec)[0],N,N])
            Fyt = tf.transpose(Fy,perm=[0,2,1])
            wr = tf.matmul(Fyt,tf.matmul(w,Fx))
            wr = tf.reshape(wr,[tf.shape(h_dec)[0],_w*_h])
            return wr*tf.reshape(1.0/gamma,[-1,1])

    ###########################
    """  Filter Functions   """
    ###########################
    def _filterbank(self, gx, gy, sigma2, delta, N):
        # hoisted out of the is_3d branch: the else branch below also uses _h and _w
        _h, _w, _c = self.h, self.w, self.c
        if self.is_3d:
            # gx and delta are (?,3)
            grid_i = tf.reshape(tf.cast(tf.range(N*_c), tf.float32), [1, -1, _c])
            mu_x = gx + (grid_i - N / 2 - 0.5) * delta # eq 19
            mu_y = gy + (grid_i - N / 2 - 0.5) * delta # eq 20
            # shape : [1, N, _c]
            w = tf.reshape( tf.cast( tf.range(_w*_c), tf.float32), [1, 1, -1, _c])
            h = tf.reshape( tf.cast( tf.range(_h*_c), tf.float32), [1, 1, -1, _c])
            mu_x = tf.reshape(mu_x, [-1, N, 1, _c])
            mu_y = tf.reshape(mu_y, [-1, N, 1, _c])
            sigma2 = tf.reshape(sigma2, [-1, 1, 1, _c])
            Fx = tf.exp(-tf.square((w - mu_x) / (2*sigma2))) # NB: DRAW eq. 25 squares (w - mu_x) before dividing by 2*sigma2
            Fy = tf.exp(-tf.square((h - mu_y) / (2*sigma2))) # batch x N x B
            # normalize, sum over A and B dims (eps: small module-level constant, e.g. 1e-8)
            Fx = Fx/tf.maximum(tf.reduce_sum(Fx, 2, keep_dims=True), eps)
            Fy = Fy/tf.maximum(tf.reduce_sum(Fy, 2, keep_dims=True), eps)
            return Fx,Fy
    
        else:
            grid_i = tf.reshape(tf.cast(tf.range(N), tf.float32), [1, -1])
            # gx, delta and mu_x are (?, 1), and grid_i is (1, 5))
            mu_x = gx + (grid_i - N / 2 - 0.5) * delta # eq 19
            mu_y = gy + (grid_i - N / 2 - 0.5) * delta # eq 20
            h = tf.reshape(tf.cast(tf.range(_h), tf.float32), [1, 1, -1])
            w = tf.reshape(tf.cast(tf.range(_w), tf.float32), [1, 1, -1])
            mu_x = tf.reshape(mu_x, [-1, N, 1])
            mu_y = tf.reshape(mu_y, [-1, N, 1])
            sigma2 = tf.reshape(sigma2, [-1, 1, 1])
            Fx = tf.exp(-tf.square((w - mu_x) / (2*sigma2))) # NB: DRAW eq. 25 squares (w - mu_x) before dividing by 2*sigma2
            Fy = tf.exp(-tf.square((h - mu_y) / (2*sigma2))) # batch x N x B
            # normalize, sum over A and B dims
            Fx = Fx/tf.maximum(tf.reduce_sum(Fx, 2, keep_dims=True), eps)
            Fy = Fy/tf.maximum(tf.reduce_sum(Fy, 2, keep_dims=True), eps)
            return Fx,Fy
    
    def _set_window(self, scope, h_dec,N):
        if self.is_3d:
            _h,_w,_c = self.h, self.w, self.c
            # get five (BATCH_SIZE, _c) matrixes
            gx_, gy_, log_sigma2, log_delta, log_gamma = self.ls.split( self.ls.dense(scope, h_dec, _c*5), 1, [_c]*5)
            gx_ = tf.reshape(gx_, [-1,1,_c])
            gy_ = tf.reshape(gy_, [-1,1,_c])
            log_sigma2 = tf.reshape(log_sigma2, [-1,1,_c])
            log_delta = tf.reshape(log_delta, [-1,1,_c])
            log_gamma = tf.reshape(log_gamma, [-1,1,_c])
            gx = (_w + 1)/2*(gx_+1)
            gy = (_h + 1)/2*(gy_+1)
            sigma2 = tf.exp(log_sigma2)
            delta = ( max(_h, _w) -1 ) / ( N -1 ) * tf.exp( log_delta ) # batch x N
            return self._filterbank( gx, gy, sigma2, delta, N) + ( tf.exp(log_gamma),)
        else:
            _h, _w = self.h, self.w  # the original omitted this unpack, leaving _h/_w undefined here
            params = self.ls.dense(scope, h_dec, 5)
            gx_, gy_, log_sigma2, log_delta, log_gamma = tf.split(value=params, num_or_size_splits=5, axis=1)
            gx = (_w + 1)/2*(gx_+1)
            gy = (_h + 1)/2*(gy_+1)
            sigma2 = tf.exp(log_sigma2)
            delta = (max(_h, _w)-1)/(N-1)*tf.exp(log_delta) # batch x N
            return self._filterbank(gx,gy,sigma2,delta,N)+(tf.exp(log_gamma),)
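For a concrete view of what _filterbank builds, here is a minimal numpy sketch of one read filterbank from the DRAW paper (Gregor et al., 2015, eqs. 19 and 25); note this follows the paper and squares before dividing by 2*sigma2, which is exactly the point the "# 2*sigma2?" comments above question:

import numpy as np

N, W = 3, 8                                 # 3 filters over an 8-pixel axis
gx, delta, sigma2 = 4.0, 2.0, 1.0
grid_i = np.arange(N)
mu_x = gx + (grid_i - N / 2 - 0.5) * delta  # eq. 19: filter centres [0, 2, 4]
a = np.arange(W)                            # pixel coordinates
Fx = np.exp(-((a[None, :] - mu_x[:, None]) ** 2) / (2 * sigma2))  # eq. 25
Fx /= np.maximum(Fx.sum(axis=1, keepdims=True), 1e-8)             # row-normalize
# each row of Fx is one Gaussian filter; a read glimpse is Fy @ img @ Fx.T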
Example #5
def ssd_net(args,
            inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            dropout_keep_prob=0.5,
            reuse=None,
            scope='ssd_300_vgg'):
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope,
                           default_name='ssd_300_vgg',
                           values=[inputs],
                           reuse=reuse):
        # Original VGG-16 blocks.
        # Block 1
        net = Layers.conv2d(inputs, 3, 64, 3, 1, 'SAME', 'block1_conv1')
        net = Layers.conv2d(net, 64, 64, 3, 1, 'SAME', 'block1_conv2')
        end_points['block1'] = net
        net = Layers.max_pool2d(net, 2, 2, 'VALID', 'block1_pool')
        # Block 2
        net = Layers.conv2d(net, 64, 128, 3, 1, 'SAME', 'block2_conv1')
        net = Layers.conv2d(net, 128, 128, 3, 1, 'SAME', 'block2_conv2')
        end_points['block2'] = net
        net = Layers.max_pool2d(net, 2, 2, 'VALID', 'block2_pool')
        # Block 3
        net = Layers.conv2d(net, 128, 256, 3, 1, 'SAME', 'block3_conv1')
        net = Layers.conv2d(net, 256, 256, 3, 1, 'SAME', 'block3_conv2')
        net = Layers.conv2d(net, 256, 256, 3, 1, 'SAME', 'block3_conv3')
        end_points['block3'] = net
        net = Layers.max_pool2d(net, 2, 2, 'VALID', 'block3_pool')
        # Block 4
        net = Layers.conv2d(net, 256, 512, 3, 1, 'SAME', 'block4_conv1')
        net = Layers.conv2d(net, 512, 512, 3, 1, 'SAME', 'block4_conv2')
        net = Layers.conv2d(net, 512, 512, 3, 1, 'SAME', 'block4_conv3')
        end_points['block4'] = net
        net = Layers.max_pool2d(net, 2, 2, 'VALID', 'block4_pool')
        # Block 5
        net = Layers.conv2d(net, 512, 512, 3, 1, 'SAME', 'block5_conv1')
        net = Layers.conv2d(net, 512, 512, 3, 1, 'SAME', 'block5_conv2')
        net = Layers.conv2d(net, 512, 512, 3, 1, 'SAME', 'block5_conv3')
        end_points['block5'] = net
        net = Layers.max_pool2d(net, 2, 2, 'VALID', 'block5_pool')

        # Additional SSD blocks
        # Block 6
        net = Layers.atrous_conv2d(net,
                                   512,
                                   1024,
                                   3,
                                   6,
                                   'SAME',
                                   scope='block6_atrous_conv')
        end_points['block6'] = net
        net = tf.layers.dropout(net,
                                rate=1.0 - dropout_keep_prob,  # tf.layers.dropout takes a drop rate, not a keep prob
                                training=args.is_training)
        # Block 7
        net = Layers.conv2d(net, 1024, 1024, 1, 1, 'SAME', 'block7_conv')
        end_points['block7'] = net
        net = tf.layers.dropout(net,
                                rate=1.0 - dropout_keep_prob,  # drop rate, not keep prob
                                training=args.is_training)

        # Blocks 8-11: 1x1 then 3x3 convolutions, stride 2 (the last two blocks use stride 1)
        # Block 8
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = Layers.conv2d(net, 1024, 256, 1, 1, 'SAME', 'conv1x1')
            net = Layers.pad2d(net, pad=(1, 1))
            net = Layers.conv2d(net, 256, 512, 3, 2, 'VALID', 'conv3x3')
        end_points[end_point] = net
        # Block 9
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = Layers.conv2d(net, 512, 128, 1, 1, 'SAME', 'conv1x1')
            net = Layers.pad2d(net, pad=(1, 1))
            net = Layers.conv2d(net, 128, 256, 3, 2, 'VALID', 'conv3x3')
        end_points[end_point] = net
        # Block 10
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = Layers.conv2d(net, 256, 128, 1, 1, 'SAME', 'conv1x1')
            net = Layers.pad2d(net, pad=(1, 1))
            net = Layers.conv2d(net, 128, 256, 3, 1, 'VALID', 'conv3x3')
        end_points[end_point] = net
        # Block 11
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = Layers.conv2d(net, 256, 128, 1, 1, 'SAME', 'conv1x1')
            net = Layers.conv2d(net, 128, 256, 3, 1, 'VALID', 'conv3x3')
        end_points[end_point] = net

        # Prediction and localisation layers
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                prediction_, localisation_ = ssd_multibox_layer(
                    end_points[layer], num_classes, anchor_sizes[i],
                    anchor_ratios[i], normalizations[i])
            # a sketch completing the truncated original: collect the raw class
            # scores, their softmax, and the box regressions for each feat layer
            logits.append(prediction_)
            predictions.append(tf.nn.softmax(prediction_))
            localisations.append(localisation_)
        return predictions, localisations, logits, end_points
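A hypothetical driver, to show the interface; args, the session wiring, and the 300x300 input size are assumptions of this sketch:

images = tf.placeholder(tf.float32, [None, 300, 300, 3])
predictions, localisations, logits, end_points = ssd_net(args, images)
# predictions[i]: per-anchor class probabilities for feat_layers[i]
# localisations[i]: per-anchor box regressions (4 values per anchor)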
Example #6
File: VAE.py Project: geosada/LVAT
class VAE(object):
    def __init__(self, resource):
        """ data and external toolkits """
        self.d = resource.dh  # dataset manager
        self.ls = Layers()
        self.lf = LossFunctions(self.ls, self.d, self.encoder)
        """ placeholders defined outside"""
        if c.DO_TRAIN:
            self.lr = resource.ph['lr']

    def encoder(self, h, is_train, y=None):

        if is_train:
            _d = self.d
            #_ = tf.summary.image('image', tf.reshape(h, [-1, _d.h, _d.w, _d.c]), 10)

        scope = 'e_1'
        h = self.ls.conv2d(scope + '_1',
                           h,
                           128,
                           filter_size=(2, 2),
                           strides=(1, 2, 2, 1),
                           padding="VALID")
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)

        scope = 'e_2'
        h = self.ls.conv2d(scope + '_1',
                           h,
                           256,
                           filter_size=(2, 2),
                           strides=(1, 2, 2, 1),
                           padding="VALID")
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)

        scope = 'e_3'
        h = self.ls.conv2d(scope + '_1',
                           h,
                           512,
                           filter_size=(2, 2),
                           strides=(1, 2, 2, 1),
                           padding="VALID")
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        #h = tf.nn.relu(h)
        h = tf.nn.tanh(h)

        # -> (b, 4, 4, 512)

        print('h:', h)
        #h = tf.reshape(h, (c.BATCH_SIZE, -1))
        h = tf.reshape(h, (-1, 4 * 4 * 512))
        print('h:', h)

        h = self.ls.denseV2('top_of_encoder', h, c.Z_SIZE * 2, activation=None)
        print('h:', h)
        return self.ls.vae_sampler_w_feature_slice(h, c.Z_SIZE)

    def decoder(self, h, is_train):

        scope = 'top_of_decoder'
        #h = self.ls.denseV2(scope, h, 128, activation=self.ls.lrelu)
        h = self.ls.denseV2(scope, h, 512, activation=self.ls.lrelu)
        print('h:', scope, h)

        h = tf.reshape(h, (-1, 4, 4, 32))
        print('h:', scope, h)

        scope = 'd_1'
        h = self.ls.deconv2d(scope + '_1', h, 512, filter_size=(2, 2))
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)
        print('h:', scope, h)

        scope = 'd_2'
        h = self.ls.deconv2d(scope + '_2', h, 256, filter_size=(2, 2))
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)
        print('h:', scope, h)

        scope = 'd_3'
        h = self.ls.deconv2d(scope + '_3', h, 128, filter_size=(2, 2))
        h = tf.layers.batch_normalization(h, training=is_train, name=scope)
        h = tf.nn.relu(h)
        print('h:', scope, h)

        scope = 'd_4'
        h = self.ls.conv2d(scope + '_4',
                           h,
                           3,
                           filter_size=(1, 1),
                           strides=(1, 1, 1, 1),
                           padding="VALID",
                           activation=tf.nn.sigmoid)
        print('h:', scope, h)

        return h

    def build_graph_train(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        if c.IS_AUGMENTATION_ENABLED:
            x_l = distorted = self.distort(x_l)

            if c.IS_AUG_NOISE_TRUE:
                x_l = self.ls.get_corrupted(x_l, 0.15)

        z, mu, logsigma = self.encoder(x_l, is_train=True, y=y_l)

        x_reconst = self.decoder(z, is_train=True)
        """ p(x|z) Reconstruction Loss """
        o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'Bernoulli')
        o['x_reconst'] = x_reconst
        o['x'] = x_l
        loss += o['Lr']
        """ VAE KL-Divergence Loss """
        LAMBDA_VAE = 0.1
        o['mu'], o['logsigma'] = mu, logsigma
        # workaround. [ToDo] find the root cause of the KL loss going to inf
        #logsigma = tf.clip_by_norm( logsigma, 10)
        o['Lz'] = self.lf.get_loss_vae(c.Z_SIZE, mu, logsigma, _lambda=0.0)
        loss += LAMBDA_VAE * o['Lz']
        """ set losses """
        o['loss'] = loss
        self.o_train = o
        """ set optimizer """
        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
        grads = optimizer.compute_gradients(loss)
        for i, (g, v) in enumerate(grads):
            if g is not None:
                #g = tf.Print(g, [g], "g %s = "%(v))
                grads[i] = (tf.clip_by_norm(g, 5), v)  # clip gradients
            else:
                print('g is None:', v)
                v = tf.Print(v, [v], "v = ", summarize=10000)

        # update ema in batch_normalization
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            self.op = optimizer.apply_gradients(grads)  # return train_op

    def build_graph_test(self, x_l, y_l):

        o = dict()  # output
        loss = 0

        z, mu, logsigma = self.encoder(x_l, is_train=False, y=y_l)

        x_reconst = self.decoder(mu, is_train=False)
        o['x_reconst'] = x_reconst
        o['x'] = x_l
        #o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'LeastSquare')
        o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'Bernoulli')
        #o['Lr'] = self.lf.get_loss_pxz(x_reconst, x_l, 'DiscretizedLogistic')
        #o['Lr'] = tf.reduce_mean(tf.keras.losses.binary_crossentropy(x_l, x_reconst))
        loss += o['Lr']
        """ set losses """
        o['loss'] = loss
        self.o_test = o

    def distort(self, x):
        """
        maybe helpful http://www.redhub.io/Tensorflow/tensorflow-models/src/master/inception/inception/image_processing.py
        """
        _d = self.d

        def _distort(a_image):
            """
            bounding_boxes: A Tensor of type float32.
                3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. 
            Bounding boxes are supplied and returned as [y_min, x_min, y_max, x_max]
            """
            if c.IS_AUG_TRANS_TRUE:
                a_image = tf.pad(a_image, [[2, 2], [2, 2], [0, 0]])
                a_image = tf.random_crop(a_image, [_d.h, _d.w, _d.c])

            if c.IS_AUG_FLIP_TRUE:
                a_image = tf.image.random_flip_left_right(a_image)

            if c.IS_AUG_ROTATE_TRUE:
                from math import pi
                radian = tf.random_uniform(shape=(), minval=0,
                                           maxval=360) * pi / 180
                a_image = tf.contrib.image.rotate(a_image,
                                                  radian,
                                                  interpolation='BILINEAR')

            if c.IS_AUG_COLOR_TRUE:
                a_image = tf.image.random_brightness(a_image, max_delta=0.2)
                a_image = tf.image.random_contrast(a_image,
                                                   lower=0.2,
                                                   upper=1.8)
                a_image = tf.image.random_hue(a_image, max_delta=0.2)

            if c.IS_AUG_CROP_TRUE:
                # shape: [1, 1, 4]
                bounding_boxes = tf.constant(
                    [[[1 / 10, 1 / 10, 9 / 10, 9 / 10]]], dtype=tf.float32)

                begin, size, _ = tf.image.sample_distorted_bounding_box(
                    (_d.h, _d.w, _d.c),
                    bounding_boxes,
                    min_object_covered=(9.8 / 10.0),
                    aspect_ratio_range=[9.5 / 10.0, 10.0 / 9.5])

                a_image = tf.slice(a_image, begin, size)
                """ for the purpose of distorting not use tf.image.resize_image_with_crop_or_pad under """
                a_image = tf.image.resize_images(a_image, [_d.h, _d.w])
                """ due to the size of channel returned from tf.image.resize_images is not being given,
                    specify it manually. """
                a_image = tf.reshape(a_image, [_d.h, _d.w, _d.c])
            return a_image

        """ process batch times in parallel """
        return tf.map_fn(_distort, x)
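A hypothetical training driver for the VAE above; resource, x_l, y_l, and the feed values are assumptions of this sketch:

vae = VAE(resource)
vae.build_graph_train(x_l, y_l)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, out = sess.run([vae.op, vae.o_train],
                      feed_dict={resource.ph['lr']: 1e-3})
    print(out['loss'], out['Lr'], out['Lz'])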