Example 1
class YOLO:
    def __init__(self, backend, input_size, labels, max_box_per_image,
                 anchors):
        """

        :param backend: 特征提取器
        :param input_size: 输入图像的维度
        :param labels: 标签
        :param max_box_per_image: 每张图像最多所拥有的框数量
        :param anchors: 锚框
        """

        self.input_size = input_size
        self.labels = list(labels)
        self.nb_class = len(self.labels)
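        # anchors is a flat list, two values (w, h) per anchor box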
        self.nb_box = len(anchors) // 2
        self.class_wt = np.ones(self.nb_class, dtype=np.float32)
        self.anchors = anchors
        self.max_box_per_image = max_box_per_image

        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        self.feature_extractor = FullYoloFeature(self.input_size)
        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                        strides=(1, 1),
                        padding='same',
                        name='DetectionLayer',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                          4 + 1 + self.nb_class))(output)
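        # no-op Lambda so that Keras registers true_boxes as a model input
        # (see https://github.com/fchollet/keras/issues/2790)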
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
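        # layers[-4] is the 1x1 DetectionLayer conv; the Reshape, the
        # true_boxes input and the Lambda occupy the last three slots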
        layer = self.model.layers[-4]
        weights = layer.get_weights()

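        # fresh Gaussian weights, scaled down by the number of grid cells
        # so that the initial predictions stay small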
        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_w *
                                                              self.grid_h)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()

    def custom_loss(self, y_true, y_pred):
        mask_shape = tf.shape(y_true)[:4]
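        # (truncated in this example; Example 6 shows the full loss body)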

    def load_weights(self, weight_path):
        self.model.load_weights(weight_path)
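
To make the detection head concrete, a small arithmetic sketch of its output depth (the 5 anchor boxes and 20 classes are illustrative numbers, not values taken from the examples):

nb_box, nb_class = 5, 20
channels = nb_box * (4 + 1 + nb_class)  # x, y, w, h, objectness + class scores per box
assert channels == 125                  # depth of the 1x1 DetectionLayer conv
# the Reshape then unpacks (grid_h, grid_w, 125) into (grid_h, grid_w, 5, 25)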
Example 2
    def __init__(self, backend,
                       input_size, 
                       labels, 
                       max_box_per_image,
                       anchors):

        self.input_size = input_size
        
        self.labels   = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box   = len(anchors)//2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors  = anchors

        self.max_box_per_image = max_box_per_image

        # Make the model
        # make the feature extractor layers
        input_image     = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))  

        
        if backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        else:
            raise Exception('Architecture not supported!')

        print(self.feature_extractor.get_output_shape())    
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()        
        features = self.feature_extractor.extract(input_image)            

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), 
                        (1,1), strides=(1,1), 
                        padding='same', 
                        name='DetectionLayer', 
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        
        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
        new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
Example 3
def import_feature_extractor(backend, input_size):
    if backend == 'Inception3':
        feature_extractor = Inception3Feature(input_size)  
    elif backend == 'SqueezeNet':
        feature_extractor = SqueezeNetFeature(input_size)        
    elif backend == 'MobileNet':
        feature_extractor = MobileNetFeature(input_size)
    elif backend == 'Full Yolo':
        feature_extractor = FullYoloFeature(input_size)
    elif backend == 'Tiny Yolo':
        feature_extractor = TinyYoloFeature(input_size)
    elif backend == 'VGG16':
        feature_extractor = VGG16Feature(input_size)
    elif backend == 'ResNet50':
        feature_extractor = ResNet50Feature(input_size)
    elif os.path.dirname(backend) != "":
        basePath = os.path.dirname(backend)
        sys.path.append(basePath)
        custom_backend_name = os.path.basename(backend)
        custom_backend = import_dynamically(custom_backend_name)
        # validate the class before instantiating it
        if not issubclass(custom_backend, BaseFeatureExtractor):
            raise RuntimeError('You are trying to import a custom backend; it must'
                               ' inherit from "backend.BaseFeatureExtractor".')
        feature_extractor = custom_backend(input_size)
        print('Using a custom backend called {}.'.format(custom_backend_name))
    else:
        raise RuntimeError('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, '
            'SqueezeNet, VGG16, ResNet50, or Inception3 at the moment!')

    return feature_extractor
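
A minimal usage sketch for the dispatcher above, assuming import_feature_extractor and the backend classes are importable ('Tiny Yolo' and the 416 input size are illustrative arguments):

extractor = import_feature_extractor('Tiny Yolo', 416)
grid_h, grid_w = extractor.get_output_shape()

# a custom backend is requested with a path: its directory is added to
# sys.path, the module is imported dynamically, and the class it provides
# must subclass backend.BaseFeatureExtractor, e.g.
# extractor = import_feature_extractor('my_backends/my_extractor', 416)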
Example 4
    def __init__(self, architecture, input_size, labels, max_box_per_image, anchors):
        self.architecture = architecture
        self.input_size = input_size
        self.labels   = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box   = 5
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors  = anchors
        self.max_box_per_image = max_box_per_image

        # make the feature extractor layers
        input_image     = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))

        if architecture == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_size)
        elif architecture == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_size)
        elif architecture == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_size)
        elif architecture == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif architecture == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        elif architecture == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_size)
        elif architecture == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_size)
        else:
            raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                        (1,1), strides=(1,1),
                        padding='same',
                        name='conv_23',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
        new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
Example 5
    def __init__(self,
                 backend,
                 input_size,
                 labels,
                 max_box_per_image,
                 anchors,
                 load_from_json=None,
                 trained_weights=None):

        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors

        self.max_box_per_image = max_box_per_image

        if load_from_json is None:

            ##########################
            # Make the model
            ##########################

            # make the feature extractor layers
            input_image = Input(shape=(self.input_size, self.input_size, 3))
            self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

            if backend == 'Inception3':
                self.feature_extractor = Inception3Feature(self.input_size)
            elif backend == 'SqueezeNet':
                self.feature_extractor = SqueezeNetFeature(self.input_size)
            elif backend == 'MobileNet':
                self.feature_extractor = MobileNetFeature(self.input_size)
            elif backend == 'Full Yolo':
                self.feature_extractor = FullYoloFeature(self.input_size)
            elif backend == 'Tiny Yolo':
                self.feature_extractor = TinyYoloFeature(self.input_size)
            elif backend == 'VGG16':
                self.feature_extractor = VGG16Feature(self.input_size)
            elif backend == 'ResNet50':
                self.feature_extractor = ResNet50Feature(self.input_size)
            elif backend == 'Tiniest':
                self.feature_extractor = TiniestYoloFeature(self.input_size)
            else:
                raise Exception(
                    'Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!'
                )

            print(self.feature_extractor.get_output_shape())
            self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
            features = self.feature_extractor.extract(input_image)

            # make the object detection layer
            output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                            strides=(1, 1),
                            padding='same',
                            name='DetectionLayer',
                            kernel_initializer='lecun_normal')(features)
            output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                              4 + 1 + self.nb_class))(output)
            output = Lambda(lambda args: args[0])([output, self.true_boxes])

            self.model = Model([input_image, self.true_boxes], output)

            # initialize the weights of the detection layer
            layer = self.model.layers[-4]
            weights = layer.get_weights()

            new_kernel = np.random.normal(
                size=weights[0].shape) / (self.grid_h * self.grid_w)
            new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                                  self.grid_w)

            layer.set_weights([new_kernel, new_bias])

        else:
            self.feature_extractor = None
            with open(load_from_json, 'rb') as f:
                cfg = pickle.load(f)
                self.model = model_from_json(cfg)

            with open(trained_weights, 'rb') as f:
                weights = pickle.load(f)
                self.model.set_weights(weights)

        self.grid_h, self.grid_w = self.model.get_output_shape_at(-1)[1:3]
        # print a summary of the whole model
        self.model.summary()
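
The load_from_json branch above expects two pickle files; a hedged sketch of the matching save step, assuming a constructed instance named yolo (the file names are illustrative):

import pickle

with open('yolo_model_json.pkl', 'wb') as f:
    pickle.dump(yolo.model.to_json(), f)      # architecture as a JSON string
with open('yolo_weights.pkl', 'wb') as f:
    pickle.dump(yolo.model.get_weights(), f)  # list of weight arrays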
Example 6
class YOLO(object):
    def __init__(self, backend,
                       input_size, 
                       labels, 
                       max_box_per_image,
                       anchors):

        self.input_size = input_size
        
        self.labels   = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box   = len(anchors)//2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors  = anchors

        self.max_box_per_image = max_box_per_image

        # Make the model
        # make the feature extractor layers
        input_image     = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))  

        
        if backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        else:
            raise Exception('Architecture not supported!')

        print(self.feature_extractor.get_output_shape())    
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()        
        features = self.feature_extractor.extract(input_image)            

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), 
                        (1,1), strides=(1,1), 
                        padding='same', 
                        name='DetectionLayer', 
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        
        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
        new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()

    def custom_loss(self, y_true, y_pred):
        mask_shape = tf.shape(y_true)[:4]
        
        cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1)))
        cell_y = tf.transpose(cell_x, (0,2,1,3,4))

        cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1])
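        # e.g. on a 3x3 grid, cell_grid[0, row, col, b] == [col, row]: the
        # per-cell offset added to the sigmoid of the raw x, y predictions
        # (the transpose trick above assumes a square grid, grid_h == grid_w)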
        
        coord_mask = tf.zeros(mask_shape)
        conf_mask  = tf.zeros(mask_shape)
        class_mask = tf.zeros(mask_shape)
        
        seen = tf.Variable(0.)
        total_recall = tf.Variable(0.)
        
        """
        Adjust prediction
        """
        ### adjust x and y      
        pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
        
        ### adjust w and h
        pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(self.anchors, [1,1,1,self.nb_box,2])
        
        ### adjust confidence
        pred_box_conf = tf.sigmoid(y_pred[..., 4])
        
        ### adjust class probabilities
        pred_box_class = y_pred[..., 5:]
        
        """
        Adjust ground truth
        """
        ### adjust x and y
        true_box_xy = y_true[..., 0:2] # relative position to the containing cell
        
        ### adjust w and h
        true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically
        
        ### adjust confidence
        true_wh_half = true_box_wh / 2.
        true_mins    = true_box_xy - true_wh_half
        true_maxes   = true_box_xy + true_wh_half
        
        pred_wh_half = pred_box_wh / 2.
        pred_mins    = pred_box_xy - pred_wh_half
        pred_maxes   = pred_box_xy + pred_wh_half       
        
        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
        
        true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
        pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)
        
        true_box_conf = iou_scores * y_true[..., 4]
        
        ### adjust class probabilities
        true_box_class = tf.argmax(y_true[..., 5:], -1)
        
        """
        Determine the masks
        """
        ### coordinate mask: simply the position of the ground truth boxes (the predictors)
        coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale
        
        ### confidence mask: penalize predictors + penalize boxes with low IOU
        # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
        true_xy = self.true_boxes[..., 0:2]
        true_wh = self.true_boxes[..., 2:4]
        
        true_wh_half = true_wh / 2.
        true_mins    = true_xy - true_wh_half
        true_maxes   = true_xy + true_wh_half
        
        pred_xy = tf.expand_dims(pred_box_xy, 4)
        pred_wh = tf.expand_dims(pred_box_wh, 4)
        
        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
        pred_maxes   = pred_xy + pred_wh_half    
        
        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
        
        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(iou_scores, axis=4)
        conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale
        
        # penalize the confidence of the boxes which are responsible for the corresponding ground truth box
        conf_mask = conf_mask + y_true[..., 4] * self.object_scale
        
        ### class mask: simply the position of the ground truth boxes (the predictors)
        class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale       
        
        """
        Warm-up training
        """
        no_boxes_mask = tf.to_float(coord_mask < self.coord_scale/2.)
        seen = tf.assign_add(seen, 1.)
        
        true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches+1), 
                              lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 
                                       true_box_wh + tf.ones_like(true_box_wh) * \
                                       np.reshape(self.anchors, [1,1,1,self.nb_box,2]) * \
                                       no_boxes_mask, 
                                       tf.ones_like(coord_mask)],
                              lambda: [true_box_xy, 
                                       true_box_wh,
                                       coord_mask])
        
        """
        Finalize the loss
        """
        nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
        nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))
        nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
        
        loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.
        loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
        loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
        
        loss = tf.cond(tf.less(seen, self.warmup_batches+1), 
                      lambda: loss_xy + loss_wh + loss_conf + loss_class + 10,
                      lambda: loss_xy + loss_wh + loss_conf + loss_class)
        
        if self.debug:
            nb_true_box = tf.reduce_sum(y_true[..., 4])
            nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))
            
            current_recall = nb_pred_box/(nb_true_box + 1e-6)
            total_recall = tf.assign_add(total_recall, current_recall) 

            loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
            loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
            loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
            loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
            loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
            loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
            loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)
        
        return loss

    def load_weights(self, weight_path):
        self.model.load_weights(weight_path)

    def train(self, train_imgs,     # the list of images to train the model
                    valid_imgs,     # the list of images used to validate the model
                    train_times,    # the number of times to repeat the training set, often used for small datasets
                    valid_times,    # the number of times to repeat the validation set, often used for small datasets
                    nb_epochs,      # number of epochs
                    learning_rate,  # the learning rate
                    batch_size,     # the size of the batch
                    warmup_epochs,  # number of initial epochs during which the model familiarizes with the new dataset
                    object_scale,
                    no_object_scale,
                    coord_scale,
                    class_scale,
                    saved_weights_name='best_weights.h5',
                    debug=False):     

        self.batch_size = batch_size

        self.object_scale    = object_scale
        self.no_object_scale = no_object_scale
        self.coord_scale     = coord_scale
        self.class_scale     = class_scale

        self.debug = debug

        # Make train and validation generators

        generator_config = {
            'IMAGE_H'         : self.input_size, 
            'IMAGE_W'         : self.input_size,
            'GRID_H'          : self.grid_h,  
            'GRID_W'          : self.grid_w,
            'BOX'             : self.nb_box,
            'LABELS'          : self.labels,
            'CLASS'           : len(self.labels),
            'ANCHORS'         : self.anchors,
            'BATCH_SIZE'      : self.batch_size,
            'TRUE_BOX_BUFFER' : self.max_box_per_image,
        }    

        train_generator = BatchGenerator(train_imgs, 
                                     generator_config, 
                                     norm=self.feature_extractor.normalize)
        valid_generator = BatchGenerator(valid_imgs, 
                                     generator_config, 
                                     norm=self.feature_extractor.normalize,
                                     jitter=False)   
                                     
        self.warmup_batches  = warmup_epochs * (train_times*len(train_generator) + valid_times*len(valid_generator))   

        # Compile the model

        optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        self.model.compile(loss=self.custom_loss, optimizer=optimizer)

        # Make a few callbacks

        early_stop = EarlyStopping(monitor='val_loss', 
                           min_delta=0.001, 
                           patience=3, 
                           mode='min', 
                           verbose=1)
        checkpoint = ModelCheckpoint(saved_weights_name, 
                                     monitor='val_loss', 
                                     verbose=1, 
                                     save_best_only=True, 
                                     mode='min', 
                                     period=1)
        tensorboard = TensorBoard(log_dir=os.path.expanduser('~/logs/'), 
                                  histogram_freq=0, 
                                  #write_batch_performance=True,
                                  write_graph=True, 
                                  write_images=False)

        # Start the training process

        self.model.fit_generator(generator        = train_generator, 
                                 steps_per_epoch  = len(train_generator) * train_times, 
                                 epochs           = warmup_epochs + nb_epochs, 
                                 verbose          = 2 if debug else 1,
                                 validation_data  = valid_generator,
                                 validation_steps = len(valid_generator) * valid_times,
                                 callbacks        = [early_stop, checkpoint, tensorboard], 
                                 workers          = 3,
                                 max_queue_size   = 8)      

        # Compute mAP on the validation set
  
        average_precisions = self.evaluate(valid_generator)     

        # print evaluation
        for label, average_precision in average_precisions.items():
            print(self.labels[label], '{:.4f}'.format(average_precision))
        print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))         

    def evaluate(self, 
                 generator, 
                 iou_threshold=0.3,
                 score_threshold=0.3,
                 max_detections=100,
                 save_path=None):
        
        # gather all detections and annotations
        all_detections     = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
        all_annotations    = [[None for i in range(generator.num_classes())] for j in range(generator.size())]

        for i in range(generator.size()):
            raw_image = generator.load_image(i)

            # make the boxes and the labels
            pred_boxes  = self.predict(raw_image)
            
            score = np.array([box.score for box in pred_boxes])
            pred_labels = np.array([box.label for box in pred_boxes])        
            
            if len(pred_boxes) > 0:
                pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.score] for box in pred_boxes]) 
            else:
                pred_boxes = np.array([[]])  
            
            # sort the boxes and the labels according to scores
            score_sort = np.argsort(-score)
            pred_labels = pred_labels[score_sort]
            pred_boxes  = pred_boxes[score_sort]
            
            # copy detections to all_detections
            for label in range(generator.num_classes()):
                all_detections[i][label] = pred_boxes[pred_labels == label, :]
                
            annotations = generator.load_annotation(i)
            
            # copy detections to all_annotations
            for label in range(generator.num_classes()):
                all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
                
        # compute mAP by comparing all detections and all annotations
        average_precisions = {}
        
        for label in range(generator.num_classes()):
            false_positives = np.zeros((0,))
            true_positives  = np.zeros((0,))
            scores          = np.zeros((0,))
            num_annotations = 0.0

            for i in range(generator.size()):
                detections           = all_detections[i][label]
                annotations          = all_annotations[i][label]
                num_annotations     += annotations.shape[0]
                detected_annotations = []

                for d in detections:
                    scores = np.append(scores, d[4])

                    if annotations.shape[0] == 0:
                        false_positives = np.append(false_positives, 1)
                        true_positives  = np.append(true_positives, 0)
                        continue

                    overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)
                    assigned_annotation = np.argmax(overlaps, axis=1)
                    max_overlap         = overlaps[0, assigned_annotation]

                    if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                        false_positives = np.append(false_positives, 0)
                        true_positives  = np.append(true_positives, 1)
                        detected_annotations.append(assigned_annotation)
                    else:
                        false_positives = np.append(false_positives, 1)
                        true_positives  = np.append(true_positives, 0)

            # no annotations -> AP for this class is 0 (is this correct?)
            if num_annotations == 0:
                average_precisions[label] = 0
                continue

            # sort by score
            indices         = np.argsort(-scores)
            false_positives = false_positives[indices]
            true_positives  = true_positives[indices]

            # compute false positives and true positives
            false_positives = np.cumsum(false_positives)
            true_positives  = np.cumsum(true_positives)

            # compute recall and precision
            recall    = true_positives / num_annotations
            precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

            # compute average precision
            average_precision  = compute_ap(recall, precision)
            average_precisions[label] = average_precision

        return average_precisions    

    def predict(self, image):
        image_h, image_w, _ = image.shape
        image = cv2.resize(image, (self.input_size, self.input_size))
        image = self.feature_extractor.normalize(image)

        input_image = image[:,:,::-1]
        input_image = np.expand_dims(input_image, 0)
        dummy_array = np.zeros((1,1,1,1,self.max_box_per_image,4))

        netout = self.model.predict([input_image, dummy_array])[0]
        boxes  = decode_netout(netout, self.anchors, self.nb_class)

        for i in range(len(boxes)):
            boxes[i].xmin = int(boxes[i].xmin*image_w)
            boxes[i].ymin = int(boxes[i].ymin*image_h)
            boxes[i].xmax = int(boxes[i].xmax*image_w)
            boxes[i].ymax = int(boxes[i].ymax*image_h)

        return boxes
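
A minimal inference sketch for the class above (the weight file, the image path and the constructor arguments are illustrative; the anchors are the common YOLOv2 VOC values, five (w, h) pairs):

import cv2

yolo = YOLO(backend='Full Yolo',
            input_size=416,
            labels=['person'],
            max_box_per_image=10,
            anchors=[0.57, 0.67, 1.87, 2.06, 3.34, 5.47, 7.88, 3.53, 9.77, 9.17])
yolo.load_weights('best_weights.h5')

image = cv2.imread('image.jpg')  # BGR image of any size
boxes = yolo.predict(image)      # BoundBox objects with pixel coordinates
for box in boxes:
    print(box.xmin, box.ymin, box.xmax, box.ymax, box.score)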
Example 7
    def __init__(self, backend, input_size, labels, max_box_per_image, anchors, verbose=1):
        ##########################
        # Save the network parameters
        ##########################
        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################
        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(input_size=self.input_size)
        elif backend == 'Squeezenet':
            self.feature_extractor = SqueezeNetFeature(input_size=self.input_size)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(input_size=self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(input_size=self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(input_size=self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(input_size=self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(input_size=self.input_size)
        else:
            raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

        print('Feature extractor shape: {}'.format(self.feature_extractor.get_output_shape()))
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image=input_image)

        # make the object detection layer
        output = Conv2D(filters=self.nb_box * (4 + 1 + self.nb_class),
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        padding='same',
                        kernel_initializer='lecun_normal',
                        name='DetectionLayer')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)

        # small hack to allow true_boxes to be registered when Keras builds the model
        # for more information: https://github.com/fchollet/keras/issues/2790
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # ??? why does Conv2D_DetectionLayer have to be redefined as below?
        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)
        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        if verbose: self.model.summary()
Example 8
    def __init__(self, backend, input_width, input_height, input_channel,
                 labels, max_box_per_image, anchors, saved_config_name):
        self.input_width = input_width
        self.input_height = input_height
        self.input_channel = input_channel

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2  # each anchor is a (w, h) pair
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors
        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################
        # models.model_1(self.input_height, self.input_width, self.input_channel, \
        #                self.max_box_per_image, self.nb_box, self.nb_class)
        # make the feature extractor layers
        input_image = Input(shape=(self.input_height, self.input_width,
                                   self.input_channel))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_height,
                                                      self.input_width,
                                                      self.input_channel)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_height,
                                                     self.input_width,
                                                     self.input_channel)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_height,
                                                     self.input_width,
                                                     self.input_channel)
        elif backend == 'Tiny Yolo_1':
            self.feature_extractor = TinyYoloFeature_1(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_2':
            self.feature_extractor = TinyYoloFeature_2(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_3':
            self.feature_extractor = TinyYoloFeature_3(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_4':
            self.feature_extractor = TinyYoloFeature_4(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_5':
            self.feature_extractor = TinyYoloFeature_5(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_height,
                                                  self.input_width)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_height,
                                                     self.input_width)
        elif backend == 'My Yolo':
            self.feature_extractor = MyYoloFeature(self.input_height,
                                                   self.input_width)
        else:
            raise Exception(
                'Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!'
            )

        # print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()

        #features = self.feature_extractor.extract(input_image)
        features = self.feature_extractor.feature_extractor.output

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                        strides=(1, 1),
                        padding='same',
                        name='DetectionLayer',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                          4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        #self.model = Model([input_image, self.true_boxes], output)
        self.model = Model(
            [self.feature_extractor.feature_extractor.input, self.true_boxes],
            output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                              self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # save model config
        model_json = self.model.to_json()
        with open(str(saved_config_name), "w") as json_file:
            json_file.write(model_json)

        # print a summary of the whole model
        self.feature_extractor.feature_extractor.summary()
        self.model.summary()
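
The config saved above can be restored later with Keras (a sketch; the file name is whatever was passed as saved_config_name, and the Lambda layer may require custom_objects depending on the Keras version):

from keras.models import model_from_json

with open('yolo_config.json') as json_file:
    model = model_from_json(json_file.read())
model.load_weights('best_weights.h5')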
Example 9
class YOLO(object):
    def __init__(self, architecture, input_size, labels, max_box_per_image,
                 anchors):

        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = 5
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if architecture == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)

        else:
            raise Exception(
                'Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!'
            )

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                        strides=(1, 1),
                        padding='same',
                        name='conv_23',
                        kernel_initializer='lecun_normal')(features)
        # output = Conv2D(1024,
        #                 (3,3), strides=(1,1),
        #                 padding='same',
        #                 name='conv_23',
        #                 kernel_initializer='lecun_normal',
        #                 use_bias=False)(features)

        # output = Flatten()(output)
        # output = Dense(512)(output)
        # # output = Dense(16384)(output)
        # output = LeakyReLU(alpha=0.1)(output)
        # output = Dense(self.grid_h*self.grid_w*self.nb_box*( 4 + 1 + self.nb_class))(output)

        # make the object detection layer

        output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                          4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                              self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()

    def custom_loss(self, y_true, y_pred):
        mask_shape = tf.shape(y_true)[:4]

        cell_x = tf.to_float(
            tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]),
                       (1, self.grid_h, self.grid_w, 1, 1)))
        cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4))

        cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1),
                            [self.batch_size, 1, 1, 5, 1])

        coord_mask = tf.zeros(mask_shape)
        conf_mask = tf.zeros(mask_shape)
        class_mask = tf.zeros(mask_shape)

        seen = tf.Variable(0.)
        total_recall = tf.Variable(0.)
        """
        Adjust prediction
        """
        ### adjust x and y
        pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

        ### adjust w and h
        pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(
            self.anchors, [1, 1, 1, self.nb_box, 2])

        ### adjust confidence
        pred_box_conf = tf.sigmoid(y_pred[..., 4])

        ### adjust class probabilities
        pred_box_class = y_pred[..., 5:]
        """
        Adjust ground truth
        """
        ### adjust x and y
        true_box_xy = y_true[..., 0:2]  # relative position to the containing cell

        ### adjust w and h
        true_box_wh = y_true[..., 2:4]  # number of cells across, horizontally and vertically

        ### adjust confidence
        true_wh_half = true_box_wh / 2.
        true_mins = true_box_xy - true_wh_half
        true_maxes = true_box_xy + true_wh_half

        pred_wh_half = pred_box_wh / 2.
        pred_mins = pred_box_xy - pred_wh_half
        pred_maxes = pred_box_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
        pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        true_box_conf = iou_scores * y_true[..., 4]

        ### adjust class probabilities
        true_box_class = tf.argmax(y_true[..., 5:], -1)
        """
        Determine the masks
        """
        ### coordinate mask: simply the position of the ground truth boxes (the predictors)
        coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale

        ### confidence mask: penalize predictors + penalize boxes with low IOU
        # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
        true_xy = self.true_boxes[..., 0:2]
        true_wh = self.true_boxes[..., 2:4]

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy, 4)
        pred_wh = tf.expand_dims(pred_box_wh, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(iou_scores, axis=4)
        conf_mask = conf_mask + tf.to_float(
            best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale

        # penalize the confidence of the boxes which are responsible for the corresponding ground truth box
        conf_mask = conf_mask + y_true[..., 4] * self.object_scale

        ### class mask: simply the position of the ground truth boxes (the predictors)
        class_mask = y_true[..., 4] * tf.gather(
            self.class_wt, true_box_class) * self.class_scale
        """
        Warm-up training
        """
        no_boxes_mask = tf.to_float(coord_mask < self.coord_scale / 2.)
        seen = tf.assign_add(seen, 1.)

        true_box_xy, true_box_wh, coord_mask = tf.cond(
            tf.less(seen, self.warmup_bs),
            lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
                     true_box_wh + tf.ones_like(true_box_wh) *
                     np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2]) *
                     no_boxes_mask,
                     tf.ones_like(coord_mask)],
            lambda: [true_box_xy, true_box_wh, coord_mask])
        """
        Finalize the loss
        """
        nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
        nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
        nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

        loss_xy = tf.reduce_sum(
            tf.square(true_box_xy - pred_box_xy) *
            coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_wh = tf.reduce_sum(
            tf.square(true_box_wh - pred_box_wh) *
            coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_conf = tf.reduce_sum(
            tf.square(true_box_conf - pred_box_conf) *
            conf_mask) / (nb_conf_box + 1e-6) / 2.
        loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=true_box_class, logits=pred_box_class)
        loss_class = tf.reduce_sum(
            loss_class * class_mask) / (nb_class_box + 1e-6)

        loss = loss_xy + loss_wh + loss_conf + loss_class

        if self.debug:
            nb_true_box = tf.reduce_sum(y_true[..., 4])
            nb_pred_box = tf.reduce_sum(
                tf.to_float(true_box_conf > 0.5) *
                tf.to_float(pred_box_conf > 0.3))

            current_recall = nb_pred_box / (nb_true_box + 1e-6)
            total_recall = tf.assign_add(total_recall, current_recall)

        return loss

    def load_weights(self, weight_path):
        self.model.load_weights(weight_path)

    def predict(self, image):
        image = cv2.resize(image, (self.input_size, self.input_size))
        image = self.feature_extractor.normalize(image)

        input_image = image[:, :, ::-1]
        input_image = np.expand_dims(input_image, 0)
        dummy_array = np.zeros((1, 1, 1, 1, self.max_box_per_image, 4))

        netout = self.model.predict([input_image, dummy_array])[0]
        boxes = self.decode_netout(netout)

        return boxes

    def bbox_iou(self, box1, box2):
        x1_min = box1.x - box1.w / 2
        x1_max = box1.x + box1.w / 2
        y1_min = box1.y - box1.h / 2
        y1_max = box1.y + box1.h / 2

        x2_min = box2.x - box2.w / 2
        x2_max = box2.x + box2.w / 2
        y2_min = box2.y - box2.h / 2
        y2_max = box2.y + box2.h / 2

        intersect_w = self.interval_overlap([x1_min, x1_max], [x2_min, x2_max])
        intersect_h = self.interval_overlap([y1_min, y1_max], [y2_min, y2_max])

        intersect = intersect_w * intersect_h

        union = box1.w * box1.h + box2.w * box2.h - intersect

        return float(intersect) / union

    def interval_overlap(self, interval_a, interval_b):
        x1, x2 = interval_a
        x3, x4 = interval_b

        if x3 < x1:
            if x4 < x1:
                return 0
            else:
                return min(x2, x4) - x1
        else:
            if x2 < x3:
                return 0
            else:
                return min(x2, x4) - x3

    def decode_netout(self, netout, obj_threshold=0.3, nms_threshold=0.3):
        grid_h, grid_w, nb_box = netout.shape[:3]

        boxes = []

        # decode the output by the network
        netout[..., 4] = self.sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * self.softmax(
            netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > obj_threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # element 4 is the confidence; elements 5 onwards are the class scores
                    classes = netout[row, col, b, 5:]

                    if np.sum(classes) > 0:
                        # first 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]

                        x = (col + self.sigmoid(x)) / grid_w  # center position, unit: image width
                        y = (row + self.sigmoid(y)) / grid_h  # center position, unit: image height
                        w = self.anchors[2 * b + 0] * np.exp(w) / grid_w  # unit: image width
                        h = self.anchors[2 * b + 1] * np.exp(h) / grid_h  # unit: image height
                        confidence = netout[row, col, b, 4]

                        box = BoundBox(x, y, w, h, confidence, classes)

                        boxes.append(box)

        # suppress non-maximal boxes
        for c in range(self.nb_class):
            sorted_indices = list(
                reversed(np.argsort([box.classes[c] for box in boxes])))

            for i in range(len(sorted_indices)):
                index_i = sorted_indices[i]

                if boxes[index_i].classes[c] == 0:
                    continue
                else:
                    for j in range(i + 1, len(sorted_indices)):
                        index_j = sorted_indices[j]

                        if self.bbox_iou(boxes[index_i],
                                         boxes[index_j]) >= nms_threshold:
                            boxes[index_j].classes[c] = 0

        # remove the boxes which are less likely than the obj_threshold
        boxes = [box for box in boxes if box.get_score() > obj_threshold]

        return boxes

    def sigmoid(self, x):
        return 1. / (1. + np.exp(-x))

    def softmax(self, x, axis=-1, t=-100.):
        x = x - np.max(x)

        if np.min(x) < t:
            x = x / np.min(x) * t

        e_x = np.exp(x)

        return e_x / e_x.sum(axis, keepdims=True)

    def train(
            self,
            train_imgs,  # the list of images to train the model
            valid_imgs,  # the list of images used to validate the model
            train_times,  # the number of times to repeat the training set, often used for small datasets
            valid_times,  # the number of times to repeat the validation set, often used for small datasets
            nb_epoch,  # number of epochs
            learning_rate,  # the learning rate
            batch_size,  # the size of the batch
            warmup_epochs,  # number of initial epochs during which the model familiarizes with the new dataset
            object_scale,
            no_object_scale,
            coord_scale,
            class_scale,
            saved_weights_name='best_weights.h5',
            debug=False):

        self.batch_size = batch_size
        self.warmup_bs = warmup_epochs * (train_times *
                                          (len(train_imgs) / batch_size + 1) +
                                          valid_times *
                                          (len(valid_imgs) / batch_size + 1))

        self.object_scale = object_scale
        self.no_object_scale = no_object_scale
        self.coord_scale = coord_scale
        self.class_scale = class_scale

        self.debug = debug

        if warmup_epochs > 0:
            nb_epoch = warmup_epochs  # if it's warmup stage, don't train more than warmup_epochs

        ############################################
        # Compile the model
        ############################################

        optimizer = Adam(lr=learning_rate,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08,
                         decay=0.0)
        self.model.compile(loss=self.custom_loss, optimizer=optimizer)

        ############################################
        # Make train and validation generators
        ############################################

        generator_config = {
            'IMAGE_H': self.input_size,
            'IMAGE_W': self.input_size,
            'GRID_H': self.grid_h,
            'GRID_W': self.grid_w,
            'BOX': self.nb_box,
            'LABELS': self.labels,
            'CLASS': len(self.labels),
            'ANCHORS': self.anchors,
            'BATCH_SIZE': self.batch_size,
            'TRUE_BOX_BUFFER': self.max_box_per_image,
        }

        train_batch = BatchGenerator(train_imgs,
                                     generator_config,
                                     norm=self.feature_extractor.normalize)
        valid_batch = BatchGenerator(valid_imgs,
                                     generator_config,
                                     norm=self.feature_extractor.normalize,
                                     jitter=False)

        ############################################
        # Make a few callbacks
        ############################################

        early_stop = EarlyStopping(monitor='val_loss',
                                   min_delta=0.001,
                                   patience=3,
                                   mode='min',
                                   verbose=1)
        checkpoint = ModelCheckpoint(saved_weights_name,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min',
                                     period=1)
        tb_counter = len([
            log for log in os.listdir(os.path.expanduser('~/logs/'))
            if 'yolo' in log
        ]) + 1
        tensorboard = TensorBoard(log_dir=os.path.expanduser('~/logs/') +
                                  'yolo' + '_' + str(tb_counter),
                                  histogram_freq=0,
                                  write_graph=True,
                                  write_images=False)

        ############################################
        # Start the training process
        ############################################

        self.model.fit_generator(
            generator=train_batch,
            steps_per_epoch=len(train_batch) * train_times,
            epochs=nb_epoch,
            verbose=1,
            validation_data=valid_batch,
            validation_steps=len(valid_batch) * valid_times,
            callbacks=[checkpoint, tensorboard],
            workers=3,
            max_queue_size=8)
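A hypothetical call to this method could look like the following. The annotation lists and every hyperparameter value here are placeholders; in this codebase train_imgs/valid_imgs would plausibly come from an annotation parser, but that is an assumption, not something shown above:

    # hypothetical usage sketch; yolo is an instance of the class above,
    # train_imgs/valid_imgs are lists of annotated images (format assumed)
    yolo.train(train_imgs=train_imgs,
               valid_imgs=valid_imgs,
               train_times=8,        # repeat a small training set 8x per epoch
               valid_times=1,
               nb_epoch=50,
               learning_rate=1e-4,
               batch_size=16,
               warmup_epochs=3,
               object_scale=5.0,     # loss-term weights; values are illustrative
               no_object_scale=1.0,
               coord_scale=1.0,
               class_scale=1.0,
               saved_weights_name='best_weights.h5',
               debug=False)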
Esempio n. 10
0
    def __init__(self,
                 backend,
                 input_size,
                 labels,
                 max_box_per_image,
                 anchors,
                 training=True):

        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors
        self.training = training

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_size)
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_size,
                                                       training=self.training)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_size)
        else:
            raise Exception(
                'Architecture not supported! Only Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 are supported at the moment!'
            )

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        # make the object detection layer
        output_01 = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='DetectionLayer',
                           kernel_initializer='lecun_normal')(features)
        output_02 = Reshape((self.grid_h, self.grid_w, self.nb_box,
                             4 + 1 + self.nb_class))(output_01)
        output_03 = Lambda(lambda args: args[0])([output_02, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output_03)
        # self.model = Model(input_image, output_01)
        # self.batch_size = 2
        # optimizer = Adam(lr=.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        # self.model.compile(loss='MSE', optimizer=optimizer)

        # initialize the weights of the detection layer
        # layer = self.model.layers[-4]
        # weights = layer.get_weights()
        #
        # new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
        # new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)
        #
        # layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
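The commented-out lines in this example hint at a Lambda-free variant for inference. A minimal sketch of deriving such an inference model from a built training model follows; it assumes yolo is an instance of this class and reuses the 'DetectionLayer' name from above, re-applying the reshape by hand:

    # sketch: build an inference model that drops the true_boxes input and the
    # pass-through Lambda (imports may already be in scope in this codebase)
    from keras.models import Model
    from keras.layers import Reshape

    det = yolo.model.get_layer('DetectionLayer').output
    det = Reshape((yolo.grid_h, yolo.grid_w, yolo.nb_box,
                   4 + 1 + yolo.nb_class))(det)
    infer_model = Model(yolo.model.inputs[0], det)   # image input only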
Esempio n. 11
0
    def __init__(self, backend,
                       input_size,
                       labels,
                       max_box_per_image,
                       anchors):

        self.input_size = input_size

        self.labels   = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box   = len(anchors)//2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors  = anchors

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        input_image     = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_size)
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_size)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_size)
        else:
            raise Exception('Architecture not supported! Only Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 are supported at the moment!')

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()

        features = self.feature_extractor.feature_extractor.output        # To join the feature extractor and the detection layer
        #features = self.feature_extractor.extract(input_image) #original
        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                        (1,1), strides=(1,1),
                        padding='same',
                        name='DetectionLayer',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
        # the Lambda layer is only needed during training; for inference it can be removed
        output = Lambda(lambda args: args[0])([output, self.true_boxes])
        self.model = Model([self.feature_extractor.feature_extractor.input , self.true_boxes], output)        # To join the feature extractor and the detection layer
        #self.model = Model([input_image, self.true_boxes], output) #original

        # uncomment to freeze all layers except the last five (see the fine-tuning sketch after this example)
        #for layer in self.model.layers[:-5]:
        #  layer.trainable = False

        ####################    uncomment this block only when training from scratch
        # initialize the weights of the detection layer
        #layer = self.model.layers[-4]
        #weights = layer.get_weights()

        #new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
        #new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)

        #layer.set_weights([new_kernel, new_bias])
        ####################
        # print a summary of the whole model
        self.model.summary()
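The commented-out freezing lines in this example sketch a fine-tuning workflow. Spelled out, it could look like the snippet below, meant to run inside __init__ after the model is built; the layer count and learning rate are illustrative assumptions, not values from the example:

    # illustrative fine-tuning sketch based on the commented-out lines above:
    # freeze everything except the last five layers, then re-compile so the
    # trainable flags take effect (Keras requires a compile after changing them)
    for layer in self.model.layers[:-5]:
        layer.trainable = False

    self.model.compile(loss=self.custom_loss,
                       optimizer=Adam(lr=1e-4))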
Esempio n. 12
0
    def __init__(self, backend,
                       input_size, 
                       labels, 
                       max_box_per_image,
                       anchors,
                       threshold,
                       max_sur):

        self.input_size = input_size
        
        self.labels   = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box   = len(anchors)//2 # presumably the expected number of anchor boxes? I haven't read the paper carefully yet
        self.class_wt = np.ones(self.nb_class, dtype='float32') # not extended here; the per-class weights could be customized
        self.anchors  = anchors
        self.threshold = threshold
        self.max_sur = max_sur

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        # build the input layer for the image
        input_image     = Input(shape=(self.input_size, self.input_size, 3))

        # build the input layer for the bounding-box regression targets
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))  

        # fetch the convolutional part from the backend; each branch returns an object of one of the
        # classes defined in the backend, all of them subclasses of BaseFeatureExtractor
        # inside each backend subclass, self.feature_extractor is the constructed Keras Model; the backend merely wraps it
        # the YOLO class here also has an attribute named feature_extractor, but it holds the backend
        # subclass object, not the Keras Model; don't confuse the two
        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_size)  
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_size)        
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_size)
        else:
            raise Exception('Architecture not supported! Only Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 are supported at the moment!')

        # obtain the size of the output feature map via get_output_shape(), defined in the parent class
        print(issubclass(Model, Layer))
        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()

        # like get_output_shape(), extract() is a parent-class method defined on BaseFeatureExtractor in the backend
        # here it wires the image input layer defined above to the model's feature-extraction module
        # judging by the backend code this step is somewhat redundant, since the backend already has its
        # own input layer; perhaps it is kept for convenience? I think it could be removed
        # in any case, features is a half-built model (its feature-extraction part); calling predict on it directly would yield a feature map
        features = self.feature_extractor.extract(input_image)            

        # make the object detection layer
        # build the model's detection layer
        # the Conv2D output shape is:
        # (self.grid_h, self.grid_w, self.nb_box * (4 + 1 + self.nb_class))
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                        (1,1), strides=(1,1), 
                        padding='same', 
                        name='DetectionLayer', 
                        kernel_initializer='lecun_normal')(features)
        # 13*13 groups of predictions, one for each bounding box (len(anchors)//2 of them)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)

        # this Lambda layer looks odd at first: it takes true_boxes in but returns only the detection
        # output, as if true_boxes were never passed; its purpose is to pull true_boxes into the graph
        # so the custom loss can access them during training (the previous example notes the same thing)
        # note: this implicitly adds an input layer for the true_boxes tensor
        output = Lambda(lambda args: args[0])([output, self.true_boxes])
        self.model = Model([input_image, self.true_boxes], output)

        """
        这里要注意的是:
        现在的model是一个:
        input->BACKEND->Covn2d->Reshape->(input)->Lambda的模型
        一共是6层
        虽然backend里面的构造很复杂,但是在这里被当成一个layer(因为其实MODEL对象是layer对象的子类)
        """

        # output shape: (self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class)
        print(self.model.layers)
        print(f"number of layers:{len(self.model.layers)}")
        print(self.model.output_shape)

        
        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()
        print(f"weigth_2D:{weights}")
        # 第一个array的shape是(w,h,d(上一层传下来有多少个feature-map),number)
        # 第二个array的shape是(number)也就是说每个kernel一个bias
        print(f"weigth_2D_shape:{(weights[0].shape,weights[1].shape)}")
        new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
        new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)
        #分类layer是高斯随机的
        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
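To make the detection-layer shapes concrete, here is a worked example with typical YOLOv2 numbers; the grid size, anchor count, and class count are illustrative, not taken from the code above:

    # worked example of the detection-layer shapes (illustrative numbers)
    grid_h, grid_w = 13, 13
    nb_box   = 5                    # len(anchors) // 2 with 10 anchor values
    nb_class = 20

    filters  = nb_box * (4 + 1 + nb_class)       # 5 * 25 = 125 Conv2D filters
    conv_out = (grid_h, grid_w, filters)          # (13, 13, 125)
    reshaped = (grid_h, grid_w, nb_box, 4 + 1 + nb_class)   # (13, 13, 5, 25)
    print(conv_out, reshaped)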